User:OrphanBot/orphanbot.pl
From Wikipedia, the free encyclopedia
The source code for OrphanBot's image-removal task. Requires libBot.pl and libPearle2.pl.
#!/usr/bin/perl
# OrphanBot
#
# A bot to remove images from pages in preparation for deletion
use strict;
use warnings;
use Date::Calc qw(Delta_Days Decode_Month Month_to_Text Today);
use Getopt::Long;
require "libBot.pl";
# ---- File-level state shared by the subs and the main run below ----
my $permit_interruptions = 0; # Allow talkpage messages to stop the bot?
# Most recently processed image title; assigned at the start of each pass of the image loop.
my $last_image = undef;
my @last_images;
# Task name, taken from the --task command-line option.
# The task dispatch in this file handles "source", "copyright", "replaceable" and "special";
# any other value is logged as an unknown task and the bot exits.
my $task = "";
# Users notified during this run. undef/0 = not yet; 1 = notified once;
# the count is incremented for every further image of theirs encountered afterwards.
my %users_notified;
my %notifications; # List of user,image pairs, used to ensure that no user is ever notified about an image twice.
my %dont_notify = (); # List of users to never notify
my %image_whitelist = (); # Images to never remove
# Per-task parameters; every one of them is assigned by the task dispatch in the main run.
my ($remove_type, $removal_comment, $removal_prefix, $template_match, $uploader_warning, $uploader_warning_summary, $write_remove_log, $limit_by_date, $test_only); # Params for changing tasks
# Parse the command line: --task=<name> selects which set of images to process.
# NOTE(review): the return value of GetOptions is not checked, so unrecognised options do not stop the bot.
GetOptions('task=s' => \$task);
# Produce the signature text appended to messages left for uploaders.
# Returns a single string: a space followed by five tildes.
# NOTE(review): five tildes is presumably wiki signature markup expanded by the
# server when the message is saved -- confirm against the MediaWiki documentation.
sub sig
{
    my $signature_markup = " ~~~~~";
    return $signature_markup;
}
# No-error-checking removal routine, for special requests.
#
# Fetches $page, comments out (or deletes) every image inclusion matching
# $image_regex, and posts the result unless the file-level $test_only flag is set.
#
# Parameters (positional):
#   $image           - image title; used in log messages and to compute the
#                      minimum plausible length of a match
#   $page            - title of the page to edit
#   $image_regex     - regex source text matching the namespace and image name
#   $removal_prefix  - text placed in front of the commented-out markup; when
#                      undef the matched markup is deleted instead
#   $removal_comment - edit summary handed to Pearle::postPage
#
# Returns the number of substitutions made; 0 when the page is a redirect or
# nothing matched.
#
# Terminates the whole program (exit) when the first match is implausibly short
# or when more than 100 occurrences were replaced.
# NOTE(review): when nothing matched, the unchanged text is still posted; this is
# kept as-is because the bot is configured with nullOK => 1 -- confirm intent.
sub RemoveImageSpecial
{
    my $image = shift;
    my $page = shift;
    my $image_regex = shift;
    my $removal_prefix = shift;
    my $removal_comment = shift;
    my $match_count = 0;    # Number of substitutions performed
    my $match_len = 0;      # Length of the first occurrence that matched

    # Fetch an article page
    my ($text, $editTime, $startTime, $token) = Pearle::getPage($page);
    if($text =~ /#redirect/i)
    {
        Pearle::myLog("Redirect found for page [[$page]] (image [[:$image]])\n");
        userwarnlog("*Redirect found for page [[$page]] (image [[:$image]])\n");
        return 0;
    }

    # Remove the image
    my $regex3 = "(\\[\\[${image_regex}.*?(\\[\\[.*?\\]\\].*?|)+\\]\\][ \\t]*)"; # Regex to match images
    Pearle::myLog("Regex 3: $regex3\n");
    notelog("Regex 3: $regex3\n");
    if($text =~ /$regex3/)
    {
        # Measured on the first occurrence only, before the text is modified.
        $match_len = length($1);
        if(defined($removal_prefix))
        {
            $match_count = $text =~ s/$regex3/<!-- $removal_prefix $1 -->/g;
        }
        else
        {
            $match_count = $text =~ s/$regex3//g;
        }
        if($match_count)
        {
            # A match shorter than "[[" + title + "]]" cannot be a complete inclusion.
            if($match_len < (4 + length($image)))
            {
                notelog("*Short replacement of $match_len bytes in [[$page]]\n");
                Pearle::myLog("Short replacement of $match_len bytes (min " . (length($image) + 4) . ") in [[$page]] ($match_count matches). Exiting.\n");
                Pearle::myLog("Text:\n$text\n");
                exit;
            }
            if($match_count > 100)
            {
                # The program stops here, so the message says "Exiting", matching
                # the short-replacement branch above.
                Pearle::myLog("Too many matches ($match_count) in page [[$page]]. Exiting.\n");
                notelog("Too many matches ($match_count) in page [[$page]]. Exiting.\n");
                exit;
            }
            # A comment terminator directly followed by "]" suggests the match ended too early.
            if($text =~ /-->\]/)
            {
                Pearle::myLog("Possible bracket mixup in page [[$page]]\n");
                userwarnlog(FixupLinks("*Possible bracket mixup in page [[$page]]\n"));
            }
        }
    }

    print "Num: $match_count Len: $match_len\n";
    if($test_only)
    {
        # Dry run: record the intent, do not edit.
        notelog("Special removal for page\n");
    }
    else
    {
        Pearle::postPage($page, $editTime, $startTime, $token, $text, $removal_comment, "no");
    }
    return $match_count;
}
# ---- Startup: load persisted lists, configure the Pearle library, log in ----
# All three lists are read with loadNotificationList from files in the current directory.
%notifications = loadNotificationList("./orphanbot.note");
%dont_notify = loadNotificationList("./orphanbot.whitelist");
%image_whitelist = loadNotificationList("./orphanbot.imagewhitelist");
# The << INSERT ... >> markers are placeholders, not Perl: replace them with the bot's
# account name and password (as quoted strings) before running.
# The remaining arguments are the log file and the cookie file.
Pearle::init(<< INSERT BOT NAME HERE >>, << INSERT BOT PASSWORD HERE >>, "./orphanbot.log","./cookies.pearle.txt");
# NOTE(review): nullOK presumably permits edits that do not change the page text -- confirm in libPearle2.pl.
Pearle::config(nullOK => 1);
config(username => << INSERT BOT NAME HERE >>);
# Nothing can be done without a session, so stop if the login fails.
if(!Pearle::login())
{
exit;
}
# ---- Main run ----
# 1. Select the image list and the per-task parameters from $task.
# 2. For every image: skip whitelisted, Commons, missing, untagged or exempt
#    images; notify the uploader where the task defines a warning; if the image
#    is old enough (or the task has no date limit) remove it from the pages
#    that use it; finally update the image description page.
# 3. Save the notification list.
my $last_run = 0;
my @stbot_images; # Description pages without a file whose oldest history entry is by STBotI
#while(1)
{
    $last_image =~ s/^[Ii]mage:// if(defined($last_image)); # Remove any prepended namespacing
    my @images = ();
    my $image;
    my $edited = 0;
    my $nolimits = 0;

    # Tasks whose description-page tag is re-dated when it carries no date.
    #   name  - template written into the page
    #   label - word used in the edit summary
    #   match - existing tag to replace; used for both the test and the
    #           substitution so the two can never disagree
    my %dated_tag_for = (
        source => {
            name  => 'no source',
            label => 'nosource',
            match => qr/\{\{(?:[Nn]o source|[Nn]sn|[Nn]osource|[Uu]nverified|Di-no source).*?\}\}/,
        },
        copyright => {
            name  => 'no license',
            label => 'nolicense',
            match => qr/\{\{(?:[Nn]o licen[cs]e|[Uu]nknown|[Nn]olicen[cs]e).*?\}\}/,
        },
    );

    userwarnlog("=== Beginning set at " . time() . " for task '$task' ===\n");

    # Task dispatch: choose the images and set every per-task parameter.
    if($task eq "source")
    {
        my $cat = "Category:All images with unknown source";
        @images = Pearle::getCategoryImages($cat);
        $remove_type = 'normal';
        $removal_comment = "Removing image with no source information. Such images that are older than seven days may be deleted at any time.";
        $removal_prefix = "Unsourced image removed:";
        $template_match = "Unless the copyright status is provided|Unless this information is added to this page";
        $uploader_warning = "{{subst:User:OrphanBot/nosource|";
        $uploader_warning_summary = "You've uploaded an unsourced image";
        $write_remove_log = 1;
        $limit_by_date = 1;
        $nolimits = 0;
        $test_only = 0;
    }
    elsif($task eq "copyright")
    {
        my $cat = "Category:All images with unknown copyright status";
        @images = Pearle::getCategoryImages($cat);
        $remove_type = 'normal';
        $removal_comment = "Removing image with no copyright information. Such images that are older than seven days may be deleted at any time.";
        $removal_prefix = "Image with unknown copyright status removed:";
        $template_match = "Unless the copyright status is provided|Unless this information is added to this page|This image was uploaded under good faith using the above tag";
        $uploader_warning = "{{subst:User:OrphanBot/nocopyright|";
        $uploader_warning_summary = "You've uploaded an image with unknown copyright";
        $write_remove_log = 1;
        $limit_by_date = 1;
        $nolimits = 0;
        $test_only = 0;
    }
    elsif($task eq 'replaceable')
    {
        my $cat = "Category:All replaceable fair use images";
        @images = Pearle::getCategoryImages($cat);
        $remove_type = 'normal';
        $removal_comment = "Removing replaceable fair-use image.";
        $removal_prefix = "Replaceable fair-use image removed:";
        $template_match = "for which a free image might reasonably be found";
        $uploader_warning = undef;
        $uploader_warning_summary = undef;
        $write_remove_log = 1;
        $limit_by_date = 1;
        $nolimits = 0;
        $test_only = 0;
    }
    elsif($task eq 'special')
    {
        # Special requests
        @images = Pearle::getLogArticles("upload", 408, 10, "Johnsatchmo");
        @images = map {$_->[0]} @images;
        notelog("Found " . scalar(@images) . " images\n");
        $remove_type = 'normal'; # Use the standard removal system;
        $removal_comment = "Removing image by request; see [[User talk:Carnildo#Bot help.3F]]" ;
        $removal_prefix = "Image with questionable copyright removed:";
        $template_match = undef;
        $uploader_warning = undef;
        $uploader_warning_summary = undef;
        $write_remove_log = 1;
        $limit_by_date = 0;
        $nolimits = 0;
        $test_only = 0;
    }
    else
    {
        notelog("Unknown task: $task\n");
        exit;
    }

    if(scalar(@images) == 0)
    {
        print "Finished with category.\n";
        Pearle::myLog("Finished with category.\n");
        exit;
    }

    image: foreach $image (@images)
    {
        my $image_regex;
        my @pages = ();
        my $page_remove_log = '';
        my ($day, $month, $year);
        my $full_comment = "";

        # Fetch an image page
        my $query = "http://en.wikipedia.org/wiki/$image";
        my $image_text = Pearle::getURL(Pearle::escapeUrl($query));
        $last_image = $image;

        if($permit_interruptions and DoIHaveMessages($image_text))
        {
            print "Talkpage message found; exiting on image $image.\n";
            Pearle::myLog("Talkpage message found; exiting on image $image.\n");
            # Persist the notifications sent so far; otherwise the next run
            # would not know about them.
            saveNotificationList("./orphanbot.note", %notifications);
            exit;
        }
        if($image_whitelist{$image})
        {
            userwarnlog("*Image $image on whitelist\n");
            next;
        }
        # Images from Commons
        if($image_text =~ /Wikimedia Commons<\/a>. The description on its /)
        {
            userwarnlog("*Commons image [[:$image]] found\n");
            next;
        }
        # The odd case of an image description page without an image.
        # $template_match is undef for some tasks; interpolating undef would
        # yield an empty pattern, which Perl treats as "reuse the last
        # successful pattern", so the test must be guarded.
        if(defined($template_match) and $image_text =~ /<p>No file by this name exists; you can <a href=/ and $image_text =~ /$template_match/)
        {
            userwarnlog("*Image [[:$image]] does not appear to exist.\n");
            my @historylist = Pearle::parseHistory($image);
            my $first_entry = pop @historylist;
            if(defined($first_entry) and defined($first_entry->[4]) and $first_entry->[4] eq 'STBotI')
            {
                push @stbot_images, $image;
            }
            next;
        }
        # Check for image existance
        if($image_text =~ /<p>No file by this name exists; you can <a href=/)
        {
            Pearle::myLog("Image [[:$image]] has been deleted.\n");
            notelog("Image [[:$image]] has been deleted.\n");
            next;
        }
        # Check for image copyright tag
        if(defined($template_match) and ($image_text !~ /$template_match/))
        {
            userwarnlog("*Image [[:$image]] in category does not have an appropriate template\n");
            next;
        }
        if($task eq 'source')
        {
            if($image_text =~ /I, the creator of this work/)
            {
                next image;
            }
            if($image_text =~ /title="Category:[^"]*[Ll]ogos"|title="Category:[^"]*[Cc]overs"/)
            {
                Pearle::myLog("*Image [[:$image]] with self-sourcing template found\n");
                next image;
            }
        }
        if($task eq 'replaceable')
        {
            if($image_text =~ /It is disputed whether or not this image violates/)
            {
                Pearle::myLog("*Disputed replaceable fair-use image [[:$image]] found\n");
                next image;
            }
        }

        # Build the regex that finds this image in article text.
        my ($raw_image) = $image =~ /Image:(.*)/;
        # Reject a title without a usable name before it is handed to
        # MakeWikiRegex or interpolated into a pattern.
        if(!defined($raw_image) or $raw_image eq '')
        {
            Pearle::myLog("Parse error on image [[:$image]] (no name after Image: prefix)\n");
            userwarnlog("*Parse error on image [[:$image]] (no name after Image: prefix)\n");
            next;
        }
        $raw_image = MakeWikiRegex($raw_image);
        if($image !~ /(\.jpg|\.jpeg|\.png|\.gif|\.svg)$/i)
        {
            # Files without a picture extension: also accept "Media:" and an
            # optional leading colon before "Image".
            # NOTE(review): "(:?" is a capturing group with an optional colon,
            # not "(?:"; left unchanged because the removal regexes may rely on
            # it -- confirm intent.
            $image_regex = "[ _]*(:?[Ii]mage|[Mm]edia)[ _]*:[ _]*${raw_image}[ _]*";
        }
        else
        {
            $image_regex = "[ _]*[Ii]mage[ _]*:[ _]*${raw_image}[ _]*";
        }
        # Sanity check
        if(!defined($raw_image) or $image !~ /$raw_image/)
        {
            my $shown = defined($raw_image) ? $raw_image : '';
            Pearle::myLog("Parse error on image [[:$image]] ($shown)\n");
            userwarnlog("*Parse error on image [[:$image]] ($shown)\n");
            next;
        }
        Pearle::myLog("Image regex: $image_regex\n");
        notelog("Image regex: $image_regex\n");

        ($day, $month, $year) = getDate($image_text);

        # Notify the user
        my $uploader = getUploader($image_text);
        my $is_notified = 0;
        if(defined($uploader_warning) and defined($uploader))
        {
            $is_notified = isNotified($image_text, $uploader, $image_regex, $image, \%notifications, \%dont_notify);
        }
        if(defined($uploader_warning) and 1 != $is_notified)
        {
            if(defined($uploader))
            {
                if(!($users_notified{$uploader}))
                {
                    Pearle::myLog("Warning user $uploader\n");
                    userwarnlog("${uploader_warning}${image}}}" . sig() . "\n", $uploader, $uploader_warning_summary, $is_notified);
                    $notifications{"$uploader,$image"} = 1;
                    $users_notified{$uploader} = 1;
                }
                else
                {
                    # One message per user per run; just count the extra images.
                    Pearle::myLog("User $uploader has already been warned repeatedly\n");
                    $users_notified{$uploader} += 1;
                }
            }
            else
            {
                Pearle::myLog("Could not determine uploader for [[:$image]]\n");
            }
        }

        # Validate the tag date once. The defined() guards let an unparsable
        # date reach the "Date error" log instead of passing undefined values
        # to Date::Calc.
        my $month_num;
        my $date_ok = 0;
        if(defined($year) and defined($month) and defined($day))
        {
            $month_num = Decode_Month($month);
            $date_ok = 1 if(Date::Calc::check_date($year, $month_num, $day));
        }
        if(!$date_ok)
        {
            Pearle::myLog("Date error for image [[:$image]]\n");
            userwarnlog("*Date error for image [[:$image]]\n");
        }

        # Remove the image only when it was tagged at least four days ago, or
        # when the task does not limit by date.
        if(($date_ok and (Delta_Days($year, $month_num, $day, Today() ) >= 4)) or !($limit_by_date))
        {
            # Ignore any old removal logs
            $image_text =~ s/<ol>.*?<\/ol>//gs;
            if($nolimits)
            {
                @pages = GetFullPageList($image, $image_text);
            }
            else
            {
                @pages = GetPageList($image, $image_text);
            }
            if(scalar(@pages) == 0)
            {
                notelog("Image $image may already be orphaned\n");
                Pearle::myLog("Image $image may already be orphaned\n");
            }
            if(scalar(@pages) > 3)
            {
                my $warningtext = "*Found image [[:$image]] on " . scalar(@pages) . " content pages\n";
                userwarnlog($warningtext);
            }
            foreach my $page (@pages)
            {
                print "Page for removal: $page\n";
                # Turn the first "image" of the summary into a link to the image.
                my $parsed_removal_comment = $removal_comment;
                $parsed_removal_comment =~ s/image/[[:$image|image]]/;
                if(defined($remove_type) and $remove_type eq 'special')
                {
                    RemoveImageSpecial($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment);
                    Pearle::limit();
                }
                else
                {
                    if(my $hits = RemoveImageFromPage($image, $page, $image_regex, $removal_prefix, $parsed_removal_comment)) # Don't limit if we just touched the article
                    {
                        $page_remove_log .= "#[[$page]]\n";
                        notelog("Removed image: $hits hits.\n");
                        Pearle::myLog("Removed image $image from article $page\n");
                        Pearle::limit();
                    }
                }
                $edited = 1;
            }
        }
        else
        {
            Pearle::myLog("Recent image: notification only\n");
            notelog("Recent image: notification only\n");
        }

        # Update image description page
        if($write_remove_log)
        {
            my $edited_idp = 0;
            # Log all removals on the image description page
            print "Will write\n";
            my ($text, $editTime, $startTime, $token) = Pearle::getPage($image);

            # Re-date the deletion tag for the tasks that have one.
            my $tag = $dated_tag_for{$task};
            if(defined($tag) and !isDated($image_text))
            {
                my ($cur_y, $cur_m, $cur_d) = Today();
                $cur_m = Month_to_Text($cur_m);
                print "Changing date\n";
                my $new_template = "{{" . $tag->{name} . "|month=$cur_m|day=$cur_d|year=$cur_y}}";
                # Replace the existing tag; the success of the substitution
                # itself decides which edit summary is used.
                if($text =~ s/$tag->{match}/$new_template/)
                {
                    $full_comment .= "Changing $tag->{label} template format; ";
                }
                else
                {
                    userwarnlog("*Template in [[:$image]] was probably subst'd\n");
                    Pearle::myLog("Template was probably subst'd\n");
                    $text .= "\n\n$new_template\n";
                    $full_comment .= "Adding $tag->{label} template. This may add a second template: the original was probably subst'd. ";
                }
                $edited_idp = 1;
            }

            if($page_remove_log ne "")
            {
                $text .= "\n\nRemoved from the following pages:\n";
                $text .= FixupLinks($page_remove_log);
                $text .= "--~~~~\n";
                $full_comment .= "Listing pages that the image has been removed from";
                $edited_idp = 1;
                print "Remove log\n";
            }
            if($edited_idp)
            {
                if($test_only)
                {
                    notelog("Edited image description page\n");
                }
                else
                {
                    Pearle::postPage($image, $editTime, $startTime, $token, $text, $full_comment, "no");
                }
            }
        }

        # Pause longer after an image that had pages to process.
        if($edited)
        {
            print "Sleeping for 10 seconds\n";
            sleep(10);
        }
        else
        {
            print "Sleeping for two seconds\n";
            sleep(2);
        }
        $edited = 0;
    }

    notelog("Saving notification list\n");
    saveNotificationList("./orphanbot.note", %notifications);
    Pearle::myLog("Finished with category.\n");
    notelog("Finished with category.\n");
}

