# From Wikipedia, the free encyclopedia
#! /usr/bin/python
# PhotoCatBot
#
# Walk through [[Category:Wikipedia requested photographs]], looking
# for articles with unqualified {{reqphoto}} tags. If the article
# also has a WikiProject tag identifying the subject matter, put
# the photo request in an appropriate subcategory.
import wikipedia, catlib, pagegenerators
import time
import re
# Category the bot walks, and the edit summary it leaves on every save.
startCat = 'Category:Wikipedia requested photographs'
editComment = 'photo categorization by the [[User:PhotoCatBot|PhotoCat]]'

# Unqualified photo request templates, i.e. a request template name
# immediately followed by the closing braces (no parameters at all).
photoReqPat = re.compile(
    r'{{('
    r'[Rr]eqphoto'
    r'|[Pp]hotoreq'
    r'|[Pp]hotorequest'
    r'|[Pp]hotoneeded'
    r'|[Ii]mages-wanted'
    r'|[Ii]mage-reqphoto'
    r')}}'
)

# Location-oriented WikiProject templates.  Group 2 of a match is the
# location name exactly as spelled in this list.
_LOCATION_PROJECT_NAMES = (
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'Florida', 'GeorgiaUS', 'Hawaii', 'Idaho',
    'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
    'Louisville', 'Maine', 'Maryland', 'Michigan', 'Minnesota',
    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
    'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
    'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
    'Tennessee', 'Texas', 'Utah', 'Virginia', 'Washington',
    'West Virginia', 'Wisconsin', 'Wyoming', 'Poland', 'Australia',
    'India', 'Israel', 'Korea', 'China', 'Canada', 'France', 'Cambodia',
    'Bangladesh', 'Taiwan',
)
wikiLocationPat = re.compile(
    r'{{(WikiProject|Project|WP)[ _]?('
    + '|'.join(_LOCATION_PROJECT_NAMES)
    + r')\s*[|}]'
)
# add_template_param(text, template, parameter, val)
#
# Set PARAMETER=VAL in the first instance of {{TEMPLATE}} found in
# TEXT.
#
# Example:
# add_template_param(text, 'WPBiography', 'needs-photo', 'yes')
#
def add_template_param(text, template, parameter, val):
    """Set PARAMETER=VAL in the first instance of {{TEMPLATE}} in TEXT.

    TEMPLATE is interpolated into a regular expression, so callers may
    pass a regex fragment such as '[Cc]omicsproj'.  PARAMETER is treated
    as a literal parameter name.

    If the template already carries PARAMETER, its value is overridden;
    otherwise '|PARAMETER=VAL' is appended to the template's existing
    parameters.  Only the first instance of the template is touched;
    any later instances are left exactly as they were.

    Returns the modified text, or None if TEXT contains no
    {{TEMPLATE ...}} call.
    """
    # Group 1 is the template name (plus trailing whitespace); group 2
    # is everything between the name and the closing braces.
    template_pat = re.compile(r'{{(%s\b\s*)(\|?.*?)}}' % template, re.DOTALL)
    match = template_pat.search(text)
    if not match:
        return None
    template_name = match.group(1)
    old_params = match.group(2)

    # Match "PARAMETER = value" only where PARAMETER is a whole name, so
    # that e.g. 'image' does not hit 'imagename=' or 'needs-image='.
    # The value part deliberately keeps the original [\s\w]* shape.
    param_pat = re.compile(
        r'(?<![\w-])%s\s*=[\s\w]*' % re.escape(parameter))
    assignment = '%s=%s' % (parameter, val)
    if param_pat.search(old_params):
        # A callable replacement keeps backslashes in VAL literal.
        new_params = param_pat.sub(lambda m: assignment, old_params)
    else:
        new_params = old_params + '|' + assignment

    # Splice by position rather than re-running the pattern over TEXT:
    # this touches only the first instance and avoids interpreting the
    # template's own text as a regex replacement string.
    rebuilt = '{{%s%s}}' % (template_name, new_params)
    return text[:match.start()] + rebuilt + text[match.end():]
# fix_photoreqs(text)
#
# If TEXT contains an unqualified photo request template, look for any
# WikiProject templates that identify the subject matter. Modify or replace
# the photoreq template if necessary.
#
# Returns the modified article text if any changes were made, or None
# if nothing should be done.
#
# (template regex, location) pairs for location-oriented project
# templates that wikiLocationPat does not recognise.
_EXTRA_LOCATION_TEMPLATES = (
    (r'{{WPTR\s*\|', 'Turkey'),
    (r'{{OttawaProject', 'Ottawa'),
    (r'{{Saskatchewanproject', 'Saskatchewan'),
    (r'{{SFBAProject', 'California'),
    (r'{{BRWikiProject', 'Kentucky'),
    (r'{{SG\s*\|', 'Singapore'),
    (r'{{WPCHINA\s*\|', 'China'),
    (r'{{BCproject\s*\|', 'British Columbia'),
    (r'{{WPVN\s*\|', 'Vietnam'),
    (r'{{WPTAIWAN\s*\|', 'Taiwan'),
)

# (template regex, subject) pairs for subject-oriented project templates.
_SUBJECT_TEMPLATES = (
    # The name must be followed by '|' or '}' so that longer template
    # names starting with "Album" are not picked up.
    (r'{{[Aa]lbum\s*[|}]', 'albums'),
    (r'{{[Aa]rchitecture', 'architecture'),
    (r'{{[Bb]eer}}', 'food'),
    (r'{{disaster management}}', 'disaster management'),
    (r'{{[Ff]ishproject', 'fish'),
    (r'{{WikiProject aquarium fishes', 'fish'),
    (r'{{[Mm]otorcycling', 'motorcycles'),
    (r'{{[Pp]ro-wrestling', 'sportspeople'),
    (r'{{[Tt]elevisionWikiProject', 'television programs'),
    (r'{{British TV shows project', 'television programs'),
    (r'{{[Tt]rainsWikiProject', 'transport'),
    (r'{{[Vv]isual arts', 'art'),
    (r'{{WikiProject Anime and manga', 'anime and manga'),
    (r'{{WikiProject Filmmaking', 'filmmaking'),
    (r'{{WikiProject Food and drink', 'food'),
    (r'{{WikiProject Textile Arts', 'textiles and fabrics'),
    (r'{{WPGUNS', 'firearms'),
    (r'{{WPMILHIST', 'military history'),
    (r'{{WPMusInst', 'musical instruments'),
    (r'{{WPSchools', 'schools'),
)

# {{WPBiography}} "<name>-work-group=yes" names -> photo request subject.
# NOTE(review): the royalty key used to be 'royalty and nobility', which
# the [\w&]+ capture below can never produce; 'royalty' assumes the
# template parameter is spelled royalty-work-group -- confirm against the
# template documentation.
_WORK_GROUP_SUBJECTS = {
    'politician': 'politicians and government-people',
    'royalty': 'royalty',
    'military': 'military-people',
    'sports': 'sportspeople',
    'a&e': 'artists and entertainers',
    's&a': 'scientists and academics',
    'musician': 'musicians',
    'peerage': 'peers',
    'baronets': 'baronets',
    'filmbio': 'actors and filmmakers',
}


def _unique(items):
    """Return ITEMS as a list without duplicates, keeping first occurrences."""
    seen = set()
    result = []
    for item in items:
        if item not in seen:
            seen.add(item)
            result.append(item)
    return result


def _matching_values(table, text):
    """Return, in table order, the values whose pattern occurs in TEXT."""
    return [value for pattern, value in table if re.search(pattern, text)]


def _biography_subjects(text):
    """Return the people-related subjects implied by {{WPBiography}}.

    Returns [] when TEXT has no {{WPBiography ...}} template, the mapped
    subjects of its recognised work groups when there are any, and
    ['people'] when the template is present without a recognised group.
    """
    match = re.search(r'{{WPBiography(.*?)}}', text, re.S)
    if not match:
        return []
    groups = re.findall(r'([\w&]+)-work-group\s*=\s*yes', match.group(1))
    people = [_WORK_GROUP_SUBJECTS[group]
              for group in groups if group in _WORK_GROUP_SUBJECTS]
    return people or ['people']


def fix_photoreqs(text):
    """Qualify the unqualified photo request template in TEXT.

    Looks for WikiProject templates that identify the subject matter or
    location.  Depending on what is found, either a parameter is set on
    the project template, or the first unqualified request is replaced
    with {{reqphoto|subject...|in=location...}} (at most three subjects
    and three locations, duplicates dropped).  Any other unqualified
    request templates are then removed.

    Returns the modified text, or None (a false value) when nothing
    identifying was found and TEXT should be left alone.
    """
    newtext = None

    # Templates which take their own "needs photo" style parameter are
    # handled first, because there is no way to supply them with
    # additional qualifiers.
    if re.search(r'{{AARTalk', text):
        newtext = add_template_param(text, 'AARTalk', 'needs-photo', 'yes')
    elif re.search(r'{{BirdTalk', text):
        newtext = add_template_param(text, 'BirdTalk', 'needs-photo', 'yes')
    elif re.search(r'{{[Cc]omicsproj', text):
        newtext = add_template_param(text, '[Cc]omicsproj', 'image', 'yes')
    elif re.search(r'{{[Cc]vgproj', text):
        newtext = add_template_param(text, '[Cc]vgproj', 'screenshot', 'yes')
    #
    # It is important to perform the {{WPBeatles}} match before
    # {{WPBiography}}, since some of the Beatles articles include
    # a retired WPBiography template in an HTML comment, and that
    # recipe would add the parameter to the unused template.
    #
    elif re.search(r'{{WPBeatles', text):
        newtext = photoReqPat.sub(
            r'{{reqphoto|Beatles-related subjects}}', text, count=1)
    elif re.search(r'{{WikiProject Plants', text):
        newtext = add_template_param(
            text, 'WikiProject Plants', 'needs-photo', 'yes')
    else:
        # Locations: the generic WikiProject pattern first, then the
        # specially named project templates.
        locations = []
        match = wikiLocationPat.search(text)
        if match:
            locations.append(match.group(2))
        locations.extend(_matching_values(_EXTRA_LOCATION_TEMPLATES, text))
        locations = _unique(locations)

        # Subjects: project templates first, then WPBiography work groups.
        subjects = _matching_values(_SUBJECT_TEMPLATES, text)
        subjects.extend(_biography_subjects(text))
        subjects = _unique(subjects)

        args = ['reqphoto'] + subjects[:3]
        # zip() stops at the shorter sequence, so at most three
        # locations are emitted, as in=, in2= and in3=.
        for key, place in zip(('in', 'in2', 'in3'), locations):
            args.append('%s=%s' % (key, place))

        # Only rewrite the request if at least one qualifier was found.
        if len(args) > 1:
            newtext = photoReqPat.sub(
                '{{%s}}' % '|'.join(args), text, count=1)

    if newtext:
        # remove any remaining *unqualified* photo request template
        newtext = photoReqPat.sub('', newtext)
    return newtext
def main():
    """Walk the startCat category and save every page that can be refined.

    Each page's text is fetched; pages without an unqualified photo
    request, or for which fix_photoreqs() returns nothing, are skipped.
    Otherwise the new text is saved with editComment.  A LockedPage
    error from put() is ignored and the loop moves on to the next page.
    """
    category = catlib.Category(wikipedia.getSite(), startCat)
    for page in pagegenerators.CategorizedPageGenerator(category):
        original = page.get()
        if not photoReqPat.search(original):
            continue
        revised = fix_photoreqs(original)
        if not revised:
            continue
        try:
            page.put(revised, editComment)
        except wikipedia.LockedPage:
            pass
        # NOTE(review): indentation was lost in the source this was
        # recovered from; the pause is assumed to follow each edit
        # attempt rather than every page visited -- confirm.
        time.sleep(30)
# Script entry point: run the bot, and call wikipedia.stopme() on the
# way out whether main() returns normally or raises.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this
# also runs whenever the module is imported.
try:
    main()
finally:
    wikipedia.stopme()