User:PhotoCatBot/Src

From Wikipedia, the free encyclopedia

#! /usr/bin/python

# PhotoCatBot
#
# Walk through [[Category:Wikipedia requested photographs]], looking
# for articles with unqualified {{reqphoto}} tags.  If the article
# also has a WikiProject tag identifying the subject matter, put the
# photo request in an appropriate subcategory.

import wikipedia, catlib, pagegenerators
import time
import re

# Category whose member pages are scanned, and the edit summary used for
# every page the bot saves.
startCat = 'Category:Wikipedia requested photographs'
editComment = 'photo categorization by the [[User:PhotoCatBot|PhotoCat]]'

# Names (as regex fragments) of the photo request templates.  The compiled
# pattern only matches them when they carry no parameters at all, i.e. the
# "unqualified" form such as {{reqphoto}}.
_photoReqNames = (
    '[Rr]eqphoto',
    '[Pp]hotoreq',
    '[Pp]hotorequest',
    '[Pp]hotoneeded',
    '[Ii]mages-wanted',
    '[Ii]mage-reqphoto',
)
photoReqPat = re.compile(r'{{(%s)}}' % '|'.join(_photoReqNames))

# Location-oriented WikiProjects.  Group 2 of a match is the location name
# exactly as written below.
_wikiLocationNames = (
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
    'Connecticut', 'Delaware', 'Florida', 'GeorgiaUS', 'Hawaii', 'Idaho',
    'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
    'Louisville', 'Maine', 'Maryland', 'Michigan', 'Minnesota',
    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
    'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
    'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
    'Tennessee', 'Texas', 'Utah', 'Virginia', 'Washington',
    'West Virginia', 'Wisconsin', 'Wyoming', 'Poland', 'Australia',
    'India', 'Israel', 'Korea', 'China', 'Canada', 'France', 'Cambodia',
    'Bangladesh', 'Taiwan',
)
wikiLocationPat = re.compile(
    r'{{(WikiProject|Project|WP)[ _]?(%s)\s*[|}]' % '|'.join(_wikiLocationNames))

# add_template_param(text, template, parameter, val)
#
# Set PARAMETER=VAL in the first instance of {{TEMPLATE}} found in
# TEXT.
#
# Example:
# add_template_param(text, 'WPBiography', 'needs-photo', 'yes')
#
def add_template_param(text, template, parameter, val):
    """Set PARAMETER=VAL in the first instance of {{TEMPLATE}} in TEXT.

    TEMPLATE is interpolated into a regular expression unescaped, so a
    caller may pass a pattern such as '[Cc]omicsproj'.  PARAMETER and VAL
    are treated as literal text.

    Only the first matching template is rewritten; any later instances are
    left untouched.  Returns the modified text, or None if TEXT contains no
    matching template.
    """
    # Find the template and capture its name (with trailing whitespace) and
    # whatever parameters were supplied.  The match ends at the first '}}',
    # so templates nested inside the parameters are not supported.
    templatePat = re.compile(r'{{(%s\b\s*)(\|?.*?)}}' % template, re.DOTALL)
    match = templatePat.search(text)
    if not match:
        return None

    templatename = match.group(1)
    oldparams = match.group(2)
    assignment = '%s=%s' % (parameter, val)

    # PARAMETER counts as present only when it appears as "|PARAMETER=".
    # A plain substring test would also hit longer names (e.g. "has-image"
    # when looking for "image") or text inside another parameter's value.
    paramPat = re.compile(r'(\|\s*)%s\s*=[\s\w]*' % re.escape(parameter))
    if paramPat.search(oldparams):
        # Override the existing value.  A function replacement keeps
        # backslashes in VAL from being read as regex escapes.
        newparams = paramPat.sub(lambda m: m.group(1) + assignment, oldparams)
    else:
        newparams = oldparams + '|' + assignment

    # Splice the rebuilt template in by position instead of re-running the
    # pattern: that touches only this instance and copies the surrounding
    # wiki text verbatim.
    rebuilt = '{{%s%s}}' % (templatename, newparams)
    return text[:match.start()] + rebuilt + text[match.end():]

# fix_photoreqs(text)
#
# If TEXT contains an unqualified photo request template, look for any
# WikiProject templates that identify the subject matter.  Modify or replace
# the photoreq template if necessary.
#
# Returns the modified article text if any changes were made, or None
# if nothing should be done.
#
# (regex, location) pairs for location WikiProject banners that are not
# covered by wikiLocationPat.  Order decides which locations become
# in=, in2= and in3=.
_LOCATION_TEMPLATES = (
    (r'{{WPTR\s*\|', 'Turkey'),
    (r'{{OttawaProject', 'Ottawa'),
    (r'{{Saskatchewanproject', 'Saskatchewan'),
    (r'{{SFBAProject', 'California'),
    (r'{{BRWikiProject', 'Kentucky'),
    (r'{{SG\s*\|', 'Singapore'),
    (r'{{WPCHINA\s*\|', 'China'),
    (r'{{BCproject\s*\|', 'British Columbia'),
    (r'{{WPVN\s*\|', 'Vietnam'),
    (r'{{WPTAIWAN\s*\|', 'Taiwan'),
)

# (regex, subject) pairs for subject-matter WikiProject banners.  Order
# matters because only the first three distinct subjects are used.
_SUBJECT_TEMPLATES = (
    (r'{{[Aa]lbum\s*[|}]', 'albums'),
    (r'{{[Aa]rchitecture', 'architecture'),
    (r'{{[Bb]eer}}', 'food'),
    (r'{{disaster management}}', 'disaster management'),
    (r'{{[Ff]ishproject', 'fish'),
    (r'{{WikiProject aquarium fishes', 'fish'),
    (r'{{[Mm]otorcycling', 'motorcycles'),
    (r'{{[Pp]ro-wrestling', 'sportspeople'),
    (r'{{[Tt]elevisionWikiProject', 'television programs'),
    (r'{{British TV shows project', 'television programs'),
    (r'{{[Tt]rainsWikiProject', 'transport'),
    (r'{{[Vv]isual arts', 'art'),
    (r'{{WikiProject Anime and manga', 'anime and manga'),
    (r'{{WikiProject Filmmaking', 'filmmaking'),
    (r'{{WikiProject Food and drink', 'food'),
    (r'{{WikiProject Textile Arts', 'textiles and fabrics'),
    (r'{{WPGUNS', 'firearms'),
    (r'{{WPMILHIST', 'military history'),
    (r'{{WPMusInst', 'musical instruments'),
    (r'{{WPSchools', 'schools'),
)

# Maps the "<name>-work-group=yes" parameters of {{WPBiography}} to photo
# request subjects.  The name is captured with [\w&]+, so keys can never
# contain spaces.
_WORKGROUP_SUBJECTS = {
    'politician': 'politicians and government-people',
    'royalty': 'royalty',
    'military': 'military-people',
    'sports': 'sportspeople',
    'a&e': 'artists and entertainers',
    's&a': 'scientists and academics',
    'musician': 'musicians',
    'peerage': 'peers',
    'baronets': 'baronets',
    'filmbio': 'actors and filmmakers',
}


def _unique(items):
    """Return ITEMS as a list without duplicates, in first-seen order."""
    seen = []
    for item in items:
        if item not in seen:
            seen.append(item)
    return seen


def _find_locations(text):
    """Return the distinct locations named by WikiProject banners in TEXT."""
    found = []
    match = wikiLocationPat.search(text)
    if match:
        found.append(match.group(2))
    for pattern, place in _LOCATION_TEMPLATES:
        if re.search(pattern, text):
            found.append(place)
    return _unique(found)


def _find_subjects(text):
    """Return the distinct subjects named by WikiProject banners in TEXT."""
    found = [subject for pattern, subject in _SUBJECT_TEMPLATES
             if re.search(pattern, text)]

    # WPBiography needs special attention: its work-group parameters pick
    # a kind of person, and a banner without any recognized work group
    # falls back to plain 'people'.
    match = re.search(r'{{WPBiography(.*?)}}', text, re.S)
    if match:
        workgroups = re.findall(r'([\w&]+)-work-group\s*=\s*yes',
                                match.group(1))
        people = [_WORKGROUP_SUBJECTS[g] for g in workgroups
                  if g in _WORKGROUP_SUBJECTS]
        found.extend(people or ['people'])
    return _unique(found)


def _qualified_photo_request(text):
    """Build the qualified {{reqphoto|...}} template for TEXT.

    Uses at most three subjects and three locations.  Returns None when
    no subject or location could be identified.
    """
    args = ['reqphoto'] + _find_subjects(text)[:3]
    for key, place in zip(('in', 'in2', 'in3'), _find_locations(text)):
        args.append('%s=%s' % (key, place))
    if len(args) == 1:
        return None
    return '{{%s}}' % '|'.join(args)


def fix_photoreqs(text):
    """Qualify the unqualified photo request templates in TEXT.

    Looks for WikiProject templates that identify the subject matter and
    either sets the project template's own photo parameter or replaces the
    photo request with a qualified {{reqphoto|...}}.

    Returns the modified article text, or None if nothing should be done.
    """
    # Templates which take their own photo parameter are handled first,
    # because there is no way to supply them with additional qualifiers.
    if re.search(r'{{AARTalk', text):
        newtext = add_template_param(text, 'AARTalk', 'needs-photo', 'yes')
    elif re.search(r'{{BirdTalk', text):
        newtext = add_template_param(text, 'BirdTalk', 'needs-photo', 'yes')
    elif re.search(r'{{[Cc]omicsproj', text):
        newtext = add_template_param(text, '[Cc]omicsproj', 'image', 'yes')
    elif re.search(r'{{[Cc]vgproj', text):
        newtext = add_template_param(text, '[Cc]vgproj', 'screenshot', 'yes')
    #
    # It is important to perform the {{WPBeatles}} match before
    # {{WPBiography}}, since some of the Beatles articles include
    # a retired WPBiography template in an HTML comment, and that
    # recipe would pick up the unused template.
    #
    elif re.search(r'{{WPBeatles', text):
        newtext = photoReqPat.sub(r'{{reqphoto|Beatles-related subjects}}', text)
    elif re.search(r'{{WikiProject Plants', text):
        newtext = add_template_param(text, 'WikiProject Plants', 'needs-photo', 'yes')
    else:
        newtext = None
        request = _qualified_photo_request(text)
        if request is not None:
            # A function replacement inserts the template text literally.
            newtext = photoReqPat.sub(lambda m: request, text)

    if not newtext:
        return None
    # Remove any remaining *unqualified* photo request template.
    return photoReqPat.sub('', newtext)

def main():
    """Walk the pages in startCat and save a fix for each unqualified request.

    Every page whose text contains an unqualified photo request is passed
    to fix_photoreqs(); if that returns new text, the page is saved with
    editComment.  The bot sleeps 30 seconds after each save attempt.
    """
    site = wikipedia.getSite()
    cat = catlib.Category(site, startCat)
    gen = pagegenerators.CategorizedPageGenerator(cat)
    for page in gen:
        try:
            text = page.get()
        except (wikipedia.NoPage, wikipedia.IsRedirectPage):
            # NOTE(review): exception names taken from the pywikipedia
            # framework -- confirm against the installed version.  A page
            # without readable text has nothing to fix, and one such page
            # should not abort the whole run.
            continue

        if not photoReqPat.search(text):
            continue
        newtext = fix_photoreqs(text)
        if not newtext:
            continue

        try:
            page.put(newtext, editComment)
        except wikipedia.LockedPage:
            # The page cannot be edited by the bot; report it and move on.
            wikipedia.output('Skipping locked page: %s' % page.title())
        # Throttle after every save attempt, successful or not.
        time.sleep(30)

# Script entry point: run the bot, and call wikipedia.stopme() on the way
# out whether main() returns normally or raises.
try:
    main()
finally:
    wikipedia.stopme()