User:Drinibot/CapitalizationRedirects
From Wikipedia, the free encyclopedia
The following script is based on pywikipedia's replace.py and was kindly written by es: user Yrithinnd.
# -*- coding: utf-8 -*-
"""
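This bot creates capitalization redirects. For every page it is given, it
looks up the page whose title is the capitalized form of the original title
and, if that page does not exist, offers to create it as a redirect to the
original page. Pages can be taken from a text file, a category, the pages
linking to a given page, an alphabetical walk through the wiki, or can be
named individually.
NOTE: The parameter list below was inherited from replace.py. Only -file,
-cat, -page, -ref, -start, -namespace and -always are recognised by this
script.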
You can run the bot with the following commandline parameters:
-xml - Retrieve information from a local XML dump (pages_current, see
http://download.wikimedia.org).
Argument can also be given as "-xml:filename".
-file - Work on all pages given in a local text file.
Will read any [[wiki link]] and use these articles.
Argument can also be given as "-file:filename".
-cat - Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
-page - Only edit a single page.
Argument can also be given as "-page:pagename". You can give this
parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagename".
-start - Work on all pages in the wiki, starting at a given page. Choose
"-start:!" to start at the beginning.
NOTE: You are advised to use -xml instead of this option; this is
meant for cases where there is no recent XML dump.
-regex - Make replacements using regular expressions. If this argument
isn't given, the bot will make simple text replacements.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
XYZ will be regarded as a regular expression.
-fix:XYZ - Perform one of the predefined replacements tasks, which are given
in the dictionary 'fixes' defined inside this file.
The -regex argument and given replacements will be ignored if
you use -fix.
Currently available predefined fixes are:
* HTML - convert HTML tags to wiki syntax, and fix XHTML
-namespace:n - Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g. want to
iterate over all user pages starting at User:M, use
-start:User:M.
-always - Don't prompt you for each replacement
other: - First argument is the old text, second argument is the new text.
If the -regex argument is given, the first argument will be
regarded as a regular expression, and the second argument might
contain expressions like \\1 or \g<name>.
NOTE: Only use either -xml or -file or -page, but don't mix them.
Examples:
If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the
new syntax, e.g. {{Stub}}, download an XML dump file (cur table) from
http://download.wikimedia.org, then use this command:
python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}"
If you have a dump called foobar.xml and want to fix typos, e.g.
Errror -> Error, use this:
python replace.py -xml:foobar.xml "Errror" "Error"
If you have a page called 'John Doe' and want to convert HTML tags to wiki
syntax, use:
python replace.py -page:John_Doe -fix:HTML
"""
#
# (C) Daniel Herding, 2004
#
# Distributed under the terms of the MIT license.
#
# A future statement may only be preceded by the module docstring, comments,
# blank lines and other future statements, so it has to come before the
# __version__ assignment; the other order is rejected as a SyntaxError.
from __future__ import generators

__version__ = '$Id: replace.py,v 1.87 2006/01/26 19:08:27 leogregianin Exp $'
import sys, re
import wikipedia, pagegenerators, catlib, config
# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
msg = {
    # Spanish default edit summary. The accented "i" (U+00ED) is written as an
    # escape so the text stays correct whatever encoding the file is saved in;
    # the previous literal had been corrupted into mojibake.
    # NOTE(review): the text translates to "Robot: mass deletion of articles",
    # which does not describe creating redirects -- confirm the wording.
    'es': u'Robot: Borrado masivo de art\u00edculos',
}
#
# (C) Yrithinnd
# Class licensed under terms of the MIT license
#
class Drinibot:
    """
    Robot that creates capitalization redirects.

    For every page yielded by the generator, the robot builds a second page
    whose title is ``page.title().capitalize()``. If that second page does
    not exist, it is saved as a redirect pointing at the original page,
    after asking the user for confirmation unless 'accept all' is active.
    """

    def __init__(self, generator, acceptall = False):
        """
        Arguments:
            * generator - Iterable yielding the pages to process.
            * acceptall - If True, redirects are created without prompting.
                          It also becomes True when the user answers 'All'
                          to a prompt during run().
        """
        self.generator = generator
        # Honour the caller's choice; this used to be hard-coded to False,
        # which silently disabled the -always command line option.
        self.acceptall = acceptall

    def run(self):
        """
        Starts the robot.
        """
        # Run the generator which will yield Pages which might need a
        # capitalization redirect.
        for page in self.generator:
            titulo = page.title()
            np = wikipedia.Page(wikipedia.getSite(), titulo.capitalize())
            if np.exists():
                wikipedia.output(u'%s ya existe\nNo se hace nada' % np.title())
                continue
            wikipedia.output(u'%s no existe' % np.title())
            # Stays None when no prompt is shown (accept-all mode).
            choice = None
            if not self.acceptall:
                choice = wikipedia.inputChoice(u'Quieres crear la redireccion asociada?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice in ['a', 'A']:
                    # 'All' answers yes to this and every later prompt.
                    self.acceptall = True
            if self.acceptall or choice in ['y', 'Y']:
                # Signature noted by the original author for reference:
                # put(self, newtext, comment=None, watchArticle=None,
                #     minorEdit=True)
                np.put(u"#REDIRECT [[%s]]" % titulo, u"capitalization redirect")
def main():
    """
    Parse the command line, build the page generator and run the robot.

    Recognised arguments (each of the first five may also be given without
    a value, in which case the user is prompted for it):
        -file:filename    pages linked from a local text file
        -cat:category     pages in a category
        -page:pagename    a single page; may be repeated
        -ref:pagename     pages that link to the given page
        -start:pagename   all pages, starting at the given page
        -namespace:n      restrict to namespace number n; may be repeated
        -always           create redirects without prompting

    If no page source is given, or an argument is not recognised, the
    module help text is printed and the script exits.
    """
    # How we want to retrieve information on which pages need to be processed.
    # One of 'textfile', 'category', 'singlepage', 'ref' or 'allpages'; stays
    # None until a source argument has been seen.
    source = None
    # the file name which will be used when source is 'textfile'.
    textfilename = None
    # the category name which will be used when source is 'category'.
    categoryname = None
    # pages which will be processed when the -page parameter is used
    pageNames = []
    # a page whose referrers will be processed when the -ref parameter is used
    referredPageName = None
    # the page to start at when the -start parameter is used
    firstPageTitle = None
    # will become True when the user uses the -always commandline parameter.
    acceptall = False
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Arguments that matched none of the known options. The original code
    # appended these to a list that was never defined, raising NameError.
    unknown_args = []
    # Load default summary message.
    wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))
    # Read commandline parameters.
    for arg in sys.argv[1:]:
        arg = wikipedia.argHandler(arg, 'replace')
        if not arg:
            # Nothing is left to handle here when argHandler returns a
            # false value.
            continue
        if arg.startswith('-file'):
            if len(arg) == 5:
                textfilename = wikipedia.input(u'Please enter the filename:')
            else:
                textfilename = arg[6:]
            source = 'textfile'
        elif arg.startswith('-cat'):
            if len(arg) == 4:
                categoryname = wikipedia.input(u'Please enter the category name:')
            else:
                categoryname = arg[5:]
            source = 'category'
        elif arg.startswith('-page'):
            if len(arg) == 5:
                pageNames.append(wikipedia.input(u'Which page do you want to chage?'))
            else:
                pageNames.append(arg[6:])
            source = 'singlepage'
        elif arg.startswith('-ref'):
            if len(arg) == 4:
                referredPageName = wikipedia.input(u'Links to which page should be processed?')
            else:
                referredPageName = arg[5:]
            source = 'ref'
        elif arg.startswith('-start'):
            if len(arg) == 6:
                firstPageTitle = wikipedia.input(u'Which page do you want to chage?')
            else:
                firstPageTitle = arg[7:]
            source = 'allpages'
        elif arg == '-always':
            acceptall = True
        elif arg.startswith('-namespace:'):
            namespaces.append(int(arg[11:]))
        else:
            unknown_args.append(arg)

    if source is None or unknown_args:
        # syntax error, show help text from the top of this file
        wikipedia.output(__doc__, 'utf-8')
        wikipedia.stopme()
        sys.exit()

    if source == 'textfile':
        gen = pagegenerators.TextfilePageGenerator(textfilename)
    elif source == 'category':
        cat = catlib.Category(wikipedia.getSite(), categoryname)
        gen = pagegenerators.CategorizedPageGenerator(cat)
    elif source == 'singlepage':
        pages = [wikipedia.Page(wikipedia.getSite(), pageName) for pageName in pageNames]
        gen = iter(pages)
    elif source == 'allpages':
        # The namespace to walk through is taken from the start page itself.
        namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
        gen = pagegenerators.AllpagesPageGenerator(firstPageTitle, namespace)
    else:
        # source == 'ref'
        referredPage = wikipedia.Page(wikipedia.getSite(), referredPageName)
        gen = pagegenerators.ReferringPageGenerator(referredPage)

    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
    bot = Drinibot(preloadingGen, acceptall)
    bot.run()
# Script entry point. wikipedia.stopme() is called in the finally clause, so
# it runs whether main() returns normally or raises.
if __name__ == '__main__':
    try:
        main()
    finally:
        wikipedia.stopme()

