User:Kotbot/Source/Module
From Wikipedia, the free encyclopedia
< User:Kotbot | Source
(recreating with correct code)
# import all custom utilities
from utes import *
def botlog(la,Art,st):
    """Append the line 'la:Art:st' to the bot log file and return st.

    la is the language code, Art the page name and st the status message.
    The log file is 'botlog.txt' in the current working directory.  st is
    returned unchanged so a caller can log and return in one expression.
    """
    logf = open('botlog.txt', 'a')
    try:
        logf.write(la + ':' + Art + ':' + st + '\n')
    finally:
        logf.close()  # close even if the write fails, so the handle never leaks
    return st
def unders(st):
    """Return st with every space replaced by an underscore.

    Used to turn a page title into the form expected in an http address.
    """
    return st.replace(' ', '_')
def getval(form, name):
    """Find the value given to a named input in an HTML form.

    form is the html text to search and name is the input's name attribute.
    Returns the text between value=" and the next double quote, or the
    sentinel 'KBfailKB' if it was not found.

    NOTE(review): the search helpers sf/sflastr/sfrj/sfr come from the utes
    module; this code assumes they return -1 on failure and propagate a
    failed start position -- confirm against utes.
    """
    val = 'KBfailKB'
    n = sf(form, 'name="' + name + '"')
    m = sflastr(form, n, '<')        # go back to find start of tag
    m0 = sfrj(form, m, 'value="')    # find start of value
    m1 = sfr(form, m0, '"')          # find end of value
    if m1 > -1:                      # if everything was found:
        val = form[m0:m1]            # get value
    return val
def mup(html):
    """Marks up &#ref and &ref; chars. in an html string without tags.

    Feeds html through an sgmllib.SGMLParser whose data handler is
    writetoMuPsTr, and returns the text that handler accumulated in the
    global one-element list MuPsTr.

    NOTE(review): sgmllib exists only in Python 2; presumably the parser
    is used here to resolve character/entity references -- confirm.
    """
    global MuPsTr
    # Output string held as a 1-list.  It must start as [''] (not []) because
    # the handler appends to MuPsTr[0] and the result is read from MuPsTr[0].
    MuPsTr = ['']
    import sgmllib
    pa = sgmllib.SGMLParser()
    pa.handle_data = writetoMuPsTr   # how to handle input
    pa.feed(html)                    # read and handle input
    pa.close()
    return MuPsTr[0]
def writetoMuPsTr(s):
    """Append the string s to the sole element of the global list MuPsTr.

    Used in mup() as the parser's data handler.  Returns None.
    """
    MuPsTr[0] = MuPsTr[0] + s
    return
def doreq(url, dl='', retries=None):
    """Make an HTTP request and send it; supply and extract cookies.

    url is the address to request.  dl is the optional data list (of
    key/value pairs); when given, the request becomes a POST.  Cookies are
    taken from, and stored back into, the global cookie jar botjar.

    retries is the number of extra attempts allowed after a failed one.
    The default None retries without limit, which is the historical
    behaviour of this function.

    Returns the response html content, or 'KBerrKB' once the allowed
    attempts are used up (only possible when retries is not None).
    """
    import urllib
    import urllib2
    req = urllib2.Request(url)                    # make Request object
    botjar.add_cookie_header(req)                 # add relevant cookies
    req.add_header('User-agent', 'Agent.Kotbot')  # add User Agent header
    if dl != '':
        req.add_data(urllib.urlencode(dl))        # add data (changes type to POST)
    failures = 0
    while retries is None or failures <= retries:
        try:
            u = urllib2.urlopen(req)              # send the request
            try:
                botjar.extract_cookies(u, req)    # extract cookies from response
                html = u.read()                   # get content from response
            finally:
                u.close()
            return html
        except Exception:
            # Retry on any ordinary error.  Catching Exception rather than
            # using a bare except lets KeyboardInterrupt/SystemExit stop the bot.
            failures += 1
    return 'KBerrKB'
def php(la):
    """Return the main part of the generic php access string to Wikipedia.

    la is the language: 'pl' or 'en'.  The result ends in 'title=' so that
    a page name (and further query parameters) can be appended directly.
    """
    return 'http://' + la + '.wikipedia.org/w/index.php?title='
def login(*las):
    """Initialize the global cookie jar and log in as the bot.

    las are the languages to log in to; they can include 'en' and/or 'pl'.
    Any previous global botjar is replaced by a fresh, empty CookieJar.

    Returns a list with one entry per language, in order: the html returned
    by the login submission, or 'KBerrKB' if doreq reports failure.
    """
    import cookielib
    global botjar
    botjar = cookielib.CookieJar()   # makes global cookie jar
    ol = []                          # the return list
    for la in las:                   # now for each requested language:
        ur0 = php(la) + 'Special:Userlogin'
        doreq(ur0)                   # get login page (response body is discarded)
        ur1 = ur0 + '&action=submitlogin&type=login'
        dl1 = [('wpName','Kotbot'),('wpPassword','*****'),
               ('wpRemember','0'),('wpLoginattempt','Log+in')]
        ol.append(doreq(ur1, dl1))   # submit form and add response to return list
    return ol
def read(la, Art):
    """Read the current raw content of a Wikipedia page.

    la is the language code and Art is the page name.  The outcome is
    written to the bot log: '*NETFAULTREADINGRAW' if the request failed,
    '*NORAWTOREAD' if the content is empty, otherwise 'READRAW'.

    Returns the raw content, or the string '*NETFAULTREADINGRAW' on a
    network failure.
    """
    ur0 = php(la) + unders(Art) + '&action=raw'   # make raw content url
    raw = doreq(ur0)                              # get raw content
    if raw == 'KBerrKB':
        return botlog(la, Art, '*NETFAULTREADINGRAW')
    botlog(la, Art, '*NORAWTOREAD' if raw == '' else 'READRAW')
    return raw
def edit(la, Art, instr, *params):
    """Edit a Wikipedia page according to an instruction (and parameters).

    la is the language code and Art is the page name.  instr is the
    instruction: the function 'edit_' + instr is called as
    FUNC(la, Art, old, *params), where old is the marked-up content of the
    page's edit box (the empty string if the page does not yet exist).
    FUNC must return (new text, edit summary, message, output); an empty
    new text means no edit is submitted.

    After a submission the page is re-read in raw form to confirm the edit,
    and '[OK]' or '[*NOCONFIRM]' is appended to the message.  The message is
    written to the bot log as la:Art:message.

    Returns the tuple (message, output).  On the early error returns
    ('*NETWORKFAULT1', '*NOTLOGGEDIN', '*BADEDITPAGE...', '*NETWORKFAULT2')
    the output is the empty string.
    """
    ur0 = php(la) + unders(Art) + '&action=edit'   # make edit page url
    html = doreq(ur0)                              # get edit page
    if html == 'KBerrKB':
        return botlog(la, Art, '*NETWORKFAULT1'), ''
    if sf(html, 'Kotbot') < 0:
        # the bot's user name is absent from the page: treated as not logged in
        return botlog(la, Art, '*NOTLOGGEDIN'), ''
    nn = sf(html, 'name="wpTextbox1"')     # find tag for text area
    m1 = sfrj(html, nn, '>')               # find start of text area (after tag)
    m2 = sfr(html, m1, '</textarea>')      # find end of text area
    if m2 < 0:
        return botlog(la, Art, '*BADEDITPAGE' + html), ''
    old = mup(html[m1:m2])                 # get and mark up content of text area
    # (this will be the empty string if the page does not yet exist)
    # NOTE(review): eval() on a name built from instr executes arbitrary
    # expressions if instr ever comes from untrusted input; callers must pass
    # only known instruction names.
    FUNC = eval('edit_' + instr)           # function to be called
    # now call the function and get new text, edit summary, message and any output
    newtext, summ, mess, outp = FUNC(la, Art, old, *params)
    if newtext == '':                      # end if no new text provided
        return botlog(la, Art, mess), outp
    # now make data list (lp) for POST request
    n = sf(html, 'id="editform"')          # find edit form
    formhtml = html[n:]
    lp = []
    for a in ['wpSection','wpStarttime','wpEdittime','wpScrolltop']:
        lp.append((a, getval(formhtml, a)))    # each named value on form
    # add more pairs (edit text & summary)
    lp = lp + [('wpTextbox1', newtext), ('wpSummary', summ),
               ('wpSave', 'Save page')]
    for a in ['wpEditToken','wpAutoSummary']:
        lp.append((a, getval(formhtml, a)))    # more values from form
    # now submit the form
    ur1 = php(la) + unders(Art) + '&action=submit'   # make the url
    htmr = doreq(ur1, lp)                  # submit the POST request, get response
    if htmr == 'KBerrKB':
        return botlog(la, Art, '*NETWORKFAULT2'), ''
    # check if the edit was successful
    ur2 = php(la) + unders(Art) + '&action=raw'
    now = doreq(ur2)                       # this is the page text now
    conf = '[OK]' if now == newtext or now + '\n' == newtext else '[*NOCONFIRM]'
    return botlog(la, Art, mess + conf), outp   # log and return appropriate info
# Each edit_xx function must take as arguments:
#   la (lang. code), Art (page name), old (old page text or empty), params...
# It must return:
#   new page text (empty=no edit needed), edit summary, message, output
#
- def edit_vcorr(la,Art,old,pt,gt):
- """Corrects village geobox"""
- if not starts(old,'{{Geobox'):
- return ,,'*NOGEOBOX',
- boxend=sfj(old,'}}'); obox=old[:boxend]; rest=old[boxend:]
- zs=fldval(obox,'state')
- if zs!=:
- vn=target(zs); pn=target(fldval(obox,'region'))
- gn=target(fldval(obox,'district'))
- else:
- vn=target(fldval(obox,'region')); pn=target(fldval(obox,'district'))
- gn=target(fldval(obox,'municipality'))
- if not (ends(vn,'eship') and ends(pn,'County') and starts(gn,'Gmina')):
- return ,,'*REGIONNAMESWRONG'+vn+pn+gn,
- pop=fldval(obox,'population'); pnote=fldval(obox,'population_note')
- c0=fldval(obox,'lat_d'); c1=fldval(obox,'lat_m'); c2=fldval(obox,'lat_s')
- c3=fldval(obox,'lat_NS'); c4=fldval(obox,'long_d');c5=fldval(obox,'long_m');
- c6=fldval(obox,'long_s');c7=fldval(obox,'long_EW')
- coa=fldval(obox,'symbol'); flag=fldval(obox,'flag'); mapp=fldval(obox,'map')
- ele=fldval(obox,'elevation'); ell=fldval(obox,'lowest_elevation')
- elh=fldval(obox,'lowest_elevation'); web=fldval(obox,'website')
- nbox=vbox0(nam(Art),flag,coa,vn,pn,pt,gn,gt,ele,c0,c1,c2,c3,c4,c5,c6,c7,
- elh,ell,pop,pnote!=,mapp,web)
- mess='CORRECTEDBOX' if coa+flag+mapp+ell+elh+web== else '*BOXBUTCHECK'
- return nbox+rest,'bot standardizing Geobox',mess,
- def vbox0(nam,flag,coa,vn,pn,pt,gn,gt,ele,c0,c1,c2,c3,c4,c5,c6,c7,elh,ell,
- pop,popa,mapp,web):
- """Makes Geobox for current village"""
- l1=['Geobox}','Settlement',,('name',nam),
- ('other_name',),('category','Village'),,
- ('etymology',),('official_name',),('motto',),('nickname',),
- ,('image',),('image_caption',),
- ,('flag',flag),('symbol',coa),
- ('symbol_type','Coat of arms'),,
- ('country','Poland'),('country_flag','true'),('region_type',
- 'Voivodeship'),('region',
- esslinkto(vn)),('district_type','County'),
- ('district',esslinkto(pn))]
- l2=[('district_note','(land county)')] if pt=='Le' else []
- l3=[('municipality_type','Gmina'),('municipality',esslinkto(gn))]
- l4=[('municipality_note','(rural gmina)')] if gt=='re' else []
- l5=[,('part',),('landmark',),('river',),
- ,('location',),('elevation',ele),
- ('prominence',),('lat_d',c0,),('lat_m ',c1,),
- ('lat_s',c2,),('lat_NS',c3),('long_d',c4,),
- ('long_m',c5,),('long_s',c6,),('long_EW',c7),
- ('highest',),('highest_location',),('highest_elevation',elh),
- ('highest_lat_d',,),('highest_lat_m',,),('highest_lat_s',,),
- ('highest_lat_NS',),('highest_long_d',,),('highest_long_m',,),
- ('highest_long_s',,),('highest_long_EW',),('lowest',),
- ('lowest_location',),('lowest_elevation',ell),('lowest_lat_d',,),
- ('lowest_lat_m',,),('lowest_lat_s',,),('lowest_lat_NS',),
- ('lowest_long_d',,),('lowest_long_m',,),('lowest_long_s',,),
- ('lowest_long_EW',),,('length',,),
- ('length_orientation',),('width',,),('width_orientation',),
- ('area',),('area_land',),('area_water',),
- ,('population',pop,)]
- l6=[('population_note','(approximate)',)] if popa else []
- l7=[('population_date',),('population_density',),
- ,('established',),
- ('date',),('government',),('government_location',),
- ('government_elevation',),('government_lat_d',,),
- ('government_lat_m',,),('government_lat_s',,),
- ('government_lat_NS',),('government_long_d',,),
- ('government_long_m',,),('government_long_s',,),
- ('government_long_EW',),('mayor',),('leader',),
- ,
- ('timezone','CET'),
- ('utc_offset','+1'),
- ('timezone_DST','CEST'),
- ('utc_offset_DST','+2'),('postal_code',),('area_code',),('code',),
- ,('whs_name',),('whs_year',),
- ('whs_number',),('whs_region',),('whs_criteria',),
- ('iucn_category',),,('free',),
- ('free_type',),,('map',mapp),
- ('map_caption',),('map_background',),('map_locator',),
- ,
- ('commons',),('statistics',),('website',web),
- ,('footnotes',)]
- return l2tem(l1+l2+l3+l4+l5+l6+l7)
- def edit_gmcorr(la,Art,old,pt):
- """Corrects gmina geobox"""
- if not starts(old,'{{Geobox'):
- return ,,'*NOGEOBOX',
- boxend=sfj(old,'}}'); obox=old[:boxend]; rest=old[boxend:]
- zs=fldval(obox,'state')
- if zs!=:
- vn=target(zs); pn=target(fldval(obox,'region'))
- else:
- vn=target(fldval(obox,'region')); pn=target(fldval(obox,'district'))
- if not (ends(vn,'eship') and ends(pn,'County')):
- return ,,'*REGIONNAMESWRONG'+vn+pn+gn,
- area=fldval(obox,'area')
- pop=fldval(obox,'population')
- #urban pop
- seat=fldval(obox,'location')
- partk=fldval(obox,'part'); n=1; partl=[]
- while partk!=:
- partl.append(unlink(partk))
- partk=fldval(obox,'part'+str(n)); n=n+1
- c0=fldval(obox,'lat_d'); c1=fldval(obox,'lat_m'); c2=fldval(obox,'lat_s')
- c3=fldval(obox,'lat_NS'); c4=fldval(obox,'long_d');c5=fldval(obox,'long_m');
- c6=fldval(obox,'long_s');c7=fldval(obox,'long_EW')
- coa=fldval(obox,'symbol'); flag=fldval(obox,'flag'); mapp=fldval(obox,'map')
- web=fldval(obox,'website')
- nbox=gmbox0(essnam(Art),flag,coa,vn,pn,pt,partl,seat,
- c0,c1,c2,c3,c4,c5,c6,c7,area,pop,mapp,web)
- mess='CORRECTEDBOX' if coa+flag+mapp+web== else '*BOXBUTCHECK'
- return nbox+rest,'bot standardizing Geobox',mess,
- def gmbox0(nam,flag,coa,vn,pn,pt,partl,seat,c0,c1,c2,c3,c4,c5,c6,c7,area
- pop,mapp,web):
- """Makes Geobox for current gmina"""
- l1=['Geobox}','Region',,('name',nam),
- ('category','Commune'),('native_category','Gmina'),
- ,
- ('etymology',),('official_name','Gmina '+nam),('motto',),
- ('nickname',),
- ,('image',),('image_caption',),
- ,('flag',flag),('symbol',coa),
- ('symbol_type','Coat of arms'),,
- ('country','Poland'),('country_flag','true'),('region_type',
- 'Voivodeship'),('region',
- esslinkto(vn)),('district_type','County'),
- ('district',esslinkto(pn))]
- l2=[('district_note','(land county)')] if pt=='Le' else []
- l3=[,('border',),
- ('part_type','So\xc5\x82ectwos')]
- l4=[('part',)] if len(partl)==0 else [('part',partl[0])]
- while n in range(1,len(partl)):
- l4.append(('part'+n,partl[n]))
- l5=[('city',),('landmark',),('river',),,
- ('location_type','Seat'),('location',seat),('elevation',),
- ('prominence',),('lat_d',c0,),('lat_m ',c1,),
- ('lat_s',c2,),('lat_NS',c3),('long_d',c4,),
- ('long_m',c5,),('long_s',c6,),('long_EW',c7),
- ('highest',),('highest_location',),('highest_elevation',),
- ('highest_lat_d',,),('highest_lat_m',,),('highest_lat_s',,),
- ('highest_lat_NS',),('highest_long_d',,),('highest_long_m',,),
- ('highest_long_s',,),('highest_long_EW',),('lowest',),
- ('lowest_location',),('lowest_elevation',),('lowest_lat_d',,),
- ('lowest_lat_m',,),('lowest_lat_s',,),('lowest_lat_NS',),
- ('lowest_long_d',,),('lowest_long_m',,),('lowest_long_s',,),
- ('lowest_long_EW',),,('length',,),
- ('length_orientation',),('width',,),('width_orientation',),
- ('area',area),('area_land',),('area_water',),
- ,('population',pop,)]
- l6=[('population_date','2006'),('population_density','auto'),
- ,('established',),
- ('date',),('government',),('government_location',),
- ('government_elevation',),('government_lat_d',,),
- ('government_lat_m',,),('government_lat_s',,),
- ('government_lat_NS',),('government_long_d',,),
- ('government_long_m',,),('government_long_s',,),
- ('government_long_EW',),('mayor',),('leader',),
- ,
- ('timezone','CET'),
- ('utc_offset','+1'),
- ('timezone_DST','CEST'),
- ('utc_offset_DST','+2'),('postal_code',),('area_code',),('code',),
- ,('whs_name',),('whs_year',),
- ('whs_number',),('whs_region',),('whs_criteria',),
- ('iucn_category',),,('free',),
- ('free_type',),,('map',mapp),
- ('map_caption',),('map_background',),('map_locator',),
- ,('website',web),
- ,('footnotes',)]
- return l2tem(l1+l2+l3+l4+l5+l6)
def edit_Read(la, Art, old):
    """Instruction 'Read' just reads the existing text without changing it.

    la is the language code, Art the page name and old the current page
    text (empty if the page does not exist).

    Returns (new text, summary, message, output).  New text and summary
    are always empty, so no edit is made.  The message is
    '*ARTDOESNOTEXIST' with empty output when old is empty, otherwise
    'ONLYREAD' with old as the output.
    """
    # The page is missing exactly when old is empty; the earlier test
    # (old != '') reported existing pages as missing and never returned text.
    if old == '':
        return '', '', '*ARTDOESNOTEXIST', ''
    return '', '', 'ONLYREAD', old
def edit_New(la, Art, old, text):
    """Instruction 'New' makes a new page (if it does not yet exist).

    Parameter: text (the text to be placed on the new page).
    Returns (new text, summary, message, output); when old is non-empty the
    page already exists, so new text is empty and the message is '*ARTEXISTS'.
    """
    if old != '':
        return '', '', '*ARTEXISTS', ''
    summ = 'bot:utw' if la == 'pl' else 'bot creating page'
    return text, summ, 'CREATED', ''
def edit_Force(la, Art, old, text):
    """Instruction 'Force' makes a new page or replaces the existing one.

    Parameter: text (the text to be placed on the page).
    Returns (text, summary, message, ''); the message is 'CREATED' when old
    is empty and 'REPLACEDARTICLE' otherwise.
    """
    summ = 'bot:edycja' if la == 'pl' else 'bot: standardizing'
    mess = 'CREATED' if old == '' else 'REPLACEDARTICLE'
    return text, summ, mess, ''
def edit_Subst(la, Art, old, a, b):
    """Instruction 'Subst' replaces the first occurrence of a with b.

    Returns (new text, summary, message, ''); when a is not found in old,
    new text is empty and the message is '*NOTHINGTOREPLACE'.

    NOTE(review): sf comes from the utes module; this code assumes it
    returns the index of a in old, or a negative number if absent.
    """
    n = sf(old, a)
    if n < 0:
        return '', '', '*NOTHINGTOREPLACE', ''
    summ = 'bot:zastep' if la == 'pl' else 'bot changing category'
    text = old[:n] + b + old[n + len(a):]
    return text, summ, 'REPLACED', ''
def edit_Offer(la, Art, old, text):
    """Instruction 'Offer' makes a new page or returns the offered text to the log.

    Returns (new text, summary, message, '').  If the page already exists
    (old is non-empty) no edit is made and the offered text is appended to
    the message '*EXISTSSODIDNTADD--' so that it ends up in the bot log.
    """
    if old != '':
        return '', '', '*EXISTSSODIDNTADD--' + text, ''
    summ = 'bot:utw' if la == 'pl' else 'bot creating page'
    return text, summ, 'CREATED', ''
def edit_Redir(la, Art, old, redto):
    """Instruction 'Redir' makes a redirect to redto.

    Returns (new text, summary, message, '').  A redirect page is created
    only when the page does not yet exist (old is empty).  Otherwise no edit
    is made and the message is 'ALREADYREDIR' if the existing page already
    redirects to redto, else '*CANTREDIRCOSEXISTS'.
    """
    if old == '':
        # The target is written as a wiki link: '#REDIRECT [[target]]'.
        return '#REDIRECT [[' + redto + ']]', 'bot redirect', 'REDIRECTED', ''
    # redtarg comes from the utes module (presumably the redirect target of old)
    mess = 'ALREADYREDIR' if redtarg(old) == redto else '*CANTREDIRCOSEXISTS'
    return '', '', mess, ''
def edit_IW(la,Art,old,link):
"""Instr. 'IW' adds/replaces an interwiki link (unless already there)"""
"""Also returns: the complete new text"""
# NOTE(review): this block is not valid Python as it stands: indentation is
# missing and several string literals are absent (e.g. `old==`, `olink=`,
# `return ,,`). Presumably they were empty strings '' -- confirm against the
# original script before restoring.
if old==:
return ,,'*NOSUCHARTFORIW',
olink=; inpt=len(old) #found link; input point
on=1; n=0 #flag (off during comments); search cursor
# NOTE(review): the scan loop below appears truncated. starts() is called
# with no second argument, `on` is never set to 0, and nn/mm (used further
# down) are never assigned in the visible code. The missing part presumably
# detected comment start/end markers and an existing link -- do not
# reconstruct it by guesswork.
while n<len(old):
if on==1:
if starts(old[n:],):
on=1; n=n+2 #comment ends
n=n+1
if on==0 and inpt==len(old):
old=old+'-->'; inpt=inpt+3 #closes hanging comment
if olink!=:
old=old[:nn]+old[mm+2:] #removes existing link
if inpt>nn:
inpt=inpt-(mm+2-nn) #(and adjusts input point if necessary)
# NOTE(review): the inserted text below is `''+link`; link brackets may have
# been lost from these literals -- confirm.
if old[inpt-1]=='\n':
new=old[:inpt]+''+link+'\n'+old[inpt:] #inserting new link
else: #(ensuring newline before)
new=old[:inpt]+'\n'+link+'\n'+old[inpt:]
if olink!=:
mess='SUBST'+link+'FOR'+olink
summ='bot:zast.link '+link[:2] if la=='pl' else 'bot replacing iw link'
else:
mess='ADDED'+link+''
summ='bot:dod.link '+link[:2] if la=='pl' else 'bot adding iw link'
# returns (new text, summary, message, output); the output is the new text too
return new,summ,mess,new
def edit_IWN(la,Art,old,link):
"""Instr. 'IWN' adds an interwiki link (unless already there)"""
"""Also returns: the complete new text"""
# NOTE(review): this block is not valid Python as it stands: indentation is
# missing and several string literals are absent (e.g. `old==`, `return ,,`).
# Presumably they were empty strings '' -- confirm against the original
# script before restoring.
if old==:
return ,,'*NOSUCHARTFORIW',
inpt=len(old) #input point
on=1; n=0 #flag (off during comments); search cursor
# NOTE(review): the scan loop below appears truncated. starts() is called
# with no second argument and `on` is never set to 0 in the visible code.
# Do not reconstruct the missing part by guesswork.
while n<len(old):
if on==1:
if starts(old[n:],):
on=1; n=n+2 #comment ends
n=n+1
if on==0 and inpt==len(old):
old=old+'-->'; inpt=inpt+3 #closes hanging comment
if old[inpt-1]=='\n':
new=old[:inpt]+''+link+'\n'+old[inpt:] #inserting new link
else: #(ensuring newline before)
new=old[:inpt]+'\n'+link+'\n'+old[inpt:]
# NOTE(review): olink is tested below but never assigned anywhere in the
# visible body of this function -- confirm whether the lost loop body set it.
if olink!=:
mess='SUBST'+link+'FOR'+olink
summ='bot:zast.link '+link[:2] if la=='pl' else 'bot replacing iw link'
else:
mess='ADDED'+link+''
summ='bot:dod.link '+link[:2] if la=='pl' else 'bot adding iw link'
# returns (new text, summary, message, output); the output is the new text too
return new,summ,mess,new
def edit_Dis(la,Art,old,linex):
"""Instr. 'Dis' adds a line to a Polish geog disambig. page"""
"""Creates page if necessary"""
"""Includes/removes line linking to Polish Wikipedia"""
# NOTE(review): this block is not valid Python as it stands: indentation is
# missing, empty-string literals are absent (`old==`, `return ,,`, `gm!=`),
# and three string literals are broken across lines (the page text built
# for a new page, the second argument of the sfr() call that sets nd, and
# the first operand of the `in old` test that follows it). The lost text is
# presumably wiki markup -- confirm against the original script; do not
# reconstruct it by guesswork.
# a leading '+' on linex is dropped to give the line that is actually added
line = linex[1:] if starts(linex,'+') else linex
name=upto(Art,'(disamb')
if old==: #if no page exists, create it (with temporary second link)
if line!=linex:
return ,,'*NOPAGEWDISTAG',
new=""+name+" may refer to the following places in Poland:"+\
'\n*'+line+'\n*Other places called '+name+\
" (listed in Polish Wikipedia)\n\n
\n"
summ='bot creating disambiguation page'; mess='NEWDISAMB'
else:
op1=sf(old,'*[[:pl:'); op2=sfrj(old,op1,'\n')
if op2>-1:
old=old[:op1]+old[op2:] #remove any temporary link
mayref=sf(old,'may refer')
np=sfr(old,mayref,'[[Poland'); nd=sfr(old,np,'
')
if '
For other places with the same name, see Kotbot/Source/Module (disambiguation).
' in old or Art+' (disambiguation)' in old:
return ,,'*TRYPAGEWDISTAG',
if nd<0:
return ,,'*NOTPOLGEODISFOR:'+line,
nc2=sf(line,'County'); nc1=sflastr(line,nc2,('[',','))
cy=noinitsp(line[nc1+1:nc2+6]) #the powiat in the new line
ng1=sf(line,'Gmina'); ng2=sfr(line,ng1,('|',']',','))
gm=notrailsp(line[ng1:ng2]) #the gmina in the new line
if (cy in old and ', Gmina' not in line) or (gm!= and gm in old and
gm[6:]!=name): #if equivalent line seems to be there already:
return ,,'*LINEALREADYTHERE',
# inpt stays 0 unless the search below finds an insert point between np and nd
i1=np; inpt=0
while i1>-1 and i1<=nd: #now find insert point
i1=sfrj(old,i1,'\n')
if i1>-1 and i1<=nd and old[i1]!='*':
inpt=i1; i1=-1
else:
i2=sfr(old,i1,'\n'); nextline=old[i1:i2]
if i2<0 or i2>nd or ('Gmina' not in nextline and ('County'
not in nextline or 'Voivod' not in nextline) and
(('town' not in nextline and 'city' not in nextline and
'river' not in nextline) or ('Voivod' not in nextline
and 'Poland' not in nextline))):
inpt=i1; i1=-1
if inpt==0:
return ,,'*BADPAGEFORMAT',
new=old[:inpt]+'*'+line+'\n'+old[inpt:]
summ='bot adding to disambiguation page'; mess='ADDEDTODISAMB'
# returns (new text, summary, message, output)
return new,summ,mess,

