#User:Kotbot/Source/Module

#From Wikipedia, the free encyclopedia

#(recreating with correct code)

#import all custom utilities

from utes import *

def botlog(la,Art,st):
    """Append the line la:Art:st to the bot log file and return st.

    la, Art and st are strings; they are joined with ':' and written,
    followed by a newline, to 'botlog.txt' in the current directory.
    Returning st lets callers log and return a message in one expression.
    """
    # 'with' guarantees the handle is closed even if the write fails
    with open('botlog.txt','a') as logf:
        logf.write(la+':'+Art+':'+st+'\n')
    return st

def unders(st):
    """Return st with every space turned into an underscore.

    Used to put page names into an http address.
    """
    pieces=st.split(' ')      #cut at each single space...
    return '_'.join(pieces)   #...and glue back with underscores

def getval(form,name):
    """Find the value of a named input in an HTML form.

    form is the form's html, name the input's name attribute.
    Returns the text between the quotes of value="..." in that input's
    tag, or 'KBfailKB' if it was not found.
    The searching is done by the sf/sflastr/sfrj/sfr helpers (imported
    elsewhere); a negative result from sfr is taken as "not found".
    """
    name_at=sf(form,'name="'+name+'"')
    tag_at=sflastr(form,name_at,'<')          #go back to find start of tag
    val_from=sfrj(form,tag_at,'value="')      #find start of value
    val_to=sfr(form,val_from,'"')             #find end of value
    if val_to>-1:                             #everything was found
        return form[val_from:val_to]
    return 'KBfailKB'

def mup(html):
    """Marks up &#ref and &ref; chars. in an html string without tags.

    The string is fed through an sgmllib.SGMLParser whose data handler is
    writetoMuPsTr, so every piece of text the parser emits is collected in
    the global one-element list MuPsTr.  Returns the collected string
    (the empty string for empty input).
    NOTE(review): how character/entity references are resolved is left to
    sgmllib's default handlers - confirm against the sgmllib documentation.
    """
    global MuPsTr
    #The list must start with one (empty) string: writetoMuPsTr appends to
    #MuPsTr[0], and MuPsTr[0] is returned below even if no data was handled.
    MuPsTr=['']
    import sgmllib
    pa=sgmllib.SGMLParser()
    pa.handle_data=writetoMuPsTr #how to handle input
    pa.feed(html)                #read and handle input
    pa.close()
    return MuPsTr[0]

def writetoMuPsTr(s):
    """Data handler used by mup().

    Appends the string s to the sole element of the global list MuPsTr.
    Returns nothing.
    """
    MuPsTr[0]+=s

def doreq(url,dl='',retries=None):
    """Make an HTTP request and send it; supply and extract cookies.

    url     -- the address to request
    dl      -- optional data list (of key/value pairs); if given, it is
               url-encoded and attached, which makes the request a POST
    retries -- None (default): keep retrying until the request succeeds,
               as this function always did; an integer N: give up after
               the first attempt plus N retries have all failed

    Returns the response html content, or 'KBerrKB' once the retry limit
    (if any) is exhausted.  The global cookie jar is botjar (created by
    login()).
    """
    import urllib, urllib2
    req=urllib2.Request(url)                    #make Request object
    botjar.add_cookie_header(req)               #add relevant cookies
    req.add_header('User-agent','Agent.Kotbot') #add User Agent header
    if dl!='':
        req.add_data(urllib.urlencode(dl))      #add data (changes type to POST)
    failures=0
    while True:
        try:
            u=urllib2.urlopen(req)              #send the request
            try:
                botjar.extract_cookies(u,req)   #extract cookies from response
                html=u.read()                   #get content from response
            finally:
                u.close()                       #always release the connection
            return html
        except Exception:
            #Exception (not a bare except) so that Ctrl-C can still stop the bot
            failures=failures+1
            if retries is not None and failures>retries:
                return 'KBerrKB'

def php(la):
    """Return the main part of the generic php access string to Wikipedia.

    la is the language: 'pl' or 'en'.  The result ends in 'title=' so that
    a page name can be appended directly.
    """
    parts=['http://',la,'.wikipedia.org/w/index.php?title=']
    return ''.join(parts)

def login(*las):
    """Initialize the global cookie jar and log in as the bot.

    las... are the languages to log in to; they can include 'en' and/or
    'pl'.  For each one the login page is fetched first and then the login
    form is submitted.
    Returns a list with one entry per language, in the order given: the
    response to the form submission as returned by doreq().
    """
    import cookielib
    global botjar
    botjar=cookielib.CookieJar()            #fresh global cookie jar

    def one_login(lang):
        login_page=php(lang)+'Special:Userlogin'
        doreq(login_page)                              #get login page
        submit_to=login_page+'&action=submitlogin&type=login'
        fields=[('wpName','Kotbot'),('wpPassword','*****'),
                ('wpRemember','0'),('wpLoginattempt','Log+in')]
        return doreq(submit_to,fields)                 #submit the form

    return [one_login(lang) for lang in las]

def read(la,Art):
    """Read the current raw content of a Wikipedia page.

    la is the language code, Art the page name.
    Returns the raw text as delivered by doreq().  If doreq() reports
    'KBerrKB', '*NETFAULTREADINGRAW' is logged and returned instead.
    Otherwise the outcome is logged as '*NORAWTOREAD' (empty content) or
    'READRAW'.
    """
    ur0=php(la)+unders(Art)+'&action=raw' #make raw-content url
    raw=doreq(ur0)                        #get raw content
    if raw=='KBerrKB':
        return botlog(la,Art,'*NETFAULTREADINGRAW')
    botlog(la,Art,'*NORAWTOREAD' if raw=='' else 'READRAW')
    return raw

def edit(la,Art,instr,*params):
    """Edit a Wikipedia page according to an instruction (and parameters).

    la     -- the language code
    Art    -- the page name
    instr  -- the instruction; the function 'edit_'+instr is called as
              FUNC(la,Art,old,*params), where old is the current content
              of the edit box (empty if the page does not yet exist)
    params -- passed through to that function

    The edit_xx function returns (new text, edit summary, message, output).
    If the new text is empty, nothing is submitted.

    Every outcome is botlogged as la:Art:message.  Returns the 1-tuple
    (message,) if the edit page could not be fetched or understood or the
    submission failed; otherwise the 2-tuple (message, output).  After a
    submission the message has '[OK]' or '[*NOCONFIRM]' appended, depending
    on whether the page's raw text now equals the submitted text.
    """
    ur0=php(la)+unders(Art)+'&action=edit' #make edit page url
    html=doreq(ur0)                        #get edit page
    if html=='KBerrKB':
        return botlog(la,Art,'*NETWORKFAULT1'),
    if sf(html,'Kotbot')<0:                #bot's name must appear on the page
        return botlog(la,Art,'*NOTLOGGEDIN'),
    nn=sf(html,'name="wpTextbox1"')        #find tag for text area
    m1=sfrj(html,nn,'>')                   #find start of text area (after tag)
    m2=sfr(html,m1,'</textarea>')          #find end of text area
    if m2<0:
        return botlog(la,Art,'*BADEDITPAGE'+html),
    old=mup(html[m1:m2])                   #get and mark up content of text area
    #(this will be the empty string if the page does not yet exist)
    #NOTE(review): eval() runs whatever expression 'edit_'+instr forms;
    #instr must only ever come from trusted code, never from external input.
    FUNC=eval('edit_'+instr)               #function to be called
    #now call the function and get new text, edit summary, message and any output
    newtext,summ,mess,outp=FUNC(la,Art,old,*params)
    if newtext=='':                        #end if no new text provided
        return botlog(la,Art,mess),outp
    #now make data list (lp) for POST request; the order of fields is kept
    form=html[sf(html,'id="editform"'):]   #the page from the edit form onwards
    lp=[(a,getval(form,a))                 #named values found on the form
        for a in ('wpSection','wpStarttime','wpEdittime','wpScrolltop')]
    lp=lp+[('wpTextbox1',newtext),('wpSummary',summ),
           ('wpSave','Save page')]         #edit text & summary
    lp=lp+[(a,getval(form,a))              #more values from form
           for a in ('wpEditToken','wpAutoSummary')]
    #now submit the form
    ur1=php(la)+unders(Art)+'&action=submit'   #make the url
    htmr=doreq(ur1,lp)                     #submit the POST request, get response
    if htmr=='KBerrKB':
        return botlog(la,Art,'*NETWORKFAULT2'),
    #check if the edit was successful
    ur2=php(la)+unders(Art)+'&action=raw'
    now=doreq(ur2)                         #this is the page text now
    conf='[OK]' if now==newtext or now+'\n'==newtext else '[*NOCONFIRM]'
    return botlog(la,Art,mess+conf),outp   #log and return appropriate info
#Each edit_xx function must take as arguments:
#la (lang. code), Art (page name), old (old page text or empty), params...
#It must return:
#new page text (empty=no edit needed), edit summary, message, output
##def edit_vcorr(la,Art,old,pt,gt):
##"""Corrects village geobox"""
##if not starts(old,'{{Geobox'):
##return ,,'*NOGEOBOX',
##boxend=sfj(old,'}}'); obox=old[:boxend]; rest=old[boxend:]
##zs=fldval(obox,'state')
##if zs!=:
##vn=target(zs); pn=target(fldval(obox,'region'))
##gn=target(fldval(obox,'district'))
##else:
##vn=target(fldval(obox,'region')); pn=target(fldval(obox,'district'))
##gn=target(fldval(obox,'municipality'))
##if not (ends(vn,'eship') and ends(pn,'County') and starts(gn,'Gmina')):
##return ,,'*REGIONNAMESWRONG'+vn+pn+gn,
##pop=fldval(obox,'population'); pnote=fldval(obox,'population_note')
##c0=fldval(obox,'lat_d'); c1=fldval(obox,'lat_m'); c2=fldval(obox,'lat_s')
##c3=fldval(obox,'lat_NS'); c4=fldval(obox,'long_d');c5=fldval(obox,'long_m');
##c6=fldval(obox,'long_s');c7=fldval(obox,'long_EW')
##coa=fldval(obox,'symbol'); flag=fldval(obox,'flag'); mapp=fldval(obox,'map')
##ele=fldval(obox,'elevation'); ell=fldval(obox,'lowest_elevation')
##elh=fldval(obox,'lowest_elevation'); web=fldval(obox,'website')
##nbox=vbox0(nam(Art),flag,coa,vn,pn,pt,gn,gt,ele,c0,c1,c2,c3,c4,c5,c6,c7,
##elh,ell,pop,pnote!=,mapp,web)
##mess='CORRECTEDBOX' if coa+flag+mapp+ell+elh+web== else '*BOXBUTCHECK'
##return nbox+rest,'bot standardizing Geobox',mess,
##def vbox0(nam,flag,coa,vn,pn,pt,gn,gt,ele,c0,c1,c2,c3,c4,c5,c6,c7,elh,ell,
##pop,popa,mapp,web):
##"""Makes Geobox for current village"""
##l1=['Geobox}','Settlement',,('name',nam),
##('other_name',),('category','Village'),,
##('etymology',),('official_name',),('motto',),('nickname',),
##,('image',),('image_caption',),
##,('flag',flag),('symbol',coa),
##('symbol_type','Coat of arms'),,
##('country','Poland'),('country_flag','true'),('region_type',
##'Voivodeship'),('region',
##esslinkto(vn)),('district_type','County'),
##('district',esslinkto(pn))]
##l2=[('district_note','(land county)')] if pt=='Le' else []
##l3=[('municipality_type','Gmina'),('municipality',esslinkto(gn))]
##l4=[('municipality_note','(rural gmina)')] if gt=='re' else []
##l5=[,('part',),('landmark',),('river',),
##,('location',),('elevation',ele),
##('prominence',),('lat_d',c0,),('lat_m ',c1,),
##('lat_s',c2,),('lat_NS',c3),('long_d',c4,),
##('long_m',c5,),('long_s',c6,),('long_EW',c7),
##('highest',),('highest_location',),('highest_elevation',elh),
##('highest_lat_d',,),('highest_lat_m',,),('highest_lat_s',,),
##('highest_lat_NS',),('highest_long_d',,),('highest_long_m',,),
##('highest_long_s',,),('highest_long_EW',),('lowest',),
##('lowest_location',),('lowest_elevation',ell),('lowest_lat_d',,),
##('lowest_lat_m',,),('lowest_lat_s',,),('lowest_lat_NS',),
##('lowest_long_d',,),('lowest_long_m',,),('lowest_long_s',,),
##('lowest_long_EW',),,('length',,),
##('length_orientation',),('width',,),('width_orientation',),
##('area',),('area_land',),('area_water',),
##,('population',pop,)]
##l6=[('population_note','(approximate)',)] if popa else []
##l7=[('population_date',),('population_density',),
##,('established',),
##('date',),('government',),('government_location',),
##('government_elevation',),('government_lat_d',,),
##('government_lat_m',,),('government_lat_s',,),
##('government_lat_NS',),('government_long_d',,),
##('government_long_m',,),('government_long_s',,),
##('government_long_EW',),('mayor',),('leader',),
##,
##('timezone','CET'),
##('utc_offset','+1'),
##('timezone_DST','CEST'),
##('utc_offset_DST','+2'),('postal_code',),('area_code',),('code',),
##,('whs_name',),('whs_year',),
##('whs_number',),('whs_region',),('whs_criteria',),
##('iucn_category',),,('free',),
##('free_type',),,('map',mapp),
##('map_caption',),('map_background',),('map_locator',),
##,
##('commons',),('statistics',),('website',web),
##,('footnotes',)]
##return l2tem(l1+l2+l3+l4+l5+l6+l7)
##def edit_gmcorr(la,Art,old,pt):
##"""Corrects gmina geobox"""
##if not starts(old,'{{Geobox'):
##return ,,'*NOGEOBOX',
##boxend=sfj(old,'}}'); obox=old[:boxend]; rest=old[boxend:]
##zs=fldval(obox,'state')
##if zs!=:
##vn=target(zs); pn=target(fldval(obox,'region'))
##else:
##vn=target(fldval(obox,'region')); pn=target(fldval(obox,'district'))
##if not (ends(vn,'eship') and ends(pn,'County')):
##return ,,'*REGIONNAMESWRONG'+vn+pn+gn,
##area=fldval(obox,'area')
##pop=fldval(obox,'population')
###urban pop
##seat=fldval(obox,'location')
##partk=fldval(obox,'part'); n=1; partl=[]
##while partk!=:
##partl.append(unlink(partk))
##partk=fldval(obox,'part'+str(n)); n=n+1
##c0=fldval(obox,'lat_d'); c1=fldval(obox,'lat_m'); c2=fldval(obox,'lat_s')
##c3=fldval(obox,'lat_NS'); c4=fldval(obox,'long_d');c5=fldval(obox,'long_m');
##c6=fldval(obox,'long_s');c7=fldval(obox,'long_EW')
##coa=fldval(obox,'symbol'); flag=fldval(obox,'flag'); mapp=fldval(obox,'map')
##web=fldval(obox,'website')
##nbox=gmbox0(essnam(Art),flag,coa,vn,pn,pt,partl,seat,
##c0,c1,c2,c3,c4,c5,c6,c7,area,pop,mapp,web)
##mess='CORRECTEDBOX' if coa+flag+mapp+web== else '*BOXBUTCHECK'
##return nbox+rest,'bot standardizing Geobox',mess,
##def gmbox0(nam,flag,coa,vn,pn,pt,partl,seat,c0,c1,c2,c3,c4,c5,c6,c7,area
##pop,mapp,web):
##"""Makes Geobox for current gmina"""
##l1=['Geobox}','Region',,('name',nam),
##('category','Commune'),('native_category','Gmina'),
##,
##('etymology',),('official_name','Gmina '+nam),('motto',),
##('nickname',),
##,('image',),('image_caption',),
##,('flag',flag),('symbol',coa),
##('symbol_type','Coat of arms'),,
##('country','Poland'),('country_flag','true'),('region_type',
##'Voivodeship'),('region',
##esslinkto(vn)),('district_type','County'),
##('district',esslinkto(pn))]
##l2=[('district_note','(land county)')] if pt=='Le' else []
##l3=[,('border',),
##('part_type','So\xc5\x82ectwos')]
##l4=[('part',)] if len(partl)==0 else [('part',partl[0])]
##while n in range(1,len(partl)):
##l4.append(('part'+n,partl[n]))
##l5=[('city',),('landmark',),('river',),,
##('location_type','Seat'),('location',seat),('elevation',),
##('prominence',),('lat_d',c0,),('lat_m ',c1,),
##('lat_s',c2,),('lat_NS',c3),('long_d',c4,),
##('long_m',c5,),('long_s',c6,),('long_EW',c7),
##('highest',),('highest_location',),('highest_elevation',),
##('highest_lat_d',,),('highest_lat_m',,),('highest_lat_s',,),
##('highest_lat_NS',),('highest_long_d',,),('highest_long_m',,),
##('highest_long_s',,),('highest_long_EW',),('lowest',),
##('lowest_location',),('lowest_elevation',),('lowest_lat_d',,),
##('lowest_lat_m',,),('lowest_lat_s',,),('lowest_lat_NS',),
##('lowest_long_d',,),('lowest_long_m',,),('lowest_long_s',,),
##('lowest_long_EW',),,('length',,),
##('length_orientation',),('width',,),('width_orientation',),
##('area',area),('area_land',),('area_water',),
##,('population',pop,)]
##l6=[('population_date','2006'),('population_density','auto'),
##,('established',),
##('date',),('government',),('government_location',),
##('government_elevation',),('government_lat_d',,),
##('government_lat_m',,),('government_lat_s',,),
##('government_lat_NS',),('government_long_d',,),
##('government_long_m',,),('government_long_s',,),
##('government_long_EW',),('mayor',),('leader',),
##,
##('timezone','CET'),
##('utc_offset','+1'),
##('timezone_DST','CEST'),
##('utc_offset_DST','+2'),('postal_code',),('area_code',),('code',),
##,('whs_name',),('whs_year',),
##('whs_number',),('whs_region',),('whs_criteria',),
##('iucn_category',),,('free',),
##('free_type',),,('map',mapp),
##('map_caption',),('map_background',),('map_locator',),
##,('website',web),
##,('footnotes',)]
##return l2tem(l1+l2+l3+l4+l5+l6)

def edit_Read(la,Art,old):
    """Instruction 'Read' just reads the existing text without changing it.

    la is the language code, Art the page name, old the current page text
    (empty if the page does not exist).
    Returns (new text, summary, message, output).  The new text is always
    empty, so no edit is made.  The message is '*ARTDOESNOTEXIST' when
    there is no text to read, otherwise 'ONLYREAD' with the page text as
    the output.
    """
    if old=='':      #empty text means there is no such page
        return '','','*ARTDOESNOTEXIST',''
    return '','','ONLYREAD',old

def edit_New(la,Art,old,text):
    """Instruction 'New' makes a new page (if it does not yet exist).

    Parameter: text (the text to be placed on the new page).
    Returns (new text, summary, message, output).  If the page already has
    content, nothing is edited and the message is '*ARTEXISTS'; otherwise
    text is returned as the new page text with message 'CREATED'.
    The edit summary depends on the language la ('pl' or other).
    """
    if old!='':
        return '','','*ARTEXISTS',''
    summ='bot:utw' if la=='pl' else 'bot creating page'
    return text,summ,'CREATED',''

def edit_Force(la,Art,old,text):
    """Instruction 'Force' makes a new page or replaces the existing one.

    Parameter: text (the text to be placed on the page).
    Returns (new text, summary, message, output).  The message is 'CREATED'
    if the page had no content and 'REPLACEDARTICLE' otherwise.
    The edit summary depends on the language la ('pl' or other).
    """
    summ='bot:edycja' if la=='pl' else 'bot: standardizing'
    return text,summ,('CREATED' if old=='' else 'REPLACEDARTICLE'),''

def edit_Subst(la,Art,old,a,b):
    """Instruction 'Subst' replaces the first occurrence of a with b.

    Returns (new text, summary, message, output).  The position of a in old
    is found with the sf helper (imported elsewhere); a negative position
    means nothing is edited and the message is '*NOTHINGTOREPLACE'.
    Otherwise the len(a) characters at that position are replaced by b and
    the message is 'REPLACED'.
    """
    n=sf(old,a)
    if n<0:
        return '','','*NOTHINGTOREPLACE',''
    summ='bot:zastep' if la=='pl' else 'bot changing category'
    text=old[:n]+b+old[n+len(a):]
    return text,summ,'REPLACED',''

def edit_Offer(la,Art,old,text):
    """Instruction 'Offer' makes a new page or returns the offered text to log.

    Returns (new text, summary, message, output).  If the page already has
    content, nothing is edited and the offered text is carried in the
    message ('*EXISTSSODIDNTADD--'+text) so that it ends up in the log;
    otherwise text becomes the new page with message 'CREATED'.
    """
    if old!='':
        return '','','*EXISTSSODIDNTADD--'+text,''
    summ='bot:utw' if la=='pl' else 'bot creating page'
    return text,summ,'CREATED',''

def edit_Redir(la,Art,old,redto):
    """Instruction 'Redir' makes a redirect to redto.

    Returns (new text, summary, message, output).  If the page has no
    content, the redirect text is returned with message 'REDIRECTED'.
    If it has content, nothing is edited: the message is 'ALREADYREDIR'
    when redtarg(old) (helper imported elsewhere) equals redto, and
    '*CANTREDIRCOSEXISTS' otherwise.
    NOTE(review): the '[[' and ']]' around the target were missing from the
    rendered copy of this code and have been restored, since a redirect
    needs link brackets - confirm against the bot's real source.
    """
    if old=='':
        return '#REDIRECT [['+redto+']]','bot redirect','REDIRECTED',''
    mess='ALREADYREDIR' if redtarg(old)==redto else '*CANTREDIRCOSEXISTS'
    return '','',mess,''

def edit_IW(la,Art,old,link):

   """Instr. 'IW' adds/replaces an interwiki link (unless already there)"""
   """Also returns: the complete new text"""
   #NOTE(review): this function is incomplete as it stands and will not
   #compile.  Empty-string literals appear to have been lost (e.g. after
   #'old==', 'olink=' and in the bare 'return ,,...' tuples), and the scan
   #loop below has lost everything between its first starts(...) test and
   #the 'comment ends' branch: on is never set to 0, and olink, nn and mm
   #are never assigned although the code after the loop uses them.
   #The missing part presumably found an existing link (olink, spanning
   #old[nn:mm+2]) and the input point - TODO restore from the bot's real
   #source; do not guess.
   #Returns (new text, summary, message, output); here the output is the
   #complete new text as well.
   if old==:
       return ,,'*NOSUCHARTFORIW',
   olink=; inpt=len(old) #found link; input point
   on=1; n=0               #flag (off during comments); search cursor 
   while n<len(old):
       if on==1:
           if starts(old[n:],):
               on=1; n=n+2             #comment ends
       n=n+1
   if on==0 and inpt==len(old):
       old=old+'-->'; inpt=inpt+3      #closes hanging comment
   if olink!=:
       old=old[:nn]+old[mm+2:]         #removes existing link
       if inpt>nn:
           inpt=inpt-(mm+2-nn)         #(and adjusts input point if necessary)
   #NOTE(review): the stray '' around link below (and in 'ADDED'+link+'')
   #suggests link brackets were lost from these literals - confirm.
   if old[inpt-1]=='\n':
       new=old[:inpt]+''+link+'\n'+old[inpt:]  #inserting new link
   else:                                           #(ensuring newline before)
       new=old[:inpt]+'\n'+link+'\n'+old[inpt:]
   #message and summary differ for a replaced link and a newly added one;
   #the summary carries the first two characters of link
   if olink!=:
       mess='SUBST'+link+'FOR'+olink
       summ='bot:zast.link '+link[:2] if la=='pl' else 'bot replacing iw link'
   else:
       mess='ADDED'+link+''
       summ='bot:dod.link '+link[:2] if la=='pl' else 'bot adding iw link'
   return new,summ,mess,new

def edit_IWN(la,Art,old,link):

   """Instr. 'IW' adds an interwiki link (unless already there)"""
   """Also returns: the complete new text"""
   #NOTE(review): the docstring says 'IW', but edit() would call this
   #function for the instruction 'IWN'.
   #NOTE(review): this function is incomplete as it stands and will not
   #compile.  Empty-string literals appear to have been lost (e.g. after
   #'old==' and in the bare 'return ,,...' tuple), and the scan loop has
   #lost everything between its first starts(...) test and the 'comment
   #ends' branch, so on is never set to 0.  olink is tested after the loop
   #but never assigned anywhere in the visible code.
   #TODO restore from the bot's real source; do not guess.
   #Returns (new text, summary, message, output); here the output is the
   #complete new text as well.
   if old==:
       return ,,'*NOSUCHARTFORIW',
   inpt=len(old)           #input point
   on=1; n=0               #flag (off during comments); search cursor 
   while n<len(old):
       if on==1:
           if starts(old[n:],):
               on=1; n=n+2             #comment ends
       n=n+1
   if on==0 and inpt==len(old):
       old=old+'-->'; inpt=inpt+3      #closes hanging comment
   #NOTE(review): the stray '' around link below (and in 'ADDED'+link+'')
   #suggests link brackets were lost from these literals - confirm.
   if old[inpt-1]=='\n':
       new=old[:inpt]+''+link+'\n'+old[inpt:]  #inserting new link
   else:                                           #(ensuring newline before)
       new=old[:inpt]+'\n'+link+'\n'+old[inpt:]
   if olink!=:
       mess='SUBST'+link+'FOR'+olink
       summ='bot:zast.link '+link[:2] if la=='pl' else 'bot replacing iw link'
   else:
       mess='ADDED'+link+''
       summ='bot:dod.link '+link[:2] if la=='pl' else 'bot adding iw link'
   return new,summ,mess,new


def edit_Dis(la,Art,old,linex):

   """Instr. 'Dis' adds a line to a Polish geog disambig. page"""
   """Creates page if necessary"""
   """Includes/removes line linking to Polish Wikipedia"""
   #Returns (new text, summary, message, output), like every edit_xx.
   #linex is the line to add; a leading '+' is stripped to give line, and
   #a linex that carried the '+' is refused when the page does not exist
   #('*NOPAGEWDISTAG').
   #starts, upto, sf, sfr, sfrj, sflastr, noinitsp and notrailsp are
   #helpers imported elsewhere; their exact semantics are not visible here.
   #NOTE(review): this function will not compile as it stands.  Empty-string
   #literals appear to have been lost (after 'old==', 'gm!=' and in the
   #bare 'return ,,...' tuples), and two passages are broken across lines
   #with part of their string literals missing (marked below).
   line = linex[1:] if starts(linex,'+') else linex
   name=upto(Art,'(disamb')
   if old==:     #if no page exists, create it (with temporary second link)
       if line!=linex:
           return ,,'*NOPAGEWDISTAG',
       #NOTE(review): the literal below is damaged: the leading "" looks
       #like lost markup, the code later searches for '[[Poland' and
       #'*[[:pl:' which this text no longer contains, and whatever stood
       #between the two '\n' groups is gone - TODO restore from the real
       #source.
       new=""+name+" may refer to the following places in Poland:"+\
           '\n*'+line+'\n*Other places called '+name+\

" (listed in Polish Wikipedia)\n\n

\n"

       summ='bot creating disambiguation page'; mess='NEWDISAMB'
   else:
       op1=sf(old,'*[[:pl:'); op2=sfrj(old,op1,'\n')
       if op2>-1:
           old=old[:op1]+old[op2:]   #remove any temporary link
       mayref=sf(old,'may refer')
       #NOTE(review): the next two statements have lost their indentation
       #and the contents of two string literals (the text searched for to
       #set nd, and the text tested with 'in old') - TODO restore from the
       #real source.

np=sfr(old,mayref,'[[Poland'); nd=sfr(old,np,'

')

if '

' in old or Art+' (disambiguation)' in old:

           return ,,'*TRYPAGEWDISTAG',
       if nd<0:
           return ,,'*NOTPOLGEODISFOR:'+line,
       nc2=sf(line,'County'); nc1=sflastr(line,nc2,('[',','))
       cy=noinitsp(line[nc1+1:nc2+6])         #the powiat in the new line
       ng1=sf(line,'Gmina'); ng2=sfr(line,ng1,('|',']',','))
       gm=notrailsp(line[ng1:ng2])            #the gmina in the new line
       if (cy in old and ', Gmina' not in line) or (gm!= and gm in old and
           gm[6:]!=name):   #if equivalent line seems to be there already: 
           return ,,'*LINEALREADYTHERE',
       #walk the lines between np and nd; inpt stays 0 if no place to
       #insert is found
       i1=np; inpt=0
       while i1>-1 and i1<=nd:    #now find insert point
           i1=sfrj(old,i1,'\n')
           if i1>-1 and i1<=nd and old[i1]!='*':
               inpt=i1; i1=-1
           else:
               i2=sfr(old,i1,'\n'); nextline=old[i1:i2]
               if i2<0 or i2>nd or ('Gmina' not in nextline and ('County'
                      not in nextline or 'Voivod' not in nextline) and
                     (('town' not in nextline and 'city' not in nextline and
                       'river' not in nextline) or ('Voivod' not in nextline
                                          and 'Poland' not in nextline))):
                   inpt=i1; i1=-1
       if inpt==0:
           return ,,'*BADPAGEFORMAT',
       new=old[:inpt]+'*'+line+'\n'+old[inpt:]
       summ='bot adding to disambiguation page'; mess='ADDEDTODISAMB'
   return new,summ,mess,