# User:Drinibot/ExtractWikilinks.py
# From Wikipedia, the free encyclopedia
import re
import subprocess
# Input file scanned for links, and output file that receives one wikilink
# per line.  Both paths are relative to the current working directory.
rawfilename = "raw.html"
linksfilename = "links.txt"

# Captures the text between ".org/wiki/" and the next double quote.
# The pattern is deliberately unanchored: the previous form wrapped it in a
# greedy ".*" prefix and a ".*$" suffix, which made findall() return at most
# one match per line (the last link) and silently dropped any others.  The
# dot before "org" is escaped so it only matches a literal ".".
# The group name "oldcat" is kept from the original pattern.
regex = re.compile(r'\.org/wiki/(?P<oldcat>[^"]*)"')


def extract_wikilinks(line):
    """Return the wikilinks found in one line of text.

    Every ``.org/wiki/<title>"`` occurrence in *line* yields ``[[<title>]]``,
    in order of appearance.  Titles containing ``Special:`` are skipped.

    >>> extract_wikilinks('<a href="http://en.wikipedia.org/wiki/Foo">')
    ['[[Foo]]']
    """
    return [
        "[[" + title + "]]"
        for title in regex.findall(line)
        if "Special:" not in title
    ]


def main():
    """Read ``rawfilename`` line by line and write its wikilinks.

    Each extracted link is printed to standard output and written to
    ``linksfilename`` followed by a newline.
    """
    # The input is opened first so that a missing input file raises before
    # the output file is created or truncated.  The ``with`` blocks close
    # both files even if an error occurs part-way through.
    with open(rawfilename, 'r') as fi:
        with open(linksfilename, 'w') as li:
            for line in fi:
                for wl in extract_wikilinks(line):
                    # Single-argument print(...) works on Python 2 and 3.
                    print(wl)
                    li.write(wl + "\n")


if __name__ == "__main__":
    main()

