User:ClueBot III/Source
From Wikipedia, the free encyclopedia
The following is automatically generated by ClueBot III.
- wikibot.classes.php is here.
[edit] Source to ClueBot III
<?PHP
/*
* TODO:
* Backlink fixing. Done
* Archive stats (size, number of topics, most recent, etc).
*/
declare(ticks = 1);

/**
 * Signal handler that reaps finished child processes.
 *
 * Only SIGCHLD is acted upon; every other signal number is ignored.
 * Children are collected without blocking (WNOHANG) until pcntl_waitpid()
 * reports either an error (-1) or that no further child has exited (0).
 *
 * @param int $signo Number of the signal that was delivered.
 */
function sig_handler($signo) {
    if ($signo != SIGCHLD) {
        return;
    }
    while (true) {
        $x = pcntl_waitpid(0, $status, WNOHANG);
        if (($x == -1) or ($x == 0)) {
            break;
        }
        // The exit status is decoded but not used any further.
        $status = pcntl_wexitstatus($status);
    }
}

// Register the handler so that forked workers do not linger as zombies.
pcntl_signal(SIGCHLD, "sig_handler");
include 'cluebot3.config.php';
include '../wikibot.classes.php';
/**
 * Split wikitext into its lead text and its level-2 ("==Title==") sections.
 *
 * A heading is recognised only when "==" starts a line, is not followed by
 * a third "=", and the trimmed line also ends in exactly two "=" signs.
 * Deeper headings ("===Sub===") are left inside the body of their section.
 *
 * Lines that begin with "==" but are not valid headings are copied to the
 * body unchanged (earlier versions silently dropped their first "=").
 *
 * @param string $d Raw wikitext of a page.
 * @return array Element 0 is the text before the first heading. Every other
 *               element is keyed by the trimmed heading text (with " 2",
 *               " 3", ... appended for repeated headings) and holds
 *               array('header' => raw heading text, 'content' => body text).
 *               The body starts with the newline that ends the heading line.
 */
function splitintosections ($d) {
    $d = (string) $d;
    $len = strlen($d);
    $sections = array();
    $header = '';
    $title = '';
    $body = '';
    $seenheading = false;

    for ($i = 0; $i < $len; $i++) {
        $startsline = (($i == 0) or ($d[$i - 1] == "\n"));
        if ($startsline and (substr($d, $i, 2) === '==') and (substr($d, $i + 2, 1) !== '=')) {
            // Isolate the rest of the current line (up to, not including, "\n").
            $eol = strpos($d, "\n", $i);
            if ($eol === false) $eol = $len;
            $line = trim(substr($d, $i, $eol - $i));

            if ((substr($line, -2) == '==') and (substr($line, -3, 1) != '=')) {
                // Valid heading: close the text collected so far.
                if ($seenheading) $sections[] = array($title, $body);
                else $header = $body;
                $seenheading = true;
                $title = substr($line, 2, -2);
                $body = '';
                // Resume at the line's terminating newline so that it becomes
                // the first character of the new section's body.
                $i = $eol - 1;
                continue;
            }
        }
        $body .= $d[$i];
    }

    if ($seenheading) $sections[] = array($title, $body);
    else $header = $body;

    $ret = array($header);
    foreach ($sections as $section) {
        $base = trim($section[0]);
        $id = $base;
        $n = 1;
        // Disambiguate repeated headings: "Foo", "Foo 2", "Foo 3", ...
        while (isset($ret[$id])) {
            $n++;
            $id = $base.' '.$n;
        }
        $ret[$id] = array('header'=>$section[0],'content'=>$section[1]);
    }
    return $ret;
}
/**
 * Split a page title into its namespace prefix and the remaining title.
 *
 * @param string $page Full page title, e.g. "User talk:Foo/Archives/".
 * @return array array(namespace, title). The namespace is returned as it
 *               was written in $page. When no known namespace prefix is
 *               present, the namespace is '' and the title is $page itself.
 */
function extractnamespace ($page) {
    if (preg_match('/^((user|wikipedia|image|mediawiki|template|help|category|portal)? ?(talk)?):(.*)$/i',$page,$m)) {
        return array($m[1],$m[4]);
    }
    // No recognised prefix: the whole string is a main-namespace title.
    // ($m is empty here, so it must not be used.)
    return array('',$page);
}
/**
 * Translate a namespace name into its numeric namespace id.
 *
 * The lookup ignores case and treats underscores as spaces. Each subject
 * namespace has an even id and its talk namespace the following odd id.
 *
 * @param string $namespace Namespace name, e.g. "User_talk" or "" (main).
 * @return int Numeric id of the namespace.
 */
function namespacetoid ($namespace) {
    $subjects = array(
        ''          => 0,
        'user'      => 2,
        'wikipedia' => 4,
        'image'     => 6,
        'mediawiki' => 8,
        'template'  => 10,
        'help'      => 12,
        'category'  => 14,
        'portal'    => 100
    );

    // Expand the table with the matching "<name> talk" entries.
    $lookup = array();
    foreach ($subjects as $name => $id) {
        $lookup[$name] = $id;
        $lookup[ltrim($name.' talk')] = $id + 1;
    }

    $key = strtolower(str_replace('_',' ',$namespace));
    return $lookup[$key];
}
/**
 * Move stale discussion sections of $page to an archive page, repair links
 * that pointed at the moved sections, and regenerate the archive index.
 *
 * A section is considered stale when its trimmed content is identical to the
 * same-named section in a revision that is at least $age hours old, or when
 * it contains {{User:ClueBot III/ArchiveNow}}.
 *
 * Uses the globals $wpq, $wpapi and $wpi (created at the bottom of the file).
 *
 * @param string $page          Title of the page to archive.
 * @param string $archiveprefix Title prefix for archive pages. Replaced by
 *                              "$page/Archives/" when it does not start with $page.
 * @param string $archivename   gmdate() format string appended to the prefix.
 * @param mixed  $age           Age threshold in hours.
 * @param mixed  $minarch       Minimum number of stale sections required
 *                              before anything is archived.
 * @param mixed  $minkeep       Once (current sections - sections selected for
 *                              archiving) is at or below this number, the
 *                              remaining sections are kept.
 * @param string $defaulthead   Text that starts an archive page for which
 *                              getpage() returns a falsy value.
 *
 * Returns nothing. Returns early, without touching the index, when the
 * revision history does not contain a revision at least $age hours old.
 */
function doarchive ($page,$archiveprefix,$archivename,$age,$minarch,$minkeep,$defaulthead) {
global $wpq;
global $wpapi;
global $wpi;
// Sections of the page as it is now.
$cursects = splitintosections($wpq->getpage($page));
$done = false;
$lastrvid = null;
// Walk the history in batches of 500 until a revision whose timestamp is at
// least $age hours in the past is found. $lastrvid is where the next batch
// starts and, after the loop, identifies the reference revision.
// NOTE(review): argument meanings of revisions() are defined in
// wikibot.classes.php, which is not visible here — confirm there.
while (!$done) {
$rv = $wpapi->revisions($page,500,$dir = 'older',false,$lastrvid);
foreach ($rv as $rev) {
if (preg_match('/(\d+)\-(\d+)\-(\d+)T(\d+):(\d+):(\d+)/',$rev['timestamp'],$m)) {
// Timestamp parts are interpreted as GMT.
$time = gmmktime($m[4],$m[5],$m[6],$m[2],$m[3],$m[1]);
if ((time() - $time) >= ($age * 60 * 60)) {
$done = true;
break;
}
}
}
// A batch with fewer than 500 entries and no old-enough revision means the
// history has been searched completely: give up.
if ((!isset($rv[499])) and ($done == false)) return;
// NOTE(review): $rev is undefined here if $rv was empty on the first pass.
$lastrvid = $rev['revid'];
}
// Fetch the reference revision and split its '*' field into sections.
// NOTE(review): presumably the fourth argument (true) requests the
// revision text — confirm against wikibot.classes.php.
$tmp = $wpapi->revisions($page,1,'older',true,$lastrvid);
$oldsects = splitintosections($tmp[0]['*']);
// Element 0 is the text before the first heading; it always stays on the page.
$header = $cursects[0];
unset($cursects[0]);
unset($oldsects[0]);
$keepsects = array();
$archsects = array();
// Forget old sections that no longer exist on the current page.
foreach ($oldsects as $id => $array) {
if (!isset($cursects[$id])) {
unset($oldsects[$id]);
}
}
// Decide, section by section and in page order, whether to keep or archive.
foreach ($cursects as $id => $array) {
if ((count($cursects) - count($archsects)) <= $minkeep) {
// Archiving more would leave fewer than $minkeep sections behind.
$keepsects[$id] = $array;
} elseif (strpos($array['content'],'{{User:ClueBot III/ArchiveNow}}') !== false) {
// Explicit request: archive now, rewriting the marker template as a
// {{tlu|...}} call in the archived copy.
$array['content'] = str_replace('{{User:ClueBot III/ArchiveNow}}','{{tlu|User:ClueBot III/ArchiveNow}}',$array['content']);
$archsects[$id] = $array;
} elseif (!isset($oldsects[$id])) {
// Section did not exist in the reference revision.
$keepsects[$id] = $array;
} elseif (trim($array['content']) == trim($oldsects[$id]['content'])) {
// Content identical to the reference revision: stale.
$archsects[$id] = $array;
} else {
// Section changed since the reference revision.
$keepsects[$id] = $array;
}
}
// Debug output: the section ids in each list.
// NOTE(review): $tmpsectsprintr is never initialised, so it is undefined
// when all four lists are empty.
foreach ($oldsects as $id => $array) $tmpsectsprintr['oldsects'][] = $id;
foreach ($cursects as $id => $array) $tmpsectsprintr['cursects'][] = $id;
foreach ($keepsects as $id => $array) $tmpsectsprintr['keepsects'][] = $id;
foreach ($archsects as $id => $array) $tmpsectsprintr['archsects'][] = $id;
print_r($tmpsectsprintr);
if ((count($archsects) > 0) and (count($archsects) >= $minarch)) {
// New text of the source page: lead text plus the kept sections.
$pdata = $header;
foreach ($keepsects as $array) { $pdata .= '=='.$array['header'].'=='.$array['content']; }
// echo '$pdata = '.$pdata."\n\n\n\n";
// The archive prefix must begin with the page title (compared in lower
// case, with underscores in the prefix treated as spaces); otherwise fall
// back to "<page>/Archives/".
// NOTE(review): underscores in $page itself are not normalised here.
if (substr(strtolower(str_replace('_',' ',$archiveprefix)),0,strlen($page)) != strtolower($page)) {
$archiveprefix = $page.'/Archives/';
}
// Archive page title: prefix + gmdate($archivename) evaluated at (now - $age hours).
$apage = $archiveprefix.gmdate($archivename,(time() - ($age * 60 * 60)));
// Start from the existing archive text, or from $defaulthead if getpage()
// returns a falsy value, then append the archived sections.
$adata = (($x = $wpq->getpage($apage))?$x:$defaulthead."\n")."\n";
foreach ($archsects as $array) { $adata .= '=='.$array['header'].'=='.$array['content']; }
// echo '$adata = '.$adata."\n\n\n\n";
// Write the archive first, then the shortened source page.
$wpi->post($apage,$adata,'Archiving '.count($archsects).' discussions from [['.$page.']]. (BOT)',true);
$wpi->post($page,$pdata,'Archiving '.count($archsects).' discussions to [['.$apage.']]. (BOT)',true);
//generateindex($page,$archiveprefix);
// Fix backlinks in a forked child; the parent continues below immediately.
$pid = pcntl_fork();
if ($pid == 0) {
// Child: build "page#anchor" => "archive#anchor" replacements. The array
// key (section id, including any " 2"-style suffix) is used as the anchor
// text: spaces become "_", the result is urlencode()d and "%" becomes ".".
$search = array();
$replace = array();
foreach ($archsects as $header => $data) {
$anchor = str_replace('%','.',urlencode(str_replace(' ','_',$header)));
$search[] = $page.'#'.$anchor;
$replace[] = $apage.'#'.$anchor;
}
// Collect the titles of all pages linking to $page, 500 at a time.
// NOTE(review): the loop only advances if backlinks() updates $continue
// (e.g. by reference) — confirm in wikibot.classes.php.
$pagelist = array();
$continue = null;
$bl = $wpapi->backlinks($page,500,$continue);
foreach ($bl as $data) { $pagelist[] = $data['title']; }
while (count($bl) >= 500) {
$bl = $wpapi->backlinks($page,500,$continue);
foreach ($bl as $data) { $pagelist[] = $data['title']; }
}
print_r($search);
print_r($replace);
// print_r($pagelist);
// Partition the titles into groups for separate worker processes. $count
// is incremented before the division, so group 0 holds 499 titles and
// later groups up to 500.
$forktasklist = array();
$count = 0;
foreach ($pagelist as $title) {
$count++;
$group = floor($count / 500);
$forktasklist[$group][] = $title;
}
unset($pagelist);
// One grandchild process per group; each rewrites the links on its pages
// and saves only the pages whose text actually changed.
for ($i=0;$i<count($forktasklist);$i++) {
$pid = pcntl_fork();
if ($pid == 0) {
foreach ($forktasklist[$i] as $title) {
$data = $wpq->getpage($title);
$newdata = str_replace($search,$replace,$data);
if ($data != $newdata) {
// echo 'Would post to '.$title."\n";
$wpi->post($title,$newdata,'Fixing links to archived content. (BOT)',true);
}
}
die();
}
}
// The backlink-fixing child ends here and never reaches generateindex().
die();
}
}
// Runs in the parent whether or not anything was archived.
generateindex($page,$archiveprefix);
}
/**
 * Rebuild the index page listing every archive page of $origpage.
 *
 * All pages whose title starts with $archiveprefix are listed, sorted by the
 * timestamp of the single revision returned by revisions($title,1,'newer').
 * The resulting bullet list is posted to "User:<bot>/Indices/<origpage>".
 *
 * Uses the globals $user, $wpapi and $wpi.
 *
 * @param string $origpage      Title of the page the archives belong to.
 * @param string $archiveprefix Title prefix shared by its archive pages.
 */
function generateindex ($origpage,$archiveprefix) {
    global $user;
    global $wpapi;
    global $wpi;

    $tmp = extractnamespace($archiveprefix);
    $array = $wpapi->listprefix($tmp[1],namespacetoid($tmp[0]),500);

    // Must exist even when no archive page was found; asort() and foreach
    // below would otherwise operate on an undefined variable.
    $newarray = array();
    if (is_array($array)) {
        foreach ($array as $page) {
            $tmp = $wpapi->revisions($page['title'],1,'newer');
            $newarray[$page['title']] = (isset($tmp[0]['timestamp']) ? $tmp[0]['timestamp'] : '');
        }
    }
    asort($newarray);

    $data = '';
    foreach ($newarray as $page => $time) {
        // Link label is the page title with the archive prefix removed.
        $data .= '* [['.$page.'|'.str_replace($archiveprefix,'',$page).']]'."\n";
    }
    $wpi->post('User:'.$user.'/Indices/'.$origpage,$data,'Setting index for [['.$origpage.']]. (BOT)');
}
/**
 * Read the {{User:<bot>/ArchiveThis|...}} template on $page and run
 * doarchive() with the settings it contains.
 *
 * Recognised arguments (names are case-insensitive): archiveprefix, format,
 * age, minarchthreads (default 0), minkeepthreads (default 0) and header
 * (default "{{Talkarchive}}"). Arguments without "=" are ignored.
 *
 * Nothing is done when the template is absent, when its closing braces are
 * missing, or when "age" is missing or does not start with a number. In the
 * last case doarchive() would otherwise treat the age as 0 hours and archive
 * every thread immediately.
 *
 * Uses the globals $wpq and $user.
 *
 * @param string $page Title of the page to process.
 */
function parsetemplate ($page) {
    global $wpq;
    global $user;

    $data = (string) $wpq->getpage($page);
    $start = stripos($data,'{{user:'.$user.'/archivethis');
    if ($start === false) {
        return;
    }

    // Copy characters until the braces opened by the template are balanced
    // again, so that nested {{...}} inside the arguments are included.
    $data = substr($data,$start);
    $len = strlen($data);
    $pos = 1;
    $depth = 1;
    $tmp = '{';
    while (($depth != 0) and ($pos < $len)) {
        if ($data[$pos] == '{') $depth++;
        if ($data[$pos] == '}') $depth--;
        $tmp .= $data[$pos];
        $pos++;
    }
    if ($depth != 0) {
        // Unterminated template: previously this looped forever.
        echo 'Unterminated ArchiveThis template on '.$page.", skipping.\n";
        return;
    }

    // Strip the outer "{{" and "}}", split on "|" and drop the template name.
    $arguments = explode('|',substr($tmp,2,-2));
    unset($arguments[0]);

    $set = array();
    foreach ($arguments as $argument) {
        $parts = explode('=',trim($argument),2);
        if (!isset($parts[1])) {
            continue; // positional argument without a value
        }
        $set[strtolower(trim($parts[0]))] = $parts[1];
    }
    print_r($set);

    if ((!isset($set['age'])) or (!preg_match('/^\s*\d/',$set['age']))) {
        echo 'Missing or invalid age in ArchiveThis template on '.$page.", skipping.\n";
        return;
    }

    doarchive($page,
        (isset($set['archiveprefix'])?$set['archiveprefix']:null),
        (isset($set['format'])?$set['format']:null),
        $set['age'],
        (isset($set['minarchthreads'])?$set['minarchthreads']:0),
        (isset($set['minkeepthreads'])?$set['minkeepthreads']:0),
        (isset($set['header'])?$set['header']:'{{Talkarchive}}')
    );
}
// Client objects shared (as globals) by the functions above.
$wpq = new wikipediaquery;
$wpi = new wikipediaindex;
$wpapi = new wikipediaapi;

$wpapi->login($user,$pass);

/* Our source code, we force post this because this is *our* page, and it triggers the nobots. */
$wpi->forcepost(
    'User:'.$user.'/Source',
    implode('', array(
        'The following is automatically generated by [[User:'.$user.'|'.$user."]].\n\n\n\n",
        '* wikibot.classes.php is [[User:ClueBot/Source|here]].',
        "\n\n\n\n==Source to ".$user."==\n\n",
        '<pre>'.htmlentities(file_get_contents(__FILE__))."</pre>\n\n\n\n~~~~"
    )),
    'Automated source upload.'
);

// Main loop: fork one archiving run, then wait an hour before the next.
for (;;) {
    $pid = pcntl_fork();
    if ($pid == 0) {
        // Child: gather every page that transcludes the ArchiveThis template,
        // fetching further batches for as long as a batch has a 500th entry.
        $titles = array();
        $continue = null;
        do {
            $ei = $wpapi->embeddedin('User:'.$user.'/ArchiveThis',500,$continue);
            foreach ($ei as $data) {
                $titles[] = $data['title'];
            }
        } while (isset($ei[499]));

        foreach ($titles as $title) {
            parsetemplate($title);
        }
        die();
    }

    // Parent: sleep in one-second steps until 3600 seconds have elapsed.
    $time = time();
    while ((time() - $time) < 3600) {
        sleep(1);
    }
}
?>
ClueBot III (talk) 21:54, 20 May 2008 (UTC)

