User:ClueBot III/Source

From Wikipedia, the free encyclopedia

The following is automatically generated by ClueBot III.


  • wikibot.classes.php is here.


[edit] Source to ClueBot III

<?PHP
        /*
         * TODO:
         *  Backlink fixing. Done
         *  Archive stats (size, number of topics, most recent, etc).
         */

        // Ticks let PHP dispatch pending signals between statements, so the
        // SIGCHLD handler registered below runs while the script is busy.
        declare(ticks = 1);

        /**
         * Signal handler: on SIGCHLD, reap every child that has already exited.
         *
         * Any other signal number is ignored.  The wait is non-blocking (WNOHANG),
         * so the handler returns as soon as no further exited child is available.
         *
         * @param int $signo Number of the signal being delivered.
         */
        function sig_handler($signo) {
                if ($signo != SIGCHLD) {
                        return;
                }

                for (;;) {
                        $reaped = pcntl_waitpid(0, $status, WNOHANG);

                        // -1: error / no children at all.  0: children exist, none has exited yet.
                        if (($reaped == -1) or ($reaped == 0)) {
                                break;
                        }

                        // The exit code is decoded but nothing reads it afterwards.
                        $status = pcntl_wexitstatus($status);
                }
        }

        // Deliver SIGCHLD to sig_handler() so that finished child processes get reaped.
        pcntl_signal(SIGCHLD,   "sig_handler");
                                                                                                                                                                                                                                                                        

        include 'cluebot3.config.php';
        include '../wikibot.classes.php';

        /**
         * Split wikitext into its lead text and its level-2 ("==Title==") sections.
         *
         * A heading is a line that starts with exactly two '=' and, once trimmed,
         * also ends with exactly two '='.  Deeper headings ("===Sub===") and
         * malformed lines (for example "==Title" with no closing marker) are kept
         * verbatim as content of whatever section they appear in.
         *
         * @param string $d Raw wikitext of a page.
         * @return array Element 0 is the text that precedes the first heading.
         *               Every other element is
         *               array('header' => raw heading text, 'content' => body),
         *               keyed by the trimmed heading text; a repeated heading gets
         *               " 2", " 3", ... appended to its key.  'content' begins with
         *               the newline that ended the heading line.
         */
        function splitintosections ($d) {
                $d = (string) $d;

                $header = '';
                $sections = array();
                $insection = false;
                $title = '';
                $body = '';

                foreach (explode("\n",$d) as $n => $line) {
                        // The newline that ended the previous line belongs to the text being collected.
                        if ($n > 0) $body .= "\n";

                        $trimmed = trim($line);

                        // "==x==" is the shortest possible heading, hence the length check.
                        $isheading = (strlen($trimmed) >= 5)
                                && (strncmp($line,'==',2) == 0)
                                && ($line[2] != '=')
                                && (substr($trimmed,-2) == '==')
                                && (substr($trimmed,-3,1) != '=');

                        if ($isheading) {
                                // Close whatever was being collected before this heading.
                                if ($insection) $sections[] = array($title,$body);
                                else $header = $body;

                                $insection = true;
                                $title = substr($trimmed,2,-2);
                                $body = '';
                        } else {
                                $body .= $line;
                        }
                }

                if ($insection) $sections[] = array($title,$body);
                else $header = $body;

                $ret = array($header);

                foreach ($sections as $section) {
                        // Disambiguate repeated headings the same way every time: "Title", "Title 2", ...
                        $id = trim($section[0]);
                        $i = 1;
                        while (isset($ret[$id])) {
                                $i++;
                                $id = trim($section[0]).' '.$i;
                        }
                        $ret[$id] = array('header'=>$section[0],'content'=>$section[1]);
                }
                return $ret;
        }

        /**
         * Split a page title into its namespace prefix and the remaining title.
         *
         * Only the namespaces listed in the pattern (optionally followed by
         * "talk") are recognised, case-insensitively.  Any other title,
         * including one that merely contains a colon, is returned whole with an
         * empty namespace.
         *
         * @param string $page Full page title, e.g. "User talk:Example/Archives/".
         * @return array array(namespace as written in $page, title without the prefix).
         */
        function extractnamespace ($page) {
                if (preg_match('/^((user|wikipedia|image|mediawiki|template|help|category|portal)? ?(talk)?):(.*)$/i',$page,$m)) {
                        return array($m[1],$m[4]);
                }

                // No recognised prefix: the whole title is the page name.
                return array('',$page);
        }

        /**
         * Translate a namespace name into its numeric namespace id.
         *
         * The name is matched case-insensitively and underscores count as spaces.
         * The empty string stands for the main namespace (id 0).
         *
         * @param string $namespace Namespace name such as "User talk" or "User_talk".
         * @return int Id taken from the table below.
         */
        function namespacetoid ($namespace) {
                static $ids = array(
                        ''               => 0,
                        'talk'           => 1,
                        'user'           => 2,
                        'user talk'      => 3,
                        'wikipedia'      => 4,
                        'wikipedia talk' => 5,
                        'image'          => 6,
                        'image talk'     => 7,
                        'mediawiki'      => 8,
                        'mediawiki talk' => 9,
                        'template'       => 10,
                        'template talk'  => 11,
                        'help'           => 12,
                        'help talk'      => 13,
                        'category'       => 14,
                        'category talk'  => 15,
                        'portal'         => 100,
                        'portal talk'    => 101
                );

                $spaced = str_replace('_',' ',$namespace);
                $name = strtolower($spaced);

                return $ids[$name];
        }

        /**
         * Archive the sections of a page that have not changed for $age hours.
         *
         * The current page text is compared, section by section, with the text of
         * the newest revision that is at least $age hours old.  Sections whose
         * trimmed content is identical in both (or that carry the
         * {{User:ClueBot III/ArchiveNow}} marker) are appended to an archive page
         * and removed from $page.  After a successful archive run a child process
         * is forked to rewrite "Page#Section" links on pages that link to $page.
         * Finally the archive index is regenerated via generateindex().
         *
         * @param string $page          Title of the page to archive.
         * @param string $archiveprefix Title prefix of the archive pages.  If it does
         *                              not start with $page (compared lower-case, with
         *                              underscores treated as spaces) it is replaced by
         *                              $page.'/Archives/'.
         * @param string $archivename   gmdate() format string appended to the prefix;
         *                              evaluated for the moment $age hours ago.
         * @param int    $age           Age threshold in hours.
         * @param int    $minarch       Nothing is edited unless at least this many
         *                              sections qualify for archiving.
         * @param int    $minkeep       Once this many (or fewer) sections would remain
         *                              on the page, the rest are kept.
         * @param string $defaulthead   Text used to start the archive page when
         *                              getpage() returns nothing for it.
         * @return void  Returns early, without regenerating the index, when no
         *               sufficiently old revision is found (see the revision loop).
         */
        function doarchive ($page,$archiveprefix,$archivename,$age,$minarch,$minkeep,$defaulthead) {
                global $wpq;
                global $wpapi;
                global $wpi;

                // Sections of the page as it stands right now.
                $cursects = splitintosections($wpq->getpage($page));

                // Walk the history, newest first, in batches of 500 until a revision
                // at least $age hours old turns up.
                // NOTE(review): the argument meanings of revisions() are not visible here;
                // presumably (title, count, direction, include-content, start revid) - confirm.
                $done = false;
                $lastrvid = null;
                while (!$done) {
                        // "$dir = 'older'" also assigns a local $dir as a side effect; only the value is passed.
                        $rv = $wpapi->revisions($page,500,$dir = 'older',false,$lastrvid);
                        foreach ($rv as $rev) {
                                if (preg_match('/(\d+)\-(\d+)\-(\d+)T(\d+):(\d+):(\d+)/',$rev['timestamp'],$m)) {
                                        $time = gmmktime($m[4],$m[5],$m[6],$m[2],$m[3],$m[1]);
                                        if ((time() - $time) >= ($age * 60 * 60)) {
                                                $done = true;
                                                break;
                                        }
                                }
                        }
                        // The batch had no 500th entry and nothing was old enough: give up.
                        // generateindex() at the end of the function is not reached on this path.
                        if ((!isset($rv[499])) and ($done == false)) return;
                        // Either the old-enough revision, or the last one of the batch to continue from.
                        $lastrvid = $rev['revid'];
                }
                // Fetch that revision again, this time reading its text from the '*' field.
                $tmp = $wpapi->revisions($page,1,'older',true,$lastrvid);
                $oldsects = splitintosections($tmp[0]['*']);
                // Element 0 is the lead text before the first heading; it always stays on the page.
                $header = $cursects[0];
                unset($cursects[0]);
                unset($oldsects[0]);
                $keepsects = array();
                $archsects = array();
                // Forget old sections that no longer exist on the current page.
                foreach ($oldsects as $id => $array) {
                        if (!isset($cursects[$id])) {
                                unset($oldsects[$id]);
                        }
                }
                // Decide, in page order, whether each current section is kept or archived.
                foreach ($cursects as $id => $array) {
                        if ((count($cursects) - count($archsects)) <= $minkeep) {
                                // Archiving more would leave fewer than $minkeep sections.
                                $keepsects[$id] = $array;
                        } elseif (strpos($array['content'],'{{User:ClueBot III/ArchiveNow}}') !== false) {
                                // Explicit request: archive regardless of age, and neutralise the marker.
                                $array['content'] = str_replace('{{User:ClueBot III/ArchiveNow}}','{{tlu|User:ClueBot III/ArchiveNow}}',$array['content']);
                                $archsects[$id] = $array;
                        } elseif (!isset($oldsects[$id])) {
                                // Section did not exist in the old revision, so it is newer than $age.
                                $keepsects[$id] = $array;
                        } elseif (trim($array['content']) == trim($oldsects[$id]['content'])) {
                                // Unchanged since the old revision: stale.
                                $archsects[$id] = $array;
                        } else {
                                $keepsects[$id] = $array;
                        }
                }

                // Debug output: the section ids that ended up in each bucket.
                foreach ($oldsects as $id => $array) $tmpsectsprintr['oldsects'][] = $id;
                foreach ($cursects as $id => $array) $tmpsectsprintr['cursects'][] = $id;
                foreach ($keepsects as $id => $array) $tmpsectsprintr['keepsects'][] = $id;
                foreach ($archsects as $id => $array) $tmpsectsprintr['archsects'][] = $id;

                print_r($tmpsectsprintr);


                if ((count($archsects) > 0) and (count($archsects) >= $minarch)) {
                        // New text of the page: lead text plus every kept section.
                        $pdata = $header;
                        foreach ($keepsects as $array) { $pdata .= '=='.$array['header'].'=='.$array['content']; }
//                      echo '$pdata = '.$pdata."\n\n\n\n";

                        // Archives must live under the page itself; otherwise fall back to the default location.
                        if (substr(strtolower(str_replace('_',' ',$archiveprefix)),0,strlen($page)) != strtolower($page)) {
                                $archiveprefix = $page.'/Archives/';
                        }

                        $apage = $archiveprefix.gmdate($archivename,(time() - ($age * 60 * 60)));

                        // Existing archive text, or the default header for a new archive, then the archived sections.
                        $adata = (($x = $wpq->getpage($apage))?$x:$defaulthead."\n")."\n";
                        foreach ($archsects as $array) { $adata .= '=='.$array['header'].'=='.$array['content']; }
//                      echo '$adata = '.$adata."\n\n\n\n";
                        // The archive is written before the sections are removed from the page.
                        $wpi->post($apage,$adata,'Archiving '.count($archsects).' discussions from [['.$page.']]. (BOT)',true);
                        $wpi->post($page,$pdata,'Archiving '.count($archsects).' discussions to [['.$apage.']]. (BOT)',true);

                        //generateindex($page,$archiveprefix);

                        // Backlink fixing runs in a forked child; the parent carries on to generateindex().
                        $pid = pcntl_fork();
                        if ($pid == 0) {
                                $search = array();
                                $replace = array();
                                // $header is reused here as the section id (the key of $archsects).
                                foreach ($archsects as $header => $data) {
                                        // Anchor form: spaces become underscores, then URL-encoded with '%' turned into '.'.
                                        $anchor = str_replace('%','.',urlencode(str_replace(' ','_',$header)));
                                        $search[] = $page.'#'.$anchor;
                                        $replace[] = $apage.'#'.$anchor;
                                }
                        
                                // Collect the titles of all pages linking to $page, 500 at a time.
                                // NOTE(review): presumably backlinks() advances $continue by reference - confirm.
                                $pagelist = array();
                                $continue = null;
                                $bl = $wpapi->backlinks($page,500,$continue);
                                foreach ($bl as $data) { $pagelist[] = $data['title']; }
                                while (count($bl) >= 500) {
                                        $bl = $wpapi->backlinks($page,500,$continue);
                                        foreach ($bl as $data) { $pagelist[] = $data['title']; }
                                }

                                print_r($search);
                                print_r($replace);
//                              print_r($pagelist);

                                // Split the titles into groups for separate worker processes.  $count is
                                // incremented before the division, so group 0 receives 499 titles.
                                $forktasklist = array();
                                $count = 0;
                                foreach ($pagelist as $title) {
                                        $count++;
                                        $group = floor($count / 500);
                                        $forktasklist[$group][] = $title;
                                }
                                unset($pagelist);

                                // One forked worker per group; each rewrites the links in its pages, then exits.
                                for ($i=0;$i<count($forktasklist);$i++) {
                                        $pid = pcntl_fork();
                                        if ($pid == 0) {
                                                foreach ($forktasklist[$i] as $title) {
                                                        $data = $wpq->getpage($title);
                                                        $newdata = str_replace($search,$replace,$data);
                                                        if ($data != $newdata) {
//                                                              echo 'Would post to '.$title."\n";
                                                                $wpi->post($title,$newdata,'Fixing links to archived content. (BOT)',true);
                                                        }
                                                }
                                                die();
                                        }
                                }
                                // The backlink child itself is finished once its workers are started.
                                die();
                        }
                }
                // Reached whether or not anything was archived above.
                generateindex($page,$archiveprefix);
        }

        /**
         * Rebuild the index page that lists every archive of a page.
         *
         * All pages whose title starts with $archiveprefix are listed, ordered by
         * the timestamp of the first revision returned for each of them in
         * 'newer' direction, and the resulting bullet list is posted to
         * "User:<bot>/Indices/<origpage>".  When no archive page exists yet, an
         * empty list is posted.
         *
         * @param string $origpage      Title of the page the archives belong to.
         * @param string $archiveprefix Title prefix shared by the archive pages.
         */
        function generateindex ($origpage,$archiveprefix) {
                global $user;
                global $wpapi;
                global $wpi;

                list($namespace,$prefix) = extractnamespace($archiveprefix);
                $pages = $wpapi->listprefix($prefix,namespacetoid($namespace),500);
                if (!is_array($pages)) $pages = array();

                // Must exist even when there are no archives: asort() rejects null.
                $timestamps = array();
                foreach ($pages as $page) {
                        $tmp = $wpapi->revisions($page['title'],1,'newer');
                        $timestamps[$page['title']] = $tmp[0]['timestamp'];
                }
                asort($timestamps);

                $data = '';
                foreach ($timestamps as $title => $time) {
                        // The link label is the title with the archive prefix stripped.
                        $data .= '* [['.$title.'|'.str_replace($archiveprefix,'',$title).']]'."\n";
                }
                $wpi->post('User:'.$user.'/Indices/'.$origpage,$data,'Setting index for [['.$origpage.']]. (BOT)');
        }

        /**
         * Read the {{User:<bot>/ArchiveThis|...}} template on a page and archive the page accordingly.
         *
         * The first occurrence of the template (matched case-insensitively) is
         * located, its text is cut out by matching braces, and its "key=value"
         * arguments are handed to doarchive().  Nothing happens when the template
         * is absent or is never closed.
         *
         * Arguments read: archiveprefix, format, age (passed as null when absent),
         * minarchthreads and minkeepthreads (default 0), header (default
         * '{{Talkarchive}}').  Arguments are split on every '|', including any
         * that sit inside a nested template or link.
         *
         * @param string $page Title of the page carrying the template.
         */
        function parsetemplate ($page) {
                global $wpq;
                global $user;

                $data = $wpq->getpage($page);

                $start = stripos($data,'{{user:'.$user.'/archivethis');
                if ($start === false) return;

                // Match braces from the template's first '{' until the nesting depth is back at zero.
                $data = substr($data,$start);
                $length = strlen($data);
                $depth = 1;
                $pos = 1;
                while (($depth != 0) and ($pos < $length)) {
                        if ($data[$pos] == '{') $depth++;
                        if ($data[$pos] == '}') $depth--;
                        $pos++;
                }

                // The text ended before the template was closed; there is nothing reliable to parse.
                if ($depth != 0) return;

                // Keep the template text without its outer "{{" and "}}".
                $data = substr(substr($data,0,$pos),2,-2);

                $arguments = explode('|',$data);
                unset($arguments[0]); // the template name itself

                $set = array();
                foreach ($arguments as $argument) {
                        $pair = explode('=',trim($argument),2);
                        $key = strtolower(trim($pair[0]));

                        // An argument without '=' is recorded with a null value.
                        $set[$key] = (isset($pair[1])?$pair[1]:null);
                }
                print_r($set);

                doarchive($page,
                        (isset($set['archiveprefix'])?$set['archiveprefix']:null),
                        (isset($set['format'])?$set['format']:null),
                        (isset($set['age'])?$set['age']:null),
                        (isset($set['minarchthreads'])?$set['minarchthreads']:0),
                        (isset($set['minkeepthreads'])?$set['minkeepthreads']:0),
                        (isset($set['header'])?$set['header']:'{{Talkarchive}}')
                        );
        }

        // Client objects shared with the functions above through "global".
        $wpq = new wikipediaquery;
        $wpi = new wikipediaindex;
        $wpapi = new wikipediaapi;

        $wpapi->login($user,$pass);

        // Publish this file on the bot's own source page.  The post is forced
        // because this is *our* page, and it triggers the nobots.
        $sourcetext = 'The following is automatically generated by [[User:'.$user.'|'.$user."]].\n\n\n\n";
        $sourcetext .= '* wikibot.classes.php is [[User:ClueBot/Source|here]].';
        $sourcetext .= "\n\n\n\n==Source to ".$user."==\n\n";
        $sourcetext .= '<pre>'.htmlentities(file_get_contents(__FILE__))."</pre>\n\n\n\n~~~~";
        $wpi->forcepost('User:'.$user.'/Source',$sourcetext,'Automated source upload.');


        // Once an hour, fork a child that processes every page transcluding the ArchiveThis template.
        for (;;) {
                $child = pcntl_fork();
                if ($child == 0) {
                        // Gather all transcluding titles, 500 per request, until a batch comes back short.
                        $targets = array();
                        $continue = null;
                        do {
                                $batch = $wpapi->embeddedin('User:'.$user.'/ArchiveThis',500,$continue);
                                foreach ($batch as $entry) { $targets[] = $entry['title']; }
                        } while (isset($batch[499]));

                        foreach ($targets as $target) {
                                parsetemplate($target);
                        }
                        die();
                }

                // Wait an hour in one-second steps, re-checking the clock after every sleep() call.
                $started = time();
                while ((time() - $started) < 3600) {
                        sleep(1);
                }
        }
?>


ClueBot III (talk) 21:54, 20 May 2008 (UTC)