User:Dr pda/generatestats.js

From Wikipedia, the free encyclopedia

If a message on your talk page led you here, please be wary of who left it. Code that you insert on this page could contain malicious content capable of compromising your account. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. If this is a .js page, the code will be executed when previewing the page.
Note: After saving, you have to bypass your browser's cache to see the changes. In Internet Explorer and Firefox, hold down the Ctrl key and click the Refresh or Reload button. Opera users have to clear their caches through Tools→Preferences, see the instructions for Opera. Konqueror and Safari users can just click the Reload button.
 //<pre>
 //This script generates a list of the ten shortest and ten longest articles which transclude a template,
 //e.g. {{featured article}}, calculates some statistics and plots a histogram.
 //To use this function add {{subst:js|User:Dr pda/generatestats.js}} to your monobook.js
 //then go to http://en.wikipedia.org/w/index.php?title=User:Dr_pda/generatestats&action=edit
 //See the talk page for documentation.
 
 //Constructor for a small record that pairs a label with a measurement.
 //Must be called with `new`; the two arguments are stored unchanged on the
 //new object as .key and .value (callers in this script pass an article
 //title and a size).
 function keyValuePair(key,value){
  var pair = this;
  pair.key = key;
  pair.value = value;
 }
 
 
 //Comparator for Array.prototype.sort that orders records by ascending .value.
 //The subtraction coerces numeric strings to numbers, so '9' sorts before '10'.
 //Returns a negative number, zero or a positive number.
 function sortByValue(a, b){
  var difference = a.value - b.value;
  return difference;
 }
 
 //Picks an axis increment for the range [min,max].
 //Returns the smallest entry of a fixed list of "round" step sizes that is at
 //least (max-min)/15, i.e. a step that splits the range into at most 15
 //intervals. Falls back to the largest step (5000) when the range is too wide
 //for any entry, or when the range is not a comparable number (NaN).
 function getBestScale(min,max){
   //Local on purpose: an undeclared assignment would create a global and
   //throws in strict mode.
   var scales = [0.2,0.5,1,2,5,10,20,25,50,100,200,250,500,1000,2000,5000];
   var val = (max-min)/15;
   //Index loop rather than for...in, so properties that other scripts add to
   //Array.prototype are never mistaken for scale entries.
   for(var i=0;i<scales.length;i++){
     if (scales[i] >= val) return scales[i];
   }
   return scales[scales.length-1];
 }
 
 //Starts an asynchronous GET request for url.
 //handler is invoked as handler(req,template) on every readystatechange
 //event, so the handler itself has to check req.readyState and req.status.
 //template is an opaque value that is handed through to the handler untouched.
 //Does nothing when the browser offers neither XMLHttpRequest nor ActiveXObject.
 function loadXMLDocPassingTemplate(url,handler,template)
 {
   var req = null;
   if (window.XMLHttpRequest) {
     // native XMLHttpRequest object
     req = new XMLHttpRequest();
   } else if (window.ActiveXObject) {
     // IE/Windows ActiveX version
     req = new ActiveXObject("Microsoft.XMLHTTP");
   }
   if (!req) {
     return;
   }
   req.onreadystatechange = function onStateChange() {
     handler(req,template);
   };
   req.open("GET", url, true);
   req.send("");
 }
 
 //readystatechange handler for the API query started by generateStatistics.
 //req      - the XMLHttpRequest carrying the XML answer of the embeddedin query
 //template - passed through unchanged to follow-up requests
 //Each <page> element of the answer is appended to the global pagesList as a
 //keyValuePair(title,length) and the global counter index is advanced.
 //If the answer carries an <embeddedin> element, its geicontinue token is used
 //to request the next batch; otherwise processing starts: either the chart is
 //built directly from the wiki text sizes, or (when the page URL contains
 //'prosesize') every listed article is fetched to measure its prose.
 function getSizeFromAPI(req,template) {
   // only if req shows "loaded"
   if (req.readyState != 4) return;
   // only if "OK"
   if (req.status != 200) {
     alert("There was a problem retrieving the XML data:\n" +
         req.statusText);
     return;
   }

   var response = req.responseXML.documentElement;
   var pages = response.getElementsByTagName('page');
   if (pages.length == 0) {
     //Tell the user when the very first answer is empty instead of leaving
     //the edit box untouched with no sign that the query has finished.
     if (index == 0) {
       document.getElementById('wpTextbox1').value = 'No articles were found which transclude ' + template + '.';
     }
     return;
   }

   for(var i=0;i<pages.length;i++){
     pagesList[index++] = new keyValuePair(pages[i].getAttribute('title'),pages[i].getAttribute('length'));
   }
   document.getElementById('wpTextbox1').value = 'Retrieved ' + index + ' articles.\n To abort click the back button in your browser.';

   //Check for more pages
   var embeddedin = response.getElementsByTagName('embeddedin');
   if(embeddedin.length > 0){
     var geicontinue = embeddedin[0].getAttribute('geicontinue');
     //The token is opaque text and may hold characters such as | & or # that
     //would otherwise cut the query string short, so it has to be escaped.
     loadXMLDocPassingTemplate(queryURL+'&geicontinue='+encodeURIComponent(geicontinue),getSizeFromAPI,template);
     return;
   }

   //Last batch retrieved: start processing.
   //If using wiki text size
   if(document.location.href.indexOf('prosesize') == -1){
     sortAndMakeChart();
     return;
   }

   //If using readable prose size (WARNING:Will load every page which transcludes template. Could be thousands of pages!!)
   //Index loop rather than for...in, so properties that other scripts add to
   //Array.prototype are not treated as articles.
   for(var j=0;j<pagesList.length;j++){
     var titleURL = encodeURIComponent(pagesList[j].key.replace(/ /g,'_'));
     loadXMLDocPassingTemplate('/w/index.php?action=render&title='+titleURL,getProseSizeFromPage,pagesList[j].key);
   }
 }
 
 
 //readystatechange handler for one rendered article (action=render).
 //req   - the XMLHttpRequest whose responseText is the article HTML
 //title - article title, stored as the key of the result
 //Adds up the text length of every <p>...</p> section after removing
 //footnote markers such as [12], the phrase "citation needed" and all HTML
 //tags, then appends keyValuePair(title,size) to the global proseList.
 //Only a bare "<p>" opens a paragraph; tags with attributes are not matched.
 //When results for all index articles have arrived, pagesList is replaced by
 //proseList and the chart is generated.
 //NOTE(review): a failed request is reported but never counted, so the final
 //chart is not produced if any single page fails to load.
 function getProseSizeFromPage(req,title) {
   // only if req shows "loaded"
   if (req.readyState != 4) return;
   // only if "OK"
   if (req.status != 200) {
     alert("There was a problem retrieving the XML data:\n" +
         req.statusText);
     return;
   }

   var response = req.responseText;
   var proseSize = 0;
   var start = response.indexOf('<p>');

   while(start > -1){
     var stop = response.indexOf('</p>',start);
     //An unterminated paragraph would make the next search restart at the
     //top of the document and loop forever, so stop scanning here.
     if(stop == -1) break;
     var para = response.substring(start+3,stop);
     para = para.replace(/\[\d{1,3}\]/g,'');
     para = para.replace(/citation needed/g,'');
     para = para.replace(/(<([^>]+)>)/ig,'');
     proseSize += para.length;
     start = response.indexOf('<p>',stop);
   }

   proseList[proseIndex++] = new keyValuePair(title,proseSize);
   document.getElementById('wpTextbox1').value = 'Retrieved prose size for ' + proseIndex + ' out of ' + index + ' articles.\n To abort click the back button in your browser.';

   //If last page retrieved then start processing
   if(proseIndex == index){
     pagesList = proseList;
     sortAndMakeChart();
   }
 }
 
 
 //Turns the global pagesList (records with .key = title and .value = size in
 //bytes) into wikitext, writes it to the edit box and clicks the preview
 //button. The output holds the ten longest and ten shortest articles (fewer
 //if there are fewer articles), the count, mean and median, and a <timeline>
 //histogram of sizes in kB. When the page URL contains '&list' the complete
 //list of articles is appended.
 //Side effect: pagesList is left sorted by descending size.
 function sortAndMakeChart(){
   var textbox = document.getElementById('wpTextbox1');
   var count = pagesList.length;
   //Nothing to rank or plot
   if(count == 0){
     textbox.value = 'No articles were retrieved, so there is nothing to chart.';
     return;
   }
   var wantList = document.location.href.indexOf('&list') != -1;

   //One numbered wikitext line per article, size rounded to whole kB
   function listLine(page){
     return '# ' + page.key + ' (' + Math.round(page.value/1024) + ' kB)\n';
   }
   //Label of a histogram bar. Rounded to one decimal so that floating point
   //noise from the 0.2 and 0.5 scales (e.g. 3.0000000000000004) never shows;
   //whole-number labels are unaffected.
   function barLabel(binNumber){
     return Math.round((min + binNumber*xScale)*10)/10;
   }

   //Get top ten and bottom ten (or all of them if there are fewer than ten)
   var shown = Math.min(10,count);
   pagesList.sort(sortByValue);
   var bottomTen = '===Ten shortest articles===\n';
   for(var i=0;i<shown;i++){
     bottomTen += listLine(pagesList[i]);
   }

   pagesList.reverse();
   var topTen = '===Ten longest articles===\n';
   for(var i=0;i<shown;i++){
     topTen += listLine(pagesList[i]);
   }

   var list = '===List of articles by size===\n';
   if(wantList){
     for(var i=0;i<count;i++){
       list += listLine(pagesList[i]);
     }
   }

   //Get Range (list is in descending order here)
   var max = Math.ceil(pagesList[0].value/1024);
   var min = Math.floor(pagesList[count-1].value/1024);
   var xScale = getBestScale(min,max);
   max = Math.ceil(max/xScale)*xScale;
   min = Math.floor(min/xScale)*xScale;
   //Rounded because the fractional scales can leave a quotient such as
   //3.0000000000000004; at least one bin so that a zero-width range (a single
   //article, or all articles the same size) still produces a bar.
   var numBins = Math.max(1,Math.round((max - min)/xScale));

   //Calculate statistics
   var sum = 0.0;
   var bins = [];
   for(var i=0;i<numBins;i++){
     bins[i]=0;
   }
   for(var i=0;i<count;i++){
     var size = Number(pagesList[i].value);
     sum += size;
     var bin = Math.floor((size/1024-min)/xScale);
     //An article sitting exactly on the upper edge of the range would land
     //one past the last bin; count it in the last bin instead.
     bin = Math.min(Math.max(bin,0),numBins-1);
     bins[bin]++;
   }

   var mean = (sum/(count*1024)).toFixed(3);
   //Median: middle element for an odd count, mean of the two middle elements
   //for an even count.
   var middle = Math.floor(count/2);
   var medianBytes;
   if(count % 2 == 1){
     medianBytes = Number(pagesList[middle].value);
   }
   else{
     medianBytes = (Number(pagesList[middle-1].value) + Number(pagesList[middle].value))/2;
   }
   var median = (medianBytes/1024).toFixed(3);

   var statistics = '===Statistics===\n*Number of articles: '+count+'\n*Mean: '+mean+' kB\n*Median: '+median+' kB\n';

   //Calculate best vertical scale
   var yMax = Math.max.apply(Math,bins);
   var yScale = getBestScale(0,yMax);
   yScale = Math.max(1,yScale);
   yMax = Math.ceil(yMax/yScale)*yScale;
   var verticalScale = '\nScaleMajor = gridcolor:darkgrey increment:' + yScale + ' start:0';
   //Minor grid lines only when half the major increment is a whole number
   if(Math.floor(yScale/2) == yScale/2){
     verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale/2 + ' start:0';
   }

   //Draw chart
   var chart = '===Chart===\n<timeline>\nColors=\n  id:lightgrey  value:gray(0.8)\n  id:darkgrey  value:gray(0.8)\n  id:white value:rgb(1,1,1)\n  id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize  = width:auto height:303 barincrement:25\nPlotArea   = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod     = from:0 till:' + yMax +'\nTimeAxis   = orientation:vertical\nAlignBars  = early'+ verticalScale +'\nBackgroundColors = canvas:white\n\nPlotData=\n  color:steel width:20 align:left\n';
   for(var i=0;i<numBins;i++){
     chart += '  bar:'+barLabel(i)+' from:0 till:'+bins[i]+'\n';
   }
   //Add axis label
   chart += '  bar:'+barLabel(Math.floor(2*numBins/5))+' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';

   var output = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
   if(wantList){
     output += '\n' + list;
   }
   textbox.value = output;
   document.getElementById('wpPreview').click();
 }
 
 
 //Entry point: asks for a template name, resets the shared state and starts
 //the first API request for the articles which transclude that template.
 //The shared state (queryURL, pagesList, index, proseList, proseIndex) is
 //read by the request handlers as plain globals, so it is set explicitly on
 //window instead of through undeclared assignments.
 //Does nothing when the prompt is cancelled or left empty.
 function generateStatistics(){

  var answer = prompt("Enter the template you want to check for\n (Don't include Template:)","");
  //prompt returns null when the user presses Cancel
  if(answer === null) return;
  answer = answer.replace(/^\s+|\s+$/g,'');
  if(answer == '') return;

  //Page titles start with a capital letter
  var template = "Template:"+answer.charAt(0).toUpperCase()+answer.slice(1);
  //Kept available as a global, as before
  window.template = template;
  //The title is escaped so that characters such as & # or + in a template
  //name cannot cut the query string short.
  window.queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + encodeURIComponent(template) + '&geilimit=500&geinamespace=0&prop=info&format=xml';
  window.pagesList = [];
  window.index = 0;
  window.proseList = [];
  window.proseIndex = 0;
  loadXMLDocPassingTemplate(window.queryURL,getSizeFromAPI,template);

 }
 
 //Once the page has loaded, start the statistics run, but only on the edit
 //view of the dedicated page User:Dr_pda/generatestats.
 addOnloadHook(function () {
   var statsEditPage = 'User:Dr_pda/generatestats&action=edit';
   var onStatsEditPage = document.location.href.indexOf(statsEditPage) != -1;
   if(onStatsEditPage){
     generateStatistics();
   }
 });
 
 //</pre>