User:Dr pda/generatestats.js
From Wikipedia, the free encyclopedia
If a message on your talk page led you here, please be wary of who left it. Code that you insert on this page could contain malicious content capable of compromising your account. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. If this is a .js page, the code will be executed when previewing the page.
Note: After saving, you have to bypass your browser's cache to see the changes. In Internet Explorer and Firefox, hold down the Ctrl key and click the Refresh or Reload button. Opera users have to clear their caches through Tools→Preferences, see the instructions for Opera. Konqueror and Safari users can just click the Reload button.
//<pre>
//This script generates a list of the ten shortest and ten longest articles which transclude a template,
//e.g. {{featured article}}, calculates some statistics and plots a histogram.
//To use this function add {{subst:js|User:Dr pda/generatestats.js}} to your monobook.js
//then go to http://en.wikipedia.org/w/index.php?title=User:Dr_pda/generatestats&action=edit
//See the talk page for documentation.

//State shared between the asynchronous callbacks below. It is declared explicitly
//(instead of being created as implicit globals on first assignment) so the script
//also works in strict mode. generateStatistics() resets all of it on every run.
var template;        //Full title ("Template:...") of the template being checked
var queryURL;        //Base API URL; '&geicontinue=...' is appended for follow-up batches
var pagesList = [];  //keyValuePair(title, size) for every article found so far
var index = 0;       //Number of entries stored in pagesList
var proseList = [];  //keyValuePair(title, proseSize); only filled in prose-size mode
var proseIndex = 0;  //Number of entries stored in proseList

//Simple record pairing an article title (key) with its size (value).
//Intended to be called with `new`.
function keyValuePair(key, value) {
    this.key = key;
    this.value = value;
}

//Comparator for Array.prototype.sort: ascending by value.
//The subtraction coerces string values (as returned by getAttribute) to numbers.
function sortByValue(a, b) {
    return a.value - b.value;
}

//Returns the smallest "nice" step size which splits the range [min,max] into
//at most 15 intervals, or 5000 when even the largest candidate is too small.
function getBestScale(min, max) {
    var scales = [0.2, 0.5, 1, 2, 5, 10, 20, 25, 50, 100, 200, 250, 500, 1000, 2000, 5000];
    var val = (max - min) / 15;
    for (var i = 0; i < scales.length; i++) {
        if (scales[i] - val >= 0) {
            return scales[i];
        }
    }
    return 5000;
}

//Issues an asynchronous GET for url. handler(req, template) is invoked on every
//readyState change; `template` is passed through untouched (callers also use it
//to carry an article title). Does nothing if no XMLHttpRequest implementation exists.
function loadXMLDocPassingTemplate(url, handler, template) {
    var req = null;
    if (window.XMLHttpRequest) {
        // branch for native XMLHttpRequest object
        req = new XMLHttpRequest();
    } else if (window.ActiveXObject) {
        // branch for IE/Windows ActiveX version
        req = new ActiveXObject("Microsoft.XMLHTTP");
    }
    if (req) {
        req.onreadystatechange = function () { handler(req, template); };
        req.open("GET", url, true);
        req.send("");
    }
}

//XHR callback for the embeddedin API query. Appends every <page> of the response
//to pagesList, requests the next batch while the response carries a geicontinue
//value, and once the last batch has arrived either builds the chart directly
//(wiki text size) or starts fetching every page to measure its prose size.
function getSizeFromAPI(req, template) {
    // only if req shows "loaded"
    if (req.readyState !== 4) {
        return;
    }
    var response = req.responseXML && req.responseXML.documentElement;
    // only if "OK" and the body could be parsed as XML
    if (req.status !== 200 || !response) {
        alert("There was a problem retrieving the XML data:\n" + req.statusText);
        return;
    }

    var pages = response.getElementsByTagName('page');
    for (var i = 0; i < pages.length; i++) {
        pagesList[index++] = new keyValuePair(pages[i].getAttribute('title'), pages[i].getAttribute('length'));
    }
    document.getElementById('wpTextbox1').value = 'Retrieved ' + index + ' articles.\n To abort click the back button in your browser.';

    //Check for more pages
    var embeddedin = response.getElementsByTagName('embeddedin');
    var geicontinue = embeddedin.length > 0 ? embeddedin[0].getAttribute('geicontinue') : null;
    if (geicontinue !== null) {
        //The continuation token can contain characters which are not URL-safe
        loadXMLDocPassingTemplate(queryURL + '&geicontinue=' + encodeURIComponent(geicontinue), getSizeFromAPI, template);
        return;
    }

    //Last batch retrieved. Nothing to chart if the template is not transcluded anywhere.
    if (index === 0) {
        document.getElementById('wpTextbox1').value = 'No articles found which transclude ' + template + '.';
        return;
    }

    //If using wiki text size
    if (document.location.href.indexOf('prosesize') == -1) {
        sortAndMakeChart();
        return;
    }

    //If using readable prose size (WARNING:Will load every page which transcludes template. Could be thousands of pages!!)
    for (var p = 0; p < pagesList.length; p++) {
        var titleURL = encodeURIComponent(pagesList[p].key.replace(/ /g, '_'));
        loadXMLDocPassingTemplate('/w/index.php?action=render&title=' + titleURL, getProseSizeFromPage, pagesList[p].key);
    }
}

//Returns the number of characters of text found inside <p>...</p> elements of
//html, after removing numeric reference markers such as [12], the phrase
//"citation needed" and any HTML tags.
function measureProseSize(html) {
    var proseSize = 0;
    var start = html.indexOf('<p>');
    while (start > -1) {
        var stop = html.indexOf('</p>', start);
        if (stop === -1) {
            //Unterminated paragraph: without this check the next search would
            //restart from the beginning of the text and never finish.
            break;
        }
        var para = html.substring(start + 3, stop);
        para = para.replace(/\[\d{1,3}\]/g, '');
        para = para.replace(/citation needed/g, '');
        para = para.replace(/(<([^>]+)>)/ig, '');
        proseSize += para.length;
        start = html.indexOf('<p>', stop);
    }
    return proseSize;
}

//XHR callback for a rendered article page. Records the prose size of the page
//under `title` and, once every article in pagesList has been measured, replaces
//pagesList by the prose sizes and builds the chart.
function getProseSizeFromPage(req, title) {
    // only if req shows "loaded"
    if (req.readyState !== 4) {
        return;
    }
    // only if "OK"
    if (req.status !== 200) {
        alert("There was a problem retrieving the XML data:\n" + req.statusText);
        return;
    }

    proseList[proseIndex++] = new keyValuePair(title, measureProseSize(req.responseText));
    document.getElementById('wpTextbox1').value = 'Retrieved prose size for ' + proseIndex + ' out of ' + index + ' articles.\n To abort click the back button in your browser.';

    //If last page retrieved then start processing
    if (proseIndex == index) {
        pagesList = proseList;
        sortAndMakeChart();
    }
}

//Returns the median of the values (in bytes) of a list of keyValuePairs which is
//already sorted by value (either direction). For an even number of entries the
//two middle values are averaged. The list must not be empty.
function medianValue(sortedList) {
    var n = sortedList.length;
    var upper = Number(sortedList[Math.floor(n / 2)].value);
    if (n % 2 === 1) {
        return upper;
    }
    return (Number(sortedList[n / 2 - 1].value) + upper) / 2;
}

//Counts how many entries of list fall into each of numBins bins of width xScale
//(in kB) starting at min (in kB). Indices are clamped so that a value lying
//exactly on the upper edge of the range is counted in the last bin.
function buildHistogram(list, min, xScale, numBins) {
    var bins = [];
    for (var b = 0; b < numBins; b++) {
        bins[b] = 0;
    }
    for (var i = 0; i < list.length; i++) {
        var bin = Math.floor((list[i].value / 1024 - min) / xScale);
        bin = Math.min(numBins - 1, Math.max(0, bin));
        bins[bin]++;
    }
    return bins;
}

//Rounds an axis value to one decimal place. The scales used have at most one
//decimal, so this only removes floating point noise such as 0.6000000000000001.
function formatAxisValue(value) {
    return Math.round(value * 10) / 10;
}

//Formats up to `count` leading entries of list as numbered wiki list items.
function formatSizeList(list, count) {
    var text = '';
    var limit = Math.min(count, list.length);
    for (var i = 0; i < limit; i++) {
        text += ('# ' + list[i].key + ' (' + Math.round(list[i].value / 1024) + ' kB)\n');
    }
    return text;
}

//Sorts pagesList, writes the shortest/longest lists, the statistics and an
//EasyTimeline histogram into the edit box (plus the complete list when the URL
//contains '&list'), then clicks the preview button.
//Side effect: pagesList is left sorted from largest to smallest.
function sortAndMakeChart() {
    var textbox = document.getElementById('wpTextbox1');
    if (pagesList.length === 0) {
        textbox.value = 'No articles to analyse.';
        return;
    }
    var wantsFullList = document.location.href.indexOf('&list') != -1;

    pagesList.sort(sortByValue);

    //Get top ten and bottom ten (fewer if there are not that many articles)
    var bottomTen = '===Ten shortest articles===\n' + formatSizeList(pagesList, 10);
    pagesList.reverse();
    var topTen = '===Ten longest articles===\n' + formatSizeList(pagesList, 10);

    var list = '===List of articles by size===\n';
    if (wantsFullList) {
        list += formatSizeList(pagesList, pagesList.length);
    }

    //Get Range (in kB), widened to whole multiples of the bin width
    var max = Math.ceil(pagesList[0].value / 1024);
    var min = Math.floor(pagesList[pagesList.length - 1].value / 1024);
    var xScale = getBestScale(min, max);
    max = Math.ceil(max / xScale) * xScale;
    min = Math.floor(min / xScale) * xScale;
    //Rounded because of floating point noise with the fractional scales;
    //at least one bin so that a zero-width range still produces a bar.
    var numBins = Math.max(1, Math.round((max - min) / xScale));

    //Calculate statistics
    var sum = 0.0;
    for (var i = 0; i < pagesList.length; i++) {
        sum += pagesList[i].value * 1.0;
    }
    var bins = buildHistogram(pagesList, min, xScale, numBins);
    var mean = (sum / (pagesList.length * 1024)).toFixed(3);
    var median = (medianValue(pagesList) / 1024).toFixed(3);
    var statistics = '===Statistics===\n*Number of articles: ' + pagesList.length + '\n*Mean: ' + mean + ' kB\n*Median: ' + median + ' kB\n';

    //Calculate best vertical scale
    var yMax = Math.max.apply(Math, bins);
    var yScale = getBestScale(0, yMax);
    yScale = Math.max(1, yScale);
    yMax = Math.ceil(yMax / yScale) * yScale;
    var verticalScale = '\nScaleMajor = gridcolor:darkgrey increment:' + yScale + ' start:0';
    //Minor grid lines only when half a major step is a whole number
    if (Math.floor(yScale / 2) == yScale / 2) {
        verticalScale += '\nScaleMinor = gridcolor:lightgrey increment:' + yScale / 2 + ' start:0';
    }

    //Draw chart
    var chart = '===Chart===\n<timeline>\nColors=\n id:lightgrey value:gray(0.8)\n id:darkgrey value:gray(0.8)\n id:white value:rgb(1,1,1)\n id:steel value:rgb(0.6,0.7,0.8)\n\nImageSize = width:auto height:303 barincrement:25\nPlotArea = left:50 bottom:50 top:30 right:30\nDateFormat = x.y\nPeriod = from:0 till:' + yMax + '\nTimeAxis = orientation:vertical\nAlignBars = early' + verticalScale + '\nBackgroundColors = canvas:white\n\nPlotData=\n color:steel width:20 align:left\n';
    for (var b = 0; b < numBins; b++) {
        chart += ' bar:' + formatAxisValue(min + b * xScale) + ' from:0 till:' + bins[b] + '\n';
    }
    //Add axis label (attached to an existing bar, formatted the same way as above)
    chart += ' bar:' + formatAxisValue(min + Math.floor(2 * numBins / 5) * xScale) + ' at:0 text:"Article size in kB" shift:(0,-30)\n\n</timeline>';

    var output = topTen + '\n' + bottomTen + '\n' + statistics + '\n' + chart;
    if (wantsFullList) {
        output += '\n' + list;
    }
    textbox.value = output;
    document.getElementById('wpPreview').click();
}

//Asks for a template name, resets the shared state and starts the first API
//query for articles (namespace 0) which transclude that template.
function generateStatistics() {
    var answer = prompt("Enter the template you want to check for\n (Don't include Template:)", "");
    //prompt returns null when the dialog is cancelled
    if (answer === null || answer === '') {
        return;
    }
    template = "Template:" + answer.toUpperCase().substr(0, 1) + answer.substr(1);
    //The name is encoded so that characters such as '&' or '+' cannot alter the query
    queryURL = '/w/api.php?action=query&generator=embeddedin&geititle=' + encodeURIComponent(template) + '&geilimit=500&geinamespace=0&prop=info&format=xml';
    pagesList = [];
    index = 0;
    proseList = [];
    proseIndex = 0;
    loadXMLDocPassingTemplate(queryURL, getSizeFromAPI, template);
}

//Starts the script when the special edit page named in the header is opened.
function startIfOnStatsPage() {
    if (document.location.href.indexOf('User:Dr_pda/generatestats&action=edit') != -1) {
        generateStatistics();
    }
}

//Register the start-up function. addOnloadHook is used when the wiki provides it;
//otherwise jQuery's ready handler is used if available (NOTE(review): assumes the
//hosting wiki loads jQuery when addOnloadHook is absent - confirm).
if (typeof addOnloadHook === 'function') {
    addOnloadHook(startIfOnStatsPage);
} else if (typeof jQuery === 'function') {
    jQuery(startIfOnStatsPage);
}
//</pre>

