Wikipedia:WikiProject Orphanage/Orphaned Articles/How to update
From Wikipedia, the free encyclopedia
Apart from "manual" updating of the Wikipedia:Orphaned Articles list, the automatic routine described on this page can be used.
#Use at your own risk, no warranty implied or given
#Please debug/improve the query.
DROP TABLE IF EXISTS temp_deorphan;
# Define as the links table, droping some keys
CREATE TABLE `temp_deorphan` (
`l_from` int(8) unsigned NOT NULL default '0',
`l_to` int(8) unsigned NOT NULL default '0',
KEY `l_from` (`l_from`),
KEY `l_to` (`l_to`)
) TYPE=MyISAM;
# This is slow .. links has over 6 mio entries
#
INSERT INTO temp_deorphan SELECT l_from, l_to
FROM links
LIMIT 10000000;
# Remove links that don't de-orphan pages
# This version does two in one
# 1.1 millon rows
DELETE
FROM temp_deorphan
USING temp_deorphan, cur
WHERE l_from = cur_id
AND (cur_namespace <>0 #links from namespaces other than the article namespace.
OR cur_is_redirect=1); #links from redirects
#find cur_id of disambig marker (Template:Disambig)
DROP TABLE IF EXISTS temp_disambigid;
CREATE TABLE temp_disambigid
SELECT cur_id AS d_id
FROM cur
WHERE (cur_title = 'Disambig')
AND cur_namespace=10; # 10 = Template namespace
#links from disambiguation pages are not included
#i.e. a page linked only from a disambiguation page is an orphan
# approx. 100000 rows
DELETE
FROM temp_deorphan
USING temp_disambigid AS id, links AS l, temp_deorphan AS d
WHERE l.l_to = id.d_id
AND l.l_from=d.l_from;
#This avoids that disambiguation pages show up as orphans
INSERT INTO temp_deorphan
SELECT DISTINCT 999999, l_from
FROM links, temp_disambigid
WHERE d_id = l_to
LIMIT 20000;
#Adds a temporary table with the orphans
#12000 rows
DROP TABLE IF EXISTS temp_orphans;
CREATE TABLE temp_orphans
SELECT cur_id
FROM cur
LEFT JOIN temp_deorphan ON cur_id=l_to
WHERE l_to IS NULL
AND cur_namespace=0
AND cur_is_redirect=0
LIMIT 20000;
#Output from list (filtering some 600 rambot orphans)
SELECT CONCAT('#[[', REPLACE (cur_title, '_', ' '), ']]') AS orphanslist
INTO OUTFILE 'wp:\wp_orphanend_articles.txt' #set this to path you need
FROM cur, temp_orphans
WHERE temp_orphans.cur_id = cur.cur_id
AND NOT (cur_title LIKE '%(CDP)%'
OR cur_title LIKE '%(town)%'
OR cur_title LIKE '%(city)%'
OR cur_title LIKE '%(village)%'
OR cur_title LIKE '%Township%')
ORDER BY Lower(cur_title)
LIMIT 20000;
Note: these queries are slow.

