Wikipedia:Persondata/extractPersondata.stx
From Wikipedia, the free encyclopedia
<?xml version="1.0"?>
<stx:transform version="1.0"
xmlns:stx="http://stx.sourceforge.net/2002/ns"
xmlns:m="http://www.mediawiki.org/xml/export-0.3/"
pass-through="none"
output-method="xml"
exclude-result-prefixes="#all">
<!-- Sequence that collects the m:namespace nodes seen so far (key 0 excluded).
     The m:page template compares each entry's string value with a page title's prefix. -->
<stx:variable name="namespace-prefixes"/>

<!-- Appends the current m:namespace node to $namespace-prefixes unless its key is 0. -->
<stx:template match="m:namespace">
  <stx:if test="@key != 0">
    <stx:assign name="namespace-prefixes"
                select="($namespace-prefixes, .)"/>
  </stx:if>
</stx:template>
<!-- Document element of the export: everything emitted while processing its
     children ends up inside a single unprefixed mediawiki result element. -->
<stx:template match="/m:mediawiki">
  <mediawiki>
    <stx:process-children/>
  </mediawiki>
</stx:template>
<!-- m:siteinfo produces no output of its own; it only hands its children on
     for processing. -->
<stx:template match="m:siteinfo">
  <stx:process-children/>
</stx:template>

<!-- m:namespaces likewise only descends, which lets the m:namespace template
     see each child. -->
<stx:template match="m:namespaces">
  <stx:process-children/>
</stx:template>
<!-- Per-page state, filled in by the templates below and by the m:id template,
     then read by the m:page template after it has processed its children. -->
<stx:variable name="page-title"/>
<stx:variable name="page-text"/>
<stx:variable name="page-id"/>

<!-- Remembers the string value of the title element. -->
<stx:template match="m:title">
  <stx:assign name="page-title" select="string(.)"/>
</stx:template>

<!-- Remembers the string value of the text element (the wiki markup to scan). -->
<stx:template match="m:text">
  <stx:assign name="page-text" select="string(.)"/>
</stx:template>
<!-- True until the first m:revision of the current page has been entered;
     the m:page template sets it back to true at the start of every page. -->
<stx:variable name="first-revision" select="true()"/>

<!-- Only the first revision of a page is descended into; later ones are ignored. -->
<stx:template match="m:revision">
  <stx:if test="$first-revision">
    <!-- Clear the flag BEFORE descending: the m:id template only assigns
         page-id while the flag is true, so any id reached while processing
         this revision's children leaves page-id untouched. -->
    <stx:assign name="first-revision" select="false()"/>
    <stx:process-children/>
  </stx:if>
</stx:template>
<!-- Buffer that receives the template/param elements generated for one page. -->
<stx:buffer name="parsed"/>

<!-- Running totals of Persondata and PND templates found, reported through
     stx:message by the m:page template. -->
<stx:variable name="pd-count" select="0"/>
<stx:variable name="pnd-count" select="0"/>

<!-- Set to false by the m:page template before scanning and to true by the
     "template" procedure when it recognises a template. -->
<stx:variable name="found-something"/>

<!-- Stores the whitespace-normalised id, but only while no revision of the
     current page has been entered yet. -->
<stx:template match="m:id">
  <stx:if test="$first-revision">
    <stx:assign name="page-id" select="normalize-space(.)"/>
  </stx:if>
</stx:template>
<!--
  Handles one m:page element.

  1. Resets the first-revision flag and processes the children, which fills
     $page-title, $page-id and $page-text through the templates above.
  2. Skips the page when the part of the title before the first ':' equals
     the string value of one of the collected $namespace-prefixes entries.
  3. Otherwise scans $page-text for "{{ ... }}" spans, passes the content of
     each span to the "template" procedure and collects whatever that
     procedure emits in the "parsed" buffer.
  4. If the procedure reported a hit ($found-something), reports the running
     counters via stx:message and writes a page element holding title, id
     and the buffered result.
-->
<stx:template match="m:page">
  <stx:assign name="first-revision" select="true()"/>
  <stx:process-children/>

  <stx:variable name="prefix" select="substring-before($page-title,':')"/>
  <stx:variable name="skip" select="false()"/>
  <stx:if test="$prefix">
    <!-- NOTE(review): this writes the prefix text into the result for every
         title containing ':'; it looks like leftover debugging output.
         Kept unchanged; confirm whether consumers rely on it before removing. -->
    <stx:value-of select="$prefix"/>
    <stx:for-each-item name="p" select="$namespace-prefixes">
      <stx:if test="string($p) = string($prefix)">
        <stx:assign name="skip" select="true()"/>
      </stx:if>
    </stx:for-each-item>
  </stx:if>

  <stx:if test="not($skip)">
    <stx:assign name="found-something" select="false()"/>
    <stx:result-buffer name="parsed" clear="yes">
      <stx:variable name="text" select="$page-text"/>
      <stx:while test="string-length($text) > 0">
        <!-- Jump to just behind the next opening braces. When there are none,
             substring-after yields '' and the loop ends after this pass. -->
        <stx:assign name="text" select="substring-after($text,'{{')"/>
        <stx:variable name="nestcheck" select="substring-before($text,'}}')"/>
        <stx:variable name="concattext" select="$nestcheck"/>
        <!-- A '{{' inside the piece just cut off means the '}}' that ended it
             closed a nested template: re-attach that '}}' plus the text up to
             the next '}}' and check the newly added piece again. -->
        <stx:while test="contains($nestcheck,'{{')">
          <stx:assign name="text" select="substring-after($text,'}}')"/>
          <stx:assign name="concattext"
                      select="string-join(($concattext, '}}', substring-before($text,'}}')), '')"/>
          <stx:assign name="nestcheck" select="substring-before($text,'}}')"/>
        </stx:while>
        <stx:call-procedure name="template">
          <stx:with-param name="content" select="$concattext"/>
        </stx:call-procedure>
        <!-- Continue behind the closing braces of the span just handled. -->
        <stx:assign name="text" select="substring-after($text,'}}')"/>
      </stx:while>
    </stx:result-buffer>

    <stx:if test="$found-something">
      <!-- Progress report: running Persondata count / running PND count. -->
      <stx:message>
        <stx:value-of select="$pd-count"/>
        <stx:text>/</stx:text>
        <stx:value-of select="$pnd-count"/>
      </stx:message>
      <!-- The stx:text elements below each emit exactly one line break; their
           end tags stay at column 0 so that no indentation leaks into the output. -->
      <stx:text>
</stx:text>
      <page>
        <title><stx:value-of select="$page-title"/></title>
        <id><stx:value-of select="$page-id"/></id>
        <stx:text>
</stx:text>
        <revision>
          <parsed>
            <stx:process-buffer name="parsed" group="copy"/>
          </parsed>
          <stx:text>
</stx:text>
        </revision>
      </page>
      <stx:text>
</stx:text>
    </stx:if>
  </stx:if>
</stx:template>
<!-- Identity-style templates, selected by stx:process-buffer with group="copy"
     to replay the buffered elements into the result. -->
<stx:group name="copy">

  <!-- Re-create the element under its own name, then its attributes and content. -->
  <stx:template match="*">
    <stx:element name="{name(.)}">
      <stx:process-attributes/>
      <stx:process-children/>
    </stx:element>
  </stx:template>

  <!-- Re-create each attribute with its name and value. -->
  <stx:template match="@*">
    <stx:attribute name="{name(.)}" select="."/>
  </stx:template>

  <!-- Text is written through unchanged. -->
  <stx:template match="text()">
    <stx:value-of select="."/>
  </stx:template>

</stx:group>
<!--
  Inspects the content of one "{{ ... }}" span.

  content: the text between the braces (required).

  Content starting with 'PND' yields a template element named PND with one
  param holding everything after the first '|'. Content starting with
  'Persondata' yields a template element named Persondata whose params are
  produced by the "Persondata" procedure. Either hit bumps the matching
  counter and sets $found-something; any other content is ignored.
-->
<stx:procedure name="template">
  <stx:param name="content" required="yes"/>
  <stx:choose>
    <stx:when test="starts-with($content,'PND')">
      <stx:assign name="pnd-count" select="$pnd-count+1"/>
      <stx:assign name="found-something" select="true()"/>
      <template name="PND">
        <param>
          <stx:value-of select="normalize-space(substring-after($content,'|'))"/>
        </param>
      </template>
    </stx:when>
    <stx:when test="starts-with($content,'Persondata')">
      <stx:assign name="pd-count" select="$pd-count+1"/>
      <stx:assign name="found-something" select="true()"/>
      <template name="Persondata">
        <stx:call-procedure name="Persondata">
          <stx:with-param name="text"
                          select="normalize-space(substring-after($content,'|'))"/>
        </stx:call-procedure>
      </template>
    </stx:when>
  </stx:choose>
</stx:procedure>
<!--
  Splits the argument list of a Persondata template into named parameters.

  text: everything after the first '|' of the template content, as passed
        by the "template" procedure.

  The text is cut at every '|'. A piece with a non-empty name before its
  first '=' starts a new parameter; a piece without one is treated as the
  continuation of the previous value (the '|' is put back). One param
  element, carrying the name as attribute and the whitespace-normalised
  value as content, is emitted per parameter.
-->
<stx:procedure name="Persondata">
  <stx:param name="text"/>

  <!-- Pass 1: cut $text into pipe-separated tokens. -->
  <stx:variable name="tokens"/>
  <stx:while test="string-length($text) > 0">
    <stx:variable name="before" select="substring-before($text,'|')"/>
    <!-- Take the whole remainder only when no '|' is left. Testing for an
         empty $before instead would also fire on an empty field (text
         starting with '|') and push the entire remainder, leading pipe
         included, as a single token. -->
    <stx:if test="not(contains($text,'|'))">
      <stx:assign name="before" select="$text"/>
    </stx:if>
    <stx:assign name="tokens" select="($tokens, $before)"/>
    <stx:assign name="text" select="substring-after($text,'|')"/>
  </stx:while>

  <!-- Pass 2: group the tokens into name/value pairs. -->
  <stx:variable name="parameter"/>
  <stx:variable name="value"/>
  <stx:for-each-item name="token" select="$tokens">
    <stx:variable name="name" select="normalize-space(substring-before($token,'='))"/>
    <stx:if test="$name">
      <!-- A new parameter begins: write out the one collected so far. -->
      <stx:if test="$parameter">
        <param name="{$parameter}">
          <stx:value-of select="normalize-space($value)"/>
        </param>
      </stx:if>
      <stx:assign name="parameter" select="$name"/>
      <stx:assign name="value" select="substring-after($token,'=')"/>
    </stx:if>
    <stx:else>
      <!-- No name in this token: the '|' belonged to the current value. -->
      <stx:assign name="value" select="concat($value,'|',$token)"/>
    </stx:else>
  </stx:for-each-item>

  <!-- Write out the last parameter, if any. -->
  <stx:if test="$parameter">
    <param name="{$parameter}">
      <stx:value-of select="normalize-space($value)"/>
    </param>
  </stx:if>
</stx:procedure>
</stx:transform>

