User:AmiDaniel/VP/RSS source
From Wikipedia, the free encyclopedia
< User:AmiDaniel | VP
The following is the method being used in VandalProof version 1.3 to retrieve items from the recent changes RSS feed, and as I've had many requests for it, I decided to copy it here. This method will work for an RSS RC feed on any foundation Wiki regardless of language, etc.
To use it, you will need to copy the following code into a module. Then you will need to retrieve the innerHTML of the RSS feed you wish to scrape (on en.wikipedia, it can be found at http://en.wikipedia.org/w/index.php?title=Special:Recentchanges&feed=rss). Then pass the innerHTML to SplitItems (like so: SplitItems WB_RSS.Document.body.innerHTML). That will then populate the RSSItems variable with every RC item it finds in the feed.
Option Explicit
Public Type RSSItem
BodyContent As String
sUser As String
sArticleName As String
sPageAddress As String
sSummary As String
sAdded As String
sRemoved As String
sMatches As String
sNewTime As String
sOldTime As String
End Type
Public RSSItems() As RSSItem
Public Sub SplitItems(ByVal str$)
Dim i%
On Error Resume Next
i = UBound(RSSItems)
If Err Then
Err.Clear
ReDim RSSItems(0)
End If
On Error GoTo 0
Do Until InStr(1, LCase(str), "<item>") = 0
ReDim Preserve RSSItems(i)
With RSSItems(i)
.BodyContent = Left(str, InStr(1, LCase(str), "<item>") - 1)
.BodyContent = FindAndReplace(.BodyContent, """/w", """" & GlVars.Root & "/w")
.sArticleName = BetwixtStr(.BodyContent, "<title>", "</title>")
.sPageAddress = BetwixtStr(.BodyContent, "<link>", "</link>")
.sUser = BetwixtStr(.BodyContent, "<dc:creator>", "</dc:creator>")
.sNewTime = BetwixtStr(.BodyContent, "<pubDate>", "</pubdate>")
.sSummary = BetwixtStr(.BodyContent, "<p>", "</p>")
.sSummary = FindAndReplace(.sSummary, "<span class=autocomment>", "/*")
.sSummary = FindAndReplace(.sSummary, "</span>", "*/")
.sAdded = GetAdded(.BodyContent)
.sRemoved = GetRemoved(.BodyContent)
.BodyContent = FindAndReplace(.BodyContent, "<link>" & .sPageAddress & "</link>", "<H2><A href=""" & .sPageAddress & """>" & .sArticleName & "</A> (<A href=""" & _
.sPageAddress & "?diff=cur"">last diff</A>) (<A href=""" & GlVars.Root & "/w/index.php?title=" & Trim(StrtoHTML(.sArticleName)) & "&action=history"">hist</A>)</H2>")
If .sArticleName = "" & GlVars.SpecialText & "Log/newusers" Then
.BodyContent = .BodyContent & Chr(13) & Chr(10) & Chr(13) & Chr(10) & "<H3><A href=""$bcur:indef-unsername"">Block Username</A></H3>"
End If
End With
str = Right(str, Len(str) - InStr(1, LCase(str), "<item>") - Len("<item>") + 1)
'If InStr(1, lastinstr, LCase(str), "<item>") > 0 Then lastinstr = InStr(1, lastinstr, LCase(str), "<item>")
i = i + 1
Loop
End Sub
Public Function BetwixtStr$(ByVal sIn$, ByVal sFirst$, ByVal sLast$)
If InStr(sIn, sLast) Then
BetwixtStr = Left(sIn, InStrRev(sIn, sLast) - 1)
If InStr(BetwixtStr, sFirst) Then
BetwixtStr = Right(BetwixtStr, Len(BetwixtStr) - InStr(BetwixtStr, sFirst) - Len(sFirst) + 1)
End If
End If
End Function
Public Function GetAdded$(ByVal sIn$)
Dim fields
Debug.Print
Debug.Print sIn
If InStr(1, sIn, "<p><b>New page</b></p>") Then GetAdded = "##NEWPAGE##"
Do Until InStr(1, UCase(sIn), "<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") = 0
sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), "<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") - Len("<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") + 1)
GetAdded = GetAdded & sIn
GetAdded = Left(GetAdded, InStr(1, LCase(GetAdded), "</td>") - 1)
Loop
GetAdded = FindAndReplace(GetAdded, "<span style=""FONT-WEIGHT: bold; COLOR: red"">", "")
GetAdded = FindAndReplace(GetAdded, "</span>", "")
GetAdded = FindAndReplace(GetAdded, "</sup>", "")
End Function
Public Function GetRemoved$(ByVal sIn$)
Dim fields
'If InStr(1, sIn, "<p><b>New page</b></p>") Then GetRemoved = "##NEWPAGE##"
Do Until InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) = 0
sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) - Len("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">") + 1)
GetRemoved = GetRemoved & sIn
GetRemoved = Left(GetRemoved, InStr(1, LCase(GetRemoved), "</td>") - 1)
Loop
GetRemoved = FindAndReplace(GetRemoved, "<span style=""FONT-WEIGHT: bold; COLOR: red"">", "")
GetRemoved = FindAndReplace(GetRemoved, "</span>", "")
GetRemoved = FindAndReplace(GetRemoved, "</sup>", "")
End Function
Function FindAndReplace(ByVal strIn$, ByVal strFind$, ByVal strReplace$)
Dim lastInstr%, lastInstr_New%
lastInstr = 1
Do Until InStr(lastInstr, strIn, strFind) = 0
lastInstr_New = InStr(lastInstr, strIn, strFind)
strIn = Left(strIn, InStr(lastInstr, strIn, strFind) - 1) & strReplace & Right(strIn, Len(strIn) - InStr(lastInstr, strIn, strFind) - Len(strFind) + 1)
lastInstr = lastInstr_New
Loop
FindAndReplace = strIn
End Function

