rss-prefetch
#!/usr/bin/perl
#
# rss-prefetch -- fetch rss feeds from Oddmuse pages and put them in a
# directory
#
# Copyright (C) 2005 Alex Schroeder <alex@emacswiki.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
use LWP::UserAgent;
my $verbose = 0;
sub UrlEncode {
my $str = shift;
return '' unless $str;
my @letters = split(//, $str);
my @safe = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.', '!', '~', '*', "'", '(', ')', '#');
foreach my $letter (@letters) {
my $pattern = quotemeta($letter);
if (not grep(/$pattern/, @safe)) {
$letter = sprintf("%%%02x", ord($letter));
}
}
return join('', @letters);
}
sub GetRaw {
my $uri = shift;
my $ua = LWP::UserAgent->new;
my $response = $ua->get($uri);
warn $response->status_line if $response->code != 200 and $verbose;
return $response->content;
}
sub ParseData {
my $data = shift;
my %result;
while ($data =~ /(\S+?): (.*?)(?=\n[^ \t]|\Z)/sg) {
my ($key, $value) = ($1, $2);
$value =~ s/\n\t/\n/g;
$result{$key} = $value;
}
return %result;
}
my @pages = qw(RecentNearChanges RecentFarChanges
RecentFarChangesEnglishOnly);
my $dir = '/org/org.emacswiki/htdocs/community/page/';
my $cache = '/org/org.emacswiki/htdocs/community/rss/';
my %done = ();
foreach my $page (@pages) {
local $/ = undef; # Read complete files
open(F, $dir . uc(substr($page, 0, 1)) . '/' . $page . '.pg')
or die "Cannot read $_ page file";
my $data = <F>;
close(F);
my %result = ParseData($data);
while ($result{text} =~ /<rss(.*?)>/sg) {
my @uris = map { s/^"?(.*?)"?$/$1/; $_; } split(' ', $1);
foreach my $uri (@uris) {
next if $uri =~ /^\d+$/;
next if $done{$uri};
$done{$uri} = 1;
my $rss = GetRaw($uri) or $verbose and warn "No data from $uri";
next unless $rss;
my $name = UrlEncode($uri);
open(F, "> $cache$name")
or $verbose and warn "Cannot write $uri page file";
print F $rss;
close F;
}
}
};