💾 Archived View for gems.geminet.nl › avr › rss2atom.pl captured on 2023-01-29 at 02:51:45.
-=-=-=-=-=-=-
#!/usr/bin/perl -w
#
# rss2atom.pl -- read an RSS feed, turn every item into an Atom entry
# decorated with Blogger-style links and categories, and print the
# entries spliced into the template file "blogger-import-master.xml".
#
# Usage: rss2atom.pl <feed-url>   (result is written to STDOUT)
#
# Search for the "[TODO]" strings to make your changes.
#
use warnings;
use strict;

###use criticism 'brutal';
use XML::RSS;
use XML::Atom::Entry;
use XML::Atom::Feed;
use XML::Atom::Link;
use XML::Atom::Person;
use DateTime;
use LWP::Simple;

# One name for both reading and writing the cache; previously the reader
# and the writer used different file names, so the cache was never reused.
my $cache_file = '.rss2atom-cache';

# Template that must contain the "<!-- INSERT HERE -->" marker.
my $master_file = 'blogger-import-master.xml';

my $url = shift @ARGV
  or die "Need to pass a URL as first argument";

# Use the cached copy of the feed when one exists; otherwise download it
# and try to store it for the next run.
my $content = '';
if ( open( my $cache_in, '<', $cache_file ) ) {
    local $/;    # slurp mode
    $content = <$cache_in>;
    $content = '' unless defined $content;
    close($cache_in);
}
else {
    $content = get($url) or die "get: Cannot get $url\n";

    # A cache that cannot be written is not fatal: warn and carry on,
    # but never print to a handle that failed to open.
    if ( open( my $cache_out, '>', $cache_file ) ) {
        print {$cache_out} $content;
        close($cache_out) or warn "close: Cannot write cache ($!)\n";
    }
    else {
        warn "open: Cannot write cache ($!)\n";
    }
}

my $rss = XML::RSS->new();
$rss->parse($content);

my $feed = XML::Atom::Feed->new();

my $blogId = "[TODO]";    # put your blog ID here

# [TODO] fix this URL: base address of the target blog, used for the
# 'replies', 'edit' and 'self' links of every entry.
my $blog_url = 'http://whatswhat-fashion-news.blogspot.com';

# Publication time of the first entry; moved back 7 days after each entry.
my $dt = DateTime->now();

my $author = XML::Atom::Person->new;
$author->name('[TODO]');     # your name here
$author->email('[TODO]');    # your email address

my $num_posts = 0;
foreach my $item ( @{ $rss->{'items'} } ) {
    my $id       = make_id();
    my $post_url = "$blog_url/feeds/$blogId/posts/default/$id";

    my $entry = XML::Atom::Entry->new();

    # NOTE(review): the leading "\n" is kept from the original; it is
    # presumably removed again by the newline-stripping regex below.
    $entry->id("\ntag:blogger.com,1999:blog-$blogId.post-$id");

    # The fraction and UTC offset are a fixed suffix, not derived from $dt.
    $entry->published( $dt->ymd('-') . "T" . $dt->hms . '.001-08:00' );
    $entry->updated( $entry->published );
    $entry->category(
        {
            scheme => 'http://schemas.google.com/g/2005#kind',
            term   => 'http://schemas.google.com/blogger/2008/kind#post'
        }
    );
    $entry->title( $item->{'title'}, { type => 'text' } );
    $entry->content( $item->{'description'}, { type => 'html' } );

    $entry->add_link(
        make_link(
            {
                rel   => 'replies',
                type  => 'application/atom+xml',
                href  => "$blog_url/feeds/$id/comments/default",
                title => 'Post Comments',
            }
        )
    );
    $entry->add_link(
        make_link(
            {
                rel   => 'replies',
                type  => 'text/html',
                href  => $item->{'link'} . "#comment-form",
                title => '0 Comments'
            }
        )
    );
    $entry->add_link(
        make_link(
            {
                rel  => 'edit',
                type => 'application/atom+xml',
                href => $post_url,
            }
        )
    );
    $entry->add_link(
        make_link(
            {
                rel  => 'self',
                type => 'application/atom+xml',
                href => $post_url,
            }
        )
    );
    $entry->add_link(
        make_link(
            {
                href  => $item->{'link'},
                rel   => 'alternate',
                type  => 'text/html',
                title => $item->{'title'},
            }
        )
    );

    $entry->author($author);
    $feed->add_entry($entry);

    $dt->subtract( days => 7 );
    $num_posts++;
}
print STDERR "$num_posts posts\n";

# This is the ugly part.  A bunch of regex's to format the XML in the very
# perverse and unusual way Blogger seemed to need it.  (Kids -- don't use
# regex to process XML!)
#
my $xml = $feed->as_xml;
$xml =~ s/^\s+//gm;                 # drop indentation
$xml =~ s/[\n\r]+//gs;              # join everything onto one line
$xml =~ s/issued>/published>/gs;
$xml =~ s/modified>/updated>/gs;
$xml =~ s{</entry>}{<thr:total>0</thr:total></entry>}gs;
$xml =~ s/^.*?<entry>/<entry>/;     # drop everything before the first entry
$xml =~ s{</feed>}{};
$xml =~ s/ mode="xml"//g;           # /g: strip the attribute from every entry
$xml =~ s/"/'/g;
$xml =~ s{(<content[^>]+>)(.*?)(</content>)}{fix_content_tag($1, $2, $3)}ge;

open( my $fh, '<', $master_file )
  or die "open: Cannot read master ($!)\n";
my $master = do { local $/; <$fh> };
$master = '' unless defined $master;
close($fh);

# Splice the entries into the template at the marker comment.
$master =~ s/<!-- INSERT HERE -->/$xml/
  or warn "No '<!-- INSERT HERE -->' marker found in $master_file\n";
print $master;

# fix_content_tag( $begin, $html, $end )
#
# Called for every <content>...</content> match.  Escapes the angle
# brackets of the element body ($html) as entities and returns the opening
# tag, the escaped body and the closing tag joined together.
sub fix_content_tag {
    my ( $begin, $html, $end ) = @_;
    $html =~ s/</&lt;/g;
    $html =~ s/>/&gt;/g;
    return $begin . $html . $end;
}

# make_link( \%attributes )
#
# Builds an XML::Atom::Link and sets each key of the hash reference by
# calling the accessor of the same name with the corresponding value.
# Returns the link object.
sub make_link {
    my $hash = shift;
    my $link = XML::Atom::Link->new();
    foreach my $k ( keys %{$hash} ) {
        $link->$k( $hash->{$k} );
    }
    return $link;
}

# make_id()
#
# Returns a random 19-digit string: six groups in the range 100..999
# followed by one digit in the range 1..9.
sub make_id {

    # Classic ID: 818 439 446 451 821 498 8;
    #             999 439 446 451 821 498 8
    my $id = join '', map { 100 + int( rand(900) ) } 1 .. 6;
    $id .= 1 + int( rand(9) );
    return $id;
}