💾 Archived View for gems.geminet.nl › avr › rss2atom.pl captured on 2023-01-29 at 02:51:45.

View Raw

More Information

-=-=-=-=-=-=-

#!/usr/bin/perl -w
#
# Search for the "[TODO]" strings to make your changes.
#

use warnings;
use strict;
###use criticism 'brutal';

use XML::RSS;
use XML::Atom::Entry;
use LWP::Simple;


# The feed URL is the only command-line argument.
my $url = shift @ARGV
  or die "Need to pass a URL as first argument";

# The raw RSS text is cached on disk so that repeated runs do not download
# the feed again. One variable holds the cache file name so the read path
# and the write path always agree on it. The cache is not keyed by URL:
# delete the file to force a fresh download.
my $cache_file = '.rss2atom-cache';

my $content = '';
if ( open( my $in, '<', $cache_file ) ) {
	# Cache hit: slurp the whole file in one read.
	local $/;
	$content = <$in>;
	$content = '' unless defined $content;
	close($in);
}
else {
	# Cache miss: fetch the feed; a failed download is fatal.
	$content = get($url) or die "get: Cannot get $url\n";

	# Writing the cache is best effort. Only print when the handle was
	# actually opened; a failure here is reported but does not stop the run.
	if ( open( my $out, '>', $cache_file ) ) {
		print {$out} $content;
		close($out)
		  or warn "close: Cannot write cache ($!)\n";
	}
	else {
		warn "open: Cannot write cache ($!)\n";
	}
}

# NOTE(review): XML::Atom::Feed, XML::Atom::Person and DateTime are used
# below, but this file only has `use` lines for XML::RSS, XML::Atom::Entry
# and LWP::Simple -- presumably they are loaded transitively; confirm.

# Parse the RSS text held in $content. Direct method-call syntax is used
# instead of the ambiguous indirect-object form (`new XML::RSS()`).
my $rss = XML::RSS->new();
$rss->parse($content);

# Atom feed object that receives one entry per RSS item.
my $feed = XML::Atom::Feed->new();

my $blogId = "[TODO]"; # put your blog ID here

# Starting timestamp for the generated entries (current time).
my $dt     = DateTime->now();

# Author record attached to every generated entry.
my $author = XML::Atom::Person->new;
$author->name('[TODO]');   # your name here
$author->email('[TODO]');  # your email address

# Running count of converted items, reported on STDERR after the loop.
my $num_posts = 0;
# Turn every parsed RSS item into an Atom entry and append it to $feed.
for my $item ( @{ $rss->{items} } ) {
	my $id    = make_id();
	my $entry = XML::Atom::Entry->new();

	# The id string starts with a newline, exactly as in the tag URI below.
	$entry->id("\ntag:blogger.com,1999:blog-$blogId.post-$id");

	# Date and time come from $dt; the fractional second and the UTC offset
	# are fixed text appended to it.
	my $stamp = $dt->ymd('-') . "T" . $dt->hms . '.001-08:00';
	$entry->published($stamp);
	$entry->updated( $entry->published );

	# Category that marks the entry as a Blogger post.
	my %kind = (
		scheme => 'http://schemas.google.com/g/2005#kind',
		term   => 'http://schemas.google.com/blogger/2008/kind#post',
	);
	$entry->category( \%kind );

	$entry->title( $item->{title}, { type => 'text' } );
	$entry->content( $item->{description}, { type => 'html' } );

	# Links are attached in this order: comment feed, comment form, edit,
	# self, alternate.
	my @link_specs = (
		{
			rel  => 'replies',
			type => 'application/atom+xml',
			# [TODO] fix this URL
			href => "http://whatswhat-fashion-news.blogspot.com/feeds/$id/comments/default",
			title => 'Post Comments',
		},
		{
			rel   => 'replies',
			type  => 'text/html',
			href  => $item->{link} . "#comment-form",
			title => '0 Comments',
		},
		{
			rel  => 'edit',
			type => 'application/atom+xml',
			# [TODO] fix this URL
			href => "http://whatswhat-fashion-news.blogspot.com/feeds/$blogId/posts/default/$id",
		},
		{
			rel  => 'self',
			type => 'application/atom+xml',
			# [TODO] fix this URL
			href => "http://whatswhat-fashion-news.blogspot.com/feeds/$blogId/posts/default/$id",
		},
		{
			href  => $item->{link},
			rel   => 'alternate',
			type  => 'text/html',
			title => $item->{title},
		},
	);
	for my $spec (@link_specs) {
		$entry->add_link( make_link($spec) );
	}

	$entry->author($author);
	$feed->add_entry($entry);

	# Each following entry is dated one week earlier than the one before.
	$dt->subtract( days => 7 );
	$num_posts += 1;
}

# Progress report goes to STDERR so it does not mix with the XML on STDOUT.
print STDERR "$num_posts posts\n";

# This is the ugly part. A bunch of regexes to format the XML in the very
# perverse and unusual way Blogger seemed to need it. (Kids -- don't use
# regex to process XML!)
#
# The substitutions run in a fixed order; later ones operate on the
# single-line text produced by the first two.
my $xml = $feed->as_xml;
$xml =~ s/^\s+//gm;                 # strip leading whitespace on every line
$xml =~ s/[\n\r]+//gs;              # remove all line breaks
$xml =~ s/issued>/published>/gs;    # rename issued tags to published
$xml =~ s/modified>/updated>/gs;    # rename modified tags to updated
$xml =~ s.</entry>.<thr:total>0</thr:total></entry>.gs;    # add a zero thr:total to each entry
$xml =~ s/^.*?<entry>/<entry>/;     # drop everything before the first entry
$xml =~ s:</feed>::;                # drop the closing feed tag
# NOTE(review): there is no /g here, so only the first mode="xml" attribute
# is removed -- confirm that this is intended.
$xml =~ s: mode="xml"::;
$xml =~ s:":':g;                    # every double quote becomes a single quote
# Escape the angle brackets inside each content element. The helper is
# called as a plain function rather than with the legacy & sigil.
$xml =~ s:(<content[^>]+>)(.*?)(</content>):fix_content_tag($1, $2, $3):ge;

# Read the master template in one go.
open( my $fh, '<', "blogger-import-master.xml" )
  or die "open: Cannot read master ($!)\n";
my $master = do { local $/; <$fh> };
$master = '' unless defined $master;
close($fh);

# Splice the entries into the template. If the placeholder is missing the
# template is still printed unchanged, but the problem is reported instead
# of passing silently.
$master =~ s/<!-- INSERT HERE -->/$xml/
  or warn "insert: No '<!-- INSERT HERE -->' marker in master; no entries inserted\n";

print $master;


# Rebuild a content element with the angle brackets of its body escaped.
#
# Arguments: the opening tag, the body text, the closing tag.
# Returns:   the three pieces concatenated, with every '<' in the body
#            replaced by '&lt;' and every '>' by '&gt;'. The tags themselves
#            are passed through untouched.
sub fix_content_tag
{
    my ( $open_tag, $body, $close_tag ) = @_;

    my %entity_for = ( '<' => '&lt;', '>' => '&gt;' );
    ( my $escaped = $body ) =~ s/([<>])/$entity_for{$1}/g;

    return join '', $open_tag, $escaped, $close_tag;
}

# Build an XML::Atom::Link from a hash reference.
#
# Each key of the hash is used as a method name on the new link object and
# is called with the corresponding value as its single argument.
# Returns the link object.
sub make_link
{
	my ($attrs) = @_;

	my $link = XML::Atom::Link->new();
	$link->$_( $attrs->{$_} ) for keys %{$attrs};

	return $link;
}

# Generate a random 19-digit numeric post id as a string.
#
# Classic ID: 818 439 446 451 821 498 8;
#             999 439 446 451 821 498 8
#
# The id is six groups in the range 100..999 followed by one digit in the
# range 1..9.
sub make_id
{
	my @groups = map { 100 + int( rand(900) ) } 1 .. 6;
	push @groups, 1 + int( rand(9) );
	return join '', @groups;
}