/usr/bin/chronicle-rss-importer

#!/usr/bin/perl -w

# vim: expandtab tabstop=4

=head1 NAME

chronicle-rss-importer - Import entries from an RSS feed to chronicle

=cut

=head1 SYNOPSIS


  General Options:

   --output        Specify the directory to write entries to.
   --feed          Specify the URL of the feed.
   --sequential    Specify that entries should be numbered rather than named.

  Help Options:

   --help        Show the help information for this script.
   --manual      Read the manual for this script.
   --verbose     Show useful debugging information.

=cut


=head1 ABOUT

  Chronicle is a simple tool to convert a collection of text files,
 located within a single directory, into a blog consisting of static
 HTML files.

  This importer script will create a directory of input files from a
 given RSS feed, by downloading it and writing out each entry to a single
 text file.

  The output files will be named after the entries' titles, or if
 B<--sequential> was used each entry will be numbered sequentially.

=cut

=head1 AUTHOR

 Steve
 --
 http://www.steve.org.uk/

=cut

=head1 LICENSE

Copyright (c) 2007-2010 by Steve Kemp.  All rights reserved.

This module is free software;
you can redistribute it and/or modify it under
the same terms as Perl itself.
The LICENSE file contains the full text of the license.

=cut



use strict;
use warnings;

use File::Path;
use Getopt::Long;
use HTML::Entities;
use LWP;
use Pod::Usage;
use XML::RSSLite;


#
#  Settings gathered from the command line; the routines defined
# further down read these directly.
#
my %CONFIG;


#
#  Read the command line, then make sure what we were given is usable.
#
parseCommandLineArguments();
validateCommandLineArguments();


#
#  Download the raw text of the feed ...
#
my $content = fetchRSSFeed( $CONFIG{ feed } );


#
#  ... have XML::RSSLite turn that text into a hash ...
#
my %rssHash;
parseRSS( \%rssHash, \$content );


#
#  ... and finally write out one file for each entry found.
#
processEntries(%rssHash);


#
#  All done.
#



=begin doc

  Parse the command line arguments, if any.

  Recognised options are stored in the global %CONFIG hash, keyed by
 the option name: verbose, feed, output and sequential.

  If the command line cannot be parsed the script terminates with a
 non-zero exit status.  B<--help> and B<--manual> are handed over to
 pod2usage, which shows the documentation and terminates the script.

=end doc

=cut

sub parseCommandLineArguments
{
    my $HELP   = 0;
    my $MANUAL = 0;

    if (
        !GetOptions(

            # Help options
            "help",    \$HELP,
            "manual",  \$MANUAL,
            "verbose", \$CONFIG{ 'verbose' },

            # General options
            "feed=s",     \$CONFIG{ 'feed' },
            "output=s",   \$CONFIG{ 'output' },
            "sequential", \$CONFIG{ 'sequential' },
        ) )
    {

        #
        #  GetOptions has already warned about whatever it disliked,
        # so all that is left is to signal the failure to our caller.
        # A bare "exit" would report success.
        #
        exit 1;
    }


    pod2usage(1) if $HELP;
    pod2usage( -verbose => 2 ) if $MANUAL;
}



=begin doc

  Ensure we received the arguments we need, and that
 those arguments look OK.

  Both B<--output> and B<--feed> are mandatory.  If either is missing
 an explanation is written to STDERR and the script terminates with a
 non-zero exit status.

  Once both are known to be present the output directory is created,
 if it does not already exist.

=end doc

=cut

sub validateCommandLineArguments
{

    #
    #  We need an output dir
    #
    if ( !$CONFIG{ 'output' } )
    {
        print STDERR "Output directory is mandatory.\n";
        print STDERR "Please specify via --output=...\n";
        exit 1;
    }

    #
    #  We need a feed
    #
    if ( !$CONFIG{ 'feed' } )
    {
        print STDERR "Please specify a feed to import, via --feed=http:/....\n";
        exit 1;
    }

    #
    #  Only touch the filesystem after every argument has been checked,
    # so that a rejected command line leaves no empty directory behind.
    #
    if ( !-d $CONFIG{ 'output' } )
    {
        $CONFIG{ 'verbose' } &&
          print "Creating output directory: $CONFIG{'output'}\n";

        #  Legacy mkpath interface: path, verbose flag, permissions.
        mkpath( $CONFIG{ 'output' }, 0, oct(755) );
    }

}



=begin doc

  Fetch the remote RSS feed.

  The single argument is the URI of the feed.  On success the body of
 the HTTP response is returned.

  If the request fails the reason is written to STDERR and the script
 terminates with a non-zero exit status.

=end doc

=cut

sub fetchRSSFeed
{
    my ($uri) = (@_);

    my $ua = LWP::UserAgent->new();
    $ua->timeout(10);
    $ua->agent('chronicle-importer');

    $CONFIG{ 'verbose' } && print "Fetching: $uri\n";
    my $response = $ua->get($uri);

    if ( !$response->is_success )
    {

        #
        #  Report the status line, which carries the numeric code as
        # well as the reason, and make the failure visible through
        # the exit status.
        #
        print STDERR "Failed to fetch feed: $uri\n";
        print STDERR "\n";
        print STDERR $response->status_line() . "\n";
        exit 1;
    }

    $CONFIG{ 'verbose' } && print "\tFetched successfully\n";
    return ( $response->content() );
}



=begin doc

  Iterate over the items in our feed and write each one out to a
 single file.

  The arguments are the parsed feed, as a hash.  The 'item' key is
 expected to hold a reference to an array of entries, each of which
 is a hash reference.  From every entry we read the title, the date
 (pubDate or dc:date) and the body (description or content:encoded).

  Files are written beneath the directory named by $CONFIG{'output'}.
 They are named "N.txt" if $CONFIG{'sequential'} is set, otherwise
 after the title of the entry with every character other than a
 letter or a digit replaced by an underscore.  Should two entries in
 the same run produce the same name the later ones gain a numeric
 suffix rather than overwriting the earlier ones.

  We die if a file cannot be opened for writing, or closed.

=end doc

=cut

sub processEntries
{
    my (%entries) = (@_);

    my $count = 1;

    #
    #  The title-derived names we have handed out during this run.
    #
    my %seen;

    #
    #  Work from the hash we were given, rather than from the global,
    # and cope with a feed which contains no items at all.
    #
    foreach my $item ( @{ $entries{ 'item' } || [] } )
    {

        #
        #  Get details from the feed.  Missing values become empty
        # strings so that the code below does not trip over undef.
        #
        my $title = $item->{ 'title' } || "no title";
        my $date = $item->{ 'pubDate' } || $item->{ 'dc:date' } || "";
        my $body = $item->{ 'description' } ||
          $item->{ 'content:encoded' } ||
          "";
        my $filename;


        #
        #  Build up a suitable filename.
        #
        if ( $CONFIG{ 'sequential' } )
        {
            $filename = $count . ".txt";
        }
        else
        {
            $filename = $title;
            $filename =~ s/[^a-z0-9]/_/gi;

            #
            #  Different titles can collapse to the same name, and
            # titles may simply repeat.  Add a suffix until the name
            # is one we have not used yet.
            #
            my $stem   = $filename;
            my $suffix = 1;
            while ( $seen{ $filename } )
            {
                $suffix += 1;
                $filename = $stem . "_" . $suffix;
            }
            $seen{ $filename } = 1;

            $filename .= ".txt";

        }

        #
        #  Naive expansion.
        #
        if ( $body =~ m/&lt;/ )
        {
            $body = decode_entities($body);
        }
        $filename = $CONFIG{ 'output' } . "/" . $filename;
        open( my $handle, ">", $filename ) or
          die "Failed to write to $filename - $!";
        print $handle <<EOF;
Title: $title
Date: $date

$body
EOF

        #
        #  Buffered write errors only surface when the handle is closed.
        #
        close($handle) or
          die "Failed to close $filename - $!";
        $count += 1;
    }

}
chronicle 4.6-2 / usr / bin / chronicle-rss-importer