/usr/bin/samizdat-import-feeds is shipped in the samizdat package, version 0.7.0-2.

The file is owned by root:root, with mode 0o755 (rwxr-xr-x).

The contents of the file are shown below.

#!/usr/bin/ruby
#
# Samizdat syndication feeds updater
#
#   Copyright (c) 2002-2011  Dmitry Borodaenko <angdraug@debian.org>
#
#   This program is free software.
#   You can distribute/modify this program under the terms of
#   the GNU General Public License version 3 or later.
#
# vim: et sw=2 sts=2 ts=8 tw=0

require 'samizdat'
require 'samizdat/helpers/syndication_helper'
require 'rss/1.0'
require 'rss/2.0'
require 'rss/dublincore'
require 'rss/maker'
require 'timeout'
require 'open-uri'   # Kernel#open is called on feed URLs in update_feed

class FeedUpdaterError < RuntimeError; end

class FeedUpdater
  include SyndicationHelper

  CONNECTION_TIMEOUT = 60   # 1 minute

  def initialize
    @sites = SamizdatSites.instance.all
  end

  # cycle through all sites and update all configured imported feeds
  #
  # configuration:
  #
  #   import_feeds:
  #     name:
  #       url: http://example.com/feed.rss
  #       limit: 5
  #
  # or, to use limit:page limit:
  #
  #   import_feeds:
  #     name: http://example.com/feed.rss
  #
  # each feed is only fetched once per URL across all sites
  #
  def run
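    # map each feed URL to the shared caches (keyed by cache URI) that import it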
    feeds = {}

    @sites.each do |site_name|
      @site = Site.new(site_name)

      # Only sites with a shared remote cache can import feeds.
      # See Site#initialize().
      cache_uri = config.cache or next

      each_import_feed do |feed_name, url, limit|
        # fetch once per url across shared caches of all sites
        (feeds[url] ||= {})[cache_uri] ||= shared_cache if url
      end
    end

    feeds.each do |url, caches|
      begin
        update_feed(url, caches)
      rescue FeedUpdaterError => error
        log error
        next   # just ignore the feed if it can't be updated
      end
    end

    flush_pages_with_syndication(feeds)
  end

  private

  def update_feed(url, caches)
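    # fetch and parse the feed at url, then store the parsed items in every
    # shared cache listed in caches (cache URI => cache object)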
    response = nil   # scope fix
    begin
      Timeout.timeout(CONNECTION_TIMEOUT) do
        response = open(url) {|file| file.read }
      end
    rescue => error
      raise FeedUpdaterError, "Failed to fetch feed from #{url}: " + error.message
    end

    begin
      feed = parse_feed(response)
    rescue => error
      raise FeedUpdaterError, "Failed to parse feed from #{url}: " + error.message
    end

    caches.each_value do |c|
      c['samizdat/*/import_feeds/' + url] = feed   # '*' to avoid clashes with site_name
    end
  end

  def parse_feed(response)
    # Remove the rdf:Description section that references the "mn" namespace
    # (http://usefulinc.com/rss/manifest/); it is not needed here and is known
    # to be buggy because of its invalid "mn" type URI.
    #
    # fixme: explain this better
    #
    if response =~ %r{http://usefulinc.com/rss/manifest/}
      response.sub!(%r{<rdf:Description(.*\n)*?.*mn:channels.*(.*\n)*?.*</rdf:Description>}, '')
    end

    begin
      rss = RSS::Parser.parse(response)   # try RSS 1.0 compliant parser first
    rescue RSS::Error
      rss = RSS::Parser.parse(response, false)   # fall back to non RSS 1.0 compliant
    end

    rss.respond_to?(:items) or raise FeedUpdaterError, "Failed to parse RSS"

    # don't store more than limit:page items
    rss.items[0, limit_page].collect {|item|
      {
        'link' => item.link.strip,
        'title' => item.title.strip,
        'date' => item.date
      }
    }
  end

  def flush_pages_with_syndication(feeds)
    # make sure we only flush each affected shared cache once
    all_caches = {}
    feeds.each_value do |caches|
      all_caches.merge!(caches)
    end

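    # invalidate cached index pages (keys of the form samizdat/<site>/index/...)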
    all_caches.each_value do |c|
      c.flush(%r{\Asamizdat/[^/]+/index/})
    end
  end
end

FeedUpdater.new.run
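
For reference, the import_feeds configuration documented in the comment inside FeedUpdater#run would look roughly like this in a site's configuration; the feed names, URLs and limit value below are made up for illustration:

  import_feeds:
    example-news:
      url: http://example.com/feed.rss
      limit: 5
    another-feed: http://example.org/news.rss

Each fetched feed is stored in the shared cache under the key 'samizdat/*/import_feeds/' followed by the feed URL, as the array of item hashes built in parse_feed, roughly of this shape (values invented):

  [ { 'link'  => 'http://example.com/articles/1',
      'title' => 'An example article',
      'date'  => Time.now } ]

The script takes no arguments: executing it calls FeedUpdater.new.run, which fetches each configured feed URL once across all sites and then flushes the cached index pages of the affected shared caches.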