This file is indexed.

/usr/share/pyshared/planet/opml.py is in planet-venus 0~bzr116-1.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python

from xml.sax import ContentHandler, make_parser, SAXParseException
from xml.sax.xmlreader import InputSource
from sgmllib import SGMLParser
from cStringIO import StringIO
from ConfigParser import ConfigParser
from htmlentitydefs import entitydefs
import re

# input = opml, output = ConfigParser
def opml2config(opml, config=None):
    """Add the feeds listed in an OPML document to a configuration object.

    opml   -- the OPML document: either a string, or any object with a
              read() method (it is read in full before parsing).
    config -- the object the feeds are added to; a fresh ConfigParser is
              created when this is omitted (None).

    Returns the configuration object.  The document is parsed as XML via
    SAX first; if SAX reports a parse error the same text is handed to
    the more forgiving SGML parser instead.
    """
    # imported locally: these names are only needed to harden the parser
    from xml.sax import SAXNotRecognizedException, SAXNotSupportedException
    from xml.sax.handler import feature_external_ges

    if hasattr(opml, 'read'):
        opml = opml.read()

    # compare against None explicitly so that a caller-supplied object is
    # never replaced merely because it happens to evaluate as false
    if config is None:
        config = ConfigParser()

    opmlParser = OpmlParser(config)

    try:
        # try SAX
        source = InputSource()
        source.setByteStream(StringIO(opml))
        parser = make_parser()
        try:
            # subscription lists come from third parties: do not let the
            # XML parser fetch external entities on their behalf
            parser.setFeature(feature_external_ges, False)
        except (SAXNotRecognizedException, SAXNotSupportedException):
            # parser has no such switch; carry on as before
            pass
        parser.setContentHandler(opmlParser)
        parser.parse(source)
    except SAXParseException:
        # try as SGML
        opmlParser.feed(opml)

    return config

# Parse OPML via either SAX or SGML
class OpmlParser(ContentHandler,SGMLParser):
    """Collect feed subscriptions from the 'outline' elements of an OPML file.

    The class is both a SAX ContentHandler and an SGMLParser, so one
    instance can be driven by a strict XML parser or be fed the raw text
    directly.  In both cases start tags end up in startElement(), which
    adds one section per feed URL to the supplied configuration object and
    stores the feed's title there as the 'name' option.
    """

    # a named (&eacute;), decimal (&#233;) or hexadecimal (&#xE9;) reference;
    # the single group captures everything between '&' and ';'
    entities = re.compile(r'&(#?\w+);')

    def __init__(self, config):
        """Initialise both base classes and remember the target config."""
        ContentHandler.__init__(self)
        SGMLParser.__init__(self)
        self.config = config

    def startElement(self, name, attrs):
        """Record the subscription described by one 'outline' start tag.

        name  -- the element name; anything but 'outline' is ignored.
        attrs -- mapping of attribute names to values (SAX attributes or
                 a plain dict).

        Elements that have a type other than rss/atom, no usable feed URL,
        or no usable title are skipped silently.
        """

        # we are only looking for data in 'outline' nodes.
        if name != 'outline': return

        # work on a private dict so corrections never touch the caller's
        # (possibly read-only) attribute object
        attrs = dict(attrs.items())

        # A type of 'rss' is meant to be used generically to indicate that
        # this is an entry in a subscription list, but some leave this
        # attribute off, and others have placed 'atom' in here
        if 'type' in attrs:
            if attrs['type'] == 'link' and 'url' not in attrs:
                # Auto-correct WordPress link manager OPML files
                attrs['type'] = 'rss'
            if attrs['type'].lower() not in ('rss', 'atom'): return

        # The feed itself is supposed to be in an attribute named 'xmlUrl'
        # (note the camel casing), but this has proven to be problematic,
        # with the most common misspelling being in all lower-case
        if not attrs.get('xmlUrl', '').strip():
            for attribute, value in list(attrs.items()):
                if attribute.lower() == 'xmlurl' and value.strip():
                    attrs['xmlUrl'] = value
                    break
            else:
                return

        # the text attribute is nominally required in OPML, but this
        # data is often found in a title attribute instead
        if not attrs.get('text', '').strip():
            if not attrs.get('title', '').strip(): return
            attrs['text'] = attrs['title']

        # if we get this far, we either have a valid subscription list entry,
        # or one with a correctable error.  Add it to the configuration, if
        # it is not already there.
        xmlUrl = attrs['xmlUrl']
        if not self.config.has_section(xmlUrl):
            self.config.add_section(xmlUrl)
            self.config.set(xmlUrl, 'name', self.unescape(attrs['text']))

    def unescape(self, text):
        """Resolve entity references left in text; return UTF-8 bytes.

        Named references known to entitydefs and decimal/hexadecimal
        character references are replaced by the character they stand
        for.  Anything else that merely looks like a reference (unknown
        name, malformed or out-of-range number) is kept exactly as
        written, including its '&' and ';'.
        """
        # splitting on a pattern with one group puts the captured
        # reference bodies at the odd indexes, plain text at the even ones
        parsed = self.entities.split(text)

        for i in range(1, len(parsed), 2):
            entity = parsed[i]

            if entity in entitydefs:
                # named entities: the table holds either the character
                # itself or a numeric reference such as '&#8364;'
                codepoint = entitydefs[entity]
                match = self.entities.match(codepoint)
                if not match:
                    parsed[i] = unichr(ord(codepoint))
                    continue
                entity = match.group(1)

            # numeric entities, whether written directly in the text or
            # obtained from the named-entity table above
            if entity.startswith('#'):
                try:
                    if entity[1:2] in ('x', 'X'):
                        parsed[i] = unichr(int(entity[2:], 16))
                    else:
                        parsed[i] = unichr(int(entity[1:]))
                    continue
                except (ValueError, OverflowError):
                    # not a number, or not a valid code point
                    pass

            # not something we can resolve: put the delimiters back that
            # split() stripped so the text is passed through unchanged
            parsed[i] = '&' + parsed[i] + ';'

        return u''.join(parsed).encode('utf-8')

    # SGML => SAX
    def unknown_starttag(self, name, attrs):
        """Decode the attribute values of a start tag, then process it.

        name  -- the tag name.
        attrs -- sequence of (attribute, value) pairs.

        Byte-string values are decoded as UTF-8; values that are not valid
        UTF-8 are decoded as ISO-8859-1 with the Windows-1252 specific
        characters repaired through the cp1252 table.  The result is
        passed on to startElement().
        """
        attrs = dict(attrs)
        for attribute, value in list(attrs.items()):
            if not isinstance(value, bytes):
                # already decoded text; decoding it again could only fail
                continue
            try:
                attrs[attribute] = value.decode('utf-8')
            except UnicodeDecodeError:
                work = value.decode('iso-8859-1')
                attrs[attribute] = u''.join([cp1252.get(c, c) for c in work])
        self.startElement(name, attrs)

# http://www.intertwingly.net/stories/2004/04/14/i18n.html#CleaningWindows
# Characters 128-159 as obtained by decoding Windows-1252 bytes as
# ISO-8859-1, mapped to the characters Windows-1252 assigns to those bytes.
# Each pair below is (ISO-8859-1 code point, intended Unicode code point).
cp1252 = dict((unichr(latin1), unichr(intended)) for latin1, intended in (
  (128, 8364), # euro sign
  (130, 8218), # single low-9 quotation mark
  (131,  402), # latin small letter f with hook
  (132, 8222), # double low-9 quotation mark
  (133, 8230), # horizontal ellipsis
  (134, 8224), # dagger
  (135, 8225), # double dagger
  (136,  710), # modifier letter circumflex accent
  (137, 8240), # per mille sign
  (138,  352), # latin capital letter s with caron
  (139, 8249), # single left-pointing angle quotation mark
  (140,  338), # latin capital ligature oe
  (142,  381), # latin capital letter z with caron
  (145, 8216), # left single quotation mark
  (146, 8217), # right single quotation mark
  (147, 8220), # left double quotation mark
  (148, 8221), # right double quotation mark
  (149, 8226), # bullet
  (150, 8211), # en dash
  (151, 8212), # em dash
  (152,  732), # small tilde
  (153, 8482), # trade mark sign
  (154,  353), # latin small letter s with caron
  (155, 8250), # single right-pointing angle quotation mark
  (156,  339), # latin small ligature oe
  (158,  382), # latin small letter z with caron
  (159,  376), # latin capital letter y with diaeresis
))

if __name__ == "__main__":
    # small main program which converts OPML into config.ini format:
    # every command line argument is opened with urllib.urlopen, all
    # feeds found are merged into one configuration, and the result is
    # written to standard output
    import sys, urllib
    config = ConfigParser()
    for opml in sys.argv[1:]:
        stream = urllib.urlopen(opml)
        try:
            opml2config(stream, config)
        finally:
            # release the connection/file even if parsing fails
            stream.close()
    config.write(sys.stdout)