This file is indexed.

/usr/share/avant-window-navigator/applets/comics/feed/basic.py is in awn-applet-comics 0.4.1~bzr1507-0ubuntu7.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# -*- coding: utf-8 -*-

# Copyright (c) 2008 Moses Palmér
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.


import gobject
import re
import urllib
import urlparse
import threading
import htmlentitydefs
import httplib
import time

from settings import Settings

# String constants shared with the rest of the applet; presumably the keys
# used for feed settings and parsed feed items -- confirm against callers.
NAME = 'name'
URL = 'url'
PLUGIN = 'plugin'
TITLE = 'title'
LINK = 'link'
DATE = 'date'

# Maps the English three-letter month abbreviations found in HTTP date
# headers to zero-padded month numbers, so that dates can be parsed
# independently of the current locale.
month = dict(zip(
    'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split(),
    '01 02 03 04 05 06 07 08 09 10 11 12'.split()))


class Feed(gobject.GObject):
    """Base class for a periodically refreshed comic feed.

    The feed downloads ``self.url`` in a background thread (see update()
    and run()) and hands the downloaded file to parse_file(), which
    subclasses must implement.  The resulting status code is stored in
    ``self.status`` and, when something changed, passed along with the
    'updated' signal.
    """

    # Status codes stored in self.status and sent with the 'updated' signal
    DOWNLOAD_OK = 0
    DOWNLOAD_FAILED = -1
    DOWNLOAD_NOT_FEED = -2

    # Convenient regular expressions
    IMG_RE = re.compile('(<img .*?>)', re.IGNORECASE)
    IMG_SRC_RE = re.compile('<img .*?src=["\'](.*?)["\'].*?>', re.IGNORECASE)

    # 'updated' carries a single int argument: the new status code
    __gsignals__ = {
        'updated': (gobject.SIGNAL_RUN_FIRST, None, (int,)),
        }

    def make_absolute_url(self, url, from_doc):
        """Convert a relative URL to an absolute one.

        url is the (possibly relative) URL, from_doc the URL of the
        document it was found in.  Returns None if url is None or empty,
        url itself if it already names a host, and otherwise url placed
        directly below the scheme and host of from_doc.
        """
        if not url:
            return None
        rel = urlparse.urlparse(url)
        if rel[1]:
            # The URL already contains a host
            return url
        base = urlparse.urlparse(from_doc)
        # Rebuild everything that follows the host, so that parameters,
        # query string and fragment of the relative URL are kept
        tail = urlparse.urlunparse(('', '') + tuple(rel[2:]))
        # TODO resolving against the directory of from_doc (instead of the
        # site root) didn't work for some (or all?) urls, like
        # http://www.gwscomic.com - test more thoroughly whether there
        # should be a separate case for that.  Until then, paths that do
        # not start with '/' are resolved against the site root as well;
        # this also covers an empty path (e.g. "?id=3").
        if not tail.startswith('/'):
            tail = '/' + tail
        return base[0] + '://' + base[1] + tail

    def get_timestamp_for_url(self, url):
        """Request the "Last-Modified" header from url without downloading
        any data and return a timestamp or None.

        None is returned if url is None or empty, if its scheme is neither
        http nor https, if the request fails, if the server sends no
        "Last-Modified" header or if that header cannot be parsed.
        """
        if not url:
            return None
        p = urlparse.urlparse(url)
        # An empty path is not a valid request target, and the query string
        # is part of what identifies the resource
        target = urlparse.urlunparse(
            ('', '', p.path or '/', p.params, p.query, ''))
        conn = None
        try:
            if p.scheme == 'http':
                conn = httplib.HTTPConnection(p.netloc)
            elif p.scheme == 'https':
                conn = httplib.HTTPSConnection(p.netloc)
            else:
                return None
            conn.request("HEAD", target)
            htime = conn.getresponse().getheader("Last-Modified")
        except Exception:
            # Best effort: any failure simply means "no timestamp"
            return None
        finally:
            # Do not leak the socket, whatever happened above
            if conn is not None:
                conn.close()

        if not htime:
            # The server did not send the header
            return None

        # Based on a posting by Philip Semanchuk, Nov 2009 on
        # http://mail.python.org/mailman/listinfo/python-list

        try:
            if not htime.endswith("GMT"):
                # ASCTIME format
                # Work around locale problems: remove weekdays,
                # convert month names to decimals
                htime = htime[4:]
                htime = month[htime[0:3]] + htime[3:]
                timestamp = time.strptime(htime, "%m %d %H:%M:%S %Y")
            elif "-" in htime:
                # RFC 850 Format
                htime = htime.split(" ", 1)[1]
                htime = htime[:3] + month[htime[3:6]] + htime[6:]
                timestamp = time.strptime(htime, "%d-%m-%y %H:%M:%S GMT")
            else:
                # RFC 1123 Format
                htime = htime[5:]
                htime = htime[:3] + month[htime[3:6]] + htime[6:]
                timestamp = time.strptime(htime, "%d %m %Y %H:%M:%S GMT")
            # NOTE(review): time.mktime() interprets the parsed time as
            # local time although HTTP dates are GMT, so the result is off
            # by the local UTC offset.  Left unchanged because other code
            # may compare against timestamps computed the same way --
            # confirm before switching to calendar.timegm().
            timestamp = time.mktime(timestamp)
        except Exception as err:
            print("Comics!: %s" % err)
            return None
        return timestamp

    def unescape_html(self, text):
        """Replace HTML character references and named entities in text
        with the characters they stand for; unknown or malformed ones are
        left untouched.

        Taken from Fredrik Lundh -
        http://effbot.org/zone/re-sub.htm#unescape-html"""
        def fixup(m):
            text = m.group(0)
            if text[:2] == "&#":
                # character reference
                try:
                    if text[:3].lower() == "&#x":
                        return unichr(int(text[3:-1], 16))
                    else:
                        return unichr(int(text[2:-1]))
                except (ValueError, OverflowError):
                    # Not a number, or not a valid code point
                    pass
            else:
                # named entity
                try:
                    text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
                except KeyError:
                    pass
            return text  # leave as is
        return re.sub(r"&#?\w+;", fixup, text)

    def __init__(self, settings=None, url=None):
        """Initialize a feed.

        settings is the settings object describing the feed.  If it is
        None, the feed is a query (self.is_query is True) for url, and a
        fresh Settings object holding only that URL is used instead.
        """
        super(Feed, self).__init__()

        if settings is None:
            self.is_query = True
            settings = Settings()
            settings['name'] = None
            settings['url'] = url
        else:
            self.is_query = False
        self.filename = settings.filename
        self.description = settings.get_string('description', '')
        self.name = settings.get_string('name', '---')
        self.url = settings.get_string('url')
        # Refresh interval in minutes
        self.timeout = settings.get_int('timeout', 20)
        self.items = {}
        self.newest = 0.0
        self.status = None
        # Reset at the start of every run(); defined here so that the
        # attribute exists before the first download
        self.updated = False
        # Guards against two downloads of the same feed running at once
        self.__lock = threading.Lock()
        # timeout_add() expects milliseconds
        self.__timeout = gobject.timeout_add(self.timeout * 60 * 1000,
            self.on_timeout)

    def run(self):
        """The thread body.

        Downloads self.url, passes the downloaded file to parse_file() and
        stores the returned code in self.status.  Does nothing if another
        run is still in progress.
        """
        if not self.__lock.acquire(False):
            return
        try:
            old_status = self.status
            self.updated = False
            try:
                filename = urllib.urlretrieve(self.url)[0]
                self.status = self.parse_file(filename)
                # If the status has changed, the feed is considered updated
                if self.updated or old_status != self.status:
                    # Emit from the main loop rather than from this thread
                    gobject.idle_add(gobject.GObject.emit, self, 'updated',
                        self.status)
            except IOError:  # Network is down
                # NOTE(review): no 'updated' signal is emitted when the
                # status changes to DOWNLOAD_FAILED here or below --
                # confirm that this is intended.
                self.status = Feed.DOWNLOAD_FAILED
            except Exception as err:
                self.status = Feed.DOWNLOAD_FAILED
                print("Comics!: Parsing error: %s" % err)
        finally:
            # Always release the lock, otherwise the feed could never be
            # refreshed again
            self.__lock.release()

    def update(self):
        """Reload the feed in a background daemon thread."""
        thread = threading.Thread(target=self.run, name=self.name)
        thread.daemon = True
        thread.start()

    def parse_file(self, o):
        """This method is called when the file pointed to by settings.url
        has been correctly downloaded.

        o is the name of the downloaded file.  Subclasses must override
        this method.

        It returns an error code."""
        raise NotImplementedError()

    def on_timeout(self):
        """Timer callback: reload the feed."""
        self.update()

        # Return True to keep the timer running
        return True