/usr/share/avant-window-navigator/applets/comics/feed/basic.py is in awn-applet-comics 0.4.1~bzr1507-0ubuntu7.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# -*- coding: utf-8 -*-
# Copyright (c) 2008 Moses Palmér
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
import gobject
import re
import urllib
import urlparse
import threading
import htmlentitydefs
import httplib
import time
from settings import Settings
# String constants; presumably the keys used for feed settings and feed
# items elsewhere in the applet -- confirm against the callers.
NAME, URL, PLUGIN = 'name', 'url', 'plugin'
TITLE, LINK, DATE = 'title', 'link', 'date'

# English three-letter month abbreviation -> zero-padded month number.
# Lets HTTP dates be parsed numerically, independent of the current locale.
month = dict(
    (abbreviation, '%02d' % number)
    for number, abbreviation in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1))
class Feed(gobject.GObject):
    """Base class for a feed that is refreshed in the background.

    The document at self.url is downloaded in a worker thread (see
    update() and run()) and handed to parse_file(), which subclasses
    must implement.  When a refresh changed something, the 'updated'
    signal is emitted from the main loop with the new status code as
    its only argument.
    """

    # Status codes stored in self.status and sent with 'updated'.
    DOWNLOAD_OK = 0
    DOWNLOAD_FAILED = -1
    DOWNLOAD_NOT_FEED = -2

    # Convenient regular expressions
    IMG_RE = re.compile('(<img .*?>)', re.IGNORECASE)
    IMG_SRC_RE = re.compile('<img .*?src=["\'](.*?)["\'].*?>', re.IGNORECASE)

    __gsignals__ = {
        'updated': (gobject.SIGNAL_RUN_FIRST, None, (int,)),
    }

    def __init__(self, settings=None, url=None):
        """Initialize a feed.

        settings -- the settings object describing the feed, or None to
                    create a "query" feed (is_query is True) for url.
        url      -- the feed URL; only used when settings is None.
        """
        super(Feed, self).__init__()
        self.is_query = settings is None
        if self.is_query:
            settings = Settings()
            settings['name'] = None
            settings['url'] = url
        self.filename = settings.filename
        self.description = settings.get_string('description', '')
        self.name = settings.get_string('name', '---')
        self.url = settings.get_string('url')
        # Refresh interval in minutes (converted to ms for the timer below).
        self.timeout = settings.get_int('timeout', 20)
        self.items = {}
        self.newest = 0.0
        self.status = None
        # Guards run() so that only one refresh is in progress at a time.
        self.__lock = threading.Lock()
        self.__timeout = gobject.timeout_add(self.timeout * 60 * 1000,
                                             self.on_timeout)

    def make_absolute_url(self, url, from_doc):
        """Convert a relative URL to an absolute one.

        url      -- the (possibly relative) URL found in a document.
        from_doc -- the URL of the document url was found in.

        Returns None for an empty url.  A url that already names a host
        is returned unchanged, except that a scheme-relative url
        ('//host/path') is given the scheme of from_doc.  Any other url
        is resolved against the root of from_doc's host; its parameters,
        query string and fragment are kept.
        """
        if not url:
            return None
        target = urlparse.urlparse(url)
        base = urlparse.urlparse(from_doc)
        if target[1]:
            if target[0] or not base[0]:
                return url
            # Scheme-relative reference: borrow the document's scheme.
            return base[0] + ':' + url
        path = target[2]
        if not path:
            # Query- or fragment-only reference ('?page=2', '#top'): it
            # refers to the document itself.
            path = base[2] or '/'
        elif path[0] != '/':
            # TODO resolving against the directory of from_doc didn't
            # work for some (or all?) urls, like http://www.gwscomic.com,
            # so relative paths are resolved against the site root -
            # test more thoroughly
            path = '/' + path
        tail = urlparse.urlunparse(
            ('', '', path, target[3], target[4], target[5]))
        return base[0] + '://' + base[1] + tail

    def get_timestamp_for_url(self, url):
        """Request the "Last-Modified" header from url without downloading
        any data and return a timestamp or None.

        None is returned for an empty url, for schemes other than http
        and https, when the request fails, when the server sends no
        "Last-Modified" header and when the header cannot be parsed.
        """
        if not url:
            return None
        p = urlparse.urlparse(url)
        conn = None
        try:
            if p.scheme == 'http':
                conn = httplib.HTTPConnection(p.netloc)
            elif p.scheme == 'https':
                conn = httplib.HTTPSConnection(p.netloc)
            else:
                return None
            # The request target must not be empty and has to carry the
            # parameters and query string of the original URL.
            target = urlparse.urlunparse(
                ('', '', p.path or '/', p.params, p.query, ''))
            conn.request("HEAD", target)
            htime = conn.getresponse().getheader("Last-Modified")
        except Exception:
            return None
        finally:
            if conn is not None:
                conn.close()
        if not htime:
            # No "Last-Modified" header: nothing to parse.
            return None

        # Based on a posting by Philip Semanchuk, Nov 2009 on
        # http://mail.python.org/mailman/listinfo/python-list
        try:
            if not htime.endswith("GMT"):
                # ASCTIME format
                # Work around locale problems: remove weekdays,
                # convert month names to decimals
                htime = htime[4:]
                htime = month[htime[0:3]] + htime[3:]
                parsed = time.strptime(htime, "%m %d %H:%M:%S %Y")
            elif "-" in htime:
                # RFC 850 Format
                htime = htime.split(" ", 1)[1]
                htime = htime[:3] + month[htime[3:6]] + htime[6:]
                parsed = time.strptime(htime, "%d-%m-%y %H:%M:%S GMT")
            else:
                # RFC 1123 Format
                htime = htime[5:]
                htime = htime[:3] + month[htime[3:6]] + htime[6:]
                parsed = time.strptime(htime, "%d %m %Y %H:%M:%S GMT")
            # NOTE(review): time.mktime() interprets the value as local
            # time although the header is GMT; kept unchanged because
            # other code may compare against timestamps made the same way.
            return time.mktime(parsed)
        except Exception as err:
            print("Comics!: %s" % err)
            return None

    def unescape_html(self, text):
        """Replace HTML character references and named entities in text
        by the characters they stand for.

        References that cannot be converted are left as they are.

        Taken from Fredrik Lundh -
        http://effbot.org/zone/re-sub.htm#unescape-html"""
        def fixup(m):
            text = m.group(0)
            if text[:2] == "&#":
                # character reference
                try:
                    if text[:3] == "&#x":
                        return unichr(int(text[3:-1], 16))
                    else:
                        return unichr(int(text[2:-1]))
                except (ValueError, OverflowError):
                    # Not a number or outside the Unicode range
                    pass
            else:
                # named entity
                try:
                    text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
                except KeyError:
                    pass
            return text  # leave as is
        return re.sub(r"&#?\w+;", fixup, text)

    def run(self):
        """The thread body.

        Downloads self.url, passes the downloaded file to parse_file()
        and stores the result in self.status.  Returns immediately when
        another refresh is still running.
        """
        if not self.__lock.acquire(False):
            return
        try:
            old_status = self.status
            # Presumably set to True by parse_file() implementations when
            # they find new items -- confirm against the subclasses.
            self.updated = False
            try:
                filename = urllib.urlretrieve(self.url)[0]
                self.status = self.parse_file(filename)
                # If the status has changed, the feed is considered updated
                # NOTE(review): a failed download below changes the status
                # as well but does not emit 'updated' -- confirm intended.
                if self.updated or old_status != self.status:
                    # Emit from the main loop, not from this thread.
                    gobject.idle_add(gobject.GObject.emit, self, 'updated',
                                     self.status)
            except IOError:  # Network is down
                self.status = Feed.DOWNLOAD_FAILED
            except Exception as err:
                self.status = Feed.DOWNLOAD_FAILED
                print("Comics!: Parsing error: %s" % err)
        finally:
            # Always give the lock back, otherwise no later refresh could
            # ever run.
            self.__lock.release()

    def update(self):
        """Reload the feed in a daemon thread running run()."""
        thread = threading.Thread(target=self.run, name=self.name)
        thread.daemon = True
        thread.start()

    def parse_file(self, o):
        """This method is called when the file pointed to by settings.url
        has been correctly downloaded.

        o -- the name of the local file the feed was downloaded to.

        It returns an error code, which run() stores in self.status.
        Subclasses must override this method; this implementation raises
        NotImplementedError."""
        raise NotImplementedError()

    def on_timeout(self):
        """Timer callback: start a refresh of the feed."""
        self.update()
        # Return True to keep the timer running
        return True
|