/usr/share/pyshared/scrapy/statscol.py is in python-scrapy 0.14.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
Scrapy extension for collecting scraping stats
"""
import pprint
from scrapy.xlib.pydispatch import dispatcher
from scrapy.signals import stats_spider_opened, stats_spider_closing, \
stats_spider_closed
from scrapy.utils.signal import send_catch_log
from scrapy import signals
from scrapy import log
from scrapy.conf import settings
class StatsCollector(object):
    """Collect scraping stats in memory, keyed by spider.

    Stats live in ``self._stats``, a dict mapping each spider to its own
    stats dict; the ``None`` key holds the global stats. Methods that read
    or update a spider's stats index that dict directly, so they raise
    ``KeyError`` when the given spider has no entry.
    """

    def __init__(self):
        # When true, stats are written to the log on spider close / engine stop.
        self._dump = settings.getbool('STATS_DUMP')
        self._stats = {None: {}}  # None is for global stats

    def get_value(self, key, default=None, spider=None):
        """Return the stat stored under ``key``, or ``default`` if absent."""
        return self._stats[spider].get(key, default)

    def get_stats(self, spider=None):
        """Return the stats dict for ``spider`` (global stats when ``None``)."""
        return self._stats[spider]

    def set_value(self, key, value, spider=None):
        """Store ``value`` under ``key`` in the stats of ``spider``."""
        self._stats[spider][key] = value

    def set_stats(self, stats, spider=None):
        """Replace the whole stats dict of ``spider`` with ``stats``."""
        self._stats[spider] = stats

    def inc_value(self, key, count=1, start=0, spider=None):
        """Add ``count`` to the stat ``key``, starting from ``start`` if unset."""
        d = self._stats[spider]
        d[key] = d.setdefault(key, start) + count

    def max_value(self, key, value, spider=None):
        """Keep the larger of the stored stat and ``value`` under ``key``."""
        d = self._stats[spider]
        d[key] = max(d.setdefault(key, value), value)

    def min_value(self, key, value, spider=None):
        """Keep the smaller of the stored stat and ``value`` under ``key``."""
        d = self._stats[spider]
        d[key] = min(d.setdefault(key, value), value)

    def clear_stats(self, spider=None):
        """Remove every stat recorded for ``spider``."""
        self._stats[spider].clear()

    def iter_spider_stats(self):
        """Return a list of ``(spider, stats)`` pairs, excluding global stats."""
        # Compare against None explicitly: only the global entry is skipped,
        # even when a spider object happens to evaluate as false.
        return [(spider, stats) for spider, stats in self._stats.items()
                if spider is not None]

    def open_spider(self, spider):
        """Create an empty stats dict for ``spider`` and send the opened signal."""
        self._stats[spider] = {}
        send_catch_log(stats_spider_opened, spider=spider)

    def close_spider(self, spider, reason):
        """Finalize the stats of ``spider``.

        Sends ``stats_spider_closing``, removes the spider's stats from the
        collector, sends ``stats_spider_closed`` with those stats, logs them
        when dumping is enabled, and hands them to ``_persist_stats``.
        """
        send_catch_log(stats_spider_closing, spider=spider, reason=reason)
        # Pop only after the closing signal, so the stats are still
        # reachable through the collector while that signal is sent.
        stats = self._stats.pop(spider)
        send_catch_log(stats_spider_closed, spider=spider, reason=reason,
                       spider_stats=stats)
        if self._dump:
            log.msg("Dumping spider stats:\n" + pprint.pformat(stats),
                    spider=spider)
        self._persist_stats(stats, spider)

    def engine_stopped(self):
        """Log the global stats when dumping is enabled, then persist them."""
        stats = self.get_stats()
        if self._dump:
            log.msg("Dumping global stats:\n" + pprint.pformat(stats))
        self._persist_stats(stats, spider=None)

    def _persist_stats(self, stats, spider=None):
        """Hook for subclasses to store ``stats``; does nothing here."""
        pass
class MemoryStatsCollector(StatsCollector):
    """Stats collector that keeps persisted spider stats in memory.

    Stats handed to ``_persist_stats`` for a spider are retained in
    ``self.spider_stats``, keyed by the spider's ``name``.
    """

    def __init__(self):
        super(MemoryStatsCollector, self).__init__()
        # Maps spider.name -> the stats dict passed to _persist_stats.
        self.spider_stats = dict()

    def _persist_stats(self, stats, spider=None):
        """Store ``stats`` under ``spider.name``; ignore calls without a spider."""
        if spider is None:
            return
        self.spider_stats[spider.name] = stats
class DummyStatsCollector(StatsCollector):
    """Stats collector whose value accessors and mutators do nothing.

    ``get_value`` always returns the supplied default, and the mutating
    methods overridden here discard their arguments. Anything not overridden
    here is inherited from ``StatsCollector`` unchanged.
    """

    def get_value(self, key, default=None, spider=None):
        """Return ``default`` without consulting any stored stats."""
        return default

    def set_value(self, key, value, spider=None):
        """Discard ``value``; nothing is stored."""

    def set_stats(self, stats, spider=None):
        """Discard ``stats``; nothing is stored."""

    def inc_value(self, key, count=1, start=0, spider=None):
        """Do nothing; no counter is kept."""

    def max_value(self, key, value, spider=None):
        """Do nothing; no maximum is tracked."""

    def min_value(self, key, value, spider=None):
        """Do nothing; no minimum is tracked."""
|