/usr/lib/python2.7/dist-packages/scrapy/spidermanager.py is in python-scrapy 0.24.2-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
"""
SpiderManager is the class which locates and manages all website-specific
spiders
"""
from zope.interface import implements
from scrapy import signals
from scrapy.interfaces import ISpiderManager
from scrapy.utils.misc import walk_modules
from scrapy.utils.spider import iter_spider_classes
class SpiderManager(object):
    """Registry of all spider classes found in the configured spider
    modules, keyed by each spider's ``name`` attribute."""

    implements(ISpiderManager)

    def __init__(self, spider_modules):
        """Walk every module path in *spider_modules* and register the
        spider classes discovered there."""
        self.spider_modules = spider_modules
        self._spiders = {}
        for modpath in self.spider_modules:
            for mod in walk_modules(modpath):
                self._load_spiders(mod)

    def _load_spiders(self, module):
        # Register each spider class exposed by *module* under its name.
        for spider_cls in iter_spider_classes(module):
            self._spiders[spider_cls.name] = spider_cls

    @classmethod
    def from_settings(cls, settings):
        """Alternate constructor: read the module list from the
        SPIDER_MODULES setting."""
        return cls(settings.getlist('SPIDER_MODULES'))

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor bound to *crawler*; also hooks the
        spider_closed signal so spiders get their ``closed`` callback."""
        manager = cls.from_settings(crawler.settings)
        manager.crawler = crawler
        crawler.signals.connect(manager.close_spider, signals.spider_closed)
        return manager

    def create(self, spider_name, **spider_kwargs):
        """Instantiate the spider registered as *spider_name*.

        Raises KeyError when no spider by that name is known.
        """
        try:
            spcls = self._spiders[spider_name]
        except KeyError:
            raise KeyError("Spider not found: %s" % spider_name)
        # Prefer the crawler-aware constructor when both sides support it.
        if hasattr(self, 'crawler') and hasattr(spcls, 'from_crawler'):
            return spcls.from_crawler(self.crawler, **spider_kwargs)
        return spcls(**spider_kwargs)

    def find_by_request(self, request):
        """Return the names of every registered spider that can handle
        *request*."""
        matching = []
        for name, spcls in self._spiders.iteritems():
            if spcls.handles_request(request):
                matching.append(name)
        return matching

    def list(self):
        """Return the names of all registered spiders."""
        return self._spiders.keys()

    def close_spider(self, spider, reason):
        # Forward the close notification to the spider's optional
        # `closed` hook, if it defines one.
        closed = getattr(spider, 'closed', None)
        if callable(closed):
            return closed(reason)