/usr/share/pyshared/scrapy/commands/runspider.py is in python-scrapy 0.14.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
import sys
import os
from scrapy.utils.spider import iter_spider_classes
from scrapy.command import ScrapyCommand
from scrapy.exceptions import UsageError
from scrapy.utils.conf import arglist_to_dict
def _import_file(filepath):
    """Import the Python source file at ``filepath`` and return the module.

    The directory containing the file is placed at the front of ``sys.path``
    for the duration of the import only, so the file can be imported by its
    bare module name.

    Raises ``ValueError`` if the file name does not end in ``.py``. Any
    exception raised by the import itself (e.g. ``ImportError``) propagates
    to the caller after ``sys.path`` has been restored.
    """
    abspath = os.path.abspath(filepath)
    dirname, basename = os.path.split(abspath)
    fname, fext = os.path.splitext(basename)
    if fext != '.py':
        raise ValueError("Not a Python source file: %s" % abspath)
    # ``abspath`` is absolute, so ``dirname`` is never empty here.
    # Mutate sys.path in place (rather than rebinding it to a new list) so
    # that anything holding a reference to the original list stays in sync.
    sys.path.insert(0, dirname)
    try:
        module = __import__(fname, {}, {}, [''])
    finally:
        # Remove our own entry by value, not by position: the imported file
        # may itself have prepended entries to sys.path, in which case index
        # 0 is no longer ours.
        try:
            sys.path.remove(dirname)
        except ValueError:
            # The imported file already removed the entry; nothing to undo.
            pass
    return module
class Command(ScrapyCommand):
    """Command that runs a spider defined in a single Python source file."""

    # NOTE(review): presumably tells the command framework that no Scrapy
    # project is needed to run this command -- confirm against ScrapyCommand.
    requires_project = False

    def syntax(self):
        """Return the argument synopsis for this command."""
        return "[options] <spider_file>"

    def short_desc(self):
        """Return the one-line description of this command."""
        return "Run a self-contained spider (without creating a project)"

    def long_desc(self):
        """Return the longer description of this command."""
        return "Run the spider defined in the given file"

    def add_options(self, parser):
        """Register the base options plus the repeatable ``-a NAME=VALUE``."""
        ScrapyCommand.add_options(self, parser)
        parser.add_option("-a", dest="spargs", action="append", default=[],
                          metavar="NAME=VALUE",
                          help="set spider argument (may be repeated)")

    def process_options(self, args, opts):
        """Run base option processing, then convert ``opts.spargs``.

        The collected ``-a`` values are passed through ``arglist_to_dict``
        and the result is stored back on ``opts.spargs``.

        Raises ``UsageError`` if ``arglist_to_dict`` raises ``ValueError``.
        """
        ScrapyCommand.process_options(self, args, opts)
        try:
            opts.spargs = arglist_to_dict(opts.spargs)
        except ValueError:
            raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)

    def run(self, args, opts):
        """Import the given spider file, instantiate a spider and crawl.

        ``args`` must contain exactly one element, the path of the spider
        file. ``opts.spargs`` is expanded as keyword arguments to the spider
        class constructor.

        Raises ``UsageError`` when the argument count is wrong, the file does
        not exist, the file cannot be imported, or it yields no spider class.
        """
        if len(args) != 1:
            raise UsageError()
        filename = args[0]
        if not os.path.exists(filename):
            raise UsageError("File not found: %s\n" % filename)
        try:
            module = _import_file(filename)
        except (ImportError, ValueError) as e:
            raise UsageError("Unable to load %r: %s\n" % (filename, e))
        spclasses = list(iter_spider_classes(module))
        if not spclasses:
            raise UsageError("No spider found in file: %s\n" % filename)
        # When the file yields several spider classes, the last one is used.
        spider = spclasses.pop()(**opts.spargs)
        self.crawler.crawl(spider)
        self.crawler.start()
|