# /usr/share/pyshared/scrapy/cmdline.py from python-scrapy 0.14.4-1
# (file owned by root:root, mode 0o644)
from __future__ import with_statement
import sys
import os
import optparse
import cProfile
import inspect
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.xlib import lsprofcalltree
from scrapy.conf import settings
from scrapy.command import ScrapyCommand
from scrapy.exceptions import UsageError, ScrapyDeprecationWarning
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import inside_project
def _iter_command_classes(module_name):
    """Yield every ScrapyCommand subclass defined directly inside the
    modules found under *module_name* (subclasses merely imported into a
    module are skipped via the ``__module__`` check).
    """
    # TODO: add `name` attribute to commands and merge this function with
    # scrapy.utils.spider.iter_spider_classes
    for mod in walk_modules(module_name):
        for candidate in vars(mod).itervalues():
            is_command = (inspect.isclass(candidate)
                          and issubclass(candidate, ScrapyCommand)
                          and candidate.__module__ == mod.__name__)
            if is_command:
                yield candidate
def _get_commands_from_module(module, inproject):
    """Return a dict mapping command name -> command instance for every
    command in *module* usable in the current context (commands that
    require a project are skipped when *inproject* is false).
    """
    commands = {}
    for cmd_cls in _iter_command_classes(module):
        if not inproject and cmd_cls.requires_project:
            continue
        # command name is the last segment of the defining module's path
        name = cmd_cls.__module__.split('.')[-1]
        commands[name] = cmd_cls()
    return commands
def _get_commands_dict(inproject):
    """Collect the built-in scrapy.commands plus any commands from the
    module named by the COMMANDS_MODULE setting (the latter override the
    former on name clashes).
    """
    commands = _get_commands_from_module('scrapy.commands', inproject)
    custom_module = settings['COMMANDS_MODULE']
    if custom_module:
        commands.update(_get_commands_from_module(custom_module, inproject))
    return commands
def _pop_command_name(argv):
i = 0
for arg in argv[1:]:
if not arg.startswith('-'):
del argv[i]
return arg
i += 1
def _print_header(inproject):
if inproject:
print "Scrapy %s - project: %s\n" % (scrapy.__version__, \
settings['BOT_NAME'])
else:
print "Scrapy %s - no active project\n" % scrapy.__version__
def _print_commands(inproject):
_print_header(inproject)
print "Usage:"
print " scrapy <command> [options] [args]\n"
print "Available commands:"
cmds = _get_commands_dict(inproject)
for cmdname, cmdclass in sorted(cmds.iteritems()):
print " %-13s %s" % (cmdname, cmdclass.short_desc())
print
print 'Use "scrapy <command> -h" to see more info about a command'
def _print_unknown_command(cmdname, inproject):
_print_header(inproject)
print "Unknown command: %s\n" % cmdname
print 'Use "scrapy" to see available commands'
if not inproject:
print
print "More commands are available in project mode"
def _check_deprecated_scrapy_ctl(argv, inproject):
    """Check if Scrapy was called using the deprecated scrapy-ctl command and
    warn in that case, also creating a scrapy.cfg if it doesn't exist.
    """
    called_via_ctl = any('scrapy-ctl' in x for x in argv)
    if not called_via_ctl:
        return
    import warnings
    warnings.warn("`scrapy-ctl.py` command-line tool is deprecated and will be removed in Scrapy 0.11, use `scrapy` instead",
        ScrapyDeprecationWarning, stacklevel=3)
    if not inproject:
        return
    # project root = two directories above the settings module's file
    settings_mod = settings.settings_module
    projpath = os.path.abspath(
        os.path.dirname(os.path.dirname(settings_mod.__file__)))
    cfg_path = os.path.join(projpath, 'scrapy.cfg')
    if os.path.exists(cfg_path):
        return
    cfg_lines = [
        "# generated automatically - feel free to edit",
        "[settings]",
        "default = %s" % settings_mod.__name__,
    ]
    with open(cfg_path, 'w') as f:
        for line in cfg_lines:
            f.write(line + os.linesep)
def _run_print_help(parser, func, *a, **kw):
try:
func(*a, **kw)
except UsageError, e:
if str(e):
parser.error(str(e))
if e.print_help:
parser.print_help()
sys.exit(2)
def execute(argv=None):
    """Entry point of the `scrapy` command-line tool.

    Locates the command named in *argv* (defaults to sys.argv), lets it
    process its options, runs it, and exits the process with the command's
    exit code. Exits 0 after printing the command list when no command is
    given, and 2 for an unknown command.
    """
    if argv is None:
        argv = sys.argv
    # Install the crawler before anything else so commands can rely on it.
    crawler = CrawlerProcess(settings)
    crawler.install()
    inproject = inside_project()
    _check_deprecated_scrapy_ctl(argv, inproject) # TODO: remove for Scrapy 0.11
    cmds = _get_commands_dict(inproject)
    # Removes the command name from argv in place, so the remaining
    # argv[1:] below contains only options and arguments.
    cmdname = _pop_command_name(argv)
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
        conflict_handler='resolve')
    if not cmdname:
        _print_commands(inproject)
        sys.exit(0)
    elif cmdname not in cmds:
        _print_unknown_command(cmdname, inproject)
        sys.exit(2)
    cmd = cmds[cmdname]
    parser.usage = "scrapy %s %s" % (cmdname, cmd.syntax())
    parser.description = cmd.long_desc()
    # Layer the command's default settings under the global settings
    # before the command parses its options.
    settings.defaults.update(cmd.default_settings)
    cmd.settings = settings
    cmd.add_options(parser)
    opts, args = parser.parse_args(args=argv[1:])
    # Both option processing and the run itself report UsageError through
    # the parser (exit status 2).
    _run_print_help(parser, cmd.process_options, args, opts)
    cmd.set_crawler(crawler)
    _run_print_help(parser, _run_command, cmd, args, opts)
    sys.exit(cmd.exitcode)
def _run_command(cmd, args, opts):
if opts.profile or opts.lsprof:
_run_command_profiled(cmd, args, opts)
else:
cmd.run(args, opts)
def _run_command_profiled(cmd, args, opts):
if opts.profile:
sys.stderr.write("scrapy: writing cProfile stats to %r\n" % opts.profile)
if opts.lsprof:
sys.stderr.write("scrapy: writing lsprof stats to %r\n" % opts.lsprof)
loc = locals()
p = cProfile.Profile()
p.runctx('cmd.run(args, opts)', globals(), loc)
if opts.profile:
p.dump_stats(opts.profile)
k = lsprofcalltree.KCacheGrind(p)
if opts.lsprof:
with open(opts.lsprof, 'w') as f:
k.output(f)
if __name__ == '__main__':
    # Allow invoking this module directly as a script.
    execute()