/usr/share/pyshared/scrapyd/website.py is in python-scrapy 0.14.4-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from datetime import datetime
from twisted.web import resource, static
from twisted.application.service import IServiceCollection
from .interfaces import IPoller, IEggStorage, ISpiderScheduler
from . import webservice
class Root(resource.Resource):
    """Top-level web resource of the Scrapyd site.

    On construction it attaches the home page, the ``*.json`` web-service
    endpoints, a static file resource for the logs directory and the
    process monitor as children, then asks the poller and the scheduler to
    refresh their project lists.
    """

    def __init__(self, config, app):
        """Configure the resource tree.

        ``config`` is queried through ``getboolean('debug', False)``,
        ``get('runner')`` and ``get('logs_dir')``; ``app`` is stored and
        used by the component properties defined on this class.
        """
        resource.Resource.__init__(self)
        self.debug = config.getboolean('debug', False)
        self.runner = config.get('runner')
        logs_path = config.get('logs_dir')
        self.app = app

        # Children that are built from this root alone, in registration
        # order: (URL path segment, resource factory).
        root_children = (
            ('', Home),
            ('schedule.json', webservice.Schedule),
            ('addversion.json', webservice.AddVersion),
            ('listprojects.json', webservice.ListProjects),
            ('listversions.json', webservice.ListVersions),
            ('listspiders.json', webservice.ListSpiders),
            ('delproject.json', webservice.DeleteProject),
            ('delversion.json', webservice.DeleteVersion),
            ('listjobs.json', webservice.ListJobs),
        )
        for path, factory in root_children:
            self.putChild(path, factory(self))

        # Log files are served as plain text straight from the logs dir.
        self.putChild('logs', static.File(logs_path, 'text/plain'))
        self.putChild('procmon', ProcessMonitor(self))

        self.update_projects()

    def update_projects(self):
        """Refresh the project list of the poller, then of the scheduler."""
        self.poller.update_projects()
        self.scheduler.update_projects()

    @property
    def launcher(self):
        """Service named ``'launcher'`` from the app's service collection."""
        return IServiceCollection(self.app, self.app).getServiceNamed('launcher')

    @property
    def scheduler(self):
        """Component the app returns for ``ISpiderScheduler``."""
        return self.app.getComponent(ISpiderScheduler)

    @property
    def eggstorage(self):
        """Component the app returns for ``IEggStorage``."""
        return self.app.getComponent(IEggStorage)

    @property
    def poller(self):
        """Component the app returns for ``IPoller``."""
        return self.app.getComponent(IPoller)
class Home(resource.Resource):
    """Home page: lists the available projects and links to other pages."""

    def __init__(self, root):
        """Keep a reference to ``root``, whose scheduler is read on render."""
        resource.Resource.__init__(self)
        self.root = root

    def render_GET(self, txrequest):
        """Return the home page HTML as a string.

        The project names come from ``self.root.scheduler.list_projects()``
        and are joined with ``', '``. ``txrequest`` is not used.
        """
        # NOTE(review): project names are interpolated without HTML
        # escaping -- confirm they can never contain markup.
        context = {
            'projects': ', '.join(self.root.scheduler.list_projects()),
        }
        # The "Logs" entry previously lacked its closing </a> tag.
        return """
<html>
<head><title>Scrapyd</title></head>
<body>
<h1>Scrapyd</h1>
<p>Available projects: <b>%(projects)s</b></p>
<ul>
<li><a href="/procmon">Process monitor</a></li>
<li><a href="/logs/">Logs</a></li>
<li><a href="http://doc.scrapy.org/en/latest/topics/scrapyd.html">Documentation</a></li>
</ul>
<h2>How to schedule a spider?</h2>
<p>To schedule a spider you need to use the API (this web UI is only for
monitoring)</p>
<p>Example using <a href="http://curl.haxx.se/">curl</a>:</p>
<p><code>curl http://localhost:6800/schedule.json -d project=default -d spider=somespider</code></p>
<p>For more information about the API, see the <a href="http://doc.scrapy.org/topics/scrapyd.html">Scrapyd documentation</a></p>
</body>
</html>
""" % context
class ProcessMonitor(resource.Resource):
    """HTML table of the processes currently held by the launcher."""

    def __init__(self, root):
        """Keep a reference to ``root``, whose launcher is read on render."""
        resource.Resource.__init__(self)
        self.root = root

    def render(self, txrequest):
        """Return the process monitor page as a string.

        One table row is produced per value of
        ``self.root.launcher.processes``, showing its ``project``,
        ``spider``, ``job`` and ``pid`` attributes, the time elapsed since
        its ``start_time`` and a link to its log file under ``/logs/``.
        ``txrequest`` is not used.
        """
        # Take the time once so every row is measured against the same
        # instant.
        now = datetime.now()

        # Collect fragments and join once instead of repeated "+=".
        parts = [
            # <head> is now closed properly (was "</title></title>").
            "<html><head><title>Scrapyd</title></head>",
            "<body>",
            "<h1>Process monitor</h1>",
            "<p><a href='..'>Go back</a></p>",
            "<table border='1'>",
            "<tr>",
            "<th>Project</th><th>Spider</th><th>Job</th><th>PID</th><th>Runtime</th><th>Log</th>",
            "</tr>",
        ]
        # NOTE(review): attribute values are interpolated without HTML
        # escaping -- confirm they can never contain markup.
        for proc in self.root.launcher.processes.values():
            parts.append("<tr>")
            for attr in ('project', 'spider', 'job', 'pid'):
                parts.append("<td>%s</td>" % getattr(proc, attr))
            parts.append("<td>%s</td>" % (now - proc.start_time))
            parts.append(
                "<td><a href='/logs/%s/%s/%s.log'>Log</a></td>"
                % (proc.project, proc.spider, proc.job)
            )
            parts.append("</tr>")
        parts.append("</table>")
        parts.append("</body>")
        parts.append("</html>")
        return "".join(parts)
|