/usr/share/doc/python-scrapy-doc/html/index.html is in python-scrapy-doc 1.0.3-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <!-- Character encoding and responsive viewport. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Scrapy documentation — Scrapy documentation</title>

  <!-- Theme stylesheet. -->
  <link rel="stylesheet" href="_static/css/theme.css" type="text/css">

  <!-- Document relations: "top" is this page itself, "next" is the overview page. -->
  <link rel="top" title="Scrapy documentation" href="#">
  <link rel="next" title="Scrapy at a glance" href="intro/overview.html">

  <!-- The only script loaded from the head; all other scripts are at the end of the body. -->
  <script src="_static/js/modernizr.min.js"></script>
</head>
<body class="wy-body-for-nav" role="document">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search">
  <!-- Home link; "#" points back at this page. -->
  <a href="#" class="icon icon-home"> Scrapy
  </a>
  <!-- Sidebar search form: sends q, check_keywords and area to search.html via GET. -->
  <div role="search">
    <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
      <!-- A placeholder is not an accessible name, so aria-label provides one
           for assistive technology without changing the visual layout. -->
      <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
      <input type="hidden" name="check_keywords" value="yes" />
      <input type="hidden" name="area" value="default" />
    </form>
  </div>
</div>
<div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
  <!-- Sidebar table of contents. Each list below holds the same pages as one
       of the headed sections in the page body. -->

  <!-- Pages listed under "First steps". -->
  <ul>
    <li class="toctree-l1"><a class="reference internal" href="intro/overview.html">Scrapy at a glance</a></li>
    <li class="toctree-l1"><a class="reference internal" href="intro/install.html">Installation guide</a></li>
    <li class="toctree-l1"><a class="reference internal" href="intro/tutorial.html">Scrapy Tutorial</a></li>
    <li class="toctree-l1"><a class="reference internal" href="intro/examples.html">Examples</a></li>
  </ul>

  <!-- Pages listed under "Basic concepts". -->
  <ul>
    <li class="toctree-l1"><a class="reference internal" href="topics/commands.html">Command line tool</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/spiders.html">Spiders</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/selectors.html">Selectors</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/items.html">Items</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/loaders.html">Item Loaders</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/shell.html">Scrapy shell</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/item-pipeline.html">Item Pipeline</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/feed-exports.html">Feed exports</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/request-response.html">Requests and Responses</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/link-extractors.html">Link Extractors</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/settings.html">Settings</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/exceptions.html">Exceptions</a></li>
  </ul>

  <!-- Pages listed under "Built-in services". -->
  <ul>
    <li class="toctree-l1"><a class="reference internal" href="topics/logging.html">Logging</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/stats.html">Stats Collection</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/email.html">Sending e-mail</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/telnetconsole.html">Telnet Console</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/webservice.html">Web Service</a></li>
  </ul>

  <!-- Pages listed under "Solving specific problems". -->
  <ul>
    <li class="toctree-l1"><a class="reference internal" href="faq.html">Frequently Asked Questions</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/debug.html">Debugging Spiders</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/contracts.html">Spiders Contracts</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/practices.html">Common Practices</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/broad-crawls.html">Broad Crawls</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/firefox.html">Using Firefox for scraping</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/firebug.html">Using Firebug for scraping</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/leaks.html">Debugging memory leaks</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/media-pipeline.html">Downloading and processing files and images</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/ubuntu.html">Ubuntu packages</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/deploy.html">Deploying Spiders</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/autothrottle.html">AutoThrottle extension</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/benchmarking.html">Benchmarking</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/jobs.html">Jobs: pausing and resuming crawls</a></li>
  </ul>

  <!-- Pages listed under "Extending Scrapy". -->
  <ul>
    <li class="toctree-l1"><a class="reference internal" href="topics/architecture.html">Architecture overview</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/downloader-middleware.html">Downloader Middleware</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/spider-middleware.html">Spider Middleware</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/extensions.html">Extensions</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/api.html">Core API</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/signals.html">Signals</a></li>
    <li class="toctree-l1"><a class="reference internal" href="topics/exporters.html">Item Exporters</a></li>
  </ul>

  <!-- Pages listed under "All the rest". -->
  <ul>
    <li class="toctree-l1"><a class="reference internal" href="news.html">Release notes</a></li>
    <li class="toctree-l1"><a class="reference internal" href="contributing.html">Contributing to Scrapy</a></li>
    <li class="toctree-l1"><a class="reference internal" href="versioning.html">Versioning and API Stability</a></li>
  </ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
<nav class="wy-nav-top" role="navigation" aria-label="top navigation">
  <!-- Top bar: an icon carrying the data-toggle="wy-nav-top" hook, then the home link. -->
  <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
  <a href="#">Scrapy</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="breadcrumbs navigation">
  <!-- Breadcrumb trail for this page, with a link to its source text on the side. -->
  <ul class="wy-breadcrumbs">
    <li><a href="#">Docs</a> »</li>
    <li>Scrapy documentation</li>
    <li class="wy-breadcrumbs-aside">
      <a href="_sources/index.txt" rel="nofollow"> View page source</a>
    </li>
  </ul>
  <hr>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<div class="section" id="scrapy-version-documentation">
<span id="topics-index"></span><h1>Scrapy documentation<a class="headerlink" href="#scrapy-version-documentation" title="Permalink to this headline">¶</a></h1>
<p>This documentation contains everything you need to know about Scrapy.</p>
<div class="section" id="getting-help">
  <h2>Getting help<a class="headerlink" href="#getting-help" title="Permalink to this headline">¶</a></h2>
  <p>Having trouble? We’d like to help!</p>
  <!-- Support channels: FAQ, indexes, mailing list, IRC and the issue tracker. -->
  <ul class="simple">
    <li>Try the <a class="reference internal" href="faq.html"><span class="doc">FAQ</span></a> – it’s got answers to some common questions.</li>
    <li>Looking for specific information? Try the <a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a> or <a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a>.</li>
    <!-- Both mailing-list links point at the same Google Groups forum URL. -->
    <li>Search for information in the <a class="reference external" href="https://groups.google.com/forum/#!forum/scrapy-users">archives of the scrapy-users mailing list</a>, or
      <a class="reference external" href="https://groups.google.com/forum/#!forum/scrapy-users">post a question</a>.</li>
    <li>Ask a question in the <a class="reference external" href="irc://irc.freenode.net/scrapy">#scrapy IRC channel</a>.</li>
    <li>Report bugs with Scrapy in our <a class="reference external" href="https://github.com/scrapy/scrapy/issues">issue tracker</a>.</li>
  </ul>
</div>
<div class="section" id="first-steps">
  <h2>First steps<a class="headerlink" href="#first-steps" title="Permalink to this headline">¶</a></h2>
  <!-- This wrapper has no visible entries; the links are rendered by the list below. -->
  <div class="toctree-wrapper compound">
  </div>
  <!-- Each dt links to a page; the dd after it is a one-line description. -->
  <dl class="docutils">
    <dt><a class="reference internal" href="intro/overview.html"><span class="doc">Scrapy at a glance</span></a></dt>
    <dd>Understand what Scrapy is and how it can help you.</dd>

    <dt><a class="reference internal" href="intro/install.html"><span class="doc">Installation guide</span></a></dt>
    <dd>Get Scrapy installed on your computer.</dd>

    <dt><a class="reference internal" href="intro/tutorial.html"><span class="doc">Scrapy Tutorial</span></a></dt>
    <dd>Write your first Scrapy project.</dd>

    <dt><a class="reference internal" href="intro/examples.html"><span class="doc">Examples</span></a></dt>
    <dd>Learn more by playing with a pre-made Scrapy project.</dd>
  </dl>
</div>
<div class="section" id="basic-concepts">
  <!-- The empty span gives this heading a second anchor id, "section-basics". -->
  <span id="section-basics"></span><h2>Basic concepts<a class="headerlink" href="#basic-concepts" title="Permalink to this headline">¶</a></h2>
  <!-- This wrapper has no visible entries; the links are rendered by the list below. -->
  <div class="toctree-wrapper compound">
  </div>
  <!-- Each dt links to a topic page; the dd after it is a one-line description. -->
  <dl class="docutils">
    <dt><a class="reference internal" href="topics/commands.html"><span class="doc">Command line tool</span></a></dt>
    <dd>Learn about the command-line tool used to manage your Scrapy project.</dd>

    <dt><a class="reference internal" href="topics/spiders.html"><span class="doc">Spiders</span></a></dt>
    <dd>Write the rules to crawl your websites.</dd>

    <dt><a class="reference internal" href="topics/selectors.html"><span class="doc">Selectors</span></a></dt>
    <dd>Extract the data from web pages using XPath.</dd>

    <dt><a class="reference internal" href="topics/shell.html"><span class="doc">Scrapy shell</span></a></dt>
    <dd>Test your extraction code in an interactive environment.</dd>

    <dt><a class="reference internal" href="topics/items.html"><span class="doc">Items</span></a></dt>
    <dd>Define the data you want to scrape.</dd>

    <dt><a class="reference internal" href="topics/loaders.html"><span class="doc">Item Loaders</span></a></dt>
    <dd>Populate your items with the extracted data.</dd>

    <dt><a class="reference internal" href="topics/item-pipeline.html"><span class="doc">Item Pipeline</span></a></dt>
    <dd>Post-process and store your scraped data.</dd>

    <dt><a class="reference internal" href="topics/feed-exports.html"><span class="doc">Feed exports</span></a></dt>
    <dd>Output your scraped data using different formats and storages.</dd>

    <dt><a class="reference internal" href="topics/request-response.html"><span class="doc">Requests and Responses</span></a></dt>
    <dd>Understand the classes used to represent HTTP requests and responses.</dd>

    <dt><a class="reference internal" href="topics/link-extractors.html"><span class="doc">Link Extractors</span></a></dt>
    <dd>Convenient classes to extract links to follow from pages.</dd>

    <dt><a class="reference internal" href="topics/settings.html"><span class="doc">Settings</span></a></dt>
    <dd>Learn how to configure Scrapy and see all <a class="reference internal" href="topics/settings.html#topics-settings-ref"><span class="std std-ref">available settings</span></a>.</dd>

    <dt><a class="reference internal" href="topics/exceptions.html"><span class="doc">Exceptions</span></a></dt>
    <dd>See all available exceptions and their meaning.</dd>
  </dl>
</div>
<div class="section" id="built-in-services">
  <h2>Built-in services<a class="headerlink" href="#built-in-services" title="Permalink to this headline">¶</a></h2>
  <!-- This wrapper has no visible entries; the links are rendered by the list below. -->
  <div class="toctree-wrapper compound">
  </div>
  <!-- Each dt links to a topic page; the dd after it is a one-line description. -->
  <dl class="docutils">
    <dt><a class="reference internal" href="topics/logging.html"><span class="doc">Logging</span></a></dt>
    <dd>Learn how to use Python’s builtin logging on Scrapy.</dd>

    <dt><a class="reference internal" href="topics/stats.html"><span class="doc">Stats Collection</span></a></dt>
    <dd>Collect statistics about your scraping crawler.</dd>

    <dt><a class="reference internal" href="topics/email.html"><span class="doc">Sending e-mail</span></a></dt>
    <dd>Send email notifications when certain events occur.</dd>

    <dt><a class="reference internal" href="topics/telnetconsole.html"><span class="doc">Telnet Console</span></a></dt>
    <dd>Inspect a running crawler using a built-in Python console.</dd>

    <dt><a class="reference internal" href="topics/webservice.html"><span class="doc">Web Service</span></a></dt>
    <dd>Monitor and control a crawler using a web service.</dd>
  </dl>
</div>
<div class="section" id="solving-specific-problems">
  <h2>Solving specific problems<a class="headerlink" href="#solving-specific-problems" title="Permalink to this headline">¶</a></h2>
  <!-- This wrapper has no visible entries; the links are rendered by the list below. -->
  <div class="toctree-wrapper compound">
  </div>
  <!-- Each dt links to a page; the dd after it is a one-line description.
       Descriptions are full sentences ending in a period, with "Scrapy" capitalised. -->
  <dl class="docutils">
    <dt><a class="reference internal" href="faq.html"><span class="doc">Frequently Asked Questions</span></a></dt>
    <dd>Get answers to most frequently asked questions.</dd>

    <dt><a class="reference internal" href="topics/debug.html"><span class="doc">Debugging Spiders</span></a></dt>
    <dd>Learn how to debug common problems of your Scrapy spider.</dd>

    <dt><a class="reference internal" href="topics/contracts.html"><span class="doc">Spiders Contracts</span></a></dt>
    <dd>Learn how to use contracts for testing your spiders.</dd>

    <dt><a class="reference internal" href="topics/practices.html"><span class="doc">Common Practices</span></a></dt>
    <dd>Get familiar with some Scrapy common practices.</dd>

    <dt><a class="reference internal" href="topics/broad-crawls.html"><span class="doc">Broad Crawls</span></a></dt>
    <dd>Tune Scrapy for crawling a lot of domains in parallel.</dd>

    <dt><a class="reference internal" href="topics/firefox.html"><span class="doc">Using Firefox for scraping</span></a></dt>
    <dd>Learn how to scrape with Firefox and some useful add-ons.</dd>

    <dt><a class="reference internal" href="topics/firebug.html"><span class="doc">Using Firebug for scraping</span></a></dt>
    <dd>Learn how to scrape efficiently using Firebug.</dd>

    <dt><a class="reference internal" href="topics/leaks.html"><span class="doc">Debugging memory leaks</span></a></dt>
    <dd>Learn how to find and get rid of memory leaks in your crawler.</dd>

    <dt><a class="reference internal" href="topics/media-pipeline.html"><span class="doc">Downloading and processing files and images</span></a></dt>
    <dd>Download files and/or images associated with your scraped items.</dd>

    <dt><a class="reference internal" href="topics/ubuntu.html"><span class="doc">Ubuntu packages</span></a></dt>
    <dd>Install the latest Scrapy packages easily on Ubuntu.</dd>

    <dt><a class="reference internal" href="topics/deploy.html"><span class="doc">Deploying Spiders</span></a></dt>
    <dd>Deploy your Scrapy spiders and run them on a remote server.</dd>

    <dt><a class="reference internal" href="topics/autothrottle.html"><span class="doc">AutoThrottle extension</span></a></dt>
    <dd>Adjust crawl rate dynamically based on load.</dd>

    <dt><a class="reference internal" href="topics/benchmarking.html"><span class="doc">Benchmarking</span></a></dt>
    <dd>Check how Scrapy performs on your hardware.</dd>

    <dt><a class="reference internal" href="topics/jobs.html"><span class="doc">Jobs: pausing and resuming crawls</span></a></dt>
    <dd>Learn how to pause and resume crawls for large spiders.</dd>
  </dl>
</div>
<div class="section" id="extending-scrapy">
  <!-- The empty span gives this heading a second anchor id, "id1". -->
  <span id="id1"></span><h2>Extending Scrapy<a class="headerlink" href="#extending-scrapy" title="Permalink to this headline">¶</a></h2>
  <!-- This wrapper has no visible entries; the links are rendered by the list below. -->
  <div class="toctree-wrapper compound">
  </div>
  <!-- Each dt links to a topic page; the dd after it is a one-line description.
       Every description ends in a period, matching the other sections of this page. -->
  <dl class="docutils">
    <dt><a class="reference internal" href="topics/architecture.html"><span class="doc">Architecture overview</span></a></dt>
    <dd>Understand the Scrapy architecture.</dd>

    <dt><a class="reference internal" href="topics/downloader-middleware.html"><span class="doc">Downloader Middleware</span></a></dt>
    <dd>Customize how pages get requested and downloaded.</dd>

    <dt><a class="reference internal" href="topics/spider-middleware.html"><span class="doc">Spider Middleware</span></a></dt>
    <dd>Customize the input and output of your spiders.</dd>

    <dt><a class="reference internal" href="topics/extensions.html"><span class="doc">Extensions</span></a></dt>
    <dd>Extend Scrapy with your custom functionality.</dd>

    <dt><a class="reference internal" href="topics/api.html"><span class="doc">Core API</span></a></dt>
    <dd>Use it on extensions and middlewares to extend Scrapy functionality.</dd>

    <dt><a class="reference internal" href="topics/signals.html"><span class="doc">Signals</span></a></dt>
    <dd>See all available signals and how to work with them.</dd>

    <dt><a class="reference internal" href="topics/exporters.html"><span class="doc">Item Exporters</span></a></dt>
    <dd>Quickly export your scraped items to a file (XML, CSV, etc).</dd>
  </dl>
</div>
<div class="section" id="all-the-rest">
  <h2>All the rest<a class="headerlink" href="#all-the-rest" title="Permalink to this headline">¶</a></h2>
  <!-- This wrapper has no visible entries; the links are rendered by the list below. -->
  <div class="toctree-wrapper compound">
  </div>
  <!-- Each dt links to a page; the dd after it is a one-line description. -->
  <dl class="docutils">
    <dt><a class="reference internal" href="news.html"><span class="doc">Release notes</span></a></dt>
    <dd>See what has changed in recent Scrapy versions.</dd>

    <dt><a class="reference internal" href="contributing.html"><span class="doc">Contributing to Scrapy</span></a></dt>
    <dd>Learn how to contribute to the Scrapy project.</dd>

    <dt><a class="reference internal" href="versioning.html"><span class="doc">Versioning and API Stability</span></a></dt>
    <dd>Understand Scrapy versioning and API stability.</dd>
  </dl>
</div>
</div>
</div>
</div>
<footer>
  <!-- Pager: this page offers only a "Next" link (access key "n"). -->
  <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
    <!-- The arrow is a decorative icon-font glyph; aria-hidden keeps it out of
         the link's accessible name, which is already "Next". -->
    <a href="intro/overview.html" class="btn btn-neutral float-right" title="Scrapy at a glance" accesskey="n">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
  </div>
  <hr/>
  <!-- Copyright and build date. -->
  <div role="contentinfo">
    <p>
      © Copyright 2008-2016, Scrapy developers.
      Last updated on July 11, 2016.
    </p>
  </div>
  <!-- Tooling credits; every external link uses https. -->
  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script type="text/javascript">
  // Global page settings, declared before the external scripts below are loaded.
  // NOTE(review): presumably read by those scripts (e.g. doctools.js) — confirm.
  var DOCUMENTATION_OPTIONS = {
    URL_ROOT: './',          // relative path to the documentation root
    VERSION: '',             // empty in this build
    COLLAPSE_INDEX: false,
    FILE_SUFFIX: '.html',
    HAS_SOURCE: true
  };
</script>
<!-- External scripts, in load order: jQuery, Underscore, Sphinx doctools, theme. -->
<script type="text/javascript" src="_static/jquery.js"></script>
<script type="text/javascript" src="_static/underscore.js"></script>
<script type="text/javascript" src="_static/doctools.js"></script>
<script type="text/javascript" src="_static/js/theme.js"></script>

<!-- Enable the theme's sticky navigation; a function passed to jQuery() runs once the DOM is ready. -->
<script type="text/javascript">
  jQuery(function () { SphinxRtdTheme.StickyNav.enable(); });
</script>
</body>
</html>
|