/usr/bin/dosage is in dosage 2.12-1.
This file is owned by root:root, with mode 0o755.
The actual contents of the file can be viewed below.
#! /usr/bin/python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2005 Tristan Seligmann and Jonathan Jacobs
# Copyright (C) 2012-2014 Bastian Kleineidam
#    ___
#   /   \___  ___  __ _  __ _  ___
#  / /\ / _ \/ __|/ _` |/ _` |/ _ \
# / /_// (_) \__ \ (_| | (_| |  __/
# /___,' \___/|___/\__,_|\__, |\___|
#                        |___/
from __future__ import division, print_function
import sys
import os
import argparse
import pydoc
import threading
from io import StringIO
try:
    from Queue import Queue, Empty
except ImportError:
    from queue import Queue, Empty
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse
from dosagelib import events, scraper, configuration, singleton
from dosagelib.output import out
from dosagelib.util import internal_error, getDirname, strlimit, getLangName
from dosagelib.ansicolor import get_columns


class ArgumentParser(argparse.ArgumentParser):
    """Custom argument parser."""

    def print_help(self, file=None):
        """Paginate help message on TTYs."""
        msg = self.format_help()
        if file is None:
            file = sys.stdout
        if hasattr(file, "isatty") and file.isatty():
            pydoc.pager(msg)
        else:
            print(msg, file=file)
Examples = """\
EXAMPLES
List available comics (ca. 3000 at the moment):
dosage -l
Get the latest comic of for example CalvinAndHobbes and save it in the "Comics"
directory:
dosage CalvinAndHobbes
If you already have downloaded several comics and want to get the latest
strips of all of them:
dosage --continue @
"""


def setupOptions():
    """Construct option parser.
    @return: new option parser
    @rtype: argparse.ArgumentParser
    """
    kwargs = dict(
        description="A comic downloader and archiver.",
        epilog=Examples,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser = ArgumentParser(**kwargs)
    parser.add_argument('-v', '--verbose', action='count', default=0, help='provides verbose output, use multiple times for more verbosity')
    parser.add_argument('-n', '--numstrips', action='store', type=int, default=0, help='traverse and retrieve the given number of comic strips; use --all to retrieve all comic strips')
    parser.add_argument('-a', '--all', action='store_true', help='traverse and retrieve all comic strips')
    parser.add_argument('-c', '--continue', action='store_true', dest='cont', help='traverse and retrieve comic strips until an existing one is found')
    parser.add_argument('-b', '--basepath', action='store', default='Comics', help='set the path to create individual comic directories in, default is Comics', metavar='PATH')
    parser.add_argument('--baseurl', action='store', help='the base URL of your comics directory (for RSS, HTML, etc.); this should correspond to --basepath', metavar='PATH')
    parser.add_argument('-l', '--list', action='store_true', help='list available comic modules')
    parser.add_argument('--singlelist', action='store_true', help='list available comic modules in a single list')
    parser.add_argument('--version', action='store_true', help='display the version number')
    parser.add_argument('--vote', action='store_true', help='vote for the selected comics')
    parser.add_argument('-m', '--modulehelp', action='store_true', help='display help for comic modules')
    parser.add_argument('-t', '--timestamps', action='store_true', help='print timestamps for all output at any info level')
    parser.add_argument('-o', '--output', action='append', dest='handler', choices=events.getHandlerNames(), help='sets output handlers for downloaded comics')
    parser.add_argument('--adult', action='store_true', help='confirms that you are old enough to view adult content')
    # used for development testing prev/next matching
    parser.add_argument('--dry-run', action='store_true', help=argparse.SUPPRESS)
    # multimatch is only used for development, e.g. testing if all comics of a scripted plugin are working
    parser.add_argument('--multimatch', action='store_true', help=argparse.SUPPRESS)
    parser.add_argument('comic', nargs='*', help='comic module name (including case insensitive substrings)')
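    # enable optional shell tab completion if the third-party argcomplete module is installed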
    try:
        import argcomplete
        argcomplete.autocomplete(parser)
    except ImportError:
        pass
    return parser


def displayVersion(verbose):
    """Display application name, version, copyright and license."""
    print(configuration.App)
    print(configuration.Copyright)
    print(configuration.Freeware)
    print("For support see", configuration.SupportUrl)
    if verbose:
        # search for updates
        from dosagelib.updater import check_update
        result, value = check_update()
        if result:
            if value:
                version, url = value
                if url is None:
                    # current version is newer than online version
                    text = ('Detected local or development version %(currentversion)s. '
                            'Available version of %(app)s is %(version)s.')
                else:
                    # display update link
                    text = ('A new version %(version)s of %(app)s is '
                            'available at %(url)s.')
                attrs = dict(version=version, app=configuration.AppName,
                             url=url, currentversion=configuration.Version)
                print(text % attrs)
        else:
            if value is None:
                value = 'invalid update file syntax'
            text = ('An error occurred while checking for an '
                    'update of %(app)s: %(error)s.')
            attrs = dict(error=value, app=configuration.AppName)
            print(text % attrs)
    return 0


def setOutputInfo(options):
    """Set global output level and timestamp option."""
    out.level = 0
    out.level += options.verbose
    out.timestamps = options.timestamps
    # debug urllib3
    #from requests.packages.urllib3 import add_stderr_logger
    #add_stderr_logger()


def saveComicStrip(strip, basepath, dryrun):
    """Save a comic strip which can consist of multiple images."""
    errors = 0
    allskipped = True
    for image in strip.getImages():
        try:
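            # a dry run saves nothing, so the image counts as not saved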
            if dryrun:
                filename, saved = "", False
            else:
                filename, saved = image.save(basepath)
            if saved:
                allskipped = False
        except Exception as msg:
            out.exception('Could not save image at %s to %s: %r' % (image.referrer, image.filename, msg))
            errors += 1
    return errors, allskipped


def displayHelp(options):
    """Print help for comic strips."""
    errors = 0
    try:
        for scraperobj in getScrapers(options.comic, options.basepath):
            errors += displayComicHelp(scraperobj)
    except ValueError as msg:
        out.exception(msg)
        return 2
    return errors


def displayComicHelp(scraperobj):
    """Print description and help for a comic."""
    orig_context = out.context
    out.context = getScraperName(scraperobj)
    try:
        out.info(u"URL: " + scraperobj.url)
        if scraperobj.description:
            out.info(u"Description: " + scraperobj.description)
        if scraperobj.lang:
            out.info(u"Language: " + getLangName(scraperobj.lang))
        if scraperobj.genres:
            out.info(u"Genres: " + ", ".join(scraperobj.genres))
        if scraperobj.help:
            for line in scraperobj.help.splitlines():
                out.info(line)
        return 0
    except ValueError as msg:
        out.exception(msg)
        return 1
    finally:
        out.context = orig_context


# the comic scraper job queue
jobs = Queue()
# ensure threads download only from one host at a time
host_locks = {}


def get_hostname(url):
    """Get hostname from URL."""
    return urlparse(url).netloc.lower()


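# global lock guarding host_locks and the comic_errors counter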
lock = threading.Lock()


def get_host_lock(url):
    """Get lock object for given URL host."""
    hostname = get_hostname(url)
    return host_locks.setdefault(hostname, threading.Lock())


comic_errors = 0


class ComicGetter(threading.Thread):
    """Get all strips of a comic in a thread."""

    def __init__(self, options):
        """Store options."""
        super(ComicGetter, self).__init__()
        self.options = options
        self.origname = self.getName()

    def run(self):
        """Process from queue until it is empty."""
        global comic_errors
        while True:
            try:
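                # non-blocking get; raises Empty once all jobs are taken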
                scraperobj = jobs.get(False)
                self.setName(scraperobj.getName())
                with lock:
                    host_lock = get_host_lock(scraperobj.url)
                with host_lock:
                    errors = getStrips(scraperobj, self.options)
                with lock:
                    comic_errors += errors
                jobs.task_done()
                self.setName(self.origname)
            except Empty:
                break


def getComics(options):
    """Retrieve comics."""
    if options.handler:
        for name in set(options.handler):
            events.addHandler(name, options.basepath, options.baseurl)
    events.getHandler().start()
    errors = 0
    try:
        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
            jobs.put(scraperobj)
        # start threads
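        # use at most 10 download threads, and no more threads than queued jobs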
        num_threads = max(1, min(10, jobs.qsize()))
        for i in range(num_threads):
            ComicGetter(options).start()
        # wait for threads to finish
        jobs.join()
    except ValueError as msg:
        out.exception(msg)
        errors += 1
    finally:
        events.getHandler().end()
    return errors + comic_errors


def voteComics(options):
    """Vote for comics."""
    errors = 0
    try:
        for scraperobj in getScrapers(options.comic, options.basepath, options.adult, options.multimatch):
            errors += voteComic(scraperobj)
    except ValueError as msg:
        out.exception(msg)
        errors += 1
    return errors


def voteComic(scraperobj):
    """Vote for given comic scraper."""
    errors = 0
    orig_context = out.context
    out.context = getScraperName(scraperobj)
    try:
        name = scraperobj.getName()
        answer = scraperobj.vote()
        out.debug(u'Vote answer %r' % answer)
        if answer == 'counted':
            url = configuration.Url + 'comics/%s.html' % name.replace('/', '_')
            out.info(u'Vote submitted. Votes are updated regularly at %s.' % url)
        elif answer == 'no':
            out.info(u'Vote not submitted - you have already voted for this comic.')
        elif answer == 'noname':
            out.warn(u'The comic %s cannot be voted for.' % name)
        else:
            out.warn(u'Error submitting vote parameters: %r' % answer)
    except Exception as msg:
        out.exception(msg)
        errors += 1
    finally:
        out.context = orig_context
    return errors


def getStrips(scraperobj, options):
    """Get all strips from a scraper."""
    errors = 0
    if options.all or options.cont:
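        # None means: retrieve an unlimited number of strips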
        numstrips = None
    elif options.numstrips:
        numstrips = options.numstrips
    else:
        # get current strip
        numstrips = 1
    try:
        if scraperobj.isComplete(options.basepath):
            out.info(u"All comics are already downloaded.")
            return 0
        for strip in scraperobj.getStrips(numstrips):
            _errors, skipped = saveComicStrip(strip, options.basepath, options.dry_run)
            errors += _errors
            if skipped and options.cont:
                # stop when retrieval skipped an image for one comic strip
                out.info(u"Stopping retrieval because the image file already exists")
                break
        if options.all and not (errors or options.dry_run or
                                options.cont or scraperobj.indexes):
            scraperobj.setComplete(options.basepath)
    except Exception as msg:
        out.exception(msg)
        errors += 1
    return errors


def run(options):
    """Execute comic commands."""
    setOutputInfo(options)
    # ensure only one instance of dosage is running
    me = singleton.SingleInstance()
    if options.version:
        return displayVersion(options.verbose)
    if options.list:
        return doList()
    if options.singlelist:
        return doList(columnList=False, verbose=options.verbose)
    # after this a list of comic strips is needed
    if not options.comic:
        out.warn(u'No comics specified, bailing out!')
        return 1
    if options.modulehelp:
        return displayHelp(options)
    if options.vote:
        return voteComics(options)
    return getComics(options)


def doList(columnList=True, verbose=False):
    """List available comics."""
    orig_context = out.context
    out.context = u''
    try:
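        # when stdout is a terminal, buffer the listing and run it through a pager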
        page = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()
        if page:
            fd = StringIO(u'')
            out.setStream(fd)
        out.info(u'Available comic scrapers:')
        out.info(u'Comics tagged with [%s] require age confirmation with the --adult option.' % TAG_ADULT)
        out.info(u'Non-English comics are tagged with [%s].' % TAG_LANG)
        scrapers = sorted(getScrapers(['@@']), key=lambda s: s.getName())
        if columnList:
            num = doColumnList(scrapers)
        else:
            num = doSingleList(scrapers, verbose=verbose)
        out.info(u'%d supported comics.' % num)
        if page:
            pydoc.pager(fd.getvalue())
        return 0
    finally:
        out.context = orig_context


def doSingleList(scrapers, verbose=False):
    """Print a list of scraper names, one per line."""
    num = 0
    # count from 1 so num equals the number of listed scrapers
    for num, scraperobj in enumerate(scrapers, 1):
        if verbose:
            displayComicHelp(scraperobj)
        else:
            out.info(getScraperName(scraperobj))
    return num


def doColumnList(scrapers):
    """Print a list of scraper names, multiple names per line."""
    screenWidth = get_columns(sys.stdout)
    # limit name length so at least two columns are there
    limit = (screenWidth // 2) - 8
    names = [getScraperName(scraperobj, limit=limit) for scraperobj in scrapers]
    num = len(names)
    maxlen = max(len(name) for name in names)
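    # fit as many fixed-width name columns on a line as the screen allows, at least one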
    namesPerLine = max(screenWidth // (maxlen + 1), 1)
    while names:
        out.info(u''.join(name.ljust(maxlen) for name in names[:namesPerLine]))
        del names[:namesPerLine]
    return num
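

# tags appended in brackets to scraper names in listings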
TAG_ADULT = "adult"
TAG_LANG = "lang"


def getScraperName(scraperobj, limit=None):
    """Get comic scraper name."""
    tags = []
    if scraperobj.adult:
        tags.append(TAG_ADULT)
    if scraperobj.lang != "en":
        tags.append("%s:%s" % (TAG_LANG, scraperobj.lang))
    if tags:
        suffix = " [" + ", ".join(tags) + "]"
    else:
        suffix = ""
    name = scraperobj.getName()
    if limit is not None:
        name = strlimit(name, limit)
    return name + suffix


def getScrapers(comics, basepath=None, adult=True, multiple_allowed=False):
    """Get scraper objects for the given comics."""
    if '@' in comics:
        # only scrapers whose directory already exists
        if len(comics) > 1:
            out.warn(u"using '@' as comic name ignores all other specified comics.")
        for scraperclass in scraper.get_scraperclasses():
            dirname = getDirname(scraperclass.getName())
            if os.path.isdir(os.path.join(basepath, dirname)):
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                yield scraperclass()
    elif '@@' in comics:
        # all scrapers
        for scraperclass in scraper.get_scraperclasses():
            if not adult and scraperclass.adult:
                warn_adult(scraperclass)
                continue
            yield scraperclass()
    else:
        # get only selected comic scrapers
        # store them in a set to eliminate duplicates
        scrapers = set()
        for comic in comics:
            # Helpful when using shell completion to pick comics to get
            comic = comic.rstrip(os.path.sep)
            if basepath and comic.startswith(basepath):
                # make the following command work:
                # find Comics -type d | xargs -n1 -P10 dosage -b Comics
                comic = comic[len(basepath):].lstrip(os.sep)
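            # a comic name may carry strip indexes, e.g. "Name:index1,index2"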
            if ':' in comic:
                name, index = comic.split(':', 1)
                indexes = index.split(',')
            else:
                name = comic
                indexes = None
            scraperclasses = scraper.find_scraperclasses(name, multiple_allowed=multiple_allowed)
            for scraperclass in scraperclasses:
                if not adult and scraperclass.adult:
                    warn_adult(scraperclass)
                    continue
                scraperobj = scraperclass(indexes=indexes)
                if scraperobj not in scrapers:
                    scrapers.add(scraperobj)
                    yield scraperobj


def warn_adult(scraperclass):
    """Print warning about adult content."""
    out.warn(u"skipping adult comic %s; use the --adult option to confirm your age" % scraperclass.getName())


def main():
    """Parse options and execute commands."""
    try:
        options = setupOptions().parse_args()
        options.basepath = os.path.expanduser(options.basepath)
        res = run(options)
    except KeyboardInterrupt:
        print("Aborted.")
        res = 1
    except Exception:
        internal_error()
        res = 2
    return res


def profile():
    """Profile the loading of all scrapers."""
    import cProfile
    cProfile.run("scraper.get_scraperclasses()", "dosage.prof")


def viewprof():
    """View profile stats."""
    import pstats
    stats = pstats.Stats("dosage.prof")
    stats.strip_dirs().sort_stats("cumulative").print_stats(100)


if __name__ == '__main__':
    sys.exit(main())
    #profile()
    #viewprof()