/usr/share/pyshared/pdfrw/buildxobj.py is in python-pdfrw 0+svn136-3.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# A part of pdfrw (pdfrw.googlecode.com)
# Copyright (C) 2006-2009 Patrick Maupin, Austin, Texas
# MIT license -- See LICENSE.txt for details
'''
This module contains code to build PDF "Form XObjects".
A Form XObject allows a fragment from one PDF file to be cleanly
included in another PDF file.
Reference for syntax: "Parameters for opening PDF files" from SDK 8.1
http://www.adobe.com/devnet/acrobat/pdfs/pdf_open_parameters.pdf
supported 'page=xxx', 'viewrect=<left>,<top>,<width>,<height>'
Units are in points
Reference for content: Adobe PDF reference, sixth edition, version 1.7
http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
Form xobjects discussed chapter 4.9, page 355
'''
from pdfobjects import PdfDict, PdfArray, PdfName
from pdfreader import PdfReader
class ViewInfo(object):
    ''' Instantiate ViewInfo with a uri, and it will parse out
        the filename, page, and viewrect into object attributes.

        The uri looks like ``name.pdf#page=3&viewrect=10,20,30,40``.
        Options follow the first '#' and may be separated from each
        other by either '&' or '#'.  If the string starts directly
        with a known option (``page=`` or ``viewrect=``) there is no
        filename part and ``docname`` stays None.

        Attributes set by parsing:
            docname  -- the text before the first '#'
            page     -- int, from ``page=<n>``
            viewrect -- list of four floats, from
                        ``viewrect=<left>,<top>,<width>,<height>``

        Any keyword argument overrides the attribute of the same
        name; the name must be one of the attributes declared on the
        class (doc, docname, page, viewrect).

        Raises AssertionError when an option has the wrong number of
        values or a keyword names an unknown attribute, and
        ValueError when an option is not of the form key=value or a
        value is not numeric.  Unknown option names are logged and
        otherwise ignored.
    '''
    # Class-level defaults: __init__ only overrides what the uri or
    # the keyword arguments actually supply.
    doc = None
    docname = None
    page = None
    viewrect = None

    def __init__(self, pageinfo='', **kw):
        # Imported here because this module defines no logger of its
        # own; the original code referenced an undefined name ``log``.
        import logging

        parts = pageinfo.split('#', 1)
        if len(parts) == 2:
            # Normalise '&' separators to '#' so one split finds
            # every option.
            parts[1:] = parts[1].replace('&', '#').split('#')

        # The first part is a filename unless it is itself an option.
        for key in 'page viewrect'.split():
            if parts[0].startswith(key + '='):
                break
        else:
            self.docname = parts.pop(0)

        for item in parts:
            key, value = item.split('=')
            key = key.strip()
            # Values may be separated by commas and/or whitespace.
            value = value.replace(',', ' ').split()
            if key == 'page':
                assert len(value) == 1
                self.page = int(value[0])
            elif key == 'viewrect':
                assert len(value) == 4
                self.viewrect = [float(x) for x in value]
            else:
                logging.getLogger(__name__).error('Unknown option: %s', key)

        # items() rather than iteritems(): identical result here and
        # available on every Python version.
        for key, value in kw.items():
            assert hasattr(self, key), key
            setattr(self, key, value)
def getrects(inheritable, pageinfo):
    ''' Work out the two rectangles needed to place a page view.

        inheritable supplies MediaBox and CropBox; pageinfo supplies
        viewrect (None, or x, y, width, height with x and y measured
        from the media box's left and top edges).

        Returns (media box, clip box), both as 4-tuples of floats in
        left, bottom, right, top order.  With no viewrect the clip
        box is the CropBox if there is one, else the media box.
        With a viewrect the clip box is that rectangle clamped so it
        never extends outside the media box.
    '''
    media = tuple(float(coord) for coord in inheritable.MediaBox)
    view = pageinfo.viewrect

    if view is not None:
        left, bottom, right, top = media
        xoff, yoff, width, height = view
        # viewrect's y offset counts downward from the top edge.
        clip_left = left + xoff
        clip_top = top - yoff
        clip = (
            max(left, clip_left),
            max(bottom, clip_top - height),
            min(right, clip_left + width),
            min(top, clip_top),
        )
        return media, clip

    source = inheritable.CropBox or media
    return media, tuple(float(coord) for coord in source)
def _cache_xobj(contents, resources, mbox, bbox):
    ''' Look up the Form XObject for bbox in the cache kept on
        contents, building and storing it on a miss.

        The cache is a plain dict keyed by bbox, read from
        contents.xobj_cachedict and created through contents.private
        the first time it is needed.  A bbox equal to mbox yields a
        full-page XObject; anything else yields a subpage XObject.
    '''
    cache = contents.xobj_cachedict
    if cache is None:
        cache = {}
        contents.private.xobj_cachedict = cache

    xobj = cache.get(bbox)
    if xobj is not None:
        return xobj

    # Pick the builder for the dictionary the XObject is based on.
    builder = _get_subpage if mbox != bbox else _get_fullpage
    base = builder(contents, resources, mbox, bbox)
    xobj = PdfDict(
        base,
        Type=PdfName.XObject,
        Subtype=PdfName.Form,
        FormType=1,
        BBox=PdfArray(bbox),
    )
    cache[bbox] = xobj
    return xobj
def _get_fullpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a whole-page XObject:
        a copy of contents with Resources attached.

        mbox and bbox are unused; they are accepted so this has the
        same signature as _get_subpage, letting _cache_xobj call
        either one interchangeably.
    '''
    fullpage = PdfDict(contents, Resources=resources)
    return fullpage
def _get_subpage(contents, resources, mbox, bbox):
    ''' Build the base dictionary for a partial-page XObject.

        Rather than copying the page contents again, the result is
        a tiny stream that just draws the (cached) full-page
        XObject, which it references under the name FullPage.
        That way several views of one page share a single copy
        of the page.
    '''
    # Requesting mbox as the bounding box gets the full-page XObject.
    whole_page = _cache_xobj(contents, resources, mbox, mbox)
    xobjects = PdfDict(FullPage=whole_page)
    return PdfDict(
        stream='/FullPage Do\n',
        Resources=PdfDict(XObject=xobjects),
    )
def pagexobj(page, viewinfo=ViewInfo(), allow_compressed=True):
    ''' Return a Form XObject for one view of page.

        viewinfo selects the view; the default (an empty ViewInfo)
        has no viewrect, so the whole page is used.

        The page's content stream must already have a Length that
        matches its stream data.  When allow_compressed is false,
        the stream dictionary must additionally hold exactly one
        entry (presumably just the length, i.e. every filter has
        already been applied -- TODO confirm).  Either check
        failing raises AssertionError.
    '''
    attrs = page.inheritable
    page_resources = attrs.Resources
    mediabox, clipbox = getrects(attrs, viewinfo)
    stream_dict = page.Contents

    declared_length = int(stream_dict.Length)
    assert declared_length == len(stream_dict.stream)
    if not allow_compressed:
        entries = list(stream_dict.iteritems())
        assert len(entries) == 1

    return _cache_xobj(stream_dict, page_resources, mediabox, clipbox)
def docxobj(pageinfo, doc=None, allow_compressed=True):
    ''' Create and return a Form XObject for the page described
        by pageinfo.  Usable on its own, or through CacheXObj.

        pageinfo is a ViewInfo, or a string that is parsed into one.
        The document comes from exactly one place:
          - the doc argument (pageinfo.doc must then be unset),
          - else pageinfo.doc if it is already set,
          - else a PdfReader opened on pageinfo.docname.
        Whichever applies, pageinfo.doc ends up holding it.

        A missing page number means the first page.
    '''
    if isinstance(pageinfo, ViewInfo):
        view = pageinfo
    else:
        view = ViewInfo(pageinfo)

    if doc is None:
        doc = view.doc
        if doc is None:
            # Neither explicit nor implicit document: read the file.
            doc = PdfReader(view.docname, decompress=not allow_compressed)
            view.doc = doc
    else:
        # An explicit document must not compete with an implicit one.
        assert view.doc is None
        view.doc = doc
    assert isinstance(doc, PdfReader)

    # Page numbers in the uri are 1-based.
    page_index = (view.page or 1) - 1
    return pagexobj(doc.pages[page_index], view, allow_compressed)
class CacheXObj(object):
    ''' Remembers every PDF it has opened, keyed by filename, so
        that repeated requests against the same file reuse one
        parsed document instead of reparsing it and duplicating
        its objects in the output.
    '''

    def __init__(self, decompress=False):
        ''' Pass decompress=True when the Form XObjects must be
            decompressed; this is forwarded to PdfReader, and
            load() then asks docxobj not to allow compressed
            streams.
        '''
        self.decompress = decompress
        self.cached_pdfs = {}

    def load(self, sourcename):
        ''' Return a Form XObject for the uri sourcename,
            opening the underlying file only on first use.
        '''
        view = ViewInfo(sourcename)
        filename = view.docname
        readers = self.cached_pdfs

        reader = readers.get(filename)
        if reader is None:
            reader = PdfReader(filename, decompress=self.decompress)
            readers[filename] = reader

        return docxobj(view, reader, allow_compressed=not self.decompress)
|