/usr/lib/python3/dist-packages/openpyxl/reader/excel.py is in python3-openpyxl 2.4.9-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from __future__ import absolute_import
# Copyright (c) 2010-2017 openpyxl
"""Read an xlsx file into Python"""
# Python stdlib imports
from zipfile import ZipFile, ZIP_DEFLATED, BadZipfile
from sys import exc_info
from io import BytesIO
import os.path
import warnings
# compatibility imports
from openpyxl.compat import unicode, file
# Allow blanket setting of KEEP_VBA for testing
try:
from ..tests import KEEP_VBA
except ImportError:
KEEP_VBA = False
# package imports
from openpyxl.utils.exceptions import InvalidFileException
from openpyxl.xml.constants import (
ARC_SHARED_STRINGS,
ARC_CORE,
ARC_CONTENT_TYPES,
ARC_WORKBOOK,
ARC_THEME,
COMMENTS_NS,
SHARED_STRINGS,
EXTERNAL_LINK,
XLTM,
XLTX,
XLSM,
XLSX,
)
from openpyxl.comments.comment_sheet import CommentSheet
from openpyxl.workbook import Workbook
from .strings import read_string_table
from openpyxl.styles.stylesheet import apply_stylesheet
from openpyxl.packaging.core import DocumentProperties
from openpyxl.packaging.manifest import Manifest, Override
from openpyxl.packaging.workbook import WorkbookParser
from openpyxl.packaging.relationship import get_dependents, get_rels_path
from openpyxl.worksheet.read_only import ReadOnlyWorksheet
from openpyxl.worksheet.table import Table
from openpyxl.xml.functions import fromstring
from .worksheet import WorkSheetParser
# Use exc_info for Python 2 compatibility with "except Exception[,/ as] e"
# Magic bytes ("PK\x05\x06") that start a zip "end of central directory" record.
CENTRAL_DIRECTORY_SIGNATURE = b'\x50\x4b\x05\x06'
# File extensions that load_workbook accepts when given a path.
SUPPORTED_FORMATS = ('.xlsx', '.xlsm', '.xltx', '.xltm')


def repair_central_directory(zipFile, is_file_instance):
    ''' trims trailing data from the central directory
    code taken from http://stackoverflow.com/a/7457686/570216, courtesy of Uri Cohen

    :param zipFile: a path, or an already open binary file-like object
    :param is_file_instance: True when ``zipFile`` is a file-like object
        rather than a path
    :return: an in-memory stream, positioned at 0, holding the data up to and
        including the 22 byte end of central directory record when the
        signature is found; otherwise the (rewound) source file object
    '''
    # Nothing is written back to disk, so plain read access is enough and
    # read-only files can be repaired too.
    f = zipFile if is_file_instance else open(zipFile, 'rb')
    data = f.read()
    pos = data.find(CENTRAL_DIRECTORY_SIGNATURE)  # End of central directory signature
    if pos < 0:
        # find() reports "not found" as -1; offset 0 is a legitimate match.
        f.seek(0)
        return f
    if not is_file_instance:
        # We opened this handle ourselves and the caller only receives the
        # in-memory copy, so release it here.
        f.close()
    # 22 == size of 'ZIP end of central directory record'
    return BytesIO(data[:pos + 22])
def _validate_archive(filename):
    """
    Open ``filename`` as a zip archive after some sanity checks.

    For an existing path the extension must be one of SUPPORTED_FORMATS,
    otherwise InvalidFileException is raised. A file-like object must not
    expose an ``encoding`` (i.e. it has to be opened in binary mode),
    otherwise IOError is raised. If zipfile rejects the input, a second
    attempt is made on the output of repair_central_directory.
    """
    reads_like_file = hasattr(filename, 'read')

    if reads_like_file:
        # fileobject must have been opened with 'rb' flag
        # it is required by zipfile
        if getattr(filename, 'encoding', None) is not None:
            raise IOError("File-object must be opened in binary mode")
    elif os.path.isfile(filename):
        extension = os.path.splitext(filename)[-1].lower()
        if extension not in SUPPORTED_FORMATS:
            # dedicated hints for the two legacy Excel formats
            canned = {
                '.xls': ('openpyxl does not support the old .xls file format, '
                         'please use xlrd to read this file, or convert it to '
                         'the more recent .xlsx file format.'),
                '.xlsb': ('openpyxl does not support binary format .xlsb, '
                          'please convert this file to .xlsx format if you want '
                          'to open it with openpyxl'),
            }
            msg = canned.get(extension)
            if msg is None:
                msg = ('openpyxl does not support %s file format, '
                       'please check you can open '
                       'it with Excel first. '
                       'Supported formats are: %s') % (extension,
                                                       ','.join(SUPPORTED_FORMATS))
            raise InvalidFileException(msg)

    try:
        return ZipFile(filename, 'r', ZIP_DEFLATED)
    except BadZipfile:
        repaired = repair_central_directory(filename, reads_like_file)
        return ZipFile(repaired, 'r', ZIP_DEFLATED)
def _find_workbook_part(package):
    """
    Return the manifest entry that describes the workbook part.

    The workbook content types are looked up in ``package`` in a fixed
    order and the first truthy result is returned. Failing that, the
    package defaults are checked and an Override pointing at ARC_WORKBOOK
    is synthesised. IOError is raised when neither lookup succeeds.
    """
    candidates = [XLTM, XLTX, XLSM, XLSX]
    for content_type in candidates:
        found = package.find(content_type)
        if found:
            return found

    # some applications reassign the default for application/xml
    declared = {entry.ContentType for entry in package.Default}
    matching = declared & set(candidates)
    if not matching:
        raise IOError("File contains no valid workbook part")
    return Override("/" + ARC_WORKBOOK, matching.pop())
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA,
                  data_only=False, guess_types=False, keep_links=True):
    """Open the given filename and return the workbook

    :param filename: the path to open or a file-like object
    :type filename: string or a file-like object open in binary mode c.f., :class:`zipfile.ZipFile`

    :param read_only: optimised for reading, content cannot be edited
    :type read_only: bool

    :param keep_vba: preserve vba content (this does NOT mean you can use it)
    :type keep_vba: bool

    :param guess_types: guess cell content type and do not read it from the file
    :type guess_types: bool

    :param data_only: controls whether cells with formulae have either the formula (default) or the value stored the last time Excel read the sheet
    :type data_only: bool

    :param keep_links: whether links to external workbooks should be preserved. The default is True
    :type keep_links: bool

    :rtype: :class:`openpyxl.workbook.Workbook`

    :raises InvalidFileException: if a path with an unsupported extension is given
    :raises IOError: if a file-like object is not in binary mode, or the
        package contains no valid workbook part

    .. note::

        When using read-only mode, all worksheets will be
        :class:`openpyxl.worksheet.read_only.ReadOnlyWorksheet`
        and the returned workbook will be read-only.

    """
    archive = _validate_archive(filename)
    # Release the source archive on every exit path, including parse errors.
    try:
        src = archive.read(ARC_CONTENT_TYPES)
        root = fromstring(src)
        package = Manifest.from_tree(root)

        wb_part = _find_workbook_part(package)
        # PartName carries a leading "/", archive member names do not
        parser = WorkbookParser(archive, wb_part.PartName[1:])
        wb = parser.wb
        wb._data_only = data_only
        wb._read_only = read_only
        wb._keep_links = keep_links
        wb.guess_types = guess_types
        wb.template = wb_part.ContentType in (XLTX, XLTM)
        parser.parse()
        wb._sheets = []

        if read_only and guess_types:
            warnings.warn('Data types are not guessed when using iterator reader')

        valid_files = archive.namelist()

        # If we are going to preserve the vba then attach a copy of the archive
        # to the workbook so that it is available for the save.
        if keep_vba:
            wb.vba_archive = ZipFile(BytesIO(), 'a', ZIP_DEFLATED)
            for name in valid_files:
                wb.vba_archive.writestr(name, archive.read(name))

        if read_only:
            # separate handle that stays with the workbook after `archive`
            # has been closed
            wb._archive = ZipFile(filename)

        # get workbook-level information
        if ARC_CORE in valid_files:
            src = fromstring(archive.read(ARC_CORE))
            wb.properties = DocumentProperties.from_tree(src)

        shared_strings = []
        ct = package.find(SHARED_STRINGS)
        if ct is not None:
            strings_path = ct.PartName[1:]
            shared_strings = read_string_table(archive.read(strings_path))

        if ARC_THEME in valid_files:
            wb.loaded_theme = archive.read(ARC_THEME)

        apply_stylesheet(archive, wb)  # bind styles to workbook

        # get worksheets
        for sheet, rel in parser.find_sheets():
            sheet_name = sheet.name
            worksheet_path = rel.target
            rels_path = get_rels_path(worksheet_path)
            rels = []
            if rels_path in valid_files:
                rels = get_dependents(archive, rels_path)

            # sheet is listed in the workbook but missing from the archive
            if worksheet_path not in valid_files:
                continue

            if read_only:
                ws = ReadOnlyWorksheet(wb, sheet_name, worksheet_path, None,
                                       shared_strings)
                wb._sheets.append(ws)
            else:
                fh = archive.open(worksheet_path)
                try:
                    ws = wb.create_sheet(sheet_name)
                    ws._rels = rels
                    ws_parser = WorkSheetParser(ws, fh, shared_strings)
                    ws_parser.parse()
                finally:
                    # the stream is not used after parse(); close it rather
                    # than leaving it to the garbage collector
                    fh.close()

                if rels:
                    # assign any comments to cells
                    for r in rels.find(COMMENTS_NS):
                        src = archive.read(r.target)
                        comment_sheet = CommentSheet.from_tree(fromstring(src))
                        for ref, comment in comment_sheet.comments:
                            ws[ref].comment = comment

                    # preserve link to VML file if VBA
                    if (
                        wb.vba_archive is not None
                        and ws.legacy_drawing is not None
                    ):
                        ws.legacy_drawing = rels[ws.legacy_drawing].target

                for t in ws_parser.tables:
                    src = archive.read(t)
                    xml = fromstring(src)
                    table = Table.from_tree(xml)
                    ws.add_table(table)

            ws.sheet_state = sheet.state
            ws._rels = []  # reset

        parser.assign_names()

        # NOTE: wb._differential_styles.styles is deliberately not reset here,
        # tables may depend upon dxf
        return wb
    finally:
        archive.close()
|