/usr/lib/python2.7/dist-packages/boltons/tableutils.py is in python-boltons 17.1.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 | # -*- coding: utf-8 -*-
"""If there is one recurring theme in ``boltons``, it is that Python
has excellent data structures that constitute a good foundation for
most quick manipulations, as well as building applications. However,
Python usage has grown much faster than builtin data structure
power. Python has a growing need for more advanced general-purpose
data structures which behave intuitively.

The :class:`Table` class is one example. When handed one- or
two-dimensional data, it can provide useful, if basic, text and HTML
renditions of small to medium sized data. It also heuristically
handles recursive data of various formats (lists, dicts, namedtuples,
objects).

For more advanced :class:`Table`-style manipulation check out the
`pandas`_ DataFrame.

.. _pandas: http://pandas.pydata.org/

"""
from __future__ import print_function
try:
    import cgi
except ImportError:
    # ``cgi`` was removed from the standard library in Python 3.13; keep
    # the name bound so the module can still be imported there.
    cgi = None
import types
from itertools import islice

try:
    # The container ABCs live in ``collections.abc`` on Python 3; the old
    # aliases in ``collections`` were removed in Python 3.10.
    from collections.abc import Sequence, Mapping, MutableSequence
except ImportError:
    # Python 2
    from collections import Sequence, Mapping, MutableSequence

try:
    string_types, integer_types = (str, unicode), (int, long)
except NameError:
    # Python 3 compat: ``unicode`` and ``long`` no longer exist as builtins
    unicode = str
    string_types, integer_types = (str, bytes), (int,)

try:
    from typeutils import make_sentinel
    _MISSING = make_sentinel(var_name='_MISSING')
except ImportError:
    # Fall back to a plain unique object when typeutils is not importable.
    _MISSING = object()
"""
Some idle feature thoughts:
* shift around column order without rearranging data
* gotta make it so you can add additional items, not just initialize with
* maybe a shortcut would be to allow adding of Tables to other Tables
* what's the perf of preallocating lists and overwriting items versus
starting from empty?
* is it possible to effectively tell the difference between when a
Table is from_data()'d with a single row (list) or with a list of lists?
* CSS: white-space pre-line or pre-wrap maybe?
* Would be nice to support different backends (currently uses lists
exclusively). Sometimes large datasets come in list-of-dicts and
list-of-tuples format and it's desirable to cut down processing overhead.
TODO: make iterable on rows?
"""
# Public API of this module: a star-import exposes only the Table class.
__all__ = ['Table']
def to_text(obj, maxlen=None):
    """Return a text rendition of *obj*, optionally truncated.

    Conversion is best-effort: ``unicode(obj)`` is tried first, then
    ``unicode(repr(obj))``, and finally the default ``object.__repr__``
    for objects whose own string methods raise.

    Args:
        obj (object): The value to convert.
        maxlen (int): When truthy, the result is cut down to at most
            *maxlen* characters. Results that are cut end in ``'...'``
            when there is room for it (``maxlen > 3``); otherwise the
            text is simply cut off at *maxlen*.

    Returns:
        The text for *obj*.
    """
    try:
        text = unicode(obj)
    except Exception:
        try:
            text = unicode(repr(obj))
        except Exception:
            text = unicode(object.__repr__(obj))
    if maxlen and len(text) > maxlen:
        if maxlen > 3:
            text = text[:maxlen - 3] + '...'
        else:
            # Too short to hold an ellipsis; appending one would make the
            # result longer than the requested maximum.
            text = text[:maxlen]
    # TODO: inverse of ljust/rjust/center
    return text
def escape_html(obj, maxlen=None):
    """Return *obj* converted to text and escaped for inclusion in HTML.

    Args:
        obj (object): The value to render; converted with :func:`to_text`.
        maxlen (int): Optional maximum text length, passed to
            :func:`to_text` (applied before escaping).

    Returns:
        The escaped text. ``&``, ``<``, ``>`` and ``"`` are always
        escaped; where ``html.escape`` is available the single quote is
        escaped as well.
    """
    text = to_text(obj, maxlen=maxlen)
    try:
        # cgi.escape was removed in Python 3.8; html.escape replaces it.
        from html import escape
    except ImportError:
        # Python 2
        escape = cgi.escape
    return escape(text, quote=True)
# Types whose values are treated as atomic cells: ObjectInputType refuses
# them and Table.from_data neither introspects nor recurses into them.
# (The name presumably abbreviates "do not recurse" -- TODO confirm.)
_DNR = set(string_types).union(
    integer_types,
    (type(None), bool, complex, float,
     type(NotImplemented), slice,
     types.FunctionType, types.MethodType, types.BuiltinFunctionType,
     types.GeneratorType))
class UnsupportedData(TypeError):
    """Raised by :meth:`Table.from_data` when no input type accepts the data."""
class InputType(object):
    """Base class for the adapters that turn input data into Table rows.

    Subclasses in this module provide ``check_type(obj)``,
    ``guess_headers(obj)`` and ``get_entry(obj, headers)``.
    """

    def __init__(self, *a, **kw):
        # Arguments are accepted and ignored.
        pass

    def get_entry_seq(self, data_seq, headers):
        """Return a list with one ``get_entry`` result per item of *data_seq*."""
        rows = []
        for item in data_seq:
            rows.append(self.get_entry(item, headers))
        return rows
class DictInputType(InputType):
    """Input adapter for Mapping data; the mapping's keys act as headers."""

    def check_type(self, obj):
        """Return True when *obj* is a Mapping."""
        return isinstance(obj, Mapping)

    def guess_headers(self, obj):
        """Return the keys of *obj* as a sorted list."""
        keys = list(obj.keys())
        keys.sort()
        return keys

    def get_entry(self, obj, headers):
        """Return one row: ``obj.get(h)`` for each header, None if absent."""
        row = []
        for key in headers:
            row.append(obj.get(key))
        return row

    def get_entry_seq(self, obj, headers):
        """Return one row per mapping in the sequence *obj*."""
        rows = []
        for mapping in obj:
            rows.append([mapping.get(key) for key in headers])
        return rows
class ObjectInputType(InputType):
    """Input adapter that reads rows from the attributes of objects."""

    def check_type(self, obj):
        """Return True unless the exact type of *obj* is listed in ``_DNR``."""
        if type(obj) in _DNR:
            return False
        return hasattr(obj, '__class__')

    def guess_headers(self, obj):
        """Return the names from ``dir(obj)`` whose values are not callable."""
        names = []
        for name in dir(obj):
            # an object's __dict__ could technically have non-string keys
            try:
                value = getattr(obj, name)
            except Exception:
                # seen on greenlet: `run` shows in dir() but raises
                # AttributeError. Also properties misbehave.
                continue
            if not callable(value):
                names.append(name)
        return names

    def get_entry(self, obj, headers):
        """Return one row: each header's attribute value, None if it raises."""
        row = []
        for name in headers:
            try:
                value = getattr(obj, name)
            except Exception:
                value = None
            row.append(value)
        return row
# might be better to hardcode list support since it's so close to the
# core or might be better to make this the copy-style from_* importer
# and have the non-copy style be hardcoded in __init__
class ListInputType(InputType):
    """Input adapter for MutableSequence rows, which are used as-is."""

    def check_type(self, obj):
        """Return True when *obj* is a MutableSequence."""
        return isinstance(obj, MutableSequence)

    def guess_headers(self, obj):
        """Sequences carry no column names, so there is nothing to guess."""
        return None

    def get_entry(self, obj, headers):
        """Return *obj* itself (no copy); *headers* is ignored."""
        return obj

    def get_entry_seq(self, obj_seq, headers):
        """Return *obj_seq* itself (no copy); *headers* is ignored."""
        return obj_seq
class TupleInputType(InputType):
    """Input adapter for tuple rows, which are copied into lists."""

    def check_type(self, obj):
        """Return True when *obj* is a tuple."""
        return isinstance(obj, tuple)

    def guess_headers(self, obj):
        """Plain tuples carry no column names, so there is nothing to guess."""
        return None

    def get_entry(self, obj, headers):
        """Return the items of *obj* as a new list; *headers* is ignored."""
        return list(obj)

    def get_entry_seq(self, obj_seq, headers):
        """Return a list holding a new list for every tuple in *obj_seq*."""
        return list(map(list, obj_seq))
class NamedTupleInputType(InputType):
    """Input adapter for namedtuple rows; ``_fields`` supplies the headers."""

    def check_type(self, obj):
        """Return True when *obj* is a tuple that has a ``_fields`` attribute."""
        if not hasattr(obj, '_fields'):
            return False
        return isinstance(obj, tuple)

    def guess_headers(self, obj):
        """Return the field names of *obj* as a list."""
        return list(obj._fields)

    def get_entry(self, obj, headers):
        """Return one row: each header's attribute value, None if missing."""
        row = []
        for name in headers:
            row.append(getattr(obj, name, None))
        return row

    def get_entry_seq(self, obj_seq, headers):
        """Return one row per item of *obj_seq*."""
        rows = []
        for item in obj_seq:
            rows.append([getattr(item, name, None) for name in headers])
        return rows
class Table(object):
    """
    This Table class is meant to be simple, low-overhead, and extensible. Its
    most common use would be for translation between in-memory data
    structures and serialization formats, such as HTML and console-ready text.

    As such, it stores data in list-of-lists format, and *does not* copy
    lists passed in. It also reserves the right to modify those lists in a
    "filling" process, whereby short lists are extended to the width of
    the table (usually determined by number of headers). This greatly
    reduces overhead and processing/validation that would have to occur
    otherwise.

    General description of headers behavior:

    Headers describe the columns, but are not part of the data, however,
    if the *headers* argument is omitted, Table tries to infer header
    names from the data. It is possible to have a table with no headers,
    just pass in ``headers=None``.

    Supported inputs:

    * :class:`list` of :class:`list` objects
    * :class:`dict` (list/single)
    * :class:`object` (list/single)
    * :class:`collections.namedtuple` (list/single)
    * TODO: DB API cursor?
    * TODO: json

    Supported outputs:

    * HTML
    * Pretty text (also usable as GF Markdown)
    * TODO: CSV
    * TODO: json
    * TODO: json lines

    To minimize resident size, the Table data is stored as a list of lists.
    """

    # order definitely matters here: the first adapter whose check_type()
    # accepts a sample wins, so NamedTupleInputType has to be tried before
    # TupleInputType (a namedtuple is also a tuple) and ObjectInputType,
    # which accepts anything whose type is not in _DNR, has to come last.
    _input_types = [DictInputType(), ListInputType(),
                    NamedTupleInputType(), TupleInputType(),
                    ObjectInputType()]

    _html_tr, _html_tr_close = '<tr>', '</tr>'
    _html_th, _html_th_close = '<th>', '</th>'
    _html_td, _html_td_close = '<td>', '</td>'
    # _html_thead, _html_thead_close = '<thead>', '</thead>'
    # _html_tfoot, _html_tfoot_close = '<tfoot>', '</tfoot>'
    _html_table_tag, _html_table_tag_close = '<table>', '</table>'

    def __init__(self, data=None, headers=_MISSING, metadata=None):
        """Create a Table from list-of-lists *data*.

        Args:
            data (list): Rows, each of them a list. The lists are stored
                without copying and may be extended in place.
            headers (list): Column names. When omitted, the first row of
                *data* is taken as the headers and the remaining rows as
                the data. Pass ``None`` for a table without headers.
            metadata (dict): Optional data about the table itself.
        """
        if headers is _MISSING:
            headers = []
            if data:
                headers, data = list(data[0]), islice(data, 1, None)
        self.headers = headers or []
        self.metadata = metadata or {}
        self._data = []
        self._width = 0
        self.extend(data)

    def extend(self, data):
        """
        Append the given data to the end of the Table.
        """
        if not data:
            return
        self._data.extend(data)
        self._set_width()
        self._fill()

    def _set_width(self, reset=False):
        """Establish the table width (number of columns) if not yet known.

        The width is the number of headers when there are any, otherwise
        the length of the longest row. A width that is already set is
        kept unless *reset* is true.
        """
        if reset:
            self._width = 0
        if self._width:
            return
        if self.headers:
            self._width = len(self.headers)
            return
        # ``or [0]`` keeps max() from raising ValueError when there are no
        # rows yet (e.g. the only input row was consumed as empty headers).
        self._width = max([len(d) for d in self._data] or [0])

    def _fill(self):
        """Pad every short row, in place, with ``None`` up to the width."""
        width, filler = self._width, [None]
        if not width:
            return
        for d in self._data:
            rem = width - len(d)
            if rem > 0:
                d.extend(filler * rem)

    @classmethod
    def from_dict(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from a :class:`dict`. Operates the same as
        :meth:`from_data`, but forces interpretation of the data as a
        Mapping.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=DictInputType(),
                             metadata=metadata)

    @classmethod
    def from_list(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from a :class:`list`. Operates the same as
        :meth:`from_data`, but forces the interpretation of the data
        as a Sequence.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=ListInputType(),
                             metadata=metadata)

    @classmethod
    def from_object(cls, data, headers=_MISSING, max_depth=1, metadata=None):
        """Create a Table from an :class:`object`. Operates the same as
        :meth:`from_data`, but forces the interpretation of the data
        as an object. May be useful for some :class:`dict` and
        :class:`list` subtypes.
        """
        return cls.from_data(data=data, headers=headers,
                             max_depth=max_depth, _data_type=ObjectInputType(),
                             metadata=metadata)

    @classmethod
    def from_data(cls, data, headers=_MISSING, max_depth=1, **kwargs):
        """Create a Table from any supported data, heuristically
        selecting how to represent the data in Table format.

        Args:
            data (object): Any object or iterable with data to be
                imported to the Table.
            headers (iterable): An iterable of headers to be matched
                to the data. If not explicitly passed, headers will be
                guessed for certain datatypes. If ``None``, the Table
                gets no headers; the guessed names are then only used
                to pull the values out of each item.
            max_depth (int): The level to which nested Tables should
                be created (default: 1).
            metadata (dict): Optional metadata stored on the new Table.
            _data_type (InputType subclass): For advanced use cases,
                do not guess the type of the input data, use this data
                type instead.

        Raises:
            UnsupportedData: if none of the input types accepts *data*.
        """
        # TODO: seen/cycle detection/reuse ?
        # maxdepth follows the same behavior as find command
        # i.e., it doesn't work if max_depth=0 is passed in
        metadata = kwargs.pop('metadata', None)
        _data_type = kwargs.pop('_data_type', None)

        if max_depth < 1:
            # return data instead?
            return cls(headers=headers, metadata=metadata)
        is_seq = isinstance(data, Sequence)
        if is_seq:
            if not data:
                return cls(headers=headers, metadata=metadata)
            # the first item stands in for every item of the sequence
            to_check = data[0]
            if not _data_type:
                for it in cls._input_types:
                    if it.check_type(to_check):
                        _data_type = it
                        break
                else:
                    # not particularly happy about this rewind-y approach:
                    # the items are not rows, so treat the sequence itself
                    # as one single row
                    is_seq = False
                    to_check = data
        else:
            if type(data) in _DNR:
                # hmm, got scalar data.
                # raise an exception or make an exception, nahmsayn?
                return cls([[data]], headers=headers, metadata=metadata)
            to_check = data
        if not _data_type:
            for it in cls._input_types:
                if it.check_type(to_check):
                    _data_type = it
                    break
            else:
                raise UnsupportedData('unsupported data type %r'
                                      % type(data))

        entry_headers = headers
        if headers is _MISSING:
            headers = entry_headers = _data_type.guess_headers(to_check)
        elif headers is None:
            # A header-less table was requested, but keyed input types
            # still need the names to extract the values of each row.
            entry_headers = _data_type.guess_headers(to_check)
        if is_seq:
            entries = _data_type.get_entry_seq(data, entry_headers)
        else:
            entries = [_data_type.get_entry(data, entry_headers)]
        if max_depth > 1:
            new_max_depth = max_depth - 1
            for i, entry in enumerate(entries):
                for j, cell in enumerate(entry):
                    if type(cell) in _DNR:
                        # optimization to avoid function overhead
                        continue
                    try:
                        entries[i][j] = cls.from_data(cell,
                                                      max_depth=new_max_depth)
                    except UnsupportedData:
                        continue
        return cls(entries, headers=headers, metadata=metadata)

    def __len__(self):
        """Return the number of data rows (headers are not counted)."""
        return len(self._data)

    def __getitem__(self, idx):
        """Return the row (or slice of rows) at *idx*."""
        return self._data[idx]

    def __repr__(self):
        cn = self.__class__.__name__
        if self.headers:
            return '%s(headers=%r, data=%r)' % (cn, self.headers, self._data)
        else:
            return '%s(%r)' % (cn, self._data)

    def to_html(self, orientation=None, wrapped=True,
                with_headers=True, with_newlines=True,
                with_metadata=False, max_depth=1):
        """Render this Table to HTML. Configure the structure of Table
        HTML by subclassing and overriding ``_html_*`` class
        attributes.

        Args:
            orientation (str): one of 'auto', 'horizontal', or
                'vertical' (or the first letter of any of
                those). Default 'auto'.
            wrapped (bool): whether or not to include the wrapping
                '<table></table>' tags. Default ``True``, set to
                ``False`` if appending multiple Table outputs or an
                otherwise customized HTML wrapping tag is needed.
            with_headers (bool): whether or not to render the headers,
                if the Table has any. Default ``True``.
            with_newlines (bool): Set to ``True`` if output should
                include added newlines to make the HTML more
                readable. Default ``True``.
            with_metadata (bool/str): Set to ``True`` if output should
                be preceded with a Table of preset metadata, if it
                exists. Set to special value ``'bottom'`` if the
                metadata Table HTML should come *after* the main HTML output.
            max_depth (int): Indicate how deeply to nest HTML tables
                before simply reverting to :func:`repr`-ing the nested
                data.

        Returns:
            A text string of the HTML of the rendered table.

        Raises:
            ValueError: if *orientation* is not one of the accepted values.
        """
        lines = []
        headers = []
        metadata_html = None
        if with_metadata and self.metadata:
            metadata_table = Table.from_data(self.metadata,
                                             max_depth=max_depth)
            metadata_html = metadata_table.to_html(with_headers=True,
                                                   with_newlines=with_newlines,
                                                   with_metadata=False,
                                                   max_depth=max_depth)
            if with_metadata != 'bottom':
                lines.append(metadata_html)
                lines.append('<br />')

        if with_headers and self.headers:
            headers.extend(self.headers)
            # pad so that every column has a header cell
            headers.extend([None] * (self._width - len(self.headers)))
        if wrapped:
            lines.append(self._html_table_tag)
        orientation = orientation or 'auto'
        ol = orientation[0].lower()
        if ol == 'a':
            # auto: several rows read best side by side under the headers,
            # a single row reads best as a header/value listing
            ol = 'h' if len(self) > 1 else 'v'
        if ol == 'h':
            self._add_horizontal_html_lines(lines, headers=headers,
                                            max_depth=max_depth)
        elif ol == 'v':
            self._add_vertical_html_lines(lines, headers=headers,
                                          max_depth=max_depth)
        else:
            raise ValueError("expected one of 'auto', 'vertical', or"
                             " 'horizontal', not %r" % orientation)
        if wrapped:
            lines.append(self._html_table_tag_close)
        if metadata_html is not None and with_metadata == 'bottom':
            # Emitted after the closing tag so that the metadata table
            # follows the main table instead of landing inside of it.
            lines.append('<br />')
            lines.append(metadata_html)
        sep = '\n' if with_newlines else ''
        return sep.join(lines)

    def get_cell_html(self, value):
        """Called on each value in an HTML table. By default it simply escapes
        the HTML. Override this method to add additional conditions
        and behaviors, but take care to ensure the final output is
        HTML escaped.
        """
        return escape_html(value)

    def _add_horizontal_html_lines(self, lines, headers, max_depth):
        """Append one header line (if any) and one ``<tr>`` per data row.

        Cells that are Tables are rendered as nested HTML tables while
        *max_depth* is greater than 1; everything else goes through
        :meth:`get_cell_html`.
        """
        esc = self.get_cell_html
        new_depth = max_depth - 1 if max_depth > 1 else max_depth
        if headers:
            _thth = self._html_th_close + self._html_th
            lines.append(self._html_tr + self._html_th +
                         _thth.join([esc(h) for h in headers]) +
                         self._html_th_close + self._html_tr_close)
        trtd, _tdtd, _td_tr = (self._html_tr + self._html_td,
                               self._html_td_close + self._html_td,
                               self._html_td_close + self._html_tr_close)
        for row in self._data:
            if max_depth > 1:
                _fill_parts = []
                for cell in row:
                    if isinstance(cell, Table):
                        _fill_parts.append(cell.to_html(max_depth=new_depth))
                    else:
                        _fill_parts.append(esc(cell))
            else:
                _fill_parts = [esc(c) for c in row]
            lines.append(''.join([trtd, _tdtd.join(_fill_parts), _td_tr]))

    def _add_vertical_html_lines(self, lines, headers, max_depth):
        """Append one ``<tr>`` per column: its header, then each row's cell.

        Cells that are Tables are rendered as nested HTML tables while
        *max_depth* is greater than 1; everything else goes through
        :meth:`get_cell_html`.
        """
        esc = self.get_cell_html
        new_depth = max_depth - 1 if max_depth > 1 else max_depth
        tr, th, _th = self._html_tr, self._html_th, self._html_th_close
        td, _tdtd = self._html_td, self._html_td_close + self._html_td
        _td_tr = self._html_td_close + self._html_tr_close
        for i in range(self._width):
            line_parts = [tr]
            if headers:
                line_parts.extend([th, esc(headers[i]), _th])
            if max_depth > 1:
                _fill_parts = []
                for row in self._data:
                    cell = row[i]
                    if isinstance(cell, Table):
                        _fill_parts.append(cell.to_html(max_depth=new_depth))
                    else:
                        _fill_parts.append(esc(row[i]))
            else:
                _fill_parts = [esc(row[i]) for row in self._data]
            line_parts.extend([td, _tdtd.join(_fill_parts), _td_tr])
            lines.append(''.join(line_parts))

    def to_text(self, with_headers=True, maxlen=None):
        """Get the Table's textual representation. Only works well
        for Tables with non-recursive data.

        Args:
            with_headers (bool): Whether to include a header row at the
                top. Ignored when the Table has no headers.
            maxlen (int): Max length of data in each cell.

        Returns:
            The rows joined by newlines. Cells are centered within their
            column and separated by ``' | '``; the header row, if shown,
            is followed by a ``'-+-'`` separated rule.
        """
        # TODO: verify this works for markdown
        text_data = [[to_text(cell, maxlen=maxlen) for cell in row]
                     for row in self._data]
        header_text = []
        if with_headers and self.headers:
            # headers may be arbitrary objects, so convert them as well
            header_text = [to_text(h, maxlen=maxlen) for h in self.headers]
        # a row may be wider than the table width, make room for all of it
        num_cols = max([self._width, len(header_text)] +
                       [len(row) for row in text_data])
        if header_text:
            header_text.extend([''] * (num_cols - len(header_text)))

        widths = []
        for idx in range(num_cols):
            # widest text in this column, cells and header alike
            cur_widths = [len(row[idx]) for row in text_data
                          if idx < len(row)]
            if header_text:
                cur_widths.append(len(header_text[idx]))
            widths.append(max(cur_widths or [0]))

        lines = []
        if header_text:
            lines.append(' | '.join([h.center(widths[i])
                                     for i, h in enumerate(header_text)]))
            lines.append('-+-'.join(['-' * w for w in widths]))
        for row in text_data:
            lines.append(' | '.join([cell.center(widths[j])
                                     for j, cell in enumerate(row)]))
        return '\n'.join(lines)
|