/usr/lib/python2.7/dist-packages/humanfriendly/text.py is in python-humanfriendly 4.4.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Human friendly input/output in Python.
#
# Author: Peter Odding <peter@peterodding.com>
# Last Change: June 24, 2017
# URL: https://humanfriendly.readthedocs.io
"""
Simple text manipulation functions.
The :mod:`~humanfriendly.text` module contains simple functions to manipulate text:
- The :func:`concatenate()` and :func:`pluralize()` functions make it easy to
generate human friendly output.
- The :func:`format()`, :func:`compact()` and :func:`dedent()` functions
provide a clean and simple to use syntax for composing large text fragments
with interpolated variables.
- The :func:`tokenize()` function parses simple user input.
"""
# Standard library modules.
import math
import numbers
import random
import re
import string
import textwrap
# Public identifiers that require documentation.
# The names listed in ``__all__`` are also the ones exported by
# ``from humanfriendly.text import *``; each entry is a function
# defined in this module.
__all__ = (
'compact',
'concatenate',
'dedent',
'format',
'is_empty_line',
'join_lines',
'pluralize',
'random_string',
'split',
'split_paragraphs',
'tokenize',
'trim_empty_lines',
)
def compact(text, *args, **kw):
    '''
    Collapse all whitespace in a string and interpolate arguments.

    Leading and trailing whitespace is removed, every run of whitespace
    characters (including newlines) is replaced by a single space and the
    result is passed through :func:`format()` together with the given
    arguments.

    :param text: The text to compact (a string).
    :param args: Positional arguments, interpolated by :func:`format()`.
    :param kw: Keyword arguments, interpolated by :func:`format()`.
    :returns: The compacted text (a string).

    This works well in combination with multi line string literals: the
    message can be laid out freely in the source code while the user is
    presented with a single clean line of text::

        raise PortDiscoveryError(compact("""
            Failed to discover port(s) that Apache is listening on!
            Maybe I'm parsing the wrong configuration file? ({filename})
        """, filename=self.ports_config))
    '''
    # str.split() without arguments splits on runs of whitespace and drops
    # empty leading/trailing fields, so joining on a space normalizes the text.
    single_spaced = ' '.join(text.split())
    return format(single_spaced, *args, **kw)
def concatenate(items):
    """
    Join a sequence of strings the way a person would write a list.

    All items except the last are separated by commas and the last item is
    attached with the word "and".

    :param items: A sequence (or other iterable) of strings.
    :returns: A single string; an empty string when there are no items and
              the item itself when there is exactly one.

    >>> from humanfriendly.text import concatenate
    >>> concatenate(["eggs", "milk", "bread"])
    'eggs, milk and bread'
    """
    # Materialize the iterable so that it can be measured and sliced.
    words = list(items)
    if not words:
        return ''
    if len(words) == 1:
        return words[0]
    leading, final = words[:-1], words[-1]
    return ', '.join(leading) + ' and ' + final
def dedent(text, *args, **kw):
    """
    Strip common indentation from a string and interpolate arguments.

    The text is processed in three steps: :func:`textwrap.dedent()` removes
    the leading whitespace shared by all lines, :func:`trim_empty_lines()`
    drops empty lines at the start and end, and :func:`format()` interpolates
    the given arguments.

    :param text: The text to dedent (a string).
    :param args: Positional arguments, interpolated by :func:`format()`.
    :param kw: Keyword arguments, interpolated by :func:`format()`.
    :returns: The dedented text (a string).

    Where :func:`compact()` is meant for text shown to the user (whitespace
    is not significant there), :func:`dedent()` is meant for generating data
    files and code, where newlines and indentation must be preserved.
    """
    return format(trim_empty_lines(textwrap.dedent(text)), *args, **kw)
def format(text, *args, **kw):
    """
    Interpolate values into a string using ``%`` and/or :func:`str.format()`.

    :param text: The text to format (a string).
    :param args: Positional arguments are interpolated using the string
                 formatting operator (``%``). Without positional arguments
                 this step is skipped, so literal percent signs are safe.
    :param kw: Keyword arguments are interpolated using
               :func:`str.format()`. Without keyword arguments this step is
               skipped, so literal curly braces are safe.
    :returns: The text with the positional and/or keyword arguments
              interpolated (a string).

    When both kinds of arguments are given the ``%`` interpolation runs
    first and :func:`str.format()` is applied to its result.

    The implementation is trivial, the reason this function exists is the
    convenience it offers at the call site.

    **Compared to the string formatting operator**

    - Switching from one interpolated value to several requires wrapping the
      values in a tuple. :func:`format()` takes a `variable number of
      arguments`_ so it always receives a tuple:

      >>> from humanfriendly.text import format
      >>> # The string formatting operator.
      >>> print('the magic number is %s' % 42)
      the magic number is 42
      >>> print('the magic numbers are %s and %s' % (12, 42))
      the magic numbers are 12 and 42
      >>> # The format() function.
      >>> print(format('the magic number is %s', 42))
      the magic number is 42
      >>> print(format('the magic numbers are %s and %s', 12, 42))
      the magic numbers are 12 and 42

    - Interpolating a single value that unexpectedly turns out to be a tuple
      raises a :exc:`~exceptions.TypeError`. This can't happen with
      :func:`format()` because the value is always wrapped in the tuple of
      positional arguments:

      >>> # How expecting to interpolate a single value can fail.
      >>> value = (12, 42)
      >>> print('the magic value is %s' % value)
      Traceback (most recent call last):
        File "<stdin>", line 1, in <module>
      TypeError: not all arguments converted during string formatting
      >>> # The following line works as intended, no surprises here!
      >>> print(format('the magic value is %s', value))
      the magic value is (12, 42)

    **Compared to the str.format() method**

    The :func:`str.format()` method binds more tightly than ``+``, so when
    the text is built from several fragments you need extra parentheses to
    format the whole text. With :func:`format()` the comma separating the
    arguments takes care of this:

    >>> "{adjective} example" + " " + "(can't think of anything less {adjective})".format(adjective='silly')
    "{adjective} example (can't think of anything less silly)"
    >>> ("{adjective} example" + " " + "(can't think of anything less {adjective})").format(adjective='silly')
    "silly example (can't think of anything less silly)"
    >>> format("{adjective} example" + " " + "(can't think of anything less {adjective})", adjective='silly')
    "silly example (can't think of anything less silly)"

    The :func:`compact()` and :func:`dedent()` functions combine
    :func:`format()` with whitespace manipulation.

    .. _variable number of arguments: https://docs.python.org/2/tutorial/controlflow.html#arbitrary-argument-lists
    """
    formatted = text
    if args:
        # The positional arguments are always a tuple, so a tuple passed as
        # a single value is interpolated as one value.
        formatted = formatted % args
    if kw:
        formatted = formatted.format(**kw)
    return formatted
def is_empty_line(text):
    """
    Tell whether a text contains nothing but (optional) whitespace.

    :param text: The text to check for "emptiness" (a string).
    :returns: :data:`True` if the text has zero length or consists solely of
              whitespace characters, :data:`False` otherwise.
    """
    # str.isspace() returns False for the empty string, hence the
    # separate length check.
    if len(text) == 0:
        return True
    return text.isspace()
def join_lines(text):
    """
    Remove "hard wrapping" from the paragraphs in a string.

    :param text: The text to reformat (a string).
    :returns: The text without hard wrapping (a string).

    A line break is replaced by a single space when the character directly
    before it and the character directly after it are both non-whitespace
    characters. Line breaks next to whitespace (for example the empty line
    between two paragraphs) are left alone. This means that common leading
    indentation will break :func:`join_lines()` (in that case you can use
    :func:`dedent()` before calling :func:`join_lines()`).

    >>> from humanfriendly.text import join_lines
    >>> join_lines('a\\nb\\nc')
    'a b c'
    """
    # Look-around assertions are used instead of capturing the neighbouring
    # characters: a consumed character can't take part in the next match, so
    # with capture groups a line of exactly one character would prevent the
    # line break that follows it from being joined.
    return re.sub(r'(?<=\S)\n(?=\S)', ' ', text)
def pluralize(count, singular, plural=None):
    """
    Combine a count with the singular or plural form of a word.

    If the plural form of the word is not provided it is obtained by
    concatenating the singular form of the word with the letter "s". Of course
    this will not always be correct, which is why you have the option to
    specify both forms.

    :param count: The count (a number, or anything accepted by
                  :func:`float()` such as a numeric string).
    :param singular: The singular form of the word (a string).
    :param plural: The plural form of the word (a string or :data:`None`).
    :returns: The count and singular/plural word concatenated (a string).

    The singular form is used only when the count is numerically equal to
    one, so fractional counts get the plural form:

    >>> from humanfriendly.text import pluralize
    >>> pluralize(1, 'second')
    '1 second'
    >>> pluralize(1.5, 'second')
    '1.5 seconds'
    """
    if not plural:
        plural = singular + 's'
    # Compare the value itself (not its floor) against one, otherwise every
    # count in the range [1, 2) would be reported in the singular form.
    word = singular if float(count) == 1 else plural
    return '%s %s' % (count, word)
def random_string(length=(25, 100), characters=string.ascii_letters):
    """random_string(length=(25, 100), characters=string.ascii_letters)
    Build a string of randomly chosen characters.

    :param length: The length of the string to generate: either a number or
                   a tuple with two numbers, in which case
                   :func:`random.randint()` picks a length between the two
                   numbers (both inclusive).
    :param characters: The characters to choose from (a string, defaults
                       to :data:`string.ascii_letters`).
    :returns: A random string.

    This function is mostly useful in test suites. It uses the
    :mod:`random` module so the result is not suitable for security
    sensitive purposes.
    """
    if isinstance(length, numbers.Number):
        size = length
    else:
        # Anything that isn't a number is treated as a (minimum, maximum) pair.
        minimum, maximum = length[0], length[1]
        size = random.randint(minimum, maximum)
    picked = [random.choice(characters) for _ in range(size)]
    return ''.join(picked)
def split(text, delimiter=','):
    """
    Split a delimited list of strings, ignoring whitespace and empty items.

    :param text: The text to split (a string).
    :param delimiter: The delimiter to split on (a string, defaults to a
                      comma).
    :returns: A list of zero or more nonempty strings, each with its leading
              and trailing whitespace removed.

    Python's built in :func:`str.split()` keeps surrounding whitespace and
    empty fields:

    >>> 'foo,bar, baz,'.split(',')
    ['foo', 'bar', ' baz', '']

    The :func:`split()` function strips the former and drops the latter:

    >>> from humanfriendly.text import split
    >>> split('foo,bar, baz,')
    ['foo', 'bar', 'baz']

    Nested calls can parse a small data structure encoded in a string, for
    example a mapping of logging level names to one or more styles that is
    passed in via an environment variable:

    >>> from pprint import pprint
    >>> encoded_data = 'debug=green;warning=yellow;error=red;critical=red,bold'
    >>> parsed_data = dict((k, split(v, ',')) for k, v in (split(kv, '=') for kv in split(encoded_data, ';')))
    >>> pprint(parsed_data)
    {'critical': ['red', 'bold'],
     'debug': ['green'],
     'error': ['red'],
     'warning': ['yellow']}
    """
    tokens = []
    for field in text.split(delimiter):
        # Skip fields that are empty or consist solely of whitespace.
        if not field or field.isspace():
            continue
        tokens.append(field.strip())
    return tokens
def split_paragraphs(text):
    """
    Break a string up into its paragraphs.

    Paragraphs are separated from each other by an empty line, which is to
    say two consecutive newline characters.

    :param text: The text to split into paragraphs (a string).
    :returns: A list of strings, one per paragraph. Leading and trailing
              empty lines are trimmed from each paragraph using
              :func:`trim_empty_lines()` and paragraphs that turn out to be
              empty or whitespace-only are left out.
    """
    candidates = (trim_empty_lines(block) for block in text.split('\n\n'))
    return [block for block in candidates if block and not block.isspace()]
def tokenize(text):
    """
    Tokenize a text into numbers and strings.

    :param text: The text to tokenize (a string).
    :returns: A list of strings and/or numbers.

    This function is used to implement robust tokenization of user input in
    functions like :func:`.parse_size()` and :func:`.parse_timespan()`. It
    automatically coerces integer and floating point numbers, ignores
    whitespace and knows how to separate numbers from strings even without
    whitespace. Some examples to make this more concrete:

    >>> from humanfriendly.text import tokenize
    >>> tokenize('42')
    [42]
    >>> tokenize('42MB')
    [42, 'MB']
    >>> tokenize('42.5MB')
    [42.5, 'MB']
    >>> tokenize('42.5 MB')
    [42.5, 'MB']
    """
    tokenized_input = []
    # Because the pattern contains a capture group re.split() returns the
    # matched numbers at the odd indexes and the text in between the numbers
    # at the even indexes. Relying on the position (instead of re-inspecting
    # each token) avoids treating text as a number: characters like the
    # superscript two satisfy str.isdigit() but are rejected by int().
    for index, token in enumerate(re.split(r'(\d+(?:\.\d+)?)', text)):
        if index % 2 == 1:
            # A captured number: a decimal point implies a float.
            tokenized_input.append(float(token) if '.' in token else int(token))
        else:
            token = token.strip()
            if token:
                tokenized_input.append(token)
    return tokenized_input
def trim_empty_lines(text):
    """
    Strip empty lines from the start and end of the given text.

    Lines are considered empty when :func:`is_empty_line()` says so, i.e.
    when they contain nothing but whitespace. Empty lines in between
    non-empty lines are preserved.

    :param text: The text to trim (a string).
    :returns: The trimmed text (a string).
    """
    # Keep the line endings so that joining the remaining lines
    # reproduces the original text exactly.
    lines = text.splitlines(True)
    start, stop = 0, len(lines)
    # Move the start of the window past the leading empty lines.
    while start < stop and is_empty_line(lines[start]):
        start += 1
    # Move the end of the window before the trailing empty lines.
    while stop > start and is_empty_line(lines[stop - 1]):
        stop -= 1
    return ''.join(lines[start:stop])
|