/usr/lib/python3/dist-packages/parsel/utils.py is in python3-parsel 1.4.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 | import re
import six
from w3lib.html import replace_entities as w3lib_replace_entities
def flatten(x):
"""flatten(sequence) -> list
Returns a single, flat list which contains all elements retrieved
from the sequence and all recursively contained sub-sequences
(iterables).
Examples:
>>> [1, 2, [3,4], (5,6)]
[1, 2, [3, 4], (5, 6)]
>>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, (8,9,10)])
[1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]
>>> flatten(["foo", "bar"])
['foo', 'bar']
>>> flatten(["foo", ["baz", 42], "bar"])
['foo', 'baz', 42, 'bar']
"""
return list(iflatten(x))
def iflatten(x):
"""iflatten(sequence) -> iterator
Similar to ``.flatten()``, but returns iterator instead"""
for el in x:
if _is_listlike(el):
for el_ in flatten(el):
yield el_
else:
yield el
def _is_listlike(x):
"""
>>> _is_listlike("foo")
False
>>> _is_listlike(5)
False
>>> _is_listlike(b"foo")
False
>>> _is_listlike([b"foo"])
True
>>> _is_listlike((b"foo",))
True
>>> _is_listlike({})
True
>>> _is_listlike(set())
True
>>> _is_listlike((x for x in range(3)))
True
>>> _is_listlike(six.moves.xrange(5))
True
"""
return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes))
def extract_regex(regex, text, replace_entities=True):
"""Extract a list of unicode strings from the given text/encoding using the following policies:
* if the regex contains a named group called "extract" that will be returned
* if the regex contains multiple numbered groups, all those will be returned (flattened)
* if the regex doesn't contain any group the entire regex matching is returned
"""
if isinstance(regex, six.string_types):
regex = re.compile(regex, re.UNICODE)
if 'extract' in regex.groupindex:
# named group
try:
extracted = regex.search(text).group('extract')
except AttributeError:
strings = []
else:
strings = [extracted] if extracted is not None else []
else:
# full regex or numbered groups
strings = regex.findall(text)
strings = flatten(strings)
if not replace_entities:
return strings
return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings]
|