/usr/lib/python3/dist-packages/stdnum/numdb.py is in python3-stdnum 1.8.1-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | # numdb.py - module for handling hierarchically organised numbers
#
# Copyright (C) 2010-2017 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA
"""Query structured number format files with number properties.
This module contains functions for reading and querying a database that
stores numbers that use a hierarchical format (e.g. ISBN, IBAN, phone
numbers, etc).
To read a database from a file:
>>> with open('tests/numdb-test.dat', 'r') as f:
... dbfile = read(f)
To split a number:
>>> dbfile.split('01006')
['0', '100', '6']
>>> dbfile.split('902006')
['90', '20', '06']
>>> dbfile.split('909856')
['90', '985', '6']
To split the number and get properties for each part:
>>> dbfile.info('01006') == [
... ('0', {'prop1': 'foo'}),
... ('100', {'prop2': 'bar'}),
... ('6', {}),
... ]
True
>>> dbfile.info('02006') == [
... ('0', {'prop1': 'foo'}),
... ('200', {'prop2': 'bar', 'prop3': 'baz'}),
... ('6', {}),
... ]
True
>>> dbfile.info('03456') == [
... ('0', {'prop1': 'foo'}),
... ('345', {'prop2': 'bar', 'prop3': 'baz'}),
... ('6', {}),
... ]
True
>>> dbfile.info('902006') == [
... ('90', {'prop1': 'booz'}),
... ('20', {'prop2': 'foo'}),
... ('06', {}),
... ]
True
>>> dbfile.info('909856') == [
... ('90', {'prop1': 'booz'}),
... ('985', {'prop2': 'fooz'}),
... ('6', {}),
... ]
True
>>> dbfile.info('9889') == [
... ('98', {'prop1': 'booz'}),
... ('89', {'prop2': 'foo'}),
... ]
True
>>> dbfile.info('633322') == [
... ('6', {'prop1': 'boo'}),
... ('333', {'prop2': 'bar', 'prop3': 'baz', 'prop4': 'bla'}),
... ('22', {}),
... ]
True
"""
import re
from pkg_resources import resource_stream
_line_re = re.compile(
r'^(?P<indent> *)'
r'(?P<ranges>([^-,\s]+(-[^-,\s]+)?)(,[^-,\s]+(-[^-,\s]+)?)*)\s*'
r'(?P<props>.*)$')
_prop_re = re.compile(
r'(?P<prop>[0-9a-zA-Z-_]+)="(?P<value>[^"]*)"')
# this is a cache of open databases
_open_databases = {}
# the prefixes attribute of NumDB is structured as follows:
# prefixes = [
# [ length, low, high, props, children ]
# ...
# ]
# where children is a prefixes structure in its own right
# (there is no expected ordering within the list)
class NumDB(object):
"""Number database."""
def __init__(self):
"""Construct an empty database."""
self.prefixes = []
@staticmethod
def _merge(results):
"""Merge the provided list of possible results into a single result
list (this is a generator)."""
# expand the results to all have the same length
ml = max(len(x) for x in results)
results = [x + (ml - len(x)) * [None]
for x in results]
# go over each part
for parts in zip(*results):
# regroup parts into parts list and properties list
partlist, proplist = list(zip(*(x for x in parts if x)))
part = min(partlist, key=len)
props = {}
for p in proplist:
props.update(p)
yield part, props
@staticmethod
def _find(number, prefixes):
"""Lookup the specified number in the list of prefixes, this will
return basically what info() should return but works recursively."""
if not number:
return []
results = []
if prefixes:
for length, low, high, props, children in prefixes:
if low <= number[:length] <= high and len(number) >= length:
results.append([(number[:length], props)] +
NumDB._find(number[length:], children))
# not-found fallback
if not results:
return [(number, {})]
# merge the results into a single result
return list(NumDB._merge(results))
def info(self, number):
"""Split the provided number in components and associate properties
with each component. This returns a tuple of tuples. Each tuple
consists of a string (a part of the number) and a dict of properties.
"""
return NumDB._find(number, self.prefixes)
def split(self, number):
"""Split the provided number in components. This returns a tuple with
the number of components identified."""
return [part for part, props in self.info(number)]
def _parse(fp):
"""Read lines of text from the file pointer and generate indent, length,
low, high, properties tuples."""
for line in fp:
# ignore comments
if line[0] == '#' or line.strip() == '':
continue # pragma: no cover (optimisation takes it out)
# any other line should parse
match = _line_re.search(line)
indent = len(match.group('indent'))
ranges = match.group('ranges')
props = dict(_prop_re.findall(match.group('props')))
for rnge in ranges.split(','):
if '-' in rnge:
low, high = rnge.split('-')
else:
low, high = rnge, rnge
yield indent, len(low), low, high, props
def read(fp):
"""Return a new database with the data read from the specified file."""
last_indent = 0
db = NumDB()
stack = {0: db.prefixes}
for indent, length, low, high, props in _parse(fp):
if indent > last_indent:
# populate the children field of the last indent
stack[last_indent][-1][4] = []
stack[indent] = stack[last_indent][-1][4]
stack[indent].append([length, low, high, props, None])
last_indent = indent
return db
def get(name):
"""Open a database with the specified name to perform queries on."""
if name not in _open_databases:
import codecs
reader = codecs.getreader('utf-8')
with reader(resource_stream(__name__, name + '.dat')) as fp:
_open_databases[name] = read(fp)
return _open_databases[name]
|