/usr/share/pyshared/larch/codec.py is in python-larch 1.20131130-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 | # Copyright 2010 Lars Wirzenius
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import struct
import larch
class CodecError(larch.Error):
def __init__(self, msg):
self.msg = msg
class NodeCodec(object):
'''Encode and decode nodes from their binary format.
Node identifiers are assumed to fit into 64 bits.
Leaf node values are assumed to fit into 4 gibibytes.
'''
format = 1
# We use the struct module for encoding and decoding. For speed,
# we construct the format string all at once, so that there is only
# one call to struct.pack or struct.unpack for one node. This brought
# a thousand time speedup over doing it one field at a time. However,
# the code is not quite as clear as it might be, what with no symbolic
# names for anything is used anymore. Patches welcome.
def __init__(self, key_bytes):
self.key_bytes = key_bytes
self.leaf_header = struct.Struct('!4sQI')
self.index_header = struct.Struct('!4sQI')
# space for key and length of value is needed for each pair
self.leaf_pair_fixed_size = key_bytes + struct.calcsize('!I')
self.index_pair_size = key_bytes + struct.calcsize('!Q')
def leaf_size(self, keys, values):
'''Return size of a leaf node with the given pairs.'''
return (self.leaf_header.size + len(keys) * self.leaf_pair_fixed_size +
len(''.join([value for value in values])))
def leaf_size_delta_add(self, old_size, value):
'''Return size of node that gets a new key/value pair added.
``old_size`` is the old size of the node. The key must not already
have existed in the node.
'''
delta = self.leaf_pair_fixed_size + len(value)
return old_size + delta
def leaf_size_delta_replace(self, old_size, old_value, new_value):
'''Return size of node that gets a value replaced.'''
return old_size + len(new_value) - len(old_value)
def encode_leaf(self, node):
'''Encode a leaf node as a byte string.'''
keys = node.keys()
values = node.values()
return (self.leaf_header.pack('ORBL', node.id, len(keys)) +
''.join(keys) +
struct.pack('!%dI' % len(values), *map(len, values)) +
''.join(values))
def decode_leaf(self, encoded):
'''Decode a leaf node from its encoded byte string.'''
buf = buffer(encoded)
cookie, node_id, num_pairs = self.leaf_header.unpack_from(buf)
if cookie != 'ORBL':
raise CodecError('Leaf node does not begin with magic cookie '
'(should be ORBL, is %s)' % repr(cookie))
fmt = '!' + ('%ds' % self.key_bytes) * num_pairs + 'I' * num_pairs
items = struct.unpack_from(fmt, buf, self.leaf_header.size)
keys = items[:num_pairs]
lengths = items[num_pairs:num_pairs*2]
values = []
offset = self.leaf_header.size + self.leaf_pair_fixed_size * num_pairs
append = values.append
for length in lengths:
append(encoded[offset:offset + length])
offset += length
return larch.LeafNode(node_id, keys, values)
def max_index_pairs(self, node_size):
'''Return number of index pairs that fit in a node of a given size.'''
return (node_size - self.index_header.size) / self.index_pair_size
def index_size(self, keys, values):
'''Return size of an index node with the given pairs.'''
return self.index_header.size + self.index_pair_size * len(keys)
def encode_index(self, node):
'''Encode an index node as a byte string.'''
keys = node.keys()
child_ids = node.values()
return (self.index_header.pack('ORBI', node.id, len(keys)) +
''.join(keys) +
struct.pack('!%dQ' % len(child_ids), *child_ids))
def decode_index(self, encoded):
'''Decode an index node from its encoded byte string.'''
buf = buffer(encoded)
cookie, node_id, num_pairs = self.index_header.unpack_from(buf)
if cookie != 'ORBI':
raise CodecError('Index node does not begin with magic cookie '
'(should be ORBI, is %s)' % repr(cookie))
fmt = '!' + ('%ds' % self.key_bytes) * num_pairs + 'Q' * num_pairs
items = struct.unpack_from(fmt, buf, self.index_header.size)
keys = items[:num_pairs]
child_ids = items[num_pairs:]
assert len(keys) == len(child_ids)
for x in child_ids:
assert type(x) == int
return larch.IndexNode(node_id, keys, child_ids)
def encode(self, node):
'''Encode a node of any type.'''
if isinstance(node, larch.LeafNode):
return self.encode_leaf(node)
else:
return self.encode_index(node)
def decode(self, encoded):
'''Decode node of any type.'''
if encoded.startswith('ORBL'):
return self.decode_leaf(encoded)
elif encoded.startswith('ORBI'):
return self.decode_index(encoded)
else:
raise CodecError('Unknown magic cookie in encoded node (%s)' %
repr(encoded[:4]))
def size(self, node):
'''Return encoded size of a node, regardless of type.'''
keys = node.keys()
values = node.values()
if isinstance(node, larch.LeafNode):
return self.leaf_size(keys, values)
else:
return self.index_size(keys, values)
|