/usr/lib/python3/dist-packages/hpack/huffman.py is in python3-hpack 2.1.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 | # -*- coding: utf-8 -*-
"""
hpack/huffman_decoder
~~~~~~~~~~~~~~~~~~~~~
An implementation of a bitwise prefix tree specially built for decoding
Huffman-coded content where we already know the Huffman table.
"""
from .compat import to_byte, decode_hex
from .exceptions import HPACKDecodingError
def _pad_binary(bin_str, req_len=8):
"""
Given a binary string (returned by bin()), pad it to a full byte length.
"""
bin_str = bin_str[2:] # Strip the 0b prefix
return max(0, req_len - len(bin_str)) * '0' + bin_str
def _hex_to_bin_str(hex_string):
"""
Given a Python bytestring, returns a string representing those bytes in
unicode form.
"""
unpadded_bin_string_list = (bin(to_byte(c)) for c in hex_string)
padded_bin_string_list = map(_pad_binary, unpadded_bin_string_list)
bitwise_message = "".join(padded_bin_string_list)
return bitwise_message
class HuffmanDecoder(object):
"""
Decodes a Huffman-coded bytestream according to the Huffman table laid out
in the HPACK specification.
"""
class _Node(object):
def __init__(self, data):
self.data = data
self.mapping = {}
def __init__(self, huffman_code_list, huffman_code_list_lengths):
self.root = self._Node(None)
for index, (huffman_code, code_length) in enumerate(zip(huffman_code_list, huffman_code_list_lengths)):
self._insert(huffman_code, code_length, index)
def _insert(self, hex_number, hex_length, letter):
"""
Inserts a Huffman code point into the tree.
"""
hex_number = _pad_binary(bin(hex_number), hex_length)
cur_node = self.root
for digit in hex_number:
if digit not in cur_node.mapping:
cur_node.mapping[digit] = self._Node(None)
cur_node = cur_node.mapping[digit]
cur_node.data = letter
def decode(self, encoded_string):
"""
Decode the given Huffman coded string.
"""
number = _hex_to_bin_str(encoded_string)
cur_node = self.root
decoded_message = bytearray()
try:
for digit in number:
cur_node = cur_node.mapping[digit]
if cur_node.data is not None:
# If we get EOS, everything else is padding.
if cur_node.data == 256:
break
decoded_message.append(cur_node.data)
cur_node = self.root
except KeyError:
# We have a Huffman-coded string that doesn't match our trie. This
# is pretty bad: raise a useful exception.
raise HPACKDecodingError("Invalid Huffman-coded string received.")
return bytes(decoded_message)
class HuffmanEncoder(object):
"""
Encodes a string according to the Huffman encoding table defined in the
HPACK specification.
"""
def __init__(self, huffman_code_list, huffman_code_list_lengths):
self.huffman_code_list = huffman_code_list
self.huffman_code_list_lengths = huffman_code_list_lengths
def encode(self, bytes_to_encode):
"""
Given a string of bytes, encodes them according to the HPACK Huffman
specification.
"""
# If handed the empty string, just immediately return.
if not bytes_to_encode:
return b''
final_num = 0
final_int_len = 0
# Turn each byte into its huffman code. These codes aren't necessarily
# octet aligned, so keep track of how far through an octet we are. To
# handle this cleanly, just use a single giant integer.
for char in bytes_to_encode:
byte = to_byte(char)
bin_int_len = self.huffman_code_list_lengths[byte]
bin_int = self.huffman_code_list[byte] & (2 ** (bin_int_len + 1) - 1)
final_num <<= bin_int_len
final_num |= bin_int
final_int_len += bin_int_len
# Pad out to an octet with ones.
bits_to_be_padded = (8 - (final_int_len % 8)) % 8
final_num <<= bits_to_be_padded
final_num |= (1 << (bits_to_be_padded)) - 1
# Convert the number to hex and strip off the leading '0x' and the
# trailing 'L', if present.
final_num = hex(final_num)[2:].rstrip('L')
# If this is odd, prepend a zero.
final_num = '0' + final_num if len(final_num) % 2 != 0 else final_num
# This number should have twice as many digits as bytes. If not, we're
# missing some leading zeroes. Work out how many bytes we want and how
# many digits we have, then add the missing zero digits to the front.
total_bytes = (final_int_len + bits_to_be_padded) // 8
expected_digits = total_bytes * 2
if len(final_num) != expected_digits:
missing_digits = expected_digits - len(final_num)
final_num = ('0' * missing_digits) + final_num
return decode_hex(final_num)
|