/usr/share/doc/python-bitarray/examples/bloom.py is in python-bitarray 0.8.0-2build4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | """
Demonstrates the implementation of a Bloom filter, see:
http://en.wikipedia.org/wiki/Bloom_filter
"""
import hashlib
from math import exp, log
from bitarray import bitarray
class BloomFilter(object):
def __init__(self, m, k):
self.m = m
self.k = k
self.array = bitarray(m)
self.array.setall(0)
def add(self, key):
for i in self._hashes(key):
self.array[i] = 1
def contains(self, key):
return all(self.array[i] for i in self._hashes(key))
def _hashes(self, key):
"""
generate k different hashes, each of which maps a key to one of
the m array positions with a uniform random distribution
"""
h = hashlib.new('md5')
h.update(str(key))
x = long(h.hexdigest(), 16)
for _ in xrange(self.k):
if x < self.m:
h.update('.')
x = long(h.hexdigest(), 16)
x, y = divmod(x, self.m)
yield y
def test_bloom(m, k, n):
b = BloomFilter(m, k)
for i in xrange(n):
b.add(i)
assert b.contains(i)
p = (1.0 - exp(-k * (n + 0.5) / (m - 1))) ** k
print 100.0 * p, '%'
N = 100000
false_pos = sum(b.contains(i) for i in xrange(n, n + N))
print 100.0 * false_pos / N, '%'
if __name__ == '__main__':
test_bloom(50000, 6, 5000)
|