def test_repr():
    """Check ``repr()`` for filters built directly and from a capacity."""
    # Filter constructed with an explicit length and number of hashes.
    explicit = BloomFilter(8000, 3)
    expected_explicit = "<BloomFilter (length: 8000, hashes: 3)>"
    assert repr(explicit) == expected_explicit

    # Filter constructed from a capacity (5000) and an error rate (0.02);
    # the length and hash count shown in the repr come from the filter.
    derived = BloomFilter.create_from_capacity(5000, 0.02)
    expected_derived = "<BloomFilter (length: 40712, hashes: 5)>"
    assert repr(derived) == expected_derived
def test_count_when_full():
    """Check ``count()`` on a tiny filter after far more adds than it fits."""
    length = 8
    num_of_hashes = 2
    expected_count = length / num_of_hashes

    bf = BloomFilter(length, num_of_hashes)

    # Twenty distinct strings are added so that a filter of length 8 is,
    # in practice, full afterwards.
    # NOTE: in the ideal case 4 items would be enough, but there is no way
    # to know in advance which ones.
    elements = ["test{}".format(i) for i in range(20)]
    for element in elements:
        bf.add(element)

    assert bf.count() == expected_count
def test_init_from_capacity():
    """Check ``create_from_capacity`` sizing and its argument validation."""
    sized = BloomFilter.create_from_capacity(5000, 0.02)
    assert sized.sizeof() == 5089, "Unexpected size in bytes"

    # An error rate of 2 must be rejected.
    # NOTE: the expected message is compared verbatim, so its wording is
    # kept exactly as written here.
    with pytest.raises(ValueError) as bad_rate:
        BloomFilter.create_from_capacity(5000, 2)
    assert str(bad_rate.value) == 'Error rate shell be in (0, 1)'

    # A capacity of 0 must be rejected as well.
    with pytest.raises(ValueError) as bad_capacity:
        BloomFilter.create_from_capacity(0, 0.02)
    assert str(bad_capacity.value) == "Filter capacity can't be 0 or negative"

    # A very high error rate (0.999) still yields a filter of length 16.
    tiny = BloomFilter.create_from_capacity(5000, 0.999)
    assert len(tiny) == 16
def test_init():
    """Check direct construction: size in bytes and argument validation."""
    valid = BloomFilter(8000, 3)
    assert valid.sizeof() == 1000, "Unexpected size in bytes"

    # Zero hash functions must be rejected.
    with pytest.raises(ValueError) as no_hashes:
        BloomFilter(8000, 0)
    assert str(no_hashes.value) == 'At least one hash function is required'

    # A length of 0 must be rejected.
    with pytest.raises(ValueError) as no_length:
        BloomFilter(0, 5)
    assert str(no_length.value) == "Filter length can't be 0 or negative"
def test_len():
    """Check ``len()`` for the requested lengths 8000 and 8001."""
    # (requested length, length reported by ``len``).
    # NOTE(review): 8001 -> 8008 looks like rounding up to a multiple of 8;
    # confirm against the BloomFilter implementation.
    cases = (
        (8000, 8000),
        (8001, 8008),
    )
    for requested, reported in cases:
        bf = BloomFilter(requested, 3)
        assert len(bf) == reported
def test_count():
    """Check that ``count()`` tracks unique additions, ignoring repeats."""
    bf = BloomFilter(8000, 3)
    assert bf.count() == 0

    # (element to add, count expected right after adding it); the second
    # "test" is a repeat and must not change the count.
    steps = (
        ("test", 1),
        ("test", 1),
        ("test2", 2),
    )
    for element, expected in steps:
        bf.add(element)
        assert bf.count() == expected
def test_lookup():
    """Check lookups for an added element and for one never added."""
    bf = BloomFilter(8000, 3)
    bf.add("test")

    present = bf.test("test")
    assert present == 1, "Can't find recently added element"

    absent = bf.test("test_test")
    assert absent == 0, "False positive detected"
def test_add():
    """Check that elements of several types can be added and then found."""
    bf = BloomFilter(8000, 3)

    # A string, an integer and a dict are each added and looked up in turn.
    samples = ("test", 1, {"hello": "world"})
    for sample in samples:
        bf.add(sample)
        assert bf.test(sample) == 1, "Can't find recently added element"
"""Example how to use Classical Bloom Filter.""" from pdsa.membership.bloom_filter import BloomFilter LOREM_IPSUM = ( "Lorem ipsum dolor sit amet, consectetur adipiscing elit." " Mauris consequat leo ut vehicula placerat. In lacinia, nisl" " id maximus auctor, sem elit interdum urna, at efficitur tellus" " turpis at quam. Pellentesque eget iaculis turpis. Nam ac ligula" " ut nunc porttitor pharetra in non lorem. In purus metus," " sollicitudin tristique sapien.") if __name__ == '__main__': bf = BloomFilter(80000, 4) print(bf) print("Bloom filter uses {} bytes in the memory".format(bf.sizeof())) print("Filter contains approx. {} unique elements".format(bf.count())) print("'Lorem' {} in the filter".format( "is" if bf.test("Lorem") else "is not")) words = set(LOREM_IPSUM.split()) for word in words: bf.add(word.strip(" .,")) print("Added {} words, in the filter approx. {} unique elements".format( len(words), bf.count())) print("'Lorem' {} in the filter".format(