def test_count():
    """Verify count() is 0 on a new filter and tracks distinct additions.

    Re-adding an item that is already present must leave the count as is.
    """
    counting_filter = CountingBloomFilter(8000, 3)
    assert counting_filter.count() == 0

    # (item to add, count expected right after); the second "test" is a
    # duplicate and therefore must not raise the count.
    steps = (
        ("test", 1),
        ("test", 1),
        ("test2", 2),
    )
    for item, expected_count in steps:
        counting_filter.add(item)
        assert counting_filter.count() == expected_count
def test_count_when_full():
    """Verify count() on a filled-up filter equals length / num_of_hashes."""
    length, num_of_hashes = 8, 2
    bf = CountingBloomFilter(length, num_of_hashes)

    # In the ideal case 4 items would be enough to fill a filter of
    # length 8, but we cannot tell in advance which items those are.
    # Adding 20 distinct strings makes a full filter all but certain.
    for item in map("test{}".format, range(20)):
        bf.add(item)

    expected_count = length / num_of_hashes
    assert bf.count() == expected_count


# --- Example #3 (0) ---
# Sample paragraph of filler text; one source line per sentence or clause,
# joined by implicit string concatenation into a single-line string.
LOREM_IPSUM = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
    "Mauris consequat leo ut vehicula placerat. "
    "In lacinia, nisl id maximus auctor, sem elit interdum urna, "
    "at efficitur tellus turpis at quam. "
    "Pellentesque eget iaculis turpis. "
    "Nam ac ligula ut nunc porttitor pharetra in non lorem. "
    "In purus metus, sollicitudin tristique sapien."
)

if __name__ == '__main__':
    # Demo script: build a filter, report on it, add the words of
    # LOREM_IPSUM and show how the membership answer for 'Lorem' changes.
    bf = CountingBloomFilter(80000, 4)

    print(bf)
    print("Bloom filter uses {} bytes in the memory".format(bf.sizeof()))

    # Reported before anything has been added to the filter.
    print("Filter contains approx. {} unique elements".format(bf.count()))

    # Membership probe before any insertion.
    print("'Lorem' {} in the filter".format(
        "is" if bf.test("Lorem") else "is not"))

    # split() breaks on whitespace, so tokens keep their punctuation here;
    # the set removes tokens that are repeated verbatim.
    words = set(LOREM_IPSUM.split())
    for word in words:
        # Trim leading/trailing spaces, periods and commas before adding.
        bf.add(word.strip(" .,"))

    # NOTE: len(words) counts the raw tokens. Tokens that differ only by
    # surrounding punctuation (e.g. "elit." and "elit") collapse to the same
    # string after strip(), so fewer distinct strings than len(words) were
    # actually added.
    print("Added {} words, in the filter approx. {} unique elements".format(
        len(words), bf.count()))

    # Same membership probe, now after the insertions.
    print("'Lorem' {} in the filter".format(
        "is" if bf.test("Lorem") else "is not"))

    # NOTE(review): only the announcement is visible at this point; the
    # removal itself presumably happens in the lines that follow - confirm.
    print("Delete 'Lorem' from the filter")