def test_params_for_capacity(self):
    """
    Checks the (bytes, k) pair returned by params_for_capacity for a
    capacity of 1e6 and a probability of 1e-4 against known sane
    values.
    """
    # Reference numbers from http://hur.st/bloomfilter?n=1e6&p=1e-4
    reference_bits = 19170117
    num_bytes, k = cBloom.params_for_capacity(1e6, 1e-4)

    # The extra buffer is subtracted before comparing with the reference
    payload_bytes = num_bytes - cBloom.extra_buffer()
    assert payload_bytes == round(reference_bits / 8.0)

    # Parameters uses the ceiling instead of rounding
    assert k == 14
def test_for_capacity(self):
    """
    Checks that for_capacity builds a bloom filter whose bitmap size
    and k_num match known sane values for a capacity of 1e6 and a
    probability of 1e-4.
    """
    # Reference numbers from http://hur.st/bloomfilter?n=1e6&p=1e-4
    reference_bits = 19170117
    bf = cBloom.for_capacity(1e6, 1e-4)

    # Check the bitmap size, less the extra buffer
    bitmap_bytes = len(bf.bitmap) / 8
    assert bitmap_bytes - cBloom.extra_buffer() == round(reference_bits / 8.0)

    # Check the k num; parameters uses the ceiling instead of rounding
    assert bf.k_num == 14
def test_check_int(self):
    """
    Checks that testing an int for membership in the filter raises
    TypeError.
    """
    bf = cBloom.for_capacity(1000, 1e-4)
    bad_key = 1234
    with pytest.raises(TypeError):
        bad_key in bf
def test_check_none(self):
    """
    Checks that testing None for membership in the filter raises
    TypeError.
    """
    bf = cBloom.for_capacity(1000, 1e-4)
    bad_key = None
    with pytest.raises(TypeError):
        bad_key in bf
def test_add_int(self):
    """
    Checks that adding an int key to the filter raises TypeError.
    """
    bf = cBloom.for_capacity(1000, 1e-4)
    bad_key = 1234
    with pytest.raises(TypeError):
        bf.add(bad_key)
def test_add_none(self):
    """
    Checks that adding None to the filter raises TypeError.
    """
    bf = cBloom.for_capacity(1000, 1e-4)
    bad_key = None
    with pytest.raises(TypeError):
        bf.add(bad_key)
def test_expected_prob(self):
    """
    Checks the expected false positive probability, rounded to four
    places, against a known-sane value.
    """
    # Reference numbers from http://hur.st/bloomfilter?n=1e6&p=1e-4
    probability = cBloom.expected_probability(19170117, 1e6)
    assert round(probability, 4) == 1e-4
def test_required_bytes(self):
    """
    Checks that the number of bits the bloom filter says it requires,
    converted to whole bytes, matches a known-sane value.
    """
    # Reference numbers from http://hur.st/bloomfilter?n=1e6&p=1e-4
    expected_bytes = round(19170117 / 8.0)
    needed_bits = cBloom.required_bits(1e6, 1e-4)
    assert round(needed_bits / 8.0) == expected_bytes
def test_small_bitmap(self):
    """
    Checks that initializing with a bitmap that is too small (here one
    built from exactly extra_buffer()) raises ValueError.
    """
    with pytest.raises(ValueError):
        # Both constructions stay inside the block, so a ValueError
        # from either one satisfies the expectation
        undersized = Bitmap(cBloom.extra_buffer())
        cBloom(undersized, 3)
def test_length(self):
    """
    Tests that len() works: 0 for a freshly created filter and 1000
    after adding 1000 distinct keys.
    """
    bf = cBloom.for_capacity(1000, 1e-4)
    assert len(bf) == 0

    # add() is called only for its side effect, so use a plain loop
    # rather than building a throwaway list of results
    for x in xrange(1000):
        bf.add("test%d" % x)
    assert len(bf) == 1000
def test_prob(self):
    """
    Checks that the bloom filter is only wrong within a certain
    threshold when 1000 distinct keys are added.
    """
    # Built for a probability of 0.01, i.e. wrong once per hundred
    bf = cBloom.for_capacity(1000, 0.01)

    outcomes = []
    for x in xrange(1000):
        outcomes.append(bf.add("test%d" % x, True))

    # An add that returned False is counted as wrong
    num_wrong = sum(1 for outcome in outcomes if outcome is False)

    # Should get about 10 wrong
    assert 5 <= num_wrong <= 15
def test_add_without_check(self):
    """
    Checks adding to a bloom filter with False passed as the second
    argument to add (presumably the check-for-existing flag): every
    add succeeds, every key is then contained, and adding the same
    keys again doubles the length.
    """
    bf = cBloom.for_capacity(1000, 1e-4)

    # First pass: all adds work and every key is found afterwards
    first_pass = [bf.add("test%d" % x, False) for x in xrange(1000)]
    assert all(first_pass)
    found = [bf.__contains__("test%d" % x) for x in xrange(1000)]
    assert all(found)
    assert len(bf) == 1000

    # Second pass with the same keys: all adds still work and the
    # length grows again
    second_pass = [bf.add("test%d" % x, False) for x in xrange(1000)]
    assert all(second_pass)
    assert len(bf) == 2000
def test_swap(self):
    """
    Swaps the mmap file from one implementation to another and checks
    that things work. Starts with cBloom, then pyBloom.

    Each filter is closed in a ``finally`` clause so that a failing
    add or assertion does not leave it open.
    """
    num_bytes, k = cBloom.params_for_capacity(2e4, 1e-3)

    # Populate the file through cBloom
    bitmap = Bitmap(num_bytes, "testswap1.mmap")
    bf1 = cBloom(bitmap, k)
    try:
        for x in xrange(20000):
            bf1.add("foo%d" % x)
    finally:
        bf1.close()

    # Make a new bitmap on the same file and open it with pyBloom,
    # passing a different k (50) than the one used above
    bitmap = Bitmap(num_bytes, "testswap1.mmap")
    bf2 = pyBloom(bitmap, 50)
    try:
        assert len(bf2) == 20000

        # Should reload size and k
        assert bf2.k_num == k

        # Check all the entries
        assert all(bf2.__contains__("foo%d" % x) for x in xrange(20000))
    finally:
        bf2.close()
def test_equality_2(self):
    """
    Tests that the two implementations generate matching mmaps when
    given the same parameters and the same 20000 keys.

    Both filters are closed in a ``finally`` clause, in creation
    order, so that a failing add or assertion does not leave them
    open. The files are compared only after both are closed.
    """
    num_bytes, k = cBloom.params_for_capacity(2e4, 1e-3)
    opened = []  # filters to close, in creation order
    try:
        bitmap = Bitmap(num_bytes, "testcompatc2.mmap")
        bf1 = cBloom(bitmap, k)
        opened.append(bf1)
        for x in xrange(20000):
            bf1.add("foo%d" % x)

        # Make a new bitmap
        bitmap = Bitmap(num_bytes, "testcompatpy2.mmap")
        bf2 = pyBloom(bitmap, k)
        opened.append(bf2)
        for x in xrange(20000):
            bf2.add("foo%d" % x)

        # Check the lengths
        assert len(bf1) == len(bf2)
    finally:
        for bf in opened:
            bf.close()

    # Compare the mmap files
    self.compare_files("testcompatc2.mmap", "testcompatpy2.mmap")
def test_ideal_k(self):
    """
    Checks the ideal K, after rounding, against a known-sane value.
    """
    # Reference numbers from http://hur.st/bloomfilter?n=1e6&p=1e-4
    ideal = cBloom.ideal_k(19170117, 1e6)
    assert round(ideal) == 13
def test_expected_capacity(self):
    """
    Checks the expected capacity, after rounding, against a known-sane
    value.
    """
    # Reference numbers from http://hur.st/bloomfilter?n=1e6&p=1e-4
    capacity = cBloom.expected_capacity(19170117, 1e-4)
    assert round(capacity) == 1e6