def test_cbf_jaccard_invalid(self):
    ''' use an invalid type in a jaccard index cbf

        Creates an on disk bloom backed by ``tmp.blm``, adds one string and
        asserts that ``jaccard_index(1)`` raises ``TypeError``. '''
    filename = 'tmp.blm'
    blm = BloomFilterOnDisk(filename, est_elements=10,
                            false_positive_rate=0.05)
    try:
        blm.add('this is a test')
        self.assertRaises(TypeError, lambda: blm.jaccard_index(1))
    finally:
        # release the filter and delete its backing file whether the
        # assertion passed or not, so nothing is left behind in the cwd
        blm.close()
        os.remove(filename)
def test_bfod_ea(self):
    ''' elements_added on an on disk bloom goes from 0 to 1 after one add '''
    path = 'tmp.blm'
    on_disk = BloomFilterOnDisk(path, 10, 0.05)

    # nothing has been inserted yet
    self.assertEqual(on_disk.elements_added, 0)

    on_disk.add('this is a test')
    # exactly one insertion has been recorded
    self.assertEqual(on_disk.elements_added, 1)

    on_disk.close()
    os.remove(path)
def test_bfod_ee(self):
    ''' estimate_elements on an on disk bloom is 0 when empty, 1 after
        a single add '''
    path = 'tmp.blm'
    on_disk = BloomFilterOnDisk(path, 10, 0.05)

    before = on_disk.estimate_elements()
    on_disk.add('this is a test')
    after = on_disk.estimate_elements()

    # the estimate must move, and must land on the exact counts
    self.assertNotEqual(before, after)
    self.assertEqual(before, 0)
    self.assertEqual(after, 1)

    on_disk.close()
    os.remove(path)
def test_bfod_check(self):
    ''' check() on an on disk bloom is True for the added strings and
        False for the two strings that were never added '''
    path = 'tmp.blm'
    on_disk = BloomFilterOnDisk(path, 10, 0.05)
    for phrase in ('this is a test', 'this is another test'):
        on_disk.add(phrase)

    # (string to look up, expected check() result), in assertion order
    expectations = (
        ('this is a test', True),
        ('this is another test', True),
        ('this is yet another test', False),
        ('this is not another test', False),
    )
    for phrase, present in expectations:
        self.assertEqual(on_disk.check(phrase), present)

    on_disk.close()
    os.remove(path)
def test_bfod_check(self):
    """check() on an on disk bloom is True for added strings, False for the others tested"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
        for phrase in ("this is a test", "this is another test"):
            on_disk.add(phrase)

        # (string to look up, expected check() result), in assertion order
        expectations = (
            ("this is a test", True),
            ("this is another test", True),
            ("this is yet another test", False),
            ("this is not another test", False),
        )
        for phrase, present in expectations:
            self.assertEqual(on_disk.check(phrase), present)

        on_disk.close()
def test_bfod_ee(self):
    """estimate_elements on an on disk bloom is 0 when empty and 1 after a single add"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)

        before = on_disk.estimate_elements()
        on_disk.add("this is a test")
        after = on_disk.estimate_elements()

        # the estimate must move, and must land on the exact counts
        self.assertNotEqual(before, after)
        self.assertEqual(before, 0)
        self.assertEqual(after, 1)

        on_disk.close()
def test_cbf_intersec_invalid_msg(self):
    ''' check invalid type in a intersection message cbf

        Calls ``intersection(1)`` on an on disk bloom and verifies both that
        a ``TypeError`` is raised and that its text matches ``msg``. '''
    msg = ('The parameter second must be of type BloomFilter or '
           'a BloomFilterOnDisk')
    filename = 'tmp.blm'
    blm = BloomFilterOnDisk(filename, est_elements=10,
                            false_positive_rate=0.05)
    try:
        blm.add('this is a test')
        # assertRaises fails the test when nothing is raised; the previous
        # bare try/except passed silently in that case
        with self.assertRaises(TypeError) as ctx:
            blm.intersection(1)
        self.assertEqual(str(ctx.exception), msg)
    finally:
        # close before deleting so the backing file is not removed while
        # the filter still holds it, and clean up even on failure
        blm.close()
        os.remove(filename)
def test_bfod_union_diff(self):
    ''' make sure checking for different bloom filters on disk works union

        The second filter is built with ``hash_function=different_hash``;
        the test asserts that ``union`` returns ``None`` for that pair. '''
    filename = 'tmp.blm'
    blm = BloomFilterOnDisk(filename, est_elements=10,
                            false_positive_rate=0.05)
    try:
        blm.add('this is a test')
        blm2 = BloomFilter(est_elements=10, false_positive_rate=0.05,
                           hash_function=different_hash)
        blm3 = blm.union(blm2)
        self.assertEqual(blm3, None)
    finally:
        # close before deleting so the backing file is not removed while
        # the filter still holds it, and clean up even on failure
        blm.close()
        os.remove(filename)
def test_cbf_intersec_invalid_msg(self):
    """check invalid type in a intersection message cbf

    Calls ``intersection(1)`` on an on disk bloom and verifies both that a
    ``TypeError`` is raised and that its text matches ``msg``.
    """
    msg = "The parameter second must be of type BloomFilter or a BloomFilterOnDisk"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj:
        blm = BloomFilterOnDisk(fobj.name, est_elements=10, false_positive_rate=0.05)
        blm.add("this is a test")
        # assertRaises fails the test when nothing is raised; the previous
        # bare try/except passed silently in that case
        with self.assertRaises(TypeError) as ctx:
            blm.intersection(1)
        self.assertEqual(str(ctx.exception), msg)
def test_bfod_export_c_header(self):
    """export an on disk bloom as a C header and verify the generated lines"""
    hex_val = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as bloom_file:
        blm = BloomFilterOnDisk(bloom_file.name, est_elements=10, false_positive_rate=0.05)
        for i in range(0, 10):
            blm.add("this is a test {0}".format(i))

        with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as header_file:
            blm.export_c_header(header_file.name)

            # read the header back with surrounding whitespace stripped per line
            with open(header_file.name, "r") as reader:
                lines = [raw.strip() for raw in reader.readlines()]

            # fixed preamble
            self.assertEqual("/* BloomFilter Export of a standard BloomFilter */", lines[0])
            self.assertEqual("#include <inttypes.h>", lines[1])

            # one constant per filter property, in this order
            expected = "const uint64_t estimated_elements = {};".format(blm.estimated_elements)
            self.assertEqual(expected, lines[2])
            expected = "const uint64_t elements_added = {};".format(blm.elements_added)
            self.assertEqual(expected, lines[3])
            expected = "const float false_positive_rate = {};".format(blm.false_positive_rate)
            self.assertEqual(expected, lines[4])
            expected = "const uint64_t number_bits = {};".format(blm.number_bits)
            self.assertEqual(expected, lines[5])
            expected = "const unsigned int number_hashes = {};".format(blm.number_hashes)
            self.assertEqual(expected, lines[6])

            # the byte array opens on line 7 and closes on the last line
            self.assertEqual("const unsigned char bloom[] = {", lines[7])
            self.assertEqual("};", lines[-1])

            # rebuild the hex string from the comma separated 0x.. literals
            array_body = " ".join(lines[8:-1])
            new_hex = "".join(piece.strip().replace("0x", "") for piece in array_body.split(","))
            self.assertEqual(hex_val, new_hex)
def test_bfod_close_del(self):
    """ close an on disk bloom using the del syntax

    After ``del blm`` any further use of the name must raise
    ``UnboundLocalError``; the exception text is checked as well.
    """
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    blm.add("this is a test")
    del blm
    # CPython reworded this error in 3.11, so accept either phrasing
    accepted = (
        "local variable 'blm' referenced before assignment",
        "cannot access local variable 'blm' where it is not associated with a value",
    )
    try:
        with self.assertRaises(UnboundLocalError) as ctx:
            # deliberate use of the deleted name
            self.assertEqual(True, blm)
        self.assertIn(str(ctx.exception), accepted)
    finally:
        # remove the backing file even when an assertion above fails
        os.remove(filename)
def test_bfod_union_diff(self):
    """make sure checking for different bloom filters on disk works union

    The second filter is built with ``hash_function=different_hash``; the
    test asserts that ``union`` returns ``None`` for that pair.
    """
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        blm2 = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=different_hash)
        blm3 = blm.union(blm2)
        self.assertEqual(blm3, None)
    finally:
        # close before deleting so the backing file is not removed while
        # the filter still holds it, and clean up even on failure
        blm.close()
        os.remove(filename)
def test_bfod_export_hex(self):
    """export_hex of an on disk bloom holding ten strings matches the known hex value"""
    expected_hex = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd"
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, est_elements=10, false_positive_rate=0.05)
        for i in range(0, 10):
            on_disk.add("this is a test {0}".format(i))
        self.assertEqual(on_disk.export_hex(), expected_hex)
def test_bfod_close_del(self):
    ''' close an on disk bloom using the del syntax

        After ``del blm`` any further use of the name must raise
        ``UnboundLocalError``; the exception text is checked as well. '''
    filename = 'tmp.blm'
    blm = BloomFilterOnDisk(filename, 10, 0.05)
    blm.add('this is a test')
    del blm
    # CPython reworded this error in 3.11, so accept either phrasing
    accepted = (
        "local variable 'blm' referenced before assignment",
        "cannot access local variable 'blm' where it is not associated "
        "with a value",
    )
    try:
        with self.assertRaises(UnboundLocalError) as ctx:
            # deliberate use of the deleted name
            self.assertEqual(True, blm)
        self.assertIn(str(ctx.exception), accepted)
    finally:
        # remove the backing file even when an assertion above fails
        os.remove(filename)
def test_bfod_export(self):
    ''' export an on disk bloom to a second file and compare the two
        files through calc_file_md5 '''
    source_path = 'tmp.blm'
    copy_path = 'tmp2.blm'

    on_disk = BloomFilterOnDisk(source_path, 10, 0.05)
    on_disk.add('this is a test')
    on_disk.export(copy_path)
    on_disk.close()

    # digests are taken only after the filter has been closed
    source_md5 = calc_file_md5(source_path)
    copy_md5 = calc_file_md5(copy_path)
    self.assertEqual(source_md5, copy_md5)

    for path in (source_path, copy_path):
        os.remove(path)
def test_bfod_jaccard(self):
    ''' jaccard index between an on disk bloom and an in-memory bloom
        sharing one of their two strings falls strictly between 0.33
        and 0.50 '''
    path = 'tmp.blm'
    on_disk = BloomFilterOnDisk(path, 10, 0.05)
    for phrase in ('this is a test', 'this is another test'):
        on_disk.add(phrase)

    in_memory = BloomFilter(10, 0.05)
    for phrase in ('this is another test', 'this is yet another test'):
        in_memory.add(phrase)

    similarity = on_disk.jaccard_index(in_memory)
    self.assertGreater(similarity, 0.33)
    self.assertLess(similarity, 0.50)

    on_disk.close()
    os.remove(path)
def test_cbf_union_invalid_msg(self):
    """ check invalid type in a union message cbf

    Calls ``union(1)`` on an on disk bloom and verifies both that a
    ``TypeError`` is raised and that its text matches ``msg``.
    """
    msg = "The parameter second must be of type BloomFilter or " "a BloomFilterOnDisk"
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filename, est_elements=10, false_positive_rate=0.05)
    try:
        blm.add("this is a test")
        # fails the test when no TypeError is raised
        with self.assertRaises(TypeError) as ctx:
            blm.union(1)
        self.assertEqual(str(ctx.exception), msg)
    finally:
        # close before deleting so the backing file is not removed while
        # the filter still holds it, and clean up even on failure
        blm.close()
        os.remove(filename)
def test_bfod_frombytes_msg(self):
    """BloomFilterOnDisk.frombytes raises NotSupportedError with the expected message"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 10, 0.05)
        on_disk.add("this is a test")
        serialized = bytes(on_disk)
        try:
            BloomFilterOnDisk.frombytes(serialized)
        except NotSupportedError as err:
            expected = "Loading from bytes is currently not supported by the on disk Bloom Filter"
            self.assertEqual(str(err), expected)
        else:
            # reaching here means nothing was raised: force a failure
            self.assertEqual(True, False)
def test_bfod_export(self):
    """ export an on disk bloom to a second file and compare the two files through calc_file_md5 """
    source_path = "tmp.blm"
    copy_path = "tmp2.blm"

    on_disk = BloomFilterOnDisk(source_path, 10, 0.05)
    on_disk.add("this is a test")
    on_disk.export(copy_path)
    on_disk.close()

    # digests are taken only after the filter has been closed
    source_md5 = calc_file_md5(source_path)
    copy_md5 = calc_file_md5(copy_path)
    self.assertEqual(source_md5, copy_md5)

    for path in (source_path, copy_path):
        os.remove(path)
def test_bfod_jaccard(self):
    """ jaccard index between an on disk bloom and an in-memory bloom sharing
    one of their two strings falls strictly between 0.33 and 0.50 """
    path = "tmp.blm"
    on_disk = BloomFilterOnDisk(path, 10, 0.05)
    for phrase in ("this is a test", "this is another test"):
        on_disk.add(phrase)

    in_memory = BloomFilter(10, 0.05)
    for phrase in ("this is another test", "this is yet another test"):
        in_memory.add(phrase)

    similarity = on_disk.jaccard_index(in_memory)
    self.assertGreater(similarity, 0.33)
    self.assertLess(similarity, 0.50)

    on_disk.close()
    os.remove(path)
def test_bfod_jaccard_diff(self):
    """jaccard_index returns None when the other filter was built with different_hash"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, est_elements=10, false_positive_rate=0.05)
        on_disk.add("this is a test")
        mismatched = BloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=different_hash)
        outcome = on_disk.jaccard_index(mismatched)
        self.assertEqual(outcome, None)
def test_bfod_close_del(self):
    """close an on disk bloom using the del syntax

    After ``del blm`` any further use of the name must raise
    ``UnboundLocalError``; the exception text is checked as well.
    """
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj:
        blm = BloomFilterOnDisk(fobj.name, 10, 0.05)
        blm.add("this is a test")
        del blm
        # CPython reworded this error in 3.11, so accept either phrasing
        accepted = (
            "local variable 'blm' referenced before assignment",
            "cannot access local variable 'blm' where it is not associated with a value",
        )
        with self.assertRaises(UnboundLocalError) as ctx:
            # deliberate use of the deleted name
            self.assertEqual(True, blm)
        self.assertIn(str(ctx.exception), accepted)
def test_bfod_jaccard(self):
    """jaccard index between an on disk and an in-memory bloom sharing one of two strings is in (0.33, 0.50)"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
        for phrase in ("this is a test", "this is another test"):
            on_disk.add(phrase)

        in_memory = BloomFilter(20, 0.05)
        for phrase in ("this is another test", "this is yet another test"):
            in_memory.add(phrase)

        similarity = on_disk.jaccard_index(in_memory)
        self.assertGreater(similarity, 0.33)
        self.assertLess(similarity, 0.50)

        on_disk.close()
def test_bfod_jaccard_diff(self):
    ''' make sure checking for different bloom filters on disk works
        jaccard

        The second filter is built with ``hash_function=different_hash``;
        the test asserts that ``jaccard_index`` returns ``None`` for that
        pair. '''
    filename = 'tmp.blm'
    blm = BloomFilterOnDisk(filename, est_elements=10,
                            false_positive_rate=0.05)
    try:
        blm.add('this is a test')
        blm2 = BloomFilter(est_elements=10, false_positive_rate=0.05,
                           hash_function=different_hash)
        blm3 = blm.jaccard_index(blm2)
        self.assertEqual(blm3, None)
    finally:
        # close before deleting so the backing file is not removed while
        # the filter still holds it, and clean up even on failure
        blm.close()
        os.remove(filename)
def test_bfod_clear(self):
    ''' test clearing out the bloom filter on disk

        Adds ten strings, calls ``clear()`` and verifies that
        ``elements_added`` is back to 0 and that every index below
        ``bloom_length`` reads as 0 through ``_get_element``. '''
    filename = 'tmp.blm'
    blm = BloomFilterOnDisk(filepath=filename, est_elements=10,
                            false_positive_rate=0.05)
    try:
        self.assertEqual(blm.elements_added, 0)
        for i in range(0, 10):
            tmp = 'this is a test {0}'.format(i)
            blm.add(tmp)
        self.assertEqual(blm.elements_added, 10)

        blm.clear()
        self.assertEqual(blm.elements_added, 0)
        for idx in range(blm.bloom_length):
            self.assertEqual(blm._get_element(idx), 0)
    finally:
        # close before deleting so the backing file is not removed while
        # the filter still holds it, and clean up even on failure
        blm.close()
        os.remove(filename)
def test_bfod_export(self):
    """export an on disk bloom to a second temp file and compare both files through calc_file_md5"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as source_file:
        with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as copy_file:
            on_disk = BloomFilterOnDisk(source_file.name, 10, 0.05)
            on_disk.add("this is a test")
            on_disk.export(copy_file.name)
            on_disk.close()

            # digests are taken only after the filter has been closed
            source_md5 = calc_file_md5(source_file.name)
            copy_md5 = calc_file_md5(copy_file.name)
            self.assertEqual(source_md5, copy_md5)
def test_bfod_clear(self):
    """ test clearing out the bloom filter on disk

    Adds ten strings, calls ``clear()`` and verifies that ``elements_added``
    is back to 0 and that every index below ``bloom_length`` reads as 0
    through ``_get_element``.
    """
    filename = "tmp.blm"
    blm = BloomFilterOnDisk(filepath=filename, est_elements=10, false_positive_rate=0.05)
    try:
        self.assertEqual(blm.elements_added, 0)
        for i in range(0, 10):
            tmp = "this is a test {0}".format(i)
            blm.add(tmp)
        self.assertEqual(blm.elements_added, 10)

        blm.clear()
        self.assertEqual(blm.elements_added, 0)
        for idx in range(blm.bloom_length):
            self.assertEqual(blm._get_element(idx), 0)
    finally:
        # close before deleting so the backing file is not removed while
        # the filter still holds it, and clean up even on failure
        blm.close()
        os.remove(filename)
def test_bfod_union(self):
    """ union of an on disk bloom (two strings) with an in-memory bloom (one
    string) reports three elements and contains all three strings """
    path = "tmp.blm"
    on_disk = BloomFilterOnDisk(path, 10, 0.05)
    for phrase in ("this is a test", "this is another test"):
        on_disk.add(phrase)

    in_memory = BloomFilter(10, 0.05)
    in_memory.add("this is yet another test")

    merged = on_disk.union(in_memory)
    self.assertEqual(merged.estimate_elements(), 3)
    self.assertEqual(merged.elements_added, 3)

    # (string to look up, expected check() result), in assertion order
    expectations = (
        ("this is a test", True),
        ("this is another test", True),
        ("this is yet another test", True),
        ("this is not another test", False),
    )
    for phrase, present in expectations:
        self.assertEqual(merged.check(phrase), present)

    on_disk.close()
    os.remove(path)
def test_bfod_clear(self):
    """clear() on an on disk bloom resets elements_added and zeroes every element"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(filepath=tmp_file.name, est_elements=10, false_positive_rate=0.05)
        self.assertEqual(on_disk.elements_added, 0)

        for num in range(0, 10):
            on_disk.add("this is a test {0}".format(num))
        self.assertEqual(on_disk.elements_added, 10)

        on_disk.clear()
        self.assertEqual(on_disk.elements_added, 0)

        # walk every index below bloom_length; each must read back as 0
        position = 0
        while position < on_disk.bloom_length:
            self.assertEqual(on_disk._get_element(position), 0)
            position += 1
def test_bfod_union(self):
    ''' union of an on disk bloom (two strings) with an in-memory bloom
        (one string) reports three elements and contains all three
        strings '''
    path = 'tmp.blm'
    on_disk = BloomFilterOnDisk(path, 10, 0.05)
    for phrase in ('this is a test', 'this is another test'):
        on_disk.add(phrase)

    in_memory = BloomFilter(10, 0.05)
    in_memory.add('this is yet another test')

    merged = on_disk.union(in_memory)
    self.assertEqual(merged.estimate_elements(), 3)
    self.assertEqual(merged.elements_added, 3)

    # (string to look up, expected check() result), in assertion order
    expectations = (
        ('this is a test', True),
        ('this is another test', True),
        ('this is yet another test', True),
        ('this is not another test', False),
    )
    for phrase, present in expectations:
        self.assertEqual(merged.check(phrase), present)

    on_disk.close()
    os.remove(path)
def test_bfod_intersection(self):
    ''' intersection of an on disk bloom with an in-memory bloom that
        shares one string reports one element and contains only that
        shared string '''
    path = 'tmp.blm'
    on_disk = BloomFilterOnDisk(path, 10, 0.05)
    for phrase in ('this is a test', 'this is another test'):
        on_disk.add(phrase)

    in_memory = BloomFilter(10, 0.05)
    for phrase in ('this is another test', 'this is yet another test'):
        in_memory.add(phrase)

    common = on_disk.intersection(in_memory)
    self.assertEqual(common.estimate_elements(), 1)
    self.assertEqual(common.elements_added, 1)

    # (string to look up, expected check() result), in assertion order
    expectations = (
        ('this is a test', False),
        ('this is another test', True),
        ('this is yet another test', False),
        ('this is not another test', False),
    )
    for phrase, present in expectations:
        self.assertEqual(common.check(phrase), present)

    on_disk.close()
    os.remove(path)
def test_bfod_union(self):
    """union of an on disk bloom (two strings) with an in-memory bloom (one string) holds all three"""
    with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as tmp_file:
        on_disk = BloomFilterOnDisk(tmp_file.name, 20, 0.05)
        for phrase in ("this is a test", "this is another test"):
            on_disk.add(phrase)

        in_memory = BloomFilter(20, 0.05)
        in_memory.add("this is yet another test")

        merged = on_disk.union(in_memory)
        self.assertEqual(merged.estimate_elements(), 3)
        self.assertEqual(merged.elements_added, 3)

        # (string to look up, expected check() result), in assertion order
        expectations = (
            ("this is a test", True),
            ("this is another test", True),
            ("this is yet another test", True),
            ("this is not another test", False),
        )
        for phrase, present in expectations:
            self.assertEqual(merged.check(phrase), present)

        on_disk.close()