# Report the hash-function count the filter settled on.
print("Optimal number of Hash Functions:{}\n".format(bloom_obj.hash))

# Words that get inserted into the filter.
word_present = [
    'abound', 'abounds', 'abundance', 'abundant', 'accessable',
    'bloom', 'blossom', 'bolster', 'bonny', 'bonus', 'bonuses',
]

# Words that are never inserted; any hit on these is a false positive.
word_absent = [
    'bluff', 'cheater', 'hate', 'war', 'humanity',
    'racism', 'hurt', 'nuke', 'gloomy', 'facebook',
]

for item in word_present:
    bloom_obj.add(item)

# Randomise both pools, then build a mixed probe list from the first ten
# inserted words plus every non-inserted word.
shuffle(word_present)
shuffle(word_absent)
test_words = word_present[:10] + word_absent
shuffle(test_words)

for word in test_words:
    # Pick the message first, then print once.
    if not bloom_obj.check(word):
        template = "'{}' is definitely not present!"
    elif word in word_absent:
        template = "'{}' is a false positive!"
    else:
        template = "'{}' is probably present!"
    print(template.format(word))
'ironman', 'thor', 'american_captain', 'spiderman', 'loki', 'wolverine', 'black_widow', 'hulk', 'deadpool', 'nick_fury', 'thanos', 'Dr_strange', 'venon', 'odin', 'magneto', 'black_panter', 'rocket', 'gamora', 'ultron', 'groot', 'ant_man' ] # Palabras no existentes nombres_no_existentes = [ 'superman', 'batman', 'wonder_woman', 'green_Lantern', 'he_Man', 'batgirl', 'lion_O', 'shazam', 'aquaman', 'green_arrow', 'flash', 'tygro', 'Cheetara' ] for item in nombres_existentes: bloom_filtro.add(item) shuffle(nombres_existentes) shuffle(nombres_no_existentes) prueba = nombres_existentes[:10] + nombres_no_existentes shuffle(prueba) print("=======================================================") for word in prueba: if bloom_filtro.check(word): if word in nombres_no_existentes: print("'{}' es un falso positivo!".format(word)) else: print("'{}' probablemente existe!".format(word)) else: print("'{}' definitivamente no existe!".format(word)) print("=======================================================")
def test():
    """Smoke-test the in-memory and on-disk bloom filters by printing results.

    Exercises, in order, on ``BloomFilter``: add/check, binary export and
    load, intersection, union, the ``in`` operator, ``estimate_elements``,
    ``jaccard_index`` and a hex export/import round trip. It then runs a
    basic add/check cycle on ``BloomFilterOnDisk``. Nothing is asserted;
    the printed output is meant to be inspected by eye.

    Side effects: writes ``./dist/py_bloom.blm`` and ``./dist/py_ondisk.blm``.
    NOTE(review): presumably requires ``./dist`` to exist already -- confirm.
    """
    # --- in-memory filter: basic add / check ---
    blm = BloomFilter()
    blm.init(10, 0.05)
    blm.add("this is a test")
    print(blm.check("this is a test"))
    print(blm.check("blah"))
    print(blm)
    print(blm.bloom_array)
    blm.export('./dist/py_bloom.blm')

    # --- reload the exported file into a second filter ---
    print('\n\ncheck imported BloomFilter!')
    blm2 = BloomFilter()
    blm2.load('./dist/py_bloom.blm')
    print(blm2.check("this is a test"))
    print(blm2.check("blah"))
    print(blm2)
    print(blm2.bloom_array)
    # Make blm2 differ from blm so the set operations below are non-trivial.
    blm2.add('yet another test')

    print("\n\ncheck intersection")
    blm3 = blm.intersection(blm2)
    print(blm3)
    print(blm3.check("this is a test"))
    print(blm3.check("yet another test"))

    print("\n\ncheck union")
    blm3 = blm.union(blm2)
    print(blm3)
    print(blm3.check("this is a test"))

    print('\n\ntest using `in`')
    print("this is a test" in blm3)
    print(blm3.check("yet another test"))
    print(blm3.estimate_elements())
    print(blm.jaccard_index(blm2))

    # --- hex export / import round trip ---
    print('\n\nexport to hex')
    hex_out = blm.export_hex()
    print(hex_out)
    print('import hex')
    blm4 = BloomFilter()
    blm4.load_hex(hex_out)
    print(blm4)

    # --- on-disk filter ---
    print('\n\nbloom filter on disk')
    blmd = BloomFilterOnDisk()
    blmd.initialize('./dist/py_ondisk.blm', 10, 0.05)
    try:
        blmd.add("this is a test")
        print(blmd.check('this is a test'))
        print('Check use of in keyword ("this is a test" in blmd): ',
              'this is a test' in blmd)
        print(blmd.check('yet another test'))
        # The calls below were already disabled in the original test.
        # NOTE(review): presumably unsupported on the on-disk filter -- confirm.
        # blmd.union(blm4)
        # blmd.intersection(blm)
        # print(blmd.jaccard_index(blm2))
        print(blmd)
        # print ('\n\nexport to hex')
        # hex_out = blmd.export_hex()
        # print(hex_out)
    finally:
        # Always release the on-disk filter, even if a call above raised.
        blmd.close()