def test_lookup3():
    """Exercise ``KmerSignatureIndex.lookup`` on an index built from two Bloom filters.

    Two filters (size 250, one hash function) are populated from kmer lists
    that share "ATC", "ATG" and "ATA" and differ only in their last kmer
    ("ATT" vs "TTT"). For every storage returned by ``get_storages()`` the
    storage is cleared, an index is created from the two filters, and three
    lookups are compared against expected kmer -> bitarray mappings. The
    second and third queries repeat "ATC", while the expected mapping lists
    it only once.
    """
    size = 250
    num_hashes = 1
    kmer_lists = [
        ["ATC", "ATG", "ATA", "ATT"],
        ["ATC", "ATG", "ATA", "TTT"],
    ]
    filters = [
        BloomFilter(size, num_hashes).update(convert_query_kmers(kmers))
        for kmers in kmer_lists
    ]

    # NOTE(review): the expected bits look like one column per filter, in list
    # order ("10" = first filter only) -- confirm against KmerSignatureIndex.
    cases = [
        (["ATC"], {"ATC": bitarray("11")}),
        (
            ["ATC", "ATC", "ATT"],
            {
                "ATC": bitarray("11"),
                "ATT": bitarray("10"),
            },
        ),
        (
            ["ATC", "ATC", "ATT", "TTT"],
            {
                "ATC": bitarray("11"),
                "ATT": bitarray("10"),
                "TTT": bitarray("01"),
            },
        ),
    ]

    for storage in get_storages():
        # Start each backend from an empty state before building the index.
        storage.delete_all()
        index = KmerSignatureIndex.create(storage, filters, size, num_hashes)
        for query, expected in cases:
            assert index.lookup(query) == expected
def test_bloomfilter_updated_success(len_bloom_filter, num_hash_functions, len_kmer, num_kmers):
    """Check that updating a BloomFilter sets exactly the helper-computed positions.

    The expected bitarray starts as ``len_bloom_filter`` zeros and has every
    position returned by ``_generate_kmer_hashes`` switched on; the filter's
    bitarray after ``update`` must equal it.

    The four arguments are presumably injected by a decorator or fixture
    outside this block -- TODO confirm.
    """
    random_kmers = _generate_random_kmers(len_kmer, num_kmers)
    positions = _generate_kmer_hashes(random_kmers, len_bloom_filter, num_hash_functions)

    # Reference result: an all-zero bitarray with the hashed positions set.
    want = bitarray("0" * len_bloom_filter)
    for position in positions:
        want[position] = True

    under_test = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    under_test.update(random_kmers)

    got = under_test.bitarray
    assert got == want
def test_bloomfilters_updated_with_same_kmers(len_bloom_filter, num_hash_functions, len_kmer, num_kmers):
    """Check that two identically configured filters fed the same kmers match.

    Both filters are built with the same ``m`` and ``h`` and updated with the
    same generated kmers; their bitarrays must compare equal.
    """
    shared_kmers = _generate_random_kmers(len_kmer, num_kmers)

    built = []
    for _ in range(2):
        current = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
        current.update(shared_kmers)
        built.append(current)

    first, second = built
    assert first.bitarray == second.bitarray
def test_create_bloom():
    """Check that filters built from different kmer lists have different bitarrays.

    Three times over, one filter (m=25, h=3) is updated with ["ATT", "ATC"]
    and another with ["ATT", "ATT"]; the resulting bitarrays must differ.
    """
    for _ in range(3):
        built = []
        # Fresh literals on every round, built and updated in the same order.
        for kmers in (["ATT", "ATC"], ["ATT", "ATT"]):
            current = BloomFilter(m=25, h=3)
            current.update(kmers)
            built.append(current)

        two_distinct, one_repeated = built
        assert two_distinct.bitarray != one_repeated.bitarray
def test_bloomfilters_updated_with_different_kmers(len_bloom_filter, num_hash_functions, len_kmer, num_kmers):
    """Check that filters fed kmers with differing hashes end up different.

    Two kmer samples are generated and hashed; ``assume`` is given the
    condition that the two hash results differ. Each sample then updates its
    own filter and the two bitarrays must not be equal.
    """
    samples = []
    for _ in range(2):
        # Generate, then hash, each sample before moving on to the next one.
        generated = _generate_random_kmers(len_kmer, num_kmers)
        hashed = _generate_kmer_hashes(generated, len_bloom_filter, num_hash_functions)
        samples.append((generated, hashed))

    (first_kmers, first_hashes), (second_kmers, second_hashes) = samples
    # NOTE(review): presumably hypothesis.assume, discarding runs where the
    # hashes coincide -- confirm against the file's imports.
    assume(first_hashes != second_hashes)

    first_filter = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    first_filter.update(first_kmers)

    second_filter = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    second_filter.update(second_kmers)

    assert first_filter.bitarray != second_filter.bitarray
def bloom(cls, config, kmers):
    """Build a Bloom filter from ``kmers`` and return its bitarray.

    Args:
        config: Mapping read for the keys ``"m"`` and ``"h"``, which are
            passed to ``BloomFilter`` as ``m`` and ``h``.
        kmers: Kmers to insert; they go through ``convert_query_kmers``
            before being added to the filter.

    Returns:
        The ``bitarray`` attribute of the populated filter.
    """
    # Convert to canonical kmers
    converted = convert_query_kmers(kmers)

    size, num_hashes = config["m"], config["h"]
    populated = BloomFilter(m=size, h=num_hashes)
    populated.update(converted)
    return populated.bitarray
def test_bloomfilter_created_with_initialisation(len_bloom_filter, num_hash_functions):
    """Check that a freshly constructed BloomFilter has an all-zero bitarray.

    The filter's bitarray must equal a bitarray of ``len_bloom_filter`` zeros
    before any kmers have been added.
    """
    fresh = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    all_zero = bitarray("0" * len_bloom_filter)
    assert fresh.bitarray == all_zero