示例#1
0
def test_lookup3():
    """Check per-k-mer lookups against an index built from two Bloom filters.

    Two filters are filled from k-mer lists that share "ATC", "ATG" and
    "ATA" and differ in their last entry ("ATT" vs "TTT").  For every
    storage returned by ``get_storages()`` the storage is cleared, an index
    is created from the filters, and ``lookup`` is expected to return one
    entry per distinct queried k-mer (the queries below repeat "ATC").
    """
    size = 250
    num_hashes = 1
    kmer_sets = (
        ["ATC", "ATG", "ATA", "ATT"],
        ["ATC", "ATG", "ATA", "TTT"],
    )
    # The value returned by ``update`` is what gets handed to the index.
    bloomfilters = [
        BloomFilter(size, num_hashes).update(convert_query_kmers(kmer_set))
        for kmer_set in kmer_sets
    ]

    for storage in get_storages():
        storage.delete_all()
        ksi = KmerSignatureIndex.create(storage, bloomfilters, size, num_hashes)

        # (query, expected lookup result) pairs, checked in order.
        cases = [
            (["ATC"], {"ATC": bitarray("11")}),
            (
                ["ATC", "ATC", "ATT"],
                {"ATC": bitarray("11"), "ATT": bitarray("10")},
            ),
            (
                ["ATC", "ATC", "ATT", "TTT"],
                {
                    "ATC": bitarray("11"),
                    "ATT": bitarray("10"),
                    "TTT": bitarray("01"),
                },
            ),
        ]
        for query, expected in cases:
            assert ksi.lookup(query) == expected
示例#2
0
def test_bloomfilter_updated_success(len_bloom_filter, num_hash_functions, len_kmer, num_kmers):
    """Check that updating a filter sets exactly the expected bit positions.

    The expected bit array starts as ``len_bloom_filter`` zero bits and has
    every position yielded by ``_generate_kmer_hashes`` switched on; the
    filter's ``bitarray`` after ``update`` must equal it.
    """
    random_kmers = _generate_random_kmers(len_kmer, num_kmers)
    positions = _generate_kmer_hashes(random_kmers, len_bloom_filter, num_hash_functions)

    # Build the reference bit pattern independently of BloomFilter.
    reference_bits = bitarray("0" * len_bloom_filter)
    for position in positions:
        reference_bits[position] = True

    filter_under_test = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    filter_under_test.update(random_kmers)

    assert filter_under_test.bitarray == reference_bits
示例#3
0
def test_bloomfilters_updated_with_same_kmers(len_bloom_filter, num_hash_functions, len_kmer, num_kmers):
    """Check that two filters updated with the same k-mers have equal bit arrays."""
    shared_kmers = _generate_random_kmers(len_kmer, num_kmers)

    def _filled_filter():
        # Create a fresh filter and feed it the shared k-mers.
        new_filter = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
        new_filter.update(shared_kmers)
        return new_filter

    first = _filled_filter()
    second = _filled_filter()

    assert first.bitarray == second.bitarray
示例#4
0
def test_create_bloom():
    """Check that filters fed different k-mer lists end up with different bits.

    One filter receives "ATT" and "ATC", the other receives "ATT" twice.
    The comparison is repeated three times with freshly created filters.
    """
    for _ in range(3):
        two_distinct = BloomFilter(m=25, h=3)
        two_distinct.update(["ATT", "ATC"])

        one_repeated = BloomFilter(m=25, h=3)
        one_repeated.update(["ATT", "ATT"])

        assert two_distinct.bitarray != one_repeated.bitarray
示例#5
0
def test_bloomfilters_updated_with_different_kmers(len_bloom_filter, num_hash_functions, len_kmer, num_kmers):
    """Check that k-mers hashing to different positions give different filters.

    Two independent batches of random k-mers are generated together with
    their hash positions.  Cases where both batches cover the same
    positions are discarded via ``assume``; for the remaining cases the two
    filters' bit arrays must differ.
    """
    kmers1 = _generate_random_kmers(len_kmer, num_kmers)
    hashes1 = _generate_kmer_hashes(kmers1, len_bloom_filter, num_hash_functions)
    kmers2 = _generate_random_kmers(len_kmer, num_kmers)
    hashes2 = _generate_kmer_hashes(kmers2, len_bloom_filter, num_hash_functions)

    # Compare the positions as sets.  The final assertion only holds when the
    # batches differ in WHICH positions they cover; order or repetition within
    # the hash collections must not let an equivalent pair through.
    # NOTE(review): a no-op if _generate_kmer_hashes already returns sets.
    assume(set(hashes1) != set(hashes2))

    bloom_filter1 = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    bloom_filter1.update(kmers1)
    bloom_filter2 = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    bloom_filter2.update(kmers2)

    assert bloom_filter1.bitarray != bloom_filter2.bitarray
示例#6
0
 def bloom(cls, config, kmers):
     """Build a Bloom filter from ``kmers`` and return its bit array.

     ``kmers`` are first passed through ``convert_query_kmers`` (the
     original comment describes this as converting to canonical k-mers).
     The filter is created with ``m=config["m"]`` and ``h=config["h"]``.
     """
     converted = convert_query_kmers(kmers)
     filter_size = config["m"]
     hash_count = config["h"]
     bf = BloomFilter(m=filter_size, h=hash_count)
     bf.update(converted)
     return bf.bitarray
示例#7
0
def test_bloomfilter_created_with_initialisation(len_bloom_filter, num_hash_functions):
    """Check that a newly created filter holds ``len_bloom_filter`` zero bits."""
    fresh_filter = BloomFilter(m=len_bloom_filter, h=num_hash_functions)
    all_zero = bitarray("0" * len_bloom_filter)
    assert fresh_filter.bitarray == all_zero