def test_abund_similarity_zero():
    """Check that similarity is 0.0 when only one sketch has a hash added."""
    populated = MinHash(n=5, ksize=20, track_abundance=True)
    untouched = MinHash(n=5, ksize=20, track_abundance=True)

    # Only the first sketch receives a hash; the second has none added.
    populated.add_hash(1)

    score = populated.similarity(untouched)
    assert score == 0.0
def test_diff_seed(track_abundance):
    """Check that count_common raises ValueError when seeds differ."""
    seeded_one = MinHash(n=5, ksize=20, track_abundance=track_abundance,
                         seed=1)
    seeded_two = MinHash(n=5, ksize=20, track_abundance=track_abundance,
                         seed=2)

    # Fill the first sketch completely, then the second one.
    fill_plan = (
        (seeded_one, (1, 2, 3, 4, 5)),
        (seeded_two, (1, 2, 3, 4, 6)),
    )
    for sketch, hashes in fill_plan:
        for value in hashes:
            sketch.add_hash(value)

    with pytest.raises(ValueError):
        seeded_one.count_common(seeded_two)
def test_common_1(track_abundance):
    """Check that count_common reports four shared hashes both ways."""
    left = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    right = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for value in range(1, 6):
        left.add_hash(value)
    for value in (1, 2, 3, 4, 6):
        right.add_hash(value)

    # Hashes 1-4 were added to both sketches; 5 and 6 to one each.
    expected_shared = 4
    assert left.count_common(right) == expected_shared
    assert right.count_common(left) == expected_shared
def test_jaccard_2_difflen(track_abundance):
    """Check jaccard() between sketches holding different numbers of hashes.

    One sketch gets five hashes and the other only the first four of them;
    the test expects a Jaccard value of 4/5 in both directions.
    """
    E1 = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    E2 = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for i in [1, 2, 3, 4, 5]:
        E1.add_hash(i)
    for i in [1, 2, 3, 4]:
        E2.add_hash(i)

    # Round the expected value the same way as the measured one so both
    # sides of the comparison are treated identically.
    expected = round(4 / 5.0, 2)
    assert round(E1.jaccard(E2), 2) == expected
    assert round(E2.jaccard(E1), 2) == expected
def test_abund_similarity():
    """Check similarity() on abundance-tracking sketches.

    Compares a one-hash sketch with itself and with a two-hash sketch, both
    with the default call and with ignore_abundance=True.
    """
    single = MinHash(n=5, ksize=20, track_abundance=True)
    double = MinHash(n=5, ksize=20, track_abundance=True)

    single.add_hash(1)
    for value in (1, 2):
        double.add_hash(value)

    # Default call (no ignore_abundance argument).
    self_score = single.similarity(single)
    assert round(self_score) == 1.0
    cross_score = single.similarity(double)
    assert round(cross_score, 2) == 0.5

    # Same comparisons with abundance explicitly ignored.
    self_score_flat = single.similarity(single, ignore_abundance=True)
    assert round(self_score_flat) == 1.0
    cross_score_flat = single.similarity(double, ignore_abundance=True)
    assert round(cross_score_flat, 2) == 0.5
def test_jaccard_1(track_abundance):
    """Check that jaccard() gives 4/5 in both directions for two sketches."""
    first = MinHash(n=5, ksize=20, track_abundance=track_abundance)
    second = MinHash(n=5, ksize=20, track_abundance=track_abundance)

    for value in range(1, 6):
        first.add_hash(value)
    for value in (1, 2, 3, 4, 6):
        second.add_hash(value)

    # Expected reasoning: the union used for the comparison is
    # [1, 2, 3, 4, 5] and the intersection is [1, 2, 3, 4] => 4/5.
    expected = round(4 / 5.0, 2)
    forward = first.jaccard(second)
    assert round(forward, 2) == expected
    backward = second.jaccard(first)
    assert round(backward, 2) == expected