def test_similarity_downsample(track_abundance):
    """Check similarity() across mismatched max_hash, with and without downsample.

    Two MinHash objects are built with different ``max_hash`` values and fed
    the same two hashes. Comparing their signatures directly is expected to
    raise ``ValueError``; comparing with ``downsample=True`` is expected to
    yield a similarity that rounds to 1.0.
    """
    common = dict(n=0, ksize=20, track_abundance=track_abundance)
    wide = sourmash_lib.MinHash(max_hash=2**63, **common)
    narrow = sourmash_lib.MinHash(max_hash=2**2, **common)

    for hashval in (1, 5):
        wide.add_hash(hashval)
    assert len(wide.get_mins()) == 2

    narrow.add_hash(1)
    narrow.add_hash(5)  # should be discarded due to max_hash
    assert len(narrow.get_mins()) == 1

    wide_sig = SourmashSignature(wide)
    narrow_sig = SourmashSignature(narrow)

    # mismatch in max_hash
    with pytest.raises(ValueError):
        wide_sig.similarity(narrow_sig)

    downsampled = wide_sig.similarity(narrow_sig, downsample=True)
    assert round(downsampled, 1) == 1.0
def test_roundtrip(track_abundance):
    """Save an Estimators-based signature, reload it, and compare both ways.

    The reloaded signature is expected to have similarity 1.0 with the
    original, regardless of which side the comparison is made from.
    """
    # NOTE(review): a second ``test_roundtrip`` is defined later in this file;
    # if both live in the same module the later definition replaces this one.
    # Confirm whether this Estimators-based variant is still meant to run.
    estimator = sourmash_lib.Estimators(
        n=1, ksize=20, track_abundance=track_abundance
    )
    estimator.add("AT" * 10)
    original = SourmashSignature('*****@*****.**', estimator)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]

    # The estimator attribute is read from the reloaded signature, but its
    # value is not inspected by this test.
    _ = reloaded.estimator

    assert original.similarity(reloaded) == 1.0
    assert reloaded.similarity(original) == 1.0
def test_roundtrip(track_abundance):
    """Save a MinHash-based signature, reload it, and compare both ways.

    The reloaded signature is expected to have similarity 1.0 with the
    original, regardless of which side the comparison is made from.
    """
    minhash = sourmash_lib.MinHash(
        n=1, ksize=20, track_abundance=track_abundance
    )
    minhash.add("AT" * 10)
    original = SourmashSignature(minhash)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]

    # The minhash attribute is read from the reloaded signature, but its
    # value is not inspected by this test.
    _ = reloaded.minhash

    assert original.similarity(reloaded) == 1.0
    assert reloaded.similarity(original) == 1.0
def test_roundtrip_empty(track_abundance):
    """Round-trip a signature whose Estimators object never had anything added.

    Edge case: with an empty estimator, the similarity between the original
    and the reloaded signature is expected to be 0 in both directions.
    """
    # NOTE(review): a second ``test_roundtrip_empty`` is defined later in this
    # file; if both live in the same module the later definition replaces this
    # one. Confirm whether this Estimators-based variant is still meant to run.
    empty_estimator = sourmash_lib.Estimators(
        n=1, ksize=20, track_abundance=track_abundance
    )
    original = SourmashSignature('*****@*****.**', empty_estimator)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]

    # The estimator attribute is read from the reloaded signature, but its
    # value is not inspected by this test.
    _ = reloaded.estimator

    assert original.similarity(reloaded) == 0
    assert reloaded.similarity(original) == 0
def test_roundtrip_empty(track_abundance):
    """Round-trip a signature whose MinHash never had anything added.

    Edge case: with an empty minhash, the similarity between the original
    and the reloaded signature is expected to be 0 in both directions.
    """
    empty_minhash = sourmash_lib.MinHash(
        n=1, ksize=20, track_abundance=track_abundance
    )
    original = SourmashSignature(empty_minhash)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]

    # The minhash attribute is read from the reloaded signature, but its
    # value is not inspected by this test.
    _ = reloaded.minhash

    assert original.similarity(reloaded) == 0
    assert reloaded.similarity(original) == 0
def test_roundtrip_seed(track_abundance):
    """Check that a non-default seed survives a save/load round trip.

    An Estimators object is created with ``seed=10`` and a single hash is
    added through its ``mh`` attribute. After saving and reloading, the
    reloaded estimator's seed must equal the original's, and the two
    signatures must have similarity 1.0 in both directions.
    """
    estimator = sourmash_lib.Estimators(
        n=1, ksize=20, track_abundance=track_abundance, seed=10
    )
    estimator.mh.add_hash(5)
    original = SourmashSignature('*****@*****.**', estimator)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]
    reloaded_estimator = reloaded.estimator

    assert estimator.seed == reloaded_estimator.seed

    assert original.similarity(reloaded) == 1.0
    assert reloaded.similarity(original) == 1.0
def test_roundtrip_max_hash(track_abundance):
    """Check that ``max_hash`` survives a save/load round trip.

    A MinHash is created with ``n=0`` and ``max_hash=10`` and given a single
    hash. After saving and reloading, the reloaded minhash's ``max_hash``
    must equal the original's, and the two signatures must have similarity
    1.0 in both directions.
    """
    minhash = sourmash_lib.MinHash(
        n=0, ksize=20, track_abundance=track_abundance, max_hash=10
    )
    minhash.add_hash(5)
    original = SourmashSignature(minhash)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]
    reloaded_minhash = reloaded.minhash

    assert minhash.max_hash == reloaded_minhash.max_hash

    assert original.similarity(reloaded) == 1.0
    assert reloaded.similarity(original) == 1.0