Exemplo n.º 1
0
def test_similarity_downsample(track_abundance):
    """Compare two signatures whose sketches were built with different max_hash.

    Both sketches receive the hashes 1 and 5.  The sketch with the small
    max_hash is expected to keep only one of them, so a plain similarity()
    call between the two signatures must raise ValueError, while passing
    downsample=True must yield a similarity that rounds to 1.0.

    track_abundance is forwarded unchanged to the MinHash constructor
    (presumably supplied by a pytest fixture -- confirm in conftest).
    """
    shared_kwargs = dict(n=0, ksize=20, track_abundance=track_abundance)
    wide = sourmash_lib.MinHash(max_hash=2**63, **shared_kwargs)
    narrow = sourmash_lib.MinHash(max_hash=2**2, **shared_kwargs)

    # The wide sketch keeps both hashes.
    for hashval in (1, 5):
        wide.add_hash(hashval)
    assert len(wide.get_mins()) == 2

    # The narrow sketch should discard 5 because of its max_hash.
    for hashval in (1, 5):
        narrow.add_hash(hashval)
    assert len(narrow.get_mins()) == 1

    wide_sig = SourmashSignature(wide)
    narrow_sig = SourmashSignature(narrow)

    # Without downsampling, the max_hash mismatch is an error.
    with pytest.raises(ValueError):
        wide_sig.similarity(narrow_sig)

    assert round(wide_sig.similarity(narrow_sig, downsample=True), 1) == 1.0
Exemplo n.º 2
0
def test_roundtrip(track_abundance):
    """Save a signature, load it back, and check it matches the original.

    The signature wraps an Estimators object fed with one 20-character
    sequence.  After going through save_signatures() and load_signatures(),
    the similarity between the original and the reloaded signature must be
    exactly 1.0 in both directions.
    """
    estimator = sourmash_lib.Estimators(
        n=1, ksize=20, track_abundance=track_abundance
    )
    estimator.add("AT" * 10)
    original = SourmashSignature('*****@*****.**', estimator)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]
    # Not used afterwards; the lookup only exercises the attribute on the
    # loaded signature.
    reloaded_estimator = reloaded.estimator

    for left, right in ((original, reloaded), (reloaded, original)):
        assert left.similarity(right) == 1.0
Exemplo n.º 3
0
def test_roundtrip(track_abundance):
    """Save a MinHash-backed signature, reload it, and compare with the original.

    One 20-character sequence is added to the sketch before it is wrapped
    in a SourmashSignature.  The signature is then passed through
    save_signatures() / load_signatures(), and the similarity between the
    original and the reloaded copy must be exactly 1.0 both ways.
    """
    sketch = sourmash_lib.MinHash(
        n=1, ksize=20, track_abundance=track_abundance
    )
    sketch.add("AT" * 10)
    original = SourmashSignature(sketch)

    serialized = save_signatures([original])
    loaded = list(load_signatures(serialized))
    reloaded = loaded[0]
    # Not used afterwards; the lookup only exercises the attribute on the
    # loaded signature.
    reloaded_sketch = reloaded.minhash

    assert original.similarity(reloaded) == 1.0
    assert reloaded.similarity(original) == 1.0
Exemplo n.º 4
0
def test_roundtrip_empty(track_abundance):
    """Round-trip a signature whose estimator never received any data.

    Edge case: nothing is added to the Estimators object before saving.
    The similarity between the original and the reloaded signature is
    expected to be 0 in both directions.
    """
    empty_estimator = sourmash_lib.Estimators(
        n=1, ksize=20, track_abundance=track_abundance
    )
    original = SourmashSignature('*****@*****.**', empty_estimator)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]
    # Not used afterwards; the lookup only exercises the attribute on the
    # loaded signature.
    reloaded_estimator = reloaded.estimator

    for left, right in ((original, reloaded), (reloaded, original)):
        assert left.similarity(right) == 0
Exemplo n.º 5
0
def test_roundtrip_empty(track_abundance):
    """Round-trip a signature built from a MinHash that holds no hashes.

    Edge case: the sketch is wrapped and saved without anything being
    added to it.  After reloading, similarity between the original and the
    reloaded signature is expected to be 0 in both directions.
    """
    empty_sketch = sourmash_lib.MinHash(
        n=1, ksize=20, track_abundance=track_abundance
    )
    original = SourmashSignature(empty_sketch)

    serialized = save_signatures([original])
    loaded = list(load_signatures(serialized))
    reloaded = loaded[0]
    # Not used afterwards; the lookup only exercises the attribute on the
    # loaded signature.
    reloaded_sketch = reloaded.minhash

    assert original.similarity(reloaded) == 0
    assert reloaded.similarity(original) == 0
Exemplo n.º 6
0
def test_roundtrip_seed(track_abundance):
    """Check that a non-default seed is preserved across save and load.

    An Estimators object is created with seed=10 and given a single hash
    through its underlying ``mh`` object.  After the signature is saved
    and reloaded, the reloaded estimator must report the same seed, and
    the two signatures must have similarity 1.0 in both directions.
    """
    estimator = sourmash_lib.Estimators(
        n=1, ksize=20, track_abundance=track_abundance, seed=10
    )
    estimator.mh.add_hash(5)
    original = SourmashSignature('*****@*****.**', estimator)

    serialized = save_signatures([original])
    reloaded = list(load_signatures(serialized))[0]
    reloaded_estimator = reloaded.estimator

    # The seed has to survive serialization.
    assert estimator.seed == reloaded_estimator.seed

    for left, right in ((original, reloaded), (reloaded, original)):
        assert left.similarity(right) == 1.0
Exemplo n.º 7
0
def test_roundtrip_max_hash(track_abundance):
    """Check that max_hash is preserved across save and load.

    A MinHash is created with n=0 and max_hash=10 and given the hash 5.
    After the signature is saved and reloaded, the reloaded sketch must
    report the same max_hash, and the two signatures must have similarity
    1.0 in both directions.
    """
    sketch = sourmash_lib.MinHash(
        n=0, ksize=20, track_abundance=track_abundance, max_hash=10
    )
    sketch.add_hash(5)
    original = SourmashSignature(sketch)

    serialized = save_signatures([original])
    loaded = list(load_signatures(serialized))
    reloaded = loaded[0]
    reloaded_sketch = reloaded.minhash

    # max_hash has to survive serialization.
    assert sketch.max_hash == reloaded_sketch.max_hash

    assert original.similarity(reloaded) == 1.0
    assert reloaded.similarity(original) == 1.0