Пример #1
0
def test_set_abundance_num_hypothesis(hashes, abundances, sketch_size):
    """Compare ``MinHash.set_abundances`` against a plain-dict oracle.

    A ``MinHash`` is created with ``sketch_size`` as its first argument and
    abundance tracking enabled, then loaded with a hash -> abundance mapping
    built from the two input sequences.  The test checks that:

    * the number of stored entries equals the number of strictly positive
      abundances in the mapping, capped at ``sketch_size``;
    * every stored hash carries exactly the abundance it was given.
    """
    # NOTE(review): the second positional argument (10) is presumably the
    # k-mer size -- confirm against the MinHash constructor.
    mh = MinHash(sketch_size, 10, track_abundance=True)

    # zip() truncates to the shorter sequence; dict() keeps the last
    # abundance seen for a repeated hash.
    expected = dict(zip(hashes, abundances))
    mh.set_abundances(expected)

    stored = mh.get_mins(with_abundance=True)

    # Only entries with a positive abundance are expected to be retained.
    positive = [abund for abund in expected.values() if abund > 0]
    expected_len = min(len(positive), sketch_size)
    assert len(stored) == expected_len

    for hashval, abund in stored.items():
        assert expected[hashval] == abund
Пример #2
0
def test_set_abundance_scaled_hypothesis(hashes, abundances, scaled):
    """Compare ``MinHash.set_abundances`` on a scaled sketch with an oracle.

    A ``MinHash`` is created with ``0`` as its first argument, the given
    ``scaled`` value and abundance tracking enabled, then loaded with a
    hash -> abundance mapping built from the two input sequences.  The test
    checks that:

    * the number of stored entries equals the number of oracle entries
      whose hash is at most ``get_max_hash_for_scaled(scaled)`` and whose
      abundance is strictly positive;
    * every stored hash carries the abundance it was given, does not exceed
      that maximum hash, and has a positive abundance.
    """
    # NOTE(review): the second positional argument (10) is presumably the
    # k-mer size -- confirm against the MinHash constructor.
    mh = MinHash(0, 10, track_abundance=True, scaled=scaled)

    # zip() truncates to the shorter sequence; dict() keeps the last
    # abundance seen for a repeated hash.
    expected = dict(zip(hashes, abundances))
    mh.set_abundances(expected)

    cutoff = get_max_hash_for_scaled(scaled)
    # Oracle entries expected to be retained: hash within the cutoff and a
    # positive abundance.
    retained = [
        hashval
        for hashval, abund in expected.items()
        if hashval <= cutoff and abund > 0
    ]

    stored = mh.get_mins(with_abundance=True)
    assert len(stored) == len(retained)

    for hashval, abund in stored.items():
        assert expected[hashval] == abund
        assert hashval <= cutoff
        assert abund > 0