def test_dna_mh(track_abundance):
    """Check that add_sequence() and per-k-mer add() agree on a DNA string.

    Two MinHash objects are built with identical settings (n=5, ksize=4).
    One receives the whole sequence through ``add_sequence``; the other
    receives every 4-character window of that sequence through ``add``.
    Both must report the same ``get_mins()`` result, and two specific hash
    values are expected to be present in it.

    ``track_abundance`` is forwarded unchanged to both constructors.
    """
    ksize = 4
    dna = 'ATGGCAGTGACGATGCCAG'
    settings = dict(n=5, ksize=ksize, track_abundance=track_abundance)

    from_sequence = MinHash(**settings)
    from_kmers = MinHash(**settings)

    from_sequence.add_sequence(dna)

    # Slide a ksize-wide window across the string, one position at a time.
    start = 0
    while start + ksize <= len(dna):
        from_kmers.add(dna[start:start + ksize])
        start += 1

    assert from_sequence.get_mins() == from_kmers.get_mins()
    print(from_sequence.get_mins())

    # Pinned hash values that this sequence is expected to produce.
    assert 726311917625663847 in from_sequence.get_mins()
    assert 3697418565283905118 in from_sequence.get_mins()
def test_protein_mh(track_abundance):
    """Check that add_sequence() and per-k-mer add() agree with is_protein=True.

    Two MinHash objects are built with identical settings (n=5, ksize=6,
    is_protein=True). One receives the whole string through
    ``add_sequence``; the other receives every 6-character window of that
    string through ``add``. Both must report the same ``get_mins()``
    result, and one specific hash value is expected to be present in it.

    ``track_abundance`` is forwarded unchanged to both constructors.
    """
    ksize = 6
    # NOTE(review): the input uses the DNA alphabet even though
    # is_protein=True; how MinHash interprets it is not visible here.
    residues = 'ATGGCAGTGACGATGCCG'
    settings = dict(
        n=5,
        ksize=ksize,
        is_protein=True,
        track_abundance=track_abundance,
    )

    from_sequence = MinHash(**settings)
    from_kmers = MinHash(**settings)

    from_sequence.add_sequence(residues)

    # Enumerate windows by their end offset; each window is ksize long.
    for stop in range(ksize, len(residues) + 1):
        window = residues[stop - ksize:stop]
        from_kmers.add(window)

    assert from_sequence.get_mins() == from_kmers.get_mins()

    # Pinned hash value that this input is expected to produce.
    assert 901193879228338100 in from_sequence.get_mins()