def test_bytes_protein(track_abundance):
    """Check that a protein sequence can be added as str, unicode and bytes.

    The same residues are added three times, once per input type, and the
    sketch is expected to hold exactly 4 mins afterwards.
    """
    sketch = MinHash(10, 6, True, track_abundance=track_abundance)

    # Same sequence, three different Python input types.
    for residues in ('AGYYG', u'AGYYG', b'AGYYG'):
        sketch.add_protein(residues)

    n_mins = len(sketch.get_mins())
    assert n_mins == 4
def test_protein_hp(track_abundance, hp):
    """Check the number of mins for a protein sequence with and without hp.

    With ``hp`` truthy the sketch is expected to hold 1 min; otherwise 4.
    """
    sketch = MinHash(
        10,
        6,
        True,
        dayhoff=False,
        hp=hp,
        track_abundance=track_abundance,
    )
    sketch.add_protein('AGYYG')

    # The expected count depends only on whether hp encoding was requested.
    expected_count = 1 if hp else 4
    assert len(sketch.get_mins()) == expected_count
def test_protein_dayhoff(track_abundance, dayhoff):
    """Check that a protein sequence yields 4 mins for either dayhoff value.

    The expected count is the same whether ``dayhoff`` is on or off.
    """
    sketch = MinHash(
        10,
        6,
        True,
        dayhoff=dayhoff,
        hp=False,
        track_abundance=track_abundance,
    )
    sketch.add_protein('AGYYG')

    n_mins = len(sketch.get_mins())
    assert n_mins == 4
def test_bytes_protein_dayhoff(track_abundance, dayhoff):
    """Check moltype and min count for str/bytes input with dayhoff toggled.

    ``moltype`` must read 'dayhoff' when ``dayhoff`` is truthy and 'protein'
    otherwise. After adding the same sequence twice as str and once as
    bytes, the sketch is expected to hold exactly 4 mins.
    """
    sketch = MinHash(
        10,
        6,
        True,
        dayhoff=dayhoff,
        hp=False,
        track_abundance=track_abundance,
    )

    wanted_moltype = 'dayhoff' if dayhoff else 'protein'
    assert sketch.moltype == wanted_moltype

    # Two str additions followed by one bytes addition of the same residues.
    for residues in ('AGYYG', 'AGYYG', b'AGYYG'):
        sketch.add_protein(residues)

    assert len(sketch.get_mins()) == 4
def test_bytes_protein_hp(track_abundance, hp):
    """Check moltype and min count for str/unicode/bytes input with hp toggled.

    ``moltype`` must read 'hp' when ``hp`` is truthy and 'protein' otherwise.
    After adding the same sequence as str, unicode literal and bytes, the
    sketch is expected to hold 1 min with hp and 4 without.
    """
    sketch = MinHash(
        10,
        6,
        True,
        dayhoff=False,
        hp=hp,
        track_abundance=track_abundance,
    )

    wanted_moltype = 'hp' if hp else 'protein'
    assert sketch.moltype == wanted_moltype

    # Same sequence, three different Python input types.
    for residues in ('AGYYG', u'AGYYG', b'AGYYG'):
        sketch.add_protein(residues)

    expected_count = 1 if hp else 4
    assert len(sketch.get_mins()) == expected_count
def test_basic_dna_bad_2(track_abundance):
    """Check that ``add_protein('YYYY')`` is rejected with ``ValueError``.

    The sketch is built as ``MinHash(1, 6, ...)`` without the third
    positional argument the protein tests pass.
    NOTE(review): presumably that makes this a DNA sketch, which is why
    the protein call must fail -- confirm against the MinHash constructor.
    """
    sketch = MinHash(1, 6, track_abundance=track_abundance)

    # The call itself must raise; nothing is asserted about the message.
    with pytest.raises(ValueError):
        sketch.add_protein('YYYY')
def test_protein_short(track_abundance):
    """Check that adding the two-residue sequence 'AG' produces no mins.

    NOTE(review): presumably 'AG' is too short for the size given as the
    second constructor argument (9) -- confirm against MinHash.
    """
    sketch = MinHash(10, 9, True, track_abundance=track_abundance)
    sketch.add_protein('AG')

    # Show the unexpected mins in the failure message for easier debugging.
    found_mins = sketch.get_mins()
    assert len(found_mins) == 0, found_mins