def test_validate_minabund():
    """Check `ReadSet.valid` with default validation and with `casemin=9`.

    The same `collect.beta.?.txt` inputs are loaded into two independent
    read sets: default validation is expected to report `(4, 32)`, while
    `casemin=9` is expected to report `(0, 0)`.
    """
    infiles = kevlar.tests.data_glob('collect.beta.?.txt')

    def load():
        # Build a fresh ReadSet so the two validation runs share no state.
        reads = ReadSet(19, 5e3)
        for rec in kevlar.seqio.afxstream(infiles):
            reads.add(rec)
        return reads

    default_reads = load()
    default_reads.validate()
    assert default_reads.valid == (4, 32)

    strict_reads = load()
    strict_reads.validate(casemin=9)
    assert strict_reads.valid == (0, 0)
def test_ctrl3_refr_contam(bogusrefrcontam):
    """Validate the trio1 novel reads with a mask supplied at construction.

    Args:
        bogusrefrcontam: object passed as the `mask` argument of `ReadSet`
            (presumably provided by a pytest fixture of the same name
            defined elsewhere; confirm).

    After validation `readset.valid` is expected to be `(13, 171)`.
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7, mask=bogusrefrcontam)
    # Close the input handle deterministically instead of leaking it.
    # NOTE(review): assumes kevlar.open returns a file object usable as a
    # context manager; confirm against kevlar.open.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate()
    assert readset.valid == (13, 171)
def test_ctrl3():
    """Validate the trio1 novel reads with `casemin=6` and no mask.

    After validation `readset.valid` is expected to be `(424, 5782)`.
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7)
    # Close the input handle deterministically instead of leaking it.
    # NOTE(review): assumes kevlar.open returns a file object usable as a
    # context manager; confirm against kevlar.open.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate(casemin=6)
    assert readset.valid == (424, 5782)
def test_load_readset():
    """Load the `collect.beta.?.txt` inputs and check the unvalidated state.

    The populated read set must hold 8 records, be truthy, and report a
    count of 8 for each of the four listed k-mers in its `_counts` table.
    """
    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    reads = ReadSet(19, 1e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)

    assert len(reads) == 8
    assert reads

    for seq in (
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    ):
        assert reads._counts.get(seq) == 8
def test_validate_with_mask():
    """Validate with a mask containing one k-mer and check it is excluded.

    A `khmer.Nodetable` holding a single k-mer is passed to `validate` as
    `mask`. The result is expected to be `(3, 24)`, and no `ikmers` entry
    of any remaining record may equal the masked k-mer, either directly or
    after `kevlar.revcom`.
    """
    masked = 'AGGGGCGTGACTTAATAAG'
    nodetable = khmer.Nodetable(19, 1e3, 2)
    nodetable.add(masked)

    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    reads = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)

    reads.validate(mask=nodetable)
    assert reads.valid == (3, 24)

    for rec in reads:
        for ikmer in rec.ikmers:
            seq = ikmer.sequence
            assert seq != masked
            assert kevlar.revcom(seq) != masked
def test_validate():
    """Validate `collect.alpha.txt` and check counts and surviving k-mers.

    Expected after validation: `valid == (4, 32)`, 9 records, 1 discarded.
    Every `ikmers` entry of every record must match one of the accepted
    k-mers and none of the rejected ones, either directly or after
    `kevlar.revcom`.
    """
    infiles = kevlar.tests.data_glob('collect.alpha.txt')
    reads = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)
    reads.validate()

    assert reads.valid == (4, 32)
    assert len(reads) == 9
    assert reads.discarded == 1

    rejected = ['CAGGCCAGGGATCGCCGTG']
    accepted = [
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    ]
    for rec in reads:
        for ikmer in rec.ikmers:
            # Neither orientation may be a rejected k-mer ...
            assert not (
                ikmer.sequence in rejected
                or kevlar.revcom(ikmer.sequence) in rejected
            )
            # ... and at least one orientation must be an accepted k-mer.
            assert (
                ikmer.sequence in accepted
                or kevlar.revcom(ikmer.sequence) in accepted
            )
def ctrl3():
    """Build a `ReadSet(13, 1e7)` populated from the trio1 novel reads.

    Returns:
        The populated, not yet validated, `ReadSet`.

    NOTE(review): this looks like a pytest fixture, but no decorator is
    visible here; confirm how it is registered and used.
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7)
    # Consume the whole stream inside the `with` so the handle is closed
    # before returning, instead of being leaked.
    # NOTE(review): assumes kevlar.open returns a file object usable as a
    # context manager; confirm against kevlar.open.
    with kevlar.open(augfastq, 'r') as instream:
        for record in kevlar.parse_augmented_fastx(instream):
            readset.add(record)
    return readset