def test_validate_minabund():
    """Validate the beta collection with default and strict thresholds.

    With default arguments ``validate`` should leave ``valid == (4, 32)``;
    with ``casemin=9`` it should leave ``valid == (0, 0)``.
    """
    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    scenarios = (
        ({}, (4, 32)),
        ({'casemin': 9}, (0, 0)),
    )
    for validate_kwargs, expected in scenarios:
        # Build a fresh read set per scenario so the runs cannot interact.
        reads = ReadSet(19, 5e3)
        for rec in kevlar.seqio.afxstream(infiles):
            reads.add(rec)
        reads.validate(**validate_kwargs)
        assert reads.valid == expected
def test_ikmer_abund_after_recalc():
    """Check that validation replaces a stale interesting k-mer abundance.

    The read is annotated with an abundance of 28 for its interesting k-mer,
    but the count table built here has consumed the containing sequence only
    10 times. After ``validate`` is run against that table, the annotation
    is expected to read 10.
    """
    # Count table in which the k-mer's true abundance is 10.
    counttable = khmer.Counttable(17, 1e5, 4)
    template = (
        'TTCGTTCCCGAAGCAGGGGTCTACATTGTCCTCGGGACTCGAGATTTCTTCGCTGTTCCGTCCTTCA'
    )
    for _ in range(10):
        counttable.consume(template)

    # Read whose annotation advertises an abundance of 28.
    stale_ikmer = KmerOfInterest('CATTGTCCTCGGGACTC', 13, [28, 0, 0])
    read = screed.Record(
        name='read1',
        sequence='AAGCAGGGGTCTACATTGTCCTCGGGACTCGAGATTTCTTCGCTGT',
        ikmers=[stale_ikmer],
    )

    reads = ReadSet()
    reads.add(read)
    assert read.ikmers[0].abund[0] == 28

    reads.validate(counttable, minabund=8)
    assert reads.valid == (1, 1)
    assert read.ikmers[0].abund[0] == 10
def test_ctrl3_refr_contam(bogusrefrcontam):
    """Validate the trio1 novel reads with a reference/contaminant mask.

    The ``bogusrefrcontam`` argument (supplied by the caller, presumably a
    pytest fixture) is passed to the read set as its mask. After validation
    with default arguments, ``valid`` is expected to be ``(13, 171)``.
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7, mask=bogusrefrcontam)
    # Close the input handle deterministically instead of leaking it.
    # Assumes kevlar.open returns a file-like context manager.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate()
    assert readset.valid == (13, 171)
def test_ctrl3():
    """Validate the trio1 novel reads with ``casemin=6``.

    After validation, ``valid`` is expected to be ``(424, 5782)``.
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7)
    # Close the input handle deterministically instead of leaking it.
    # Assumes kevlar.open returns a file-like context manager.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate(casemin=6)
    assert readset.valid == (424, 5782)
def test_filter_abundfilt():
    """Validate the worm data set with ``casemin=5`` and ``ctrlmax=0``.

    After validation, ``valid`` is expected to be ``(1, 5)`` and
    ``discarded`` is expected to be 2.
    """
    readset = ReadSet(31, 1000)
    augfastq = kevlar.tests.data_file('worm.augfasta')
    # Close the input handle deterministically instead of leaking it.
    # Assumes kevlar.open returns a file-like context manager.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate(casemin=5, ctrlmax=0)
    assert readset.valid == (1, 5)
    assert readset.discarded == 2
def test_load_readset():
    """Load the beta collection and check its size and k-mer counts.

    The populated read set should have length 8, be truthy, and report a
    count of 8 in ``_counts`` for each of the four expected k-mers.
    """
    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    reads = ReadSet(19, 1e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)

    assert len(reads) == 8
    assert reads

    expected_kmers = (
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    )
    for expected in expected_kmers:
        assert reads._counts.get(expected) == 8
def test_validate_with_mask():
    """Validate the beta collection while masking a single k-mer.

    A node table containing one k-mer is passed to ``validate`` as the mask.
    Afterwards ``valid`` should be ``(3, 24)`` and no remaining interesting
    k-mer may equal the masked k-mer in either orientation.
    """
    masked = 'AGGGGCGTGACTTAATAAG'
    masktable = khmer.Nodetable(19, 1e3, 2)
    masktable.add(masked)

    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    reads = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)
    reads.validate(mask=masktable)
    assert reads.valid == (3, 24)

    # Walk every interesting k-mer of every read, in read order.
    surviving = (ikmer for rec in reads for ikmer in rec.ikmers)
    for ikmer in surviving:
        assert ikmer.sequence != masked
        assert kevlar.revcom(ikmer.sequence) != masked
def test_validate():
    """Validate the alpha collection and check which k-mers survive.

    After validation with default arguments, ``valid`` should be ``(4, 32)``,
    the read set should have length 9, and one item should be discarded.
    Every remaining interesting k-mer must match one of the expected k-mers
    in some orientation, and none may match the rejected one.
    """
    infiles = kevlar.tests.data_glob('collect.alpha.txt')
    reads = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)
    reads.validate()

    assert reads.valid == (4, 32)
    assert len(reads) == 9
    assert reads.discarded == 1

    rejected = ['CAGGCCAGGGATCGCCGTG']
    accepted = [
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    ]
    for rec in reads:
        for ikmer in rec.ikmers:
            forward = ikmer.sequence
            # Neither orientation may be the rejected k-mer.
            assert forward not in rejected
            assert kevlar.revcom(forward) not in rejected
            # At least one orientation must be an accepted k-mer.
            assert forward in accepted or kevlar.revcom(forward) in accepted
def ctrl3():
    """Build a read set from the trio1 novel reads, without validating it.

    Returns:
        A ``ReadSet(13, 1e7)`` populated with every record parsed from
        ``trio1/novel_3_1,2.txt``.
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7)
    # Close the input handle deterministically instead of leaking it.
    # Assumes kevlar.open returns a file-like context manager.
    with kevlar.open(augfastq, 'r') as instream:
        for record in kevlar.parse_augmented_fastx(instream):
            readset.add(record)
    return readset