示例#1
0
def test_validate_minabund():
    """Compare default validation with validation under `casemin=9`.

    The same input files are loaded into two independent read sets. With
    default settings the expected `valid` value is (4, 32); with `casemin=9`
    it is (0, 0).
    """
    infiles = kevlar.tests.data_glob('collect.beta.?.txt')

    def load_fresh_readset():
        # Every scenario gets its own ReadSet built from the same inputs.
        fresh = ReadSet(19, 5e3)
        for rec in kevlar.seqio.afxstream(infiles):
            fresh.add(rec)
        return fresh

    default_set = load_fresh_readset()
    default_set.validate()
    assert default_set.valid == (4, 32)

    strict_set = load_fresh_readset()
    strict_set.validate(casemin=9)
    assert strict_set.valid == (0, 0)
示例#2
0
def test_ikmer_abund_after_recalc():
    """
    Check that validation replaces a stale interesting k-mer abundance.

    The read is annotated with a k-mer whose recorded abundance is 28. The
    `counts` table handed to `validate` consumes the surrounding sequence 10
    times, so after validation the recorded abundance is expected to be 10.
    """
    advertised = KmerOfInterest('CATTGTCCTCGGGACTC', 13, [28, 0, 0])
    read = screed.Record(
        name='read1',
        sequence='AAGCAGGGGTCTACATTGTCCTCGGGACTCGAGATTTCTTCGCTGT',
        ikmers=[advertised],
    )

    repeats = 10
    counts = khmer.Counttable(17, 1e5, 4)
    seq = 'TTCGTTCCCGAAGCAGGGGTCTACATTGTCCTCGGGACTCGAGATTTCTTCGCTGTTCCGTCCTTCA'
    for _ in range(repeats):
        counts.consume(seq)

    rs = ReadSet()
    rs.add(read)
    # Adding the read alone must leave the advertised abundance untouched.
    assert read.ikmers[0].abund[0] == 28

    rs.validate(counts, minabund=8)
    assert rs.valid == (1, 1)
    # After validation the abundance matches the number of times `seq` was
    # consumed into `counts`.
    assert read.ikmers[0].abund[0] == 10
示例#3
0
def test_ctrl3_refr_contam(bogusrefrcontam):
    """Validate the trio1 novel reads with a mask supplied at construction.

    Args:
        bogusrefrcontam: Object passed to `ReadSet` as its `mask` argument
            (presumably provided by a pytest fixture defined elsewhere).

    With that mask in place the expected `valid` value after validation is
    (13, 171).
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7, mask=bogusrefrcontam)
    # Close the input handle deterministically instead of leaking it; the
    # loop stays inside the `with` so the handle is open while it is parsed.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate()
    assert readset.valid == (13, 171)
示例#4
0
def test_ctrl3():
    """Validate the trio1 novel reads with `casemin=6`.

    The expected `valid` value after validation is (424, 5782).
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7)
    # Close the input handle deterministically instead of leaking it; the
    # loop stays inside the `with` so the handle is open while it is parsed.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate(casemin=6)
    assert readset.valid == (424, 5782)
示例#5
0
def test_filter_abundfilt():
    """Validate the worm reads with `casemin=5` and `ctrlmax=0`.

    After validation the expected `valid` value is (1, 5) and the expected
    `discarded` count is 2.
    """
    readset = ReadSet(31, 1000)
    augfastq = kevlar.tests.data_file('worm.augfasta')
    # Close the input handle deterministically instead of leaking it; the
    # loop stays inside the `with` so the handle is open while it is parsed.
    with kevlar.open(augfastq, 'r') as instream:
        for record in parse_augmented_fastx(instream):
            readset.add(record)
    readset.validate(casemin=5, ctrlmax=0)
    assert readset.valid == (1, 5)
    assert readset.discarded == 2
示例#6
0
def test_load_readset():
    """Load the beta collection and check its size and selected counts.

    After loading, the read set is expected to have length 8, be truthy, and
    report a count of 8 for each of four specific k-mers.
    """
    sources = kevlar.tests.data_glob('collect.beta.?.txt')
    loaded = ReadSet(19, 1e3)
    for rec in kevlar.seqio.afxstream(sources):
        loaded.add(rec)

    assert len(loaded) == 8
    assert loaded

    expected_kmers = (
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    )
    for seq in expected_kmers:
        # Reads the read set's internal count table directly.
        assert loaded._counts.get(seq) == 8
示例#7
0
def test_validate_with_mask():
    """Validate with a mask holding one k-mer and check it is absent after.

    A node table containing a single k-mer is passed to `validate` as `mask`.
    The expected `valid` value is (3, 24), and no remaining interesting k-mer
    may equal the masked k-mer in either orientation.
    """
    masked = 'AGGGGCGTGACTTAATAAG'
    mask = khmer.Nodetable(19, 1e3, 2)
    mask.add(masked)

    sources = kevlar.tests.data_glob('collect.beta.?.txt')
    readset = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(sources):
        readset.add(rec)
    readset.validate(mask=mask)
    assert readset.valid == (3, 24)

    # Walk every interesting k-mer of every record in the read set.
    remaining = (ik for rec in readset for ik in rec.ikmers)
    for ik in remaining:
        assert ik.sequence != masked
        assert kevlar.revcom(ik.sequence) != masked
示例#8
0
def test_validate():
    """Validate the alpha collection and check which k-mers remain.

    After validation with default settings the expected `valid` value is
    (4, 32), the read set length is 9 and one item is counted as discarded.
    Every remaining interesting k-mer must be one of the expected k-mers, in
    either orientation, and must not be the known-bad k-mer.
    """
    sources = kevlar.tests.data_glob('collect.alpha.txt')
    readset = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(sources):
        readset.add(rec)
    readset.validate()

    assert readset.valid == (4, 32)
    assert len(readset) == 9
    assert readset.discarded == 1

    rejected = ['CAGGCCAGGGATCGCCGTG']
    accepted = [
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    ]
    for rec in readset:
        for ik in rec.ikmers:
            # Neither orientation may match a rejected k-mer.
            assert (
                ik.sequence not in rejected
                and kevlar.revcom(ik.sequence) not in rejected
            )
            # At least one orientation must match an accepted k-mer.
            assert (
                ik.sequence in accepted
                or kevlar.revcom(ik.sequence) in accepted
            )
示例#9
0
def ctrl3():
    """Build a read set from the trio1 novel reads.

    Returns:
        A `ReadSet(13, 1e7)` to which every record parsed from
        'trio1/novel_3_1,2.txt' has been added. `validate` is not called.
    """
    augfastq = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    readset = ReadSet(13, 1e7)
    # Close the input handle deterministically instead of leaking it; the
    # loop stays inside the `with` so the handle is open while it is parsed.
    with kevlar.open(augfastq, 'r') as instream:
        for record in kevlar.parse_augmented_fastx(instream):
            readset.add(record)
    return readset