示例#1
0
def test_validate_minabund():
    """Check `ReadSet.valid` after validating with and without `casemin`.

    For each scenario a fresh `ReadSet(19, 5e3)` is filled from the files
    returned by `kevlar.tests.data_glob('collect.beta.?.txt')`. A default
    `validate()` call is expected to give `valid == (4, 32)`, whereas
    `validate(casemin=9)` is expected to give `valid == (0, 0)`.
    """
    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    # (keyword arguments for validate(), expected value of `valid`)
    scenarios = (
        ({}, (4, 32)),
        ({'casemin': 9}, (0, 0)),
    )
    for validate_kwargs, expected in scenarios:
        reads = ReadSet(19, 5e3)
        for rec in kevlar.seqio.afxstream(infiles):
            reads.add(rec)
        reads.validate(**validate_kwargs)
        assert reads.valid == expected
示例#2
0
def test_ctrl3_refr_contam(bogusrefrcontam):
    """Validate a masked `ReadSet` built from 'trio1/novel_3_1,2.txt'.

    `bogusrefrcontam` is passed as the `mask` argument of `ReadSet(13, 1e7)`.
    After loading every parsed record and calling `validate()`, `valid` is
    expected to equal `(13, 171)`.
    """
    path = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    reads = ReadSet(13, 1e7, mask=bogusrefrcontam)
    stream = parse_augmented_fastx(kevlar.open(path, 'r'))
    for rec in stream:
        reads.add(rec)
    reads.validate()
    expected = (13, 171)
    assert reads.valid == expected
示例#3
0
def test_ctrl3():
    """Validate an unmasked `ReadSet` from 'trio1/novel_3_1,2.txt'.

    Every parsed record is added to `ReadSet(13, 1e7)`; after
    `validate(casemin=6)`, `valid` is expected to equal `(424, 5782)`.
    """
    path = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    reads = ReadSet(13, 1e7)
    stream = parse_augmented_fastx(kevlar.open(path, 'r'))
    for rec in stream:
        reads.add(rec)
    reads.validate(casemin=6)
    expected = (424, 5782)
    assert reads.valid == expected
示例#4
0
def test_load_readset():
    """Load records into a `ReadSet` and check its size and k-mer counts.

    After adding every record streamed from the 'collect.beta.?.txt' files
    to `ReadSet(19, 1e3)`, the set must have length 8, be truthy, and report
    a count of 8 in `_counts` for each of the four listed k-mers.
    """
    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    reads = ReadSet(19, 1e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)

    assert len(reads) == 8
    assert reads

    expected_count = 8
    expected_kmers = (
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    )
    for seq in expected_kmers:
        assert reads._counts.get(seq) == expected_count
示例#5
0
def test_validate_with_mask():
    """Check that validating with a mask drops the masked k-mer.

    A `khmer.Nodetable(19, 1e3, 2)` holding a single k-mer is passed as
    `mask` to `validate()`. Afterwards `valid` is expected to be `(3, 24)`,
    and no `ikmers` entry of any record in the set may match the masked
    k-mer, either directly or via `kevlar.revcom`.
    """
    masked = 'AGGGGCGTGACTTAATAAG'
    nodetable = khmer.Nodetable(19, 1e3, 2)
    nodetable.add(masked)

    infiles = kevlar.tests.data_glob('collect.beta.?.txt')
    reads = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)
    reads.validate(mask=nodetable)
    assert reads.valid == (3, 24)

    # Walk every ikmer of every record lazily, in the original order.
    remaining = (ik for rec in reads for ik in rec.ikmers)
    for ik in remaining:
        assert ik.sequence != masked
        assert kevlar.revcom(ik.sequence) != masked
示例#6
0
def test_validate():
    """Validate a `ReadSet` built from 'collect.alpha.txt' and inspect it.

    After `validate()`, the set is expected to report `valid == (4, 32)`,
    a length of 9, and `discarded == 1`. Every `ikmers` entry of every
    record must be absent from the rejected list and present in the accepted
    list, considering both the sequence and its `kevlar.revcom` value.
    """
    infiles = kevlar.tests.data_glob('collect.alpha.txt')
    reads = ReadSet(19, 5e3)
    for rec in kevlar.seqio.afxstream(infiles):
        reads.add(rec)
    reads.validate()

    assert reads.valid == (4, 32)
    assert len(reads) == 9
    assert reads.discarded == 1

    rejected = ('CAGGCCAGGGATCGCCGTG',)
    accepted = (
        'AGGGGCGTGACTTAATAAG',
        'GGGCGTGACTTAATAAGGT',
        'TAGGGGCGTGACTTAATAA',
        'GGGGCGTGACTTAATAAGG',
    )
    for rec in reads:
        for ikmer in rec.ikmers:
            # Neither orientation may be a rejected k-mer.
            assert not (
                ikmer.sequence in rejected
                or kevlar.revcom(ikmer.sequence) in rejected
            )
            # At least one orientation must be an accepted k-mer.
            assert (
                ikmer.sequence in accepted
                or kevlar.revcom(ikmer.sequence) in accepted
            )
示例#7
0
def ctrl3():
    """Build a `ReadSet(13, 1e7)` from 'trio1/novel_3_1,2.txt'.

    Returns:
        The `ReadSet` after every record parsed from the data file has been
        added to it; `validate()` is not called here.
    """
    path = kevlar.tests.data_file('trio1/novel_3_1,2.txt')
    reads = ReadSet(13, 1e7)
    handle = kevlar.open(path, 'r')
    for rec in kevlar.parse_augmented_fastx(handle):
        reads.add(rec)
    return reads