Пример #1
0
def test_count_threading():
    """Run the ``count`` subcommand with ``--threads 2`` on the trio1 reads.

    The test makes no assertions; it passes when ``kevlar.count.main``
    returns without raising. Output counttables are written to temporary
    files that are removed when the ``with`` block exits.
    """
    with NamedTemporaryFile(suffix='.counttable') as ctrl1out, \
            NamedTemporaryFile(suffix='.counttable') as ctrl2out, \
            NamedTemporaryFile(suffix='.counttable') as caseout:
        case_reads = data_file('trio1/case1.fq')
        ctrl_reads = data_glob('trio1/ctrl[1,2].fq')

        # Assemble the command line from logically grouped pieces.
        settings = ['--ksize', '19', '--memory', '500K', '--threads', '2']
        case_args = ['--case', caseout.name, case_reads]
        control_args = []
        for outfile, reads in ((ctrl1out, ctrl_reads[0]),
                               (ctrl2out, ctrl_reads[1])):
            control_args += ['--control', outfile.name, reads]

        cli_args = ['count'] + settings + case_args + control_args
        parsed = kevlar.cli.parser().parse_args(cli_args)
        kevlar.count.main(parsed)
Пример #2
0
def test_count_simple(numbands, band, kmers_stored, capsys):
    """Run the ``count`` subcommand on the simple-genome case/control reads.

    ``numbands`` and ``band`` are forwarded as ``--num-bands`` and ``--band``.
    ``kmers_stored`` and ``capsys`` are accepted but not used by the code
    shown here. The test passes when ``kevlar.count.main`` returns without
    raising.
    """
    with NamedTemporaryFile(suffix='.counttable') as ctrl1out, \
            NamedTemporaryFile(suffix='.counttable') as ctrl2out, \
            NamedTemporaryFile(suffix='.counttable') as caseout:
        case_reads = data_file('simple-genome-case-reads.fa.gz')
        ctrl_reads = data_glob('simple-genome-ctrl[1,2]-reads.fa.gz')

        # Sample arguments first, then tuning options, as in the CLI call.
        sample_args = [
            '--case', caseout.name, case_reads,
            '--control', ctrl1out.name, ctrl_reads[0],
            '--control', ctrl2out.name, ctrl_reads[1],
        ]
        settings = [
            '--ksize', '25',
            '--memory', '5K',
            '--ctrl-max', '0',
            '--num-bands', str(numbands),
            '--band', str(band),
        ]

        cli_args = ['count'] + sample_args + settings
        parsed = kevlar.cli.parser().parse_args(cli_args)
        kevlar.count.main(parsed)
Пример #3
0
def test_unband_beta():
    """Unband the ``collect.beta.?.txt`` inputs and check the merged reads.

    Expects 8 reads in total, and 4 annotations on the read that sorts first
    by name.
    """
    stream = kevlar.seqio.afxstream(data_glob('collect.beta.?.txt'))
    merged = kevlar.unband.unband(stream, numbatches=2)
    # Sort by name so that index 0 refers to a deterministic read.
    reads = sorted(merged, key=lambda read: read.name)
    assert len(reads) == 8
    assert len(reads[0].annotations) == 4
Пример #4
0
def test_unband_cli(capsys):
    """Run the ``unband`` subcommand and count records printed to stdout.

    Each output line consisting solely of ``+`` is counted; 135 such lines
    are expected.
    """
    cli_args = ['unband'] + data_glob('helium-unband/novel.band?.augfastq.gz')
    parsed = kevlar.cli.parser().parse_args(cli_args)
    kevlar.unband.main(parsed)

    stdout, _ = capsys.readouterr()
    num_qualdeflines = stdout.strip().split('\n').count('+')
    assert num_qualdeflines == 135
Пример #5
0
def test_unband_helium():
    """Unband the helium inputs in 16 batches and check the merged reads.

    Expects 135 reads in total, and 75 annotations on one specific read
    selected by name.
    """
    stream = kevlar.seqio.afxstream(
        data_glob('helium-unband/novel.band?.augfastq.gz')
    )
    reads = list(kevlar.unband.unband(stream, numbatches=16))
    assert len(reads) == 135

    target = 'seq1_haplo1_285110_285519_1:0:0_0:0:0_2dbcd/1'
    matches = [read for read in reads if read.name == target]
    # Indexing fails the test loudly if the named read is missing.
    someread = matches[0]
    assert len(someread.annotations) == 75
Пример #6
0
def test_load_threading(mask, numbands, band):
    """Smoke test: loading a sample with ``numthreads=2`` must not raise.

    ``mask``, ``numbands`` and ``band`` are forwarded unchanged to
    ``kevlar.count.load_sample_seqfile``; the result is not inspected.
    """
    seqfiles = data_glob('trio1/case1.fq')
    options = dict(mask=mask, numbands=numbands, band=band, numthreads=2)
    kevlar.count.load_sample_seqfile(seqfiles, 19, 1e7, **options)
Пример #7
0
def test_novel_load_counts(capsys):
    """Run ``novel`` with counttable files and check the stderr report.

    Passes ``--case-counts`` and ``--control-counts`` files to the ``novel``
    subcommand and expects stderr to mention counttables for 2 samples.
    """
    case_reads = data_file('simple-genome-case-reads.fa.gz')
    ambig_reads = data_file('ambig.fasta')
    case_counts = data_file('simple-genome-case.ct')
    # Unpacking enforces that the glob matches exactly two control tables.
    ctrl1_counts, ctrl2_counts = data_glob('simple-genome-ctrl?.ct')

    cli_args = ['novel', '-k', '25']
    cli_args += ['--case', case_reads, ambig_reads]
    cli_args += ['--case-counts', case_counts]
    cli_args += ['--control-counts', ctrl1_counts, ctrl2_counts]
    parsed = kevlar.cli.parser().parse_args(cli_args)
    kevlar.novel.main(parsed)

    _, stderr = capsys.readouterr()
    assert 'counttables for 2 sample(s) provided' in stderr
Пример #8
0
                                          filterfpr=0.005,
                                          logstream=sys.stderr)
        variants = [v for v in workflow]
    variants = sorted(variants, key=lambda v: v._pos)
    startpos = [v._pos + 1 for v in variants]
    teststartpos = [
        4073, 185752, 226611, 636699, 834646, 901124, 1175768, 1527139,
        1631013, 2265795
    ]
    assert len(variants) == 10
    assert startpos == teststartpos


def test_simplex_trio1(capsys):
    case = data_file('trio1/case1.fq')
    controls = data_glob('trio1/ctrl[1,2].fq')
    refr = data_file('bogus-genome/refr.fa')
    arglist = [
        'simplex', '--case', case, '--control', controls[0], '--control',
        controls[1], '--case-min', '6', '--ctrl-max', '0', '--novel-memory',
        '1M', '--novel-fpr', '0.2', '--filter-memory', '50K', '--mask-files',
        refr, '--mask-memory', '1M', '--filter-fpr', '0.005', '--ksize', '21',
        refr
    ]
    args = kevlar.cli.parser().parse_args(arglist)
    kevlar.simplex.main(args)

    out, err = capsys.readouterr()
    testvcf = '\t'.join([
        'bogus-genome-chr1', '3567', '.', 'A', 'C', '.', 'PASS', 'RW=GAAGGGCAC'
        'ACCTAACCGCAACATTTGCCGTGGAAGCATAA;VW=GAAGGGCACACCTAACCGCACCATTTGCCGTGG'
Пример #9
0
def test_load_sketches_fpr_fail():
    """Loading sketches with ``maxfpr=0.001`` must raise an FPR error.

    Expects ``kevlar.sketch.KevlarUnsuitableFPRError`` whose message
    contains ``'FPR too high, bailing out!!!'``.
    """
    infiles = data_glob('test.counttable')
    with pytest.raises(kevlar.sketch.KevlarUnsuitableFPRError) as e:
        kevlar.sketch.load_sketchfiles(infiles, maxfpr=0.001)
    # `e` is pytest's ExceptionInfo wrapper, not the exception itself; its
    # string form is version-dependent, so inspect the raised exception
    # through `e.value`.
    assert 'FPR too high, bailing out!!!' in str(e.value)
Пример #10
0
def test_load_sketches():
    """Load ``test.counttable`` with ``maxfpr=0.5`` and spot-check two k-mers.

    Every loaded sketch must report a positive count for one k-mer and a
    zero count for another.
    """
    present_kmer = 'CCTGATATCCGGAATCTTAGC'
    absent_kmer = 'GATTACA' * 3
    counttables = data_glob('test.counttable')
    for table in kevlar.sketch.load_sketchfiles(counttables, maxfpr=0.5):
        assert table.get(present_kmer) > 0
        assert table.get(absent_kmer) == 0
Пример #11
0
def test_load_sketches_fpr_fail():
    """Loading sketches with ``maxfpr=0.001`` must raise an FPR error.

    Expects ``kevlar.sketch.KevlarUnsuitableFPRError`` with a message that
    matches ``'FPR too high, bailing out!!!'``.
    """
    counttables = data_glob('test.counttable')
    expected_error = kevlar.sketch.KevlarUnsuitableFPRError
    with pytest.raises(expected_error, match=r'FPR too high, bailing out!!!'):
        kevlar.sketch.load_sketchfiles(counttables, maxfpr=0.001)