Пример #1
0
def test_lastal_task_multithreaded(tmpdir, datadir):
    '''Check that lastal run with n_threads set reports the same E-values
    as a run without an explicit n_threads.

    For each thread count in (3, 4, 5) the same query is aligned twice
    against the same database and the 'E' columns of the two parsed MAF
    outputs are compared after sorting.
    '''
    with tmpdir.as_cwd():
        for n_threads in (3,4,5):
            prot = datadir('test-protein.fa')
            tr = datadir('pom.50.fa')
            out_single = tmpdir.join('out-single').strpath
            out_multi = tmpdir.join('out-multi').strpath

            db_task = lastdb_task(prot, prot)
            aln_task_single = lastal_task(tr, prot, out_single, 
                                           translate=True, 
                                           cutoff=None)

            aln_task_multi = lastal_task(tr, prot, out_multi,
                                         translate=True, 
                                         cutoff=None,
                                         n_threads=n_threads)
            run_tasks([db_task, aln_task_multi, aln_task_single], 
                      ['run'])

            alns_single = MafParser(out_single).read()
            alns_multi = MafParser(out_multi).read()

            # Compare the sorted values as plain lists. sort_values() keeps
            # each row's original index label, so `==` between the two sorted
            # Series would be label-based: pandas raises on differently
            # ordered labels instead of comparing the values positionally,
            # which is exactly what happens when the threaded run emits the
            # alignments in a different order.
            evalues_single = alns_single['E'].sort_values().tolist()
            evalues_multi = alns_multi['E'].sort_values().tolist()

            assert evalues_single == evalues_multi
Пример #2
0
def test_crbl(tmpdir, datadir):
    '''Run `shmlast crbl` through runscript and check that it exits with
    status 0 and writes the output file requested with -o.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')
    out_path = tmpdir.join('test.csv')
    out_fn = str(out_path)

    args = ['crbl', '-q', query, '-d', database, '-o', out_fn]
    status, out, err = runscript('shmlast', args, directory=str(tmpdir))
    assert status == 0
    # ensure() would create the file when it is missing and so could never
    # fail; check(file=True) only tests for existence.
    assert out_path.check(file=True)
Пример #3
0
def test_reciprocal_best_hits(datadir):
    '''Test BestHits.reciprocal_best_hits with inplace=False

    The result for query.maf.csv and db.maf.csv is compared against
    reciprocals.expected.csv using check_df_equals.
    '''
    # Load the three tables in one pass: query, database, expectation.
    queries, database, wanted = [
        pd.read_csv(datadir(name))
        for name in ('query.maf.csv',
                     'db.maf.csv',
                     'reciprocals.expected.csv')
    ]

    observed = BestHits().reciprocal_best_hits(queries, database,
                                               inplace=False)

    assert check_df_equals(observed, wanted)
Пример #4
0
def test_rbl(tmpdir, datadir):
    '''Run `shmlast rbl` through runscript inside tmpdir and check that it
    exits with status 0 and leaves the expected CSV in that directory.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')

    print(query, database, tmpdir)
    args = ['rbl', '-q', query, '-d', database]
    status, out, err = runscript('shmlast', args, directory=str(tmpdir))

    assert status == 0
    # ensure() would create the file when it is missing and so could never
    # fail; check(file=True) only tests for existence.
    result = tmpdir.join('sacPom.cdna.fa.x.sacPom.pep.fa.rbl.csv')
    assert result.check(file=True)
Пример #5
0
def test_reciprocal_best_hits(datadir):
    '''Test BestHits.reciprocal_best_hits with inplace=False against the
    stored reciprocals.expected.csv table.
    '''
    def load(name):
        # Read one CSV from the test data directory.
        return pd.read_csv(datadir(name))

    query_table = load('query.maf.csv')
    db_table = load('db.maf.csv')
    reference = load('reciprocals.expected.csv')

    finder = BestHits()
    reciprocals = finder.reciprocal_best_hits(query_table,
                                              db_table,
                                              inplace=False)

    assert check_df_equals(reciprocals, reference)
Пример #6
0
def test_crbl_tasks_empty(tmpdir, datadir):
    '''Run every task produced by CRBL for pom.single.fa against
    odb_subset.fa and expect run_tasks to return 0.
    '''
    with tmpdir.as_cwd():
        transcripts = datadir('pom.single.fa')
        proteins = datadir('odb_subset.fa')
        csv_path = tmpdir.join('result.csv').strpath

        pipeline = CRBL(transcripts, proteins, csv_path)
        exit_code = run_tasks(list(pipeline.tasks()), ['run'])

        assert exit_code == 0
Пример #7
0
def test_besthits_non_inplace(datadir):
    '''Test BestHits.best_hits with inplace=False

    The result for query.maf.csv is compared against
    besthits.expected.csv using check_df_equals.
    '''
    alignments, wanted = [
        pd.read_csv(datadir(name))
        for name in ('query.maf.csv', 'besthits.expected.csv')
    ]

    observed = BestHits().best_hits(alignments, inplace=False)

    assert check_df_equals(wanted, observed)
Пример #8
0
def test_besthits_non_inplace(datadir):
    '''Test BestHits.best_hits with inplace=False against the stored
    besthits.expected.csv table.
    '''
    def load(name):
        # Read one CSV from the test data directory.
        return pd.read_csv(datadir(name))

    query_table = load('query.maf.csv')
    reference = load('besthits.expected.csv')

    finder = BestHits()
    best = finder.best_hits(query_table, inplace=False)

    assert check_df_equals(reference, best)
Пример #9
0
def test_crbl_tasks_empty(tmpdir, datadir):
    '''Build a CRBL object for pom.single.fa vs odb_subset.fa, run all of
    its tasks from inside tmpdir, and expect run_tasks to return 0.
    '''
    with tmpdir.as_cwd():
        query_fa = datadir('pom.single.fa')
        database_fa = datadir('odb_subset.fa')
        output_csv = tmpdir.join('result.csv').strpath

        crbl_tasks = list(CRBL(query_fa, database_fa, output_csv).tasks())
        status = run_tasks(crbl_tasks, ['run'])

        assert status == 0
Пример #10
0
def test_rbl(tmpdir, datadir, n_threads, benchmark):
    '''Benchmark one `shmlast rbl` run with the given --n_threads value and
    check that it exits with status 0 and leaves the expected CSV in tmpdir.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')

    args = ['rbl', '--n_threads', str(n_threads), '-q', query, '-d', database]
    # A single round with a single iteration: the script is invoked once.
    status, out, err = benchmark.pedantic(runscript,
                                          args=('shmlast', args),
                                          kwargs={'directory': str(tmpdir)},
                                          iterations=1,
                                          rounds=1)

    assert status == 0
    # ensure() would create the file when it is missing and so could never
    # fail; check(file=True) only tests for existence.
    result = tmpdir.join('sacPom.cdna.fa.x.sacPom.pep.fa.rbl.csv')
    assert result.check(file=True)
Пример #11
0
def test_rbl(tmpdir, datadir, n_threads, benchmark):
    '''Benchmark one `shmlast rbl` run with the given --n_threads value and
    check that it exits with status 0 and leaves the expected CSV in tmpdir.
    '''
    query = datadir('sacPom.cdna.fa')
    database = datadir('sacPom.pep.fa')
    
    args = ['rbl', '--n_threads', str(n_threads),
            '-q', query, '-d', database]
    # A single round with a single iteration: the script is invoked once.
    status, out, err = benchmark.pedantic(runscript,
                                          args=('shmlast', args),
                                          kwargs={'directory': str(tmpdir)},
                                          iterations=1,
                                          rounds=1)

    assert status == 0
    # ensure() would create the file when it is missing and so could never
    # fail; check(file=True) only tests for existence.
    result = tmpdir.join('sacPom.cdna.fa.x.sacPom.pep.fa.rbl.csv')
    assert result.check(file=True)
Пример #12
0
def test_lastal_task_nucl_x_prot(tmpdir, datadir):
    '''Run lastal_task with translate=True for test-transcript.fa against
    test-protein.fa and check that the output names both sequences and
    contains the string 'lambda'.
    '''
    with tmpdir.as_cwd():
        prot = datadir('test-protein.fa')
        tr = datadir('test-transcript.fa')
        out = tmpdir.join('test-out').strpath

        db_task = lastdb_task(prot, prot)
        aln_task = lastal_task(tr, prot, out,  
                                translate=True, 
                                cutoff=None)
        run_tasks([db_task, aln_task], ['run'])

        # Read the whole output inside a context manager so the file handle
        # is closed deterministically.
        with open(out) as fp:
            aln = fp.read()
        print(aln, file=sys.stderr)

        assert 'SPAC212_RecQ_type_DNA_helicase_PROTEIN' in aln
        assert 'SPAC212_RecQ_type_DNA_helicase_TRANSCRIPT' in aln
        assert 'lambda' in aln, 'lambda missing, wrong LAST version?'
Пример #13
0
def lastdb_dir(tmpdir_factory, datadir):
    '''Run the lastdb task (prot=True) on sacPom.pep.fa from inside a fresh
    'sacpom_lastdb' temporary directory and return that directory.

    Asserts that run_tasks returned 0.
    '''
    workdir = tmpdir_factory.mktemp('sacpom_lastdb')

    with workdir.as_cwd():
        pep_fa = datadir('sacPom.pep.fa')
        build_db = lastdb_task(pep_fa, pep_fa, prot=True)
        exit_code = run_tasks([build_db], ['run'])
        assert exit_code == 0

    return workdir
Пример #14
0
def test_lastdb_task_existing(tmpdir, datadir):
    '''Touch a file for every LASTDB_EXTENSIONS suffix before running the
    lastdb task and expect the task to be reported as up-to-date.
    '''
    with tmpdir.as_cwd():
        fasta = datadir('test-protein.fa')

        # Create the per-extension files up front.
        for suffix in LASTDB_EXTENSIONS:
            touch(fasta + suffix)

        db_task = lastdb_task(fasta, fasta, prot=True)
        run_tasks([db_task], ['run'])
        print(db_task, file=sys.stderr)

        report = check_status(db_task)
        assert report.status == 'up-to-date'
Пример #15
0
def test_lastdb_task_prot(tmpdir, datadir):
    '''Run the lastdb task with prot=True on test-protein.fa and check that
    a file exists for every LASTDB_EXTENSIONS suffix and that the task is
    reported as up-to-date.
    '''
    with tmpdir.as_cwd():
        fasta = datadir('test-protein.fa')

        db_task = lastdb_task(fasta, fasta, prot=True)
        run_tasks([db_task], ['run'])
        report = check_status(db_task)

        # Every extension must have a matching file next to the input.
        missing = [suffix for suffix in LASTDB_EXTENSIONS
                   if not os.path.isfile(fasta + suffix)]
        assert not missing

        assert report.status == 'up-to-date'
Пример #16
0
def test_lastdb_task_nucl(tmpdir, datadir):
    '''Run the lastdb task with prot=False on test-transcript.fa and check
    that a file exists for every LASTDB_EXTENSIONS suffix and that the task
    is reported as up-to-date.
    '''
    with tmpdir.as_cwd():
        fasta = datadir('test-transcript.fa')

        db_task = lastdb_task(fasta, fasta, prot=False)
        run_tasks([db_task], ['run'])
        report = check_status(db_task)
        # Emit the search path on stderr as a debugging aid.
        print('PATH:', os.environ['PATH'], file=sys.stderr)

        # Every extension must have a matching file next to the input.
        missing = [suffix for suffix in LASTDB_EXTENSIONS
                   if not os.path.isfile(fasta + suffix)]
        assert not missing

        assert report.status == 'up-to-date'
Пример #17
0
def test_lastal_task_large(datadir, lastdb_dir, tmpdir_factory, benchmark, n_threads):
    '''Benchmark a single lastal_task run (translate=True) of sacPom.cdna.fa
    against the database path taken from lastdb_dir, using n_threads, and
    expect run_tasks to return 0.
    '''
    workdir = tmpdir_factory.mktemp('THREADS_{0}'.format(n_threads))

    with workdir.as_cwd():
        task_options = dict(translate=True,
                            cutoff=None,
                            n_threads=n_threads)
        aln_task = lastal_task(datadir('sacPom.cdna.fa'),
                               str(lastdb_dir.join('sacPom.pep.fa')),
                               'out',
                               **task_options)

        # A single round with a single iteration: the task runs once.
        exit_code = benchmark.pedantic(run_tasks,
                                       args=([aln_task], ['run']),
                                       iterations=1,
                                       rounds=1)

        assert exit_code == 0
Пример #18
0
def test_lastal_task_uptodate(tmpdir, datadir):
    '''Run the lastdb and lastal tasks for test-protein.fa against itself
    once, then expect check_status to report the lastal task as up-to-date.
    '''
    with tmpdir.as_cwd():
        protein_fa = datadir('test-protein.fa')
        aln_path = tmpdir.join('test-out').strpath

        build_db = lastdb_task(protein_fa, protein_fa)
        align = lastal_task(protein_fa, protein_fa, aln_path,
                            translate=False,
                            cutoff=None)

        # Execute both tasks a single time.
        run_tasks([build_db, align], ['run'])

        # Query the status of the alignment task without running it again.
        print(align)
        report = check_status(align, tasks=[align, build_db])

        assert report.status == 'up-to-date'