Пример #1
0
    def test_hmmscan_task_multithreaded(self):
        """Check that hmmscan yields the same e-values for any thread count.

        For every thread count in (2, 3, 4, 5) an hmmpress task and two
        hmmscan tasks are built: one with the fifth positional argument set
        to 1 and one with it set to ``n_threads``. Both outputs are parsed
        with ``hmmscan_to_df_iter`` and their sorted ``domain_i_evalue``
        columns must match exactly.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('20aa-alitest.fa', td) as prot, \
                     TestData('20aa.hmm', td) as hmm, \
                     TemporaryFile(td) as out_single,\
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):
                        db_task = tasks.get_hmmpress_task(hmm,
                                                          self.hmmpress_cfg)
                        aln_task_single = tasks.get_hmmscan_task(
                            prot, out_single, hmm, 1.0, 1, self.hmmscan_cfg)
                        aln_task_multi = tasks.get_hmmscan_task(
                            prot, out_multi, hmm, 1.0, n_threads,
                            self.hmmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)
                        print(os.listdir(td), file=sys.stderr)

                        # Dump the single-threaded output for debugging;
                        # the context manager closes the handle promptly.
                        with open(out_single) as fp:
                            print(fp.read())
                        alns_single = pd.concat(hmmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(hmmscan_to_df_iter(out_multi))

                        # Compare by position, not by index label: sorting
                        # permutes each Series' index independently, and
                        # ``==`` between Series with differently ordered
                        # labels can raise instead of comparing values.
                        evalues_single = \
                            alns_single['domain_i_evalue'].sort_values().tolist()
                        evalues_multi = \
                            alns_multi['domain_i_evalue'].sort_values().tolist()
                        self.assertEqual(
                            evalues_single, evalues_multi,
                            msg='n_threads={0}'.format(n_threads))
Пример #2
0
    def test_lastal_task_multithreaded(self):
        """Check that lastal yields the same E values for any thread count.

        For every thread count in (3, 4, 5) a lastdb task and two lastal
        tasks are built: one without ``n_threads`` and one with it. Both
        outputs are read with ``MafParser`` and their sorted ``E`` columns
        must match exactly.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TestData('pom.50.fa', td) as tr, \
                     TemporaryFile(td) as out_single,\
                     TemporaryFile(td) as out_multi:

                    for n_threads in (3, 4, 5):
                        print(os.listdir(td), file=sys.stderr)

                        db_task = tasks.get_lastdb_task(prot, prot,
                                                        self.lastdb_cfg)
                        aln_task_single = tasks.get_lastal_task(
                            tr, prot, out_single,
                            self.lastal_cfg,
                            translate=True,
                            cutoff=None)

                        aln_task_multi = tasks.get_lastal_task(
                            tr, prot, out_multi,
                            self.lastal_cfg,
                            translate=True,
                            cutoff=None,
                            n_threads=n_threads)
                        run_tasks([db_task, aln_task_single, aln_task_multi],
                                  ['run'])

                        alns_single = MafParser(out_single).read()
                        alns_multi = MafParser(out_multi).read()

                        # Compare by position, not by index label: sorting
                        # permutes each Series' index independently, and
                        # ``==`` between Series with differently ordered
                        # labels can raise instead of comparing values.
                        evalues_single = alns_single['E'].sort_values().tolist()
                        evalues_multi = alns_multi['E'].sort_values().tolist()
                        self.assertEqual(
                            evalues_single, evalues_multi,
                            msg='n_threads={0}'.format(n_threads))
Пример #3
0
    def test_cmscan_task_multithreaded(self):
        """Check that cmscan yields the same e-values for any thread count.

        For every thread count in (2, 3, 4, 5) a cmpress task and two cmscan
        tasks are built: one with the fifth positional argument set to 1 and
        one with it set to ``n_threads``. Both outputs are parsed with
        ``cmscan_to_df_iter`` and their sorted ``e_value`` columns must
        match exactly.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('rnaseP-bsu.fa', td) as transcript, \
                     TestData('rnaseP-eubact.c.cm', td) as cm, \
                     TemporaryFile(td) as out_single,\
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):

                        db_task = tasks.get_cmpress_task(cm, self.cmpress_cfg)
                        aln_task_single = tasks.get_cmscan_task(
                            transcript, out_single, cm, 1.0, 1,
                            self.cmscan_cfg)
                        aln_task_multi = tasks.get_cmscan_task(
                            transcript, out_multi, cm, 1.0, n_threads,
                            self.cmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)

                        alns_single = pd.concat(cmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(cmscan_to_df_iter(out_multi))

                        # Compare by position, not by index label: sorting
                        # permutes each Series' index independently, and
                        # ``==`` between Series with differently ordered
                        # labels can raise instead of comparing values.
                        evalues_single = \
                            alns_single['e_value'].sort_values().tolist()
                        evalues_multi = \
                            alns_multi['e_value'].sort_values().tolist()
                        self.assertEqual(
                            evalues_single, evalues_multi,
                            msg='n_threads={0}'.format(n_threads))
Пример #4
0
    def test_lastal_task_nucl_x_prot(self):
        """Align a transcript against a protein database with lastal.

        Builds a lastdb task on the protein file and a lastal task with
        ``translate=True``, runs both, and checks that the output text
        mentions the protein name, the transcript name and ``lambda``.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TestData('test-transcript.fa', td) as tr, \
                     TemporaryFile(td) as out:

                    print(os.listdir(td), file=sys.stderr)
                    db_task = tasks.get_lastdb_task(prot, prot,
                                                    self.lastdb_cfg)
                    aln_task = tasks.get_lastal_task(tr,
                                                     prot,
                                                     out,
                                                     self.lastal_cfg,
                                                     translate=True,
                                                     cutoff=None)
                    run_tasks([db_task, aln_task], ['run'])

                    # Read the whole alignment file and close it right away.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln, file=sys.stderr)

                    self.assertIn('SPAC212_RecQ_type_DNA_helicase_PROTEIN',
                                  aln)
                    self.assertIn('SPAC212_RecQ_type_DNA_helicase_TRANSCRIPT',
                                  aln)
                    self.assertIn('lambda',
                                  aln,
                                  msg='lambda missing, wrong LAST version?')
Пример #5
0
    def test_cmscan_task_multithreaded(self):
        """Verify cmscan output does not depend on the thread count.

        Runs cmscan with the fifth positional argument set to 1 and then to
        each of 2, 3, 4 and 5, parses both result files with
        ``cmscan_to_df_iter``, and requires the sorted ``e_value`` columns
        to be equal.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('rnaseP-bsu.fa', td) as transcript, \
                     TestData('rnaseP-eubact.c.cm', td) as cm, \
                     TemporaryFile(td) as out_single,\
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):

                        db_task = tasks.get_cmpress_task(cm, self.cmpress_cfg)
                        aln_task_single = tasks.get_cmscan_task(
                            transcript, out_single,
                            cm, 1.0, 1,
                            self.cmscan_cfg)
                        aln_task_multi = tasks.get_cmscan_task(
                            transcript, out_multi,
                            cm, 1.0,
                            n_threads,
                            self.cmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)

                        alns_single = pd.concat(cmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(cmscan_to_df_iter(out_multi))

                        # Positional comparison: after sort_values() the two
                        # Series have independently permuted indices, and
                        # label-aligned ``==`` can raise rather than compare.
                        evalues_single = \
                            alns_single['e_value'].sort_values().tolist()
                        evalues_multi = \
                            alns_multi['e_value'].sort_values().tolist()
                        self.assertEqual(
                            evalues_single, evalues_multi,
                            msg='n_threads={0}'.format(n_threads))
Пример #6
0
    def test_hmmscan_task_multithreaded(self):
        """Verify hmmscan output does not depend on the thread count.

        Runs hmmscan with the fifth positional argument set to 1 and then
        to each of 2, 3, 4 and 5, parses both result files with
        ``hmmscan_to_df_iter``, and requires the sorted ``domain_i_evalue``
        columns to be equal.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('20aa-alitest.fa', td) as prot, \
                     TestData('20aa.hmm', td) as hmm, \
                     TemporaryFile(td) as out_single,\
                     TemporaryFile(td) as out_multi:

                    for n_threads in (2, 3, 4, 5):
                        db_task = tasks.get_hmmpress_task(
                            hmm, self.hmmpress_cfg)
                        aln_task_single = tasks.get_hmmscan_task(
                            prot, out_single, hmm, 1.0, 1, self.hmmscan_cfg)
                        aln_task_multi = tasks.get_hmmscan_task(
                            prot, out_multi, hmm, 1.0, n_threads,
                            self.hmmscan_cfg)
                        run_tasks([db_task, aln_task_single], ['run'])
                        run_task(aln_task_multi)
                        print(os.listdir(td), file=sys.stderr)

                        # Debug dump of the single-threaded output; ``with``
                        # guarantees the handle is closed.
                        with open(out_single) as fp:
                            print(fp.read())
                        alns_single = pd.concat(hmmscan_to_df_iter(out_single))
                        alns_multi = pd.concat(hmmscan_to_df_iter(out_multi))

                        # Positional comparison: after sort_values() the two
                        # Series have independently permuted indices, and
                        # label-aligned ``==`` can raise rather than compare.
                        evalues_single = \
                            alns_single['domain_i_evalue'].sort_values().tolist()
                        evalues_multi = \
                            alns_multi['domain_i_evalue'].sort_values().tolist()
                        self.assertEqual(
                            evalues_single, evalues_multi,
                            msg='n_threads={0}'.format(n_threads))
Пример #7
0
    def test_hmmpress_task(self):
        """Run the hmmpress task and check its outputs and status.

        After running, a file must exist at ``tf + ext`` for every entry in
        ``self.extensions`` and ``check_status`` must report 'up-to-date'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-profile.hmm', td) as tf:
                    task = tasks.get_hmmpress_task(tf, self.hmmpress_cfg)
                    run_tasks([task], ['run'])
                    status = check_status(task)
                    print(os.listdir(td), file=sys.stderr)

                    for ext in self.extensions:
                        self.assertTrue(os.path.isfile(tf + ext),
                                        msg='missing output: ' + tf + ext)

                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(status.status, 'up-to-date')
Пример #8
0
    def test_hmmpress_task(self):
        """Check that hmmpress produces every expected index file.

        Builds and runs the hmmpress task for 'test-profile.hmm', then
        asserts that ``tf + ext`` is a file for each extension in
        ``self.extensions`` and that the task status is 'up-to-date'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-profile.hmm', td) as tf:
                    task = tasks.get_hmmpress_task(tf, self.hmmpress_cfg)
                    run_tasks([task], ['run'])
                    status = check_status(task)
                    print(os.listdir(td), file=sys.stderr)

                    for ext in self.extensions:
                        self.assertTrue(os.path.isfile(tf + ext),
                                        msg='missing output: ' + tf + ext)

                    # Use assertEqual; assertEquals is a deprecated alias
                    # that no longer exists in Python 3.12+.
                    self.assertEqual(status.status, 'up-to-date')
Пример #9
0
    def test_cmpress_task_existing(self):
        """Check cmpress is reported up-to-date when its outputs pre-exist.

        Every ``tf + ext`` file is created with ``touch`` before the task
        is built and run; ``check_status`` must then report 'up-to-date'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-covariance-model.cm', td) as tf:
                    # Pre-create the output files the task would produce.
                    for ext in self.extensions:
                        touch(tf + ext)

                    task = tasks.get_cmpress_task(tf, self.cmpress_cfg)
                    run_tasks([task], ['run'])
                    print(os.listdir(td), file=sys.stderr)
                    print(task, file=sys.stderr)
                    status = check_status(task)

                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(status.status, 'up-to-date')
Пример #10
0
    def test_cmpress_task_existing(self):
        """Run cmpress over already-existing outputs and check its status.

        The files ``tf + ext`` for each extension in ``self.extensions``
        are touched first, then the cmpress task is run and its status
        must be 'up-to-date'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-covariance-model.cm', td) as tf:
                    # Create the expected outputs ahead of the task run.
                    for ext in self.extensions:
                        touch(tf + ext)

                    task = tasks.get_cmpress_task(tf, self.cmpress_cfg)
                    run_tasks([task], ['run'])
                    print(os.listdir(td), file=sys.stderr)
                    print(task, file=sys.stderr)
                    status = check_status(task)

                    # Use assertEqual; assertEquals is a deprecated alias
                    # that no longer exists in Python 3.12+.
                    self.assertEqual(status.status, 'up-to-date')
Пример #11
0
    def test_lastdb_task_existing(self):
        """Check lastdb is reported up-to-date when its outputs pre-exist.

        Every ``tf + ext`` file is created with ``touch`` before the
        protein lastdb task (``prot=True``) is built and run;
        ``check_status`` must then report 'up-to-date'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as tf:
                    # Pre-create the output files the task would produce.
                    for ext in self.extensions:
                        touch(tf + ext)

                    task = tasks.get_lastdb_task(tf, tf, self.lastdb_cfg,
                                                 prot=True)
                    run_tasks([task], ['run'])
                    print(os.listdir(td), file=sys.stderr)
                    print(task, file=sys.stderr)
                    status = check_status(task)

                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(status.status, 'up-to-date')
Пример #12
0
    def test_lastdb_task_nucl(self):
        """Run lastdb on a transcript file and check outputs and status.

        The task is built with ``prot=False``. After running, a file must
        exist at ``tf + ext`` for every entry in ``self.extensions`` and
        ``check_status`` must report 'up-to-date'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript.fa', td) as tf:
                    task = tasks.get_lastdb_task(tf, tf, self.lastdb_cfg,
                                                 prot=False)
                    run_tasks([task], ['run'])
                    status = check_status(task)
                    # Diagnostics to help debug a missing LAST install.
                    print(os.listdir(td), file=sys.stderr)
                    print('PATH:', os.environ['PATH'], file=sys.stderr)

                    for ext in self.extensions:
                        self.assertTrue(os.path.isfile(tf + ext),
                                        msg='missing output: ' + tf + ext)

                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(status.status, 'up-to-date')
Пример #13
0
    def test_hmmscan_task(self):
        """Run hmmpress followed by hmmscan and sanity-check the output.

        The output text must contain the word 'accession' exactly twice
        and must contain 'i-Evalue'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TestData('test-profile.hmm', td) as hmm, \
                     TemporaryFile(td) as out:

                    db_task = tasks.get_hmmpress_task(hmm, self.hmmpress_cfg)
                    aln_task = tasks.get_hmmscan_task(prot, out, hmm, 1.0, 1,
                                                      self.hmmscan_cfg)
                    run_tasks([db_task, aln_task], ['run'])
                    print(os.listdir(td), file=sys.stderr)
                    # Read the result and release the file handle at once.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln)

                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(aln.count('accession'), 2)
                    self.assertIn('i-Evalue', aln)
Пример #14
0
    def test_lastal_task_uptodate(self):
        """Check that a second lastal run leaves the task up-to-date.

        The lastdb and lastal tasks are run once together, then the lastal
        task is run again on its own and ``check_status`` must report
        'up-to-date'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TemporaryFile(td) as out:

                    print(os.listdir(td), file=sys.stderr)
                    db_task = tasks.get_lastdb_task(prot, prot, self.lastdb_cfg)
                    aln_task = tasks.get_lastal_task(prot, prot, out, False,
                                                     None, 1, self.lastal_cfg)
                    # Run it once
                    run_tasks([db_task, aln_task], ['run'])

                    # Now run again and check the status
                    run_tasks([aln_task], ['run'])
                    status = check_status(aln_task)
                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(status.status, 'up-to-date')
Пример #15
0
    def test_hmmscan_task(self):
        """Sanity-check the text produced by an hmmscan task.

        Runs the hmmpress and hmmscan tasks, then requires the output to
        contain 'accession' exactly twice and to contain 'i-Evalue'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TestData('test-profile.hmm', td) as hmm, \
                     TemporaryFile(td) as out:

                    db_task = tasks.get_hmmpress_task(hmm, self.hmmpress_cfg)
                    aln_task = tasks.get_hmmscan_task(prot, out, hmm, 1.0, 1,
                                                      self.hmmscan_cfg)
                    run_tasks([db_task, aln_task], ['run'])
                    print(os.listdir(td), file=sys.stderr)
                    # ``with`` closes the handle instead of leaking it.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln)

                    # Use assertEqual; assertEquals is a deprecated alias
                    # that no longer exists in Python 3.12+.
                    self.assertEqual(aln.count('accession'), 2)
                    self.assertIn('i-Evalue', aln)
Пример #16
0
    def test_cmscan_task(self):
        """Run cmpress followed by cmscan and sanity-check the output.

        The output text must contain the word 'accession' exactly twice
        and must contain 'E-value'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript.fa', td) as transcript, \
                     TestData('test-covariance-model.cm', td) as cm, \
                     TemporaryFile(td) as out:

                    db_task = tasks.get_cmpress_task(cm, self.cmpress_cfg)
                    aln_task = tasks.get_cmscan_task(transcript, out, cm,
                                                     1.0, 1, self.cmscan_cfg)
                    run_tasks([db_task, aln_task], ['run'])
                    print(os.listdir(td), file=sys.stderr)
                    # Read the result and release the file handle at once.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln, file=sys.stderr)

                    # TODO: better correctness check
                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(aln.count('accession'), 2)
                    self.assertIn('E-value', aln)
Пример #17
0
    def test_output(self):
        """Check the JSON written by the transcriptome stats task.

        The task writes to ``<td>/test``; the loaded JSON must contain
        ``n_ambiguous`` equal to 0 and ``N`` equal to 1.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript.fa', td) as transcript:

                    output_fn = os.path.join(td, 'test')
                    tsk = tasks.get_transcriptome_stats_task(
                        transcript, output_fn)
                    run_tasks([tsk], ['run'])

                    with open(output_fn) as fp:
                        results = json.load(fp)

                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertIn('n_ambiguous', results)
                    self.assertEqual(results['n_ambiguous'], 0)

                    self.assertIn('N', results)
                    self.assertEqual(results['N'], 1)
Пример #18
0
    def test_cmscan_task(self):
        """Sanity-check the text produced by a cmscan task.

        Runs the cmpress and cmscan tasks, then requires the output to
        contain 'accession' exactly twice and to contain 'E-value'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript.fa', td) as transcript, \
                     TestData('test-covariance-model.cm', td) as cm, \
                     TemporaryFile(td) as out:

                    db_task = tasks.get_cmpress_task(cm, self.cmpress_cfg)
                    aln_task = tasks.get_cmscan_task(transcript, out, cm,
                                                     1.0, 1, self.cmscan_cfg)
                    run_tasks([db_task, aln_task], ['run'])
                    print(os.listdir(td), file=sys.stderr)
                    # ``with`` closes the handle instead of leaking it.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln, file=sys.stderr)

                    # TODO: better correctness check
                    # Use assertEqual; assertEquals is a deprecated alias
                    # that no longer exists in Python 3.12+.
                    self.assertEqual(aln.count('accession'), 2)
                    self.assertIn('E-value', aln)
Пример #19
0
    def test_longorfs_task(self):
        """Run the TransDecoder ORF task and check the peptide output.

        After running, ``longest_orfs.pep`` must exist inside
        ``<transcript>.transdecoder_dir`` and must contain the full text
        of the expected peptide file.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript.fa', td) as transcript, \
                     TestData('test-transcript-orf.pep', td) as exp_orf:

                    task = tasks.get_transdecoder_orf_task(transcript,
                                                           self.longorfs_cfg)
                    run_tasks([task], ['run'])
                    output_dir = transcript + '.transdecoder_dir'

                    # Context managers close both files deterministically.
                    with open(exp_orf) as fp:
                        exp_pep = fp.read()

                    pep_fn = os.path.join(output_dir, 'longest_orfs.pep')
                    self.assertTrue(os.path.isfile(pep_fn))
                    with open(pep_fn) as fp:
                        pep = fp.read()

                    self.assertIn(exp_pep, pep)
Пример #20
0
    def test_output(self):
        """Verify the transcriptome stats task writes the expected JSON.

        Runs the task with output file ``<td>/test`` and asserts that the
        parsed JSON has ``n_ambiguous == 0`` and ``N == 1``.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript.fa', td) as transcript:

                    output_fn = os.path.join(td, 'test')
                    tsk = tasks.get_transcriptome_stats_task(transcript,
                                                             output_fn)
                    run_tasks([tsk], ['run'])

                    with open(output_fn) as fp:
                        results = json.load(fp)

                    # Use assertEqual; assertEquals is a deprecated alias
                    # that no longer exists in Python 3.12+.
                    self.assertIn('n_ambiguous', results)
                    self.assertEqual(results['n_ambiguous'], 0)

                    self.assertIn('N', results)
                    self.assertEqual(results['N'], 1)
Пример #21
0
    def test_lastal_task_prot_x_prot(self):
        """Align a protein file against itself with lastal.

        The output must mention the protein name exactly twice and must
        contain 'EG2=0', 'E=0' and 'lambda'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TemporaryFile(td) as out:

                    print(os.listdir(td), file=sys.stderr)
                    db_task = tasks.get_lastdb_task(prot, prot, self.lastdb_cfg)
                    aln_task = tasks.get_lastal_task(prot, prot, out, False,
                                                     None, 1, self.lastal_cfg)
                    run_tasks([db_task, aln_task], ['run'])

                    # Read the whole alignment file and close it right away.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln, file=sys.stderr)

                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(
                            aln.count('SPAC212_RecQ_type_DNA_helicase_PROTEIN'),
                            2)
                    self.assertIn('EG2=0', aln)
                    self.assertIn('E=0', aln)
                    self.assertIn('lambda', aln,
                                  msg='lambda missing, wrong LAST version?')
Пример #22
0
    def test_lastal_task_nucl_x_prot(self):
        """Align a transcript file against a protein database with lastal.

        Runs the lastdb and lastal tasks and checks that the output text
        mentions the protein name, the transcript name and 'lambda'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TestData('test-transcript.fa', td) as tr, \
                     TemporaryFile(td) as out:

                    print(os.listdir(td), file=sys.stderr)
                    db_task = tasks.get_lastdb_task(prot, prot, self.lastdb_cfg)
                    aln_task = tasks.get_lastal_task(tr, prot, out, True, None, 1,
                                                     self.lastal_cfg)
                    run_tasks([db_task, aln_task], ['run'])

                    # ``with`` closes the handle instead of leaking it.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln, file=sys.stderr)

                    self.assertIn('SPAC212_RecQ_type_DNA_helicase_PROTEIN',
                                  aln)
                    self.assertIn('SPAC212_RecQ_type_DNA_helicase_TRANSCRIPT',
                                  aln)
                    self.assertIn('lambda', aln,
                                  msg='lambda missing, wrong LAST version?')
Пример #23
0
    def test_predict_task(self):
        """Run the TransDecoder ORF and predict tasks and check outputs.

        For every extension in ``self.extensions`` the file
        ``<transcript>.transdecoder<ext>`` must exist; the '.gff3' file
        must additionally mention the feature names asserted below.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-transcript.fa', td) as transcript, \
                     TestData('test-protein-x-pfam-a.tbl', td) as pfam:

                    orf_task = tasks.get_transdecoder_orf_task(
                        transcript, self.longorfs_cfg)
                    pred_task = tasks.get_transdecoder_predict_task(
                        transcript, pfam, self.predict_cfg)
                    run_tasks([orf_task, pred_task], ['run'])

                    for ext in self.extensions:
                        fn = os.path.join(td, transcript+'.transdecoder'+ext)
                        self.assertTrue(os.path.isfile(fn))
                        # Close each output file before the next iteration.
                        with open(fn) as fp:
                            contents = fp.read()
                        if ext == '.gff3':
                            self.assertIn('mRNA', contents)
                            self.assertIn('gene', contents)
                            self.assertIn('CDS', contents)
                            self.assertIn('three_prime_UTR', contents)
                            self.assertIn('exon', contents)
Пример #24
0
    def test_lastal_task_multithreaded(self):
        """Verify lastal output does not depend on the thread count.

        For each of 3, 4 and 5 threads, runs lastal once without
        ``n_threads`` and once with it, reads both outputs with
        ``MafParser``, and requires the sorted ``E`` columns to be equal.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TestData('pom.50.fa', td) as tr, \
                     TemporaryFile(td) as out_single,\
                     TemporaryFile(td) as out_multi:

                    for n_threads in (3, 4, 5):
                        print(os.listdir(td), file=sys.stderr)

                        db_task = tasks.get_lastdb_task(
                            prot, prot, self.lastdb_cfg)
                        aln_task_single = tasks.get_lastal_task(
                            tr,
                            prot,
                            out_single,
                            self.lastal_cfg,
                            translate=True,
                            cutoff=None)

                        aln_task_multi = tasks.get_lastal_task(
                            tr,
                            prot,
                            out_multi,
                            self.lastal_cfg,
                            translate=True,
                            cutoff=None,
                            n_threads=n_threads)
                        run_tasks([db_task, aln_task_single, aln_task_multi],
                                  ['run'])

                        alns_single = MafParser(out_single).read()
                        alns_multi = MafParser(out_multi).read()

                        # Positional comparison: after sort_values() the two
                        # Series have independently permuted indices, and
                        # label-aligned ``==`` can raise rather than compare.
                        evalues_single = alns_single['E'].sort_values().tolist()
                        evalues_multi = alns_multi['E'].sort_values().tolist()
                        self.assertEqual(
                            evalues_single, evalues_multi,
                            msg='n_threads={0}'.format(n_threads))
Пример #25
0
    def test_lastal_task_prot_x_prot(self):
        """Run a protein-vs-protein lastal alignment and check the output.

        The lastal task is built with ``translate=False``. The output must
        mention the protein name exactly twice and must contain 'EG2=0',
        'E=0' and 'lambda'.
        """
        with TemporaryDirectory() as td:
            with Move(td):
                with TestData('test-protein.fa', td) as prot, \
                     TemporaryFile(td) as out:

                    print(os.listdir(td), file=sys.stderr)
                    db_task = tasks.get_lastdb_task(prot, prot, self.lastdb_cfg)
                    aln_task = tasks.get_lastal_task(prot, prot, out,
                                                     self.lastal_cfg,
                                                     translate=False,
                                                     cutoff=None)
                    run_tasks([db_task, aln_task], ['run'])

                    # ``with`` closes the handle instead of leaking it.
                    with open(out) as fp:
                        aln = fp.read()
                    print(aln, file=sys.stderr)

                    # Use assertEqual; assertEquals is a deprecated alias
                    # that no longer exists in Python 3.12+.
                    self.assertEqual(
                            aln.count('SPAC212_RecQ_type_DNA_helicase_PROTEIN'),
                            2)
                    self.assertIn('EG2=0', aln)
                    self.assertIn('E=0', aln)
                    self.assertIn('lambda', aln,
                                  msg='lambda missing, wrong LAST version?')
Пример #26
0
def run_task(task, cmd='run', verbosity=2):
    """Run a single task by delegating to ``run_tasks``.

    ``task`` and ``cmd`` are each wrapped in a one-element list, and
    ``verbosity`` is passed along inside the ``config`` dict. Whatever
    ``run_tasks`` returns is returned unchanged.
    """
    settings = {'verbosity': verbosity}
    task_list = [task]
    command_list = [cmd]
    return run_tasks(task_list, command_list, config=settings)
Пример #27
0
def run_task(task, cmd='run', verbosity=2):
    """Convenience wrapper that runs one task through ``run_tasks``.

    The task and the command are passed as single-item lists and the
    verbosity is forwarded as ``config={'verbosity': verbosity}``. The
    result of ``run_tasks`` is returned as-is.
    """
    run_config = dict(verbosity=verbosity)
    result = run_tasks([task], [cmd], config=run_config)
    return result