Пример #1
0
    def test_cmscan_task_multithreaded(self, tmpdir, datadir):
        """Check that a multi-threaded cmscan run reports the same e-values
        as a single-threaded run.

        For each thread count in (2, 3, 4, 5), a CMPressTask is built for the
        covariance model, CMScanTask is run once with ``n_threads=1`` and once
        with the given thread count, and the sorted ``e_value`` columns of the
        two parsed outputs are compared.
        """
        with tmpdir.as_cwd():
            transcript = datadir('rnaseP-bsu.fa')
            cm = datadir('rnaseP-eubact.c.cm')
            out_single = str(tmpdir.join('single'))
            out_multi = str(tmpdir.join('multi'))

            for n_threads in (2, 3, 4, 5):

                db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
                aln_tasks_single = CMScanTask().task(transcript,
                                                     out_single,
                                                     cm,
                                                     cutoff=1.0,
                                                     n_threads=1)
                aln_tasks_multi = CMScanTask().task(transcript,
                                                    out_multi,
                                                    cm,
                                                    cutoff=1.0,
                                                    n_threads=n_threads)
                run_tasks([db_task, aln_tasks_single], ['run'])
                run_task(aln_tasks_multi)

                alns_single = pd.concat(InfernalParser(out_single))
                alns_multi = pd.concat(InfernalParser(out_multi))

                # Compare plain sorted lists rather than two Series: `==`
                # between Series is label-based and raises ValueError when
                # the sorted indexes differ (e.g. rows emitted in another
                # order). List equality is positional and also catches a
                # difference in the number of hits.
                evalues_single = alns_single['e_value'].sort_values().tolist()
                evalues_multi = alns_multi['e_value'].sort_values().tolist()
                assert evalues_single == evalues_multi
Пример #2
0
    def test_cmpress_task(self, tmpdir, datadir):
        """Run a CMPressTask on the test covariance model.

        Asserts that, for every suffix in ``self.extensions``, a file named
        model path + suffix exists, and that the task status is reported as
        ``'up-to-date'`` after running.
        """
        with tmpdir.as_cwd():
            model_path = datadir('test-covariance-model.cm')
            press_task = CMPressTask().task(model_path,
                                            params=self.cmpress_cfg)
            run_tasks([press_task], ['run'])
            task_state = check_status(press_task)
            # Dump the working-directory listing to stderr for debugging.
            print(os.listdir(), file=sys.stderr)

            for suffix in self.extensions:
                produced_file = model_path + suffix
                assert os.path.isfile(produced_file)

            assert task_state.status == 'up-to-date'
Пример #3
0
def register_rfam_tasks(handler, params, databases):
    """Register the Rfam download, gunzip and cmpress tasks on *handler*.

    Args:
        handler: Object exposing ``directory`` and ``register_task``.
        params: Passed through to ``CMPressTask().task`` as ``params``.
        databases: Mapping with an ``'Rfam'`` entry that provides the
            ``'filename'``, ``'fileformat'``, ``'url'`` and ``'md5'`` keys.

    Returns:
        The same *handler* that was passed in.
    """
    rfam_info = databases['Rfam']
    # The archive is "<filename>.<fileformat>"; the unpacked target lives in
    # the handler's directory under the bare filename.
    compressed_fn = '{0}.{1}'.format(rfam_info['filename'],
                                     rfam_info['fileformat'])
    unpacked_fn = path.join(handler.directory, rfam_info['filename'])

    download = get_download_task(rfam_info['url'],
                                 compressed_fn,
                                 md5=rfam_info['md5'])
    gunzip = get_gunzip_task(compressed_fn, unpacked_fn)

    handler.register_task('download:Rfam',
                          download,
                          files={'Rfam-gz': compressed_fn})
    handler.register_task('gunzip:Rfam',
                          gunzip,
                          files={'Rfam': unpacked_fn})
    handler.register_task('cmpress:Rfam',
                          CMPressTask().task(unpacked_fn, params=params))
    return handler
Пример #4
0
    def test_cmscan_task(self, tmpdir, datadir):
        """Run CMPressTask and CMScanTask on the test transcript and check
        the text written to the output file.

        The check is deliberately loose: it only counts occurrences of
        ``'accession'`` and looks for ``'E-value'`` in the output.
        """
        with tmpdir.as_cwd():
            transcript = datadir('test-transcript.fa')
            cm = datadir('test-covariance-model.cm')
            out = str(tmpdir.join('test.out'))

            db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
            aln_task = CMScanTask().task(transcript,
                                         out,
                                         cm,
                                         cutoff=1.0,
                                         n_threads=1)
            run_tasks([db_task, aln_task], ['run'])
            print(os.listdir(), file=sys.stderr)
            # Read through a context manager so the handle is closed
            # deterministically instead of being left to the garbage
            # collector.
            with open(out) as fp:
                aln = fp.read()
            print(aln, file=sys.stderr)

            # TODO: better correctness check
            assert aln.count('accession') == 2
            assert 'E-value' in aln