def test_cmscan_task_multithreaded(self, tmpdir, datadir):
    """Check that multi-threaded cmscan reports the same e-values as one thread.

    For each thread count in (2, 3, 4, 5) the scan is run once with
    ``n_threads=1`` and once with the given thread count; the sorted
    ``e_value`` columns parsed from both outputs must be identical.
    """
    with tmpdir.as_cwd():
        transcript = datadir('rnaseP-bsu.fa')
        cm = datadir('rnaseP-eubact.c.cm')
        out_single = str(tmpdir.join('single'))
        out_multi = str(tmpdir.join('multi'))

        for n_threads in (2, 3, 4, 5):
            db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
            aln_tasks_single = CMScanTask().task(transcript, out_single, cm,
                                                 cutoff=1.0, n_threads=1)
            aln_tasks_multi = CMScanTask().task(transcript, out_multi, cm,
                                                cutoff=1.0,
                                                n_threads=n_threads)
            # The database task and the single-threaded scan run together;
            # the multi-threaded scan is run on its own afterwards.
            run_tasks([db_task, aln_tasks_single], ['run'])
            run_task(aln_tasks_multi)

            alns_single = pd.concat(InfernalParser(out_single))
            alns_multi = pd.concat(InfernalParser(out_multi))

            # Compare the sorted values positionally as plain lists.
            # sort_values() keeps the original index labels, and comparing
            # two Series with `==` raises ValueError when those labels are
            # not identically ordered -- which is exactly the case when the
            # hits come out in a different order between the two runs.
            evalues_single = alns_single['e_value'].sort_values().tolist()
            evalues_multi = alns_multi['e_value'].sort_values().tolist()
            assert evalues_single == evalues_multi
def test_cmpress_task(self, tmpdir, datadir):
    """Run the cmpress task on the test covariance model and verify it.

    After the task has run, every suffix in ``self.extensions`` appended
    to the model path must name an existing file, and the task must
    report its status as 'up-to-date'.
    """
    with tmpdir.as_cwd():
        model_path = datadir('test-covariance-model.cm')
        press_task = CMPressTask().task(model_path, params=self.cmpress_cfg)
        run_tasks([press_task], ['run'])
        task_status = check_status(press_task)

        # Dump the working directory listing to stderr to aid debugging.
        cwd_listing = os.listdir()
        print(cwd_listing, file=sys.stderr)

        for suffix in self.extensions:
            expected_file = model_path + suffix
            assert os.path.isfile(expected_file)

        assert task_status.status == 'up-to-date'
def register_rfam_tasks(handler, params, databases):
    """Register the Rfam download, gunzip and cmpress tasks on `handler`.

    Args:
        handler: Task handler; its ``directory`` attribute is used to build
            the decompressed target path and its ``register_task`` method
            receives each task.
        params: Passed through as ``params`` to the cmpress task.
        databases: Mapping with an ``'Rfam'`` entry providing the
            ``'filename'``, ``'fileformat'``, ``'url'`` and ``'md5'`` keys.

    Returns:
        The same `handler` that was passed in.
    """
    rfam_info = databases['Rfam']
    db_name = rfam_info['filename']

    # The archive name is "<filename>.<fileformat>"; the decompressed target
    # lives under the handler's directory.
    compressed_fn = '{0}.{1}'.format(db_name, rfam_info['fileformat'])
    unpacked_fn = path.join(handler.directory, db_name)

    fetch_task = get_download_task(rfam_info['url'], compressed_fn,
                                   md5=rfam_info['md5'])
    unpack_task = get_gunzip_task(compressed_fn, unpacked_fn)

    # Tasks that expose a named output file, in registration order.
    file_tasks = (
        ('download:Rfam', fetch_task, {'Rfam-gz': compressed_fn}),
        ('gunzip:Rfam', unpack_task, {'Rfam': unpacked_fn}),
    )
    for task_name, task, task_files in file_tasks:
        handler.register_task(task_name, task, files=task_files)

    # The cmpress task is registered without a `files` mapping.
    press_task = CMPressTask().task(unpacked_fn, params=params)
    handler.register_task('cmpress:Rfam', press_task)

    return handler
def test_cmscan_task(self, tmpdir, datadir):
    """Run cmpress then single-threaded cmscan and sanity-check the output.

    The scan output file must contain the string 'accession' exactly twice
    and must contain 'E-value'.
    """
    with tmpdir.as_cwd():
        transcript = datadir('test-transcript.fa')
        cm = datadir('test-covariance-model.cm')
        out = str(tmpdir.join('test.out'))

        db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
        aln_task = CMScanTask().task(transcript, out, cm,
                                     cutoff=1.0, n_threads=1)
        run_tasks([db_task, aln_task], ['run'])

        # Dump the working directory listing to stderr to aid debugging.
        print(os.listdir(), file=sys.stderr)

        # Read the whole report inside a context manager so the file handle
        # is closed deterministically instead of being left to the GC.
        with open(out) as fp:
            aln = fp.read()
        print(aln, file=sys.stderr)

        # TODO: better correctness check
        assert aln.count('accession') == 2
        assert 'E-value' in aln