def test_cmscan_task_multithreaded(self, tmpdir, datadir):
    """Compare CMScanTask e-values between a 1-thread run and N-thread runs.

    For each thread count in (2, 3, 4, 5) the covariance model is pressed,
    the transcript is scanned once with ``n_threads=1`` and once with
    ``n_threads=n_threads``, and the sorted ``e_value`` columns parsed from
    both outputs are required to match element by element.
    """
    with tmpdir.as_cwd():
        transcript = datadir('rnaseP-bsu.fa')
        cm = datadir('rnaseP-eubact.c.cm')
        out_single = str(tmpdir.join('single'))
        out_multi = str(tmpdir.join('multi'))

        for n_threads in (2, 3, 4, 5):
            db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
            aln_tasks_single = CMScanTask().task(transcript, out_single, cm,
                                                 cutoff=1.0, n_threads=1)
            aln_tasks_multi = CMScanTask().task(transcript, out_multi, cm,
                                                cutoff=1.0,
                                                n_threads=n_threads)
            run_tasks([db_task, aln_tasks_single], ['run'])
            run_task(aln_tasks_multi)

            alns_single = pd.concat(InfernalParser(out_single))
            alns_multi = pd.concat(InfernalParser(out_multi))

            # sort_values() keeps each value's original row label. Drop the
            # labels so the comparison is purely positional; otherwise a
            # different hit order between the two runs makes pandas refuse
            # to compare the (differently-labelled) Series.
            evalues_single = (alns_single['e_value']
                              .sort_values()
                              .reset_index(drop=True))
            evalues_multi = (alns_multi['e_value']
                             .sort_values()
                             .reset_index(drop=True))
            assert all(evalues_single == evalues_multi)
def test_hmmscan_task_multithreaded(self, tmpdir, datadir):
    """Compare HMMScanTask i-Evalues between a 1-thread run and N-thread runs.

    For each thread count in (2, 3, 4, 5) the HMM database is pressed, the
    protein file is scanned once with ``n_threads=1`` and once with
    ``n_threads=n_threads``, and the sorted ``domain_i_evalue`` columns
    parsed from both outputs are required to match element by element.
    """
    with tmpdir.as_cwd():
        prot = datadir('20aa-alitest.fa')
        hmm = datadir('20aa.hmm')
        out_single = str(tmpdir.join('out-single'))
        out_multi = str(tmpdir.join('out-multi'))

        for n_threads in (2, 3, 4, 5):
            db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
            aln_task_single = HMMScanTask().task(prot, out_single, hmm,
                                                 cutoff=1.0, n_threads=1)
            aln_task_multi = HMMScanTask().task(prot, out_multi, hmm,
                                                cutoff=1.0,
                                                n_threads=n_threads)
            run_tasks([db_task, aln_task_single], ['run'])
            run_tasks([aln_task_multi], ['run'])

            # Diagnostic output, only visible when the test fails or is run
            # with output capture disabled.
            print(os.listdir(), file=sys.stderr)
            # Close the handle explicitly: this runs once per thread count.
            with open(out_single) as fp:
                print(fp.read())

            alns_single = pd.concat(HMMerParser(out_single))
            alns_multi = pd.concat(HMMerParser(out_multi))

            # Drop the row labels after sorting so the comparison is
            # positional rather than label-based.
            evalues_single = (alns_single['domain_i_evalue']
                              .sort_values()
                              .reset_index(drop=True))
            evalues_multi = (alns_multi['domain_i_evalue']
                             .sort_values()
                             .reset_index(drop=True))
            assert all(evalues_single == evalues_multi)
def test_hmmpress_task_existing(self, tmpdir, datadir):
    """Run HMMPressTask when a file already exists for every extension.

    A file is touched for each suffix in ``self.extensions`` before the task
    is built and run; afterwards the task must report 'up-to-date'.
    """
    with tmpdir.as_cwd():
        profile = datadir('test-profile.hmm')

        # Pre-create one file per expected extension next to the profile.
        for suffix in self.extensions:
            touch(profile + suffix)

        press_task = HMMPressTask().task(profile)
        run_tasks([press_task], ['run'])

        # Diagnostic output for failed runs.
        print(os.listdir(), file=sys.stderr)
        print(press_task, file=sys.stderr)

        assert check_status(press_task).status == 'up-to-date'
def test_cmpress_task(self, tmpdir, datadir):
    """Run CMPressTask and check its output files and reported status.

    After the run, a file must exist for every suffix in
    ``self.extensions`` appended to the model path, and the task must report
    'up-to-date'.
    """
    with tmpdir.as_cwd():
        model = datadir('test-covariance-model.cm')
        press_task = CMPressTask().task(model, params=self.cmpress_cfg)
        run_tasks([press_task], ['run'])

        # Status is captured before the file checks, as in the original flow.
        task_status = check_status(press_task)
        print(os.listdir(), file=sys.stderr)

        for suffix in self.extensions:
            expected_file = model + suffix
            assert os.path.isfile(expected_file)

        assert task_status.status == 'up-to-date'
def get_prices(data):
    """Persist market-id entries and schedule the price downloads.

    ``data`` is unpacked into ``(marketId_entries, urls)``, where ``urls``
    yields ``(intraday_url, historical_url)`` pairs. The market-id entries
    are handed to ``dbMgr.persist_marketId``. One intraday download task is
    queued per URL pair; a historical download task is queued as well, but
    only on calls where the module-level ``counter`` has wrapped to 0.

    Returns whatever ``run_tasks`` returns for the queued tasks with a
    concurrency of 10 (presumably a list of Deferreds -- confirm against
    ``run_tasks``).
    """
    global counter

    marketId_entries, urls = data
    dbMgr.persist_marketId(marketId_entries)

    # Advance the call counter, wrapping it back to 0 every
    # ``interval_historcial`` calls; historical data is fetched on the wrap.
    counter += 1
    if counter % interval_historcial == 0:
        counter = 0
    include_historical = counter == 0

    def persist_intraday(data):
        return defer.maybeDeferred(dbMgr.persist_intraday_data, data)

    def persist_historical(data):
        return defer.maybeDeferred(dbMgr.persist_historical_data, data)

    # Each task is a (downloader, url, persistence callback) triple.
    tasks = []
    for intraday_url, historical_url in urls:
        tasks.append((download_price_data, intraday_url, persist_intraday))
        if include_historical:
            tasks.append(
                (download_price_data, historical_url, persist_historical))

    max_concurrent = 10
    return run_tasks(tasks, max_concurrent)
def test_hmmscan_task(self, tmpdir, datadir):
    """Run HMMPressTask followed by HMMScanTask and inspect the output text.

    The output file must contain the string 'accession' exactly twice and
    must contain 'i-Evalue'.
    """
    with tmpdir.as_cwd():
        prot = datadir('test-protein.fa')
        hmm = datadir('test-profile.hmm')
        out = str(tmpdir.join('test.out'))

        db_task = HMMPressTask().task(hmm, params=self.hmmpress_cfg)
        aln_task = HMMScanTask().task(prot, out, hmm,
                                      cutoff=1.0, n_threads=1)
        run_tasks([db_task, aln_task], ['run'])
        print(os.listdir(), file=sys.stderr)

        # Read through a context manager so the handle is closed even if
        # one of the assertions below fails.
        with open(out) as fp:
            aln = fp.read()
        print(aln)

        assert aln.count('accession') == 2
        assert 'i-Evalue' in aln
def test_longorfs_task(self, tmpdir, datadir):
    """Run TransDecoderLongOrfsTask and check the produced peptide file.

    The task's ``longest_orfs.pep`` file, looked up under
    ``<transcript>.transdecoder_dir``, must exist and must contain the full
    text of the expected ORF fixture as a substring.
    """
    with tmpdir.as_cwd():
        transcript = datadir('test-transcript.fa')
        exp_orf = datadir('test-transcript-orf.pep')

        task = TransDecoderLongOrfsTask().task(transcript,
                                               params=self.longorfs_cfg)
        run_tasks([task], ['run'])
        output_dir = transcript + '.transdecoder_dir'

        # Context managers ensure both handles are closed regardless of
        # whether the assertions pass.
        with open(exp_orf) as fp:
            exp_pep = fp.read()

        pep_fn = os.path.join(output_dir, 'longest_orfs.pep')
        assert os.path.isfile(pep_fn)

        with open(pep_fn) as fp:
            pep = fp.read()

        assert exp_pep in pep
def test_cmscan_task(self, tmpdir, datadir):
    """Run CMPressTask followed by CMScanTask and inspect the output text.

    The output file must contain the string 'accession' exactly twice and
    must contain 'E-value'.
    """
    with tmpdir.as_cwd():
        transcript = datadir('test-transcript.fa')
        cm = datadir('test-covariance-model.cm')
        out = str(tmpdir.join('test.out'))

        db_task = CMPressTask().task(cm, params=self.cmpress_cfg)
        aln_task = CMScanTask().task(transcript, out, cm,
                                     cutoff=1.0, n_threads=1)
        run_tasks([db_task, aln_task], ['run'])
        print(os.listdir(), file=sys.stderr)

        # read() yields the same text as joining readlines(); the context
        # manager closes the handle even if an assertion below fails.
        with open(out) as fp:
            aln = fp.read()
        print(aln, file=sys.stderr)

        # TODO: better correctness check
        assert aln.count('accession') == 2
        assert 'E-value' in aln
def test_predict_task(self, tmpdir, datadir):
    """Run the TransDecoder long-ORF and predict tasks and check outputs.

    For every suffix in ``self.extensions`` the file
    ``<transcript>.transdecoder<suffix>`` must exist and be readable. The
    '.gff3' output must additionally mention the feature names 'mRNA',
    'gene', 'CDS', 'three_prime_UTR' and 'exon'.
    """
    with tmpdir.as_cwd():
        transcript = datadir('pom.50.fa')
        pfam = datadir('test-protein-x-pfam-a.tbl')

        orf_task = TransDecoderLongOrfsTask().task(transcript,
                                                   params=self.longorfs_cfg)
        pred_task = TransDecoderPredictTask().task(transcript, pfam,
                                                   params=self.predict_cfg)
        run_tasks([orf_task, pred_task], ['run'])

        pprint(tmpdir.listdir())

        for ext in self.extensions:
            fn = transcript + '.transdecoder' + ext
            assert os.path.isfile(fn)

            # Every output is still read, as before, but the handle is now
            # closed on each iteration instead of being left to the GC.
            with open(fn) as fp:
                contents = fp.read()

            if ext == '.gff3':
                assert 'mRNA' in contents
                assert 'gene' in contents
                assert 'CDS' in contents
                assert 'three_prime_UTR' in contents
                assert 'exon' in contents