def compute_clustalo_clasic(file, clustalo_params=''): ml.info('Running clustalo.') ml.debug(fname()) fd, out_path = mkstemp(prefix='rba_', suffix='_01', dir=CONFIG.tmpdir) os.close(fd) with TemporaryFile(mode='w+', encoding='utf-8') as tmp: parlist = clustalo_params.split() cmd = [ '{}clustalo'.format(CONFIG.clustal_path), ] + parlist + ['-i', file, '-o', out_path] ml.debug(cmd) r = call(cmd, stdout=tmp, stderr=tmp) if r: msgfail = 'Call to clustalo failed.' tmp.seek(0) ml.error(msgfail) ml.error(cmd) raise exceptions.ClustaloException(msgfail, tmp.read()) return out_path
def run_clustal_profile2seqs_align(msa_file, fasta_seq_file, clustalo_params='', outfile=None): """ run clustal align MSA to seqs aligned columns in input MSA file are preserved and only new sequences are aligned and together they form new alignment :param msa_file: msa file (works with stockholm) :param fasta_seq_file: file with sequences to be aligned (format can be enforced with --infmt in clustalo_params) :param clustalo_params: params as accepted by clustalo :param outfile: outfile path, if not provided, tempfile will be created with output :return: outfile MSA path """ ml.info('Runing clustalo profile.') ml.debug(fname()) def _try_rescue(profile_file): # beware AlignIO truncates sequence names so they become non-unique, then clustalo also fails ml.warning( 'Trying rescue for profile alignment if profile has no gaps, sequences appears not aligned. ' 'Appending trailing gap to overcome the issue.') a = AlignIO.read(profile_file, format='clustal') s = [SeqRecord(Seq(str(i.seq) + '-'), id=i.id) for i in a] fa = AlignIO.MultipleSeqAlignment(s) fd, temp = mkstemp(prefix='rba_', suffix='_56', dir=CONFIG.tmpdir) with os.fdopen(fd, 'w') as fh: AlignIO.write(fa, fh, format='fasta') return temp if outfile: clustalo_file = outfile else: c_fd, clustalo_file = mkstemp(prefix='rba_', suffix='_57', dir=CONFIG.tmpdir) os.close(c_fd) with TemporaryFile(mode='w+', encoding='utf-8') as tmp: cmd = [ '{}clustalo'.format(CONFIG.clustal_path), '--force', '-i', fasta_seq_file, '--profile1', msa_file, '-o', clustalo_file ] if clustalo_params != '': cmd += clustalo_params.split() ml.debug(cmd) r = call(cmd, stdout=tmp, stderr=tmp) if r: ml.warning('Profile align failed.') # Initiate rescue attempt rewriten_msa = _try_rescue(msa_file) cmd2 = [ '{}clustalo'.format(CONFIG.clustal_path), '--force', '-i', fasta_seq_file, '--profile1', rewriten_msa, '-o', clustalo_file ] if clustalo_params: cmd2 += clustalo_params.split() ml.debug(cmd2) r2 = call(cmd2, stdout=tmp, stderr=tmp) remove_one_file_with_try(rewriten_msa) if r2 != 0: msgfail = 'Call to clustalo for aligning profile to sequences failed.' ml.error(msgfail) ml.error(cmd) ml.error(cmd2) raise exceptions.ClustaloException(msgfail, tmp.read()) return clustalo_file
class TestPredictExceptions(unittest.TestCase): def setUp(self): f = open(ref_json_file, 'r') mydata = json.load(f) f.close() bb = convert_classes.blastsearchrecomputefromdict(mydata) self.data = bb ff, csv = tempfile.mkstemp(prefix='rba_', suffix='_t1') os.close(ff) self.csv = csv ff, html = tempfile.mkstemp(prefix='rba_', suffix='_t2') os.close(ff) self.html = blast_query + 'test_html.html' ff, pandas_dump = tempfile.mkstemp(prefix='rba_', suffix='_t3') os.close(ff) self.pandas_dump = pandas_dump ff, json_file = tempfile.mkstemp(prefix='rba_', suffix='_t4') os.close(ff) self.json = json_file ff, fasta = tempfile.mkstemp(prefix='rba_', suffix='_t5') os.close(ff) self.fasta = fasta ff, allHits_fasta = tempfile.mkstemp(prefix='rba_', suffix='_t6') with os.fdopen(ff, 'w') as f: SeqIO.write([i.extension for i in self.data.hits], f, 'fasta') self.fasta_structures = allHits_fasta self.args = Pseudoargs( blast_query, blast_in, blast_db, b_type='plain', prediction_method=['rnafold'], blast_regexp=r'(?<=\|)[A-Z0-9]*\.?\d*$', enable_overwrite=True, html=self.html, ) self.func_args = { 'query': self.data.query, 'seqs2predict_fasta': allHits_fasta, 'pred_method_params': {}, 'all_hits_list': [i.extension for i in self.data.hits], 'seqs2predict_list': [i.extension for i in self.data.hits], 'use_cm_file': 'abc', } def tearDown(self): remove_files_with_try([ self.csv, self.html, self.json, self.fasta, self.fasta_structures ], '') @mock.patch("rna_blast_analyze.BR_core.predict_structures.rnafold_fasta", side_effect=exceptions.RNAfoldException( 'expected_error_rnafold', 'b')) def test_rnafold(self, callMock): self.func_args['prediction_method'] = 'rnafold' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ['expected_error_rnafold']) @mock.patch("rna_blast_analyze.BR_core.repredict_structures.run_cmemit", side_effect=exceptions.CmemitException('expected_error_cmemit', 'b')) def test_cmemit(self, callMock): self.func_args['prediction_method'] = 'rfam-centroid' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_cmemit"]) @mock.patch( "rna_blast_analyze.BR_core.predict_structures.run_cmalign_on_fasta", side_effect=exceptions.CmemitException('expected_error_cmalign', 'b')) def test_cmalign(self, callMock): self.func_args['prediction_method'] = 'rfam-Rc' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_cmalign"]) @mock.patch( "rna_blast_analyze.BR_core.centroid_homfold.run_centroid_homfold", side_effect=exceptions.CentroidHomfoldException( 'expected_error_centroid', 'b')) def test_centroid_homfold(self, callMock): self.func_args['prediction_method'] = 'centroid-fast' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_centroid"]) @mock.patch("rna_blast_analyze.BR_core.turbofold._turbofold_worker", side_effect=exceptions.TurboFoldException( 'expected_error_TurboFold', 'b')) def test_TurboFold(self, callMock): self.func_args['prediction_method'] = 'Turbo-fast' self.func_args['threads'] = 1 a, b, c = repredict_structures_for_homol_seqs(**self.func_args) # check that sequences are returned does not have Turbofold structure and have error message that turbo failed for s in a: self.assertNotIn('ss0', s.letter_annotations) self.assertEqual(['expected_error_TurboFold'], s.annotations['msgs']) @mock.patch("rna_blast_analyze.BR_core.BA_support.run_muscle", side_effect=exceptions.MuscleException('expected_error_muscle', 'b')) def test_muscle(self, callMock): self.func_args['prediction_method'] = 'M-A-U-r-Rc' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_muscle"]) @mock.patch( "rna_blast_analyze.BR_core.predict_structures.compute_clustalo_clasic", side_effect=exceptions.ClustaloException('expected_error_clustalo', 'b')) def test_clustalo(self, callMock): self.func_args['prediction_method'] = 'C-A-sub' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_clustalo"]) @mock.patch("rna_blast_analyze.BR_core.predict_structures.compute_alifold", side_effect=exceptions.RNAalifoldException( 'expected_error_alifold', 'b')) def test_clustalo2(self, callMock): self.func_args['prediction_method'] = 'C-A-sub' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_alifold"]) @mock.patch("rna_blast_analyze.BR_core.predict_structures.rnafold_fasta", side_effect=exceptions.NoHomologousSequenceException()) def test_nh_ext(self, callMock): self.func_args['prediction_method'] = 'rnafold' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertTrue(c[0].startswith('No sequence was inferred homologous')) @mock.patch("rna_blast_analyze.BR_core.predict_structures.rnafold_fasta", side_effect=exceptions.AmbiguousQuerySequenceException()) def test_nh_ext2(self, callMock): self.func_args['prediction_method'] = 'rnafold' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertTrue( c[0].startswith('Query sequence contains ambiguous characters')) @mock.patch("rna_blast_analyze.BR_core.predict_structures.compute_refold", side_effect=exceptions.RefoldException('expected_error_refold', 'b')) def test_refold_pl(self, callMock): self.func_args['prediction_method'] = 'C-A-r-Rc' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_refold"]) @mock.patch( "rna_blast_analyze.BR_core.predict_structures.run_clustal_profile2seqs_align", side_effect=exceptions.RefoldException('expected_error_clustalo', 'b')) def test_clustalo_profile(self, callMock): self.func_args['prediction_method'] = 'C-A-r-Rc' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_clustalo"]) @mock.patch( "rna_blast_analyze.BR_core.predict_structures.run_hybrid_ss_min", side_effect=exceptions.HybridssminException( 'expected_error_hybrid_ss_min', 'b')) def test_hybrid_ss_min(self, callMock): self.func_args['prediction_method'] = 'fq-sub' a, b, c = repredict_structures_for_homol_seqs(**self.func_args) self.assertIsNone(a) self.assertIsNone(b) self.assertEqual(c, ["expected_error_hybrid_ss_min"]) def test_wrong_pm(self): self.func_args['prediction_method'] = 'blabla' with self.assertRaises(AssertionError): a, b, c = repredict_structures_for_homol_seqs(**self.func_args)