示例#1
0
def compute_clustalo_clasic(file, clustalo_params=''):
    ml.info('Running clustalo.')
    ml.debug(fname())

    fd, out_path = mkstemp(prefix='rba_', suffix='_01', dir=CONFIG.tmpdir)
    os.close(fd)

    with TemporaryFile(mode='w+', encoding='utf-8') as tmp:
        parlist = clustalo_params.split()
        cmd = [
            '{}clustalo'.format(CONFIG.clustal_path),
        ] + parlist + ['-i', file, '-o', out_path]
        ml.debug(cmd)
        r = call(cmd, stdout=tmp, stderr=tmp)

        if r:
            msgfail = 'Call to clustalo failed.'
            tmp.seek(0)
            ml.error(msgfail)
            ml.error(cmd)
            raise exceptions.ClustaloException(msgfail, tmp.read())

    return out_path
示例#2
0
def run_clustal_profile2seqs_align(msa_file,
                                   fasta_seq_file,
                                   clustalo_params='',
                                   outfile=None):
    """
    run clustal align MSA to seqs
    aligned columns in input MSA file are preserved and only new sequences are aligned and together they form new
     alignment
    :param msa_file: msa file (works with stockholm)
    :param fasta_seq_file: file with sequences to be aligned (format can be enforced with --infmt in clustalo_params)
    :param clustalo_params: params as accepted by clustalo
    :param outfile: outfile path, if not provided, tempfile will be created with output
    :return: outfile MSA path
    """
    ml.info('Runing clustalo profile.')
    ml.debug(fname())

    def _try_rescue(profile_file):
        # beware AlignIO truncates sequence names so they become non-unique, then clustalo also fails
        ml.warning(
            'Trying rescue for profile alignment if profile has no gaps, sequences appears not aligned. '
            'Appending trailing gap to overcome the issue.')
        a = AlignIO.read(profile_file, format='clustal')
        s = [SeqRecord(Seq(str(i.seq) + '-'), id=i.id) for i in a]
        fa = AlignIO.MultipleSeqAlignment(s)

        fd, temp = mkstemp(prefix='rba_', suffix='_56', dir=CONFIG.tmpdir)
        with os.fdopen(fd, 'w') as fh:
            AlignIO.write(fa, fh, format='fasta')
        return temp

    if outfile:
        clustalo_file = outfile
    else:
        c_fd, clustalo_file = mkstemp(prefix='rba_',
                                      suffix='_57',
                                      dir=CONFIG.tmpdir)
        os.close(c_fd)

    with TemporaryFile(mode='w+', encoding='utf-8') as tmp:
        cmd = [
            '{}clustalo'.format(CONFIG.clustal_path), '--force', '-i',
            fasta_seq_file, '--profile1', msa_file, '-o', clustalo_file
        ]
        if clustalo_params != '':
            cmd += clustalo_params.split()

        ml.debug(cmd)
        r = call(cmd, stdout=tmp, stderr=tmp)

        if r:
            ml.warning('Profile align failed.')

            # Initiate rescue attempt
            rewriten_msa = _try_rescue(msa_file)
            cmd2 = [
                '{}clustalo'.format(CONFIG.clustal_path), '--force', '-i',
                fasta_seq_file, '--profile1', rewriten_msa, '-o', clustalo_file
            ]
            if clustalo_params:
                cmd2 += clustalo_params.split()

            ml.debug(cmd2)
            r2 = call(cmd2, stdout=tmp, stderr=tmp)

            remove_one_file_with_try(rewriten_msa)

            if r2 != 0:
                msgfail = 'Call to clustalo for aligning profile to sequences failed.'
                ml.error(msgfail)
                ml.error(cmd)
                ml.error(cmd2)
                raise exceptions.ClustaloException(msgfail, tmp.read())
    return clustalo_file
class TestPredictExceptions(unittest.TestCase):
    def setUp(self):
        f = open(ref_json_file, 'r')
        mydata = json.load(f)
        f.close()
        bb = convert_classes.blastsearchrecomputefromdict(mydata)
        self.data = bb

        ff, csv = tempfile.mkstemp(prefix='rba_', suffix='_t1')
        os.close(ff)
        self.csv = csv

        ff, html = tempfile.mkstemp(prefix='rba_', suffix='_t2')
        os.close(ff)
        self.html = blast_query + 'test_html.html'

        ff, pandas_dump = tempfile.mkstemp(prefix='rba_', suffix='_t3')
        os.close(ff)
        self.pandas_dump = pandas_dump

        ff, json_file = tempfile.mkstemp(prefix='rba_', suffix='_t4')
        os.close(ff)
        self.json = json_file

        ff, fasta = tempfile.mkstemp(prefix='rba_', suffix='_t5')
        os.close(ff)
        self.fasta = fasta

        ff, allHits_fasta = tempfile.mkstemp(prefix='rba_', suffix='_t6')
        with os.fdopen(ff, 'w') as f:
            SeqIO.write([i.extension for i in self.data.hits], f, 'fasta')

        self.fasta_structures = allHits_fasta

        self.args = Pseudoargs(
            blast_query,
            blast_in,
            blast_db,
            b_type='plain',
            prediction_method=['rnafold'],
            blast_regexp=r'(?<=\|)[A-Z0-9]*\.?\d*$',
            enable_overwrite=True,
            html=self.html,
        )

        self.func_args = {
            'query': self.data.query,
            'seqs2predict_fasta': allHits_fasta,
            'pred_method_params': {},
            'all_hits_list': [i.extension for i in self.data.hits],
            'seqs2predict_list': [i.extension for i in self.data.hits],
            'use_cm_file': 'abc',
        }

    def tearDown(self):
        remove_files_with_try([
            self.csv, self.html, self.json, self.fasta, self.fasta_structures
        ], '')

    @mock.patch("rna_blast_analyze.BR_core.predict_structures.rnafold_fasta",
                side_effect=exceptions.RNAfoldException(
                    'expected_error_rnafold', 'b'))
    def test_rnafold(self, callMock):
        self.func_args['prediction_method'] = 'rnafold'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ['expected_error_rnafold'])

    @mock.patch("rna_blast_analyze.BR_core.repredict_structures.run_cmemit",
                side_effect=exceptions.CmemitException('expected_error_cmemit',
                                                       'b'))
    def test_cmemit(self, callMock):
        self.func_args['prediction_method'] = 'rfam-centroid'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_cmemit"])

    @mock.patch(
        "rna_blast_analyze.BR_core.predict_structures.run_cmalign_on_fasta",
        side_effect=exceptions.CmemitException('expected_error_cmalign', 'b'))
    def test_cmalign(self, callMock):
        self.func_args['prediction_method'] = 'rfam-Rc'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_cmalign"])

    @mock.patch(
        "rna_blast_analyze.BR_core.centroid_homfold.run_centroid_homfold",
        side_effect=exceptions.CentroidHomfoldException(
            'expected_error_centroid', 'b'))
    def test_centroid_homfold(self, callMock):
        self.func_args['prediction_method'] = 'centroid-fast'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_centroid"])

    @mock.patch("rna_blast_analyze.BR_core.turbofold._turbofold_worker",
                side_effect=exceptions.TurboFoldException(
                    'expected_error_TurboFold', 'b'))
    def test_TurboFold(self, callMock):
        self.func_args['prediction_method'] = 'Turbo-fast'
        self.func_args['threads'] = 1
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        # check that sequences are returned does not have Turbofold structure and have error message that turbo failed
        for s in a:
            self.assertNotIn('ss0', s.letter_annotations)
            self.assertEqual(['expected_error_TurboFold'],
                             s.annotations['msgs'])

    @mock.patch("rna_blast_analyze.BR_core.BA_support.run_muscle",
                side_effect=exceptions.MuscleException('expected_error_muscle',
                                                       'b'))
    def test_muscle(self, callMock):
        self.func_args['prediction_method'] = 'M-A-U-r-Rc'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_muscle"])

    @mock.patch(
        "rna_blast_analyze.BR_core.predict_structures.compute_clustalo_clasic",
        side_effect=exceptions.ClustaloException('expected_error_clustalo',
                                                 'b'))
    def test_clustalo(self, callMock):
        self.func_args['prediction_method'] = 'C-A-sub'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_clustalo"])

    @mock.patch("rna_blast_analyze.BR_core.predict_structures.compute_alifold",
                side_effect=exceptions.RNAalifoldException(
                    'expected_error_alifold', 'b'))
    def test_clustalo2(self, callMock):
        self.func_args['prediction_method'] = 'C-A-sub'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_alifold"])

    @mock.patch("rna_blast_analyze.BR_core.predict_structures.rnafold_fasta",
                side_effect=exceptions.NoHomologousSequenceException())
    def test_nh_ext(self, callMock):
        self.func_args['prediction_method'] = 'rnafold'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertTrue(c[0].startswith('No sequence was inferred homologous'))

    @mock.patch("rna_blast_analyze.BR_core.predict_structures.rnafold_fasta",
                side_effect=exceptions.AmbiguousQuerySequenceException())
    def test_nh_ext2(self, callMock):
        self.func_args['prediction_method'] = 'rnafold'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertTrue(
            c[0].startswith('Query sequence contains ambiguous characters'))

    @mock.patch("rna_blast_analyze.BR_core.predict_structures.compute_refold",
                side_effect=exceptions.RefoldException('expected_error_refold',
                                                       'b'))
    def test_refold_pl(self, callMock):
        self.func_args['prediction_method'] = 'C-A-r-Rc'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_refold"])

    @mock.patch(
        "rna_blast_analyze.BR_core.predict_structures.run_clustal_profile2seqs_align",
        side_effect=exceptions.RefoldException('expected_error_clustalo', 'b'))
    def test_clustalo_profile(self, callMock):
        self.func_args['prediction_method'] = 'C-A-r-Rc'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_clustalo"])

    @mock.patch(
        "rna_blast_analyze.BR_core.predict_structures.run_hybrid_ss_min",
        side_effect=exceptions.HybridssminException(
            'expected_error_hybrid_ss_min', 'b'))
    def test_hybrid_ss_min(self, callMock):
        self.func_args['prediction_method'] = 'fq-sub'
        a, b, c = repredict_structures_for_homol_seqs(**self.func_args)
        self.assertIsNone(a)
        self.assertIsNone(b)
        self.assertEqual(c, ["expected_error_hybrid_ss_min"])

    def test_wrong_pm(self):
        self.func_args['prediction_method'] = 'blabla'
        with self.assertRaises(AssertionError):
            a, b, c = repredict_structures_for_homol_seqs(**self.func_args)