示例#1
0
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Apply a reaction template to reactants, both supplied as text.

    Convenience entry point: an rdchiralReaction is built from the SMARTS
    string and an rdchiralReactants from the SMILES string on every call,
    and both are handed to rdchiralRun. That initialization is slow, so this
    is NOT recommended for library use; build the two objects once and call
    rdchiralRun directly instead.

    Args:
        reaction_smarts (str): Reaction SMARTS string for the template.
        reactant_smiles (str): SMILES string for the reactants.
        **kwargs: Forwarded unchanged to rdchiralRun.

    Returns:
        Whatever rdchiralRun returns for the freshly built objects.
    '''
    template = rdchiralReaction(reaction_smarts)
    molecules = rdchiralReactants(reactant_smiles)
    outcomes = rdchiralRun(template, molecules, **kwargs)
    return outcomes
示例#2
0
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Run a template on reactants given as SMARTS and SMILES strings.

    The template and the reactants are initialized from scratch on each
    call, which is slow. This is NOT recommended for library applications:
    initialize the template and molecules separately and call rdchiralRun.

    Args:
        reaction_smarts (str): Reaction SMARTS string.
        reactant_smiles (str): Reactant SMILES string.
        **kwargs: Passed through to rdchiralRun.

    Returns:
        The value produced by rdchiralRun.
    '''
    # Built in the same order as before: template first, then reactants.
    prepared = (rdchiralReaction(reaction_smarts),
                rdchiralReactants(reactant_smiles))
    return rdchiralRun(*prepared, **kwargs)
示例#3
0
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''One-shot helper: run a SMARTS template against a SMILES string.

    Both inputs are parsed into their rdchiral wrapper objects on every
    call. Because that initialization is pretty slow, this helper is NOT
    recommended for library applications; initialize the template and the
    molecules separately and call rdchiralRun on them.

    Args:
        reaction_smarts (str): Reaction SMARTS string
        reactant_smiles (str): Reactant SMILES string
        **kwargs: passed through to `rdchiralRun`

    Returns:
        The outcomes produced by `rdchiralRun`
    '''
    return rdchiralRun(
        rdchiralReaction(reaction_smarts),
        rdchiralReactants(reactant_smiles),
        **kwargs)
示例#4
0
        final_outcomes.add(smiles_new)
        mapped_outcomes[smiles_new] = (mapped_outcome, atoms_changed)
    ###############################################################################
    # One last fix for consolidating multiple stereospecified products...
    if combine_enantiomers:
        final_outcomes = combine_enantiomers_into_racemic(final_outcomes)
    ###############################################################################
    if return_mapped:
        return list(final_outcomes), mapped_outcomes
    else:
        return list(final_outcomes)


if __name__ == '__main__':
    # Demo inputs shared by the three usage patterns below.
    reaction_smarts = '[C:1][OH:2]>>[C:1][O:2][C]'
    reactant_smiles = 'OCC(=O)OCCCO'

    # 1) Text in, outcomes out: everything is initialized inside the call.
    print(rdchiralRunText(reaction_smarts, reactant_smiles))

    # 2) Build the template and reactant objects once, then run them.
    rxn = rdchiralReaction(reaction_smarts)
    reactants = rdchiralReactants(reactant_smiles)
    print(rdchiralRun(rxn, reactants))

    # 3) Same objects, but also ask for the mapped outcomes; the result is
    #    unpacked as a pair.
    result = rdchiralRun(rxn, reactants, return_mapped=True)
    outcomes, mapped_outcomes = result
    print(outcomes, mapped_outcomes)
示例#5
0
    def load_from_file(self,
                       retro,
                       file_path,
                       chiral=False,
                       rxns=True,
                       refs=False,
                       efgs=False,
                       rxn_ex=False):
        '''
        Read the template database from a previously saved file, of which the path is specified in the general
        configuration. Sets self.templates to the loaded list and self.num_templates to its length.

        retro: whether in the retrosynthetic direction
        file_path: .pickle file to read dumped templates from
        chiral: whether to handle chirality properly (only for retro for now)
        rxns : whether or not to actually load the reaction objects (or just the info)
        refs: whether to keep each template's 'references' entry (removed when False)
        efgs: whether to keep each template's 'efgs' entry (removed when False)
        rxn_ex: whether to keep each template's 'rxn_example' entry (removed when False)

        Raises IOError when file_path is not an existing file, or when a requested entry
        (references / efgs / rxn_example) is absent from the first loaded template. An empty
        template list has nothing to validate, so no error is raised for it.
        '''

        print('Loading templates from {}'.format(file_path))

        if not os.path.isfile(file_path):
            print("No file to read data from.")
            raise IOError('File not found to load template_transformer from!')

        # NOTE(security): pickle.load can execute arbitrary code while
        # unpickling; file_path must point to a trusted file.
        with open(file_path, 'rb') as handle:
            loaded = pickle.load(handle)

        if retro and chiral and rxns:
            # cannot pickle rdchiralReactions, so need to reload from SMARTS
            self.templates = []
            for template in loaded:
                try:
                    # Wrap each side in parentheses so reactants and products
                    # are each treated as one group.
                    template['rxn'] = rdchiralReaction(
                        str('(' + template['reaction_smarts'].replace(
                            '>>', ')>>(') + ')'))
                except Exception:
                    # Deliberate best effort: a template whose reaction
                    # cannot be built is kept, with no reaction object.
                    template['rxn'] = None
                self.templates.append(template)
        else:
            self.templates = loaded

        # Clear out unnecessary info, or verify that requested info exists.
        optional_fields = (
            ('references', refs,
             'Save file does not contain references (which were requested!)'),
            ('efgs', efgs,
             'Save file does not contain efg info (which was requested!)'),
            ('rxn_example', rxn_ex,
             'Save file does not contain a reaction example (which was requested!)'),
        )
        for key, requested, missing_message in optional_fields:
            if not requested:
                for template in self.templates:
                    template.pop(key, None)
            elif self.templates and key not in self.templates[0]:
                # Only the first template is inspected; guarding on a
                # non-empty list avoids an IndexError for an empty file.
                raise IOError(missing_message)

        self.num_templates = len(self.templates)
        print('Loaded templates. Using {} templates'.format(
            self.num_templates))
示例#6
0
    def load_from_database(self,
                           retro,
                           chiral=False,
                           refs=False,
                           rxns=True,
                           efgs=False,
                           rxn_ex=False):
        """Read the template data from the database.

        Appends one template dict per usable document to ``self.templates``
        and then calls ``self.reorder()``. Returns early, without loading
        anything, when ``self.lookup_only`` is set. Documents without a
        non-empty 'reaction_smarts' are skipped. A document without a
        'count' field is treated as having a count of 1, both for the
        retro threshold check and for the stored template.

        Args:
            retro (bool): Whether in the retrosynthetic direction.
            chiral (bool, optional): Whether to handle chirality properly
                (only for retro for now). (default: {False})
            refs (bool, optional): Whether to include references.
                (default: {False})
            rxns (bool, optional): Whether to actually load the reaction objects
                (or just the info). (default: {True})
            efgs (bool, optional): Whether to include efg information.
                (default: {False})
            rxn_ex (bool, optional): Whether to include reaction examples.
                (default: {False})
        """
        # Save collection TEMPLATE_DB
        self.load_databases(retro, chiral=chiral)
        self.chiral = chiral
        if self.lookup_only:
            return

        # Only filter on 'count' server-side when a minimum is configured and
        # the collection actually stores counts.
        filter_dict = {}
        if self.mincount:
            sample = self.TEMPLATE_DB.find_one()
            # find_one() gives None for an empty collection; guard before
            # the membership test.
            if sample is not None and 'count' in sample:
                if retro:
                    lowest = min(self.mincount, self.mincount_chiral)
                else:
                    lowest = self.mincount
                filter_dict = {'count': {'$gte': lowest}}

        # Look for all templates in collection
        to_retrieve = [
            '_id', 'reaction_smarts', 'necessary_reagent', 'count',
            'intra_only', 'dimer_only'
        ]
        if refs:
            to_retrieve.append('references')
        if efgs:
            to_retrieve.append('efgs')
        if rxn_ex:
            to_retrieve.append('rxn_example')

        for document in self.TEMPLATE_DB.find(filter_dict, to_retrieve):
            # Skip if no reaction SMARTS
            if 'reaction_smarts' not in document:
                continue
            reaction_smarts = str(document['reaction_smarts'])
            if not reaction_smarts:
                continue

            # Frequency/popularity score
            count = document.get('count', 1)

            if retro:
                # different thresholds for chiral and non chiral reactions
                chiral_rxn = any(
                    c in ('@', '/', '\\') for c in reaction_smarts)
                if chiral_rxn:
                    threshold = self.mincount_chiral
                else:
                    threshold = self.mincount
                if count < threshold:
                    continue

            # Define dictionary
            template = {
                'name': document.get('name', ''),
                'reaction_smarts': reaction_smarts,
                'incompatible_groups': document.get('incompatible_groups',
                                                    []),
                'reference': document.get('reference', ''),
                'references': document.get('references', []),
                'rxn_example': document.get('rxn_example', ''),
                'explicit_H': document.get('explicit_H', False),
                '_id': document.get('_id', -1),
                'product_smiles': document.get('product_smiles', []),
                'necessary_reagent': document.get('necessary_reagent', ''),
                'efgs': document.get('efgs', None),
                'intra_only': document.get('intra_only', False),
                'dimer_only': document.get('dimer_only', False),
            }
            if retro:
                template['chiral'] = chiral_rxn
            template['count'] = count

            # Define reaction in RDKit and validate
            if rxns:
                # Force reactants and products to be one pseudo-molecule (bookkeeping)
                reaction_smarts_one = '(' + reaction_smarts.replace(
                    '>>', ')>>(') + ')'
                try:
                    if retro:
                        if chiral:
                            template['rxn'] = rdchiralReaction(
                                str(reaction_smarts_one))
                        else:
                            rxn = AllChem.ReactionFromSmarts(
                                str(reaction_smarts_one))
                            # Keep the reaction only when the second value
                            # returned by Validate() is zero.
                            if rxn.Validate()[1] == 0:
                                template['rxn'] = rxn
                            else:
                                template['rxn'] = None
                    else:
                        rxn_f = AllChem.ReactionFromSmarts(reaction_smarts_one)
                        if rxn_f.Validate()[1] == 0:
                            template['rxn_f'] = rxn_f
                        else:
                            template['rxn_f'] = None

                except Exception as e:
                    # Best effort: the template is still stored, with both
                    # reaction slots cleared.
                    if gc.DEBUG:
                        MyLogger.print_and_log('Couldnt load : {}: {}'.format(
                            reaction_smarts_one, e),
                                               transformer_loc,
                                               level=1)
                    template['rxn'] = None
                    template['rxn_f'] = None

            # Add to list
            self.templates.append(template)

        self.reorder()
示例#7
0
    def load_from_file(self,
                       retro,
                       file_path,
                       chiral=False,
                       rxns=True,
                       refs=False,
                       efgs=False,
                       rxn_ex=False):
        """Read the template database from a previously saved file.

        Sets ``self.templates`` to the loaded list and ``self.num_templates``
        to its length. Optional entries that were not requested are removed
        from every template.

        Args:
            retro (bool): Whether in the retrosynthetic direction.
            file_path (str): Pickle file to read dumped templates from.
            chiral (bool, optional): Whether to handle chirality properly
                (only for retro for now). (default: {False})
            rxns (bool, optional): Whether to actually load the reaction objects
                (or just the info). (default: {True})
            refs (bool, optional): Whether to include references.
                (default: {False})
            efgs (bool, optional): Whether to include efg information.
                (default: {False})
            rxn_ex (bool, optional): Whether to include reaction examples.
                (default: {False})

        Raises:
            IOError: If ``file_path`` is not an existing file, or if a
                requested entry (references / efgs / rxn_example) is absent
                from the first loaded template. An empty template list has
                nothing to validate, so no error is raised for it.
        """

        MyLogger.print_and_log('Loading templates from {}'.format(file_path),
                               transformer_loc)

        if not os.path.isfile(file_path):
            MyLogger.print_and_log("No file to read data from.",
                                   transformer_loc,
                                   level=1)
            raise IOError('File not found to load template_transformer from!')

        # NOTE(security): pickle.load can execute arbitrary code while
        # unpickling; file_path must point to a trusted file.
        with open(file_path, 'rb') as handle:
            loaded = pickle.load(handle)

        if retro and chiral and rxns:
            # cannot pickle rdchiralReactions, so need to reload from SMARTS
            self.templates = []
            for template in loaded:
                try:
                    # Wrap each side in parentheses so reactants and products
                    # are each treated as one group.
                    template['rxn'] = rdchiralReaction(
                        str('(' + template['reaction_smarts'].replace(
                            '>>', ')>>(') + ')'))
                except Exception:
                    # Deliberate best effort: a template whose reaction
                    # cannot be built is kept, with no reaction object.
                    template['rxn'] = None
                self.templates.append(template)
        else:
            self.templates = loaded

        # Clear out unnecessary info, or verify that requested info exists.
        optional_fields = (
            ('references', refs,
             'Save file does not contain references (which were requested!)'),
            ('efgs', efgs,
             'Save file does not contain efg info (which was requested!)'),
            ('rxn_example', rxn_ex,
             'Save file does not contain a reaction example (which was requested!)'),
        )
        for key, requested, missing_message in optional_fields:
            if not requested:
                for template in self.templates:
                    template.pop(key, None)
            elif self.templates and key not in self.templates[0]:
                # Only the first template is inspected; guarding on a
                # non-empty list avoids an IndexError for an empty file.
                raise IOError(missing_message)

        self.num_templates = len(self.templates)
        MyLogger.print_and_log(
            'Loaded templates. Using {} templates'.format(self.num_templates),
            transformer_loc)
示例#8
0
    def doc_to_template(self, document, retro=True):
        """Build a template from a document taken from the database or a file.

        Args:
            document (dict): Document of template from database or file.
            retro (bool, optional): When False, a reaction built from the
                document's SMARTS is stored on the document itself under
                'rxn_f' and that same document is returned.
                (default: {True})

        Returns:
            dict: Retrosynthetic template (or the annotated input document
                when ``retro`` is False). None is returned when the document
                has no non-empty 'reaction_smarts'.
        """
        has_smarts = 'reaction_smarts' in document
        reaction_smarts = str(document['reaction_smarts']) if has_smarts else ''
        if not reaction_smarts:
            return

        if not retro:
            document['rxn_f'] = AllChem.ReactionFromSmarts(reaction_smarts)
            return document

        # A template counts as chiral when its SMARTS contains any
        # stereo marker character.
        stereo_markers = ('@', '/', '\\')
        is_chiral = any(char in stereo_markers for char in reaction_smarts)

        # Copy the known fields, substituting a default for anything absent.
        template = {
            'name': document.get('name', ''),
            'reaction_smarts': reaction_smarts,
            'incompatible_groups': document.get('incompatible_groups', []),
            'reference': document.get('reference', ''),
            'references': document.get('references', []),
            'rxn_example': document.get('rxn_example', ''),
            'explicit_H': document.get('explicit_H', False),
            '_id': document.get('_id', -1),
            'product_smiles': document.get('product_smiles', []),
            'necessary_reagent': document.get('necessary_reagent', ''),
            'efgs': document.get('efgs', None),
            'intra_only': document.get('intra_only', False),
            'dimer_only': document.get('dimer_only', False),
            'template_set': document.get('template_set', ''),
            'index': document.get('index'),
            'chiral': is_chiral,
            # Frequency/popularity score
            'count': document.get('count', 1),
        }

        # Force reactants and products to be one pseudo-molecule (bookkeeping)
        reaction_smarts_one = '(' + reaction_smarts.replace('>>', ')>>(') + ')'

        # Define the reaction; a template that fails to build is still
        # returned, just without a reaction object.
        try:
            template['rxn'] = rdchiralReaction(str(reaction_smarts_one))
        except Exception as err:
            if gc.DEBUG:
                MyLogger.print_and_log('Couldnt load : {}: {}'.format(
                    reaction_smarts_one, err),
                                       transformer_loc,
                                       level=1)
            template['rxn'] = None
        return template
示例#9
0
    def apply_one_template_by_idx(
        self,
        _id,
        smiles,
        template_idx,
        calculate_next_probs=True,
        fast_filter_threshold=0.75,
        max_num_templates=100,
        max_cum_prob=0.995,
        template_prioritizer=None,
        template_set=None,
        fast_filter=None,
        use_ban_list=True,
    ):
        """Applies one template by index.

        Args:
            _id (int): Pathway id used by tree builder.
            smiles (str): SMILES string of molecule to apply template to.
            template_idx (int): index of template to apply.
            calculate_next_probs (bool): Flag to calculate probabilities
                (template relevance scores) for precursors generated by
                template application.
            fast_filter_threshold (float): Precursors whose fast filter
                score is below this threshold are discarded.
            max_num_templates (int): Maximum number of template scores and
                indices to return when calculating next probabilities.
            max_cum_prob (float): Maximum cumulative probability to use
                when returning next probabilities.
            template_prioritizer (Prioritizer): Use to override
                prioritizer created during initialization. This can be
                any Prioritizer instance that implements a predict method
                that accepts (smiles, max_num_templates, max_cum_prob)
                as arguments and returns (scores, indices) for templates
                up until max_num_templates or max_cum_prob.
            template_set (str): Name of template set to use when multiple
                template sets are available.
            fast_filter (callable): Use to override ``self.fast_filter``.
                Called as ``fast_filter(reactant_smiles, smiles)`` and
                expected to return a score comparable to
                ``fast_filter_threshold``.
            use_ban_list (bool): When True, a target found in
                BANNED_SMILES gets only a dummy outcome.

        Returns:
            List of outcomes with (_id, smiles, template_idx, precursors,
            fast_filter_score), where ``smiles`` is the canonicalized target.
            With ``calculate_next_probs`` each precursor is a
            (reactant_smi, scores, indices, value) tuple; otherwise
            ``precursors`` is the precursor's 'smiles_split' value. A single
            dummy outcome with no precursors and a score of 0.0 is returned
            when the target is banned or no precursor passes the filter.
        """
        if template_prioritizer is None:
            template_prioritizer = self.template_prioritizer

        if template_set is None:
            template_set = self.template_set

        if fast_filter is None:
            fast_filter = self.fast_filter

        # Canonicalize the target first; the ban-list lookup and every
        # reported outcome use the canonical form.
        mol = Chem.MolFromSmiles(smiles)
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
        mol = rdchiralReactants(smiles)

        all_outcomes = []
        seen_reactants = {}
        # Used only for membership tests, so a set avoids rescanning a list
        # for every precursor (assumes precursor['smiles'] is a string).
        seen_reactant_combos = set()

        if use_ban_list and smiles in BANNED_SMILES:
            return [(_id, smiles, template_idx, [], 0.0)]  # dummy outcome

        template = self.get_one_template_by_idx(template_idx, template_set)
        template['rxn'] = rdchiralReaction(template['reaction_smarts'])

        for precursor in self.apply_one_template(mol, template):
            reactant_smiles = precursor['smiles']
            if reactant_smiles in seen_reactant_combos:
                continue
            seen_reactant_combos.add(reactant_smiles)
            fast_filter_score = fast_filter(reactant_smiles, smiles)
            if fast_filter_score < fast_filter_threshold:
                continue

            if not calculate_next_probs:
                all_outcomes.append(
                    (_id, smiles, template_idx, precursor['smiles_split'],
                     fast_filter_score))
                continue

            reactants = []
            for reactant_smi in precursor['smiles_split']:
                # Each distinct reactant is scored once and reused after.
                if reactant_smi not in seen_reactants:
                    scores, indices = template_prioritizer.predict(
                        reactant_smi,
                        max_num_templates=max_num_templates,
                        max_cum_prob=max_cum_prob)
                    # scores and indices will be passed through celery, need to be lists
                    scores = scores.tolist()
                    indices = indices.tolist()
                    value = 1
                    seen_reactants[reactant_smi] = (reactant_smi, scores,
                                                    indices, value)
                reactants.append(seen_reactants[reactant_smi])
            all_outcomes.append(
                (_id, smiles, template_idx, reactants, fast_filter_score))

        if not all_outcomes:
            all_outcomes.append(
                (_id, smiles, template_idx, [], 0.0))  # dummy outcome

        return all_outcomes