def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Apply a reaction template to reactants, both supplied as plain text.

    Convenience entry point only: the template and the molecules are
    initialized again on every call, which is slow. Library code should
    build the rdchiralReaction and rdchiralReactants objects once and call
    rdchiralRun() on them directly.
    '''
    # Arguments are evaluated left to right, so the template is still
    # initialized before the reactants.
    return rdchiralRun(
        rdchiralReaction(reaction_smarts),
        rdchiralReactants(reactant_smiles),
        **kwargs
    )
def rdchiralRunText(reaction_smarts, reactant_smiles, **kwargs):
    '''Run a reaction given as a SMARTS string on reactants given as SMILES.

    This helper re-initializes the template and the molecules on every
    call, which is slow; it is NOT recommended for library use. Initialize
    the template and the reactants separately and call rdchiralRun()
    instead.

    Args:
        reaction_smarts (str): Reaction SMARTS string
        reactant_smiles (str): Reactant SMILES string
        **kwargs: forwarded unchanged to `rdchiralRun`

    Returns:
        list: List of outcomes from `rdchiralRun`
    '''
    template = rdchiralReaction(reaction_smarts)
    molecules = rdchiralReactants(reactant_smiles)
    outcomes = rdchiralRun(template, molecules, **kwargs)
    return outcomes
final_outcomes.add(smiles_new) mapped_outcomes[smiles_new] = (mapped_outcome, atoms_changed) ############################################################################### # One last fix for consolidating multiple stereospecified products... if combine_enantiomers: final_outcomes = combine_enantiomers_into_racemic(final_outcomes) ############################################################################### if return_mapped: return list(final_outcomes), mapped_outcomes else: return list(final_outcomes) if __name__ == '__main__': # Directly use SMILES/SMARTS reaction_smarts = '[C:1][OH:2]>>[C:1][O:2][C]' reactant_smiles = 'OCC(=O)OCCCO' outcomes = rdchiralRunText(reaction_smarts, reactant_smiles) print(outcomes) # Pre-initialize rxn = rdchiralReaction(reaction_smarts) reactants = rdchiralReactants(reactant_smiles) outcomes = rdchiralRun(rxn, reactants) print(outcomes) # Get list of atoms that changed as well outcomes, mapped_outcomes = rdchiralRun(rxn, reactants, return_mapped=True) print(outcomes, mapped_outcomes)
def load_from_file(self, retro, file_path, chiral=False, rxns=True,
                   refs=False, efgs=False, rxn_ex=False):
    '''
    Read the template database from a previously saved pickle file and
    store it in self.templates / self.num_templates.

    retro: whether in the retrosynthetic direction
    file_path: .pickle file to read dumped templates from
    chiral: whether to handle chirality properly (only for retro for now)
    rxns: whether or not to actually load the reaction objects (or just
        the info)
    refs: keep the 'references' entry of every template (dropped otherwise)
    efgs: keep the 'efgs' entry of every template (dropped otherwise)
    rxn_ex: keep the 'rxn_example' entry of every template (dropped
        otherwise)

    Raises IOError when file_path is not an existing file, or when a
    requested optional entry is missing from the first loaded template.
    An empty template list is accepted whatever was requested.
    '''
    print('Loading templates from {}'.format(file_path))
    if not os.path.isfile(file_path):
        print("No file to read data from.")
        raise IOError('File not found to load template_transformer from!')

    # SECURITY NOTE: unpickling can execute arbitrary code. Only load
    # template files that come from a trusted source.
    with open(file_path, 'rb') as template_file:
        loaded_templates = pickle.load(template_file)

    if retro and chiral and rxns:
        # cannot pickle rdchiralReactions, so need to reload from SMARTS
        self.templates = []
        for template in loaded_templates:
            try:
                template['rxn'] = rdchiralReaction(
                    str('(' + template['reaction_smarts'].replace(
                        '>>', ')>>(') + ')'))
            except Exception:
                # Best effort: keep the template's info even when its
                # reaction object cannot be rebuilt.
                template['rxn'] = None
            self.templates.append(template)
    else:
        self.templates = loaded_templates

    # Clear out unnecessary info, or verify that requested info is there.
    optional_entries = (
        ('references', refs,
         'Save file does not contain references (which were requested!)'),
        ('efgs', efgs,
         'Save file does not contain efg info (which was requested!)'),
        ('rxn_example', rxn_ex,
         'Save file does not contain a reaction example (which was requested!)'),
    )
    for key, requested, error_message in optional_entries:
        if not requested:
            for template in self.templates:
                template.pop(key, None)
        elif self.templates and key not in self.templates[0]:
            # Only the first template is inspected; the emptiness guard
            # avoids an IndexError on a file holding no templates.
            raise IOError(error_message)

    self.num_templates = len(self.templates)
    print('Loaded templates. Using {} templates'.format(
        self.num_templates))
def load_from_database(self, retro, chiral=False, refs=False, rxns=True,
                       efgs=False, rxn_ex=False):
    """Read the template data from the database into self.templates.

    Args:
        retro (bool): Whether in the retrosynthetic direction.
        chiral (bool, optional): Whether to handle chirality properly
            (only for retro for now). (default: {False})
        refs (bool, optional): Whether to include references.
            (default: {False})
        rxns (bool, optional): Whether to actually load the reaction
            objects (or just the info). (default: {True})
        efgs (bool, optional): Whether to include efg information.
            (default: {False})
        rxn_ex (bool, optional): Whether to include reaction examples.
            (default: {False})

    Returns:
        None. Templates are appended to self.templates and self.reorder()
        is called at the end; nothing is loaded when self.lookup_only is
        set.
    """
    # Save collection TEMPLATE_DB
    self.load_databases(retro, chiral=chiral)
    self.chiral = chiral

    if self.lookup_only:
        return

    # Filter on 'count' at the database level only when the collection
    # records counts at all. find_one() yields None for an empty
    # collection, which must not be probed with `in`.
    filter_dict = {}
    if self.mincount:
        sample = self.TEMPLATE_DB.find_one()
        if sample is not None and 'count' in sample:
            if retro:
                threshold = min(self.mincount, self.mincount_chiral)
            else:
                threshold = self.mincount
            filter_dict = {'count': {'$gte': threshold}}

    # Look for all templates in collection
    to_retrieve = [
        '_id', 'reaction_smarts', 'necessary_reagent', 'count',
        'intra_only', 'dimer_only'
    ]
    if refs:
        to_retrieve.append('references')
    if efgs:
        to_retrieve.append('efgs')
    if rxn_ex:
        to_retrieve.append('rxn_example')

    for document in self.TEMPLATE_DB.find(filter_dict, to_retrieve):
        # Skip if no (or null/empty) reaction SMARTS
        raw_smarts = document.get('reaction_smarts')
        if not raw_smarts:
            continue
        reaction_smarts = str(raw_smarts)

        # Frequency/popularity score; a document without one counts as 1,
        # both for the thresholds below and in the stored template.
        count = document.get('count', 1)

        chiral_rxn = False
        if retro:
            # different thresholds for chiral and non chiral reactions
            chiral_rxn = any(
                marker in reaction_smarts for marker in ('@', '/', '\\'))
            if chiral_rxn:
                min_required = self.mincount_chiral
            else:
                min_required = self.mincount
            if count < min_required:
                continue

        # Define dictionary
        template = {
            'name': document.get('name', ''),
            'reaction_smarts': reaction_smarts,
            'incompatible_groups': document.get('incompatible_groups', []),
            'reference': document.get('reference', ''),
            'references': document.get('references', []),
            'rxn_example': document.get('rxn_example', ''),
            'explicit_H': document.get('explicit_H', False),
            '_id': document.get('_id', -1),
            'product_smiles': document.get('product_smiles', []),
            'necessary_reagent': document.get('necessary_reagent', ''),
            'efgs': document.get('efgs'),
            'intra_only': document.get('intra_only', False),
            'dimer_only': document.get('dimer_only', False),
        }
        if retro:
            template['chiral'] = chiral_rxn
        template['count'] = count

        # Define reaction in RDKit and validate
        if rxns:
            # Force reactants and products to be one pseudo-molecule
            # (bookkeeping). Built outside the try so that the error
            # message below can always refer to it.
            reaction_smarts_one = (
                '(' + reaction_smarts.replace('>>', ')>>(') + ')')
            try:
                if retro:
                    if chiral:
                        template['rxn'] = rdchiralReaction(
                            str(reaction_smarts_one))
                    else:
                        rxn = AllChem.ReactionFromSmarts(
                            str(reaction_smarts_one))
                        # Keep the reaction only if Validate() reports
                        # zero in the second slot of its result.
                        if rxn.Validate()[1] == 0:
                            template['rxn'] = rxn
                        else:
                            template['rxn'] = None
                else:
                    rxn_f = AllChem.ReactionFromSmarts(reaction_smarts_one)
                    if rxn_f.Validate()[1] == 0:
                        template['rxn_f'] = rxn_f
                    else:
                        template['rxn_f'] = None
            except Exception as e:
                # Best effort: an unloadable template is kept without
                # reaction objects rather than aborting the whole load.
                if gc.DEBUG:
                    MyLogger.print_and_log('Couldnt load : {}: {}'.format(
                        reaction_smarts_one, e), transformer_loc, level=1)
                template['rxn'] = None
                template['rxn_f'] = None

        # Add to list
        self.templates.append(template)

    self.reorder()
def load_from_file(self, retro, file_path, chiral=False, rxns=True,
                   refs=False, efgs=False, rxn_ex=False):
    """Read the template database from a previously saved file.

    The loaded templates are stored in self.templates and their number in
    self.num_templates.

    Args:
        retro (bool): Whether in the retrosynthetic direction.
        file_path (str): Pickle file to read dumped templates from.
        chiral (bool, optional): Whether to handle chirality properly
            (only for retro for now). (default: {False})
        rxns (bool, optional): Whether to actually load the reaction
            objects (or just the info). (default: {True})
        refs (bool, optional): Whether to include references.
            (default: {False})
        efgs (bool, optional): Whether to include efg information.
            (default: {False})
        rxn_ex (bool, optional): Whether to include reaction examples.
            (default: {False})

    Raises:
        IOError: If file_path is not an existing file, or if requested
            optional information is missing from the first loaded
            template. An empty template list is accepted as is.
    """
    MyLogger.print_and_log('Loading templates from {}'.format(file_path),
                           transformer_loc)

    if not os.path.isfile(file_path):
        MyLogger.print_and_log("No file to read data from.",
                               transformer_loc,
                               level=1)
        raise IOError('File not found to load template_transformer from!')

    # SECURITY NOTE: unpickling can execute arbitrary code. Only load
    # template files that come from a trusted source.
    with open(file_path, 'rb') as template_file:
        pickle_templates = pickle.load(template_file)

    if retro and chiral and rxns:
        # cannot pickle rdchiralReactions, so need to reload from SMARTS
        self.templates = []
        for template in pickle_templates:
            try:
                template['rxn'] = rdchiralReaction(
                    str('(' + template['reaction_smarts'].replace(
                        '>>', ')>>(') + ')'))
            except Exception:
                # Best effort: keep the template's info even when its
                # reaction object cannot be rebuilt.
                template['rxn'] = None
            self.templates.append(template)
    else:
        self.templates = pickle_templates

    # Clear out unnecessary info, or verify that requested info is there.
    optional_entries = (
        ('references', refs,
         'Save file does not contain references (which were requested!)'),
        ('efgs', efgs,
         'Save file does not contain efg info (which was requested!)'),
        ('rxn_example', rxn_ex,
         'Save file does not contain a reaction example (which was requested!)'),
    )
    for key, requested, error_message in optional_entries:
        if not requested:
            for template in self.templates:
                template.pop(key, None)
        elif self.templates and key not in self.templates[0]:
            # Only the first template is inspected; the emptiness guard
            # avoids an IndexError on a file holding no templates.
            raise IOError(error_message)

    self.num_templates = len(self.templates)
    MyLogger.print_and_log(
        'Loaded templates. Using {} templates'.format(self.num_templates),
        transformer_loc)
def doc_to_template(self, document, retro=True):
    """Returns a template given a document from the database or file.

    Args:
        document (dict): Document of template from database or file.
        retro (bool, optional): Whether to build a retrosynthetic
            template. When False, no new dictionary is built: the
            document itself gets an RDKit reaction stored under 'rxn_f'
            and is returned. (default: {True})

    Returns:
        dict or None: None when the document has no usable
            'reaction_smarts' (missing, null or empty). Otherwise the
            retrosynthetic template, whose 'rxn' entry is None if the
            reaction could not be initialized; or, for retro=False, the
            input document with 'rxn_f' added.
    """
    raw_smarts = document.get('reaction_smarts')
    if not raw_smarts:
        # A null value would otherwise be stringified to 'None' and
        # yield a junk template.
        return None
    reaction_smarts = str(raw_smarts)

    if not retro:
        # NOTE(review): the forward reaction is built from the raw SMARTS,
        # without the pseudo-molecule parentheses used below - confirm
        # that this is intended.
        document['rxn_f'] = AllChem.ReactionFromSmarts(reaction_smarts)
        return document

    # Stereo markers in the SMARTS flag the template as chiral.
    chiral_rxn = any(
        marker in reaction_smarts for marker in ('@', '/', '\\'))

    # Define dictionary
    template = {
        'name': document.get('name', ''),
        'reaction_smarts': reaction_smarts,
        'incompatible_groups': document.get('incompatible_groups', []),
        'reference': document.get('reference', ''),
        'references': document.get('references', []),
        'rxn_example': document.get('rxn_example', ''),
        'explicit_H': document.get('explicit_H', False),
        '_id': document.get('_id', -1),
        'product_smiles': document.get('product_smiles', []),
        'necessary_reagent': document.get('necessary_reagent', ''),
        'efgs': document.get('efgs'),
        'intra_only': document.get('intra_only', False),
        'dimer_only': document.get('dimer_only', False),
        'template_set': document.get('template_set', ''),
        'index': document.get('index'),
        'chiral': chiral_rxn,
        # Frequency/popularity score
        'count': document.get('count', 1),
    }

    # Define reaction in RDKit and validate.
    # Force reactants and products to be one pseudo-molecule (bookkeeping)
    reaction_smarts_one = '(' + reaction_smarts.replace('>>', ')>>(') + ')'
    try:
        template['rxn'] = rdchiralReaction(str(reaction_smarts_one))
    except Exception as e:
        # Best effort: return the template's info without a reaction.
        if gc.DEBUG:
            MyLogger.print_and_log('Couldnt load : {}: {}'.format(
                reaction_smarts_one, e), transformer_loc, level=1)
        template['rxn'] = None
    return template
def apply_one_template_by_idx(
    self,
    _id,
    smiles,
    template_idx,
    calculate_next_probs=True,
    fast_filter_threshold=0.75,
    max_num_templates=100,
    max_cum_prob=0.995,
    template_prioritizer=None,
    template_set=None,
    fast_filter=None,
    use_ban_list=True,
):
    """Applies one template by index.

    Args:
        _id (int): Pathway id used by tree builder.
        smiles (str): SMILES string of molecule to apply template to.
        template_idx (int): index of template to apply.
        calculate_next_probs (bool): Flag to calculate probabilities
            (template relevance scores) for precursors generated by
            template application.
        fast_filter_threshold (float): Fast filter threshold to filter
            bad predictions; precursors scoring below it are discarded.
        max_num_templates (int): Maximum number of template scores and
            indices to return when calculating next probabilities.
        max_cum_prob (float): Maximum cumulative probabilities to use
            when returning next probabilities.
        template_prioritizer (Prioritizer): Use to override prioritizer
            created during initialization. It is called here as
            predict(smiles, max_num_templates=..., max_cum_prob=...) and
            must return (scores, indices), each offering tolist().
        template_set (str): Name of template set to use when multiple
            template sets are available.
        fast_filter (callable): Use to override the fast filter created
            during initialization. Called as
            fast_filter(reactant_smiles, smiles); its result is compared
            to fast_filter_threshold.
        use_ban_list (bool): When True, a molecule found in BANNED_SMILES
            is not expanded and only the dummy outcome is returned.

    Returns:
        List of outcomes with (_id, smiles, template_idx, precursors,
        fast_filter_score). With calculate_next_probs, precursors is a
        list of (smiles, scores, indices, value) tuples; otherwise it is
        the precursor's 'smiles_split' entry. A single dummy outcome
        (_id, smiles, template_idx, [], 0.0) is returned when the SMILES
        cannot be parsed, is banned, or yields no accepted precursor.
    """
    if template_prioritizer is None:
        template_prioritizer = self.template_prioritizer
    if template_set is None:
        template_set = self.template_set
    if fast_filter is None:
        fast_filter = self.fast_filter

    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        # RDKit signals an unparsable SMILES with None; report "nothing
        # found" for the input instead of crashing in MolToSmiles.
        return [(_id, smiles, template_idx, [], 0.0)]  # dummy outcome
    smiles = Chem.MolToSmiles(parsed, isomericSmiles=True)

    # The ban list is checked on the canonical SMILES, before paying for
    # the reactant initialization.
    if use_ban_list and smiles in BANNED_SMILES:
        return [(_id, smiles, template_idx, [], 0.0)]  # dummy outcome

    mol = rdchiralReactants(smiles)

    template = self.get_one_template_by_idx(template_idx, template_set)
    # NOTE(review): the reaction is built from the raw SMARTS and stored
    # on the dictionary returned by get_one_template_by_idx - confirm
    # that mutating that object is acceptable to its owner.
    template['rxn'] = rdchiralReaction(template['reaction_smarts'])

    all_outcomes = []
    seen_reactants = {}
    seen_reactant_combos = set()  # membership tests only, so a set

    for precursor in self.apply_one_template(mol, template):
        reactant_smiles = precursor['smiles']
        if reactant_smiles in seen_reactant_combos:
            continue
        seen_reactant_combos.add(reactant_smiles)

        fast_filter_score = fast_filter(reactant_smiles, smiles)
        if fast_filter_score < fast_filter_threshold:
            continue

        if not calculate_next_probs:
            all_outcomes.append(
                (_id, smiles, template_idx, precursor['smiles_split'],
                 fast_filter_score))
            continue

        reactants = []
        for reactant_smi in precursor['smiles_split']:
            if reactant_smi not in seen_reactants:
                scores, indices = template_prioritizer.predict(
                    reactant_smi,
                    max_num_templates=max_num_templates,
                    max_cum_prob=max_cum_prob)
                # scores and indices will be passed through celery, need
                # to be lists
                value = 1
                seen_reactants[reactant_smi] = (
                    reactant_smi, scores.tolist(), indices.tolist(), value)
            reactants.append(seen_reactants[reactant_smi])
        all_outcomes.append(
            (_id, smiles, template_idx, reactants, fast_filter_score))

    if not all_outcomes:
        all_outcomes.append(
            (_id, smiles, template_idx, [], 0.0))  # dummy outcome

    return all_outcomes