def _fragments_to_bond_idx_sets(fragment_mols, parent_mol):
    """Map fragment molecules onto bond-index sets of ``parent_mol``.

    Dummy atoms (atomic number 0) are deleted from every fragment before it
    is handed to ``get_substructure_bond_idx``. Fragments for which that
    helper returns ``None`` are dropped.
    """
    # Compile the dummy-atom query once instead of once per fragment.
    dummy_atom = Chem.MolFromSmarts('[#0]')
    sgs = []
    for fragment in fragment_mols:
        stripped = Chem.DeleteSubstructs(fragment, dummy_atom)
        edge_idxs = get_substructure_bond_idx(stripped, parent_mol)
        if edge_idxs is not None:
            sgs.append(edge_idxs)
    return sgs


def get_sgs(record_dict, n_min, n_max, method="exhaustive"):
    """Enumerate subgraphs of the molecule stored in ``record_dict["mol"]``.

    Args:
        record_dict: Mapping that holds the molecule under the key ``"mol"``.
        n_min: Lower size bound, forwarded to the enumeration / size filter.
        n_max: Upper size bound, forwarded to the enumeration / size filter.
        method: ``"exhaustive"`` (RDKit ``FindAllSubgraphsOfLengthMToN``),
            ``"RECAP"`` (all children of the RECAP hierarchy) or ``"BRICS"``
            (BRICS fragments).

    Returns:
        For ``"exhaustive"`` the raw result of
        ``FindAllSubgraphsOfLengthMToN``; for ``"RECAP"`` and ``"BRICS"`` the
        result of ``subset_sgs_sizes`` applied to the collected bond-index
        sets. ``None`` for any other ``method`` value.
    """
    mol = record_dict["mol"]

    if method == "exhaustive":
        return Chem.rdmolops.FindAllSubgraphsOfLengthMToN(mol, n_min, n_max)

    if method == "RECAP":
        hierarchy = Recap.RecapDecompose(mol)
        fragment_mols = (
            node.mol for node in hierarchy.GetAllChildren().values()
        )
    elif method == "BRICS":
        fragment_mols = (
            Chem.MolFromSmiles(smiles) for smiles in BRICS.BRICSDecompose(mol)
        )
    else:
        # Unknown method: the function has always yielded None here.
        return None

    sgs = _fragments_to_bond_idx_sets(fragment_mols, mol)
    return subset_sgs_sizes([sgs], n_min, n_max)
def get_fragments(self):
    """Collect Murcko-scaffold SMILES for this molecule and its RECAP children.

    Returns:
        ``None`` when any of ``is_mol()``, ``is_small()`` or ``has_2_rings()``
        returns ``False``. Otherwise a list of scaffold SMILES: first the
        scaffolds of the direct RECAP children that have at least two SSSR
        rings, then the scaffold of the molecule itself. Scaffolds whose
        SMILES cannot be parsed back are left out.
    """
    # All three checks are evaluated, exactly as before.
    if False in [self.is_mol(), self.is_small(), self.has_2_rings()]:
        return None

    def ring_count(m):
        # Older RDKit returns the ring count from GetSSSR, newer releases
        # return the rings themselves; accept both.
        rings = Chem.GetSSSR(m)
        return rings if isinstance(rings, int) else len(rings)

    fragments = []

    # 1st: scaffolds of the RECAP children.
    hierarch = Recap.RecapDecompose(self.get_mol())
    for node in hierarch.children.values():
        m = node.mol
        if m is None or ring_count(m) < 2:
            continue
        try:
            core = MurckoScaffold.GetScaffoldForMol(m)
        except ValueError:
            # Scaffold calculation not possible for this fragment.
            continue
        smiles_scf = Chem.MolToSmiles(core)
        if Chem.MolFromSmiles(smiles_scf) is None:
            continue
        fragments.append(smiles_scf)

    # 2nd: scaffold of the molecule itself.
    try:
        core = MurckoScaffold.GetScaffoldForMol(self.get_mol())
        smiles_scf = Chem.MolToSmiles(core)
        if Chem.MolFromSmiles(smiles_scf) is not None:
            fragments.append(smiles_scf)
    except ValueError:
        # Scaffold calculation not possible; keep what was collected so far.
        pass

    return fragments
def __init__(self, smi, lib_smiles, core_smi=None, lib_file=None, fam_sub=True):
    """Parse the input molecule, load the fragment library and expand.

    Args:
        smi: SMILES of the input molecule. The parsed molecule (not the
            string) is stored on ``self.smi``.
        lib_smiles: SMILES passed to ``gen_lib`` when no ``lib_file`` is given.
        core_smi: Optional core SMILES, only used when ``fam_sub`` is false.
        lib_file: Optional library file; when given it is loaded with
            ``load_lib`` instead of calling ``gen_lib``.
        fam_sub: When true, the library is replaced by the family map built
            from the library plus the molecule's own RECAP children and
            ``fam_add`` is used; otherwise the stripped core is computed and
            ``add`` is used.

    Every entry of ``self.expanded`` is printed at the end.
    """
    self.smi = Chem.MolFromSmiles(smi)
    self.lib = load_lib(lib_file) if lib_file is not None else gen_lib(lib_smiles)

    if fam_sub:
        # Direct RECAP children of the input molecule, keyed by SMILES.
        main_fragments = Recap.RecapDecompose(self.smi).children.keys()
        # Work on a copy so the loaded library is not mutated in place.
        candidates = copy.deepcopy(self.lib)
        candidates.extend(main_fragments)
        affinities = Expand.build_affin_matrix(self, candidates)
        families = Expand.build_families(self, candidates, affinities)
        self.lib = Expand.get_family_map(self, families, main_fragments)
        self.expanded = Expand.fam_add(self)
    else:
        self.core = Expand.get_stripped_core(self, core_smi)
        self.expanded = Expand.add(self)

    for entry in self.expanded:
        print(entry)
def pair_frags(fname, out, method='Recap', is_mf=True):
    """Write (fragment combination, molecule) SMILES pairs to a TSV file.

    The ``Smiles`` column of the table at ``fname`` is read; each molecule is
    cleaned with ``utils.clean_mol``, fragmented, and every combination of
    its retained fragments that is a substructure of the molecule is
    recorded.

    Args:
        fname: Path of the input table; must provide a ``Smiles`` column.
        out: Path of the tab-separated output file with the columns
            ``Frags`` and ``Smiles``.
        method: ``'recap'`` (any letter case) selects the RECAP leaves; any
            other value selects BRICS. The comparison used to be
            case-sensitive, so the default ``'Recap'`` silently ran BRICS.
        is_mf: When true, combinations of up to all retained fragments are
            emitted; otherwise only single fragments.
    """
    smiles = pd.read_table(fname).Smiles.dropna()
    use_recap = method.lower() == 'recap'
    # Loop-invariant query/replacement molecules for capping attachment points.
    du, hy = Chem.MolFromSmiles('*'), Chem.MolFromSmiles('[H]')

    pairs = []
    for smile in tqdm(smiles):
        smile = utils.clean_mol(smile)
        mol = Chem.MolFromSmiles(smile)
        if use_recap:
            frags = np.array(sorted(Recap.RecapDecompose(mol).GetLeaves().keys()))
        else:
            frags = BRICS.BRICSDecompose(mol)
            # Collapse the numbered BRICS dummies ("[4*]") into a plain "*".
            frags = np.array(sorted({re.sub(r'\[\d+\*\]', '*', f) for f in frags}))
        if len(frags) == 1:
            continue

        subs = np.array([Chem.MolFromSmiles(f) for f in frags])
        # Replace every dummy atom by hydrogen, then strip the hydrogens.
        subs = np.array([
            Chem.RemoveHs(Chem.ReplaceSubstructs(f, du, hy, replaceAll=True)[0])
            for f in subs
        ])
        subs = np.array([m for m in subs if m.GetNumAtoms() > 1])
        # match[i][j] is True when fragment j is a substructure of fragment i;
        # keep the fragments that are matched by exactly one fragment.
        match = np.array([[m.HasSubstructMatch(f) for f in subs] for m in subs])
        frags = subs[match.sum(axis=0) == 1]
        frags = sorted(frags, key=lambda x: -x.GetNumAtoms())[:voc.n_frags]
        frags = [Chem.MolToSmiles(Chem.RemoveHs(f)) for f in frags]

        max_comb = len(frags) if is_mf else 1
        for ix in range(1, max_comb + 1):
            for comb in combinations(frags, ix):
                frag_smiles = '.'.join(comb)
                if len(frag_smiles) > len(smile):
                    continue
                if mol.HasSubstructMatch(Chem.MolFromSmarts(frag_smiles)):
                    pairs.append([frag_smiles, smile])

    df = pd.DataFrame(pairs, columns=['Frags', 'Smiles'])
    df.to_csv(out, sep='\t', index=False)
def recap(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Fragment the molecule using the RECAP algorithm.

    Args:
        mol: A molecule.
        remove_parent: When true, only the fragments are returned; otherwise
            ``mol`` is placed in front of them.
        sanitize: Whether to sanitize the fragments. Fragments for which
            sanitization yields ``None`` are dropped.
        fix: Whether to fix the fragments.

    Returns:
        A list of molecules built from every child of the RECAP hierarchy,
        optionally preceded by ``mol`` itself.
    """
    tree = Recap.RecapDecompose(mol)
    fragments = list(map(dm.to_mol, tree.GetAllChildren().keys()))

    if fix:
        fragments = list(map(dm.fix_mol, fragments))

    if sanitize:
        fragments = [
            cleaned
            for cleaned in map(dm.sanitize_mol, fragments)
            if cleaned is not None
        ]

    return fragments if remove_parent else [mol] + fragments
def main(name, argv):
    """Write RECAP-derived fragments and scaffolds of one molecule to a file.

    Args:
        name: Program name, forwarded to ``print_usage`` on bad arguments.
        argv: ``[input_path, output_path]``. The first whitespace-separated
            token of the first line of ``input_path`` is read as a SMILES.

    For the molecule and every child of its RECAP hierarchy, ``[*]`` is
    replaced by ``[H]``. Candidates with more than 7 and at most 25 heavy
    atoms are kept when they have at most 5 rotatable bonds; their Murcko
    scaffold is kept as well when it has at most 5 rotatable bonds and more
    than 7 heavy atoms. The unique, non-empty SMILES are written one per
    line as ``<smiles>\\t<index>`` with a 1-based index.

    Raises:
        ValueError: If the input SMILES cannot be parsed.
    """
    if len(argv) != 2:
        print_usage(name)
        return

    in_path, out_path = argv
    with open(in_path, 'r') as f:
        smile = f.readline().split()[0]

    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        raise ValueError('could not parse SMILES: ' + smile)

    hierarch = Recap.RecapDecompose(mol)
    children = []
    # dict.keys() is a view on Python 3 and cannot be concatenated to a list.
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if not (7 < new_size <= 25):
            continue
        if rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))
        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        if (core is not None
                and rdMolDescriptors.CalcNumRotatableBonds(core) <= 5
                and core.GetNumHeavyAtoms() > 7):
            children.append(core_smile)

    with open(out_path, 'w') as f:
        unique_smiles = (m for m in set(children) if m)
        for i, m in enumerate(unique_smiles, start=1):
            f.write(m + '\t' + str(i) + '\n')
def make_feat_data(mol, offset=1):
    """Assign the atoms of ``mol`` to RECAP leaf fragments.

    The hydrogen-stripped molecule is decomposed with RECAP. Each leaf key is
    turned into a SMARTS pattern by deleting ``*``, ``()`` and ``[H]``, and
    the patterns are tried from largest to smallest atom count. A match is
    skipped when it shares an atom with an already accepted match.

    Args:
        mol: Molecule to annotate; substructure matching runs on this
            molecule, not on the hydrogen-stripped copy.
        offset: Value added to every atom index before it is stringified.

    Returns:
        A list of rows. Each row starts with ``<pattern>_<match number>``
        followed by the offset atom indices of the match and of whatever
        ``get_neighbor_h`` returns for each matched atom. When RECAP yields
        no usable pattern, a single row holding the molecule's SMILES and
        all of its atom indices is returned.
    """
    recap_tree = Recap.RecapDecompose(Chem.RemoveHs(mol))

    patterns = []
    for key in recap_tree.GetLeaves().keys():
        cleaned = key.replace('*', '').replace('()', '')
        if cleaned != '[H]':
            patterns.append(cleaned.replace('[H]', ''))
    patterns.sort(
        key=lambda smarts: Chem.MolFromSmarts(smarts).GetNumAtoms(),
        reverse=True,
    )

    if not patterns:
        atom_labels = [str(idx + offset) for idx in range(mol.GetNumAtoms())]
        return [[Chem.MolToSmiles(mol)] + atom_labels]

    rows = []
    claimed = set()  # atoms already assigned to an accepted match
    for smarts in patterns:
        query = Chem.MolFromSmarts(smarts)
        for match_no, match in enumerate(mol.GetSubstructMatches(query)):
            atoms = list(match)
            if not claimed.isdisjoint(atoms):
                continue
            # Only the matched atoms are claimed, not the neighbours added below.
            claimed.update(atoms)
            for idx in match:
                atoms += get_neighbor_h(idx, mol)
            labels = [str(atom + offset) for atom in atoms]
            rows.append([smarts + '_' + str(match_no)] + labels)
    return rows
def get_frag_list(mol):
    """Return the RECAP leaf fragments of ``mol`` as molecules.

    The decomposition is run with ``minFragmentSize=5``; every leaf SMILES is
    parsed with ``Chem.MolFromSmiles`` and the results are returned in the
    order of the leaves dictionary.
    """
    leaves = Recap.RecapDecompose(mol, minFragmentSize=5).GetLeaves()
    return [Chem.MolFromSmiles(leaf_smiles) for leaf_smiles in leaves.keys()]
def get_stripped_core(self, core_smi):
    """Return the core of ``self.smi`` with its side chains replaced.

    Args:
        core_smi: SMILES of the core, or ``None`` to derive one.

    Returns:
        ``Chem.ReplaceSidechains(self.smi, core)`` where ``core`` is parsed
        from ``core_smi`` or, when ``core_smi`` is ``None``, is the Murcko
        scaffold of ``self.smi``. If that scaffold has the same SMILES as the
        molecule itself, the direct RECAP child with the longest SMILES is
        returned instead; when there are no RECAP children the scaffold path
        is used after all.
    """
    if core_smi is not None:
        core = Chem.MolFromSmiles(core_smi)
    else:
        print('No core provided generating core with Murcko Scaffold')
        core = MurckoScaffold.GetScaffoldForMol(self.smi)
        if Chem.MolToSmiles(core) == Chem.MolToSmiles(self.smi):
            print(
                'Murcko Scaffold failed selecting largest fragment as core'
            )
            fragment_smiles = Recap.RecapDecompose(self.smi).children.keys()
            # Return the fallback directly so it is not overwritten below;
            # max() on an empty view would raise, so only use it when
            # RECAP actually produced children.
            if fragment_smiles:
                return Chem.MolFromSmiles(max(fragment_smiles, key=len))

    return Chem.ReplaceSidechains(self.smi, core)
def _sssr_ring_count(mol):
    """Return the number of SSSR rings of ``mol``.

    Older RDKit releases return the count from ``Chem.GetSSSR``; newer ones
    return the rings themselves. Both forms are accepted.
    """
    rings = Chem.GetSSSR(mol)
    return rings if isinstance(rings, int) else len(rings)


def get_bicyclic(mol):
    """Return ``Chem.MurckoDecompose`` SMILES of the multi-ring parts of ``mol``.

    The RECAP leaves of ``mol`` are examined; when there are none, ``mol``
    itself is examined instead. Every examined molecule with at least two
    SSSR rings contributes the SMILES of its ``Chem.MurckoDecompose`` result.

    Returns:
        A list of SMILES strings, possibly empty.
    """
    leaves = Recap.RecapDecompose(mol).GetLeaves()
    if leaves:
        candidates = (Chem.MolFromSmiles(smiles) for smiles in leaves.keys())
    else:
        candidates = (mol,)

    return [
        Chem.MolToSmiles(Chem.MurckoDecompose(candidate))
        for candidate in candidates
        if _sssr_ring_count(candidate) >= 2
    ]
def build_library(in_smile,
                  frags,
                  lib,
                  rules=os.environ["COVALIB"] +
                  "/Code/Covalentizer/numbered_reaction.re",
                  linker_lib=False,
                  linker_smiles=''):
    """Fragment one molecule and run the reaction rules on the fragments.

    Args:
        in_smile: Path of a file whose first line starts with a SMILES.
        frags: Path the fragment list is written to, one
            ``<smiles>\\t<index>`` line per unique fragment (1-based index).
        lib: Output path forwarded to ``multi_react`` / ``multi_linkers``.
        rules: Path of the reaction rules file. The default is computed from
            the ``COVALIB`` environment variable when the function is defined.
        linker_lib: When true, ``multi_linkers`` is called instead of
            ``multi_react``.
        linker_smiles: Forwarded to ``multi_linkers``.

    Returns:
        ``None``. Returns early, without writing anything, when the input
        SMILES cannot be parsed.

    For the molecule and every child of its RECAP hierarchy, ``[*]`` is
    replaced by ``[H]``. Candidates with more than 7 and at most 25 heavy
    atoms are kept when they have at most 5 rotatable bonds; their Murcko
    scaffold is kept as well when it has at most 5 rotatable bonds and more
    than 7 heavy atoms.
    """
    with open(in_smile, 'r') as f:
        smile = f.readline().split()[0]

    mol = Chem.MolFromSmiles(smile)
    if mol is None:
        return

    hierarch = Recap.RecapDecompose(mol)
    children = []
    # dict.keys() is a view on Python 3 and cannot be concatenated to a list.
    for child in list(hierarch.GetAllChildren().keys()) + [smile]:
        new_smiles = child.replace('[*]', '[H]')
        new = Chem.MolFromSmiles(new_smiles)
        if new is None:
            continue
        new_size = new.GetNumHeavyAtoms()
        if new_size <= 7:
            continue
        if new_size <= 25 and rdMolDescriptors.CalcNumRotatableBonds(new) <= 5:
            children.append(Chem.MolToSmiles(new, isomericSmiles=True))
        core_smile = MurckoScaffold.MurckoScaffoldSmilesFromSmiles(
            new_smiles, includeChirality=True)
        core = Chem.MolFromSmiles(core_smile)
        if (new_size <= 25
                and core is not None
                and rdMolDescriptors.CalcNumRotatableBonds(core) <= 5
                and core.GetNumHeavyAtoms() > 7):
            children.append(core_smile)

    with open(frags, 'w') as f:
        unique_smiles = (m for m in set(children) if m)
        for i, m in enumerate(unique_smiles, start=1):
            f.write(m + '\t' + str(i) + '\n')

    if not linker_lib:
        multi_react([rules, frags, lib])
    else:
        multi_linkers([rules, frags, lib], linker_smiles)
ts.append(t2 - t1) if tests[10]: logger.info('Writing: Mol blocks') t1 = time.time() for mol in mols: mb = Chem.MolToMolBlock(mol) t2 = time.time() logger.info('Results10: %.2f seconds' % (t2 - t1)) ts.append(t2 - t1) if tests[11]: logger.info('RECAP decomposition') t1 = time.time() for mol in mols: d = Recap.RecapDecompose(mol) t2 = time.time() logger.info('Results11: %.2f seconds' % (t2 - t1)) ts.append(t2 - t1) if tests[12]: logger.info('Generate 2D coords') t1 = time.time() for mol in mols: AllChem.Compute2DCoords(mol) t2 = time.time() logger.info('Results12: %.2f seconds' % (t2 - t1)) ts.append(t2 - t1) if tests[13]: logger.info('Generate 3D coords')
'/drug_development/studyRdkit/st_rdcit/data/2d.rxn') ps = rxn.RunReactants(acid, base) print('the num of reactions', len(ps)) for reaction in ps: reactions_smi = Chem.MolToSimiles(reaction) print('this reaction is', reactions_smi) # 三、把分子切成片段 # 3.1 Recap方法 # Recap方法通过模拟实验室中常用的化学反应,将反应分子分解成功合理的片段 # Recap 方法返回的是类似于node tree结构的数据。 # > 注:分子片段的Smiles前面通常会加一个 * 如 ‘* c1ccccc1’ m = Chem.MolFromSmiles('c1ccccc1OCCOC(=O)CC') hierarch = Recap.RecapDecompose(m) type(hierarch) # 层次结构的原始分子 print('smi=', hierarch.smiles) # smi= CCC(=O)OCCOc1ccccc1 # 每个节点使用smiles键控的字典跟踪其子节点 ks = hierarch.children.keys() print(sorted(ks)) # ['*C(=O)CC', '*CCOC(=O)CC', '*CCOc1ccccc1', '*OCCOc1ccccc1', '*c1ccccc1'] # # 3.2 BRICS方法 # RDKit 还提供了另一种把分子切成片段的方法——BRICS方法。 BRICS方法主要是根据可合成的的键对分子进行切断,因此其返回的数据结构是来自于该分子的不同分子片段, 虚拟原子(*)是告诉我们是如何切断的。 # 对下图中的分子进行BRICS分解 smi = 'C=CC(=O)N1CCC(CC1)C2CCNC3=C(C(=NN23)C4=CC=C(C=C4)OC5=CC=CC=C5)C(=O)N' m = Chem.MolFromSmiles(smi)
def gen_lib(smiles):
    """Build a flat fragment library from a collection of SMILES.

    Every SMILES is parsed and decomposed with RECAP; the SMILES keys of the
    direct children of each decomposition are concatenated in input order.

    Args:
        smiles: Iterable of SMILES strings.

    Returns:
        A list of fragment SMILES (duplicates are kept).
    """
    molecules = [Chem.MolFromSmiles(entry) for entry in smiles]
    fragments = []
    for molecule in molecules:
        fragments.extend(Recap.RecapDecompose(molecule).children.keys())
    return fragments