def all_join_on_attach_point(mol1, mol2):
    """Enumerate the ways of joining two molecules at their attachment points.

    Non-ring atoms with atomic number 0 are treated as attachment indicators.
    Each neighbor of such an atom receives an atom map number (starting at 100),
    which is set directly on the atoms of the input molecules. Each molecule is
    then passed through `dm.fix_mol` and the atoms carrying a map number of at
    least 100 are collected as candidate attachment atoms.

    Arguments
    ---------
        mol1: <Chem.Mol>
            input molecule 1
        mol2: <Chem.Mol>
            input molecule 2

    Returns:
        iterator yielding the result of `random_fragment_add` for every pairing of
        a candidate atom of the first molecule with one of the second molecule.
    """
    first_map_num = 100
    n_tagged = 0
    fixed_mols = []
    candidate_idxs = []

    for mol in (mol1, mol2):
        # Visit the dummy atoms from the highest atom index to the lowest.
        dummies = sorted(
            (atom for atom in mol.GetAtoms() if not atom.IsInRing() and atom.GetAtomicNum() == 0),
            key=lambda atom: atom.GetIdx(),
            reverse=True,
        )
        for dummy in dummies:
            for neighbor in dummy.GetNeighbors():
                neighbor.SetAtomMapNum(first_map_num + n_tagged)
                n_tagged += 1

        fixed = dm.fix_mol(mol)
        fixed_mols.append(fixed)
        candidate_idxs.append(
            [atom.GetIdx() for atom in fixed.GetAtoms() if atom.GetAtomMapNum() >= first_map_num]
        )

    for idx1, idx2 in itertools.product(*candidate_idxs):
        # Work on copies so that every combination starts from the same fixed molecules.
        yield random_fragment_add(copy.copy(fixed_mols[0]), copy.copy(fixed_mols[1]), idx1, idx2)
def _preprocess(i, row):
    """Clean the molecule of one row and attach identifiers and a Morgan fingerprint.

    The SMILES is read from ``row[smiles_column]`` (``smiles_column`` is a name
    defined outside this function), converted to a molecule, then fixed,
    sanitized and standardized with datamol.

    Args:
        i: row position; not used by this function.
        row: mapping-like record holding the input SMILES; it is modified in place.

    Returns:
        The same ``row`` with the keys ``standard_smiles``, ``selfies``, ``inchi``,
        ``inchikey`` and ``onbits_fp`` (list of the on-bit indices of the
        fingerprint) set.
    """
    parsed = dm.to_mol(str(row[smiles_column]), ordered=True)
    fixed = dm.fix_mol(parsed)
    sanitized = dm.sanitize_mol(fixed, sanifix=True, charge_neutral=False)
    mol = dm.standardize_mol(
        sanitized,
        disconnect_metals=False,
        normalize=True,
        reionize=True,
        uncharge=False,
        stereo=True,
    )

    # Chirality-aware Morgan fingerprint, radius 2, folded to 8192 bits.
    fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        mol,
        radius=2,
        nBits=8192,
        invariants=[],
        fromAtoms=[],
        useChirality=True,
        useBondTypes=True,
        useFeatures=False,
    )

    row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
    row["selfies"] = dm.to_selfies(mol)
    row["inchi"] = dm.to_inchi(mol)
    row["inchikey"] = dm.to_inchikey(mol)
    row["onbits_fp"] = list(fp.GetOnBits())
    return row
def recap(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Fragment the molecule using the recap algorithm.

    Args:
        mol: a molecule.
        remove_parent: When True, return only the fragments; otherwise the input
            molecule is placed first in the returned list.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of molecules; fragments that end up as None are discarded.
    """
    children = Recap.RecapDecompose(mol).GetAllChildren()
    fragments = list(map(dm.to_mol, children.keys()))

    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))

    fragments = [fragment for fragment in fragments if fragment is not None]
    return fragments if remove_parent else [mol, *fragments]
def frag(
    mol: Chem.Mol,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Generate all possible fragmentation of a molecule.

    Args:
        mol: a molecule.
        remove_parent: When True, return only the fragments; otherwise the input
            molecule is placed first in the returned list.
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of molecules built from the unique fragment SMILES (taken in
        reverse sorted order); fragments that end up as None are discarded.
    """
    fragmentations = FraggleSim.generate_fraggle_fragmentation(mol)

    # Every fragmentation is a dot-separated SMILES string; pool the unique pieces.
    unique_smiles = {piece.strip() for seq in fragmentations for piece in seq.split(".")}
    ordered_smiles = sorted(unique_smiles, reverse=True)

    fragments = list(map(dm.to_mol, ordered_smiles))
    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))

    fragments = [fragment for fragment in fragments if fragment is not None]
    return fragments if remove_parent else [mol, *fragments]
def brics(
    mol: Chem.Mol,
    singlepass: bool = True,
    remove_parent: bool = False,
    sanitize: bool = True,
    fix: bool = True,
):
    """Run BRICS on the molecules and potentially fix dummy atoms.

    Args:
        mol: a molecule.
        singlepass: Single pass for `BRICSDecompose`.
        remove_parent: Remove parent from the fragments. This drops the first
            element of the decomposition (presumably the parent - confirm).
        sanitize: Whether to sanitize the fragments.
        fix: Whether to fix the fragments.

    Returns:
        A list of fragment molecules with None entries discarded.
    """
    decomposition = BRICS.BRICSDecompose(mol, returnMols=True, singlePass=singlepass)
    fragments = list(decomposition)

    if fix:
        fragments = list(map(dm.fix_mol, fragments))
    if sanitize:
        fragments = list(map(dm.sanitize_mol, fragments))
    if remove_parent:
        # Done before the None filtering, so the first raw entry is the one removed.
        fragments.pop(0)

    return [fragment for fragment in fragments if fragment is not None]
def compute_reaction_product(out, single_output=True):
    """Compute the product of a reaction.

    Args:
        out: iterable of product sequences; only the first element of each
            sequence is used.
        single_output: When True, return a single product chosen by
            `dm.sanitize_first` from a random permutation of the products.
            When False, return the list of all sanitized products.
    """
    products = [dm.fix_mol(entry[0], n_iter=0) for entry in out]

    if single_output:
        # It might be important to make a tradeoff decision when selecting products, for greater speed.
        # product = sorted(out, key=lambda x: MoleculeEnv.compute_reward_from_mol(x, True))[-1]
        # Sampling from the list of products is an alternative.
        shuffled = np.random.permutation(products)
        return dm.sanitize_first(shuffled)

    return [dm.sanitize_mol(product) for product in products]
def test_fixmol():
    """Exercise `dm.fix_mol` with its `n_iter`, `largest_only` and `remove_singleton` options."""
    smiles = "C.Cl.CC.[H][N:1]1(C)=CC(O)=CC2CCCCC12"
    mol = Chem.MolFromSmiles(smiles, sanitize=False)
    # mol.UpdatePropertyCache(False)
    # Chem.Kekulize(mol)

    def n_fragments(m):
        return len(Chem.rdmolops.GetMolFrags(m))

    # `mol` is reused for every call below, so the test relies on fix_mol leaving its input untouched.
    fixed_once = dm.fix_mol(mol, n_iter=1)
    # NOTE(review): the previous comment said the nitrogen valence should still be invalid here,
    # yet the assertion checks that no incorrect valence is reported - confirm which is intended.
    assert not dm.incorrect_valence(fixed_once)

    fixed_twice = dm.fix_mol(mol, n_iter=2)
    # A second iteration is not expected to change the result.
    assert Chem.MolToSmiles(fixed_once) == Chem.MolToSmiles(fixed_twice)

    # Only the largest fragment is expected here.
    largest = dm.fix_mol(mol, largest_only=True)
    # Result intentionally unused: only checks that combining both options does not raise.
    dm.fix_mol(mol, remove_singleton=True, largest_only=True)
    assert n_fragments(largest) == 1

    expected_largest = dm.standardize_smiles("OC1=CC2CCCCC2[N:1]=C1")
    assert dm.standardize_smiles(Chem.MolToSmiles(largest)) == expected_largest

    without_singletons = dm.fix_mol(mol, n_iter=2, remove_singleton=True)
    assert n_fragments(largest) == 1
    assert n_fragments(without_singletons) == 2
def _preprocess(i, row):
    """Clean the molecule of one row and attach identifiers and enumerated stereoisomers.

    The SMILES is read from ``row[smiles_column]`` (``smiles_column`` is a name
    defined outside this function), converted to a molecule, then fixed,
    sanitized and standardized with datamol. Up to 20 unique stereoisomers are
    enumerated and stored as sorted isomeric SMILES.

    Args:
        i: row position; not used by this function.
        row: mapping-like record holding the input SMILES; it is modified in place.

    Returns:
        The same ``row`` with the keys ``standard_smiles``, ``selfies``, ``inchi``,
        ``inchikey`` and ``enumerated_smiles`` set. If a ``ValueError`` is raised
        anywhere during processing, the scalar keys are set to ``'dropped'`` and
        ``enumerated_smiles`` to ``['dropped']``.
    """
    try:
        mol = dm.to_mol(str(row[smiles_column]), ordered=True)
        mol = dm.fix_mol(mol)
        mol = dm.sanitize_mol(mol, sanifix=True, charge_neutral=False)
        mol = dm.standardize_mol(
            mol,
            disconnect_metals=False,
            normalize=True,
            reionize=True,
            uncharge=False,
            stereo=True,
        )

        # Fixed seed so that the enumeration is reproducible when it is truncated.
        opts = StereoEnumerationOptions(unique=True, maxIsomers=20, rand=0xf00d)
        isomers = EnumerateStereoisomers(mol, options=opts)
        enum_smiles = sorted(Chem.MolToSmiles(isomer, isomericSmiles=True) for isomer in isomers)

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["selfies"] = dm.to_selfies(mol)
        row["inchi"] = dm.to_inchi(mol)
        row["inchikey"] = dm.to_inchikey(mol)
        row["enumerated_smiles"] = enum_smiles
        return row
    except ValueError:
        row["standard_smiles"] = 'dropped'
        row["selfies"] = 'dropped'
        row["inchi"] = 'dropped'
        row["inchikey"] = 'dropped'
        # A one-element list: list('dropped') would split the marker into single characters.
        row["enumerated_smiles"] = ['dropped']
        return row
def trim_side_chain(mol: Chem.rdchem.Mol, core, unwanted_side_chains):
    """Trim list of side chain from a molecule.

    Core atoms whose atom map number also appears in one of the unwanted side
    chain patterns mark the attachment points to cut. For each of them, the bond
    to its first neighbor inside the core match is removed and that neighbor is
    bonded to a new dummy atom carrying the same map number. The unwanted
    patterns are then deleted from the molecule when they match a whole fragment.

    Args:
        mol: molecule to trim. Hydrogens are added for the matching and removed
            again before the side chains are deleted.
        core: query molecule matched against `mol`; its atom map numbers identify
            the attachment points.
        unwanted_side_chains: iterable of side chain patterns to remove; their
            atom map numbers select which attachment points are cut.

    Returns:
        The result of `dm.keep_largest_fragment` on the trimmed molecule.
    """
    mol = Chem.AddHs(mol)
    match = mol.GetSubstructMatch(core)
    matched_idxs = set(match)

    # Keyed by pattern so that each distinct pattern is processed once, in input order.
    unwanted2map = {}
    for patt in unwanted_side_chains:
        unwanted2map[patt] = [a.GetAtomMapNum() for a in patt.GetAtoms() if a.GetAtomMapNum()]
    unwanted_mapping = set(itertools.chain.from_iterable(unwanted2map.values()))

    map2idx = {}
    map2nei = {}
    for atom in core.GetAtoms():
        num = atom.GetAtomMapNum()
        if num and num in unwanted_mapping:
            mol_atom_idx = match[atom.GetIdx()]
            map2idx[mol_atom_idx] = num
            nei_atoms = mol.GetAtomWithIdx(mol_atom_idx).GetNeighbors()
            map2nei[mol_atom_idx] = [n.GetIdx() for n in nei_atoms if n.GetIdx() in matched_idxs]

    emol = Chem.EditableMol(mol)
    for atom_idx, atom_map in map2idx.items():
        neighbors = map2nei.get(atom_idx)
        if not neighbors:
            # No neighbor inside the core match: nothing to reconnect, so skip this point.
            continue
        # Atom index 0 is a valid neighbor, so the index must not be tested for truthiness.
        nei_idx = neighbors[0]

        dummy = Chem.rdchem.Atom("*")
        dummy.SetAtomMapNum(atom_map)

        bond = mol.GetBondBetweenAtoms(atom_idx, nei_idx)
        emol.RemoveBond(atom_idx, nei_idx)
        new_ind = emol.AddAtom(dummy)
        emol.AddBond(nei_idx, new_ind, bond.GetBondType())

    mol = emol.GetMol()
    mol = Chem.RemoveHs(mol)

    for patt in unwanted2map:
        cur_frag = dm.fix_mol(patt)
        mol = Chem.DeleteSubstructs(mol, cur_frag, onlyFrags=True)

    return dm.keep_largest_fragment(mol)
def break_mol(
    mol: Chem.Mol,
    minFragmentSize: int = 1,
    silent: bool = True,
    onlyUseReactions: list = [],
    randomize: bool = False,
    mode: str = "brics",
    returnTree: bool = False,
):
    """Breaks a molecules into a list of fragment.

    A directed graph is grown from the input molecule: each node stores a
    molecule and its SMILES, and each edge links a molecule to a product
    obtained by applying one reaction to it. For every molecule taken from the
    pool of pending nodes, the reactions are tried in order and the first one
    whose selected product set is accepted is applied.

    Args:
        mol: molecule to break.
        minFragmentSize: size threshold used when selecting a product set (every
            product must have more explicit atoms than this value) and when
            validating products (number of non-dummy atoms must not be lower).
        silent: when False, print the selected product set for each reaction
            that produced something.
        onlyUseReactions: when non-empty, reactions whose type is not in this
            list are skipped. The default list is never modified here.
        randomize: shuffle the order in which the reactions are tried.
        mode: "brics" or "rxn" (case-insensitive) to select one reaction set;
            any other value uses both sets.
        returnTree: also return the graph.

    Returns:
        ``(leaves_smiles, allNodes)`` or ``(leaves_smiles, allNodes, G)`` when
        `returnTree` is True. ``leaves_smiles`` holds the SMILES of the nodes that
        have a parent and no child, ``allNodes`` the set of SMILES seen.
    """
    if mode.lower() == "brics":
        all_reactions = ALL_BRICS
        all_reactions_type = ALL_BRICS_TYPE
    elif mode.lower() == "rxn":
        all_reactions = ALL_RXNS
        all_reactions_type = ALL_RXNS_TYPE
    else:
        all_reactions = ALL_BRICS + ALL_RXNS
        all_reactions_type = ALL_BRICS_TYPE + ALL_RXNS_TYPE

    if randomize:
        # Apply the same permutation to both lists so reactions stay aligned with their types.
        p = np.random.permutation(len(all_reactions))
        all_reactions = [all_reactions[ind] for ind in p]
        all_reactions_type = [all_reactions_type[ind] for ind in p]

    nx = dm.graph._get_networkx()
    mSmi = Chem.MolToSmiles(mol, isomericSmiles=True)
    G = nx.DiGraph()
    node_num = 0
    # Node 0 is the input molecule.
    G.add_node(node_num, smiles=mSmi, mol=mol)
    allNodes = set()
    # Maps the SMILES of a molecule still to be processed to its node id.
    activePool = {mSmi: node_num}
    allNodes.add(mSmi)

    while activePool:
        # Take the first pending entry of the pool.
        nSmi = list(activePool.keys())[0]
        parent = activePool.pop(nSmi)
        node = G.nodes[parent]
        mol = node["mol"]
        for rxnIdx, reaction in zip(all_reactions_type, all_reactions):
            if onlyUseReactions and rxnIdx not in onlyUseReactions:
                continue
            ps = reaction.RunReactants((mol,))
            if ps:
                # One flag per product set: True when every product is larger than minFragmentSize.
                all_pass = [
                    all([prod.GetNumAtoms(onlyExplicit=True) > minFragmentSize for prod in p_])
                    for p_ in ps
                ]
                # Index of the first product set that passes (len(all_pass) when none does).
                nz_i = 0
                while nz_i < len(all_pass) and not all_pass[nz_i]:
                    nz_i += 1
                if not silent:
                    print(nSmi, "->", len(ps), "products and selected ", nz_i)
                    # display(MolsToGridImage(list(itertools.chain(*list(ps))), molsPerRow=2))
                # The modulo falls back to the first product set when no set passed.
                prodSeq = ps[nz_i % len(all_pass)]
                seqOk = True
                # we want to disqualify small fragments, so sort the product sequence by size
                prodSeq = [(prod.GetNumAtoms(onlyExplicit=True), prod) for prod in prodSeq]
                prodSeq.sort(key=lambda x: x[0])
                for _, prod in prodSeq:
                    # This flag is only ever set to True in this function.
                    prod.sanitized = True
                    try:
                        Chem.SanitizeMol(prod)
                    # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit - confirm intended.
                    except:
                        if dm.sanitize_mol(prod) is None:
                            seqOk = False
                            break
                        # NOTE(review): this path skips the `prod.pSmi` assignment below, while the
                        # loop under `if seqOk` reads `prod.pSmi` for every product - confirm.
                        continue
                    pSmi = Chem.MolToSmiles(prod, isomericSmiles=True)
                    # The product SMILES must be readable back into a molecule.
                    seqOk = seqOk and (dm.to_mol(pSmi) is not None)

                    notDummies = sum([atm.GetSymbol() != "*" for atm in prod.GetAtoms()])
                    # nDummies = pSmi.count('*')
                    # if minFragmentSize > 0 and (nats - nDummies < minFragmentSize):
                    if minFragmentSize > 0 and notDummies < minFragmentSize:
                        seqOk = False
                        break
                    # Cache the SMILES on the product for the node creation below.
                    prod.pSmi = pSmi

                if seqOk:
                    for _, prod in prodSeq:
                        if not prod.sanitized:
                            continue
                        pSmi = prod.pSmi
                        node_num += 1
                        # The node SMILES is computed from the fixed product; the node keeps the raw product.
                        usmi = Chem.MolToSmiles(dm.fix_mol(prod), isomericSmiles=True)
                        G.add_node(node_num, smiles=usmi, mol=prod)
                        G.add_edge(parent, node_num)
                        if usmi not in allNodes:
                            # Only products not seen before are queued for further breaking.
                            activePool[pSmi] = node_num
                            allNodes.add(usmi)
                    # Record on the parent which reaction type was applied.
                    G.nodes[parent]["rxn"] = rxnIdx
                    break  # at least one reaction matches

    # Leaves: nodes that were produced by a reaction and were not broken further.
    leaves_smiles = [
        G.nodes[n]["smiles"] for n in G.nodes() if G.in_degree(n) != 0 and G.out_degree(n) == 0
    ]
    if returnTree:
        return leaves_smiles, allNodes, G
    return leaves_smiles, allNodes
def _preprocess(i, row):
    """Clean the molecule of one row, enumerate its stereoisomers and fingerprint them.

    The SMILES is read from ``row[smiles_column]`` (``smiles_column`` is a name
    defined outside this function) and turned into a clean molecule with
    datamol. Up to 20 unique stereoisomers are enumerated; each of them is
    cleaned the same way and described by two Morgan fingerprints (radius 2,
    8192 bits), one with and one without chirality. SMARTS are added for later
    searching.

    Args:
        i: row position; not used by this function.
        row: mapping-like record holding the input SMILES; it is modified in place.

    Returns:
        The same ``row`` with these keys set:

        - ``standard_smiles``, ``smarts``, ``selfies``: computed from the cleaned
          input molecule.
        - ``enumerated_smiles``: standardized SMILES of each stereoisomer.
        - ``achiral_fp`` / ``chiral_fp``: per stereoisomer, the list of on-bit
          indices of the fingerprint without / with chirality.

        If a ``ValueError`` is raised anywhere during processing, the scalar keys
        are set to ``'dropped'`` and the list keys to ``['dropped']``.
    """

    def _clean(smiles):
        # Shared cleaning pipeline for the input molecule and for each stereoisomer.
        cleaned = dm.to_mol(smiles, ordered=True)
        cleaned = dm.fix_mol(cleaned)
        cleaned = dm.sanitize_mol(cleaned, sanifix=True, charge_neutral=False)
        return dm.standardize_mol(
            cleaned,
            disconnect_metals=False,
            normalize=True,
            reionize=True,
            uncharge=False,
            stereo=True,
        )

    try:
        mol = _clean(str(row[smiles_column]))

        # Fixed seed so that the enumeration is reproducible when it is truncated.
        opts = StereoEnumerationOptions(unique=True, maxIsomers=20, rand=0xf00d)
        isomers = EnumerateStereoisomers(mol, options=opts)
        enum_smiles = sorted(Chem.MolToSmiles(isomer, isomericSmiles=True) for isomer in isomers)

        fingerprint_function = rdMolDescriptors.GetMorganFingerprintAsBitVect
        chiral_pars = {
            "radius": 2,
            "nBits": 8192,
            "invariants": [],
            "fromAtoms": [],
            "useChirality": True,
            "useBondTypes": True,
            "useFeatures": False,
        }
        achiral_pars = {**chiral_pars, "useChirality": False}

        smiles_list = []
        achiral_fp_lis = []
        chiral_fp_lis = []
        for smi in enum_smiles:
            # Use a separate name: `mol` must keep referring to the input molecule,
            # which the row-level columns below are computed from.
            isomer_mol = _clean(smi)
            chiral_fp = fingerprint_function(isomer_mol, **chiral_pars)
            achiral_fp = fingerprint_function(isomer_mol, **achiral_pars)
            smiles_list.append(dm.standardize_smiles(smi))
            achiral_fp_lis.append(list(achiral_fp.GetOnBits()))
            chiral_fp_lis.append(list(chiral_fp.GetOnBits()))

        row["standard_smiles"] = dm.standardize_smiles(dm.to_smiles(mol))
        row["smarts"] = dm.to_smarts(mol)
        row["selfies"] = dm.to_selfies(mol)
        row["enumerated_smiles"] = smiles_list
        row["achiral_fp"] = achiral_fp_lis
        row["chiral_fp"] = chiral_fp_lis
        return row
    except ValueError:
        row["standard_smiles"] = 'dropped'
        row["smarts"] = 'dropped'
        row["selfies"] = 'dropped'
        # One-element lists: list('dropped') would split the marker into single characters.
        row["enumerated_smiles"] = ['dropped']
        row["achiral_fp"] = ['dropped']
        row["chiral_fp"] = ['dropped']
        return row