def test(a_mol, b_smiles, transform):
    """Check whether ``transform`` applied to ``a_mol`` can yield ``b_smiles``.

    The reaction is run on ``a_mol`` and every product is passed through
    ``standardize``.  Two comparison modes are used:

    * If no standardized product contains the wildcard token ``[*]``, the
      result is an exact membership test of ``b_smiles`` in the products.
    * Otherwise each product is turned into a substructure query (dummy
      atoms become queries, degree is adjusted ignoring dummies, explicit
      hydrogens added) and matched against the H-added molecule parsed from
      ``b_smiles``.

    Args:
        a_mol: reactant molecule handed to ``transform.RunReactants``.
        b_smiles: SMILES of the expected product.
        transform: object exposing ``RunReactants`` (e.g. an RDKit reaction).

    Returns:
        bool: True if any product equals / matches ``b_smiles``.
    """
    product_sets = transform.RunReactants([a_mol])
    if not product_sets:
        return False

    products = [standardize(prod) for prod in chain(*product_sets)]

    # No wildcard anywhere: plain string comparison is sufficient.
    if not any("[*]" in prod for prod in products):
        return b_smiles in products

    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True
    qp.adjustDegree = True
    qp.adjustDegreeFlags = Chem.ADJUST_IGNOREDUMMIES

    queries = []
    for prod in products:
        # The products are treated as SMILES strings above, but AddHs needs a
        # molecule object: parse strings first, pass anything else through.
        prod_mol = Chem.MolFromSmiles(prod) if isinstance(prod, str) else prod
        if prod_mol is None:
            # Unparsable product cannot match anything.
            continue
        queries.append(Chem.AdjustQueryProperties(Chem.AddHs(prod_mol), qp))

    b_mol = Chem.AddHs(Chem.MolFromSmiles(b_smiles))
    return any(b_mol.HasSubstructMatch(query) for query in queries)
def _load_template(self, path):
    """
    Loads a template molecule with 2D coordinates

    The file type is taken from the text after the *last* dot of the file
    name (case-insensitive), so names such as ``my.template.sdf`` are
    handled correctly.  After loading, all atoms and bonds are turned into
    generic queries.

    Args:
        path (str): path to the model molecule in *.sdf, or *.pdb format

    Raises:
        ValueError: if unsupported format is used: sdf|pdb, or if the
            file could not be parsed into a molecule

    Returns:
        rdkit.Chem.rdchem.Mol: RDKit representation of the template
    """
    # rpartition keeps working for names without any dot (empty extension)
    # and for names with several dots (last component wins).
    _, dot, extension = os.path.basename(path).rpartition('.')
    if not dot:
        extension = ''
    file_type = extension.lower()

    if file_type == 'sdf':
        mol = Chem.MolFromMolFile(path, sanitize=True, removeHs=True)
    elif file_type == 'pdb':
        mol = Chem.MolFromPDBFile(path, sanitize=True, removeHs=True)
    else:
        raise ValueError(
            'Unsupported molecule type \'{}\''.format(extension))

    if mol is None:
        # The RDKit readers signal a parse failure by returning None.
        raise ValueError(
            'Could not read template molecule from \'{}\''.format(path))

    p = Chem.AdjustQueryParameters()
    p.makeAtomsGeneric = True
    p.makeBondsGeneric = True

    return Chem.AdjustQueryProperties(mol, p)
def _queryfromrequest(suffix='_query'):
    """Build a query molecule from the current request.

    The request payload (JSON body if present, otherwise ``request.values``)
    is searched, in this order, for ``'smiles' + suffix``,
    ``'smarts' + suffix`` and ``'mol' + suffix``.  The first key found
    decides how the molecule is parsed.

    Args:
        suffix (str): suffix appended to the ``smiles``/``smarts``/``mol``
            key names.

    Returns:
        The parsed molecule, or None if none of the three keys is present.

    Raises:
        InvalidUsage: (status_code=411) if a key is present but the molecule
            could not be parsed; the message carries whatever was written
            to ``sys.stderr`` while parsing.
    """
    tgt = request.get_json()
    if tgt is None:
        tgt = request.values

    # Capture messages written to sys.stderr while parsing, and always put
    # the original stream back afterwards so the replacement does not leak
    # out of this call.
    saved_stderr = sys.stderr
    sio = sys.stderr = StringIO()
    try:
        if 'smiles' + suffix in tgt:
            mol = Chem.MolFromSmiles(tgt.get('smiles' + suffix), sanitize=False)
            if mol is not None:
                try:
                    Chem.SanitizeMol(mol)
                except Exception:
                    # A molecule that fails sanitization is reported like a
                    # parse failure below.
                    mol = None
        elif 'smarts' + suffix in tgt:
            mol = Chem.MolFromSmarts(tgt.get('smarts' + suffix))
        elif 'mol' + suffix in tgt:
            mol = Chem.MolFromMolBlock(tgt.get('mol' + suffix), removeHs=False)
            # Only adjust a successfully parsed molecule; a None result must
            # fall through to the InvalidUsage branch.
            if mol is not None:
                mol = Chem.AdjustQueryProperties(mol)
        else:
            return None

        if mol is None:
            errm = sio.getvalue()
            # some errors leave blank lines
            errm = errm.replace('RDKit ERROR: \n', '')
            raise InvalidUsage(
                "Molecule could not be processed. Error message was:\n%s" % errm,
                status_code=411)
        return mol
    finally:
        sys.stderr = saved_stderr
def refine(self):
    """Record, for every fragment, where it matches in each of its molecules.

    For each fragment index below ``self.N_frag`` a new list is appended to
    ``self.frag2mol_mapping``.  It receives one entry per molecule listed in
    ``self.frag2mol`` for that fragment: the atom indices of the first
    chirality-aware substructure match, or an empty list if there is none.
    Dummy atoms are treated as queries (``self.qp.makeDummiesQueries`` is
    set to True).
    """
    for frag_idx in range(self.N_frag):
        query = self.frag_list[frag_idx]
        owning_mols = self.frag2mol[frag_idx]
        per_mol_hits = list()
        self.frag2mol_mapping.append(per_mol_hits)

        for mol_idx in owning_mols:
            self.qp.makeDummiesQueries = True
            target = Chem.AdjustQueryProperties(self.mol_list[mol_idx], self.qp)
            # The fragment query is re-adjusted for every molecule, exactly
            # as the running value is carried from one molecule to the next.
            query = Chem.AdjustQueryProperties(query, self.qp)
            hits = target.GetSubstructMatches(query, useChirality=True)
            per_mol_hits.append(list(hits[0]) if len(hits) > 0 else list())
def add_frag_list(self, frag_list, mol):
    """Register a molecule and its fragments in the library.

    ``mol`` is appended to ``self.mol_list``.  Each fragment in
    ``frag_list`` (or ``mol`` itself when ``frag_list`` is empty) is
    compared, chirality-aware and with dummies *not* treated as queries,
    against the fragments already stored.  Unknown fragments are appended;
    the ``frag2mol`` / ``mol2frag`` cross references and the
    ``max_frag2mol`` / ``max_mol2frag`` counters are updated either way.
    """
    mol_idx = self.N_mol
    self.mol_list.append(mol)
    self.mol2frag.append(list())
    self.N_mol += 1

    # A molecule without fragments is treated as its own single fragment.
    if len(frag_list) == 0:
        frag_list = [mol]

    for candidate in frag_list:
        for known_idx, known in enumerate(self.frag_list):
            self.qp.makeDummiesQueries = False
            known = Chem.AdjustQueryProperties(known, self.qp)
            candidate = Chem.AdjustQueryProperties(candidate, self.qp)
            if are_mol_same(known, candidate, useChirality=True):
                ### The fragment is already in the database
                frag_idx = known_idx
                break
        else:
            ### No stored fragment matched: the fragment is new
            self.frag_list.append(candidate)
            self.frag2mol.append(list())
            frag_idx = self.N_frag
            self.N_frag += 1

        mols_of_frag = self.frag2mol[frag_idx]
        if mol_idx not in mols_of_frag:
            mols_of_frag.append(mol_idx)
        if len(mols_of_frag) > self.max_frag2mol:
            self.max_frag2mol = len(mols_of_frag)

        frags_of_mol = self.mol2frag[mol_idx]
        if frag_idx not in frags_of_mol:
            frags_of_mol.append(frag_idx)
        if len(frags_of_mol) > self.max_mol2frag:
            self.max_mol2frag = len(frags_of_mol)
def flatten_tartrate_mol(m):
    """Return ``m`` with the chiral tags of matched tartrate carbons cleared.

    The query ``OC(=O)C(O)C(O)C(=O)O`` is degree-adjusted on all atoms, so
    only free tartrate/tartaric acid fragments match.  If nothing matches,
    ``m`` itself is returned; otherwise a copy is returned in which atoms
    3 and 5 of every match have their chiral tag set to CHI_UNSPECIFIED.
    """
    query_params = Chem.AdjustQueryParameters.NoAdjustments()
    query_params.adjustDegree = True
    query_params.adjustDegreeFlags = Chem.AdjustQueryWhichFlags.ADJUST_IGNORENONE
    pattern = Chem.AdjustQueryProperties(
        Chem.MolFromSmarts('OC(=O)C(O)C(O)C(=O)O'), query_params)

    hits = m.GetSubstructMatches(pattern)
    if not hits:
        return m

    # Work on a copy so the caller's molecule keeps its stereo information.
    flattened = Chem.Mol(m)
    for hit in hits:
        for position in (3, 5):
            flattened.GetAtomWithIdx(hit[position]).SetChiralTag(
                Chem.ChiralType.CHI_UNSPECIFIED)
    return flattened
def _get_ligands(self):
    """Load all ligands with a non-null molecule and build generic patterns.

    Reads the ligands table through ``self.conn``, replaces the serialized
    ``molecule`` column with RDKit molecules, and adds a ``pattern`` column
    holding the same molecule adjusted to generic atoms and bonds with ring
    counts.

    Returns:
        pandas.DataFrame: the ligand rows with ``molecule`` and ``pattern``
        columns filled in.
    """
    query = """ select pdbid, mol_send(molecule) as molecule, atoms, rings, aromatic_rings, weight from {ligands} where molecule is not null """.format(
        ligands=PopulateLigandsScript.LIGANDS_DB)
    ligands_df = pd.read_sql_query(query, self.conn)

    generic = Chem.AdjustQueryParameters()
    generic.makeAtomsGeneric = True
    generic.makeBondsGeneric = True
    generic.adjustRingCount = True

    def _deserialize(blob):
        # The column value exposes tobytes(); Chem.Mol rebuilds the molecule
        # from that binary form.
        return Chem.Mol(blob.tobytes())

    def _to_pattern(molecule):
        return Chem.AdjustQueryProperties(molecule, generic)

    ligands_df.loc[:, "molecule"] = ligands_df.loc[:, "molecule"].apply(
        _deserialize)
    ligands_df.loc[:, "pattern"] = ligands_df.loc[:, "molecule"].apply(
        _to_pattern)
    return ligands_df
def query_core(self):
    """Return ``self.core`` as a query in which only dummies become queries.

    Returns:
        The adjusted core molecule, or None when ``self.core`` is falsy.
    """
    if not self.core:
        return None

    params = Chem.AdjustQueryParameters.NoAdjustments()
    params.makeDummiesQueries = True
    return Chem.AdjustQueryProperties(self.core, params)
def decomposition(gdatarec_lib, gdata_lib, mode, parms=6, pairs=True, parmsfile=None, frag_file=None, map_file=None, radiusadd=[0., 3.], softness=1., softcut=2., pairfile=None, exclude=None, paircut=0.0, prefix=None, scaling=2.0, verbose=False): if verbose: print "Start mapout procedure with" print "mode = %d" % mode print "softness = %6.3f" % softness print "softcut = %6.3f" % softcut print "parmsfile = %s" % parmsfile if verbose: print "Organizing and preparing data ..." mode_dict = dict() mode_dict = { 0: mode0, 1: mode1, 3: mode3, 4: mode4, 5: mode5, 6: mode6, 7: mode7 } if mode in mode_dict.keys(): fitmode = mode_dict[mode] else: mode_error(mode) has_cplxlig = True if mode in [0, 1]: has_cplxlig = False fitter = fitmode(gdatarec_lib, gdata_lib, parms=parms, pairs=False, radiusadd=radiusadd, softness=softness, softcut=softcut, scaling=scaling, verbose=verbose) parmdict = read_parmsfile(parmsfile) ### Find position of SES in parms file A_SSE = -1 B_SSE = -1 for i, entry in enumerate(parmdict["header"]): if entry.startswith("SSE"): if entry.endswith("(A)"): A_SSE = i elif entry.endswith("(B)"): B_SSE = i ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### ### Find the best Candidate Solutions ### ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### ### Collect all the solution candiates N_entries = len(parmdict.keys()) - 1 A_list = list() B_list = list() x_list = list() A_list_tmp = list() B_list_tmp = list() x_list_tmp = list() for key, value in parmdict.items(): if key == "header": continue A_list_tmp.append(value[A_SSE]) B_list_tmp.append(value[B_SSE]) x_list_tmp.append(value[:fitter._parms]) if fitter.decomp: N_entries = N_entries / 2 for i in range(N_entries): A_list.append([ copy.copy(A_list_tmp[2 * i]), copy.copy(A_list_tmp[2 * i + 1]) ]) B_list.append([ copy.copy(B_list_tmp[2 * i]), copy.copy(B_list_tmp[2 * i + 1]) ]) x_list.append(copy.copy(x_list_tmp[2 * i])) else: A_list = copy.copy(A_list_tmp) B_list = copy.copy(B_list_tmp) x_list = copy.copy(x_list_tmp) A_list = 
np.array(A_list) B_list = np.array(B_list) ### Find the best candidate solution if fitter.decomp: ndf, dl, dc, ndr = pygmo.fast_non_dominated_sorting(A_list) ordered_ndf = list() for front in ndf: ordered_ndf.append(pygmo.sort_population_mo(A_list[front])) else: ordered_ndf = np.argsort(A_list, axis=0) if fitter.decomp: best_x_A = np.array(x_list[ordered_ndf[0][0]]) else: best_x_A = np.array(x_list[ordered_ndf[0]]) ### ~~~~~~~~~~~~~~~~~~~~~~ ### ### Prepare Exclusion List ### ### ~~~~~~~~~~~~~~~~~~~~~~ ### if exclude != None \ and exclude != "": exclude_list = list() with open(exclude, "r") as fopen: for line in fopen: l = line.rstrip().lstrip().split() if len(l) == 0: continue if l[0].startswith("#"): continue for s in l: exclude_list.append(s) else: exclude_list = list() ### ~~~~~~~~~~~~~~~~~~~~~~~ ### ### Prepare Pairise Fitting ### ### ~~~~~~~~~~~~~~~~~~~~~~~ ### if pairs: if pairfile != None \ and pairfile != "": if type(pairfile) != str: raise TypeError( "The path to pairfile must be of type str, but is of type %s" % type(pairfile)) pairlist = read_pairsfile(pairfile, paircut) pairlist_idx = list() for pair in pairlist: for i in range(fitter.N_case): case1 = fitter.select[i] name1 = fitter.name[case1] if name1 in exclude_list: continue for j in range(fitter.N_case): if j <= i: continue case2 = fitter.select[j] name2 = fitter.name[case2] if name2 in exclude_list: continue if name1==pair[0] \ and name2==pair[1]: pairlist_idx.append([case1, case2]) elif name1==pair[1] \ and name2==pair[0]: pairlist_idx.append([case2, case1]) else: pairlist = None pairlist_idx = list() for i in range(fitter.N_case): name1 = fitter.name[i] if name1 in exclude_list: continue for j in range(fitter.N_case): if j <= i: continue name2 = fitter.name[j] if name2 in exclude_list: continue pairlist_idx.append([i, j]) else: pairlist = None pairlist_idx = None ### ~~~~~~~~~~~~~~~~~ ### ### Build the Library ### ### ~~~~~~~~~~~~~~~~~ ### has_extlib = False ### Check for external mapping files 
if frag_file != None \ and frag_file != "": has_extlib = True ext_frag = list() ext_frag_name = list() with open(frag_file, "r") as fopen: for line in fopen: l = line.rstrip().lstrip().split() if len(l) == 0: continue if l[0].startswith("#"): continue ext_frag.append(Chem.MolFromSmiles(l[1])) ext_frag_name.append(l[0]) else: ext_frag = None ext_frag_name = None if map_file != None \ and map_file != "": ext_map_frag = list() ext_map_inds = list() ext_map_name = list() with open(map_file, "r") as fopen: for line in fopen: l = line.rstrip().lstrip().split() if len(l) == 0: continue if l[0].startswith("#"): continue ext_map_name.append(l[0]) ext_map_frag.append(list()) ext_map_inds.append(list()) ids_list = l[1].split(",") if len(ids_list) == 1: if ids_list[0] == "-1": continue for i in ids_list: ext_map_frag[-1].append(int(i)) for s in l[2:]: ext_map_inds[-1].append(list()) for i in s.split(","): ext_map_inds[-1][-1].append(int(i)) else: ext_map_frag = None ext_map_inds = None ext_map_name = None if ext_frag==None \ and ext_map_frag!=None: raise IOError("Must provide both, frag_file and map_file.") if ext_frag!=None \ and ext_map_frag==None: raise IOError("Must provide both, frag_file and map_file.") if has_extlib: mol2extmol = list() #frag2extfrag = list() if has_cplxlig: mol2extmol_cplx = list() #frag2extfrag_cplx = list() mol2extmol_lig = list() #frag2extfrag_lig = list() if verbose: "Starting fragment decomposition..." 
RAND = np.random.randint(9999) frag_lib = frag_library() if has_cplxlig: frag_lib_cplx = frag_library() frag_lib_lig = frag_library() progs = aux_progs(verbose) for case in range(fitter.N_case): valid_poses = np.where(fitter.ind_case == case)[0] name = fitter.name[case] for pose in valid_poses: pmd_instance = fitter.pdat[pose] pmd_instance.save("p%d.mol2" % RAND) args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % ( RAND, RAND) progs.call(progs.ante_exe, args) mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND, removeHs=False) if verbose: AllChem.Compute2DCoords(mol) if has_extlib: index = ext_map_name.index(name) frag_list = list() for frag_id in ext_map_frag[index]: frag_list.append(ext_frag[frag_id]) ### If we have an external library with mappings ### we must do the refinement manually! mol2extmol.append(index) else: frag_list = get_frag_list(mol) frag_lib.add_frag_list(frag_list, mol) os.remove("p%d.mol2" % RAND) os.remove("p%d_sybyl.mol2" % RAND) if has_cplxlig: valid_poses_cplx = np.where(fitter.ind_case_cplx == case)[0] valid_poses_lig = np.where(fitter.ind_case_lig == case)[0] for pose in valid_poses_cplx: pmd_instance = fitter.pdat_cplx[pose] pmd_instance.save("p%d.mol2" % RAND) args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % ( RAND, RAND) progs.call(progs.ante_exe, args) mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND, removeHs=False) if verbose: AllChem.Compute2DCoords(mol) if has_extlib: index = ext_map_name.index(name) frag_list = list() for frag_id in ext_map_frag[index]: frag_list.append(ext_frag[frag_id]) ### If we have an external library with mappings ### we must do the refinement manually! 
mol2extmol_cplx.append(index) else: frag_list = get_frag_list(mol) frag_lib_cplx.add_frag_list(frag_list, mol) os.remove("p%d.mol2" % RAND) os.remove("p%d_sybyl.mol2" % RAND) for pose in valid_poses_lig: pmd_instance = fitter.pdat_lig[pose] pmd_instance.save("p%d.mol2" % RAND) args = "-i p%d.mol2 -fi mol2 -o p%d_sybyl.mol2 -fo mol2 -at sybyl -pf y -dr no" % ( RAND, RAND) progs.call(progs.ante_exe, args) mol = Chem.MolFromMol2File("p%d_sybyl.mol2" % RAND, removeHs=False) if verbose: AllChem.Compute2DCoords(mol) if has_extlib: index = ext_map_name.index(name) frag_list = list() for frag_id in ext_map_frag[index]: frag_list.append(ext_frag[frag_id]) ### If we have an external library with mappings ### we must do the refinement manually! mol2extmol_lig.append(index) else: frag_list = get_frag_list(mol) frag_lib_lig.add_frag_list(frag_list, mol) os.remove("p%d.mol2" % RAND) os.remove("p%d_sybyl.mol2" % RAND) if has_extlib: for frag_id in range(frag_lib.N_frag): frag_lib.frag2mol_mapping.append(list()) for mol_id in frag_lib.frag2mol[frag_id]: frag_id_rank = frag_lib.mol2frag[mol_id].index(frag_id) ext_mol_id = mol2extmol[mol_id] if len(ext_map_inds[ext_mol_id]) == 0: ### If we are here, then the molecule has no fragments. ### The molecule is then treated, as if itself would ### be the fragment mol = frag_lib.mol_list[mol_id] matches = range(mol.GetNumAtoms()) else: matches = ext_map_inds[ext_mol_id][frag_id_rank] frag_lib.frag2mol_mapping[-1].append(matches) if has_cplxlig: for frag_id in range(frag_lib_cplx.N_frag): frag_lib_cplx.frag2mol_mapping.append(list()) for mol_id in frag_lib_cplx.frag2mol[frag_id]: frag_id_rank = frag_lib_cplx.mol2frag[mol_id].index( frag_id) ext_mol_id = mol2extmol_cplx[mol_id] if len(ext_map_inds[ext_mol_id]) == 0: ### If we are here, then the molecule has no fragments. 
### The molecule is then treated, as if itself would ### be the fragment mol = frag_lib_cplx.mol_list[mol_id] matches = range(mol.GetNumAtoms()) else: matches = ext_map_inds[ext_mol_id][frag_id_rank] frag_lib_cplx.frag2mol_mapping[-1].append(matches) for frag_id in range(frag_lib_lig.N_frag): frag_lib_lig.frag2mol_mapping.append(list()) for mol_id in frag_lib_lig.frag2mol[frag_id]: frag_id_rank = frag_lib_lig.mol2frag[mol_id].index(frag_id) ext_mol_id = mol2extmol_lig[mol_id] if len(ext_map_inds[ext_mol_id]) == 0: ### If we are here, then the molecule has no fragments. ### The molecule is then treated, as if itself would ### be the fragment mol = frag_lib_lig.mol_list[mol_id] matches = range(mol.GetNumAtoms()) else: matches = ext_map_inds[ext_mol_id][frag_id_rank] frag_lib_lig.frag2mol_mapping[-1].append(matches) else: frag_lib.refine() if has_cplxlig: frag_lib_cplx.refine() frag_lib_lig.refine() if verbose: print "Poses Fragments..." for case in range(fitter.N_case): name = fitter.name[case] valid_poses = np.where(fitter.ind_case == case)[0] print name, for pose in valid_poses: print frag_lib.mol2frag[pose], print "" frag_lib.draw("pos_") if has_cplxlig: print "Cplx Fragments..." for case in range(fitter.N_case): name = fitter.name[case] valid_poses = np.where(fitter.ind_case_cplx == case)[0] print name, for pose in valid_poses: print frag_lib_cplx.mol2frag[pose], print "" frag_lib_cplx.draw("cplx_") print "Lig Fragments..." for case in range(fitter.N_case): name = fitter.name[case] valid_poses = np.where(fitter.ind_case_lig == case)[0] print name, for pose in valid_poses: print frag_lib_lig.mol2frag[pose], print "" frag_lib_lig.draw("lig_") ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### ### Calculate the Fragment weightings ### ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### if verbose: print "Calculate fragment weightings..." 
### Constructor for weight_fitting: ### def __init__(self, fitter, x, frag_library, prefix=None, verbose=False): weight = weight_fitting(fitter, best_x_A, pairs, frag_lib, "pos", verbose) weight.process_rec = True weight.process_cplx = False weight.process_lig = False if has_cplxlig: weight_cplx = weight_fitting(fitter, best_x_A, pairs, frag_lib_cplx, "cplx", verbose) weight_cplx.process_rec = False weight_cplx.process_cplx = True weight_cplx.process_lig = False weight_lig = weight_fitting(fitter, best_x_A, pairs, frag_lib_lig, "lig", verbose) weight_lig.process_rec = False weight_lig.process_cplx = False weight_lig.process_lig = True ### Make the fragment-based decomposition of the GIST grids for case in range(fitter.N_case): weight.set_case(case) ### Use the internal write routine as a callback for the process routine weight.process(weight.simple_weighting) if has_cplxlig: weight_cplx.set_case(case) weight_lig.set_case(case) weight_cplx.process(weight_cplx.simple_weighting) weight_lig.process(weight_lig.simple_weighting) ### Combine the individual poses and get the final ### contributions of the fragments calc_data = np.zeros((2, fitter.N_case, frag_lib.N_frag), dtype=DOUBLE) frag_assign = np.zeros((fitter.N_case, frag_lib.N_frag), dtype=int) frag_assign[:] = -1 if has_cplxlig: calc_data_cplx = np.zeros((2, fitter.N_case, frag_lib_cplx.N_frag), dtype=DOUBLE) frag_assign_cplx = np.zeros((fitter.N_case, frag_lib_cplx.N_frag), dtype=int) frag_assign_cplx[:] = -1 calc_data_lig = np.zeros((2, fitter.N_case, frag_lib_lig.N_frag), dtype=DOUBLE) frag_assign_lig = np.zeros((fitter.N_case, frag_lib_lig.N_frag), dtype=int) frag_assign_lig[:] = -1 for case in range(fitter.N_case): weight.set_case(case) _data, _assign = weight.combine() calc_data[0, case, :] = np.copy(_data[0]) calc_data[1, case, :] = np.copy(_data[1]) frag_assign[case, :] = np.copy(_assign) if has_cplxlig: weight_cplx.set_case(case) _data, _assign = weight_cplx.combine() calc_data_cplx[0, case, :] = 
np.copy(_data[0]) calc_data_cplx[1, case, :] = np.copy(_data[1]) frag_assign_cplx[case, :] = np.copy(_assign) weight_lig.set_case(case) _data, _assign = weight_lig.combine() calc_data_lig[0, case, :] = np.copy(_data[0]) calc_data_lig[1, case, :] = np.copy(_data[1]) frag_assign_lig[case, :] = np.copy(_assign) ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### ### Evaluate the Fragment Properties ### ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### if has_cplxlig: case2frag_cplx = np.zeros((fitter.N_case, frag_lib_cplx.N_frag), dtype=int) case2frag_lig = np.zeros((fitter.N_case, frag_lib_lig.N_frag), dtype=int) case2frag_cplx[:] = -1 case2frag_lig[:] = -1 for case in range(fitter.N_case): valids = np.where(frag_assign[case] > -1)[0] valids_cplx = np.where(frag_assign_cplx[case] > -1)[0] valids_lig = np.where(frag_assign_lig[case] > -1)[0] for frag_id in frag_assign[case, valids]: frag_lib.qp.makeDummiesQueries = False frag_lib_cplx.qp.makeDummiesQueries = False frag_lib_lig.qp.makeDummiesQueries = False frag = Chem.AdjustQueryProperties(frag_lib.frag_list[frag_id],\ frag_lib.qp) for frag_id_cplx in frag_assign_cplx[case, valids_cplx]: frag_cplx = Chem.AdjustQueryProperties(frag_lib_cplx.frag_list[frag_id_cplx],\ frag_lib_cplx.qp) if are_mol_same(frag, frag_cplx, useChirality=True): case2frag_cplx[case, frag_id_cplx] = frag_id break for frag_id_lig in frag_assign_lig[case, valids_lig]: frag_lig = Chem.AdjustQueryProperties(frag_lib_lig.frag_list[frag_id_lig],\ frag_lib_lig.qp) if are_mol_same(frag, frag_lig, useChirality=True): case2frag_lig[case, frag_id_lig] = frag_id break
def align_mol_to_frags(smi_molecule, smi_linker, smi_frags):
    """Renumber a molecule so its atoms line up with its starting fragments.

    The fragments and the linker (both SMILES, with dummy atoms marking the
    attachment points) are matched onto the molecule.  The molecule is
    renumbered so fragment atoms come first and linker atoms last, and in
    both the molecule and the fragments the positions of the dummy atoms
    are moved to the end of the fragment block.

    Args:
        smi_molecule (str): SMILES of the full molecule.
        smi_linker (str): SMILES of the linker.
        smi_frags (str): SMILES of the starting fragments.

    Returns:
        tuple: ``((aligned_mol, aligned_frags), nodes_to_keep, exit_vectors)``
        where ``nodes_to_keep`` are fragment atom indices that are not
        dummies and ``exit_vectors`` are the neighbours of the dummy atoms.
        On any failure ``(([], []), [], [])`` is returned after printing
        "Could not align".
    """
    try:
        # Load SMILES as molecules
        full_mol = Chem.MolFromSmiles(smi_molecule)
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)

        # Include dummy atoms in query
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Get matches to fragments and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(full_mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(
            full_mol.GetSubstructMatches(qlinker, uniquify=False))

        # Without at least one match of each there is nothing to align;
        # handled by the common failure path below.
        if not frags_matches or not linker_matches:
            raise ValueError("fragments or linker do not match the molecule")

        # Loop over matches; stop at the first pair that covers every heavy
        # atom.  If none does, the last pair tried is used.
        for frag_match, linker_match in product(frags_matches, linker_matches):
            f_match = [
                idx for num, idx in enumerate(frag_match)
                if frags.GetAtomWithIdx(num).GetAtomicNum() != 0
            ]
            l_match = [
                idx for num, idx in enumerate(linker_match)
                if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
                and idx not in f_match
            ]
            # If perfect match, break
            if len(set(list(f_match) + list(l_match))) == full_mol.GetNumHeavyAtoms():
                break

        # Renumber molecule based on frags (incl. dummy atoms):
        # fragment indices first, remaining linker indices at the end
        sub_idx = list(frag_match)
        sub_idx += [
            idx for num, idx in enumerate(linker_match)
            if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
            and idx not in sub_idx
        ]

        aligned_mols = [Chem.rdmolops.RenumberAtoms(full_mol, sub_idx), frags]

        nodes_to_keep = list(range(len(frag_match)))

        # Renumber dummy atoms to end
        dummy_idx = [
            atom.GetIdx() for atom in aligned_mols[1].GetAtoms()
            if atom.GetAtomicNum() == 0
        ]
        for i, current in enumerate(aligned_mols):
            # "+ 2" accounts for the two dummy atoms of the fragments
            sub_idx = list(range(aligned_mols[1].GetNumHeavyAtoms() + 2))
            for idx in dummy_idx:
                sub_idx.remove(idx)
                sub_idx.append(idx)
            if i == 0:
                mol_range = list(range(current.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(current.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(sub_idx)))
            sub_idx.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(current, sub_idx)

        # Get exit vectors
        exit_vectors = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                if atom.GetIdx() in nodes_to_keep:
                    nodes_to_keep.remove(atom.GetIdx())
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())

        if len(exit_vectors) != 2:
            print("Incorrect number of exit vectors")

        return (aligned_mols[0], aligned_mols[1]), nodes_to_keep, exit_vectors

    except Exception:
        # Best effort: any failure yields the empty result, but interpreter
        # exits and keyboard interrupts are no longer swallowed.
        print("Could not align")
        return ([], []), [], []
def compute_distance_and_angle(mol, smi_linker, smi_frags):
    """Compute distance and angle between the two exit vectors of a molecule.

    The molecule is aligned to its starting fragments and linker in the same
    way as in ``align_mol_to_frags``.  Coordinates are read from the
    molecule's conformer.

    Args:
        mol: RDKit molecule with a conformer.
        smi_linker (str): SMILES of the linker (with dummy atoms).
        smi_frags (str): SMILES of the starting fragments (with dummy atoms).

    Returns:
        tuple: ``(distance, angle)`` - the distance between the two exit
        atoms and the angle (radians, from ``np.arccos``) between the two
        exit-atom -> linker-atom unit vectors.  ``(None, None)`` on any
        failure, after printing the inputs.
    """
    try:
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)

        # Include dummy in query
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Align to frags and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(mol.GetSubstructMatches(qlinker, uniquify=False))

        if not frags_matches or not linker_matches:
            raise ValueError("fragments or linker do not match the molecule")

        # Loop over matches; stop at the first pair covering all heavy atoms
        for frag_match, linker_match in product(frags_matches, linker_matches):
            f_match = [
                idx for num, idx in enumerate(frag_match)
                if frags.GetAtomWithIdx(num).GetAtomicNum() != 0
            ]
            l_match = [
                idx for num, idx in enumerate(linker_match)
                if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
                and idx not in f_match
            ]
            if len(set(list(f_match) + list(l_match))) == mol.GetNumHeavyAtoms():
                break

        # Renumber based on frags (incl. dummy atoms):
        # fragment indices first, remaining linker indices at the end
        sub_idx = list(frag_match)
        sub_idx += [
            idx for num, idx in enumerate(linker_match)
            if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
            and idx not in sub_idx
        ]

        nodes_to_keep = list(range(len(frag_match)))

        aligned_mols = [Chem.rdmolops.RenumberAtoms(mol, sub_idx), frags]

        # Renumber dummy atoms to end
        dummy_idx = [
            atom.GetIdx() for atom in aligned_mols[1].GetAtoms()
            if atom.GetAtomicNum() == 0
        ]
        # A separate loop name keeps the `mol` argument intact for the
        # failure message below.
        for i, current in enumerate(aligned_mols):
            sub_idx = list(range(aligned_mols[1].GetNumHeavyAtoms() + 2))
            for idx in dummy_idx:
                sub_idx.remove(idx)
                sub_idx.append(idx)
            if i == 0:
                mol_range = list(range(current.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(current.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(sub_idx)))
            sub_idx.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(current, sub_idx)

        # Get exit vectors
        exit_vectors = []
        linker_atom_idx = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                if atom.GetIdx() in nodes_to_keep:
                    nodes_to_keep.remove(atom.GetIdx())
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())
                linker_atom_idx.append(atom.GetIdx())

        # Get coords
        conf = aligned_mols[0].GetConformer()
        exit_coords = [
            np.array(conf.GetAtomPosition(exit_idx))
            for exit_idx in exit_vectors
        ]
        linker_coords = [
            np.array(conf.GetAtomPosition(linker_atom))
            for linker_atom in linker_atom_idx
        ]

        # Get angle
        v1_u = unit_vector(linker_coords[0] - exit_coords[0])
        v2_u = unit_vector(linker_coords[1] - exit_coords[1])
        angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

        # Get distance
        distance = np.linalg.norm(exit_coords[0] - exit_coords[1])

        # Record results
        return distance, angle

    except Exception:
        # Reporting must not fail itself (e.g. when `mol` is None).
        try:
            mol_smiles = Chem.MolToSmiles(mol)
        except Exception:
            mol_smiles = None
        print(mol_smiles, smi_linker, smi_frags)
        return None, None
def get_linker(full_mol, clean_frag, starting_point):
    """Extract the linker of ``full_mol`` as a SMILES string.

    INPUT FORMAT: molecule (RDKit mol object), clean fragments (RDKit mol
    object), starting fragments (SMILES).

    For every substructure match of ``clean_frag`` in ``full_mol`` the atoms
    outside the match are treated as candidate linker atoms.  A candidate
    is accepted when exactly two fragment atoms touch the linker, the
    remainder after deleting the fragment atoms has the expected number of
    heavy atoms, and deleting the linker leaves two disconnected fragments.

    Returns:
        str: the linker SMILES; "" if there is no match, no linker atoms,
        or no candidate was found.  If several candidates remain, the first
        one is returned after printing a warning.

    NOTE(review): ``Chem.Kekulize`` is called on ``full_mol`` itself, after
    the ``mol_to_break`` copy is taken - this looks like it modifies the
    caller's molecule; confirm that this is intended.
    """
    # Get matches of fragments
    matches = list(full_mol.GetSubstructMatches(clean_frag))

    # If no matches, terminate
    if len(matches) == 0:
        print("No matches")
        return ""

    # Get number of atoms in linker
    linker_len = full_mol.GetNumHeavyAtoms() - clean_frag.GetNumHeavyAtoms()
    if linker_len == 0:
        return ""

    # Setup: keep an un-kekulized copy for the final fragmentation
    mol_to_break = Chem.Mol(full_mol)
    Chem.Kekulize(full_mol, clearAromaticFlags=True)

    poss_linker = []

    # Always true here (the empty case returned above)
    if len(matches) > 0:
        # Loop over matches
        for match in matches:
            mol_rw = Chem.RWMol(full_mol)
            # Get linker atoms: every heavy-atom index not in the match
            linker_atoms = list(
                set(list(range(
                    full_mol.GetNumHeavyAtoms()))).difference(match))
            linker_bonds = []
            atoms_joined_to_linker = []
            # Loop over starting fragments atoms
            # Get (i) bonds between starting fragments and linker, (ii) atoms joined to linker
            for idx_to_delete in sorted(match, reverse=True):
                nei = [
                    x.GetIdx() for x in mol_rw.GetAtomWithIdx(
                        idx_to_delete).GetNeighbors()
                ]
                intersect = set(nei).intersection(set(linker_atoms))
                if len(intersect) == 1:
                    linker_bonds.append(
                        mol_rw.GetBondBetweenAtoms(
                            idx_to_delete,
                            list(intersect)[0]).GetIdx())
                    atoms_joined_to_linker.append(idx_to_delete)
                elif len(intersect) > 1:
                    for idx_nei in list(intersect):
                        linker_bonds.append(
                            mol_rw.GetBondBetweenAtoms(idx_to_delete,
                                                       idx_nei).GetIdx())
                        atoms_joined_to_linker.append(idx_to_delete)

            # Check number of atoms joined to linker
            # If not == 2, check next match
            if len(set(atoms_joined_to_linker)) != 2:
                continue

            # Delete starting fragments atoms (highest index first so the
            # remaining indices stay valid)
            for idx_to_delete in sorted(match, reverse=True):
                mol_rw.RemoveAtom(idx_to_delete)

            linker = Chem.Mol(mol_rw)
            # Check linker required num atoms
            if linker.GetNumHeavyAtoms() == linker_len:
                mol_rw = Chem.RWMol(full_mol)
                # Delete linker atoms
                for idx_to_delete in sorted(linker_atoms, reverse=True):
                    mol_rw.RemoveAtom(idx_to_delete)
                frags = Chem.Mol(mol_rw)
                # Check there are two disconnected fragments
                if len(Chem.rdmolops.GetMolFrags(frags)) == 2:
                    # Fragment molecule into starting fragments and linker
                    fragmented_mol = Chem.FragmentOnBonds(
                        mol_to_break, linker_bonds)
                    # Remove starting fragments from fragmentation
                    linker_to_return = Chem.Mol(fragmented_mol)
                    qp = Chem.AdjustQueryParameters()
                    qp.makeDummiesQueries = True
                    for f in starting_point.split('.'):
                        qfrag = Chem.AdjustQueryProperties(
                            Chem.MolFromSmiles(f), qp)
                        linker_to_return = AllChem.DeleteSubstructs(
                            linker_to_return, qfrag, onlyFrags=True)

                    # Check linker is connected and two bonds to outside molecule
                    if len(Chem.rdmolops.GetMolFrags(linker)) == 1 and len(
                            linker_bonds) == 2:
                        Chem.Kekulize(linker_to_return,
                                      clearAromaticFlags=True)
                        # If for some reason a starting fragment isn't removed (and it's larger than the linker), remove (happens very occasionally)
                        if len(Chem.rdmolops.GetMolFrags(
                                linker_to_return)) > 1:
                            for frag in Chem.MolToSmiles(
                                    linker_to_return).split('.'):
                                if Chem.MolFromSmiles(
                                        frag).GetNumHeavyAtoms() == linker_len:
                                    return frag
                        # Round trip through SMILES before returning
                        return Chem.MolToSmiles(
                            Chem.MolFromSmiles(
                                Chem.MolToSmiles(linker_to_return)))

                    # If not, add to possible linkers (above doesn't capture some complex cases)
                    else:
                        fragmented_mol = Chem.MolFromSmiles(
                            Chem.MolToSmiles(fragmented_mol), sanitize=False)
                        linker_to_return = AllChem.DeleteSubstructs(
                            fragmented_mol, Chem.MolFromSmiles(starting_point))
                        poss_linker.append(Chem.MolToSmiles(linker_to_return))

    # If only one possibility, return linker
    if len(poss_linker) == 1:
        return poss_linker[0]
    # If no possibilities, process failed
    elif len(poss_linker) == 0:
        print("FAIL:", Chem.MolToSmiles(full_mol),
              Chem.MolToSmiles(clean_frag), starting_point)
        return ""
    # If multiple possibilities, process probably failed
    else:
        print("More than one poss linker. ", poss_linker)
        return poss_linker[0]
def _dummy_attachments(combo, atom_indices):
    """Return (dummy-bond atom pairs, exit atoms) for the dummies in a set of atoms."""
    dummy_bonds_at = []
    exit_points = []
    for idx in atom_indices:
        atom = combo.GetAtomWithIdx(idx)
        if atom.GetAtomicNum() == 0:
            neighbour = atom.GetNeighbors()[0].GetIdx()
            dummy_bonds_at.append((idx, neighbour))
            exit_points.append(neighbour)
    return dummy_bonds_at, exit_points


def _join_and_trim(combo, linker, st_pt, exit_pairs, bonds_to_break):
    """Bond the paired exit atoms, cut the dummy bonds, return the longest SMILES piece."""
    editable = Chem.EditableMol(combo)
    for atom_1, atom_2 in exit_pairs:
        if atom_1 == atom_2:
            # Cannot bond an atom to itself: report the inputs and stop
            # adding bonds.
            print(linker, st_pt)
            break
        editable.AddBond(atom_1, atom_2, order=Chem.rdchem.BondType.SINGLE)

    for bond in sorted(bonds_to_break, reverse=True):
        editable.RemoveBond(bond[0], bond[1])

    pieces = Chem.MolToSmiles(editable.GetMol()).split('.')
    # The detached dummies remain as separate pieces; keep the longest one.
    return sorted(pieces, key=len, reverse=True)[0]


def join_frag_linker(linker, st_pt, random_join=True):
    """Join a linker to the starting fragments at their dummy atoms.

    Args:
        linker (str): linker SMILES with dummy atoms; "" means "no linker".
        st_pt (str): starting fragments SMILES with dummy atoms.
        random_join (bool): if True the fragment exit points are shuffled
            with ``np.random.shuffle`` and a single molecule is built; if
            False both pairings (given order and reversed) are built.

    Returns:
        str: SMILES of the joined molecule when ``random_join`` is True or
        when ``linker`` is "" (then the dummies of ``st_pt`` are replaced by
        hydrogens and the first component is returned).
        list of str: the two joined SMILES when ``random_join`` is False.
    """
    if linker == "":
        du = Chem.MolFromSmiles('*')
        capped = AllChem.ReplaceSubstructs(Chem.MolFromSmiles(st_pt), du,
                                           Chem.MolFromSmiles('[H]'), True)[0]
        return Chem.MolToSmiles(Chem.RemoveHs(capped)).split('.')[0]

    combo = Chem.CombineMols(Chem.MolFromSmiles(linker),
                             Chem.MolFromSmiles(st_pt))

    # Include dummy in query
    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True

    qlink = Chem.AdjustQueryProperties(Chem.MolFromSmiles(linker), qp)
    linker_matches = combo.GetSubstructMatches(qlink)
    if len(linker_matches) > 1:
        # Prefer the first match containing exactly two dummy atoms; if
        # there is none, the last match is used.
        for candidate in linker_matches:
            count_dummy = 0
            for a in candidate:
                if combo.GetAtomWithIdx(a).GetAtomicNum() == 0:
                    count_dummy += 1
            if count_dummy == 2:
                break
        linker_atoms = candidate
    else:
        linker_atoms = linker_matches[0]

    linker_dummy_bonds_at, linker_exit_points = _dummy_attachments(
        combo, linker_atoms)

    # Everything that is not part of the linker match belongs to the
    # starting fragments.
    st_pt_atoms = list(
        set(range(combo.GetNumAtoms())).difference(linker_atoms))
    st_pt_dummy_bonds_at, st_pt_exit_points = _dummy_attachments(
        combo, st_pt_atoms)

    bonds_to_break = linker_dummy_bonds_at + st_pt_dummy_bonds_at

    if random_join:
        np.random.shuffle(st_pt_exit_points)
        return _join_and_trim(combo, linker, st_pt,
                              zip(linker_exit_points, st_pt_exit_points),
                              bonds_to_break)

    final_mols = []
    for st_pt_exit_pts in [st_pt_exit_points, st_pt_exit_points[::-1]]:
        final_mols.append(
            _join_and_trim(combo, linker, st_pt,
                           zip(linker_exit_points, st_pt_exit_pts),
                           bonds_to_break))
    return final_mols