def getRMS(self, prb_mol, ref_pos, useFF=False):
    """Align ``prb_mol`` onto a reference geometry and return the RMSD.

    The reference molecule is a deep copy of ``prb_mol`` whose conformer 0
    is replaced by a new conformer holding the coordinates in ``ref_pos``.
    ``prb_mol`` itself is moved in place by ``AllChem.AlignMol``.

    Parameters
    ----------
    prb_mol : RDKit Mol
        Probe molecule; it must own a conformer with id 0.
    ref_pos : indexable
        One entry per atom of ``prb_mol``; every entry must offer
        ``.tolist()`` (e.g. the rows of a NumPy array).
    useFF : bool, optional
        If true, the reference is first relaxed with MMFF (hydrogens are
        added for the optimization and removed afterwards). Should that
        path fail, the un-optimized reference is used instead.

    Returns
    -------
    The value returned by ``AllChem.AlignMol`` (the alignment RMSD).
    """
    def optimizeWithFF(mol):
        molf = Chem.AddHs(mol, addCoords=True)
        AllChem.MMFFOptimizeMolecule(molf)
        return Chem.RemoveHs(molf)

    n_est = prb_mol.GetNumAtoms()
    ref_cf = Chem.rdchem.Conformer(n_est)
    for k in range(n_est):
        ref_cf.SetAtomPosition(k, ref_pos[k].tolist())

    ref_mol = copy.deepcopy(prb_mol)
    ref_mol.RemoveConformer(0)
    ref_mol.AddConformer(ref_cf)

    if useFF:
        try:
            return AllChem.AlignMol(prb_mol, optimizeWithFF(ref_mol))
        except Exception:
            # Deliberate best effort: fall through to the plain alignment.
            # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit
            # from being swallowed.
            pass
    return AllChem.AlignMol(prb_mol, ref_mol)
def best_rmsd_tmp_conf(dir_tmp_conf, file_ref):
    """Find the candidate conformer file that aligns best onto a reference.

    Every regular file in ``dir_tmp_conf`` whose name looks like
    ``c_*.mol2`` is loaded, passed through ``remove_SiH3`` and aligned onto
    the molecule read from ``file_ref`` once for every atom mapping yielded
    by ``getGraphIsoIter``. Each aligned pose is written to
    ``<i><name>.pdb`` (relative to the current working directory), and the
    best RMSD per file is printed.

    Parameters
    ----------
    dir_tmp_conf : str
        Directory holding the candidate ``c_*.mol2`` files.
    file_ref : str
        Path of the reference Mol2 file.

    Returns
    -------
    tuple
        ``(best_rmsd, best_f)``: the lowest RMSD found and the file name
        (without directory) that produced it; ``(None, None)`` when no
        candidate could be aligned.
    """
    best_rmsd = None
    best_f = None
    for f in os.listdir(dir_tmp_conf):
        # os.listdir returns bare names; they have to be resolved against
        # the listed directory, not against the current working directory.
        path = os.path.join(dir_tmp_conf, f)
        if not (os.path.isfile(path) and f.startswith('c_')
                and f.endswith('.mol2')):
            continue

        mol = remove_SiH3(rdk.MolFromMol2File(path))
        ref = rdk.MolFromMol2File(file_ref)
        conf_bestRmsd = None
        for i, m in enumerate(getGraphIsoIter(mol, ref)):
            atomMap = [[k, v] for k, v in m.items()]
            rmsd = chm.AlignMol(mol, ref, 0, 0, atomMap)
            w = rdk.PDBWriter(str(i) + f.replace('.mol2', '.pdb'))
            try:
                w.write(mol)
            finally:
                w.close()
            if conf_bestRmsd is None or rmsd < conf_bestRmsd:
                conf_bestRmsd = rmsd

        print("%s %s" % (f, conf_bestRmsd))
        if conf_bestRmsd is None:
            # No mapping was found for this file; it cannot compete.
            continue
        if best_rmsd is None or conf_bestRmsd < best_rmsd:
            best_f = f
            best_rmsd = conf_bestRmsd
    return best_rmsd, best_f
def overlay(mol1, mol2):
    """Sanitize two molecules, check they are identical and align them.

    For each molecule the atom count and conformer count are printed, then
    both are sanitized in place. Their SMILES strings must be equal
    (checked with ``assert``). Finally conformer 0 of ``mol1`` is aligned
    onto conformer 0 of ``mol2`` with up to 1000 iterations.

    Returns the value of ``AllChem.AlignMol`` for that alignment.
    """
    pair = (mol1, mol2)
    for molecule in pair:
        print(len(molecule.GetAtoms()), molecule.GetNumConformers())
    for molecule in pair:
        Chem.SanitizeMol(molecule)

    probe_smiles = Chem.MolToSmiles(mol1)
    reference_smiles = Chem.MolToSmiles(mol2)
    assert (probe_smiles == reference_smiles)

    return AllChem.AlignMol(mol1, mol2, prbCid=0, refCid=0, maxIters=1000)
def MCSAlignMolecules(ref_mol, ali_mol):
    """Align one Mol2 molecule onto another via their maximum common substructure.

    The molecules are deliberately read WITHOUT sanitization: RDKit tends to
    reject such inputs, and only the common-substructure search is needed
    here, not a chemically validated molecule.

    Parameters
    ----------
    ref_mol : str
        Path of the Mol2 file used as the fixed reference.
    ali_mol : str
        Path of the Mol2 file that is moved onto the reference.

    Returns
    -------
    RDKit Mol
        The molecule read from ``ali_mol`` after alignment.

    Side effects
    ------------
    Writes the aligned molecule to ``aligned.mol`` in the current working
    directory. If ``FindMCS`` raises ``ValueError`` a message is printed
    and ``sys.exit()`` is called.

    Raises
    ------
    ValueError
        If the common substructure cannot be matched in one of the
        molecules; an empty atom map would otherwise make ``AlignMol``
        fall back to an index-by-index alignment of all atoms.
    """
    import sys

    from rdkit import Chem
    from rdkit.Chem import AllChem
    from rdkit.Chem import rdFMCS
    from rdkit.Chem.rdFMCS import AtomCompare, BondCompare

    mol1 = Chem.MolFromMol2File(ref_mol, removeHs=False, sanitize=False)
    mol2 = Chem.MolFromMol2File(ali_mol, removeHs=False, sanitize=False)

    _fmcs_params = dict(maximizeBonds=False,
                        threshold=1.0,
                        timeout=60,
                        verbose=False,
                        matchValences=True,
                        ringMatchesRingOnly=True,
                        completeRingsOnly=True,
                        atomCompare=AtomCompare.CompareAny,
                        bondCompare=BondCompare.CompareAny)
    try:
        mcs = rdFMCS.FindMCS([mol1, mol2], **_fmcs_params)
    except ValueError:
        print(
            '\n Max Common Substructure calculation \n failed for this molecule!! \n Please be judicious '
        )
        sys.exit()

    core = Chem.MolFromSmarts(mcs.smartsString)
    match1 = mol1.GetSubstructMatch(core)
    match2 = mol2.GetSubstructMatch(core)
    if not match1 or not match2:
        raise ValueError(
            'No common substructure could be matched between %s and %s'
            % (ref_mol, ali_mol))

    AllChem.AlignMol(mol2, mol1, atomMap=list(zip(match2, match1)))
    Chem.MolToMolFile(mol2, 'aligned.mol', kekulize=False)
    return mol2
def gen_coords_rdmol(rdmol, rms_tol=0.1, max_attempts=None):
    """Generate coordinates for atoms of ``rdmol`` that do not have any yet.

    An atom whose x, y and z are all within 1e-4 of zero is treated as
    "without coordinates" (so an atom that really sits at the origin is
    re-embedded as well). All other atoms keep their positions: they are
    passed to ``AllChem.EmbedMolecule`` as ``coordMap`` and the embedded
    molecule is aligned back onto a copy of the input. Embedding is
    repeated with random seeds 1, 2, 3, ... until the RMSD of the known
    atoms is at most ``rms_tol``.

    Parameters
    ----------
    rdmol : RDKit Mol
        Molecule with one conformer; modified in place.
    rms_tol : float, optional
        Largest accepted RMSD of the known atoms after alignment.
    max_attempts : int or None, optional
        Upper bound on embedding attempts; ``None`` (default) retries
        without limit, as before.

    Returns
    -------
    list of int
        Indices of the atoms whose coordinates were already known.

    Raises
    ------
    RuntimeError
        If ``max_attempts`` is given and no acceptable embedding was found.
    """
    ref = rdmol.__copy__()
    conf = rdmol.GetConformer()
    coordDict = {}
    unchanged = []
    maps = []
    eps = 0.0001

    # Collect the atoms that already carry coordinates.
    for i in range(rdmol.GetNumAtoms()):
        pos = conf.GetAtomPosition(i)
        if -eps < pos.x < eps and -eps < pos.y < eps and -eps < pos.z < eps:
            continue  # atom without coordinates
        coordDict[i] = pos
        unchanged.append(i)
        maps.append((i, i))

    if rdmol.GetNumAtoms() <= len(maps):
        return unchanged  # nothing to generate

    seed = 1
    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        conf_id = AllChem.EmbedMolecule(rdmol, coordMap=coordDict,
                                        randomSeed=seed,
                                        useBasicKnowledge=True)
        seed += 1
        if conf_id < 0:
            # Embedding failed: there is nothing valid to align, try the
            # next seed.
            continue
        if not maps:
            # No atom had coordinates, so there is no frame to align to.
            # Aligning all atoms onto the all-zero reference could never
            # converge.
            return unchanged
        # align new molecule to original coordinates
        if AllChem.AlignMol(rdmol, ref, atomMap=maps) <= rms_tol:
            return unchanged

    raise RuntimeError(
        'Could not embed molecule within %d attempts' % max_attempts)
def overlay_ring_atoms(mol1, mol2, mol2_conf_id):
    """Align ``mol1`` onto ``mol2`` using only their ring atoms.

    The ring atoms of both molecules are obtained from ``get_ring_atoms``
    and paired in the order returned (extra atoms on the longer side are
    dropped by ``zip``). Conformer 0 of ``mol1`` is aligned onto conformer
    ``mol2_conf_id`` of ``mol2`` with up to 1000 iterations.

    Returns the value of ``AllChem.AlignMol`` for that alignment.
    """
    probe_ring = get_ring_atoms(mol1)
    reference_ring = get_ring_atoms(mol2)
    ring_pairs = list(zip(probe_ring, reference_ring))
    return AllChem.AlignMol(
        mol1,
        mol2,
        prbCid=0,
        refCid=mol2_conf_id,
        atomMap=ring_pairs,
        maxIters=1000,
    )
def add_unit(self, unit: MonomerUnit, unit_tags: list = [],
             polymer_tags: list = [],
             replace_polymer_atoms: bool = True,
             minimize: bool = True):
    """Attach ``unit`` to the polymer, optionally fusing tagged atoms.

    Without unit tags, polymer tags, or any atoms in the polymer yet, the
    unit is simply appended with ``add_unit_only``. Otherwise the unit is
    aligned onto the polymer using the tagged atom indices, appended, and
    for every tagged atom pair one atom is merged into the other and then
    removed.

    Parameters
    ----------
    unit : MonomerUnit
        The unit to add; its ``rdmol`` is moved by the alignment.
    unit_tags, polymer_tags :
        Matching tags (single or iterable) on the unit and on the polymer;
        both must contain the same number of tags. The default lists are
        only read, never modified.
    replace_polymer_atoms : bool or iterable of bool
        Per tag (and optionally per atom): when true the polymer atom is
        the one that is deleted and replaced by the unit atom, otherwise
        the unit atom is deleted. A single value is repeated as needed.
    minimize : bool
        Accepted but not used by this method.
    """
    # TODO: slow. Can I replace this with try/except?
    if not (unit_tags and polymer_tags and self.n_atoms):
        self.add_unit_only(unit)
        return

    monomer_tags = utils.asiterable(unit_tags)
    polymer_tags = utils.asiterable(polymer_tags)
    n_mtags = len(monomer_tags)
    err = "Must provide same number of tags for monomer and polymer"
    assert len(polymer_tags) == n_mtags, err

    replace_polymer_atoms = utils.asiterable(replace_polymer_atoms)
    n_flags = len(replace_polymer_atoms)
    if n_flags != n_mtags and n_flags == 1:
        replace_polymer_atoms = replace_polymer_atoms * n_mtags

    m_indices = []
    p_indices = []
    del_rep_atoms = []  # [atom to delete, atom that replaces it]
    for mtag, ptag, rep in zip(monomer_tags, polymer_tags,
                               replace_polymer_atoms):
        m_indices.extend(mtag.indices)
        p_indices.extend(ptag.indices)

        rep = utils.asiterable(rep)
        n_tag_atoms = len(mtag.atoms)
        if len(rep) != n_tag_atoms and len(rep) == 1:
            rep = rep * n_tag_atoms

        for m, p, r in zip(mtag.atoms, ptag.atoms, rep):
            del_rep_atoms.append([p, m] if r else [m, p])

    probe_indices = map(int, m_indices)
    reference_indices = map(int, p_indices)
    atom_map = tuple(zip(probe_indices, reference_indices))
    AllChem.AlignMol(unit.rdmol, self.rdmol, 0, 0, atomMap=atom_map)
    self.add_unit_only(unit)

    del_rep_atoms.sort(key=lambda pair: pair[0].index)
    delete_atoms = [pair[0] for pair in del_rep_atoms]
    self.remove_params_within_atoms(delete_atoms)
    # Walk from the highest index down so earlier indices stay valid.
    for to_delete, to_replace in reversed(del_rep_atoms):
        self.update_atom(to_delete, to_replace)
    self.remove_atoms(delete_atoms[::-1])
    self.clean()
def align_mols_mcs_all(system_pdbs, align_to=0):
    """Align a set of PDB structures via maximum common substructures.

    Every PDB in ``system_pdbs`` is loaded (hydrogens kept) and all but
    ``system_pdbs[align_to]`` are aligned, allowing reflection, onto a
    reference through the MCS of the pair. The reference and MCS settings
    are hard-coded per list position:

    * index 4  - current reference, rings match rings only;
    * index 10 - reference is structure 3, permissive ring fusion and any
      atom type may match;
    * index 12 - reference is structure 10, rings match rings only;
    * all others - reference is ``align_to``, any heavy atom may match.

    The aligned structures are written to ``align/aligned_<basename>``
    (the directory is created when missing).

    Returns the list of written PDB paths, in input order.
    """
    ## Load mols
    mols = [Chem.MolFromPDBFile(pdb, removeHs=False) for pdb in system_pdbs]

    ## Align with mcs
    ref = mols[align_to]
    print("ref:\t", align_to, os.path.basename(system_pdbs[align_to]))
    for mol2ID, mv in enumerate(mols):
        if mol2ID == align_to:
            continue
        print("move:\t", mol2ID, os.path.basename(system_pdbs[mol2ID]))

        # Pick the reference and the extra MCS options for this structure.
        if mol2ID == 4:
            extra_options = {}
        elif mol2ID == 10:
            ref = mols[3]
            extra_options = dict(
                ringCompare=rdFMCS.RingCompare.PermissiveRingFusion,
                atomCompare=rdFMCS.AtomCompare.CompareAny)
        elif mol2ID == 12:
            ref = mols[10]
            extra_options = {}
        else:
            ref = mols[align_to]
            extra_options = dict(
                atomCompare=rdFMCS.AtomCompare.CompareAnyHeavyAtom)
        mcs = rdFMCS.FindMCS([ref, mv], ringMatchesRingOnly=True,
                             **extra_options)

        patt = Chem.MolFromSmarts(mcs.smartsString)
        print("patternMol: ", mcs.smartsString)
        refMatch = ref.GetSubstructMatch(patt)
        print("refMatch:\t", refMatch)
        mvMatch = mv.GetSubstructMatch(patt)
        print("mvMatch:\t", mvMatch)

        matched_pairs = list(zip(mvMatch, refMatch))
        AllChem.AlignMol(mv, ref, atomMap=matched_pairs, reflect=True)

    ## write out
    out_dir = "align"
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    path_prefix = out_dir + "/aligned_"

    aligned_pdb_paths = []
    for mol1ID, in_pdb in enumerate(system_pdbs):
        tmp_out = path_prefix + os.path.basename(in_pdb)
        Chem.MolToPDBFile(mols[mol1ID], tmp_out)
        aligned_pdb_paths.append(tmp_out)
    return aligned_pdb_paths
def bb_distortion(self, bb_conformers=None, conformer=-1):
    """
    Mean rmsd of building blocks before and after assembly.

    For every building block its "free" core is aligned onto each
    corresponding core found in the macromolecule, and the rmsd values of
    all these alignments are averaged. Atoms are paired by index between
    the free core and the core taken from the macromolecule.

    Parameters
    ----------
    bb_conformers : :class:`list` of :class:`int`
        The ids of building block conformers to use. 1 id for each
        building block, in an order corresponding to
        :attr:`building_blocks`. If ``None``, all conformer ids
        default to ``-1``.

    conformer : :class:`int`, optional
        The id of the macromolecule conformer to use.

    Returns
    -------
    :class:`float`
        The mean rmsd of the macromolecule's building block cores to
        their "free" counterparts.

    """
    if bb_conformers is None:
        bb_conformers = [-1] * len(self.building_blocks)

    # One rmsd per (building block, core in the macromolecule) pair;
    # the mean over all of them is the result.
    deviations = []
    for bb_index, bb in enumerate(self.building_blocks):
        free = bb.core()
        identity_map = [(idx, idx) for idx in range(free.GetNumAtoms())]
        for frag in self.building_block_cores(bb_index):
            deviations.append(
                rdkit.AlignMol(free,
                               frag,
                               bb_conformers[bb_index],
                               conformer,
                               atomMap=identity_map))
    return sum(deviations) / len(deviations)
def evaluate(self, mol, energies, opt_mol, opt_energies, min_energy):
    """
    Decide which candidate conformers on ``mol`` enter the final set.

    A candidate is skipped when its energy exceeds
    ``min_energy + self.energy_diff``. Accepted-set conformers above that
    cutoff are dropped. A remaining candidate is aligned onto every
    accepted conformer; those closer than ``self.min_rmsd`` count as
    similar. The candidate is added (and the similar conformers removed)
    only when its energy is strictly the lowest of that group.

    Args:
        mol (RDKit Mol): The molecule containing the candidate conformers.
        energies (list): Energies of the candidate conformers, in the order
            returned by ``mol.GetConformers()``.
        opt_mol (RDKit Mol): The molecule containing the final set of
            conformers; modified in place.
        opt_energies (dict): Energies of the final set, looked up, deleted
            and inserted by conformer id; modified in place.
        min_energy (float): The lowest energy in the final set of
            conformers.
    """
    energy_cutoff = min_energy + self.energy_diff

    for i, macro_conf in enumerate(mol.GetConformers()):
        # skip if energy is too high
        if energies[i] > energy_cutoff:
            continue

        similar_confs = []
        # Take a snapshot of the ids first: conformers are removed inside
        # the loop, and removing from the live conformer sequence while
        # iterating over it skips entries or raises.
        opt_ids = [opt_conf.GetId() for opt_conf in opt_mol.GetConformers()]
        for opt_id in opt_ids:
            # remove conformer if energy is too high
            if opt_energies[opt_id] > energy_cutoff:
                del opt_energies[opt_id]
                opt_mol.RemoveConformer(opt_id)
                continue

            rmsd = AllChem.AlignMol(mol,
                                    opt_mol,
                                    macro_conf.GetId(),
                                    opt_id,
                                    maxIters=self.max_iters)
            if rmsd < self.min_rmsd:
                similar_confs.append(opt_id)

        # The candidate's energy goes last, so "argmin is the last index"
        # means the candidate is strictly the best of its neighbourhood.
        similar_energies = [opt_energies[conf_id] for conf_id in similar_confs]
        similar_energies.append(energies[i])
        if np.argmin(similar_energies) == len(similar_energies) - 1:
            for conf_id in similar_confs:
                opt_mol.RemoveConformer(conf_id)
                del opt_energies[conf_id]
            conf_id = opt_mol.AddConformer(macro_conf, assignId=True)
            opt_energies[conf_id] = energies[i]
def combAlignedOptLigHCore(core,lig,list):
    """Aligns ligand carboxylate to core carboxylate.

    Identifies which atom will need to be connected across ligand/core.
    Deletes ligand carboxylate.
    Combines ligand and core molecules to one molecule.

    Parameters:
        core: RDKit molecule providing the reference carboxylate.
        lig: RDKit molecule of the ligand; it is moved by the alignment and
            its connecting atom gets the property ``connect = 'Y'``.
        list: core atom indices paired, in order, with the ligand atoms
            matched by ``[CX3](=O)[OX1H0-,OX2H1]``.

    Returns:
        The combination of ``core`` and the ligand without its carboxylate.
    """
    atomnums = lig.GetSubstructMatch(
        Chem.MolFromSmarts('[CX3](=O)[OX1H0-,OX2H1]'))
    # The parameter name ``list`` hides the builtin, so the pairs are built
    # with a comprehension. AlignMol needs a real sequence, not a lazy zip.
    atom_map = [(lig_idx, core_idx)
                for lig_idx, core_idx in zip(atomnums, list)]
    rmsd = AllChem.AlignMol(lig, core, atomMap=atom_map)
    print("Alignment result:  %s" % (rmsd,))

    # The atom bonded to the carboxylate carbon is the future link point.
    connect_match = lig.GetSubstructMatch(
        Chem.MolFromSmarts('*[CX3](=O)[OX1H0-,OX2H1]'))
    connect_atom = lig.GetAtomWithIdx(connect_match[0])
    connect_atom.SetProp('connect', 'Y')

    # Try the protonated acid first; if nothing was removed, remove the
    # deprotonated carboxylate instead.
    trunc = Chem.DeleteSubstructs(
        lig, Chem.MolFromSmarts('[CX3](=O)[OX2H1][H]'))
    if trunc.GetNumAtoms() == lig.GetNumAtoms():
        trunc = Chem.DeleteSubstructs(
            lig, Chem.MolFromSmarts('[CX3](=O)[OX1H0-]'))

    combo = Chem.CombineMols(core, trunc)
    return combo
def EmbedAlignConstrainedScore(prbMol,refMols,core,prbNumConfs=10,refNumConfs=10,prbCharge=None,refCharges=None):
    """Calculates a constrained alignment based on a common pattern in the input molecules. Caution: Will fail if the pattern does not match.
    Calculates a shape and electrostatic potential similarity of the best alignment.

    For every reference molecule all pairs of (reference conformer, probe
    conformer) are aligned on the atoms matching ``core``; the pair with the
    smallest shape Tanimoto distance is kept, re-aligned, and used for the
    ESP similarity.

    :param prbMol: RDKit molecule for which shape and electrostatic similarities are calculated.
    :param refMols: RDKit molecule or list/tuple of RDKit molecules serving as references.
    :param core: Common pattern for the constrained embedding as embedded RDKit molecule
    :param prbNumConfs: Number of conformers to create for the probe molecule. A higher number creates better alignments but slows down the algorithm.
    :param refNumConfs: Number of conformers to create for each reference molecule. A higher number creates better alignments but slows down the algorithm.
    :param prbCharge: (optional) Partial charges of the probe molecule; passed on to ``GetEspSim``.
    :param refCharges: (optional) One entry of partial charges per reference molecule; passed on to ``GetEspSim``. Defaults to ``None`` for every reference.
    :return: list of shape similarities and list of ESP similarities, one entry per reference molecule.
    :raises ValueError: if a probe/reference pair has no conformers to align.
    """
    if not isinstance(refMols, (list, tuple)):
        refMols = [refMols]
    if refCharges is None:
        refCharges = [None] * len(refMols)

    prbMol = ConstrainedEmbedMultipleConfs(prbMol, core, numConfs=prbNumConfs)
    # Keep the returned molecules instead of discarding them.
    refMols = [ConstrainedEmbedMultipleConfs(refMol, core, numConfs=refNumConfs)
               for refMol in refMols]

    prbMatch = prbMol.GetSubstructMatch(core)
    # Use the conformer ids that actually exist; embedding may yield fewer
    # conformers than requested.
    prbConfIds = [conf.GetId() for conf in prbMol.GetConformers()]

    allShapeDist = []
    allEspSim = []
    for idx, refMol in enumerate(refMols):
        refMatch = refMol.GetSubstructMatch(core)
        atomMap = list(zip(prbMatch, refMatch))
        refConfIds = [conf.GetId() for conf in refMol.GetConformers()]

        best = None  # (shape distance, probe conformer id, reference conformer id)
        for refCid in refConfIds:
            for prbCid in prbConfIds:
                AllChem.AlignMol(prbMol, refMol, atomMap=atomMap,
                                 prbCid=prbCid, refCid=refCid)
                shape = AllChem.ShapeTanimotoDist(prbMol, refMol,
                                                  confId1=prbCid,
                                                  confId2=refCid)
                if best is None or shape < best[0]:
                    best = (shape, prbCid, refCid)
        if best is None:
            raise ValueError('No conformers available for alignment')
        shapeDist, prbBestConf, refBestConf = best

        # Each probe conformer was re-aligned onto every later reference
        # conformer in the scan above. Restore the best alignment before
        # comparing electrostatics.
        AllChem.AlignMol(prbMol, refMol, atomMap=atomMap,
                         prbCid=prbBestConf, refCid=refBestConf)
        espSim = GetEspSim(prbMol, refMol, prbBestConf, refBestConf,
                           prbCharge, refCharges[idx])

        allShapeDist.append(1 - shapeDist)
        allEspSim.append(espSim)

    return allShapeDist, allEspSim
def align_calphas(probe, reference):
    """Align ``probe`` onto ``reference`` using the C-alpha atoms they share.

    ``find_calphas`` is expected to return a mapping from residue number to
    atom index for each molecule. Residues present in the probe but not in
    the reference are reported and left out of the atom map. Progress and
    the final RMSD are printed.

    Returns:
        The RMSD returned by ``AllChem.AlignMol``.

    Raises:
        ValueError: if no residue is shared; an empty atom map would make
            ``AlignMol`` pair up all atoms by index instead.
    """
    ref_calphas = find_calphas(reference)
    print('Found', len(ref_calphas), 'CAs')
    prb_calphas = find_calphas(probe)
    print('Found', len(prb_calphas), 'CAs')

    atom_map = []
    for resnum, idx in prb_calphas.items():
        if resnum in ref_calphas:
            atom_map.append((idx, ref_calphas[resnum]))
        else:
            print('WARNING: residue', resnum, 'not found in reference')
    print('Mapped', len(atom_map), 'atoms')

    if not atom_map:
        raise ValueError('probe and reference share no C-alpha residues')

    rmsd = AllChem.AlignMol(probe, reference, atomMap=atom_map)
    print('RMSD:', rmsd)
    return rmsd
def _align_conformers(self, mol, templates):
    """Keep the conformers of ``mol`` that can be aligned onto a template.

    For each template every substructure match in ``mol`` is turned into an
    atom map (molecule atom -> template atom); the maps are tried from the
    largest to the smallest. Each conformer is aligned with every map, and
    whenever the score is at most ``self.align_rms_thresh`` a copy of the
    aligned conformer is stored in the result. Of the copies stemming from
    one conformer, the first is always kept and a later one only if it is
    more than ``self.prune_rms_thresh`` away from all copies kept so far.

    Returns a copy of ``mol`` holding only the retained conformers.
    """
    # Collect (template, [(mol atom, template atom), ...]) for all matches.
    template_maps = []
    for template in templates:
        mol_matches = mol.GetSubstructMatches(template)
        own_match = template.GetSubstructMatch(template)
        for mol_match in mol_matches:
            template_maps.append((template, list(zip(mol_match, own_match))))
    template_maps.sort(key=lambda entry: len(entry[1]), reverse=True)

    result = Chem.Mol(mol)
    result.RemoveAllConformers()

    for conformer in mol.GetConformers():
        candidate_conformers = []
        for template, constraint_map in template_maps:
            score = AllChem.AlignMol(mol,
                                     template,
                                     prbCid=conformer.GetId(),
                                     atomMap=constraint_map)
            if score <= self.align_rms_thresh:
                candidate_conformers.append(
                    result.AddConformer(conformer, assignId=True))

        accepted_conformers = candidate_conformers[:1]
        for candidate_confId in candidate_conformers[1:]:
            closest = min(
                AllChem.GetConformerRMS(result,
                                        candidate_confId,
                                        accepted_confId,
                                        prealigned=True)
                for accepted_confId in accepted_conformers)
            if closest > self.prune_rms_thresh:
                accepted_conformers.append(candidate_confId)
            else:
                result.RemoveConformer(candidate_confId)

    return result
def align_mols_mcs(system_pdbs, align_to=0):
    """Align PDB structures onto one of them via the maximum common substructure.

    Every PDB in ``system_pdbs`` is loaded (hydrogens kept). Each structure
    except ``system_pdbs[align_to]`` is aligned onto that reference using
    the atoms of their MCS (rings may only match rings). All structures,
    including the reference, are then written to
    ``align/aligned_<basename>``.

    Parameters
    ----------
    system_pdbs : sequence of str
        Paths of the PDB files.
    align_to : int, optional
        Index of the structure used as the fixed reference.

    Returns
    -------
    list of str
        Paths of the written PDB files, in input order.

    Raises
    ------
    ValueError
        If a PDB file cannot be parsed, or if no common substructure can be
        matched (an empty atom map would make ``AlignMol`` pair up all
        atoms by index).
    """
    ## Load mols
    mols = [Chem.MolFromPDBFile(pdb, removeHs=False) for pdb in system_pdbs]
    for pdb, mol in zip(system_pdbs, mols):
        if mol is None:
            raise ValueError("Could not read a molecule from: %s" % pdb)

    ## Align with mcs
    ref = mols[align_to]
    print("ref:\t", align_to, os.path.basename(system_pdbs[align_to]))
    for mol2ID, mv in enumerate(mols):
        if mol2ID == align_to:
            continue
        print("move:\t", mol2ID, os.path.basename(system_pdbs[mol2ID]))

        mcs = rdFMCS.FindMCS([ref, mv], ringMatchesRingOnly=True)
        patt = Chem.MolFromSmarts(mcs.smartsString)
        print("patternMol: ", mcs.smartsString)
        refMatch = ref.GetSubstructMatch(patt)
        print("refMatch:\t", refMatch)
        mvMatch = mv.GetSubstructMatch(patt)
        print("mvMatch:\t", mvMatch)

        if not refMatch or not mvMatch:
            raise ValueError(
                "No common substructure match between %s and %s"
                % (system_pdbs[align_to], system_pdbs[mol2ID]))
        AllChem.AlignMol(mv, ref, atomMap=list(zip(mvMatch, refMatch)))

    ## write out
    out_dir = "align"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(out_dir, exist_ok=True)
    path_prefix = out_dir + "/aligned_"

    aligned_pdb_paths = []
    for mol, in_pdb in zip(mols, system_pdbs):
        tmp_out = path_prefix + os.path.basename(in_pdb)
        Chem.MolToPDBFile(mol, tmp_out)
        aligned_pdb_paths.append(tmp_out)
    return aligned_pdb_paths
def make_substructure_molecule(template_mol_path, query_mol_path):
    """
    Build the maximum common substructure of two molecules with 3D coordinates.

    Both files are read with ``MoleculeReader`` and converted with
    ``rdkitize_ccdc_mol``. The MCS (threshold 0.9, complete rings only) is
    cut out of the template so that it keeps the template coordinates. If
    that core cannot be written as a mol block, the MCS pattern itself is
    embedded on the template coordinates, relaxed with UFF and aligned back
    onto the template.

    :param template_mol_path: path to the prepared template molecule (starting fragment)
    :param query_mol_path: path to the prepared query molecule (suggested followup)
    :return: mol block string of the MCS with 3D coordinates, or ``None``
        when no common substructure or no core could be obtained
    :raises ValueError: if the fallback embedding of the MCS pattern fails
    """
    template_mol = rdkitize_ccdc_mol(MoleculeReader(template_mol_path)[0])
    query_mol = rdkitize_ccdc_mol(MoleculeReader(query_mol_path)[0])
    print(query_mol)

    # find the maximum common substructure
    mcsResult = rdFMCS.FindMCS([template_mol, query_mol], threshold=0.9,
                               completeRingsOnly=True)
    if not mcsResult.smartsString:
        return None

    patt = Chem.MolFromSmarts(mcsResult.smartsString, mergeHs=True)

    # keep only the core of the reference molecule
    ref = AllChem.ReplaceSidechains(template_mol, patt)
    if not ref:
        return None

    core = AllChem.DeleteSubstructs(ref, Chem.MolFromSmiles('*'))
    core.UpdatePropertyCache()
    try:
        return Chem.MolToMolBlock(core)
    except Exception as e:
        # Fallback: give the query pattern itself the template coordinates.
        t_match = template_mol.GetSubstructMatch(patt)
        print(e)
        Chem.SanitizeMol(
            patt,
            sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL
            ^ Chem.SanitizeFlags.SANITIZE_KEKULIZE)

        template_conf = template_mol.GetConformer()
        cmap = {i: template_conf.GetAtomPosition(t_idx)
                for i, t_idx in enumerate(t_match)}
        conf_id = AllChem.EmbedMolecule(patt, randomSeed=0xf00d,
                                        coordMap=cmap, maxAttempts=1000)
        if conf_id < 0:
            raise ValueError('Could not embed the common substructure')
        AllChem.UFFOptimizeMolecule(patt)
        AllChem.AlignMol(patt, template_mol,
                         atomMap=list(zip(range(len(t_match)), t_match)))
        return Chem.MolToMolBlock(patt)
def alignBySiH3Core(file_mol, file_ref):
    """Align a Mol2 molecule onto a reference using their cores.

    ``findCore`` is applied to both molecules and ``getGraphIsoIter`` yields
    candidate atom mappings between the two cores. For every mapping the
    whole molecule is aligned onto the reference (conformer 0 on conformer
    0) and the pose is written to ``<i><file_mol with .pdb>``. Mappings for
    which the alignment raises are skipped. The two cores and the best RMSD
    are printed.

    Parameters
    ----------
    file_mol : str
        Path of the Mol2 file to be aligned.
    file_ref : str
        Path of the reference Mol2 file.

    Returns
    -------
    float or None
        The lowest RMSD found, or ``None`` if no mapping could be aligned.
    """
    mol = rdk.MolFromMol2File(file_mol)
    ref = rdk.MolFromMol2File(file_ref)
    mol_core, c_m = findCore(mol)
    ref_core, c_r = findCore(ref)
    print("%s %s" % (mol_core, ref_core))

    conf_bestRmsd = None
    for i, m in enumerate(getGraphIsoIter(mol_core, ref_core)):
        atomMap = [[k, v] for k, v in m.items()]
        try:
            rmsd = chm.AlignMol(mol, ref, 0, 0, atomMap)
        except Exception:
            # This mapping cannot be aligned; try the next one.
            continue

        w = rdk.PDBWriter(str(i) + file_mol.replace('.mol2', '.pdb'))
        try:
            w.write(mol)
        finally:
            w.close()

        if conf_bestRmsd is None or rmsd < conf_bestRmsd:
            conf_bestRmsd = rmsd

    print(conf_bestRmsd)
    return conf_bestRmsd
def _conformation_forcefield(mol, opt):
    """Build the force field for conformer 0 of ``mol``: MMFF if ``opt == "MMFF"``, else UFF."""
    if opt == "MMFF":
        try:
            mmff_mol = Chem.MolFromMolBlock(Chem.MolToMolBlock(mol),
                                            sanitize=False,
                                            removeHs=False)
            myff = Chem.rdForceFieldHelpers.SetupMMFFForceField(
                mmff_mol, mmffVerbosity=0)
            return AllChem.MMFFGetMoleculeForceField(mol, myff, confId=0)
        # Because the newer version of RDKit has this difference
        except AttributeError:
            return AllChem.MMFFGetMoleculeForceField(
                mol, AllChem.MMFFGetMoleculeProperties(mol))
    return AllChem.UFFGetMoleculeForceField(mol, confId=0)


def find_conformations(mol,
                       core,
                       match,
                       coordMap,
                       useTethers=True,
                       coreConfId=-1,
                       randomseed=2342,
                       max_iters=200,
                       opt=None):
    """Generate one conformation of ``mol`` constrained to ``core``.

    Heavily based on ConstrainedEmbed in the RDKit. The molecule is embedded
    with the core coordinates as constraints, energy-minimised with a force
    field and aligned onto the core.

    Parameters
    ----------
    mol : RDKit Mol
        Molecule to embed; modified in place.
    core : RDKit Mol
        Core whose coordinates act as the constraint.
    match : sequence of int
        ``match[i]`` is the atom of ``mol`` corresponding to core atom ``i``.
    coordMap : dict
        Maps atom indices of ``mol`` to the positions they must take.
    useTethers : bool
        If true, matching atoms are pulled towards fixed points at the core
        atom positions; otherwise pairwise distance constraints between the
        matched atoms are used.
    coreConfId : int
        Conformer of ``core`` that supplies the tether positions.
    randomseed : int
        Seed passed to the embedding.
    max_iters : int
        Maximum iterations of the first tethered minimisation.
    opt : str or None
        ``"MMFF"`` selects MMFF; anything else selects UFF.

    Returns
    -------
    tuple or None
        ``(mol, energy)`` where ``mol`` carries the property ``EmbedRMS``;
        ``None`` if embedding fails or no force field can be built.
    """
    ci = AllChem.EmbedMolecule(mol,
                               coordMap=coordMap,
                               randomSeed=randomseed,
                               useRandomCoords=True)
    if ci < 0:
        print(Chem.MolToMolBlock(mol))
        print(Chem.MolToMolBlock(core))
        print("COULD NOT EMBED")
        return None

    # Now make a map of the points to tether
    algMap = [(j, i) for i, j in enumerate(match)]

    if not useTethers:
        # clean up the conformation
        ff = _conformation_forcefield(mol, opt)
        if ff is None:
            sys.stderr.write("FORCEFIELD IS NONE\n" + Chem.MolToSmiles(mol))
            return None
        for i, idxI in enumerate(match):
            for j in range(i + 1, len(match)):
                idxJ = match[j]
                d = coordMap[idxI].Distance(coordMap[idxJ])
                ff.AddDistanceConstraint(idxI, idxJ, d, d, 300.)
        ff.Initialize()
        n = 4
        more = ff.Minimize()
        while more and n:
            more = ff.Minimize()
            n -= 1
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)
    else:
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)
        # The force field is built only after the alignment, as before.
        ff = _conformation_forcefield(mol, opt)
        if ff is None:
            sys.stderr.write("FORCEFIELD IS NONE\n" + Chem.MolToSmiles(mol))
            return None
        conf = core.GetConformer(coreConfId)
        for i in range(core.GetNumAtoms()):
            p = conf.GetAtomPosition(i)
            pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
            ff.AddDistanceConstraint(pIdx, match[i], 0, 0.0, 300.)
        ff.Initialize()
        # Do an energy minimisation
        # Forcefield parameters taken from Greg Landrum
        more = ff.Minimize(maxIts=max_iters, energyTol=1e-4, forceTol=1e-3)
        # Four extra steps of minimisation -> as prescribed in Greg's method
        n = 4
        while more and n:
            more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
            n -= 1
        # Realign
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)

    mol.SetProp('EmbedRMS', str(rms))
    return (mol, ff.CalcEnergy())
def ConstrainedEmbed_Slack(mol,
                           core,
                           useTethers=True,
                           tdist=0.25,
                           coreConfId=-1,
                           randomseed=2342,
                           getForceField=UFFGetMoleculeForceField,
                           **kwargs):
    """ generates an embedding of a molecule where part of the molecule
    is constrained to have particular coordinates

    Arguments
      - mol: the molecule to embed (modified in place)
      - core: the molecule to use as a source of constraints
      - useTethers: (optional) if True, the final conformation will be
          optimized subject to a series of extra forces that pull the
          matching atoms to the positions of the core atoms. Otherwise
          simple distance constraints based on the core atoms will be
          used in the optimization.
      - tdist: (optional) if useTethers==True, the upper bound of the
          distance constraint between the atoms and the positions of the
          core atoms during the optimization procedure.
      - coreConfId: (optional) id of the core conformation to use
      - randomseed: (optional) seed for the random number generator
      - getForceField: (optional) callable ``(mol, confId=...)`` returning
          the force field used for the optimization
      - kwargs: passed on to ``AllChem.EmbedMolecule``

    Returns ``mol`` with the property ``EmbedRMS`` set to the RMSD of the
    final alignment onto the core.

    Raises ValueError if ``mol`` does not match the core or cannot be
    embedded.
    """
    match = mol.GetSubstructMatch(core)
    if not match:
        raise ValueError("molecule doesn't match the core")

    coreConf = core.GetConformer(coreConfId)
    coordMap = {}
    for i, idxI in enumerate(match):
        coordMap[idxI] = coreConf.GetAtomPosition(i)

    ci = AllChem.EmbedMolecule(mol,
                               coordMap=coordMap,
                               randomSeed=randomseed,
                               **kwargs)
    if ci < 0:
        raise ValueError('Could not embed molecule.')

    algMap = [(j, i) for i, j in enumerate(match)]

    if not useTethers:
        # clean up the conformation
        ff = getForceField(mol, confId=0)
        for i, idxI in enumerate(match):
            for j in range(i + 1, len(match)):
                idxJ = match[j]
                d = coordMap[idxI].Distance(coordMap[idxJ])
                ff.AddDistanceConstraint(idxI, idxJ, d, d, 100.)
        ff.Initialize()
        n = 4
        more = ff.Minimize()
        while more and n:
            more = ff.Minimize()
            n -= 1
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)
    else:
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)
        ff = getForceField(mol, confId=0)
        # Tether to the requested core conformer (the same one used for the
        # embedding), not unconditionally to the default conformer.
        for i in range(core.GetNumAtoms()):
            p = coreConf.GetAtomPosition(i)
            pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
            ff.AddDistanceConstraint(pIdx, match[i], 0, tdist, 100.)
        ff.Initialize()
        n = 4
        more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
        while more and n:
            more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
            n -= 1
        # realign
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)

    mol.SetProp('EmbedRMS', str(rms))
    return mol
mol_init_1=Chem.AddHs(mol_ref) # remove all conformers from molecule mol_init_1.RemoveAllConformers() AllChem.EmbedMultipleConfs(mol_init_1,args.num_parallel_samples,\ numThreads=args.num_threads) try: ## baseline force field part with UFF mol_baseUFF = copy.deepcopy(mol_init_1) AllChem.UFFOptimizeMoleculeConfs(mol_baseUFF, numThreads=args.num_threads, maxIters=200) mol_baseUFF=Chem.RemoveHs(mol_baseUFF) RMSlist_UFF = [] for c in mol_baseUFF.GetConformers(): c_id = c.GetId() RMS_UFF = AllChem.AlignMol(mol_baseUFF, mol_ref, prbCid=c_id, refCid=0) RMSlist_UFF.append(RMS_UFF) ttest_uff.extend(RMSlist_UFF) except: continue try: ## baseline force field part with MMFF mol_baseMMFF = copy.deepcopy(mol_init_1) AllChem.MMFFOptimizeMoleculeConfs(mol_baseMMFF, numThreads=args.num_threads, maxIters=200) mol_baseMMFF=Chem.RemoveHs(mol_baseMMFF) RMSlist_MMFF = [] for c in mol_baseMMFF.GetConformers(): c_id = c.GetId() RMS_MMFF = AllChem.AlignMol(mol_baseMMFF, mol_ref, prbCid=c_id, refCid=0)
def get_conformations(rdkit_mol, nconfs=1, name=None, forcefield=None, rms=-1):
    """
    Generates 3D conformation(s) for an rdkit_mol

    :parameter rdkit_mol: RDKit molecule
    :type rdkit_mol: rdkit.Chem.Mol
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of conformations. The default value None results
        in skipping of the geometry optimization step
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations. Used while embedding and, when a
        forcefield is given and ``rms > 0``, again after the optimization.
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    :raises KeyError: if ``forcefield`` is neither 'uff' nor 'mmff'
    """
    def MMFFenergy(cid):
        ff = AllChem.MMFFGetMoleculeForceField(
            rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid)
        try:
            energy = ff.CalcEnergy()
        except Exception:
            msg = "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \
                  "\nNo geometry optimization was performed."
            warn(msg)
            # A huge energy sorts the failed conformer to the end.
            energy = 1e9
        return energy

    def UFFenergy(cid):
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid)
        try:
            energy = ff.CalcEnergy()
        except Exception:
            msg = "UFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \
                  "\nNo geometry optimization was performed."
            warn(msg)
            energy = 1e9
        return energy

    if name:
        rdkit_mol.SetProp('name', name)

    cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, pruneRmsThresh=rms, randomSeed=1))

    if forcefield:
        optimize_molecule, energy = {
            'uff': [AllChem.UFFOptimizeMolecule, UFFenergy],
            'mmff': [AllChem.MMFFOptimizeMolecule, MMFFenergy],
        }[forcefield]
        for cid in cids:
            optimize_molecule(rdkit_mol, confId=cid)
        cids.sort(key=energy)

        # Conformers are visited from low to high energy, so of two similar
        # ones the lower-energy conformer is the one that is kept.
        # ``cids`` is empty when the embedding produced nothing.
        if rms > 0 and cids:
            keep = [cids[0]]
            for cid in cids[1:]:
                for idx in keep:
                    try:
                        r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx)
                    except Exception:
                        r = rms + 1
                        message = "Alignment failed in multiple conformation generation: "
                        message += Chem.MolToSmiles(rdkit_mol)
                        message += "\nAssuming different conformations."
                        warn(message)
                    if r < rms:
                        break
                else:
                    keep.append(cid)
            cids = keep

    if nconfs == 1:
        return from_rdmol(rdkit_mol)
    else:
        return [from_rdmol(rdkit_mol, cid) for cid in cids]
conf.SetAtomPosition(i, coords[i].tolist()) mol_init_hs.AddConformer(conf) mol_init_hs = Chem.AddHs(mol_init_hs, addCoords=True) mol_init_embed = copy.deepcopy(mol_init_hs) # some weird issue # Can't kekulize mol. Unkekulized atoms: 6 7 8 try: mol_init_embed = Chem.RemoveHs(mol_init_embed) except: logger.info('Cant kekulize mol issue') continue RMS_EMBED = AllChem.AlignMol(mol_init_embed, mol_ref) pred_embed.append(mol_init_embed) ttest_embed.append(RMS_EMBED) # run MMFF/UFF on top of it try: ## baseline force field part with UFF mol_baseUFF = copy.deepcopy(mol_init_hs) uff_out = AllChem.UFFOptimizeMolecule(mol_baseUFF, maxIters=args.max_iters) mol_baseUFF = Chem.RemoveHs(mol_baseUFF) RMS_UFF = AllChem.AlignMol(mol_baseUFF, mol_ref) pred_uff.append(mol_baseUFF) ttest_uff.append(RMS_UFF) except: continue
def align_confs(mol):
    """Align every conformer of ``mol`` onto its conformer with id 0.

    The molecule is modified in place; conformer 0 itself is also passed
    through the alignment.
    """
    conformer_ids = [conformer.GetId() for conformer in mol.GetConformers()]
    for conformer_id in conformer_ids:
        AllChem.AlignMol(mol, mol, prbCid=conformer_id, refCid=0)
def get_conformations(mol, nconfs=1, name=None, forcefield=None, rms=-1, enforceChirality=False):
    """
    Generates 3D conformation(s) for an rdkit_mol or a PLAMS Molecule

    :parameter mol: RDKit or PLAMS Molecule
    :type mol: rdkit.Chem.Mol or |Molecule|
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of conformations. The default value None results
        in skipping of the geometry optimization step
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations. Used while embedding and, when a
        forcefield is given and ``rms > 0``, again after the optimization.
    :parameter bool enforceChirality: Enforce the correct chirality if chiral centers are present
    :return: A molecule with hydrogens and 3D coordinates or a list of molecules if nconfs > 1
    :rtype: |Molecule| or list of PLAMS Molecules
    :raises KeyError: if ``forcefield`` is neither 'uff' nor 'mmff'
    """
    if isinstance(mol, Molecule):
        rdkit_mol = to_rdmol(mol, assignChirality=enforceChirality)
    else:
        rdkit_mol = mol

    def MMFFenergy(cid):
        ff = AllChem.MMFFGetMoleculeForceField(
            rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid)
        try:
            energy = ff.CalcEnergy()
        except Exception:
            msg = "MMFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \
                  "\nNo geometry optimization was performed."
            warn(msg)
            # A huge energy sorts the failed conformer to the end.
            energy = 1e9
        return energy

    def UFFenergy(cid):
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid)
        try:
            energy = ff.CalcEnergy()
        except Exception:
            msg = "UFF energy calculation failed for molecule: " + Chem.MolToSmiles(rdkit_mol) + \
                  "\nNo geometry optimization was performed."
            warn(msg)
            energy = 1e9
        return energy

    if name:
        rdkit_mol.SetProp('name', name)

    try:
        cids = list(
            AllChem.EmbedMultipleConfs(rdkit_mol,
                                       nconfs,
                                       pruneRmsThresh=rms,
                                       randomSeed=1,
                                       enforceChirality=enforceChirality))
    except Exception:
        # ``useRandomCoords = True`` prevents (poorly documented) crash for large systems
        cids = list(
            AllChem.EmbedMultipleConfs(rdkit_mol,
                                       nconfs,
                                       pruneRmsThresh=rms,
                                       randomSeed=1,
                                       useRandomCoords=True,
                                       enforceChirality=enforceChirality))

    if forcefield:
        # Select the forcefield (UFF or MMFF)
        optimize_molecule, energy = {
            'uff': [AllChem.UFFOptimizeMolecule, UFFenergy],
            'mmff': [AllChem.MMFFOptimizeMolecule, MMFFenergy],
        }[forcefield]

        # Optimize and sort conformations
        for cid in cids:
            optimize_molecule(rdkit_mol, confId=cid)
        cids.sort(key=energy)

        # Remove duplicate conformations based on RMS. Conformers are
        # visited from low to high energy, so the lower-energy one of two
        # similar conformers is kept. ``cids`` is empty when the embedding
        # produced nothing.
        if rms > 0 and cids:
            keep = [cids[0]]
            for cid in cids[1:]:
                for idx in keep:
                    try:
                        r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx)
                    except Exception:
                        r = rms + 1
                        message = "Alignment failed in multiple conformation generation: "
                        message += Chem.MolToSmiles(rdkit_mol)
                        message += "\nAssuming different conformations."
                        warn(message)
                    if r < rms:
                        break
                else:
                    keep.append(cid)
            cids = keep

    if nconfs == 1:
        return from_rdmol(rdkit_mol)
    else:
        return [from_rdmol(rdkit_mol, cid) for cid in cids]
def compare_confomer_generator_and_trajectory_minimum_structures(
    results_path: str, name: str, base: str, tautomer_idx: int, thinning: int = 100
):
    """Compare ANI-minimized trajectory snapshots with pruned reference conformers.

    Every ``thinning``-th frame of the vacuum trajectory of the selected tautomer
    is minimized with the ANI energy function and stored as a conformer of an
    RDKit molecule built from the trajectory topology (dummy hydrogen removed).
    These conformers are then aligned against the pruned conformers stored in
    ``ani_mm_results.pickle`` on the maximum common substructure, keeping the
    lowest RMSD found over the tested atom mappings. The RMSD matrix is shown as
    a heatmap and a summary is printed.

    :param results_path: directory containing ``ani_mm_results.pickle`` and
        ``exp_results.pickle``.
    :param name: system name, used as key into both result dictionaries and to
        build the trajectory/PDB paths.
    :param base: base directory holding ``{name}/{name}_lambda_*.dcd`` and
        ``{name}/{name}_0.pdb``.
    :param tautomer_idx: which tautomer to analyse, must be 1 or 2.
    :param thinning: stride used when sub-sampling the trajectory.
    :return: tuple ``(compare_mol, minimum_traj, reference_mol, reference[1])``
        where ``reference`` is the return value of ``prune_conformers`` for the
        selected tautomer (second element presumably the energies; confirm
        against ``prune_conformers``).
    :raises AssertionError: if ``tautomer_idx`` is not 1 or 2.
    """
    assert tautomer_idx == 1 or tautomer_idx == 2

    # NOTE(review): pickle executes arbitrary code on load; only point
    # ``results_path`` at trusted result files.
    with open(f"{results_path}/ani_mm_results.pickle", "rb") as handle:
        ani_results = pickle.load(handle)
    with open(f"{results_path}/exp_results.pickle", "rb") as handle:
        exp_results = pickle.load(handle)

    # generate the tautomer object
    t1_smiles = exp_results[name]["t1-smiles"]
    t2_smiles = exp_results[name]["t2-smiles"]
    t_type, tautomers, flipped = generate_tautomer_class_stereobond_aware(
        name, t1_smiles, t2_smiles, nr_of_conformations=1, enforceChirality=True
    )
    tautomer = tautomers[0]
    print(f"Flipped: {flipped}")
    tautomer.perform_tautomer_transformation()

    tautomer_mol = prune_conformers(
        ani_results[name]["t1-confs"],
        ani_results[name]["t1-energies"],
        rmsd_threshold=0.1,
    )
    print(len(tautomer_mol[1]))

    traj_path = (
        f"{base}/{name}/{name}_lambda_{tautomer_idx-1}.0000_kappa_0.0000_in_vacuum.dcd"
    )
    pdb_path = f"{base}/{name}/{name}_0.pdb"

    # load trajectory, remove dummy atom
    traj = md.load(traj_path, top=pdb_path)
    atom_idx = [a.index for a in traj.topology.atoms]
    if (tautomer_idx - 1) == 1:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_0))
    else:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_1))
    traj = traj.atom_slice(atom_indices=atom_idx)

    # save pdb without dummy atom
    tautomer_pdb = f"{base}/{name}/{name}_without_dummy_{tautomer_idx}.pdb"
    traj[0].save_pdb(tautomer_pdb)

    # generate rdkit mol object with the same atom indices as the trajectory
    # but without the dummy atom
    mol = Chem.MolFromPDBFile(tautomer_pdb, removeHs=False)
    # remove conf of pdb
    mol.RemoveAllConformers()

    # generate energy function, use atom symbols of rdkit mol
    from .ani import ANI_force_and_energy, ANI1ccx

    model = ANI1ccx()
    energy_function = ANI_force_and_energy(
        model=model, atoms=[a.GetSymbol() for a in mol.GetAtoms()], mol=None
    )

    # take every ``thinning``-th conformation and minimize it using ANI
    minimized_traj = []  # store min conformations in here
    frames = traj[::thinning]  # slice once instead of once per iteration
    n_frames = len(frames)
    for idx, conf in enumerate(frames):
        print(f"{idx}/{n_frames}")
        c = (conf.xyz[0]) * unit.nanometer
        # only real atoms, therefore lambda not needed
        min_conf = energy_function.minimize(c)[0]
        minimized_traj.append(min_conf)
        new_conf = _generate_conformer(min_conf)
        # add the conformation to the rdkit mol object
        mol.AddConformer(new_conf, assignId=True)

    # generate mdtraj object with minimized confs
    minimum_traj = md.Trajectory(
        np.array([v.value_in_unit(unit.nanometer) for v in minimized_traj]),
        traj.topology,
    )

    # generate reference_mol
    reference = prune_conformers(
        ani_results[name][f"t{tautomer_idx}-confs"],
        ani_results[name][f"t{tautomer_idx}-energies"],
        rmsd_threshold=0.1,
    )

    # remove most hydrogens
    reference_mol = _remove_hydrogens(copy.deepcopy(reference[0]))
    compare_mol = _remove_hydrogens(copy.deepcopy(mol))

    # find atom indices that are compared for RMSD
    sub_m = rdFMCS.FindMCS(
        [reference_mol, compare_mol],
        bondCompare=rdFMCS.BondCompare.CompareAny,
        maximizeBonds=False,
    )
    mcsp = Chem.MolFromSmarts(sub_m.smartsString, False)

    # the order of the substructure lists are the same for both
    # substructure matches => substructure_idx_m1[i] = substructure_idx_m2[i]
    substructure_idx_reference = reference_mol.GetSubstructMatches(mcsp, uniquify=False)
    substructure_idx_compare = compare_mol.GetSubstructMatches(mcsp, uniquify=False)

    n_reference = len(reference_mol.GetConformers())
    n_compare = len(compare_mol.GetConformers())

    # generate rmsd matrix
    rmsd = np.zeros(
        (reference_mol.GetNumConformers(), mol.GetNumConformers()), dtype=float
    )

    # atom mapping
    from itertools import combinations

    # NOTE(review): this pairs *all* matches with each other, including
    # reference/reference and compare/compare pairs, although AlignMol expects
    # (reference_mol index, compare_mol index) tuples. ``itertools.product`` of
    # the two match lists may be what was intended -- confirm before changing,
    # since it alters the reported RMSDs.
    for nr_of_mappings, (e1, e2) in enumerate(
        combinations(substructure_idx_reference + substructure_idx_compare, 2)
    ):
        atom_mapping = list(zip(e1, e2))
        # get rmsd matrix with a given set of atom mapping
        # update rmsd matrix whenever lower RMSD appears
        for i in range(n_reference):
            for j in range(n_compare):
                proposed_rmsd = AllChem.AlignMol(
                    reference_mol, compare_mol, i, j, atomMap=atom_mapping
                )
                # test if this is optimal atom mapping
                if nr_of_mappings == 0:
                    rmsd[i, j] = proposed_rmsd
                else:
                    rmsd[i, j] = min(rmsd[i, j], proposed_rmsd)

    # save clusters: number of minimized snapshots within 0.1 of each reference conformer
    got_hit = np.count_nonzero(rmsd[:, :n_compare] <= 0.1, axis=1).astype(int)

    sns.heatmap(rmsd)
    plt.show()

    print(f"Nr of clusters: {len(got_hit)}")
    print(
        f"Nr of conformations part of one cluster: {sum(got_hit)}/{mol.GetNumConformers()}"
    )
    print(f"Clusters present: {got_hit}")

    AllChem.AlignMolConformers(reference_mol)
    AllChem.AlignMolConformers(compare_mol)

    return compare_mol, minimum_traj, reference_mol, reference[1]
def _place_fragment(frag_mol, frag_atoms, frag_xyz, edmol, atomed, xyzed,
                    used_educt_atoms, atomed_new, xyzed_new, n_trials=10000):
    """Overlay one product fragment onto the educt and record the matched educt atoms.

    Repeatedly builds a random element-wise pairing between fragment atoms and
    not-yet-used educt atoms, aligns the fragment onto the educt with RDKit's
    ``AlignMol`` (weights = atomic number squared) and keeps the pairing with
    the lowest RMSD. The matched educt atoms are appended to ``atomed_new`` /
    ``xyzed_new`` in fragment order.

    A single-atom fragment cannot be aligned by ``AlignMol``; its coordinate in
    ``frag_xyz`` is instead overwritten in place with the matched educt position.

    Returns the list of ``[fragment_index, educt_index]`` pairs that was used.
    """
    n_frag = len(frag_atoms)
    n_educt = len(atomed)
    single_atom = n_frag == 1

    # The weights depend only on the fragment, so compute them once.
    weights = [1] * n_frag
    if not single_atom:
        weights = [frag_mol.GetAtomWithIdx(i).GetAtomicNum() ** 2 for i in range(n_frag)]

    best_pairs = []
    best_rmsd = 100
    pairs = list(range(n_frag))
    # repeatedly tests random atom-pairlists. probably not the smartest way
    for _ in range(n_trials):
        # ``list(...)`` is required: a Python 3 ``range`` can neither be
        # shuffled nor assigned to.
        pairs = list(range(n_frag))
        order = list(range(n_frag))
        random.shuffle(order)
        taken = [False] * n_educt
        for j in order:
            for i in range(n_educt):
                if atomed[i] != frag_atoms[j] or i in used_educt_atoms or taken[i]:
                    continue
                pairs[j] = [j, i]
                taken[i] = True
                break
        # NOTE(review): a fragment atom without a free educt atom of the same
        # element keeps its placeholder integer in ``pairs`` and makes the
        # calls below fail.
        if single_atom:
            break
        rmsd = AllChem.AlignMol(frag_mol, edmol, atomMap=pairs, weights=weights)
        if rmsd < best_rmsd:
            best_rmsd = rmsd
            best_pairs = pairs

    if single_atom:
        # Simply move the atom to the corresponding educt location.
        educt_idx = pairs[0][1]
        frag_xyz[0] = xyzed[educt_idx]
        xyzed_new.append(xyzed[educt_idx])
        atomed_new.append(atomed[educt_idx])
        return pairs

    if n_frag > 1:
        # realign with best pairlist. also, write to new educt geo
        for pair in best_pairs:
            xyzed_new.append(xyzed[pair[1]])
            atomed_new.append(atomed[pair[1]])
        rmsd = AllChem.AlignMol(frag_mol, edmol, atomMap=best_pairs, weights=weights)
        print(rmsd)
        return best_pairs

    return pairs


def GenReaction(EdName, Pro1Name, Pro2Name):
    """Align two product fragments onto an educt and print the resulting geometries.

    Reads ``<EdName>.mol``, ``<Pro1Name>.mol`` and ``<Pro2Name>.mol``, overlays
    the first product on the educt, then the second product on the educt atoms
    not claimed by the first. Aligned multi-atom products are written to
    ``pro1mol.mol`` / ``pro2mol.mol`` in the working directory and re-read.
    Finally the products are shifted apart via ``CoM_shift`` (distance argument
    4.0) and the reordered educt and shifted products are printed through the
    project ``io`` helpers.

    The atom pairing is found by random trials, so results depend on the state
    of the ``random`` module.

    Returns ``(xyzed, atomed, charge, spin)``: the educt coordinates and atoms
    as read, and the charge/spin from the last ``io.ReadMol`` call that was made.
    """
    EdFile = EdName + ".mol"
    Pro1File = Pro1Name + ".mol"
    Pro2File = Pro2Name + ".mol"
    edmol = Chem.MolFromMolFile(EdFile, sanitize=True, removeHs=False)
    pro1mol = Chem.MolFromMolFile(Pro1File, sanitize=True, removeHs=False)
    pro2mol = Chem.MolFromMolFile(Pro2File, sanitize=True, removeHs=False)
    print(EdFile)
    print(Pro1File)
    print(Pro2File)

    xyzed, atomed, charge, spin = io.ReadMol(EdName)
    xyzpro1, atompro1, charge, spin = io.ReadMol(Pro1Name)
    xyzpro2, atompro2, charge, spin = io.ReadMol(Pro2Name)
    xyzed_new = []
    atomed_new = []

    # Align first product to molecule
    pairlist1 = _place_fragment(pro1mol, atompro1, xyzpro1, edmol, atomed, xyzed,
                                set(), atomed_new, xyzed_new)
    if len(atompro1) > 1:
        # Close the file before reading it back.
        with open('pro1mol.mol', 'w+') as handle:
            print(Chem.MolToMolBlock(pro1mol), file=handle)
        xyzpro1, atompro1, charge, spin = io.ReadMol('pro1mol')

    # Align second product to remaining framework: educt atoms already used
    # by the first product are excluded.
    used_by_first = set(pair[1] for pair in pairlist1)
    _place_fragment(pro2mol, atompro2, xyzpro2, edmol, atomed, xyzed,
                    used_by_first, atomed_new, xyzed_new)
    if len(atompro2) > 1:
        with open('pro2mol.mol', 'w+') as handle:
            print(Chem.MolToMolBlock(pro2mol), file=handle)
        xyzpro2, atompro2, charge, spin = io.ReadMol('pro2mol')

    # shift along vector connecting fragments' centers of mass
    xyzpro1s, xyzpro2s = CoM_shift(atompro1, xyzpro1, atompro2, xyzpro2, 4.0)

    # merge aligned and shifted product geometries
    atom_diss, xyz_diss, frags = io.MergeXYZ(atompro1, xyzpro1, atompro2, xyzpro2)
    atom_shift, xyz_shift, frags = io.MergeXYZ(atompro1, xyzpro1s, atompro2, xyzpro2s)

    io.PrintXYZ('Reordered Educt', atomed_new, xyzed_new)
    io.PrintXYZ('Shifted Products', atom_shift, xyz_shift)

    io.PrintAimsGeo('Reordered Educt', atomed_new, xyzed_new)
    io.PrintAimsGeo('Shifted Products', atom_shift, xyz_shift)

    return xyzed, atomed, charge, spin
mv = mols[1] #mcs = rdFMCS.FindMCS([ref, mv], completeRingsOnly=True, matchValences=True, ringMatchesRingOnly=True) # G078 mcs = rdFMCS.FindMCS([ref, mv], completeRingsOnly=True, ringMatchesRingOnly=True) smartsString = mcs.smartsString #from rdkit.Chem import MCS #smartsString = MCS.FindMCS(mols, atomCompare="any").smarts patt = Chem.MolFromSmarts(smartsString) # smartsString refMatch = ref.GetSubstructMatch(patt) mvMatch = mv.GetSubstructMatch(patt) try: AllChem.AlignMol(mv, ref, atomMap=list(zip(mvMatch, refMatch))) except Exception as err: print(err.args) pass ##write out out_pdb_path = out_dir + "/" + out_prefix + ".pdb" out_text = "" for mol in mols: out_text += Chem.MolToPDBBlock(mol) file_out = open(out_pdb_path, "w") file_out.write(out_text) file_out.close() ###############################
def EmbedAlignConstrainedScore(
    prbMol,
    refMols,
    core,
    prbNumConfs=10,
    refNumConfs=10,
    prbCharge=[],
    refCharges=[],
    metric="carbo",
    integrate="gauss",
    partialCharges="gasteiger",
    renormalize=False,
    customrange=None,
    marginMC=10,
    nMC=1,
    basisPsi4='3-21G',
    methodPsi4='scf',
    gridPsi4=1,
):
    """Calculates a constrained alignment based on a common pattern in the input molecules.
    Caution: Will fail if the pattern does not match.
    Calculates a shape and electrostatic potential similarity of the best alignment.

    :param prbMol: RDKit molecule for which shape and electrostatic similarities are calculated.
    :param refMols: RDKit molecule or list/tuple of RDKit molecules serving as references.
    :param core: Common pattern for the constrained embedding as embedded RDKit molecule
    :param prbNumConfs: Number of conformers to create for the probe molecule. A higher number
        creates better alignments but slows down the algorithm.
    :param refNumConfs: Number of conformers to create for each reference molecule. A higher
        number creates better alignments but slows down the algorithm.
    :param prbCharge: (optional) List or array of partial charges of the probe molecule.
        If not given, RDKit Gasteiger Charges are used as default.
    :param refCharges: (optional) List of list or 2D array of partial charges of the reference
        molecules. If not given, RDKit Gasteiger Charges are used as default.
    :param metric: (optional) Similarity metric.
    :param integrate: (optional) Integration method.
    :param partialCharges: (optional) Partial charge distribution.
    :param renormalize: (optional) Boolean whether to renormalize the similarity score to [0:1].
    :param customrange: (optional) Custom range to renormalize to, supply as tuple or list of
        two values (lower bound, upper bound).
    :param marginMC: (optional) Margin up to which to integrate (added to coordinates plus/minus
        their vdW radii) if MC integration is utilized.
    :param nMC: (optional) Number of grid points per 1 Angstrom**3 volume of integration vox
        if MC integration is utilized.
    :param basisPsi4: (optional) Basis set for Psi4 calculation.
    :param methodPsi4: (optional) Method for Psi4 calculation.
    :param gridPsi4: (optional) Integer grid point density for ESP evaluation for Psi4 calculation.
    :return: two lists with one entry per reference molecule: shape similarity
        (``1 - `` best shape Tanimoto distance) and ESP similarity.
    """
    # The list defaults in the signature are never mutated below, so sharing
    # them across calls is harmless.
    if not isinstance(refMols, (list, tuple)):
        refMols = [refMols]

    # ``len`` instead of ``== []`` so that a 2D array of charges works as well.
    if refCharges is None or len(refCharges) == 0:
        refCharges = [[] for _ in refMols]

    prbMol = ConstrainedEmbedMultipleConfs(prbMol, core, numConfs=prbNumConfs)
    # Keep the returned molecules instead of relying on in-place modification.
    refMols = [
        ConstrainedEmbedMultipleConfs(refMol, core, numConfs=refNumConfs)
        for refMol in refMols
    ]

    prbMatch = prbMol.GetSubstructMatch(core)
    allShapeDist = []
    allEspSim = []

    for idx, refMol in enumerate(refMols):
        shapeDist = 1
        prbBestConf = 0
        refBestConf = 0
        refMatch = refMol.GetSubstructMatch(core)
        # The core-to-core atom map is the same for every conformer pair.
        atomMap = list(zip(prbMatch, refMatch))
        for i in range(refNumConfs):
            for j in range(prbNumConfs):
                AllChem.AlignMol(prbMol, refMol, atomMap=atomMap, prbCid=j, refCid=i)
                shape = AllChem.ShapeTanimotoDist(prbMol, refMol, confId1=j, confId2=i)
                if shape < shapeDist:
                    shapeDist = shape
                    prbBestConf = j
                    refBestConf = i
        espSim = GetEspSim(prbMol, refMol, prbBestConf, refBestConf, prbCharge, refCharges[idx],
                           metric, integrate, partialCharges, renormalize, customrange,
                           marginMC, nMC, basisPsi4, methodPsi4, gridPsi4)
        allShapeDist.append(1 - shapeDist)
        allEspSim.append(espSim)

    return allShapeDist, allEspSim
def optimize_linear_rotamers(self, linear_mol, conf_id, cleaved_atom1, cleaved_atom2, dihedrals):
    """
    Helper function of generate() that generates combinations of dihedrals that are rotated together and
    records the resulting distance between the cleaved atoms. Starting from the combinations that bring the
    cleaved atoms closest together, the dihedrals containing the cleaved atoms are refined, and conformers
    that do not clash and differ from those already kept are collected and returned to caller.

    Args:
        linear_mol (RDKit Mol): The linear oligomer. Its conformer ``conf_id`` is rotated during the scan
            and reset to its initial dihedral angles afterwards.
        conf_id (int): The conformer id of the conformer on the linear oligomer to optimize.
        cleaved_atom1 (int): Index of the first cleaved atom.
        cleaved_atom2 (int): Index of the second cleaved atom.
        dihedrals (dict): The dict of dihedral angles that can be rotated on the linear oligomer, with keys
            'other', 'cleaved' and 'cleaved_and_Hs'.

    Returns:
        list: A list of RDKit conformers (at most self.num_confs), each taken from its own copy of
            linear_mol.
    """

    def get_dihedral(conf, atoms):
        return AllChem.GetDihedralDeg(conf, atoms[0], atoms[1], atoms[2], atoms[3])

    def set_dihedral(conf, atoms, angle):
        AllChem.SetDihedralDeg(conf, atoms[0], atoms[1], atoms[2], atoms[3], angle)

    mast_mol = deepcopy(linear_mol)
    mast_mol.RemoveAllConformers()
    optimized_linear_confs, distances = [], []
    linear_conf = linear_mol.GetConformer(conf_id)

    # generate length 2 combinations for dihedrals that don't contain cleaved atoms and get the resulting
    # distances between the two cleaved atoms after applying various angles to those dihedrals. Sort the results
    # based on distance
    for dihedral1, dihedral2 in combinations(dihedrals['other'], 2):
        ini_dihedral1 = get_dihedral(linear_conf, dihedral1)
        ini_dihedral2 = get_dihedral(linear_conf, dihedral2)
        dist = calc_distance(linear_conf, cleaved_atom1, cleaved_atom2)
        distances.append([dist, ini_dihedral1, dihedral1, ini_dihedral2, dihedral2])

        angle1 = 0
        while angle1 < 360:
            set_dihedral(linear_conf, dihedral1, angle1)
            # Restart the inner scan for every value of angle1; without the
            # reset only angle1 == 0 would ever be combined with angle2.
            angle2 = 0
            while angle2 < 360:
                set_dihedral(linear_conf, dihedral2, angle2)
                dist = calc_distance(linear_conf, cleaved_atom1, cleaved_atom2)
                distances.append([dist, angle1, dihedral1, angle2, dihedral2])
                angle2 += self.large_angle_gran
            angle1 += self.large_angle_gran

        # reset dihedrals
        set_dihedral(linear_conf, dihedral1, ini_dihedral1)
        set_dihedral(linear_conf, dihedral2, ini_dihedral2)

    distances.sort(key=lambda x: x[0])

    # starting with the dihedral combinations that minimized the distance between cleaved atoms the most, find
    # the optimum angles for dihedrals that contain cleaved atoms and no hydrogens, then for dihedrals that
    # contain cleaved atoms and hydrogens, until desired number of conformers has been generated
    for _, start_angle1, dihedral1, start_angle2, dihedral2 in distances:
        linear_mol_copy = deepcopy(linear_mol)
        linear_conf = linear_mol_copy.GetConformer(conf_id)

        # set starting dihedrals
        set_dihedral(linear_conf, dihedral1, start_angle1)
        set_dihedral(linear_conf, dihedral2, start_angle2)

        # Only continue the optimization if the rotated geometry has no clashes.
        # The check must look at the copy: linear_mol itself still holds the
        # initial dihedrals.
        if not is_clashing(linear_mol_copy, conf_id, self.clash_threshold):
            self.optimize_dihedrals(linear_conf, cleaved_atom1, cleaved_atom2, dihedrals['cleaved'])
            self.optimize_dihedrals(linear_conf, cleaved_atom1, cleaved_atom2, dihedrals['cleaved_and_Hs'])

            for ref_conf in range(mast_mol.GetNumConformers()):
                rms = AllChem.AlignMol(linear_mol_copy, mast_mol, conf_id, ref_conf, maxIters=self.max_iters)
                if rms < self.min_rmsd:
                    break
            else:
                # Not within min_rmsd of any conformer kept so far: keep it.
                optimized_linear_confs.append(linear_conf)
                mast_mol.AddConformer(linear_conf, assignId=True)

        # return when num_confs valid conformers has been obtained
        if len(optimized_linear_confs) == self.num_confs:
            break

    return optimized_linear_confs