def prepare(self, mol, ionize=None, align=None, add_hydrogens=None):
    """
    Return a prepared copy of a molecule, ready for featurization.

    Each preparation step falls back to the corresponding instance
    attribute unless it is explicitly overridden with a keyword argument.
    For example, pass ionize=False to skip ionization for one molecule.

    Parameters
    ----------
    mol : RDMol
        Molecule. It is copied first; the steps below work on the copy.
    ionize : bool, optional (default None)
        Override for self.ionize.
    align : bool, optional (default None)
        Override for self.align.
    add_hydrogens : bool, optional (default None)
        Override for self.add_hydrogens.

    Returns
    -------
    The prepared copy of the molecule.
    """
    # Resolve per-call overrides against the instance-level defaults.
    do_ionize = self.ionize if ionize is None else ionize
    do_align = self.align if align is None else align
    do_add_hydrogens = (
        self.add_hydrogens if add_hydrogens is None else add_hydrogens
    )

    prepared = Chem.Mol(mol)  # work on a copy

    # Step 1: ionization
    if do_ionize:
        prepared = self.ionizer(prepared)

    # Step 2: orientation
    if do_align:
        # canonicalization can fail when hydrogens are present
        prepared = Chem.RemoveHs(prepared)
        origin = rdGeometry.Point3D(0, 0, 0)
        for conformer in prepared.GetConformers():
            rdMolTransforms.CanonicalizeConformer(conformer, center=origin)

    # Step 3: hydrogens (added with coordinates)
    if do_add_hydrogens:
        prepared = Chem.AddHs(prepared, addCoords=True)

    return prepared
def test1Canonicalization(self):
    """
    Check centroid computation and the conformer canonicalization routes.

    The test verifies that:
    * the centroid of a single-atom conformer is the atom position;
    * ComputeCanonicalTransform gives the same 4x4 transform whether the
      centroid is passed explicitly or left to the default;
    * applying that transform with TransformConformer yields the same
      coordinates as CanonicalizeConformer and as CanonicalizeMol.

    Uses assertTrue rather than the failUnless alias, which was removed
    from unittest in Python 3.12.
    """

    def assert_same_coordinates(conf_a, conf_b, num_atoms):
        # Compare x, y and z of every atom position with the feq tolerance.
        for idx in range(num_atoms):
            pos_a = list(conf_a.GetAtomPosition(idx))
            pos_b = list(conf_b.GetAtomPosition(idx))
            self.assertTrue(feq(pos_a[0], pos_b[0]))
            self.assertTrue(feq(pos_a[1], pos_b[1]))
            self.assertTrue(feq(pos_a[2], pos_b[2]))

    # Single atom: the centroid must coincide with the atom itself.
    mol = Chem.MolFromSmiles("C")
    conf = Chem.Conformer(1)
    conf.SetAtomPosition(0, (4.0, 5.0, 6.0))
    mol.AddConformer(conf, 1)

    conf = mol.GetConformer()
    pt = rdmt.ComputeCentroid(conf)
    self.assertTrue(ptEq(pt, geom.Point3D(4.0, 5.0, 6.0)))

    fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                         'MolTransforms', 'test_data', '1oir.mol')
    m = Chem.MolFromMolFile(fileN)

    # Explicit centroid and default centre must give the same transform.
    cpt = rdmt.ComputeCentroid(m.GetConformer())
    trans = rdmt.ComputeCanonicalTransform(m.GetConformer(), cpt)
    trans2 = rdmt.ComputeCanonicalTransform(m.GetConformer())
    for i in range(4):
        for j in range(4):
            self.assertTrue(feq(trans[i, j], trans2[i, j]))

    # Reference coordinates: apply the transform by hand.
    rdmt.TransformConformer(m.GetConformer(), trans2)
    nats = m.GetNumAtoms()

    # Route 1: canonicalize the conformer directly.
    m2 = Chem.MolFromMolFile(fileN)
    rdmt.CanonicalizeConformer(m2.GetConformer())
    assert_same_coordinates(m.GetConformer(), m2.GetConformer(), nats)

    # Route 2: canonicalize through the molecule-level helper.
    m3 = Chem.MolFromMolFile(fileN)
    rdmt.CanonicalizeMol(m3)
    assert_same_coordinates(m.GetConformer(), m3.GetConformer(), nats)
def set_dehedral_angles(m, theta=120.0, rotate_general=True, rotate_ol=True, rotate_ine=True):
    """
    Systematic rotation of dihedral angles in steps of theta degrees.

    Taken from Mads.

    Dihedrals are located with SMARTS patterns, one dihedral is kept per
    bond, and a conformer is added to ``m`` for every combination of the
    angles 0, theta, 2*theta, ... (``int(360 / theta)`` values per dihedral).

    Parameters
    ----------
    m : RDKit molecule
        Molecule with at least one conformer. Its default conformer is
        modified in place and a copy is appended for every combination.
    theta : float, optional (default 120.0)
        Angular step in degrees.
    rotate_general : bool, optional (default True)
        Include matches of the general pattern
        ``[!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1]``.
    rotate_ol : bool, optional (default True)
        Include matches of ``[*]~[*]-[O,S]-[#1]``.
    rotate_ine : bool, optional (default True)
        Include matches of ``[*]~[*]-[NX3;H2]-[#1]``.

    Returns
    -------
    The same molecule object ``m``, with the generated conformers added.
    """
    smarts_patterns = []
    if rotate_general:
        smarts_patterns.append("[!#1]~[!$(*#*)&!D1]-!@[!$(*#*)&!D1]~[!#1]")
    if rotate_ol:
        smarts_patterns.append("[*]~[*]-[O,S]-[#1]")
    if rotate_ine:
        smarts_patterns.append("[*]~[*]-[NX3;H2]-[#1]")

    rotate_idx_list = []
    for smarts in smarts_patterns:
        rotate_idx_list.extend(m.GetSubstructMatches(Chem.MolFromSmarts(smarts)))

    # Keep one dihedral per bond. The bond key is orientation-independent so
    # that a match reported as (.., j, i, ..) is recognised as the bond that
    # was already seen as (.., i, j, ..) and is not rotated a second time.
    seen_bonds = set()
    idx_dihedral = []
    atoms = m.GetAtoms()
    for first, begin, end, last in rotate_idx_list:
        bond_key = frozenset((begin, end))
        if bond_key in seen_bonds:
            continue
        seen_bonds.add(bond_key)
        idx_dihedral.append((first, begin, end, last))
        print("found", first, begin, end, last)
        print(atoms[first].GetAtomicNum())
        print(atoms[begin].GetAtomicNum())
        print(atoms[end].GetAtomicNum())
        print(atoms[last].GetAtomicNum())

    # The conformer of the parent molecule; it is rotated in place below.
    parent = m.GetConformer()

    # Candidate angles for every movable dihedral.
    angle_choices = []
    for _ in idx_dihedral:
        new_dihedrals = [x * theta for x in range(int(360. / theta))]
        print(new_dihedrals)
        angle_choices.append(new_dihedrals)

    # Walk all combinations of dihedral angles and store one conformer each.
    for angle_combination in itertools.product(*angle_choices):
        for (first, begin, end, last), angle in zip(idx_dihedral, angle_combination):
            print(first, begin, end, last, angle)
            rdMolTransforms.SetDihedralDeg(parent, first, begin, end, last, angle)

        # translate mol to centroid
        rdMolTransforms.CanonicalizeConformer(parent)
        # Adds a conformer with a newly assigned id; parent is reused for
        # the next combination.
        m.AddConformer(parent, assignId=True)

    return m
def __call__(self, *args, **kwargs):
    """
    Generate conformers for the molecule passed by keyword and attach them.

    Exactly one keyword argument must name an entry of
    ``self.input_formats``; its ``.data`` attribute is handed to the
    matching loader in ``self.loaders``.

    Steps:
    1. Add hydrogens and embed ``self.n * 4`` conformers, pruned with
       ``self.pre_rmsThresh``.
    2. UFF-optimize every conformer and rank them by the value returned
       from ``self._rdkit_calc_energy_uff``.
    3. If at least ``self.n`` conformers exist, greedily keep those whose
       RMS to every conformer already kept is not below ``self.rmsThresh``,
       then randomly sample ``self.n`` of them when enough remain.
    4. Canonicalize the selected conformers, re-rank them, and convert
       each one to a ``PDB`` object carrying its energy.

    Returns
    -------
    dict
        The received ``kwargs`` with an added ``"pdbs"`` entry.
    """
    matched_formats = set(kwargs).intersection(self.input_formats)
    assert len(matched_formats) == 1
    typename = list(matched_formats)[0]
    payload = kwargs[typename]

    molecule = Chem.AddHs(self.loaders[typename](payload.data))

    conf_ids = AllChem.EmbedMultipleConfs(
        molecule,
        self.n * 4,
        maxAttempts=10000,
        pruneRmsThresh=self.pre_rmsThresh,
    )
    for cid in conf_ids:
        AllChem.UFFOptimizeMolecule(molecule, maxIters=5000, confId=cid)

    # Rank all embedded conformers by ascending energy.
    scored = [
        (cid, self._rdkit_calc_energy_uff(molecule, cid, 0))
        for cid in conf_ids
    ]
    scored.sort(key=operator.itemgetter(1))
    ranked_ids = [cid for cid, _ in scored]

    total = len(ranked_ids)  # Conformers we have in total
    if total < self.n:
        selected = ranked_ids
    else:
        # Greedy pruning, starting from the lowest-ranked-energy conformer.
        kept = [ranked_ids[0]]
        for candidate in ranked_ids[1:]:
            too_close = any(
                AllChem.GetConformerRMS(
                    molecule, candidate, reference, prealigned=False
                ) < self.rmsThresh
                for reference in kept
            )
            if not too_close:
                kept.append(candidate)
        selected = kept

    if len(selected) >= self.n:
        selected = random.sample(selected, self.n)

    for cid in selected:
        rdmt.CanonicalizeConformer(molecule.GetConformer(cid))

    # Recompute the energies of the survivors and order them again.
    rescored = sorted(
        [
            (cid, self._rdkit_calc_energy_uff(molecule, cid, 0))
            for cid in selected
        ],
        key=operator.itemgetter(1),
    )

    pdbs = []
    for cid, energy in rescored:
        # Round-trip through a PDB block to get a single-conformer molecule.
        conformer_mol = Chem.MolFromPDBBlock(
            Chem.MolToPDBBlock(molecule, confId=cid),
            sanitize=False,
            removeHs=False,
        )
        pdb_text = rdkit2amberpdb(Chem.MolToPDBBlock(conformer_mol))
        pdbs.append(PDB(pdb_text, uff_energy=energy))

    kwargs["pdbs"] = PDBS(pdbs=pdbs)
    return kwargs