def butina_clustering_m(rdkit_mol, difference_matrix='tfd', threshold=0.001):
    """Cluster the conformers of a molecule with RDKit's Butina algorithm.

    A pairwise conformer distance matrix is computed, the conformers are
    clustered on it, and a copy of the molecule holding only the cluster
    centroids is returned.

    Args:
        rdkit_mol: RDKit molecule carrying the conformers to cluster.
        difference_matrix: Distance metric, case-insensitive. ``'tfd'`` uses
            ``TorsionFingerprints.GetTFDMatrix``; ``'rms'`` uses
            ``AllChem.GetConformerRMSMatrix``.
        threshold: Distance cutoff handed to ``Butina.ClusterData``.

    Returns:
        A deep copy of ``rdkit_mol`` whose conformers are the cluster
        centroids, re-numbered by ``AddConformer(..., assignId=True)``.

    Raises:
        ValueError: If ``difference_matrix`` is neither ``'tfd'`` nor
            ``'rms'``.
    """
    # Validate the metric first so an unsupported value fails with a clear
    # message instead of an unbound-variable error further down.
    metric = difference_matrix.lower()
    if metric == 'tfd':
        diffmat = TorsionFingerprints.GetTFDMatrix(rdkit_mol)
    elif metric == 'rms':
        diffmat = AllChem.GetConformerRMSMatrix(rdkit_mol, prealigned=False)
    else:
        raise ValueError(
            "difference_matrix must be 'tfd' or 'rms', got %r"
            % (difference_matrix,)
        )

    # Cluster the conformers on the precomputed distances.
    num_confs = rdkit_mol.GetNumConformers()
    clt = Butina.ClusterData(diffmat, num_confs, threshold,
                             isDistData=True, reordering=True)

    # The first member of every cluster is its centroid. Cluster members are
    # positions in the distance matrix, so translate them to real conformer
    # IDs before lookup (the two only coincide when IDs are 0..n-1).
    conf_ids = [conf.GetId() for conf in rdkit_mol.GetConformers()]
    centroid_ids = [conf_ids[cluster[0]] for cluster in clt]

    # Build the result on a copy so the caller's molecule keeps all of its
    # conformers.
    new_rdkit_mol = copy.deepcopy(rdkit_mol)
    new_rdkit_mol.RemoveAllConformers()
    for conf_id in centroid_ids:
        new_rdkit_mol.AddConformer(rdkit_mol.GetConformer(conf_id),
                                   assignId=True)
    return new_rdkit_mol
def testTorsionFingerprints(self):
    """Exercise the TorsionFingerprints API on the 1DWD / 1PPC ligands.

    The reference structure is the x-ray ligand from the paper
    (JCIM, 52, 1499, 2012): 1DWD. The test walks through torsion lists,
    torsion weights, torsion angles, the TFD itself and finally the
    convenience wrappers.
    """
    template = Chem.MolFromSmiles(
        'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')

    def load_ligand(pdb_name):
        # Read a ligand from the RDKit test data and give it proper bond
        # orders from the SMILES template.
        pdb_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', pdb_name)
        raw = Chem.MolFromPDBFile(pdb_path)
        return AllChem.AssignBondOrdersFromTemplate(template, raw)

    ligand = load_ligand('1DWD_ligand.pdb')

    # --- torsion lists ---
    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(ligand)
    self.assertEqual(len(chain_tors), 11)
    self.assertEqual(len(ring_tors), 4)
    self.assertAlmostEqual(chain_tors[-1][1], 180.0, places=4)

    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(
        ligand, maxDev='spec')
    self.assertAlmostEqual(chain_tors[-1][1], 90.0, places=4)

    with self.assertRaises(ValueError):
        TorsionFingerprints.CalculateTorsionLists(ligand, maxDev='test')

    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(
        ligand, symmRadius=0)
    self.assertEqual(len(chain_tors[0][0]), 2)

    # --- weights ---
    tors_weights = TorsionFingerprints.CalculateTorsionWeights(ligand)
    self.assertAlmostEqual(tors_weights[4], 1.0)
    self.assertEqual(len(tors_weights), len(chain_tors + ring_tors))

    tors_weights = TorsionFingerprints.CalculateTorsionWeights(ligand, 15, 14)
    self.assertAlmostEqual(tors_weights[3], 1.0)

    with self.assertRaises(ValueError):
        TorsionFingerprints.CalculateTorsionWeights(ligand, 15, 3)

    # --- torsion angles ---
    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(ligand)
    angles_1dwd = TorsionFingerprints.CalculateTorsionAngles(
        ligand, chain_tors, ring_tors)
    self.assertEqual(len(tors_weights), len(angles_1dwd))
    self.assertAlmostEqual(angles_1dwd[2][0][0], 232.5346, places=4)

    # --- torsion fingerprint deviation ---
    deviation = TorsionFingerprints.CalculateTFD(angles_1dwd, angles_1dwd)
    self.assertAlmostEqual(deviation, 0.0)

    other = load_ligand('1PPC_ligand.pdb')
    angles_1ppc = TorsionFingerprints.CalculateTorsionAngles(
        other, chain_tors, ring_tors)
    tors_weights = TorsionFingerprints.CalculateTorsionWeights(ligand)

    deviation = TorsionFingerprints.CalculateTFD(
        angles_1dwd, angles_1ppc, weights=tors_weights)
    self.assertAlmostEqual(deviation, 0.0691, places=4)

    deviation = TorsionFingerprints.CalculateTFD(angles_1dwd, angles_1ppc)
    self.assertAlmostEqual(deviation, 0.1115, places=4)

    # --- wrapper functions ---
    deviation = TorsionFingerprints.GetTFDBetweenMolecules(ligand, other)
    self.assertAlmostEqual(deviation, 0.0691, places=4)

    # Attach the second ligand's conformer twice so the first molecule ends
    # up with three conformers for the conformer-level wrappers.
    ligand.AddConformer(other.GetConformer(), assignId=True)
    ligand.AddConformer(other.GetConformer(), assignId=True)

    deviations = TorsionFingerprints.GetTFDBetweenConformers(
        ligand, confIds1=[0], confIds2=[1, 2])
    self.assertEqual(len(deviations), 2)
    self.assertAlmostEqual(deviations[0], 0.0691, places=4)

    condensed = TorsionFingerprints.GetTFDMatrix(ligand)
    self.assertEqual(len(condensed), 3)
def tfd_matrix(mol: Chem.Mol) -> np.ndarray:
    """Return the full symmetric TFD matrix for all conformers of ``mol``.

    ``TorsionFingerprints.GetTFDMatrix`` yields only the strictly lower
    triangle as a flat, row-by-row list. This function expands that list
    into a dense square matrix with a zero diagonal.

    Args:
        mol: RDKit molecule carrying the conformers to compare.

    Returns:
        A ``(n, n)`` float array, where ``n`` is the number of conformers and
        entry ``[i, j]`` is the unweighted TFD between conformers ``i`` and
        ``j`` (by position).
    """
    condensed = TorsionFingerprints.GetTFDMatrix(mol, useWeights=False)

    # Take the size straight from the molecule rather than inverting the
    # triangular number len(condensed) = n * (n - 1) / 2 with a float sqrt.
    n_confs = mol.GetNumConformers()

    square = np.zeros((n_confs, n_confs))
    # tril_indices walks the strictly lower triangle row by row, which is the
    # same order in which the condensed list is laid out.
    square[np.tril_indices(n_confs, k=-1)] = condensed

    # Mirror the lower triangle into the upper one; the diagonal stays zero.
    return square + square.T
def cluster_conformers(mol, mode="RMSD", threshold=2.0):
    """Cluster the conformers of ``mol`` with the Butina algorithm.

    Args:
        mol: RDKit molecule carrying the conformers to cluster.
        mode: ``"TFD"`` selects the torsion-fingerprint-deviation matrix;
            any other value selects the conformer RMS matrix.
        threshold: Distance cutoff handed to ``Butina.ClusterData``.

    Returns:
        Whatever ``Butina.ClusterData`` returns for the chosen distances.
    """
    distances = (
        TorsionFingerprints.GetTFDMatrix(mol)
        if mode == "TFD"
        else AllChem.GetConformerRMSMatrix(mol, prealigned=False)
    )
    n_conformers = mol.GetNumConformers()
    return Butina.ClusterData(
        distances,
        n_conformers,
        threshold,
        isDistData=True,
        reordering=True,
    )
def rdkit_tfd(mol):
    """Return ``TorsionFingerprints.GetTFDMatrix(mol)`` with default options."""
    return TorsionFingerprints.GetTFDMatrix(mol)
# For every input molecule: embed conformers, align and UFF-optimise them,
# cluster them on their torsion fingerprint deviation and write one SD file
# per cluster centroid.
# NOTE(review): `mols`, `N`, `RMS`, `nbthread`, `cutoff` and `output_folder`
# are defined outside this block; confirm their types against the caller.
for mol in mols:
    if mol is None:
        # Molecules that are None are skipped without a message.
        continue

    mol = Chem.AddHs(mol)
    conf = AllChem.EmbedMultipleConfs(mol,
                                      numConfs=int(N),
                                      pruneRmsThresh=float(RMS),
                                      useExpTorsionAnglePrefs=True,
                                      useBasicKnowledge=True,
                                      numThreads=int(nbthread))
    if len(conf) == 0:
        # Not able to make conformers.
        print("Could not generate any conformers for %s" % (mol.GetProp("_Name")))
        continue

    Chem.rdMolAlign.AlignMolConformers(mol)
    AllChem.UFFOptimizeMoleculeConfs(mol, numThreads=int(nbthread))

    # Discard conformers that are identical around an axis of symmetry,
    # which pruneRmsThresh in the embedding step does not handle: cluster on
    # the TFD matrix and keep only one representative per cluster.
    matrix = TorsionFingerprints.GetTFDMatrix(mol,
                                              useWeights=False,
                                              maxDev='equal',
                                              symmRadius=2,
                                              ignoreColinearBonds=True)
    conf_clusters = Butina.ClusterData(matrix, len(conf), cutoff, isDistData=True)

    mol_name = mol.GetProp("_Name")
    for confnb, cluster in enumerate(conf_clusters, start=1):
        writer = Chem.SDWriter(output_folder + "/" + mol_name + "_conf_" + str(confnb) + ".sdf")
        try:
            # Output only the centroid (first member) of the cluster.
            writer.write(mol, confId=cluster[0])
        finally:
            # Close the file even if writing this conformer fails.
            writer.close()