def testRootedTorsions(self):
    """Check topological-torsion fingerprints restricted with ``fromAtoms``.

    For phenol, every torsion count obtained when rooting at atom 0 must be
    no larger than the count for the same key in the unrestricted
    fingerprint.  For 'COCC' the unrestricted fingerprint and the one rooted
    at atom 0 each hold one nonzero element, while rooting at atom 1 yields
    none.
    """
    phenol = Chem.MolFromSmiles('Oc1ccccc1')
    full_counts = rdMD.GetTopologicalTorsionFingerprint(
        phenol).GetNonzeroElements()
    rooted_counts = rdMD.GetTopologicalTorsionFingerprint(
        phenol, fromAtoms=(0, )).GetNonzeroElements()
    for key, count in rooted_counts.items():
        self.assertTrue(count <= full_counts[key])

    ether = Chem.MolFromSmiles('COCC')
    # (extra keyword arguments, expected number of nonzero elements)
    expectations = (
        ({}, 1),
        ({'fromAtoms': (0, )}, 1),
        ({'fromAtoms': (1, )}, 0),
    )
    for extra_kwargs, expected in expectations:
        fp = rdMD.GetTopologicalTorsionFingerprint(ether, **extra_kwargs)
        self.assertEqual(len(fp.GetNonzeroElements()), expected)
def GenerateTopologicalTorsionsFingerprints(Mols):
    """Generate TopologicalTorsions fingerprints for a sequence of molecules.

    The fingerprint flavour is selected by
    OptionsInfo["SpecifiedFingerprintsType"]: a case-insensitive match on
    "BitVect" produces hashed bit-vector fingerprints sized by the FPSize and
    BitsPerHash parameters; any other value produces the unhashed
    topological torsion fingerprint.

    Arguments:
        Mols: iterable of molecules to fingerprint.

    Returns:
        list: one fingerprint per molecule, in input order.
    """
    FingerprintsType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo(
        "\nGenerating TopologicalTorsions %s fingerprints..." %
        FingerprintsType)

    # All three parameters are read up front, whichever branch is taken.
    Params = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]
    UseChirality = Params["UseChirality"]
    FPSize = Params["FPSize"]
    BitsPerHash = Params["BitsPerHash"]

    if not re.match("^BitVect$", FingerprintsType, re.I):
        # Generate LongSparseIntVect fingerprint...
        return [
            rdMolDescriptors.GetTopologicalTorsionFingerprint(
                Mol, includeChirality=UseChirality) for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash))
    return [
        rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            Mol,
            includeChirality=UseChirality,
            nBits=FPSize,
            nBitsPerEntry=BitsPerHash) for Mol in Mols
    ]
def GenerateTopologicalTorsionsFingerprints(Mols):
    """Generate TopologicalTorsions fingerprints for a sequence of molecules.

    When OptionsInfo["GenerateBitVectFingerints"] is truthy, hashed
    bit-vector fingerprints with 2048 bits and 4 bits per entry are
    generated; otherwise the unhashed topological torsion fingerprint is
    generated.

    Arguments:
        Mols: iterable of molecules to fingerprint.

    Returns:
        list: one fingerprint per molecule, in input order.
    """
    MiscUtil.PrintInfo("\nGenerating TopologicalTorsions fingerprints...")

    Params = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]
    UseChirality = Params["UseChirality"]

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate LongSparseIntVect fingerprint...
        return [
            rdMolDescriptors.GetTopologicalTorsionFingerprint(
                Mol, includeChirality=UseChirality) for Mol in Mols
        ]

    # Fixed bit-vector geometry for this code path.
    return [
        rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            Mol, includeChirality=UseChirality, nBits=2048, nBitsPerEntry=4)
        for Mol in Mols
    ]
def testTorsionValues(self):
    """Compare freshly computed torsion fingerprints with stored references.

    Each entry of the table pairs a SMILES string with a base64-encoded
    serialized ``LongSparseIntVect``.  The fingerprint computed for the
    SMILES must have a Dice similarity of 1.0 to the decoded reference and
    must compare equal to it.
    """
    import base64

    testD = (
        ('CCCO', b'AQAAAAgAAAD/////DwAAAAEAAAAAAAAAIECAAAMAAAABAAAA\n'),
        ('CNc1ccco1',
         b'AQAAAAgAAAD/////DwAAAAkAAAAAAAAAIICkSAEAAAABAAAAKVKgSQEAAAABAAAAKVCgUAEAAAAB\nAAAAKVCgUQEAAAABAAAAKVCkCAIAAAABAAAAKdCkCAIAAAABAAAAKVCgSAMAAAABAAAAKVCkSAMA\nAAABAAAAIICkSAMAAAABAAAA\n'
         ),
    )
    for smi, txt in testD:
        # base64.decodestring() was removed in Python 3.9; decodebytes() is
        # its direct replacement and accepts the same newline-wrapped input.
        pkl = base64.decodebytes(txt)
        fp = rdMD.GetTopologicalTorsionFingerprint(Chem.MolFromSmiles(smi))
        fp2 = DataStructs.LongSparseIntVect(pkl)
        self.assertEqual(DataStructs.DiceSimilarity(fp, fp2), 1.0)
        self.assertEqual(fp, fp2)
def testAtomPairOptions(self):
    """Check that ``atomInvariants`` overrides the default atom typing.

    For each of the four fingerprint functions exercised here (atom pairs
    and topological torsions, unhashed and hashed bit vector):

    * benzene and pyridine differ with default invariants,
    * they become equal when every atom is given the same invariant,
    * they differ again when the two molecules get different invariants.
    """
    benzene = Chem.MolFromSmiles('c1ccccc1')
    pyridine = Chem.MolFromSmiles('c1ccccn1')
    all_ones = [1] * 6
    all_twos = [2] * 6

    # Same order as the checks were originally written out by hand.
    fingerprinters = (
        rdMD.GetAtomPairFingerprint,
        rdMD.GetHashedAtomPairFingerprintAsBitVect,
        rdMD.GetTopologicalTorsionFingerprint,
        rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect,
    )
    for make_fp in fingerprinters:
        self.assertNotEqual(make_fp(benzene), make_fp(pyridine))

        self.assertEqual(
            make_fp(benzene, atomInvariants=all_ones),
            make_fp(pyridine, atomInvariants=all_ones))

        self.assertNotEqual(
            make_fp(benzene, atomInvariants=all_ones),
            make_fp(pyridine, atomInvariants=all_twos))
def testTopologicalTorsions(self):
    """Check total torsion counts for small chains and the size limit.

    The optional second positional argument selects the path size used for
    the fingerprint (3-atom paths in the cases below); a value of 7 is
    accepted for an 11-carbon chain while 8 raises ``ValueError``.
    """
    # Too short to contain any path of the default size.
    mol = Chem.MolFromSmiles("CC")
    fp = rdMD.GetTopologicalTorsionFingerprint(mol)
    self.assertEqual(fp.GetTotalVal(), 0)

    mol = Chem.MolFromSmiles("CCCC")
    fp = rdMD.GetTopologicalTorsionFingerprint(mol)
    self.assertEqual(fp.GetTotalVal(), 1)
    fp = rdMD.GetTopologicalTorsionFingerprint(mol, 3)
    self.assertEqual(fp.GetTotalVal(), 2)

    mol = Chem.MolFromSmiles("CCCO")
    fp = rdMD.GetTopologicalTorsionFingerprint(mol)
    self.assertEqual(fp.GetTotalVal(), 1)
    fp = rdMD.GetTopologicalTorsionFingerprint(mol, 3)
    self.assertEqual(fp.GetTotalVal(), 2)

    mol = Chem.MolFromSmiles("CCCCCCCCCCC")
    # Size 7 must be accepted (the call itself is the check) ...
    rdMD.GetTopologicalTorsionFingerprint(mol, 7)
    # ... whereas size 8 is rejected.
    with self.assertRaises(ValueError):
        rdMD.GetTopologicalTorsionFingerprint(mol, 8)
atomId -- the atom to remove the pairs for (if -1, no pair is removed) fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maxmimum path length for an atom pair nBitsPerEntry -- the number of bits available for each pair """ if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Atom pairs fingerprint type") if atomId < 0: return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0) if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms") return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId]) ttDict = {} ttDict['normal'] = lambda m, bits, ts, bpe, ia: rdMD.GetTopologicalTorsionFingerprint(m, targetSize=ts, ignoreAtoms=ia) ttDict['hashed'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprint(m, nBits=bits, targetSize=ts, ignoreAtoms=ia) ttDict['bv'] = lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia) # usage: lambda m,i: GetTTFingerprint(m, i, fpType, nBits, targetSize) def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4): """ Calculates the topological torsion fingerprint with the pairs of atomId removed. Parameters: mol -- the molecule of interest atomId -- the atom to remove the torsions for (if -1, no torsion is removed) fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv') nBits -- the size of the bit vector (only for fpType='bv') minLength -- the minimum path length for an atom pair maxLength -- the maxmimum path length for an atom pair
def _extract_macrocycle(mol, min_ring_size=8):
    """Return the macrocycle-only copy of *mol* and the macrocycle atom ids.

    Atoms that belong to at least one ring reported by ``Chem.GetSymmSSSR``
    with ``min_ring_size`` or more members are kept; every other atom is
    removed from an editable copy of *mol*.

    Returns:
        tuple: ``(macrocycle_mol, ring_atom_ids)`` where ``ring_atom_ids`` is
        the sorted list of kept atom indices, numbered as in *mol*.
    """
    ring_atoms = set()
    for ring in Chem.GetSymmSSSR(mol):
        if len(ring) >= min_ring_size:
            ring_atoms.update(ring)

    all_atoms = {atom.GetIdx() for atom in mol.GetAtoms()}
    editable = Chem.RWMol(mol)
    # Highest index first -- presumably so that removing an atom does not
    # shift the indices of atoms that are still waiting to be removed.
    for idx in sorted(all_atoms - ring_atoms, reverse=True):
        editable.RemoveAtom(idx)

    macrocycle = editable.GetMol()
    macrocycle.UpdatePropertyCache()
    return macrocycle, sorted(ring_atoms)


def _torsion_fingerprint_deviation(ref_mol, probe_mol):
    """Return the TFD between two molecules, or ``'NA'``.

    ``'NA'`` is returned when the two molecules do not yield the same number
    of torsion angles, in which case the deviation is not computed.
    """
    tfp = Chem.TorsionFingerprints
    ref_lists = tfp.CalculateTorsionLists(ref_mol)
    ref_angles = tfp.CalculateTorsionAngles(ref_mol, ref_lists[0],
                                            ref_lists[1])
    probe_lists = tfp.CalculateTorsionLists(probe_mol)
    probe_angles = tfp.CalculateTorsionAngles(probe_mol, probe_lists[0],
                                              probe_lists[1])
    if len(ref_angles) != len(probe_angles):
        return 'NA'
    return tfp.CalculateTFD(ref_angles, probe_angles)


def run_comparison(references=None, conformers=None):
    """Compare every conformer with its reference structure.

    For each molecule in the references SD file, all molecules of the
    conformers SD file whose ``_Name`` prefix (text before the first ``_``)
    equals the reference's prefix are analysed.  Per conformer the function
    records macrocycle and whole-molecule RMSD, radius of gyration,
    topological-torsion total and torsion fingerprint deviation.

    Outputs (all written to the current directory):
        * ``<reference _Name>.csv`` -- per-conformer table sorted by
          macrocycle RMSD,
        * ``Aligment_RMSD.sdf`` -- the conformers tagged with their RMSD
          properties,
        * ``Lowest_RMSD_Data.csv`` -- the best conformer row per reference.

    Args:
        references: path of the references SD file.  When ``None`` the value
            of the module-level ``args.references`` is used.
        conformers: path of the conformers SD file.  When ``None`` the value
            of the module-level ``args.conformers`` is used.
    """
    # Honour explicit arguments; fall back to the parsed command line.
    references_file = args.references if references is None else references
    conformers_file = args.conformers if conformers is None else conformers
    # The (misspelt) output name is kept so existing consumers still find it.
    rmsd_sdf_name = 'Aligment_RMSD.sdf'

    print(' -- -- -- -- -- LOADING REFERENCES IN {} -- -- -- -- -- '.format(
        references_file))
    print(' -- -- -- -- -- LOADING CONFORMERS IN {} -- -- -- -- -- '.format(
        conformers_file))
    lowest_rmsd = []
    out_RMSD = Chem.SDWriter(rmsd_sdf_name)
    print(' -- -- -- -- -- STARTING ANALYSIS -- -- -- -- -- ')

    try:
        for ref_index, ref in enumerate(Chem.SDMolSupplier(references_file),
                                        start=1):
            if ref is None:
                # The supplier yields None for records it cannot parse.
                print(ref_index, ')', 'unreadable reference record, skipped')
                continue

            ref_name = ref.GetProp('_Name')
            ref_id = ref_name.split('_')[0]
            print(ref_index, ')', ref_id)

            m_ref, macrocycle_atoms = _extract_macrocycle(ref)
            print('Initial Num Atoms:', ref.GetNumAtoms())
            print('Macrocycle length:', len(macrocycle_atoms))

            # Unsanitized query built from the reference macrocycle; used to
            # map macrocycle atoms between reference and conformer.
            macro_smiles = Chem.MolFragmentToSmiles(ref,
                                                    macrocycle_atoms,
                                                    kekuleSmiles=True)
            macro_query = Chem.MolFromSmiles(macro_smiles, sanitize=False)

            mol_index = 0
            table = pd.DataFrame()
            for mol in Chem.SDMolSupplier(conformers_file):
                if mol is None:
                    continue
                if mol.GetProp('_Name').split('_')[0] != ref_id:
                    continue

                table.loc[mol_index, 'Conformer'] = [mol.GetProp('_Name')]

                # RMSD restricted to the macrocycle atoms.
                ref_atoms = ref.GetSubstructMatch(macro_query)
                mol_atoms = mol.GetSubstructMatch(macro_query)
                amap = list(zip(mol_atoms, ref_atoms))
                rms_macrocycle = AllChem.GetBestRMS(mol, ref, map=[amap])
                mol.SetProp('RMSD_macrocycle', str(rms_macrocycle))
                table.loc[mol_index, 'RMSD_macrocycle'] = [rms_macrocycle]

                # Descriptors of the conformer's macrocycle substructure.
                m_mol, _ = _extract_macrocycle(mol)
                table.loc[mol_index, 'RoG_macrocycle'] = (
                    Descriptors3D.RadiusOfGyration(m_mol))
                tt_macro = rdMolDescriptors.GetTopologicalTorsionFingerprint(
                    m_mol)
                table.loc[mol_index,
                          'TF_macrocycle'] = [tt_macro.GetTotalVal()]
                table.loc[mol_index, 'TFD_macrocycle'] = [
                    _torsion_fingerprint_deviation(m_ref, m_mol)
                ]

                # Whole-molecule RMSD computed by PyMOL on the current
                # coordinates; the session is emptied afterwards.
                cmd.read_molstr(Chem.MolToMolBlock(ref), 'ref')
                cmd.read_molstr(Chem.MolToMolBlock(mol), 'mol')
                rmsd = cmd.rms_cur('ref', 'mol')
                cmd.deselect()
                cmd.delete('all')
                mol.SetProp('RMSD_heavy_atoms', str(rmsd))
                table.loc[mol_index, 'RMSD_heavy_atoms'] = [rmsd]
                out_RMSD.write(mol)

                # Descriptors of the whole conformer.
                table.loc[mol_index, 'RoG_heavy_atoms'] = (
                    Descriptors3D.RadiusOfGyration(mol))
                tt = rdMolDescriptors.GetTopologicalTorsionFingerprint(mol)
                table.loc[mol_index, 'TF_heavy_atoms'] = [tt.GetTotalVal()]
                table.loc[mol_index, 'TFD_heavy_atoms'] = [
                    _torsion_fingerprint_deviation(ref, mol)
                ]

                mol_index = mol_index + 1

            if len(table.index) > 0:
                ranked = table.sort_values('RMSD_macrocycle', ascending=True)
                ranked = ranked.reset_index(drop=True)
                # The CSV is written before the summary-only columns below
                # are added, so it contains the per-conformer data only.
                ranked.to_csv(ref_name + '.csv')
                ranked['Nconf'] = len(ranked.index)
                print('Number of conformers analyzed:', len(ranked.index))
                print('data in file:', ref_name + '.csv')
                print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- \n')
                ranked['Span_Rog_macrocycle'] = float(
                    max(ranked['RoG_macrocycle']) -
                    min(ranked['RoG_macrocycle']))
                ranked['Span_Rog_heavy_atoms'] = float(
                    max(ranked['RoG_heavy_atoms']) -
                    min(ranked['RoG_heavy_atoms']))
                # Row 0 is the conformer with the lowest macrocycle RMSD.
                lowest_rmsd.append(ranked.loc[0])
            else:
                print('No reference or conformers found in input files for {}'.
                      format(ref_name))
                print(' ************************************ \n')
    finally:
        # Flush and release the SD file even if the analysis fails midway.
        out_RMSD.close()

    print('SAVING DATA OF LOWEST RMSD OF CONFORMERS')
    summary = pd.DataFrame(lowest_rmsd)
    summary = summary.reset_index(drop=True)
    summary.to_csv('Lowest_RMSD_Data.csv')
    print('Lowest RMSD Data in file: Lowest_RMSD_Data.csv')
    print('***************************************************\n')
    # Report the name of the file that was actually written.
    print('Structures in files: {}'.format(rmsd_sdf_name))
    print('***************************************************\n')
    print(
        'CALCULATION OF {} OUT OF {} REFERENCES DONE, FILES SAVED. THANK YOU FOR USING THIS SCRIPT \n'
        .format(len(summary.index), len(Chem.SDMolSupplier(references_file))))