Пример #1
0
def TFD_for_oemols(ref_mol, query_mol):
    """
    This is the TFD_for_oemols script. 
    It makes use of RDKit's TFD calculation and the function rdmol_from_oemol.
    TFD_for_oemols takes in two OEMOLs. 
    It does not matter which mol is the ref mol and which is the querymol. 
    TFD metric is the same no matter which is the ref and which is the query.
    First, OEmols are made RDKit compatible. Then,
    TFD is computed and returned using RDKit's TorsionFingerprints
    Module. Takes one input reference mol2 and one input query mol2.
    
    Args: 
        ref_mol (oemol) An oemol that has already been read in. 
        query_mol (oemol) An oemol that has already been read in. 
    
    Returns: 
        tfd (float) The torsion fingerprint deviation between ref and query.
    """
    # converts refmol to one readable by RDKit
    rrdmol2 = rdmol_from_oemol(ref_mol)
    # converts querymol to one readable by RDKit
    qrdmol2 = rdmol_from_oemol(query_mol)
    # If there was a mistake in the conversion process, return -1
    if (Chem.MolToSmiles(qrdmol2) != Chem.MolToSmiles(rrdmol2)):
        tfd = -1
    else:
        # calculates the TFD
        try:
            tfd = TorsionFingerprints.GetTFDBetweenMolecules(rrdmol2, qrdmol2)
        except IndexError:
            tfd = 0
    return tfd
Пример #2
0
  def testTorsionFingerprints(self):
    # we use the xray structure from the paper (JCIM, 52, 1499, 2012): 1DWD
    refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1DWD_ligand.pdb')
    ref = Chem.MolFromSmiles(
      'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')
    mol = Chem.MolFromPDBFile(refFile)
    mol = AllChem.AssignBondOrdersFromTemplate(ref, mol)

    # the torsion lists
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol)
    self.assertEqual(len(tors_list), 11)
    self.assertEqual(len(tors_list_rings), 4)
    self.assertAlmostEqual(tors_list[-1][1], 180.0, 4)
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol, maxDev='spec')
    self.assertAlmostEqual(tors_list[-1][1], 90.0, 4)
    self.assertRaises(ValueError, TorsionFingerprints.CalculateTorsionLists, mol, maxDev='test')
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol, symmRadius=0)
    self.assertEqual(len(tors_list[0][0]), 2)

    # the weights
    weights = TorsionFingerprints.CalculateTorsionWeights(mol)
    self.assertAlmostEqual(weights[4], 1.0)
    self.assertEqual(len(weights), len(tors_list + tors_list_rings))
    weights = TorsionFingerprints.CalculateTorsionWeights(mol, 15, 14)
    self.assertAlmostEqual(weights[3], 1.0)
    self.assertRaises(ValueError, TorsionFingerprints.CalculateTorsionWeights, mol, 15, 3)

    # the torsion angles
    tors_list, tors_list_rings = TorsionFingerprints.CalculateTorsionLists(mol)
    torsions = TorsionFingerprints.CalculateTorsionAngles(mol, tors_list, tors_list_rings)
    self.assertEqual(len(weights), len(torsions))
    self.assertAlmostEqual(torsions[2][0][0], 232.5346, 4)

    # the torsion fingerprint deviation
    tfd = TorsionFingerprints.CalculateTFD(torsions, torsions)
    self.assertAlmostEqual(tfd, 0.0)
    refFile = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1PPC_ligand.pdb')
    mol2 = Chem.MolFromPDBFile(refFile)
    mol2 = AllChem.AssignBondOrdersFromTemplate(ref, mol2)
    torsions2 = TorsionFingerprints.CalculateTorsionAngles(mol2, tors_list, tors_list_rings)
    weights = TorsionFingerprints.CalculateTorsionWeights(mol)
    tfd = TorsionFingerprints.CalculateTFD(torsions, torsions2, weights=weights)
    self.assertAlmostEqual(tfd, 0.0691, 4)
    tfd = TorsionFingerprints.CalculateTFD(torsions, torsions2)
    self.assertAlmostEqual(tfd, 0.1115, 4)

    # the wrapper functions
    tfd = TorsionFingerprints.GetTFDBetweenMolecules(mol, mol2)
    self.assertAlmostEqual(tfd, 0.0691, 4)

    mol.AddConformer(mol2.GetConformer(), assignId=True)
    mol.AddConformer(mol2.GetConformer(), assignId=True)
    tfd = TorsionFingerprints.GetTFDBetweenConformers(mol, confIds1=[0], confIds2=[1, 2])
    self.assertEqual(len(tfd), 2)
    self.assertAlmostEqual(tfd[0], 0.0691, 4)

    tfdmat = TorsionFingerprints.GetTFDMatrix(mol)
    self.assertEqual(len(tfdmat), 3)
Пример #3
0
  def testTorsionFingerprintsAtomReordering(self):
    # we use the xray structure from the paper (JCIM, 52, 1499, 2012): 1DWD
    refFile = os.path.join(RDConfig.RDCodeDir,'Chem','test_data','1DWD_ligand.pdb')
    ref = Chem.MolFromSmiles('NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')
    mol1 = Chem.MolFromPDBFile(refFile)
    mol1 = AllChem.AssignBondOrdersFromTemplate(ref, mol1)

    refFile = os.path.join(RDConfig.RDCodeDir,'Chem','test_data','1DWD_ligand_reordered.pdb')
    mol2 = Chem.MolFromPDBFile(refFile)
    mol2 = AllChem.AssignBondOrdersFromTemplate(ref, mol2)

    tfd = TorsionFingerprints.GetTFDBetweenMolecules(mol1, mol2)
    self.assertEqual(tfd, 0.0)
Пример #4
0
def calc_tfd(ref_mol, query_mol):
    """
    Calculate Torsion Fingerprint Deviation between two molecular structures.
    RDKit is required for TFD calculation.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/benchmarkff/03_analysis/compare_ffs.py

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : RDKit RDMol
    query_mol : RDKit RDMol

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules

    """
    # check if the molecules are the same
    # tfd requires the two molecules must be instances of the same molecule
    rsmiles = Chem.MolToSmiles(ref_mol)
    qsmiles = Chem.MolToSmiles(query_mol)
    if rsmiles != qsmiles:
        print(f"- WARNING: The reference mol {ref_mol.GetProp('_Name')} and "
              f"query mol {query_mol.GetProp('_Name')} do NOT have the same "
              f"SMILES strings as determined by RDKit MolToSmiles. "
              f"\n {rsmiles}\n {qsmiles}")
        tfd = np.nan

    # calculate the TFD
    else:
        try:
            tfd = TorsionFingerprints.GetTFDBetweenMolecules(
                ref_mol, query_mol)
        # triggered for molecules such as urea
        except IndexError:
            print(
                f"- Error calculating TFD on molecule {ref_mol.GetProp('_Name')}."
                " Possibly no non-terminal rotatable bonds found.")
            tfd = np.nan

    return tfd
Пример #5
0
def get_tfd(source_1, source_2, file_in, seed):
    
    confab_tfd_uniform = []
    confab_tfd_EI = []
    confab_tfd_LCB = []    
    
    bo_tfd_confab = []
    bo_tfd_uniform = []
    bo_tfd_EI = []
    bo_tfd_LCB = []

    bo_check_EI = []
    bo_check_LCB = []
    confab_check_EI = []
    confab_check_LCB = []

    bo_target = []
    confab_target = []

    for i in range(len(file_in)):
        print(file_in.iloc[i,0])
        if file_in.iloc[i,2] == "Yes":
            if "200" in str(file_in.iloc[i,1]):         
                basenames = file_in.iloc[i,0] + '/' + file_in.iloc[i,1] + '.sdf'
                inputs = os.path.join(source_2, basenames)
                ref_mol = Chem.SDMolSupplier(inputs)

            else:
                basenames = file_in.iloc[i,0] +'/' + file_in.iloc[i,1] + '.sdf'
                inputs  = os.path.join(source_1, basenames)
                ref_mol = Chem.SDMolSupplier(inputs)     

            bo_target.append(file_in.iloc[i,0])

            # read EI_bayes
            EI_bases = file_in.iloc[i,0] + "/EI_bayes_{}.sdf".format(seed) 
            EI_input = os.path.join(source_1, EI_bases)
            EI_mol = Chem.SDMolSupplier(EI_input)

            # read LCB_bayes
            LCB_bases = file_in.iloc[i,0] + "/LCB_bayes_{}.sdf".format(seed) 
            LCB_input = os.path.join(source_1, LCB_bases)
            LCB_mol = Chem.SDMolSupplier(LCB_input)

            # read uniform
            uniform_bases = file_in.iloc[i,0] + "/uniform_{}.sdf".format(seed) 
            uniform_input = os.path.join(source_1, uniform_bases)
            uniform_mol = Chem.SDMolSupplier(uniform_input)

            # read confab
            confab_bases = file_in.iloc[i,0] + "/confab.sdf" 
            confab_input = os.path.join(source_1, confab_bases)
            confab_mol = Chem.SDMolSupplier(confab_input)

            
            bo_tfd_EI.append(TFP.GetTFDBetweenMolecules(EI_mol[0], ref_mol[0]))
            bo_tfd_LCB.append(TFP.GetTFDBetweenMolecules(LCB_mol[0], ref_mol[0]))
            bo_tfd_confab.append(TFP.GetTFDBetweenMolecules(confab_mol[0], ref_mol[0]))
            bo_tfd_uniform.append(TFP.GetTFDBetweenMolecules(uniform_mol[0], ref_mol[0]))
            
	
        else:
            basenames = file_in.iloc[i,0] + '/confab.sdf'
            inputs = os.path.join(source_1, basenames)
            ref_mol = Chem.SDMolSupplier(inputs)
            confab_target.append(file_in.iloc[i,0])

            # read EI_bayes
            EI_bases = file_in.iloc[i,0] + "/EI_bayes_{}.sdf".format(seed) 
            EI_input = os.path.join(source_1, EI_bases)
            EI_mol = Chem.SDMolSupplier(EI_input)

            # read LCB_bayes
            LCB_bases = file_in.iloc[i,0] + "/LCB_bayes_{}.sdf".format(seed) 
            LCB_input = os.path.join(source_1, LCB_bases)
            LCB_mol = Chem.SDMolSupplier(LCB_input)

            # read uniform
            uniform_bases = file_in.iloc[i,0] + "/uniform_{}.sdf".format(seed) 
            uniform_input = os.path.join(source_1, uniform_bases)
            uniform_mol = Chem.SDMolSupplier(uniform_input)


            confab_tfd_EI.append(TFP.GetTFDBetweenMolecules(EI_mol[0], ref_mol[0]))
            confab_tfd_LCB.append(TFP.GetTFDBetweenMolecules(LCB_mol[0], ref_mol[0]))
            confab_tfd_uniform.append(TFP.GetTFDBetweenMolecules(uniform_mol[0], ref_mol[0]))

    bo_data = pd.DataFrame({"target": bo_target, "Uniform": bo_tfd_uniform, "EI": bo_tfd_EI, "LCB": bo_tfd_LCB, "Confab": bo_tfd_confab, "N_rot": 5}, columns = ["target", "Uniform", "EI", "LCB", "Confab", "N_rot"])
    confab_data = pd.DataFrame({'target': confab_target, "Uniform": confab_tfd_uniform, "EI": confab_tfd_EI, "LCB": confab_tfd_LCB, "N_rot":5}, columns = ["target","Uniform","EI","LCB", "N_rot"])   
    return  confab_data, bo_data
Пример #6
0
def calc_tfd(ref_mol, query_mol, conf_id_tag):
    """
    Calculate Torsion Fingerprint Deviation between two molecular structures.
    RDKit is required for TFD calculation.

    References
    ----------
    Modified from the following code:
    https://github.com/MobleyLab/off-ffcompare

    TFD reference:
    https://pubs.acs.org/doi/10.1021/ci2002318

    Parameters
    ----------
    ref_mol : OEMol
    query_mol : OEMol
    conf_id_tag : string
        label of the SD tag that should be the same for matching conformers
        in different files

    Returns
    -------
    tfd : float
        Torsion Fingerprint Deviation between ref and query molecules

    """
    # convert refmol to one readable by RDKit
    ref_rdmol = reader.rdmol_from_oemol(ref_mol)

    # convert querymol to one readable by RDKit
    que_rdmol = reader.rdmol_from_oemol(query_mol)

    # check if the molecules are the same
    # tfd requires the two molecules must be instances of the same molecule
    rsmiles = Chem.MolToSmiles(ref_rdmol)
    qsmiles = Chem.MolToSmiles(que_rdmol)
    if rsmiles != qsmiles:
        print(f"- WARNING: The reference mol \'{ref_mol.GetTitle()}\' and "
              f"query mol \'{query_mol.GetTitle()}\' do NOT have the same "
              "SMILES strings as determined by RDKit MolToSmiles. It is "
              "possible that they did not have matching SMILES even before "
              "conversion from OEMol to RDKit mol. Listing in order the "
              "QCArchive SMILES string, RDKit SMILES for ref mol, and "
              "RDKit SMILES for query mol:"
              f"\n {oechem.OEGetSDData(ref_mol, conf_id_tag)}"
              f"\n {rsmiles}\n {qsmiles}")
        tfd = np.nan

    # calculate the TFD
    else:
        try:
            tfd = TorsionFingerprints.GetTFDBetweenMolecules(
                ref_rdmol, que_rdmol)
        # triggered for molecules such as urea
        except IndexError:
            print(
                f"- Error calculating TFD on molecule '{ref_mol.GetTitle()}'."
                " Possibly no non-terminal rotatable bonds found.")
            tfd = np.nan

    return tfd
Пример #7
0
    if pred is None:  # in case of failure
        entry2RMSD[refEntry] = ''
        entry2TFD[refEntry] = ''
        continue

    predEntry = pred.GetProp('_Name')
    assert(refEntry == predEntry)
    try:
        rmsd = AllChem.GetBestRMS(ref, pred)
    except:
        rmsd = ''
    try:
        m = Chem.MolFromSmiles(Chem.MolToSmiles(ref))
        ref = AllChem.AssignBondOrdersFromTemplate(m, ref)
        pred = AllChem.AssignBondOrdersFromTemplate(m, pred)
        tfd = TorsionFingerprints.GetTFDBetweenMolecules(ref, pred)
    except:
        tfd = ''
    entry2RMSD[refEntry] = rmsd
    entry2TFD[refEntry] = tfd

# See https://baoilleach.blogspot.com/2010/11/automorphisms-isomorphisms-symmetry.html
print("Entry,SMILES,RMSD,Bond error,Angle error,Torsion error,TFD,Stereo correct")
for ref, pred in zip(pybel.readfile("sdf", refFileName),
                     pybel.readfile("sdf", predFileName)):
    refMol = ref.OBMol
    predMol = pred.OBMol

    refEntry = refMol.GetTitle()
    predEntry = predMol.GetTitle()
    assert refEntry == predEntry