示例#1
0
    def testRootedTorsions(self):
        """Check topological torsion fingerprints restricted via ``fromAtoms``.

        For phenol, every entry of the fingerprint rooted at atom 0 must have
        a count no larger than the same entry in the unrestricted
        fingerprint.  For 'COCC', the unrestricted fingerprint and the one
        rooted at atom 0 each have one nonzero element, while the one rooted
        at atom 1 has none.
        """
        phenol = Chem.MolFromSmiles('Oc1ccccc1')
        full_counts = rdMD.GetTopologicalTorsionFingerprint(
            phenol).GetNonzeroElements()
        rooted_counts = rdMD.GetTopologicalTorsionFingerprint(
            phenol, fromAtoms=(0, )).GetNonzeroElements()
        for key in rooted_counts:
            self.assertTrue(rooted_counts[key] <= full_counts[key])

        ether = Chem.MolFromSmiles('COCC')
        unrooted = rdMD.GetTopologicalTorsionFingerprint(ether)
        self.assertEqual(len(unrooted.GetNonzeroElements()), 1)
        # (root atom index, expected number of nonzero elements)
        for root, expected in ((0, 1), (1, 0)):
            rooted = rdMD.GetTopologicalTorsionFingerprint(
                ether, fromAtoms=(root, ))
            self.assertEqual(len(rooted.GetNonzeroElements()), expected)
示例#2
0
def GenerateTopologicalTorsionsFingerprints(Mols):
    """Generate TopologicalTorsions fingerprints for each molecule in Mols.

    The fingerprint flavour is selected by
    OptionsInfo["SpecifiedFingerprintsType"]: a case-insensitive "BitVect"
    yields hashed bit-vector fingerprints sized by the FPSize / BitsPerHash
    options; any other value yields the unhashed sparse count fingerprint.

    Arguments:
        Mols: iterable of molecules to fingerprint.

    Returns:
        list: one fingerprint per molecule, in input order.
    """

    FPType = OptionsInfo["SpecifiedFingerprintsType"]
    MiscUtil.PrintInfo(
        "\nGenerating TopologicalTorsions %s fingerprints..." % FPType)

    TTParams = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]
    UseChirality = TTParams["UseChirality"]
    FPSize = TTParams["FPSize"]
    BitsPerHash = TTParams["BitsPerHash"]

    if not re.match("^BitVect$", FPType, re.I):
        # Generate LongSparseIntVect fingerprint...
        return [
            rdMolDescriptors.GetTopologicalTorsionFingerprint(
                Mol, includeChirality=UseChirality) for Mol in Mols
        ]

    # Generate ExplicitBitVect fingerprints...
    MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash))
    return [
        rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            Mol,
            includeChirality=UseChirality,
            nBits=FPSize,
            nBitsPerEntry=BitsPerHash) for Mol in Mols
    ]
示例#3
0
def GenerateTopologicalTorsionsFingerprints(Mols):
    """Generate TopologicalTorsions fingerprints for each molecule in Mols.

    When OptionsInfo["GenerateBitVectFingerints"] is truthy, hashed
    bit-vector fingerprints are produced with a fixed size of 2048 bits and
    4 bits per entry; otherwise the unhashed sparse count fingerprint is
    produced.

    Arguments:
        Mols: iterable of molecules to fingerprint.

    Returns:
        list: one fingerprint per molecule, in input order.
    """

    MiscUtil.PrintInfo("\nGenerating TopologicalTorsions fingerprints...")

    TTParams = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]
    UseChirality = TTParams["UseChirality"]

    if not OptionsInfo["GenerateBitVectFingerints"]:
        # Generate LongSparseIntVect fingerprint...
        return [
            rdMolDescriptors.GetTopologicalTorsionFingerprint(
                Mol, includeChirality=UseChirality) for Mol in Mols
        ]

    # Fixed parameters for the ExplicitBitVect fingerprints.
    FPSize, BitsPerHash = 2048, 4
    return [
        rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            Mol,
            includeChirality=UseChirality,
            nBits=FPSize,
            nBitsPerEntry=BitsPerHash) for Mol in Mols
    ]
示例#4
0
 def testTorsionValues(self):
   """Compare freshly computed torsion fingerprints with stored reference pickles.

   Each entry of ``testD`` pairs a SMILES string with the base64-encoded
   serialized form of the expected ``LongSparseIntVect``.  The computed
   fingerprint must be equal to the deserialized reference and have a Dice
   similarity of exactly 1.0 with it.
   """
   import base64
   testD=(('CCCO',b'AQAAAAgAAAD/////DwAAAAEAAAAAAAAAIECAAAMAAAABAAAA\n'),
          ('CNc1ccco1',b'AQAAAAgAAAD/////DwAAAAkAAAAAAAAAIICkSAEAAAABAAAAKVKgSQEAAAABAAAAKVCgUAEAAAAB\nAAAAKVCgUQEAAAABAAAAKVCkCAIAAAABAAAAKdCkCAIAAAABAAAAKVCgSAMAAAABAAAAKVCkSAMA\nAAABAAAAIICkSAMAAAABAAAA\n'),
          )
   for smi,txt in testD:
     # base64.decodestring() was removed in Python 3.9.  b64decode() exists
     # on every Python version and, in its default non-validating mode,
     # discards the embedded newlines just as decodestring() did.
     pkl = base64.b64decode(txt)
     fp = rdMD.GetTopologicalTorsionFingerprint(Chem.MolFromSmiles(smi))
     fp2 = DataStructs.LongSparseIntVect(pkl)
     self.assertEqual(DataStructs.DiceSimilarity(fp,fp2),1.0)
     self.assertEqual(fp,fp2)
示例#5
0
  def testAtomPairOptions(self):
    """Check the effect of ``atomInvariants`` on atom-pair and torsion fingerprints.

    For each of the four fingerprint functions (atom pair, hashed atom-pair
    bit vector, topological torsion, hashed topological-torsion bit vector)
    benzene and pyridine must:
      * differ with default invariants,
      * be equal when both are given the same constant invariants,
      * differ when given different constant invariants.
    """
    benzene = Chem.MolFromSmiles('c1ccccc1')
    pyridine = Chem.MolFromSmiles('c1ccccn1')

    fingerprinters = (
      rdMD.GetAtomPairFingerprint,
      rdMD.GetHashedAtomPairFingerprintAsBitVect,
      rdMD.GetTopologicalTorsionFingerprint,
      rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect,
    )

    for calc in fingerprinters:
      # Default invariants distinguish the ring nitrogen from carbon.
      self.assertNotEqual(calc(benzene), calc(pyridine))

      # Identical constant invariants erase the element difference.
      self.assertEqual(calc(benzene, atomInvariants=[1] * 6),
                       calc(pyridine, atomInvariants=[1] * 6))

      # Different constant invariants make the fingerprints differ again.
      self.assertNotEqual(calc(benzene, atomInvariants=[1] * 6),
                          calc(pyridine, atomInvariants=[2] * 6))
示例#6
0
  def testTopologicalTorsions(self):
    """Check total counts of topological torsion fingerprints on short chains.

    Ethane yields an empty fingerprint.  The four-atom chains 'CCCC' and
    'CCCO' yield a total of 1 with the default settings and 2 when 3 is
    passed as the second positional argument (presumably the torsion path
    length -- confirm against the RDKit signature).  On an 11-carbon chain a
    value of 7 is accepted while 8 raises ValueError.
    """
    ethane = Chem.MolFromSmiles("CC")
    ethane_fp = rdMD.GetTopologicalTorsionFingerprint(ethane)
    self.assertTrue(ethane_fp.GetTotalVal() == 0)

    for smiles in ("CCCC", "CCCO"):
      chain = Chem.MolFromSmiles(smiles)
      default_fp = rdMD.GetTopologicalTorsionFingerprint(chain)
      self.assertTrue(default_fp.GetTotalVal() == 1)
      short_fp = rdMD.GetTopologicalTorsionFingerprint(chain, 3)
      self.assertTrue(short_fp.GetTotalVal() == 2)

    undecane = Chem.MolFromSmiles("CCCCCCCCCCC")
    # A value of 7 must be accepted without raising ...
    rdMD.GetTopologicalTorsionFingerprint(undecane, 7)
    # ... whereas 8 must be rejected.
    self.assertRaises(ValueError, rdMD.GetTopologicalTorsionFingerprint,
                      undecane, 8)
示例#7
0
    atomId -- the atom to remove the pairs for (if -1, no pair is removed)
    fpType -- the type of AP fingerprint ('normal', 'hashed', 'bv')
    nBits -- the size of the bit vector (only for fpType='bv')
    minLength -- the minimum path length for an atom pair
    maxLength -- the maxmimum path length for an atom pair
    nBitsPerEntry -- the number of bits available for each pair
  """
  if fpType not in ['normal', 'hashed', 'bv']: raise ValueError("Unknown Atom pairs fingerprint type")
  if atomId < 0:
    return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, 0)
  if atomId >= mol.GetNumAtoms(): raise ValueError("atom index greater than number of atoms")
  return apDict[fpType](mol, nBits, minLength, maxLength, nBitsPerEntry, [atomId])


# Dispatch table mapping a fingerprint type name to a callable with the
# uniform signature (m, bits, ts, bpe, ia).  Each callable forwards only the
# arguments its underlying rdMD function is given here: 'normal' ignores
# bits and bpe, 'hashed' ignores bpe.
ttDict = {
  'normal': lambda m, bits, ts, bpe, ia: rdMD.GetTopologicalTorsionFingerprint(
    m, targetSize=ts, ignoreAtoms=ia),
  'hashed': lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprint(
    m, nBits=bits, targetSize=ts, ignoreAtoms=ia),
  'bv': lambda m, bits, ts, bpe, ia: rdMD.GetHashedTopologicalTorsionFingerprintAsBitVect(
    m, nBits=bits, targetSize=ts, nBitsPerEntry=bpe, ignoreAtoms=ia),
}

# usage:   lambda m,i: GetTTFingerprint(m, i, fpType, nBits, targetSize)
def GetTTFingerprint(mol, atomId=-1, fpType='normal', nBits=2048, targetSize=4, nBitsPerEntry=4):
  """
  Calculates the topological torsion fingerprint with the pairs of atomId removed.

  Parameters:
    mol -- the molecule of interest
    atomId -- the atom to remove the torsions for (if -1, no torsion is removed)
    fpType -- the type of TT fingerprint ('normal', 'hashed', 'bv')
    nBits -- the size of the bit vector (only for fpType='bv')
    minLength -- the minimum path length for an atom pair
    maxLength -- the maxmimum path length for an atom pair
示例#8
0
def _extract_macrocycle(mol, min_ring_size=8):
    """Return ``(fragment, ring_atoms)`` for the large rings of *mol*.

    ``ring_atoms`` is the sorted list of indices (in *mol*) of every atom
    that belongs to a ring of at least ``min_ring_size`` atoms according to
    ``Chem.GetSymmSSSR``.  ``fragment`` is a copy of *mol* from which all
    other atoms have been removed.
    """
    ring_atoms = set()
    for ring in Chem.GetSymmSSSR(mol):
        if len(ring) >= min_ring_size:
            ring_atoms.update(ring)

    editable = Chem.RWMol(mol)
    # Remove in descending index order, as the original code did
    # (presumably so that indices still pending removal stay valid).
    for idx in range(mol.GetNumAtoms() - 1, -1, -1):
        if idx not in ring_atoms:
            editable.RemoveAtom(idx)

    fragment = editable.GetMol()
    fragment.UpdatePropertyCache()
    return fragment, sorted(ring_atoms)


def _torsion_fingerprint_deviation(ref_mol, probe_mol):
    """Return the TFD between two molecules, or ``'NA'`` if not comparable.

    The two molecules are considered comparable only when they yield the
    same number of torsion angles.
    """
    tfp = Chem.TorsionFingerprints
    ref_lists = tfp.CalculateTorsionLists(ref_mol)
    ref_angles = tfp.CalculateTorsionAngles(ref_mol, ref_lists[0],
                                            ref_lists[1])
    probe_lists = tfp.CalculateTorsionLists(probe_mol)
    probe_angles = tfp.CalculateTorsionAngles(probe_mol, probe_lists[0],
                                              probe_lists[1])
    if len(ref_angles) != len(probe_angles):
        return 'NA'
    return tfp.CalculateTFD(ref_angles, probe_angles)


def run_comparison(references=None, conformers=None):
    """Compare every conformer against its reference structure.

    A conformer belongs to a reference when the part of their ``_Name``
    property before the first underscore is identical.  For each matching
    pair the macrocycle RMSD, the heavy-atom RMSD (via ``cmd.rms_cur``),
    radii of gyration, topological-torsion totals and torsion fingerprint
    deviations are computed, for both the macrocycle fragment (rings of at
    least 8 atoms) and the full molecule.

    Files written to the current directory:
      * ``Aligment_RMSD.sdf`` -- every analysed conformer, tagged with its
        ``RMSD_macrocycle`` and ``RMSD_heavy_atoms`` properties;
      * ``<reference name>.csv`` -- per-reference table sorted by
        macrocycle RMSD;
      * ``Lowest_RMSD_Data.csv`` -- the lowest-RMSD row of each reference.

    Parameters:
        references: path of the reference SD file.  When None, the
            module-level ``args.references`` is used.
        conformers: path of the conformer SD file.  When None, the
            module-level ``args.conformers`` is used.

    Returns:
        None.
    """
    # Honour explicit arguments; previously they were silently ignored in
    # favour of the global command-line namespace.
    references_file = args.references if references is None else references
    conformers_file = args.conformers if conformers is None else conformers

    # Single source of truth for the output name, so that the closing
    # message reports the file that is actually written.  The historical
    # spelling is kept so existing downstream tooling still finds it.
    rmsd_sdf_name = 'Aligment_RMSD.sdf'

    print(' -- -- -- -- -- LOADING REFERENCES IN {} -- -- -- -- -- '.format(
        references_file))
    print(' -- -- -- -- -- LOADING CONFORMERS IN {} -- -- -- -- -- '.format(
        conformers_file))

    lowest_rmsd = []
    out_RMSD = Chem.SDWriter(rmsd_sdf_name)

    print(' -- -- -- -- -- STARTING ANALYSIS -- -- -- -- -- ')

    try:
        for ref_index, ref in enumerate(Chem.SDMolSupplier(references_file),
                                        start=1):
            # The supplier yields None for records it cannot parse.
            if ref is None:
                print(ref_index, ')', 'unreadable reference entry, skipped')
                continue

            ref_name = ref.GetProp('_Name')
            ref_key = ref_name.split('_')[0]
            print(ref_index, ')', ref_key)

            m_ref, macrocycle_atoms = _extract_macrocycle(ref)
            print('Initial Num Atoms:', ref.GetNumAtoms())
            print('Macrocycle length:', len(macrocycle_atoms))

            # Query molecule describing the macrocycle; it is used to map
            # macrocycle atoms between reference and conformer.
            macro_query = Chem.MolFromSmiles(
                Chem.MolFragmentToSmiles(ref,
                                         macrocycle_atoms,
                                         kekuleSmiles=True),
                sanitize=False)
            # Loop-invariant: the reference side of the atom map.
            ref_atoms = ref.GetSubstructMatch(macro_query)

            mol_index = 0
            table = pd.DataFrame()

            for mol in Chem.SDMolSupplier(conformers_file):
                if mol is None:
                    print('Unreadable conformer entry, skipped')
                    continue
                mol_name = mol.GetProp('_Name')
                if mol_name.split('_')[0] != ref_key:
                    continue

                table.loc[mol_index, 'Conformer'] = [mol_name]

                # --- macrocycle-only metrics ---
                mol_atoms = mol.GetSubstructMatch(macro_query)
                amap = list(zip(mol_atoms, ref_atoms))
                rms_macrocycle = AllChem.GetBestRMS(mol, ref, map=[amap])

                mol.SetProp('RMSD_macrocycle', str(rms_macrocycle))
                table.loc[mol_index, 'RMSD_macrocycle'] = [rms_macrocycle]

                m_mol, _ = _extract_macrocycle(mol)

                radious_macro = Descriptors3D.RadiusOfGyration(m_mol)
                table.loc[mol_index, 'RoG_macrocycle'] = radious_macro

                tt_macro = rdMolDescriptors.GetTopologicalTorsionFingerprint(
                    m_mol)
                table.loc[mol_index,
                          'TF_macrocycle'] = [tt_macro.GetTotalVal()]

                table.loc[mol_index, 'TFD_macrocycle'] = [
                    _torsion_fingerprint_deviation(m_ref, m_mol)
                ]

                # --- whole-molecule metrics ---
                cmd.read_molstr(Chem.MolToMolBlock(ref), 'ref')
                cmd.read_molstr(Chem.MolToMolBlock(mol), 'mol')
                rmsd = cmd.rms_cur('ref', 'mol')
                cmd.deselect()
                cmd.delete('all')

                mol.SetProp('RMSD_heavy_atoms', str(rmsd))
                table.loc[mol_index, 'RMSD_heavy_atoms'] = [rmsd]

                out_RMSD.write(mol)

                radious = Descriptors3D.RadiusOfGyration(mol)
                table.loc[mol_index, 'RoG_heavy_atoms'] = radious

                tt = rdMolDescriptors.GetTopologicalTorsionFingerprint(mol)
                table.loc[mol_index, 'TF_heavy_atoms'] = [tt.GetTotalVal()]

                table.loc[mol_index, 'TFD_heavy_atoms'] = [
                    _torsion_fingerprint_deviation(ref, mol)
                ]

                mol_index = mol_index + 1

            if len(table.index) > 0:
                sort = table.sort_values('RMSD_macrocycle', ascending=True)
                sort = sort.reset_index(drop=True)
                # The per-reference CSV is written before the summary-only
                # columns (Nconf, Span_*) are added below.
                sort.to_csv(ref_name + '.csv')

                sort['Nconf'] = len(sort.index)
                print('Number of conformers analyzed:', len(sort.index))
                print('data in file:', ref_name + '.csv')
                print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- \n')

                sort['Span_Rog_macrocycle'] = float(
                    max(sort['RoG_macrocycle']) - min(sort['RoG_macrocycle']))
                sort['Span_Rog_heavy_atoms'] = float(
                    max(sort['RoG_heavy_atoms']) -
                    min(sort['RoG_heavy_atoms']))

                # Row 0 is the conformer with the lowest macrocycle RMSD.
                lowest_rmsd.append(sort.loc[0])
            else:
                print('No reference or conformers found in input files for {}'.
                      format(ref_name))
                print(' ************************************ \n')
    finally:
        # Close the writer even if the analysis aborts part-way.
        out_RMSD.close()

    print('SAVING DATA OF LOWEST RMSD OF CONFORMERS')

    summary = pd.DataFrame(lowest_rmsd)
    summary = summary.reset_index(drop=True)
    summary.to_csv('Lowest_RMSD_Data.csv')

    print('Lowest RMSD Data in file: Lowest_RMSD_Data.csv')
    print('***************************************************\n')
    print('Structures in files: ' + rmsd_sdf_name)
    print('***************************************************\n')
    print(
        'CALCULATION OF {} OUT OF {} REFERENCES DONE, FILES SAVED. THANK YOU FOR USING THIS SCRIPT \n'
        .format(len(summary.index), len(Chem.SDMolSupplier(references_file))))