示例#1
0
    def test8MultiThreadMultiConf(self):
        """Check that embedding with ``numThreads=4`` matches the plain call.

        The same SMILES is embedded twice with an identical random seed, once
        without a ``numThreads`` argument and once with ``numThreads=4``.  The
        UFF energies of the resulting conformers are compared pairwise with a
        tolerance of 1e-6.
        """
        smiles = "CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC"

        def embed_and_score(**extra_options):
            # A fresh molecule is built for every run so the two embeddings
            # do not share any conformers.
            mol = Chem.AddHs(Chem.MolFromSmiles(smiles))
            conf_ids = rdDistGeom.EmbedMultipleConfs(mol,
                                                     200,
                                                     maxAttempts=30,
                                                     randomSeed=100,
                                                     **extra_options)
            scores = []
            for conf_id in conf_ids:
                field = ChemicalForceFields.UFFGetMoleculeForceField(
                    mol, 10.0, conf_id)
                scores.append(field.CalcEnergy())
            return scores

        energies = embed_and_score()
        nenergies = embed_and_score(numThreads=4)

        self.assertTrue(lstEq(energies, nenergies, tol=1e-6))
示例#2
0
  def test8MultiThreadMultiConf(self):
    """Check that embedding with ``numThreads=4`` reproduces the plain call.

    Two copies of the same molecule are embedded with an identical seed, one
    without a ``numThreads`` argument and one with ``numThreads=4``.  The test
    then requires identical conformer ids, matching UFF energies and, for
    every conformer id, a mean squared atomic displacement below tolerance.
    Looser tolerances are used when the third ``|``-separated field of
    ``rdBase.rdkitBuild`` is ``"MINGW"``.
    """
    if (rdBase.rdkitBuild.split('|')[2] != "MINGW"):
      ENERGY_TOLERANCE = 1.0e-6
      MSD_TOLERANCE = 1.0e-6
    else:
      ENERGY_TOLERANCE = 1.0
      MSD_TOLERANCE = 1.0e-5
    mol = Chem.AddHs(Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC"))
    cids = rdDistGeom.EmbedMultipleConfs(mol, 200, maxAttempts=30, randomSeed=100)
    energies = []
    for cid in cids:
      ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, cid)
      ee = ff.CalcEnergy()
      energies.append(ee)

    mol2 = Chem.AddHs(Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC"))
    cids2 = rdDistGeom.EmbedMultipleConfs(mol2, 200, maxAttempts=30, randomSeed=100, numThreads=4)
    self.assertTrue(lstEq(cids, cids2))
    nenergies = []
    for cid in cids2:
      ff = ChemicalForceFields.UFFGetMoleculeForceField(mol2, 10.0, cid)
      ee = ff.CalcEnergy()
      nenergies.append(ee)

    self.assertTrue(lstEq(energies, nenergies, tol=ENERGY_TOLERANCE))

    natoms = mol.GetNumAtoms()
    for cid in cids:
      # Fetch the conformer with this id from both molecules.  Calling
      # GetConformer() without an id would compare the same conformer on
      # every iteration and leave the loop variable unused.
      conf = mol.GetConformer(cid)
      conf2 = mol2.GetConformer(cid)
      msd = 0.0
      for i in range(natoms):
        msd += (conf.GetAtomPosition(i) - conf2.GetAtomPosition(i)).LengthSq()
      msd /= natoms
      self.assertLess(msd, MSD_TOLERANCE)
示例#3
0
def confgen(input, output, prunermsthresh, numconf, add_ref):
    """Embed, MMFF94s-optimize and write conformers of a molecule to an SD file.

    Conformers are written in order of increasing MMFF94s energy, each tagged
    with ``CID`` and ``Energy`` properties.

    Args:
        input: path of the MOL file to read.
        output: path of the SD file to write.
        prunermsthresh: value assigned to the ``pruneRmsThresh`` embedding
            parameter.
        numconf: number of conformers requested from the embedder.
        add_ref: when true, a hydrogen-completed copy of the input structure
            is written first with ``CID`` set to ``-1`` and an empty
            ``Energy``.
    """
    mol = Chem.AddHs(Chem.MolFromMolFile(input), addCoords=True)
    refmol = Chem.AddHs(Chem.Mol(mol))
    param = rdDistGeom.ETKDGv2()
    param.pruneRmsThresh = prunermsthresh
    cids = rdDistGeom.EmbedMultipleConfs(mol, numconf, param)
    mp = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant='MMFF94s')
    AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=0, mmffVariant='MMFF94s')
    w = Chem.SDWriter(output)
    # Everything after the writer is opened runs under try/finally so the
    # output file is closed even if an energy evaluation or a write fails.
    try:
        if add_ref:
            refmol.SetProp('CID', '-1')
            refmol.SetProp('Energy', '')
            w.write(refmol)

        res = [
            (cid,
             AllChem.MMFFGetMoleculeForceField(mol, mp, confId=cid).CalcEnergy())
            for cid in cids
        ]
        sorted_res = sorted(res, key=lambda x: x[1])
        rdMolAlign.AlignMolConformers(mol)
        for cid, e in sorted_res:
            mol.SetProp('CID', str(cid))
            mol.SetProp('Energy', str(e))
            w.write(mol, confId=cid)
    finally:
        w.close()
示例#4
0
 def test5Issue285(self):
   """Every pair of embedded conformers must produce a different mol block."""
   m = Chem.MolFromSmiles('CNC=O')
   conf_ids = list(rdDistGeom.EmbedMultipleConfs(m, 10))
   for pos, first in enumerate(conf_ids):
     # Compare against the conformers that follow, so each pair is checked once.
     for second in conf_ids[pos + 1:]:
       block_a = Chem.MolToMolBlock(m, confId=first)
       block_b = Chem.MolToMolBlock(m, confId=second)
       self.assertTrue(block_a != block_b)
示例#5
0
    def test4AlignConfs(self):
        """Align all conformers on a subset of atoms and check those atoms overlap.

        Ten conformers are embedded and UFF-minimized, then aligned on the
        atoms listed in ``aids``.  Each of those atoms must sit at a position
        that ``lstFeq`` accepts with tolerance 0.5 relative to the first
        conformer.
        """
        mol = Chem.MolFromSmiles('C1CC1CNc(n2)nc(C)cc2Nc(cc34)ccc3[nH]nc4')

        cids = rdDistGeom.EmbedMultipleConfs(mol, 10, 30, 100)

        for cid in cids:
            ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, confId=cid)
            ff.Initialize()
            more = 1
            while more:
                more = ff.Minimize()
            # FIX: this should not be necessary, but somehow `more` comes out
            # as 0 even while the structure is still poor.
            ff.Minimize()
        aids = [12, 13, 14, 15, 16, 17, 18]
        rdMolAlign.AlignMolConformers(mol, aids)

        # now test that the locations of these atoms are consistent
        confs = mol.GetConformers()
        for aid in aids:
            mpos = 0
            for i, conf in enumerate(confs):
                if (i == 0):
                    # The first conformer provides the reference position.
                    mpos = list(conf.GetAtomPosition(aid))
                    continue
                else:
                    pos = list(conf.GetAtomPosition(aid))

                    # assertTrue replaces failUnless, a deprecated alias that
                    # was removed from unittest in Python 3.12.
                    self.assertTrue(lstFeq(mpos, pos, .5))
def embed_conf(mol,initial_confs,args,log,coord_Map,alg_Map, mol_template):
	"""Embed ``initial_confs`` conformers, retrying with random coordinates.

	When ``coord_Map``, ``alg_Map`` and ``mol_template`` are all ``None`` the
	molecule is embedded freely; otherwise ``coord_Map`` is passed to the
	embedder as ``coordMap``.  If the first attempt returns no conformers, or
	a single conformer when more than one was requested, a message is written
	to ``log`` and the embedding is repeated with ``useRandomCoords=True``.

	Returns the conformer ids from the last embedding call.
	"""
	free_embedding = coord_Map is None and alg_Map is None and mol_template is None

	# Options shared by the first attempt and the random-coordinate retry.
	shared_options = {
		'randomSeed': args.seed,
		'ignoreSmoothingFailures': True,
		'numThreads': 0,
	}
	if not free_embedding:
		# case of embed for templates
		shared_options['coordMap'] = coord_Map

	cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs, **shared_options)
	n_embedded = len(cids)
	if n_embedded == 0 or (n_embedded == 1 and initial_confs != 1):
		log.write("o  Normal RDKit embeding process failed, trying to generate conformers with random coordinates (with "+str(initial_confs)+" possibilities)")
		cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs, useRandomCoords=True, boxSizeMult=10.0, numZeroFail=1000, **shared_options)
	if args.verbose:
		log.write("o  "+ str(len(cids))+" conformers initially generated")

	return cids
示例#7
0
      def _multiConfFromSmiles(smiles, nConfs=10, maxIters=500):
          """Build a hydrogen-completed molecule from SMILES and MMFF-optimize its conformers.

          ``nConfs`` conformers are requested from the embedder and each one
          returned is optimized with at most ``maxIters`` iterations.  Returns
          the RDKit mol carrying the optimized conformers.
          """
          mol_with_hs = Chem.AddHs(Chem.MolFromSmiles(smiles))
          conf_ids = rdDistGeom.EmbedMultipleConfs(mol_with_hs, nConfs)

          for conf_id in conf_ids:
              # The optimizer's return value is not inspected.
              ChemicalForceFields.MMFFOptimizeMolecule(mol_with_hs, confId=conf_id, maxIters=maxIters)
          return mol_with_hs
示例#8
0
 def testGitHub2820(self):
     """MMFF optimization of a molecule without MMFF properties reports (-1, -1.0).

     For ``[Na]C`` the MMFF property lookup is expected to return ``None``;
     optimizing its two conformers must then yield two identical
     ``(-1, -1.0)`` results.
     """
     mol = Chem.MolFromSmiles("[Na]C")
     self.assertIsNotNone(mol)
     props = ChemicalForceFields.MMFFGetMoleculeProperties(mol)
     self.assertIsNone(props)
     rdDistGeom.EmbedMultipleConfs(mol, 2)
     outcomes = ChemicalForceFields.MMFFOptimizeMoleculeConfs(mol)
     self.assertEqual(len(outcomes), 2)
     # The length check above guarantees exactly two entries to unpack.
     first, second = outcomes
     self.assertEqual(first, second)
     self.assertEqual(first, (-1, -1.0))
示例#9
0
    def test6RmsPruning(self):
        """Check the number of conformers that survive RMS pruning.

        Each SMILES is embedded with ``pruneRmsThresh=1.5`` and the number of
        conformers kept is compared with a stored expectation, allowing a
        difference of at most one.  The check runs twice: once through the
        keyword-argument interface, and once through an ``ETKDG`` parameter
        object with ``useSymmetryForPruning`` switched off ("previous
        settings"), each with its own expected counts.
        """
        smiles = [
            'CC(C)CC(NC(C1[N+]CCC1)=O)C([O-])=O',
            'CC(NC(CO)C(O)c1ccc([N+]([O-])=O)cc1)=O',
            'CC([N+])C(NC(C)C(N1C(C=O)CCC1)=O)=O',
            'CC(NC1C(O)C=C(C([O-])=O)OC1C(O)C(O)CO)=O',
            'CCCC=C(NC(C1CC1(C)C)=O)C([O-])=O',
            'OCC(O)C(O)C(Cn1c2c(cc(C)c(C)c2)nc-2c(=O)[nH]c(=O)nc12)O'
        ]

        # Expected counts for the keyword-argument call.  An earlier
        # assignment that was immediately overwritten has been removed.
        expected = [3, 3, 5, 4, 4, 4]
        nconfs = []
        for smi in smiles:
            mol = Chem.MolFromSmiles(smi)
            cids = rdDistGeom.EmbedMultipleConfs(mol,
                                                 50,
                                                 maxAttempts=30,
                                                 randomSeed=100,
                                                 pruneRmsThresh=1.5)
            nconfs.append(len(cids))

        d = [abs(x - y) for x, y in zip(expected, nconfs)]
        # assertLessEqual reports the offending value when the check fails.
        self.assertLessEqual(max(d), 1)

        # previous settings
        params = rdDistGeom.ETKDG()
        params.randomSeed = 100
        params.maxIterations = 30
        params.pruneRmsThresh = 1.5
        params.useSymmetryForPruning = False
        expected = [4, 5, 5, 4, 5, 4]
        nconfs = []
        for smi in smiles:
            mol = Chem.MolFromSmiles(smi)
            cids = rdDistGeom.EmbedMultipleConfs(mol, 50, params)
            nconfs.append(len(cids))

        d = [abs(x - y) for x, y in zip(expected, nconfs)]
        self.assertLessEqual(max(d), 1)
示例#10
0
 def test3MultiConf(self):
   """Embed ten conformers with a fixed seed and compare UFF energies to stored values."""
   mol = Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC")
   conf_ids = rdDistGeom.EmbedMultipleConfs(mol, 10, maxAttempts=30, randomSeed=100)
   # Stored reference energies, compared in the order the conformers are returned.
   reference = [112.98, 103.57, 110.78, 100.40, 95.37, 101.64, 114.72, 112.65, 124.53, 107.50]
   computed = [
     ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, conf_id).CalcEnergy()
     for conf_id in conf_ids
   ]
   self.assertTrue(lstEq(reference, computed, tol=1e-2))
示例#11
0
 def test3MultiConf(self):
     """Embed ten conformers without torsion preferences or basic knowledge.

     The embedding uses a fixed seed with ``useExpTorsionAnglePrefs`` and
     ``useBasicKnowledge`` both disabled; the UFF energies of the conformers
     must match the stored values within 1e-2.
     """
     mol = Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC")
     embed_options = dict(maxAttempts=30,
                          randomSeed=100,
                          useExpTorsionAnglePrefs=False,
                          useBasicKnowledge=False)
     conf_ids = rdDistGeom.EmbedMultipleConfs(mol, 10, **embed_options)
     # Stored reference energies, in the order the conformers are returned.
     reference = [
         116.330, 106.246, 109.816, 104.890, 93.060,
         140.803, 139.253, 95.820, 123.591, 108.655,
     ]
     computed = []
     for conf_id in conf_ids:
         field = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, conf_id)
         computed.append(field.CalcEnergy())
     self.assertTrue(lstEq(reference, computed, tol=1e-2))
示例#12
0
 def test3MultiConf(self):
     """Compare UFF energies of ten seeded conformers against stored values.

     ``useExpTorsionAnglePrefs`` and ``useBasicKnowledge`` are both turned
     off for the embedding; energies must agree with the references to 1e-2.
     """
     mol = Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC")
     conf_ids = rdDistGeom.EmbedMultipleConfs(
         mol,
         10,
         maxAttempts=30,
         randomSeed=100,
         useExpTorsionAnglePrefs=False,
         useBasicKnowledge=False,
     )
     # Stored reference energies, in the order the conformers are returned.
     reference = [
         115.460, 105.891, 109.868, 104.415, 92.944,
         140.917, 139.468, 95.081, 123.528, 107.885,
     ]

     def uff_energy(conf_id):
         return ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, conf_id).CalcEnergy()

     computed = [uff_energy(conf_id) for conf_id in conf_ids]
     self.assertTrue(lstEq(reference, computed, tol=1e-2))
示例#13
0
 def test3MultiConf(self):
     """Embed ten conformers with a fixed seed and compare UFF energies to stored values.

     The energies of the conformers, in the order returned by the embedder,
     must match the reference list within a tolerance of 1e-2.
     """
     mol = Chem.MolFromSmiles("CC(C)(C)c(cc12)n[n]2C(=O)/C=C(N1)/COC")
     cids = rdDistGeom.EmbedMultipleConfs(mol,
                                          10,
                                          maxAttempts=30,
                                          randomSeed=100)
     energies = [
         90.05, 77.35, 91.45, 81.82, 81.60, 75.65, 86.50, 80.35, 80.55,
         73.73
     ]
     nenergies = []
     for cid in cids:
         ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, cid)
         ee = ff.CalcEnergy()
         nenergies.append(ee)
     # assertTrue replaces failUnless, a deprecated alias that was removed
     # from unittest in Python 3.12.
     self.assertTrue(lstEq(energies, nenergies, tol=1e-2))
示例#14
0
  def test6RmsPruning(self):
    """Check how many conformers survive pruning at an RMS threshold of 1.5.

    For each SMILES the number of conformers returned may differ from the
    stored expectation by at most one.
    """
    smiles = [
      'CC(C)CC(NC(C1[N+]CCC1)=O)C([O-])=O',
      'CC(NC(CO)C(O)c1ccc([N+]([O-])=O)cc1)=O',
      'CC([N+])C(NC(C)C(N1C(C=O)CCC1)=O)=O',
      'CC(NC1C(O)C=C(C([O-])=O)OC1C(O)C(O)CO)=O',
      'CCCC=C(NC(C1CC1(C)C)=O)C([O-])=O',
      'OCC(O)C(O)C(Cn1c2c(cc(C)c(C)c2)nc-2c(=O)[nH]c(=O)nc12)O',
    ]
    expected = [5, 6, 6, 6, 6, 3]

    def count_conformers(smi):
      mol = Chem.MolFromSmiles(smi)
      kept = rdDistGeom.EmbedMultipleConfs(mol, 50, maxAttempts=30, randomSeed=100,
                                           pruneRmsThresh=1.5)
      return len(kept)

    nconfs = [count_conformers(smi) for smi in smiles]
    deviations = [abs(want - got) for want, got in zip(expected, nconfs)]

    self.assertTrue(max(deviations) <= 1)
示例#15
0
 def testOptimizeMoleculeConfs(self):
     """Optimize ten conformers with one MMFF force field and check the outcome.

     Every result pair must have 0 as its first element, and its second
     element must be lower than the energy computed for the same conformer
     before optimization.
     """
     m = Chem.AddHs(Chem.MolFromSmiles("CCCO"))
     self.assertIsNotNone(m)
     cids = rdDistGeom.EmbedMultipleConfs(m, numConfs=10)
     self.assertEqual(len(cids), 10)
     mp = ChemicalForceFields.MMFFGetMoleculeProperties(m)
     ff = ChemicalForceFields.MMFFGetMoleculeForceField(m, mp)

     # Energies of the conformers as embedded, before any optimization.
     before = []
     for cid in cids:
         conf_ff = ChemicalForceFields.MMFFGetMoleculeForceField(m, mp, confId=cid)
         before.append(conf_ff.CalcEnergy())

     outcomes = ChemicalForceFields.OptimizeMoleculeConfs(m, ff, maxIters=200)
     # Transpose the (first, second) pairs into two parallel tuples.
     res, after = zip(*outcomes)
     self.assertEqual(len(res), 10)
     self.assertEqual(len(before), len(after))
     self.assertTrue(all(code == 0 for code in res))
     self.assertTrue(all(a < b for a, b in zip(after, before)))
示例#16
0
    def test4AlignConfs(self):
        """Align conformers on a subset of atoms and verify the result.

        Ten conformers are embedded and UFF-minimized, then aligned on the
        atoms in ``aids``.  The test checks that those atoms overlap across
        conformers, that an ``RMSlist`` receives one value per non-reference
        conformer, and that a non-list ``RMSlist`` raises ``AttributeError``.
        """
        mol = Chem.MolFromSmiles('C1CC1CNc(n2)nc(C)cc2Nc(cc34)ccc3[nH]nc4')

        conf_ids = rdDistGeom.EmbedMultipleConfs(mol, 10, 30, 100)

        for conf_id in conf_ids:
            field = ChemicalForceFields.UFFGetMoleculeForceField(mol, confId=conf_id)
            field.Initialize()
            # Keep minimizing for as long as Minimize() returns a true value.
            while field.Minimize():
                pass
            # FIX: this extra pass should not be necessary, but Minimize()
            # can return 0 even while the structure is still poor.
            field.Minimize()
        aids = [12, 13, 14, 15, 16, 17, 18]
        rdMolAlign.AlignMolConformers(mol, aids)

        # now test that the locations of these atoms are consistent
        confs = mol.GetConformers()
        for aid in aids:
            reference = None
            for conf in confs:
                position = list(conf.GetAtomPosition(aid))
                if reference is None:
                    # The first conformer provides the reference position.
                    reference = position
                    continue
                self.assertTrue(lstFeq(reference, position, .5))

        # now test that we can get a list of RMS values
        rmsvals = []
        rdMolAlign.AlignMolConformers(mol, aids, RMSlist=rmsvals)
        self.assertTrue((len(rmsvals) == mol.GetNumConformers() - 1))

        # make sure something sensible happens if we provide a
        # non-list argument:
        rmsvals = 4
        with self.assertRaises(AttributeError):
            rdMolAlign.AlignMolConformers(mol, atomIds=aids, RMSlist=rmsvals)
示例#17
0
def summ_search(mol,
                name,
                args,
                log,
                dup_data,
                dup_data_idx,
                coord_Map=None,
                alg_Map=None,
                mol_template=None):
    '''embeds core conformers, then optimizes and filters based on RMSD. Finally the rotatable torsions are systematically rotated'''

    sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + args.output)

    Chem.SanitizeMol(mol)
    mol = Chem.AddHs(mol)
    mol.SetProp("_Name", name)

    # detects and applies auto-detection of initial number of conformers
    if args.sample == 'auto':
        initial_confs = int(auto_sampling(args.auto_sample, mol, log))

    else:
        initial_confs = int(args.sample)

    #
    dup_data.at[dup_data_idx, 'Molecule'] = name
    dup_data.at[dup_data_idx, 'RDKIT-Initial-samples'] = initial_confs

    if args.nodihedrals == False:
        rotmatches = getDihedralMatches(mol, args.heavyonly, log)
    else:
        rotmatches = []

    if len(rotmatches) > args.max_torsions:
        log.write("x  Too many torsions (%d). Skipping %s" %
                  (len(rotmatches), (name + args.output)))
        status = -1
    else:
        if coord_Map == None and alg_Map == None and mol_template == None:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol,
                                                     initial_confs,
                                                     params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    ignoreSmoothingFailures=True,
                    randomSeed=args.seed,
                    numThreads=0)
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write(
                    "o  conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    useRandomCoords=True,
                    boxSizeMult=10.0,
                    ignoreSmoothingFailures=True,
                    numZeroFail=1000,
                    numThreads=0)
            if args.verbose:
                log.write("o  " + str(len(cids)) +
                          " conformers initially sampled")
        # case of embed for templates
        else:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.coordMap = coord_Map
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol,
                                                     initial_confs,
                                                     params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    ignoreSmoothingFailures=True,
                    coordMap=coord_Map,
                    numThreads=0)
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write(
                    "o  conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    useRandomCoords=True,
                    boxSizeMult=10.0,
                    numZeroFail=1000,
                    ignoreSmoothingFailures=True,
                    coordMap=coord_Map,
                    numThreads=0)
            if args.verbose:
                log.write("o  " + str(len(cids)) +
                          " conformers initially sampled")

        #energy minimize all to get more realistic results
        #identify the atoms and decide Force Field

        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() > 36:  #upto Kr for MMFF, if not use UFF
                args.ff = "UFF"
                #log.write("UFF is used because there are atoms that MMFF doesn't recognise")
        if args.verbose:
            log.write("o  Optimizing " + str(len(cids)) +
                      " initial conformers with" + args.ff)
        if args.verbose:
            if args.nodihedrals == False:
                log.write("o  Found " + str(len(rotmatches)) +
                          " rotatable torsions")
                # for [a,b,c,d] in rotmatches:
                # 	log.write('  '+mol.GetAtomWithIdx(a).GetSymbol()+str(a+1)+ mol.GetAtomWithIdx(b).GetSymbol()+str(b+1)+ mol.GetAtomWithIdx(c).GetSymbol()+str(c+1)+mol.GetAtomWithIdx(d).GetSymbol()+str(d+1))
            else:
                log.write("o  Systematic torsion rotation is set to OFF")

        cenergy, outmols = [], []
        bar = IncrementalBar('o  Minimizing', max=len(cids))
        for i, conf in enumerate(cids):
            if coord_Map == None and alg_Map == None and mol_template == None:
                if args.ff == "MMFF":
                    GetFF = Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = Chem.UFFGetMoleculeForceField(mol, confId=conf)
                else:
                    log.write('   Force field {} not supported!'.format(
                        args.ff))
                    sys.exit()

                GetFF.Initialize()
                converged = GetFF.Minimize(maxIts=args.opt_steps_RDKit)
                energy = GetFF.CalcEnergy()
                cenergy.append(GetFF.CalcEnergy())

                #if args.verbose:
                #    log.write("-   conformer", (i+1), "optimized: ", args.ff, "energy", GetFF.CalcEnergy())
            #id template realign before doing calculations
            else:
                num_atom_match = mol.GetSubstructMatch(mol_template)
                # Force field parameters
                if args.ff == "MMFF":
                    GetFF = lambda mol, confId=conf: Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = lambda mol, confId=conf: Chem.UFFGetMoleculeForceField(
                        mol, confId=conf)
                else:
                    log.write('   Force field {} not supported!'.format(
                        options.ff))
                    sys.exit()
                getForceField = GetFF

                # clean up the conformation
                ff_temp = getForceField(mol, confId=conf)
                for k, idxI in enumerate(num_atom_match):
                    for l in range(k + 1, len(num_atom_match)):
                        idxJ = num_atom_match[l]
                        d = coord_Map[idxI].Distance(coord_Map[idxJ])
                        ff_temp.AddDistanceConstraint(idxI, idxJ, d, d, 10000)
                ff_temp.Initialize()
                #reassignned n from 4 to 10 for better embed and minimzation
                n = 10
                more = ff_temp.Minimize()
                while more and n:
                    more = ff_temp.Minimize()
                    n -= 1
                energy = ff_temp.CalcEnergy()
                # rotate the embedded conformation onto the core_mol:
                rms = rdMolAlign.AlignMol(mol,
                                          mol_template,
                                          prbCid=conf,
                                          atomMap=alg_Map,
                                          reflect=True,
                                          maxIters=100)
                # elif len(num_atom_match) == 5:
                #     ff_temp = GetFF(mol, confId=conf)
                #     conf_temp = mol_template.GetConformer()
                #     for k in range(mol_template.GetNumAtoms()):
                #         p = conf_temp.GetAtomPosition(k)
                #         q = mol.GetConformer(conf).GetAtomPosition(k)
                #         pIdx = ff_temp.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
                #         ff_temp.AddDistanceConstraint(pIdx, num_atom_match[k], 0, 0, 10000)
                #     ff_temp.Initialize()
                #     n = 10
                #     more = ff_temp.Minimize(energyTol=1e-6, forceTol=1e-5)
                #     while more and n:
                #         more = ff_temp.Minimize(energyTol=1e-6, forceTol=1e-5)
                #         n -= 1
                #     # realign
                #     energy = ff_temp.CalcEnergy()
                #     rms = rdMolAlign.AlignMol(mol, mol_template,prbCid=conf, atomMap=alg_Map,reflect=True,maxIters=50)
                cenergy.append(energy)

            # outmols is gonna be a list containing "initial_confs" mol objects with "initial_confs"
            # conformers. We do this to SetProp (Name and Energy) to the different conformers
            # and log.write in the SDF file. At the end, since all the mol objects has the same
            # conformers, but the energies are different, we can log.write conformers to SDF files
            # with the energies of the parent mol objects. We measured the computing time and
            # it's the same as using only 1 parent mol object with 10 conformers, but we couldn'temp
            # SetProp correctly
            pmol = PropertyMol.PropertyMol(mol)
            outmols.append(pmol)
            bar.next()
        bar.finish()

        for i, cid in enumerate(cids):
            outmols[cid].SetProp('_Name', name + ' conformer ' + str(i + 1))
            outmols[cid].SetProp('Energy', cenergy[cid])

        cids = list(range(len(outmols)))
        sortedcids = sorted(cids, key=lambda cid: cenergy[cid])

        log.write("\n\no  Filters after intial embedding of " +
                  str(initial_confs) + " conformers")
        selectedcids, selectedcids_initial, eng_dup, eng_rms_dup = [], [], -1, -1
        bar = IncrementalBar('o  Filtering based on energy (pre-filter)',
                             max=len(sortedcids))
        for i, conf in enumerate(sortedcids):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids_initial.append(conf)
            # check rmsd
            for seenconf in selectedcids_initial:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.initial_energy_threshold:
                    eng_dup += 1
                    excluded_conf = True
                    break
            if excluded_conf == False:
                if conf not in selectedcids_initial:
                    selectedcids_initial.append(conf)
            bar.next()
        bar.finish()

        if args.verbose == True:
            log.write("o  " + str(eng_dup) +
                      " Duplicates removed  pre-energy filter (E < " +
                      str(args.initial_energy_threshold) + " kcal/mol )")

        #reduce to unique set
        if args.verbose:
            log.write("o  Removing duplicate conformers ( RMSD < " +
                      str(args.rms_threshold) + " and E difference < " +
                      str(args.energy_threshold) + " kcal/mol)")

        bar = IncrementalBar('o  Filtering based on energy and rms',
                             max=len(selectedcids_initial))
        #check rmsd
        for i, conf in enumerate(selectedcids_initial):

            #set torsions to same value
            for m in rotmatches:
                rdMolTransforms.SetDihedralDeg(
                    outmols[conf].GetConformer(conf), *m, 180.0)

            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids.append(conf)
            # check rmsd
            for seenconf in selectedcids:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.energy_threshold:
                    rms = get_conf_RMS(outmols[conf], outmols[conf], seenconf,
                                       conf, args.heavyonly,
                                       args.max_matches_RMSD, log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        eng_rms_dup += 1
                        break
            if excluded_conf == False:
                if conf not in selectedcids:
                    selectedcids.append(conf)
            bar.next()
        bar.finish()

        # unique_mols, unique_energies = [],[]
        # for id in selectedcids:
        #     unique_mols.append(outmols[id])
        #     unique_energies.append(cenergy[id])

        # log.write(unique_mols[0:2].GetConformers()[0].GetPositions())

        if args.verbose == True:
            log.write("o  " + str(eng_rms_dup) +
                      " Duplicates removed (RMSD < " +
                      str(args.rms_threshold) + " / E < " +
                      str(args.energy_threshold) + " kcal/mol) after rotation")
        if args.verbose:
            log.write("o  " + str(len(selectedcids)) +
                      " unique (ignoring torsions) starting conformers remain")

        dup_data.at[dup_data_idx, 'RDKit-energy-duplicates'] = eng_dup
        dup_data.at[dup_data_idx,
                    'RDKit-RMS-and-energy-duplicates'] = eng_rms_dup
        dup_data.at[dup_data_idx,
                    'RDKIT-Unique-conformers'] = len(selectedcids)

        # now exhaustively drive torsions of selected conformers
        n_confs = int(len(selectedcids) * (360 / args.degree)**len(rotmatches))
        if args.verbose and len(rotmatches) != 0:
            log.write("\n\no  Systematic generation of " + str(n_confs) +
                      " confomers")
            bar = IncrementalBar(
                'o  Generating conformations based on dihedral rotation',
                max=len(selectedcids))
        else:
            bar = IncrementalBar('o  Generating conformations',
                                 max=len(selectedcids))

        total = 0
        for conf in selectedcids:
            #log.write(outmols[conf])
            total += genConformer_r(outmols[conf], conf, 0, rotmatches,
                                    args.degree, sdwriter, args,
                                    outmols[conf].GetProp('_Name'), log)
            bar.next()
        bar.finish()
        if args.verbose and len(rotmatches) != 0:
            log.write("o  %d total conformations generated" % total)
        status = 1
    sdwriter.close()

    #getting the energy from and mols after rotations
    if len(rotmatches) != 0:
        rdmols = Chem.SDMolSupplier(name + '_' + 'rdkit' + args.output,
                                    removeHs=False)
        if rdmols is None:
            log.write("Could not open " + name + args.output)
            sys.exit(-1)

        bar = IncrementalBar(
            'o  Filtering based on energy and rms after rotation of dihedrals',
            max=len(rdmols))
        sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + '_' + 'rotated' +
                                 args.output)

        rd_count = 0
        rd_selectedcids, rd_dup_energy, rd_dup_rms_eng = [], -1, 0
        for i in range(len(rdmols)):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if rd_count == 0:
                rd_selectedcids.append(i)
                if args.metal_complex == True:
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                            atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
            # Only the first ID gets included
            rd_count = 1
            # check rmsd
            for j in rd_selectedcids:
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.initial_energy_threshold:  # comparison in kcal/mol
                    excluded_conf = True
                    rd_dup_energy += 1
                    break
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.energy_threshold:  # in kcal/mol
                    rms = get_conf_RMS(rdmols[i], rdmols[j], -1, -1,
                                       args.heavyonly, args.max_matches_RMSD,
                                       log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        rd_dup_rms_eng += 1
                        break
            if excluded_conf == False:
                if args.metal_complex == True:
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                            atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
                if i not in rd_selectedcids:
                    rd_selectedcids.append(i)
            bar.next()
        bar.finish()
        sdwriter.close()

        if args.verbose == True:
            log.write("o  " + str(rd_dup_energy) +
                      " Duplicates removed initial energy ( E < " +
                      str(args.initial_energy_threshold) + " kcal/mol )")
        if args.verbose == True:
            log.write("o  " + str(rd_dup_rms_eng) +
                      " Duplicates removed (RMSD < " +
                      str(args.rms_threshold) + " / E < " +
                      str(args.energy_threshold) + " kcal/mol) after rotation")
        if args.verbose == True:
            log.write("o  " + str(len(rd_selectedcids)) +
                      " unique (after torsions) conformers remain")

        #filtering process after rotations
        dup_data.at[dup_data_idx, 'RDKIT-Rotated-conformers'] = total
        dup_data.at[dup_data_idx,
                    'RDKIT-Rotated-Unique-conformers'] = len(rd_selectedcids)

    return status
示例#18
0
    def test6Chirality(self):
        """Check that embedding respects, or ignores, chiral tags as requested.

        Molecules are embedded with and without a chiral tag on their
        stereocenter(s). For each resulting conformer the value returned by
        ``computeChiralVol`` for four selected atoms is compared against a
        target: tagged halomethanes must stay close to one signed target,
        untagged ones must be close to either sign and show both signs overall.
        """

        def chiralVol(conf, i0, i1, i2, i3):
            # Forward the positions of the four indexed atoms of `conf` to
            # computeChiralVol, in the given order.
            return computeChiralVol(conf.GetAtomPosition(i0),
                                    conf.GetAtomPosition(i1),
                                    conf.GetAtomPosition(i2),
                                    conf.GetAtomPosition(i3))

        # turn on chirality and we should get chiral volume that is pretty
        # consistent and positive
        tgtVol = 13.0
        mol = Chem.MolFromSmiles("Cl[C@](C)(F)Br")
        cids = rdDistGeom.EmbedMultipleConfs(mol,
                                             30,
                                             maxAttempts=30,
                                             randomSeed=100)
        self.assertEqual(len(cids), 30)
        for cid in cids:
            vol = chiralVol(mol.GetConformer(cid), 0, 2, 3, 4)
            self.assertLess(abs(vol - tgtVol), 1)

        # turn off chirality and now we should see both chiral forms
        mol = Chem.MolFromSmiles("ClC(C)(F)Br")
        cids = rdDistGeom.EmbedMultipleConfs(mol,
                                             30,
                                             maxAttempts=30,
                                             randomSeed=120)
        self.assertEqual(len(cids), 30)
        nPos = 0
        nNeg = 0
        for cid in cids:
            vol = chiralVol(mol.GetConformer(cid), 0, 2, 3, 4)
            self.assertTrue(
                abs(vol - tgtVol) < 1 or abs(vol + tgtVol) < 1,
                "%s is not within 1 of +/-%s" % (vol, tgtVol))
            if vol < 0:
                nNeg += 1
            else:
                nPos += 1
        self.assertGreater(nPos, 0)
        self.assertGreater(nNeg, 0)

        # same chiral check with an implicit hydrogen on the center, one
        # single-conformer embedding per random seed
        tgtVol = 5.0
        for i in range(10):
            mol = Chem.MolFromSmiles("Cl[C@H](F)Br")
            ci = rdDistGeom.EmbedMolecule(mol, 30, (i + 1) * 10)
            vol = chiralVol(mol.GetConformer(ci), 0, 1, 2, 3)
            self.assertLess(abs(vol - tgtVol), 1, "%s %s" % (vol, tgtVol))

        # ... and without the chiral tag both signs must show up
        tgtVol = 3.5
        nPos = 0
        nNeg = 0
        for i in range(30):
            mol = Chem.MolFromSmiles("ClC(F)Br")
            ci = rdDistGeom.EmbedMolecule(mol, 30, (i + 1) * 10)
            vol = chiralVol(mol.GetConformer(ci), 0, 1, 2, 3)
            self.assertTrue(
                abs(vol - tgtVol) < 1 or abs(vol + tgtVol) < 1,
                "%s is not within 1 of +/-%s" % (vol, tgtVol))
            if vol < 0:
                nNeg += 1
            else:
                nPos += 1

        self.assertGreater(nPos, 0)
        self.assertGreater(nNeg, 0)

        # chiral center again, this time with explicit hydrogens added
        mol = Chem.AddHs(Chem.MolFromSmiles("Cl[C@H](F)Br"))
        cids = rdDistGeom.EmbedMultipleConfs(mol,
                                             10,
                                             maxAttempts=30,
                                             randomSeed=100)
        self.assertEqual(len(cids), 10)
        tgtVol = 10.5
        for cid in cids:
            vol = chiralVol(mol.GetConformer(cid), 0, 2, 3, 4)
            self.assertLess(abs(vol - tgtVol), 2.)

        # let's try a little more complicated system
        expectedV1 = -2.0
        expectedV2 = -2.9

        for i in range(5):
            smi = "C1=CC=C(C=C1)[C@H](OC1=C[NH]N=C1)C(=O)[NH]C[C@H](Cl)C1=CC=NC=C1"
            mol = Chem.MolFromSmiles(smi)
            ci = rdDistGeom.EmbedMolecule(mol, randomSeed=(i + 1) * 15)
            self.assertGreaterEqual(ci, 0)
            ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, ci)
            ff.Minimize()

            conf = mol.GetConformer(ci)
            vol1 = chiralVol(conf, 6, 3, 7, 13)
            self.assertTrue(
                abs(vol1 - expectedV1) < 1 or abs(vol1 + expectedV1) < 1,
                "%s is not within 1 of +/-%s" % (vol1, expectedV1))
            # NOTE: nPos/nNeg are deliberately NOT reset here, so they keep
            # the counts accumulated by the untagged "ClC(F)Br" loop above.
            if vol1 < 0:
                nNeg += 1
            else:
                nPos += 1

            vol2 = chiralVol(conf, 17, 16, 18, 19)
            self.assertTrue(
                abs(vol2 - expectedV2) < 1 or abs(vol2 + expectedV2) < 1,
                "%s is not within 1 of +/-%s" % (vol2, expectedV2))

        # remove the chiral specification and we should see other chiral
        # forms of the compound
        expectedV1 = 2.0  #[-2.30, -2.31, -2.30,  2.30, -1.77]
        expectedV2 = 2.8  #[2.90,  2.89,  2.69, -2.90, -2.93]

        # NOTE(review): both counters were already asserted positive before the
        # previous loop and only grow afterwards, so these two checks cannot
        # fail; they are kept to leave the accepted behavior unchanged.
        self.assertGreater(nPos, 0)
        self.assertGreater(nNeg, 0)
        for i in range(5):
            smi = "C1=CC=C(C=C1)C(OC1=C[NH]N=C1)C(=O)[NH]CC(Cl)C1=CC=NC=C1"
            mol = Chem.MolFromSmiles(smi)
            ci = rdDistGeom.EmbedMolecule(mol, 30, (i + 1) * 10)
            ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, 10.0, ci)
            ff.Minimize()

            conf = mol.GetConformer(ci)
            vol1 = chiralVol(conf, 6, 3, 7, 13)
            vol2 = chiralVol(conf, 17, 16, 18, 19)
            # only the magnitude is checked here: either sign is acceptable
            self.assertLess(abs(abs(vol1) - expectedV1), 1.0)
            self.assertLess(abs(abs(vol2) - expectedV2), 1.0)
#suppl = Chem.SDMolSupplier('./platinum_dataset_2017_01.sdf', removeHs=False)
# Load the molecule table; its second column is fed to the SMILES parser below.
df = pd.read_csv('molecules_with_logS.csv')
smiles = df.iloc[:, 1]

# Parse every entry of that column into a molecule object.
mols = [Chem.MolFromSmiles(smile) for smile in smiles]

# The rest of the script works on one hard-coded entry of the table.
mol = mols[1201]

# 1. Conformer generation on the hydrogen-added molecule with ETKDGv2 parameters
m_h = Chem.AddHs(mol)
pm = rdDistGeom.ETKDGv2()
cids = rdDistGeom.EmbedMultipleConfs(m_h, number_of_conformation, pm)
print(m_h.GetNumConformers())

# 2. MMFF optimization: minimize each conformer in turn and record the energy
#    reported by its force field afterwards.
prop = AllChem.MMFFGetMoleculeProperties(m_h)
energy = []
for cid in cids:
    mmff = AllChem.MMFFGetMoleculeForceField(m_h, prop, confId=cid)
    mmff.Minimize()
    energy.append(mmff.CalcEnergy())
energy = np.array(energy)

# 3. RMS matrix between conformers, computed after stripping the hydrogens
m = Chem.RemoveHs(m_h)
rms_mat = AllChem.GetConformerRMSMatrix(m)