示例#1
0
def mol_from_smiles(smiles, name, standardise=False):
    """Build an RDKit `PropertyMol` from a SMILES string.

    Parameters
    ----------
    smiles : str
        SMILES string to parse.
    name : str
        Name of the molecule; stored in the ``_Name`` property.
    standardise : bool, optional
        If True, pass the parsed molecule through
        `mol_to_standardised_mol` before wrapping it.

    Returns
    -------
    RDKit PropertyMol or None
        Molecule with the ``_Name`` and ``_SMILES`` properties set, or
        None (after logging an error) when the SMILES cannot be parsed.
    """
    parsed = rdkit.Chem.MolFromSmiles(smiles)
    if parsed is None:
        failure_info = (smiles, name)
        logging.error(
            "Mol creation failed from SMILES: {!r}".format(failure_info))
        return None

    cleaned = mol_to_standardised_mol(parsed, name) if standardise else parsed
    prop_mol = PropertyMol(cleaned)
    # Record both the caller-supplied name and the source SMILES.
    for key, value in (("_Name", name), ("_SMILES", smiles)):
        prop_mol.SetProp(key, value)
    return prop_mol
示例#2
0
def mol_from_mol2(mol2_file, name=None, standardise=False):
    """Read a mol2 file into an RDKit `PropertyMol`.

    Parameters
    ----------
    mol2_file : str
        Path to a mol2 file.
    name : str, optional
        Name of molecule. If not provided, the file basename without
        its extension is used.
    standardise : bool, optional
        If True, pass the parsed molecule through
        `mol_to_standardised_mol` before wrapping it.

    Returns
    -------
    RDKit PropertyMol or None
        Molecule with the ``_Name`` property set, or None (after logging
        an error) when the file cannot be parsed into a molecule.
    """
    if name is None:
        name = os.path.splitext(os.path.basename(mol2_file))[0]
    mol = rdkit.Chem.MolFromMol2File(mol2_file)
    # The parser signals failure by returning None; handle it the same way
    # as mol_from_smiles instead of passing None on to PropertyMol.
    if mol is None:
        logging.error("Mol creation failed from mol2 file: {!r}".format(
            (mol2_file, name)))
        return None
    if standardise:
        mol = mol_to_standardised_mol(mol, name)
    mol = PropertyMol(mol)
    mol.SetProp("_Name", name)
    return mol
def mult_min(name, args, program,log,dup_data,dup_data_idx):
	"""Re-optimize conformers with `program`, filter them and write them out.

	The input molecules come from `rdkit_sdf_read`. Every non-None molecule
	is passed to `optimize`; the results are ranked by the returned energy,
	labelled with `_Name` / `Energy` properties, reduced by the energy-window,
	energy-only and RMSD+energy filters, and finally handed to `write_confs`.

	Parameters
	----------
	name : str
		Base name of the job; the part before '_rdkit' is used in the
		conformer names.
	args : object
		Run options; only `verbose` and `output` are read here, the rest
		is forwarded to the helper functions.
	program : str
		'xtb' or 'ani'; selects which sample-count column of `dup_data`
		is filled in.
	log : object
		Logger exposing `write`.
	dup_data : table supporting `.at[row, column]` assignment
		Bookkeeping table for sample/duplicate counts.
	dup_data_idx : row label into `dup_data`.

	Returns
	-------
	None
	"""
	# read SDF files from RDKit optimization
	inmols = rdkit_sdf_read(name, args, log)

	cenergy, outmols = [],[]
	if args.verbose:
		log.write("\n\no  Multiple minimization of "+ name+args.output+ " with "+ program)
	bar = IncrementalBar('o  Minimizing', max = len(inmols))

	for mol in inmols:
		bar.next()
		if mol is None:
			continue
		# optimize this structure and record the energy
		mol,energy = optimize(mol, args, program,log,dup_data,dup_data_idx)
		outmols.append(PropertyMol.PropertyMol(mol))
		cenergy.append(energy)
	# close the progress bar so later output starts on a fresh line
	# (matches the other minimization routines in this file)
	bar.finish()

	# rank the optimized conformers by increasing energy
	cids = list(range(len(outmols)))
	sorted_all_cids = sorted(cids, key = lambda cid: cenergy[cid])

	name_mol = name.split('_rdkit')[0]
	for i, cid in enumerate(sorted_all_cids):
		outmols[cid].SetProp('_Name', name_mol +' conformer ' + str(i+1))
		# NOTE(review): SetProp normally expects a string value; confirm that
		# `optimize` returns an energy type that RDKit accepts here.
		outmols[cid].SetProp('Energy', cenergy[cid])

	log.write("\n\no  Applying filters to intial conformers")
	# filter based on energy window
	sortedcids = ewin_filter(sorted_all_cids,cenergy,args,dup_data,dup_data_idx,log,'xtb_ani')
	# pre-filter based on energy only
	selectedcids_initial = pre_E_filter(sortedcids,cenergy,args,dup_data,dup_data_idx,log,'xtb_ani')
	# filter based on energy and RMSD
	selectedcids = RMSD_and_E_filter(outmols,selectedcids_initial,cenergy,args,dup_data,dup_data_idx,log,'xtb_ani')

	if program == 'xtb':
		dup_data.at[dup_data_idx, 'xTB-Initial-samples'] = len(inmols)
	if program == 'ani':
		dup_data.at[dup_data_idx, 'ANI1ccx-Initial-samples'] = len(inmols)

	# write the filtered, ordered conformers to external file
	write_confs(outmols, cenergy,selectedcids, name, args, program,log)
def min_and_E_calc(mol,cids,args,log,coord_Map,alg_Map,mol_template):
	"""Minimize every conformer in `cids` and collect its energy.

	When no template information is supplied (all of `coord_Map`, `alg_Map`
	and `mol_template` are None) each conformer goes through
	`minimize_rdkit_energy`; otherwise it is realigned with `realign_mol`,
	which also returns the (possibly replaced) molecule.

	Returns
	-------
	(list, list)
		One `PropertyMol` snapshot of `mol` per conformer id, and the
		matching list of energies from the force field's `CalcEnergy()`.
	"""
	energies = []
	snapshots = []
	# the template arguments never change inside the loop, so test them once
	no_template = coord_Map is None and alg_Map is None and mol_template is None
	progress = IncrementalBar('o  Minimizing', max = len(cids))
	for conf_id in cids:
		if no_template:
			force_field = minimize_rdkit_energy(mol,conf_id,args,log)
		else:
			# template case: realign before computing the energy
			mol,force_field = realign_mol(mol,conf_id,coord_Map, alg_Map, mol_template,args,log)
		energies.append(force_field.CalcEnergy())
		# One PropertyMol copy is stored per conformer so that name/energy
		# properties can later be set independently on each copy.
		snapshots.append(PropertyMol.PropertyMol(mol))
		progress.next()
	progress.finish()
	return snapshots,energies
示例#5
0
    def filter_conformers(self, mol):
        """Filter conformers which do not meet an RMSD threshold.

        Conformers are visited in order of increasing energy. The
        lowest-energy conformer is always kept. Each later conformer is
        kept only if fewer than ``self.first_conformers`` conformers have
        been accepted so far, its energy is within
        ``self.max_energy_diff`` of the minimum (check skipped when that
        attribute is -1), and its best RMSD to every accepted conformer is
        at least ``self.rmsd_cutoff``.

        Parameters
        ----------
        mol : RDKit Mol
            Molecule carrying the conformers to filter.

        Returns
        -------
        new : RDKit PropertyMol
            Copy of `mol` with hydrogens removed, containing only the
            accepted conformers in order of increasing energy.
        accepted : ndarray of int
            Positions (in the original conformer list) of the accepted
            conformers, in acceptance order.
        energies : ndarray
            Energies of the accepted conformers, in the same order.
        rmsds : ndarray of float
            Square matrix of RMSDs recorded between accepted conformers.
        """
        logging.debug("Pruning conformers for %s" % mol.GetProp('_Name'))
        energies = self.get_conformer_energies(mol)
        energy_below_threshold = np.ones_like(energies, dtype=np.bool_)

        sort = np.argsort(energies)  # sort by increasing energy
        confs = np.array(mol.GetConformers())

        # remove hydrogens to speed up substruct match
        mol = Chem.RemoveHs(mol)
        accepted = []
        # builtin float/int replace the np.float/np.int aliases, which were
        # removed from NumPy; the resulting dtypes are unchanged
        rmsds = np.zeros((confs.shape[0], confs.shape[0]), dtype=float)
        for fit_ind in sort:
            accepted_num = len(accepted)

            # always accept lowest-energy conformer
            if accepted_num == 0:
                accepted.append(fit_ind)

                # pre-compute if Es are in acceptable range of min E
                if self.max_energy_diff != -1.:
                    energy_below_threshold = (
                        energies <= energies[fit_ind] + self.max_energy_diff)

                # nothing to compare against yet; without this the first
                # conformer would be RMSD-checked against itself
                continue

            # reject conformers after first_conformers is reached
            if accepted_num >= self.first_conformers:
                continue

            # check if energy is too high
            if not energy_below_threshold[fit_ind]:
                continue

            # get RMSD to selected conformers
            these_rmsds = np.zeros((accepted_num, ), dtype=float)
            # reverse so all confs aligned to lowest energy
            for j, accepted_ind in self.reverse_enumerate(accepted):
                this_rmsd = AllChem.GetBestRMS(mol, mol,
                                               confs[accepted_ind].GetId(),
                                               confs[fit_ind].GetId())
                # reject conformers within the RMSD threshold
                if this_rmsd < self.rmsd_cutoff:
                    break
                else:
                    # NOTE(review): slot -j-1 mirrors index j; whether that
                    # matches the column order used below depends on how
                    # reverse_enumerate numbers its items -- confirm.
                    these_rmsds[-j - 1] = this_rmsd
            else:
                # loop finished without a break: conformer is distinct
                rmsds[fit_ind, accepted] = these_rmsds
                rmsds[accepted, fit_ind] = these_rmsds
                accepted.append(fit_ind)

        # slice and order rmsds and energies to match accepted list
        rmsds = rmsds[np.ix_(accepted, accepted)]
        energies = energies[accepted]

        # create a new molecule with all conformers, sorted by energy
        new = PropertyMol.PropertyMol(mol)
        new.RemoveAllConformers()
        conf_ids = [conf.GetId() for conf in mol.GetConformers()]
        for i in accepted:
            conf = mol.GetConformer(conf_ids[i])
            new.AddConformer(conf, assignId=True)

        logging.debug("Conformers filtered for %s" % mol.GetProp('_Name'))
        return new, np.asarray(accepted, dtype=int), energies, rmsds
示例#6
0
def mult_min(mol, name,args):
	'''Minimize the conformers stored in an SD file, drop duplicates and label them by energy rank.

	Every non-None molecule read from `name+output` is minimized with the
	force field selected by `args.ff` ("MMFF" or "UFF"; anything else exits).
	A conformer is kept when the minimizer reports convergence (return value 0),
	its energy lies within `args.ewin` of the lowest energy seen so far, and no
	previously kept conformer matches it on both energy and `getPMIDIFF`.
	When `args.ANI1ccx` or `args.xtb` is set, each kept conformer is additionally
	evaluated (and, with `opt` on, optimized through ASE BFGS) with the ANI model
	or the GFN2 calculator; those energies and coordinates replace the
	force-field ones after the loop.

	Parameters:
		mol: not read; the name is rebound by the loop over the SD file.
		name: base name, used for the input file and for the `_Name` property.
		args: options object; reads `convergence`, `ANI1ccx`, `xtb`, `ff`, `ewin`,
			`energy_threshold`, `rms_threshold` and `verbose`.

	Returns:
		(outmols, c_energy): the kept `PropertyMol` objects and their energies,
		in the order they were accepted (not sorted).

	NOTE(review): `output`, `constraints`, `model`, `device`, `Hartree`, `GFN2`
	and `getPMIDIFF` are not defined in this block; presumably module-level
	names -- confirm.
	'''

	opt = True # switch to off for single point only
	opt_precision = 0.005 # toggle for optimization convergence

	#adjust opt convergence criteria (args.convergence defaults to 1.0)
	opt_precision = opt_precision * args.convergence

	inmols = Chem.SDMolSupplier(name+output, removeHs=False)
	# NOTE(review): this guard only helps if SDMolSupplier can return None for
	# an unreadable file -- confirm against the RDKit version in use.
	if inmols is None:
		print("Could not open ", name+output)
		sys.exit(-1)

	c_converged, c_energy, outmols = [], [], []
	ani_energy,xtb_energy = 0,0
	# SQM_* lists exist only when one of the SQM options is enabled
	if args.ANI1ccx == True or args.xtb == True: SQM_energy, SQM_cartesians = [], []

	# lowest force-field energy seen so far (running minimum, not global)
	globmin = None

	for i,mol in enumerate(inmols):
		# NOTE(review): `conf` is incremented below but never read.
		conf = 1
		if mol is not None:

			if args.ff == "MMFF":
				GetFF = Chem.MMFFGetMoleculeForceField(mol, Chem.MMFFGetMoleculeProperties(mol))
			elif args.ff == "UFF":
				GetFF = Chem.UFFGetMoleculeForceField(mol)
			else: print(('   Force field {} not supported!'.format(args.ff))); sys.exit()

			GetFF.Initialize()
			converged = GetFF.Minimize(maxIts=1000)
			energy = GetFF.CalcEnergy()
			# append to list
			#if args.verbose: print("   conformer", (i+1), energy)
			if globmin == None: globmin = energy
			if energy < globmin: globmin = energy

			# keep only converged conformers inside the energy window
			if converged == 0 and (energy - globmin) < args.ewin:
				#if args.verbose: print('   minimization converged!')
				# `unique` counts matches found, so 0 means "no duplicate"
				unique, dup_id = 0, None
				#print("Conformer", (i+1), "optimized with", args.ff, "Energy:", energy)
				for j,seenmol in enumerate(outmols):
					if abs(energy - c_energy[j]) < args.energy_threshold:
						#print((i+1), energy, (j+1), c_energy[j], getPMIDIFF(mol,seenmol))
						if getPMIDIFF(mol, seenmol) < args.rms_threshold * 25:
							#print("o  Conformer", (i+1), "matches conformer", (j+1))
							unique += 1
							dup_id = (j+1)

				if unique == 0:
					if args.verbose == True: print("-  Conformer", (i+1), "is unique")

					if args.ANI1ccx == True or args.xtb == True:
						cartesians = mol.GetConformers()[0].GetPositions()
						# element symbols concatenated into one string, e.g. "CHHHH"
						elements = ''
						for atom in mol.GetAtoms(): elements += atom.GetSymbol()

						coordinates = torch.tensor([cartesians.tolist()], requires_grad=True, device=device)

						if args.ANI1ccx == True:
							species = model.species_to_tensor(elements).to(device).unsqueeze(0)
							_, ani_energy = model((species, coordinates))
							if args.verbose: print("ANI Initial E:",ani_energy.item(),'eH') #Hartree

							if opt == True:
								ase_molecule = ase.Atoms(elements, positions=coordinates.tolist()[0], calculator=model.ase())
								### make a function for constraints and optimization
								# when constraints are given, the distance between atoms
								# 0 and 1 is set to 2.0 and then held fixed
								if constraints != None:
									fb = ase.constraints.FixBondLength(0, 1)
									ase_molecule.set_distance(0,1,2.0)
									ase_molecule.set_constraint(fb)

								optimizer = ase.optimize.BFGS(ase_molecule)
								optimizer.run(fmax=float(opt_precision))
								species_coords = ase_molecule.get_positions().tolist()
								coordinates = torch.tensor([species_coords], requires_grad=True, device=device)

							###############################################################################
							# Now let's compute energy:
							_, ani_energy = model((species, coordinates))
							aniE = ani_energy.item() #Hartree
							# NOTE(review): `ase_molecule` is only bound when `opt` is True,
							# so this verbose print would fail with `opt` switched off.
							if args.verbose: print("ANI Final E:", aniE,'eH', ase_molecule.get_potential_energy(),'eV') #Hartree, eV
							###############################################################################
### INCLUDE THE OPTIONS TO SOTRE MOLECULAR Descriptors
### CHECK THIS WEBPAGE: https://github.com/grimme-lab/xtb/tree/master/python
						elif args.xtb == True:
							ase_molecule = ase.Atoms(elements, positions=coordinates.tolist()[0], calculator=GFN2()) #define ase molecule using GFN2 Calculator
							if opt == True:
								if args.verbose: print("Initial XTB energy", ase_molecule.get_potential_energy()/Hartree,'Eh',ase_molecule.get_potential_energy(),'eV') #Hartree, eV
								optimizer = ase.optimize.BFGS(ase_molecule)
								optimizer.run(fmax=float(opt_precision))
								species_coords = ase_molecule.get_positions().tolist()
								coordinates = torch.tensor([species_coords], requires_grad=True, device=device)
							###############################################################################
							# Now let's compute energy:
							xtb_energy = ase_molecule.get_potential_energy()
							if args.verbose: print("Final XTB E:",xtb_energy/Hartree,'Eh',xtb_energy,'eV') #Hartree, eV
							###############################################################################

						if args.ANI1ccx == True or args.xtb == True:#save Eh and coordinates to write to SDF
							# NOTE(review): when both flags are set the ANI branch above runs,
							# yet the xtb value (still its initial 0) is what gets stored here.
							if args.xtb == True:SQM_energy.append(xtb_energy/Hartree)
							else:SQM_energy.append(ani_energy.item())
							cartesians = np.array(coordinates.tolist()[0])
							SQM_cartesians.append(cartesians)


					pmol = PropertyMol.PropertyMol(mol)
					outmols.append(pmol); c_converged.append(converged); c_energy.append(energy)
					conf += 1

				else: print("x  Conformer", (i+1), "is a duplicate of", dup_id)
			else:
				print("x  Minimization of conformer", (i+1), " not converged / energy too high!", converged, (energy - globmin), args.ewin)
			#pass
		else:
			pass #print("No molecules to optimize")


	# if SQM energy exists, overwrite RDKIT energies and geometries
	cids = list(range(len(outmols)))
	# NOTE(review): the ranking is computed from the force-field energies,
	# before c_energy is overwritten with SQM values below -- confirm that the
	# conformer numbering is meant to follow the force-field order.
	sortedcids = sorted(cids, key = lambda cid: c_energy[cid])

	if args.ANI1ccx == True or args.xtb == True:
		for conf in cids:
			c_energy[conf] = SQM_energy[conf]
			c = outmols[conf].GetConformer()
			# copy the SQM coordinates onto the stored conformer, atom by atom
			for j in range(outmols[conf].GetNumAtoms()):
				#print(cartesians[i])
				[x,y,z] = SQM_cartesians[conf][j]
				c.SetAtomPosition(j,Point3D(x,y,z))

			# report (but do not remove) conformers that look identical after SQM
			# NOTE(review): dividing by 2625.5 is presumably a conversion of the
			# threshold into Hartree -- confirm the unit of energy_threshold.
			for j in range(0,conf):
				if abs(c_energy[conf] - c_energy[j]) < args.energy_threshold / 2625.5 and getPMIDIFF(outmols[conf], outmols[j]) <  args.rms_threshold:
					print("It appears ",conf, "is the same as", j)

	for i, cid in enumerate(sortedcids):
		outmols[cid].SetProp('_Name', name + ' conformer ' + str(i+1))
		# NOTE(review): a numeric value is passed here; confirm SetProp accepts it.
		outmols[cid].SetProp('Energy', c_energy[cid])

	return outmols, c_energy
示例#7
0
def mult_min(name, args, program, log, dup_data, dup_data_idx):
    """Optimize the conformers of an SD file, remove duplicates, rank by energy.

    Each non-None molecule read from ``name + args.output`` is passed to
    `optimize`. A conformer is kept when `optimize` reports convergence
    (value 0), its energy lies within ``args.ewin`` of the lowest energy
    seen so far, and it is not a duplicate of an already kept conformer.
    A duplicate is either closer in energy than
    ``args.initial_energy_threshold``, or closer than
    ``args.energy_threshold`` with an RMSD below ``args.rms_threshold``.
    Counts are stored in `dup_data` and the survivors are written with
    `write_confs`.

    Returns
    -------
    (list, list)
        The kept `PropertyMol` objects and their energies, in acceptance
        order.
    """
    sdf_path = name + args.output
    inmols = Chem.SDMolSupplier(sdf_path, removeHs=False)
    if inmols is None:
        log.write("Could not open " + sdf_path)
        sys.exit(-1)

    globmin = None
    n_high = n_dup_energy = n_dup_rms_eng = 0
    c_energy, outmols = [], []

    if args.verbose:
        log.write("\n\no  Multiple minimization of " + sdf_path + " with " +
                  program)
    bar = IncrementalBar('o  Minimizing', max=len(inmols))

    for mol in inmols:
        bar.next()
        if mol is None:
            continue

        # optimize this structure and record the energy
        mol, converged, energy = optimize(mol, args, program, log, dup_data,
                                          dup_data_idx)

        # keep track of the lowest energy encountered so far
        if globmin is None or energy < globmin:
            globmin = energy

        # energies compared in kcal/mol
        if not (converged == 0 and (energy - globmin) < args.ewin):
            n_high += 1
            continue

        # compare against all previous conformers located
        for seenmol, seen_energy in zip(outmols, c_energy):
            gap = abs(energy - seen_energy)
            if gap < args.initial_energy_threshold:
                n_dup_energy += 1
                break
            if gap < args.energy_threshold:
                rms = get_conf_RMS(mol, seenmol, 0, 0, args.heavyonly,
                                   args.max_matches_RMSD, log)
                if rms < args.rms_threshold:
                    n_dup_rms_eng += 1
                    break
        else:
            # no earlier conformer matched: this one is unique
            outmols.append(PropertyMol.PropertyMol(mol))
            c_energy.append(energy)

    bar.finish()
    if args.verbose == True:
        log.write("o  " + str(n_dup_energy) +
                  " Duplicates removed initial energy ( E < " +
                  str(args.initial_energy_threshold) + " kcal/mol )")
        log.write("o  " + str(n_dup_rms_eng) + " Duplicates removed (RMSD < " +
                  str(args.rms_threshold) + " / E < " +
                  str(args.energy_threshold) + " kcal/mol)")
        log.write("o  " + str(n_high) +
                  " Conformers rejected based on energy ( E > " +
                  str(args.ewin) + " kcal/mol)")

    # label the kept conformers by their energy rank
    sortedcids = sorted(range(len(outmols)), key=c_energy.__getitem__)
    for rank, cid in enumerate(sortedcids, start=1):
        kept = outmols[cid]
        kept.SetProp('_Name', name + ' conformer ' + str(rank))
        kept.SetProp('Energy', c_energy[cid])

    n_samples = len(inmols)
    n_unique = len(sortedcids)
    if program == 'xtb':
        dup_data.at[dup_data_idx, 'xTB-Initial-samples'] = n_samples
        dup_data.at[dup_data_idx,
                    'xTB-initial_energy_threshold'] = n_dup_energy
        dup_data.at[dup_data_idx,
                    'xTB-RMS-and-energy-duplicates'] = n_dup_rms_eng
        dup_data.at[dup_data_idx, 'xTB-Unique-conformers'] = n_unique
    elif program == 'ani':
        dup_data.at[dup_data_idx, 'ANI1ccx-Initial-samples'] = n_samples
        dup_data.at[dup_data_idx,
                    'ANI1ccx-initial_energy_threshold'] = n_dup_energy
        dup_data.at[dup_data_idx,
                    'ANI1ccx-RMS-and-energy-duplicates'] = n_dup_rms_eng
        dup_data.at[dup_data_idx, 'ANI1ccx-Unique-conformers'] = n_unique

    # write the filtered, ordered conformers to external file
    write_confs(outmols, c_energy, name, args, program, log)
    return outmols, c_energy
示例#8
0
def summ_search(mol,
                name,
                args,
                log,
                dup_data,
                dup_data_idx,
                coord_Map=None,
                alg_Map=None,
                mol_template=None):
    '''embeds core conformers, then optimizes and filters based on RMSD. Finally the rotatable torsions are systematically rotated'''

    sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + args.output)

    Chem.SanitizeMol(mol)
    mol = Chem.AddHs(mol)
    mol.SetProp("_Name", name)

    # detects and applies auto-detection of initial number of conformers
    if args.sample == 'auto':
        initial_confs = int(auto_sampling(args.auto_sample, mol, log))

    else:
        initial_confs = int(args.sample)

    #
    dup_data.at[dup_data_idx, 'Molecule'] = name
    dup_data.at[dup_data_idx, 'RDKIT-Initial-samples'] = initial_confs

    if args.nodihedrals == False:
        rotmatches = getDihedralMatches(mol, args.heavyonly, log)
    else:
        rotmatches = []

    if len(rotmatches) > args.max_torsions:
        log.write("x  Too many torsions (%d). Skipping %s" %
                  (len(rotmatches), (name + args.output)))
        status = -1
    else:
        if coord_Map == None and alg_Map == None and mol_template == None:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol,
                                                     initial_confs,
                                                     params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    ignoreSmoothingFailures=True,
                    randomSeed=args.seed,
                    numThreads=0)
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write(
                    "o  conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    useRandomCoords=True,
                    boxSizeMult=10.0,
                    ignoreSmoothingFailures=True,
                    numZeroFail=1000,
                    numThreads=0)
            if args.verbose:
                log.write("o  " + str(len(cids)) +
                          " conformers initially sampled")
        # case of embed for templates
        else:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.coordMap = coord_Map
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol,
                                                     initial_confs,
                                                     params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    ignoreSmoothingFailures=True,
                    coordMap=coord_Map,
                    numThreads=0)
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write(
                    "o  conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol,
                    initial_confs,
                    randomSeed=args.seed,
                    useRandomCoords=True,
                    boxSizeMult=10.0,
                    numZeroFail=1000,
                    ignoreSmoothingFailures=True,
                    coordMap=coord_Map,
                    numThreads=0)
            if args.verbose:
                log.write("o  " + str(len(cids)) +
                          " conformers initially sampled")

        #energy minimize all to get more realistic results
        #identify the atoms and decide Force Field

        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() > 36:  #upto Kr for MMFF, if not use UFF
                args.ff = "UFF"
                #log.write("UFF is used because there are atoms that MMFF doesn't recognise")
        if args.verbose:
            log.write("o  Optimizing " + str(len(cids)) +
                      " initial conformers with" + args.ff)
        if args.verbose:
            if args.nodihedrals == False:
                log.write("o  Found " + str(len(rotmatches)) +
                          " rotatable torsions")
                # for [a,b,c,d] in rotmatches:
                # 	log.write('  '+mol.GetAtomWithIdx(a).GetSymbol()+str(a+1)+ mol.GetAtomWithIdx(b).GetSymbol()+str(b+1)+ mol.GetAtomWithIdx(c).GetSymbol()+str(c+1)+mol.GetAtomWithIdx(d).GetSymbol()+str(d+1))
            else:
                log.write("o  Systematic torsion rotation is set to OFF")

        cenergy, outmols = [], []
        bar = IncrementalBar('o  Minimizing', max=len(cids))
        for i, conf in enumerate(cids):
            if coord_Map == None and alg_Map == None and mol_template == None:
                if args.ff == "MMFF":
                    GetFF = Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = Chem.UFFGetMoleculeForceField(mol, confId=conf)
                else:
                    log.write('   Force field {} not supported!'.format(
                        args.ff))
                    sys.exit()

                GetFF.Initialize()
                converged = GetFF.Minimize(maxIts=args.opt_steps_RDKit)
                energy = GetFF.CalcEnergy()
                cenergy.append(GetFF.CalcEnergy())

                #if args.verbose:
                #    log.write("-   conformer", (i+1), "optimized: ", args.ff, "energy", GetFF.CalcEnergy())
            #id template realign before doing calculations
            else:
                num_atom_match = mol.GetSubstructMatch(mol_template)
                # Force field parameters
                if args.ff == "MMFF":
                    GetFF = lambda mol, confId=conf: Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = lambda mol, confId=conf: Chem.UFFGetMoleculeForceField(
                        mol, confId=conf)
                else:
                    log.write('   Force field {} not supported!'.format(
                        options.ff))
                    sys.exit()
                getForceField = GetFF

                # clean up the conformation
                ff_temp = getForceField(mol, confId=conf)
                for k, idxI in enumerate(num_atom_match):
                    for l in range(k + 1, len(num_atom_match)):
                        idxJ = num_atom_match[l]
                        d = coord_Map[idxI].Distance(coord_Map[idxJ])
                        ff_temp.AddDistanceConstraint(idxI, idxJ, d, d, 10000)
                ff_temp.Initialize()
                #reassignned n from 4 to 10 for better embed and minimzation
                n = 10
                more = ff_temp.Minimize()
                while more and n:
                    more = ff_temp.Minimize()
                    n -= 1
                energy = ff_temp.CalcEnergy()
                # rotate the embedded conformation onto the core_mol:
                rms = rdMolAlign.AlignMol(mol,
                                          mol_template,
                                          prbCid=conf,
                                          atomMap=alg_Map,
                                          reflect=True,
                                          maxIters=100)
                # elif len(num_atom_match) == 5:
                #     ff_temp = GetFF(mol, confId=conf)
                #     conf_temp = mol_template.GetConformer()
                #     for k in range(mol_template.GetNumAtoms()):
                #         p = conf_temp.GetAtomPosition(k)
                #         q = mol.GetConformer(conf).GetAtomPosition(k)
                #         pIdx = ff_temp.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
                #         ff_temp.AddDistanceConstraint(pIdx, num_atom_match[k], 0, 0, 10000)
                #     ff_temp.Initialize()
                #     n = 10
                #     more = ff_temp.Minimize(energyTol=1e-6, forceTol=1e-5)
                #     while more and n:
                #         more = ff_temp.Minimize(energyTol=1e-6, forceTol=1e-5)
                #         n -= 1
                #     # realign
                #     energy = ff_temp.CalcEnergy()
                #     rms = rdMolAlign.AlignMol(mol, mol_template,prbCid=conf, atomMap=alg_Map,reflect=True,maxIters=50)
                cenergy.append(energy)

            # outmols is gonna be a list containing "initial_confs" mol objects with "initial_confs"
            # conformers. We do this to SetProp (Name and Energy) to the different conformers
            # and log.write in the SDF file. At the end, since all the mol objects has the same
            # conformers, but the energies are different, we can log.write conformers to SDF files
            # with the energies of the parent mol objects. We measured the computing time and
            # it's the same as using only 1 parent mol object with 10 conformers, but we couldn'temp
            # SetProp correctly
            pmol = PropertyMol.PropertyMol(mol)
            outmols.append(pmol)
            bar.next()
        bar.finish()

        for i, cid in enumerate(cids):
            outmols[cid].SetProp('_Name', name + ' conformer ' + str(i + 1))
            outmols[cid].SetProp('Energy', cenergy[cid])

        cids = list(range(len(outmols)))
        sortedcids = sorted(cids, key=lambda cid: cenergy[cid])

        log.write("\n\no  Filters after intial embedding of " +
                  str(initial_confs) + " conformers")
        selectedcids, selectedcids_initial, eng_dup, eng_rms_dup = [], [], -1, -1
        bar = IncrementalBar('o  Filtering based on energy (pre-filter)',
                             max=len(sortedcids))
        for i, conf in enumerate(sortedcids):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids_initial.append(conf)
            # check rmsd
            for seenconf in selectedcids_initial:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.initial_energy_threshold:
                    eng_dup += 1
                    excluded_conf = True
                    break
            if excluded_conf == False:
                if conf not in selectedcids_initial:
                    selectedcids_initial.append(conf)
            bar.next()
        bar.finish()

        if args.verbose == True:
            log.write("o  " + str(eng_dup) +
                      " Duplicates removed  pre-energy filter (E < " +
                      str(args.initial_energy_threshold) + " kcal/mol )")

        #reduce to unique set
        if args.verbose:
            log.write("o  Removing duplicate conformers ( RMSD < " +
                      str(args.rms_threshold) + " and E difference < " +
                      str(args.energy_threshold) + " kcal/mol)")

        bar = IncrementalBar('o  Filtering based on energy and rms',
                             max=len(selectedcids_initial))
        #check rmsd
        for i, conf in enumerate(selectedcids_initial):

            #set torsions to same value
            for m in rotmatches:
                rdMolTransforms.SetDihedralDeg(
                    outmols[conf].GetConformer(conf), *m, 180.0)

            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids.append(conf)
            # check rmsd
            for seenconf in selectedcids:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.energy_threshold:
                    rms = get_conf_RMS(outmols[conf], outmols[conf], seenconf,
                                       conf, args.heavyonly,
                                       args.max_matches_RMSD, log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        eng_rms_dup += 1
                        break
            if excluded_conf == False:
                if conf not in selectedcids:
                    selectedcids.append(conf)
            bar.next()
        bar.finish()

        # unique_mols, unique_energies = [],[]
        # for id in selectedcids:
        #     unique_mols.append(outmols[id])
        #     unique_energies.append(cenergy[id])

        # log.write(unique_mols[0:2].GetConformers()[0].GetPositions())

        if args.verbose == True:
            log.write("o  " + str(eng_rms_dup) +
                      " Duplicates removed (RMSD < " +
                      str(args.rms_threshold) + " / E < " +
                      str(args.energy_threshold) + " kcal/mol) after rotation")
        if args.verbose:
            log.write("o  " + str(len(selectedcids)) +
                      " unique (ignoring torsions) starting conformers remain")

        dup_data.at[dup_data_idx, 'RDKit-energy-duplicates'] = eng_dup
        dup_data.at[dup_data_idx,
                    'RDKit-RMS-and-energy-duplicates'] = eng_rms_dup
        dup_data.at[dup_data_idx,
                    'RDKIT-Unique-conformers'] = len(selectedcids)

        # now exhaustively drive torsions of selected conformers
        n_confs = int(len(selectedcids) * (360 / args.degree)**len(rotmatches))
        if args.verbose and len(rotmatches) != 0:
            log.write("\n\no  Systematic generation of " + str(n_confs) +
                      " confomers")
            bar = IncrementalBar(
                'o  Generating conformations based on dihedral rotation',
                max=len(selectedcids))
        else:
            bar = IncrementalBar('o  Generating conformations',
                                 max=len(selectedcids))

        total = 0
        for conf in selectedcids:
            #log.write(outmols[conf])
            total += genConformer_r(outmols[conf], conf, 0, rotmatches,
                                    args.degree, sdwriter, args,
                                    outmols[conf].GetProp('_Name'), log)
            bar.next()
        bar.finish()
        if args.verbose and len(rotmatches) != 0:
            log.write("o  %d total conformations generated" % total)
        status = 1
    sdwriter.close()

    # get the molecules and their energies after rotations
    if len(rotmatches) != 0:
        rdmols = Chem.SDMolSupplier(name + '_' + 'rdkit' + args.output,
                                    removeHs=False)
        if rdmols is None:
            log.write("Could not open " + name + args.output)
            sys.exit(-1)

        bar = IncrementalBar(
            'o  Filtering based on energy and rms after rotation of dihedrals',
            max=len(rdmols))
        sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + '_' + 'rotated' +
                                 args.output)

        rd_count = 0
        rd_selectedcids, rd_dup_energy, rd_dup_rms_eng = [], -1, 0
        for i in range(len(rdmols)):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if rd_count == 0:
                rd_selectedcids.append(i)
                if args.metal_complex == True:
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                            atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
            # Only the first ID gets included
            rd_count = 1
            # check rmsd
            for j in rd_selectedcids:
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.initial_energy_threshold:  # comparison in kcal/mol
                    excluded_conf = True
                    rd_dup_energy += 1
                    break
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.energy_threshold:  # in kcal/mol
                    rms = get_conf_RMS(rdmols[i], rdmols[j], -1, -1,
                                       args.heavyonly, args.max_matches_RMSD,
                                       log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        rd_dup_rms_eng += 1
                        break
            if excluded_conf == False:
                if args.metal_complex == True:
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                            atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
                if i not in rd_selectedcids:
                    rd_selectedcids.append(i)
            bar.next()
        bar.finish()
        sdwriter.close()

        if args.verbose == True:
            log.write("o  " + str(rd_dup_energy) +
                      " Duplicates removed initial energy ( E < " +
                      str(args.initial_energy_threshold) + " kcal/mol )")
        if args.verbose == True:
            log.write("o  " + str(rd_dup_rms_eng) +
                      " Duplicates removed (RMSD < " +
                      str(args.rms_threshold) + " / E < " +
                      str(args.energy_threshold) + " kcal/mol) after rotation")
        if args.verbose == True:
            log.write("o  " + str(len(rd_selectedcids)) +
                      " unique (after torsions) conformers remain")

        # record the conformer counts from the filtering after rotations
        dup_data.at[dup_data_idx, 'RDKIT-Rotated-conformers'] = total
        dup_data.at[dup_data_idx,
                    'RDKIT-Rotated-Unique-conformers'] = len(rd_selectedcids)

    return status