Пример #1
0
    def conf_generator(self, inp):
        """Embed, align and optionally UFF-minimize conformers for one molecule.

        ``inp`` is unpacked as a ``(mol, name)`` pair. Hydrogens are expected
        to have been added to ``mol`` beforehand. The embedding uses ETKDGv2
        parameters with ``self.rmsd`` as the pruning RMS threshold and a
        random seed of -1. The number of requested conformers comes from
        ``self.adaptive_conf(mol)``.

        When ``self.run_uff`` is truthy, every embedded conformer is minimized
        with UFF (the original author notes this is about 2x slower) and the
        molecule is then passed through ``self.prune_conformers``.

        Returns ``[mol, name]``.
        """
        mol, name = inp

        embed_params = AllChem.ETKDGv2()
        embed_params.randomSeed = -1
        embed_params.pruneRmsThresh = self.rmsd

        # Rotatable-bond dependent, adaptive number of conformers.
        n_requested = self.adaptive_conf(mol)

        # Conformers are stored on `mol`; the call returns their ids.
        conf_ids = AllChem.EmbedMultipleConfs(mol, n_requested, embed_params)

        # Superimpose every conformer onto the first one.
        rms_to_first = []
        AllChem.AlignMolConformers(mol, RMSlist=rms_to_first)

        if not self.run_uff:
            return [mol, name]

        for conf_id in conf_ids:
            AllChem.UFFOptimizeMolecule(mol, confId=conf_id)
        pruned = self.prune_conformers(mol, conf_ids, n_requested)
        return [pruned, name]
def generateconformations(m,
                          n,
                          maxAttempts=1000,
                          pruneRmsThresh=0.1,
                          useExpTorsionAnglePrefs=True,
                          useBasicKnowledge=True,
                          enforceChirality=True):
    """Embed up to ``n`` conformers of ``m``, MMFF-minimize and align them.

    Hydrogens are added to ``m`` first. The embedding options are forwarded
    to ``AllChem.EmbedMultipleConfs`` together with ``numThreads=0``. Each
    embedded conformer is minimized with ``AllChem.MMFFOptimizeMolecule``
    (its return value is not inspected), and finally all conformers are
    aligned with ``AllChem.AlignMolConformers``.

    Returns:
        A ``(molecule, conformer_ids)`` tuple. The ids are returned as a
        plain Python list because the wrapped type returned by the embedder
        cannot be pickled.
    """
    m = Chem.AddHs(m)

    embed_options = dict(
        numConfs=n,
        maxAttempts=maxAttempts,
        pruneRmsThresh=pruneRmsThresh,
        useExpTorsionAnglePrefs=useExpTorsionAnglePrefs,
        useBasicKnowledge=useBasicKnowledge,
        enforceChirality=enforceChirality,
        numThreads=0,
    )
    # Convert straight away to a list so the ids can be pickled by callers.
    conformer_ids = list(AllChem.EmbedMultipleConfs(m, **embed_options))

    for conformer_id in conformer_ids:
        AllChem.MMFFOptimizeMolecule(m, confId=conformer_id)

    AllChem.AlignMolConformers(m)

    return m, conformer_ids
Пример #3
0
def generate_conformers(molecule: Chem.Mol, n_conformers: int, 
            random_seed: int = -1, alignment: bool = False) -> Chem.Mol:
    """Embed conformers on a hydrogen-added copy of a molecule.

        Parameters
        ----------
        molecule : rdkit.Chem.Mol
            Molecule for which conformers will be generated. Hydrogens are
            added via ``Chem.AddHs`` and the result of that call is what gets
            embedded and returned.

        n_conformers : int
            Number of conformers requested from the embedder.

        random_seed : int, optional
            Random seed forwarded to the embedder (Default: -1).

        alignment : bool, optional
            If true the generated conformers are aligned (Default: False).

        Returns
        -------
        rdkit.Chem.Mol
            The hydrogen-added molecule carrying the generated conformers.

        Raises
        ------
        OpenPharmacophoreTypeError
            If ``n_conformers`` is not an ``int`` instance.
        OpenPharmacophoreValueError
            If ``n_conformers`` is negative.

        Notes
        -----
        NOTE(review): the range check only rejects negative values, although
        its message says "greater than 0"; ``n_conformers == 0`` is accepted.
        Confirm which of the two is intended.

    """
    if not isinstance(n_conformers, int):
        raise OpenPharmacophoreTypeError("n_conformers must be an integer")
    if n_conformers < 0:
        raise OpenPharmacophoreValueError("n_conformers must be greater than 0")

    # Explicit hydrogens give more realistic geometries.
    mol_with_hs = Chem.AddHs(molecule)
    AllChem.EmbedMultipleConfs(
        mol_with_hs, numConfs=n_conformers, randomSeed=random_seed)

    if not alignment:
        return mol_with_hs

    AllChem.AlignMolConformers(mol_with_hs)
    return mol_with_hs
Пример #4
0
    def align_atoms(self, mol, atoms):
        """Align the conformers of ``mol`` on the given atom ids.

        The alignment is limited to ``self.max_iters`` iterations. Returns the
        list that ``AllChem.AlignMolConformers`` fills through its ``RMSlist``
        argument.
        """
        rms_values = []
        AllChem.AlignMolConformers(
            mol,
            atomIds=atoms,
            maxIters=self.max_iters,
            RMSlist=rms_values,
        )
        return rms_values
Пример #5
0
def generate_conformers(
    molecule: Chem.rdchem.Mol,
    max_num_conformers: int,
    *,
    random_seed: int = -1,
    prune_rms_thresh: float = -1.0,
    max_iter: int = -1,
    fallback_to_random: bool = False,
) -> Chem.rdchem.Mol:
  """Generates aligned conformers for a copy of the given molecule.

  The input is deep-copied and hydrogens are added before embedding. If
  `max_iter` is positive the conformers are minimised with MMFF, falling back
  to UFF when the MMFF helper returns None. The conformers are then aligned
  and the hydrogens are removed again before returning.

  Args:
    molecule: molecular representation of the compound. It is not modified.
    max_num_conformers: maximum number of conformers to generate. If pruning is
      done, the returned number of conformers is not guaranteed to match
      max_num_conformers.
    random_seed: random seed to use for conformer generation.
    prune_rms_thresh: RMSD threshold used to prune conformers that are too
      similar.
    max_iter: maximum number of force-field optimisation iterations. If set to
      <= 0, energy optimisation is not performed.
    fallback_to_random: if conformers cannot be obtained, use random coordinates
      to initialise.

  Returns:
    A new molecule, with hydrogens removed, that carries the generated
    conformers. The conformers are force-field optimised only when
    `max_iter > 0`.
  """
  working_mol = Chem.AddHs(copy.deepcopy(molecule))
  working_mol = _embed_conformers(
      working_mol,
      max_num_conformers,
      random_seed,
      prune_rms_thresh,
      fallback_to_random,
      use_random=False)

  result = working_mol
  if max_iter > 0:
    result = _minimize_by_mmff(working_mol, max_iter)
    if result is None:
      # MMFF helper gave nothing back; try UFF instead.
      result = _minimize_by_uff(working_mol, max_iter)

  # Superimpose all conformers, using the first one as the reference.
  AllChem.AlignMolConformers(result)

  # Hydrogens are stripped so the atom count matches the graph nodes.
  return Chem.RemoveHs(result)
Пример #6
0
def build_confs(smiles, conf_num=10, element_dict=element_dict):
    """Generate conformers for a SMILES string and return their SD-file text.

    The molecule is built from ``smiles``, hydrogens are added, ``conf_num``
    conformers are requested from the embedder, aligned, and each one is
    minimized with UFF. All conformers are written to ``confs_sdf.sdf`` in the
    current working directory, and that file is read back and cut into one
    section per record.

    Args:
        smiles: SMILES string of the molecule.
        conf_num: number of conformers requested, and the number of sections
            extracted from the SD file.
        element_dict: not used by this function; kept for interface
            compatibility.

    Returns:
        A ``(sdf_dict, num_atoms, conf_num)`` tuple. ``sdf_dict`` maps
        ``'sec_1'`` .. ``'sec_<conf_num>'`` to lists of whitespace-normalized
        lines. For each record the four leading lines and the two lines
        ending with the ``$$$$`` delimiter are left out.

    Raises:
        IndexError: if the SD file holds fewer than ``conf_num`` records
            (for example when the embedder produced fewer conformers).
    """
    sdf_path = 'confs_sdf.sdf'
    record_delimiter = "$$$$"

    m = Chem.MolFromSmiles(smiles)
    m = Chem.AddHs(m, addCoords=True)

    num_atoms = m.GetNumAtoms()
    AllChem.EmbedMultipleConfs(m, conf_num)
    AllChem.AlignMolConformers(m)

    conf_ids = [conf.GetId() for conf in m.GetConformers()]
    for conf_id in conf_ids:
        AllChem.UFFOptimizeMolecule(m, confId=conf_id)

    # Close the writer (not just flush) so the handle is released before the
    # file is read back, even if a write fails.
    w = Chem.SDWriter(sdf_path)
    try:
        for conf_id in conf_ids:
            w.write(m, confId=conf_id)
    finally:
        w.close()

    # Read the file once; line numbers and line contents now come from the
    # same split, so the slice offsets below are consistent.
    with open(sdf_path) as f:
        lines = f.read().splitlines()

    # 1-based line numbers of the record delimiters.
    rm_lines = [
        num for num, line in enumerate(lines, 1) if record_delimiter in line
    ]

    # Collapse runs of whitespace in every line to single spaces.
    new_ls = [' '.join(line.split()) for line in lines]

    sdf_dict = {}
    start = 4  # skip the four leading lines of the first record
    for k in range(1, conf_num + 1):
        delimiter_line = rm_lines[k - 1]
        sdf_dict['sec_{0}'.format(k)] = new_ls[start:delimiter_line - 2]
        # The next record starts right after this delimiter; skip its four
        # leading lines as well.
        start = delimiter_line + 4
    return sdf_dict, num_atoms, conf_num
def torsional_search(smiles, iterations=100000, RMSthresh=1):
    """Run a conformer search for a SMILES string and return coordinates and energies.

    Args:
        smiles: SMILES string representing the molecule.
        iterations: number of conformers requested from the embedder.
        RMSthresh: RMS pruning threshold passed to the embedder (how different
            new conformers need to be).

    Returns:
        ``(xyzs, energies)``: ``xyzs`` is a list (conformers) of lists (atoms)
        of ``[x, y, z]`` floats; ``energies`` holds the second element of each
        result returned by ``AllChem.MMFFOptimizeMoleculeConfs`` with the
        MMFF94s variant.

    Reference for the embedding approach cited by the original author:
    Riniker, S.; Landrum, G. A. "Better Informed Distance Geometry: Using What
    We Know To Improve Conformation Generation" J. Chem. Inf. Comp. Sci.
    55:2562-74 (2015).
    """
    # A molecule parsed from SMILES has no explicit hydrogens; add them.
    rdmol = Chem.AddHs(Chem.MolFromSmiles(smiles))

    AllChem.EmbedMultipleConfs(rdmol,
                               clearConfs=True,
                               numConfs=iterations,
                               pruneRmsThresh=RMSthresh)

    # Not strictly necessary, but makes later visualisation more convenient.
    AllChem.AlignMolConformers(rdmol)

    # Each result is indexed as (status, energy); the status is ignored.
    optimisation_results = AllChem.MMFFOptimizeMoleculeConfs(
        rdmol, mmffVariant='MMFF94s')
    energies = [result[1] for result in optimisation_results]

    conformer_ids = [conformer.GetId() for conformer in rdmol.GetConformers()]
    atom_count = len(rdmol.GetAtoms())

    xyzs = []
    for conformer_id in conformer_ids:
        conformer = rdmol.GetConformer(conformer_id)
        atom_coords = []
        for atom_index in range(atom_count):
            position = conformer.GetAtomPosition(atom_index)
            atom_coords.append(
                [float(position[0]),
                 float(position[1]),
                 float(position[2])])
        xyzs.append(atom_coords)

    return xyzs, energies
Пример #8
0
def GenerateMolConformersWithoutMinimization(Mol, MolNum=None):
    """Generate conformers for a molecule without performing minimization.

    Returns a four-element list ``[Mol, Status, ConfIDs, ConfEnergies]``:

    * ``[Mol, False, None, None]`` when embedding produced no conformers;
    * ``[Mol, True, ConfIDsList, ConfEnergies]`` otherwise, where
      ``ConfEnergies`` is ``None`` unless ``OptionsInfo["EnergyOut"]`` is set.
      Each energy is a string, either formatted with two decimals or
      ``"NotAvailable"`` when the energy could not be retrieved.

    Warnings and info messages are suppressed when
    ``OptionsInfo["QuietMode"]`` is set.
    """

    ConfIDs = EmbedMolecule(Mol, MolNum)
    if len(ConfIDs) == 0:
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning(
                "Conformation generation couldn't be performed for molecule %s: Embedding failed...\n"
                % RDKitUtil.GetMolName(Mol, MolNum))
        return [Mol, False, None, None]

    if OptionsInfo["AlignConformers"]:
        AllChem.AlignMolConformers(Mol)

    if not OptionsInfo["QuietMode"]:
        MiscUtil.PrintInfo("\nNumber of conformations generated for %s: %d" %
                           (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs)))

    # Plain Python list of the conformer ids.
    ConfIDsList = list(ConfIDs)

    if not OptionsInfo["EnergyOut"]:
        return [Mol, True, ConfIDsList, None]

    ConfEnergies = []
    for ConfID in ConfIDsList:
        EnergyStatus, Energy = GetConformerEnergy(Mol, ConfID)
        if EnergyStatus:
            ConfEnergies.append("%.2f" % Energy)
            continue

        ConfEnergies.append("NotAvailable")
        if not OptionsInfo["QuietMode"]:
            MiscUtil.PrintWarning(
                "Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n"
                % (ConfID, RDKitUtil.GetMolName(Mol, MolNum)))

    return [Mol, True, ConfIDsList, ConfEnergies]
def GenerateMolConformers(Mol, MolCount, Writer):
    """Generate conformers for a molecule and write them out.

    Returns ``False`` (after printing a warning) when embedding yields no
    conformers. Otherwise the conformers are optionally aligned, depending on
    ``OptionsInfo["AlignConformers"]``, each one is written through
    ``Writer``, and ``True`` is returned.
    """

    MolName = RDKitUtil.GetMolName(Mol, MolCount)

    ConfIDs = EmbedMolecule(Mol)
    ConfCount = len(ConfIDs)
    if ConfCount == 0:
        MiscUtil.PrintWarning(
            "Conformation generation couldn't be performed for molecule %s: Embedding failed...\n"
            % MolName)
        return False

    if OptionsInfo["AlignConformers"]:
        AllChem.AlignMolConformers(Mol)

    # Name each conformer, then write it out.
    for ConfID in ConfIDs:
        SetConfMolName(Mol, MolName, ConfID)
        Writer.write(Mol, confId=ConfID)

    MiscUtil.PrintInfo("\nNumber of conformations written for %s: %d" %
                       (MolName, ConfCount))

    return True
Пример #10
0
    def align_to_me(self, other_conf):
        """Align another conformer onto this one.

        The other object's conformer is temporarily added to ``self.mol``,
        all conformers are aligned on ``self.ids_hvy_atms``, the aligned copy
        is handed back to ``other_conf`` and then removed from ``self.mol``.

        :param other_conf: The other conformer to align.
        :type other_conf: MyConformer
        :return: The same ``other_conf`` object, now holding the aligned
            conformer.
        :rtype: MyConformer
        """

        # Borrow the other object's conformer for the alignment.
        borrowed = other_conf.conformer()
        self.mol.AddConformer(borrowed, assignId=True)

        AllChem.AlignMolConformers(self.mol, atomIds=self.ids_hvy_atms)

        # The conformer just added is the last one on the molecule.
        aligned_conf = self.mol.GetConformers()[-1]
        other_conf.conformer(aligned_conf)

        # Leave self.mol as it was before the call.
        self.mol.RemoveConformer(aligned_conf.GetId())

        return other_conf
Пример #11
0
# Script: embed conformers for one molecule, minimize them with UFF, write
# them to 'confs_sdf.sdf' and read the file back into `lines`.
#
# Alternative input molecules; exactly one `smiles` assignment is active.
#smiles = 'C1CCCCC1N(C)C(=O)C(=CI)C(O)C=C'
#smiles = 'C1(C(O)(C)(C))=CC=CC=C1CCC(SCC2CC2CC(=O)O)C3=CC=CC(=C3)C=CC4=CC=C(C5=N4)C=CC(Cl)=C5' #Montelukast
smiles = 'C1=CC=CC=C1C(=O)NC(C2=CC=CC=C2)C(O)C(=O)OC(C(C)-C3(C(OC(=O)C))C(=O)C4(C)C(O)CC(OC5)C5(OC(=O)C)C4C7(OC(=O)C6=CC=CC=C6))CC7(O)(C3(C)(C))'  # Paclitaxel
#smiles = 'C1CCCC1CCSCC'

#smiles = 'OC(=O)O'

m = Chem.MolFromSmiles(smiles)
m = Chem.AddHs(m, addCoords=True)

conf_num = 10

num_atoms = m.GetNumAtoms()
AllChem.EmbedMultipleConfs(m, conf_num)
# rmslist receives the RMS values from the first conformer to the others.
rmslist = []
AllChem.AlignMolConformers(m, RMSlist=rmslist)

for i in range(m.GetNumConformers()):
    AllChem.UFFOptimizeMolecule(m, confId=i)

w = Chem.SDWriter('confs_sdf.sdf')
for i in range(m.GetNumConformers()):
    w.write(m, confId=i)

# Close (rather than only flush) the writer so the handle is released before
# the file is read back.
w.close()

with open('confs_sdf.sdf', 'r') as f:
    lines = f.readlines()
Пример #12
0
def GenerateMolConformersWithMinimization(Mol, MolNum):
    """Generate and minimize conformers for a molecule.

    Conformers are embedded, each one is minimized with UFF or MMFF (as
    selected in ``OptionsInfo``), and the minimized conformers are filtered
    by an energy window and, optionally, an RMSD cutoff relative to the
    lowest-energy conformer.

    Returns a four-element list:

    * ``[Mol, False, None, None]`` when embedding yields no conformers, when
      minimization raises ``RuntimeError`` or ``KekulizeException``, or when
      the energy of any conformer cannot be retrieved;
    * ``[Mol, True, SelectedConfIDs, SelectedConfEnergies]`` otherwise.
      ``SelectedConfIDs`` is ordered by increasing energy, and
      ``SelectedConfEnergies`` is ``None`` unless ``OptionsInfo["EnergyOut"]``
      is set, in which case it holds two-decimal strings.

    Warnings and info messages are suppressed in ``OptionsInfo["QuietMode"]``.
    """

    if OptionsInfo["AddHydrogens"]:
        Mol = Chem.AddHs(Mol)

    ConfIDs = EmbedMolecule(Mol, MolNum)
    if not len(ConfIDs):
        if not OptionsInfo["QuietMode"]:
            MolName = RDKitUtil.GetMolName(Mol, MolNum)
            MiscUtil.PrintWarning(
                "Conformation generation couldn't be performed for molecule %s: Embedding failed...\n"
                % MolName)
        return [Mol, False, None, None]

    # Minimize every conformer and remember its energy, keyed by conformer id.
    CalcEnergyMap = {}
    for ConfID in ConfIDs:
        try:
            if OptionsInfo["UseUFF"]:
                Status = AllChem.UFFOptimizeMolecule(
                    Mol, confId=ConfID, maxIters=OptionsInfo["MaxIters"])
            elif OptionsInfo["UseMMFF"]:
                Status = AllChem.MMFFOptimizeMolecule(
                    Mol,
                    confId=ConfID,
                    maxIters=OptionsInfo["MaxIters"],
                    mmffVariant=OptionsInfo["MMFFVariant"])
            else:
                # NOTE(review): Status is not assigned on this path; the
                # "Status != 0" check below relies on PrintError not
                # returning - confirm.
                MiscUtil.PrintError(
                    "Minimization couldn't be performed: Specified forcefield, %s, is not supported"
                    % OptionsInfo["ForceField"])
        except (RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolNum)
                MiscUtil.PrintWarning(
                    "Minimization couldn't be performed for molecule %s:\n%s\n"
                    % (MolName, ErrMsg))
            return [Mol, False, None, None]

        # A single conformer without an energy fails the whole molecule.
        EnergyStatus, Energy = GetConformerEnergy(Mol, ConfID)
        if not EnergyStatus:
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolNum)
                MiscUtil.PrintWarning(
                    "Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n"
                    % (ConfID, MolName))
            return [Mol, False, None, None]

        # A non-zero status only triggers a warning; the conformer and its
        # energy are still kept.
        if Status != 0:
            if not OptionsInfo["QuietMode"]:
                MolName = RDKitUtil.GetMolName(Mol, MolNum)
                MiscUtil.PrintWarning(
                    "Minimization failed to converge for conformation number %d of molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n"
                    % (ConfID, MolName, OptionsInfo["MaxIters"]))

        CalcEnergyMap[ConfID] = Energy

    if OptionsInfo["RemoveHydrogens"]:
        Mol = Chem.RemoveHs(Mol)

    # Align molecules after minimization...
    if OptionsInfo["AlignConformers"]:
        AllChem.AlignMolConformers(Mol)

    # Conformer ids ordered from lowest to highest energy.
    SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])

    MinEnergyConfID = SortedConfIDs[0]
    MinConfEnergy = CalcEnergyMap[MinEnergyConfID]
    EnergyWindow = OptionsInfo["EnergyWindow"]

    # The RMSD filter is only active for a positive cutoff.
    EnergyRMSDCutoff = OptionsInfo["EnergyRMSDCutoff"]
    ApplyEnergyRMSDCutoff = False
    if EnergyRMSDCutoff > 0:
        ApplyEnergyRMSDCutoff = True

    # Calculate RMSD values for conformers...
    # If the conformers were aligned above, GetConformerRMS is told so via
    # its prealigned argument.
    PreAligned = False
    if OptionsInfo["AlignConformers"]:
        PreAligned = True

    # RMSD of every conformer relative to the lowest-energy conformer.
    CalcRMSDMap = {}
    if ApplyEnergyRMSDCutoff:
        for ConfID in SortedConfIDs:
            RMSD = AllChem.GetConformerRMS(Mol,
                                           MinEnergyConfID,
                                           ConfID,
                                           prealigned=PreAligned)
            CalcRMSDMap[ConfID] = RMSD

    # Track conformers with in the specified energy window  from the lowest
    # energy conformation along with applying RMSD cutoff as needed...
    #
    SelectedConfIDs = []

    ConfCount = 0
    IgnoredByEnergyConfCount = 0
    IgnoredByRMSDConfCount = 0

    FirstConf = True

    for ConfID in SortedConfIDs:
        # The lowest-energy conformer is always selected.
        # NOTE(review): it is not counted in ConfCount, so the "Total Number"
        # message below is one less than len(SelectedConfIDs) - confirm.
        if FirstConf:
            FirstConf = False
            SelectedConfIDs.append(ConfID)
            continue

        ConfEnergyDiff = abs(CalcEnergyMap[ConfID] - MinConfEnergy)
        if ConfEnergyDiff > EnergyWindow:
            IgnoredByEnergyConfCount += 1
            continue

        # Conformers too close (by RMSD) to the lowest-energy one are dropped.
        if ApplyEnergyRMSDCutoff:
            if CalcRMSDMap[ConfID] < EnergyRMSDCutoff:
                IgnoredByRMSDConfCount += 1
                continue

        ConfCount += 1
        SelectedConfIDs.append(ConfID)

    if not OptionsInfo["QuietMode"]:
        MolName = RDKitUtil.GetMolName(Mol, MolNum)
        MiscUtil.PrintInfo(
            "\nTotal Number of conformations generated for %s: %d" %
            (MolName, ConfCount))
        MiscUtil.PrintInfo(
            "Number of conformations ignored due to energy window cutoff: %d" %
            (IgnoredByEnergyConfCount))
        if ApplyEnergyRMSDCutoff:
            MiscUtil.PrintInfo(
                "Number of conformations ignored due to energy RMSD cutoff:  %d"
                % (IgnoredByRMSDConfCount))

    # Energies are only returned (as formatted strings) when requested.
    SelectedConfEnergies = None
    if OptionsInfo["EnergyOut"]:
        SelectedConfEnergies = [
            "%.2f" % CalcEnergyMap[ConfID] for ConfID in SelectedConfIDs
        ]

    return [Mol, True, SelectedConfIDs, SelectedConfEnergies]
# Copy consecutive slices of the flat `rms_mat` sequence into the rows of the
# 2-D `rms` array; `idx` is the running offset into `rms_mat` and grows by
# `i` after row `i`.
# NOTE(review): presumably rms_mat is a flattened lower-triangle RMS matrix
# in which row i holds i values; if so, the slice below (i + 1 values, also
# writing the diagonal) looks one element too long - TODO confirm against
# where rms_mat is built.
idx = 0
for i in range(1, number_of_conformation):
    rms[i][:i+1] = rms_mat[idx:i+idx+1]
    idx += i

## 4. Store the heavy-atom coordinates in a numpy array
def genConfCoord(cid):
    """Return the coordinates of conformer ``cid`` of ``m`` as a flat array.

    The positions of all atoms returned by ``m.GetAtoms()`` are concatenated
    in atom order as ``x0, y0, z0, x1, y1, z1, ...`` into a 1-D numpy array.
    """
    conformer = m.GetConformer(cid)
    flat_coords = [
        component
        for atom in m.GetAtoms()
        for component in conformer.GetAtomPosition(atom.GetIdx())
    ]
    return np.array(flat_coords)

# Align all conformers, then collect one row of flattened x/y/z coordinates
# per conformer id in `cids`.
AllChem.AlignMolConformers(m)
coord_array = np.zeros((len(cids), 3*m.GetNumAtoms()))
for i, cid in enumerate(cids):
    coord_array[i] = genConfCoord(cid)

### Standardize for clustering
scaler = RobustScaler()
scaler.fit(coord_array)
scaled_coord = scaler.transform(coord_array)

# Collect the indices of conformers considered duplicates: for each
# conformer i, rows from i onward whose rms (column i) is below 0.05 and
# whose energy differs from conformer i by less than 0.5 are marked.
# NOTE(review): the slice d[i:] includes row i itself, whose energy
# difference is 0, so i is marked whenever rms[i, i] < 0.05 - confirm
# whether d[i+1:] was intended.
del_index = set()
for i in range(number_of_conformation):
    d = pd.DataFrame({'rms': rms[:,i], 'energy': energy})
    d.energy = d.energy - d.energy[i]
    del_index = del_index | set(d[i:].query('rms < 0.05 and -0.5 < energy and energy < 0.5').index)
Пример #14
0
    def align_global(self, mol):
        """Align the conformers of ``mol`` without restricting the atom set.

        The alignment is limited to ``self.max_iters`` iterations. Returns the
        list that ``AllChem.AlignMolConformers`` fills through its ``RMSlist``
        argument.
        """
        rms_values = []
        AllChem.AlignMolConformers(
            mol,
            RMSlist=rms_values,
            maxIters=self.max_iters,
        )
        return rms_values
Пример #15
0
    def generate_mining_minima_structures(
        self,
        rmsd_threshold: float = 0.1,
        include_entropy_correction: bool = False
    ) -> (list, unit.Quantity, list):
        """
        Minimizes and filters conformations based on a RMSD threshold.

        The same procedure is run first for the initial state and then for
        the final state: every conformation is minimized with the ANI energy
        function, a thermochemistry correction is added to its energy, the
        minimized coordinates are written back into a copy of the RDKit
        molecule, the conformers are aligned and finally pruned with
        ``prune_conformers``.

        Parameters
        ----------
        rmsd_threshold : float
            Threshold for RMSD filtering, passed to ``prune_conformers``.
        include_entropy_correction : bool
            whether to add the state's entropy (degeneracy) correction to
            every conformer energy or not

        Returns
        -------
        confs_traj : list
            list of md.Trajectory objects with filtered conformations
            (one per state)
        e : unitless (in kT)
            free energy difference dG(final_state - initial_state)
        minimum_energies : list
            list of energies for the different minimum conformations
            (one entry per state)
        all_energies : list
            per state, a copy of the energies of all conformations before
            pruning
        all_conformations : list
            per state, a copy of the aligned RDKit molecule before pruning

        Notes
        -----
        NOTE(review): the return annotation lists three items while five
        values are returned.
        """

        from .ani import ANI1ccx
        from .analysis import prune_conformers, calculate_weighted_energy

        bw_energies = []
        confs_traj = []
        minimum_energies = []
        all_energies = []
        all_conformations = []

        # One iteration per state: index 0 is the initial state, index 1 the
        # final state.
        for (
                ase_mol,
                rdkit_mol,
                ligand_atoms,
                get_ligand_coords,
                top,
                entropy_correction,
                get_nr_of_confs,
        ) in zip(
            [self.initial_state_ase_mol, self.final_state_ase_mol],
            [
                copy.deepcopy(self.initial_state_mol),
                copy.deepcopy(self.final_state_mol),
            ],
            [self.initial_state_ligand_atoms, self.final_state_ligand_atoms],
            [
                self.get_initial_state_ligand_coords,
                self.get_final_state_ligand_coords
            ],
            [
                self.initial_state_ligand_topology,
                self.final_state_ligand_topology
            ],
            [
                self.initial_state_entropy_correction,
                self.final_state_entropy_correction,
            ],
            [
                self.get_nr_of_initial_state_ligand_coords,
                self.get_nr_of_final_state_ligand_coords,
            ],
        ):

            print("Mining Minima starting ...")
            model = ANI1ccx()
            model = model.to(device)

            energy_function = ANI_force_and_energy(model=model,
                                                   atoms=ligand_atoms,
                                                   mol=ase_mol)

            energies: list = []
            for conf_id in range(get_nr_of_confs()):
                # minimize
                print(f"Conf: {conf_id}")
                minimized_coords, _ = energy_function.minimize(
                    get_ligand_coords(conf_id))
                # minimized_coords have dimensions [1][N_atoms][3]
                energy = energy_function.calculate_energy(minimized_coords)
                try:
                    thermochemistry_correction = energy_function.get_thermo_correction(
                        minimized_coords)
                except ValueError:
                    # The conformation is skipped: no energy is appended and
                    # its coordinates in rdkit_mol are left unchanged.
                    # NOTE(review): `energies` can then be shorter than the
                    # number of conformers in rdkit_mol - confirm that
                    # prune_conformers tolerates this.
                    logger.critical(
                        "Imaginary frequencies present - found transition state."
                    )
                    continue

                if include_entropy_correction:
                    energies.append(energy.energy[0] +
                                    thermochemistry_correction +
                                    entropy_correction)
                else:
                    energies.append(energy.energy[0] +
                                    thermochemistry_correction)

                # update the coordinates in the rdkit mol
                # (minimized coordinates are converted to angstrom first)
                for atom in rdkit_mol.GetAtoms():
                    conf = rdkit_mol.GetConformer(conf_id)
                    new_coords = Geometry.rdGeometry.Point3D()
                    new_coords.x = minimized_coords[0][
                        atom.GetIdx()][0].value_in_unit(unit.angstrom)
                    new_coords.y = minimized_coords[0][
                        atom.GetIdx()][1].value_in_unit(unit.angstrom)
                    new_coords.z = minimized_coords[0][
                        atom.GetIdx()][2].value_in_unit(unit.angstrom)
                    conf.SetAtomPosition(atom.GetIdx(), new_coords)

            # align the molecules
            AllChem.AlignMolConformers(rdkit_mol)
            # Keep copies of the unpruned energies and conformers.
            all_energies.append(copy.deepcopy(energies))
            all_conformations.append(copy.deepcopy(rdkit_mol))
            min_and_filtered_rdkit_mol, filtered_energies = prune_conformers(
                rdkit_mol,
                copy.deepcopy(energies),
                rmsd_threshold=rmsd_threshold)

            # generate mdtraj object
            # (coordinates are read in angstrom and stored in nanometer)
            traj = []
            for conf_idx in range(
                    min_and_filtered_rdkit_mol.GetNumConformers()):
                tmp_coord_list = []
                for a in min_and_filtered_rdkit_mol.GetAtoms():
                    pos = min_and_filtered_rdkit_mol.GetConformer(
                        conf_idx).GetAtomPosition(a.GetIdx())
                    tmp_coord_list.append([pos.x, pos.y, pos.z])
                tmp_coord_list = np.array(tmp_coord_list) * unit.angstrom
                traj.append(tmp_coord_list.value_in_unit(unit.nanometer))

            confs_traj.append(md.Trajectory(traj, top))
            minimum_energies.append(filtered_energies)
            bw_energies.append(calculate_weighted_energy(filtered_energies))
            print("Mining Minima finished ...")

        # Difference of the weighted energies: final state minus initial state.
        e = bw_energies[1] - bw_energies[0]
        return confs_traj, e, minimum_energies, all_energies, all_conformations
Пример #16
0
def confsearchsmiles(name, smiles, cmp, eout, optd):
    """Run a conformer search for one SMILES and write structures and energies.

    Steps: build the molecule, embed conformers, optimize them with the
    force-field helper, clean them up (keeping at most
    ``max(20, 4 * rotatable bonds)``), optimize every remaining conformer
    through ``cmp``, align, compute energies and deviations through ``cmp``,
    rewrite the conformer coordinates in order of increasing energy, write
    them to ``optd + name + '.sdf'`` and append one summary line to ``eout``.

    Args:
        name: label used in messages, the output file name and the summary.
        smiles: SMILES string of the molecule.
        cmp: object providing ``optimize_rdkit_molecule`` and
            ``energy_rdkit_conformers``.
        eout: writable file-like object receiving the summary line.
        optd: prefix (directory) prepended to the SD file name.

    Returns:
        None. Returns early, after printing a message, when no conformer
        survives the clean-up step.
    """
    # Create RDKit MOL
    mol = Chem.MolFromSmiles(smiles)
    print('Working on:', name, 'Heavyatoms(', mol.GetNumHeavyAtoms(), ')')

    # Conf generation routine
    mol = EmbedConformers(mol)
    mol = OptimizeConformersFF(mol)
    rotatable_bonds = Chem.rdMolDescriptors.CalcNumRotatableBonds(mol)
    mol = CleanConformers(mol,
                          max_keep=max(20, rotatable_bonds * 4),
                          max_energy=25.0,
                          rmsd_thresh=0.5)

    # Conformer ids that survived the clean-up.
    conf_ids = [conformer.GetId() for conformer in mol.GetConformers()]
    n_confs = len(conf_ids)
    print(n_confs)

    if n_confs < 1:
        print('Skipping ' + name + ': not enough confs to continue.')
        return

    # ANI OPT: one status value per conformer, in conf_ids order.
    opt_status = np.zeros(n_confs, dtype=np.int64)
    for position, conf_id in enumerate(conf_ids):
        print('   -Optimizing confid:', conf_id)
        opt_status[position] = cmp.optimize_rdkit_molecule(mol,
                                                           cid=conf_id,
                                                           fmax=0.001)

    # Align conformers
    rms_values = []
    AllChem.AlignMolConformers(mol, RMSlist=rms_values, maxIters=200)

    # Get energies and std dev., scaled by hdt.hatokcal.
    energies, deviations = cmp.energy_rdkit_conformers(mol, conf_ids)
    energies = hdt.hatokcal * energies
    deviations = hdt.hatokcal * deviations

    print(energies - energies.min())

    # Sort by energy (low to high)
    order = np.argsort(energies)
    coords, _species = pya.__convert_rdkitconfs_to_nparr__(mol)
    sorted_coords = coords[order]
    sorted_energies = energies[order]
    sorted_deviations = deviations[order]

    # Overwrite the conformers, in conf_ids order, with the energy-sorted
    # coordinates.
    n_atoms = mol.GetNumAtoms()
    for conf_id, conf_coords in zip(conf_ids, sorted_coords):
        conformer = mol.GetConformer(conf_id)
        for atom_index in range(n_atoms):
            atom_xyz = conf_coords[atom_index]
            conformer.SetAtomPosition(
                atom_index,
                [float(atom_xyz[0]),
                 float(atom_xyz[1]),
                 float(atom_xyz[2])])

    # Write out conformations
    writer = AllChem.SDWriter(optd + name + '.sdf')
    for conf_id in conf_ids:
        writer.write(mol, confId=conf_id)
    writer.close()

    # Write out energy and sigma data
    summary_fields = [
        name,
        str(sorted_energies),
        str(sorted_deviations),
        str(opt_status),
    ]
    eout.write(' '.join(summary_fields) + '\n')
    eout.flush()

    return
Пример #17
0
def compare_confomer_generator_and_trajectory_minimum_structures(
    results_path: str, name: str, base: str, tautomer_idx: int, thinning: int = 100
):
    """Compare minimized trajectory frames with the pruned generated conformers.

    Every ``thinning``-th frame of the vacuum trajectory of tautomer
    ``tautomer_idx`` is minimized with the ANI energy function and added as a
    conformer to an RDKit molecule built from the trajectory topology
    (without the dummy hydrogen). These conformers are compared, by
    best-mapping RMSD over the maximum common substructure, against the
    pruned reference conformers stored in ``ani_mm_results.pickle``. A
    heatmap of the RMSD matrix is shown and summary counts are printed.

    Parameters
    ----------
    results_path : str
        Directory holding ``ani_mm_results.pickle`` and
        ``exp_results.pickle``.
    name : str
        Key of the system in both result dictionaries; also used to build
        the trajectory and PDB paths below ``base``.
    base : str
        Root directory of the per-system trajectory folders.
    tautomer_idx : int
        1 or 2; selects the tautomer (checked with an assert).
    thinning : int
        Stride used when iterating over the trajectory frames.

    Returns
    -------
    compare_mol
        RDKit molecule (after ``_remove_hydrogens``) with the aligned,
        minimized trajectory conformers.
    minimum_traj
        ``md.Trajectory`` of the minimized frames.
    reference_mol
        RDKit molecule (after ``_remove_hydrogens``) with the aligned,
        pruned reference conformers.
    reference_energies
        Second element returned by ``prune_conformers`` for the reference.
    """
    assert tautomer_idx == 1 or tautomer_idx == 2

    # Context managers make sure both pickle files are closed again.
    # NOTE: unpickling can execute arbitrary code - only load result files
    # from a trusted source.
    with open(f"{results_path}/ani_mm_results.pickle", "rb") as ani_file:
        ani_results = pickle.load(ani_file)
    with open(f"{results_path}/exp_results.pickle", "rb") as exp_file:
        exp_results = pickle.load(exp_file)

    # generate the tautomer object
    t1_smiles = exp_results[name]["t1-smiles"]
    t2_smiles = exp_results[name]["t2-smiles"]
    t_type, tautomers, flipped = generate_tautomer_class_stereobond_aware(
        name, t1_smiles, t2_smiles, nr_of_conformations=1, enforceChirality=True
    )

    tautomer = tautomers[0]
    print(f"Flipped: {flipped}")
    tautomer.perform_tautomer_transformation()

    tautomer_mol = prune_conformers(
        ani_results[name]["t1-confs"],
        ani_results[name]["t1-energies"],
        rmsd_threshold=0.1,
    )
    print(len(tautomer_mol[1]))

    traj_path = (
        f"{base}/{name}/{name}_lambda_{tautomer_idx-1}.0000_kappa_0.0000_in_vacuum.dcd"
    )
    pdb_path = f"{base}/{name}/{name}_0.pdb"

    # load trajectory, remove dummy atom
    traj = md.load(traj_path, top=pdb_path)
    atom_idx = [a.index for a in traj.topology.atoms]
    if (tautomer_idx - 1) == 1:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_0))
    else:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_1))

    traj = traj.atom_slice(atom_indices=atom_idx)

    # save pdb without dummy atom
    tautomer_pdb = f"{base}/{name}/{name}_without_dummy_{tautomer_idx}.pdb"
    traj[0].save_pdb(tautomer_pdb)

    # generate rdkit mol object with the same atom indices as the trajectory
    # but without the dummy atom
    mol = Chem.MolFromPDBFile(tautomer_pdb, removeHs=False)
    # remove conf of pdb
    mol.RemoveAllConformers()

    # generate energy function, use atom symbols of rdkit mol
    from .ani import ANI_force_and_energy, ANI1ccx

    model = ANI1ccx()
    energy_function = ANI_force_and_energy(
        model=model, atoms=[a.GetSymbol() for a in mol.GetAtoms()], mol=None
    )

    # take every `thinning`-th conformation and minimize it using ANI
    minimized_traj = []  # store min conformations in here

    # Slice once instead of re-slicing the trajectory on every iteration
    # just to print its length.
    thinned_traj = traj[::thinning]
    nr_of_frames = len(thinned_traj)
    for idx, conf in enumerate(thinned_traj):

        print(f"{idx}/{nr_of_frames}")
        c = (conf.xyz[0]) * unit.nanometer
        # only real atoms, therefore lambda not needed
        min_conf = energy_function.minimize(c)[0]
        minimized_traj.append(min_conf)
        new_conf = _generate_conformer(min_conf)
        # add the conformation to the rdkit mol object
        mol.AddConformer(new_conf, assignId=True)

    # generate mdtraj object with minimized confs
    minimum_traj = md.Trajectory(
        np.array([v.value_in_unit(unit.nanometer) for v in minimized_traj]),
        traj.topology,
    )

    # generate reference_mol
    reference = prune_conformers(
        ani_results[name][f"t{tautomer_idx}-confs"],
        ani_results[name][f"t{tautomer_idx}-energies"],
        rmsd_threshold=0.1,
    )

    # remove most hydrogens
    reference_mol = _remove_hydrogens(copy.deepcopy(reference[0]))
    compare_mol = _remove_hydrogens(copy.deepcopy(mol))

    # find atom indices that are compared for RMSD
    sub_m = rdFMCS.FindMCS(
        [reference_mol, compare_mol],
        bondCompare=Chem.rdFMCS.BondCompare.CompareOrder.CompareAny,
        maximizeBonds=False,
    )
    mcsp = Chem.MolFromSmarts(sub_m.smartsString, False)

    # the order of the substructure lists are the same for both
    # substructure matches => substructure_idx_m1[i] = substructure_idx_m2[i]
    substructure_idx_reference = reference_mol.GetSubstructMatches(mcsp, uniquify=False)
    substructure_idx_compare = compare_mol.GetSubstructMatches(mcsp, uniquify=False)

    # generate rmsd matrix
    rmsd = np.zeros(
        (reference_mol.GetNumConformers(), mol.GetNumConformers()), dtype=float
    )

    # save clusters
    got_hit = np.zeros(reference_mol.GetNumConformers(), dtype=int)

    # atom mapping
    from itertools import combinations

    nr_of_reference_confs = len(reference_mol.GetConformers())
    nr_of_compare_confs = len(compare_mol.GetConformers())

    for nr_of_mappings, (e1, e2) in enumerate(
        combinations(substructure_idx_reference + substructure_idx_compare, 2)
    ):

        atom_mapping = [(a1, a2) for a1, a2 in zip(e1, e2)]
        # get rmsd matrix with a given set of atom mapping
        # update rmsd matrix whenever lower RMSD appears
        for i in range(nr_of_reference_confs):
            for j in range(nr_of_compare_confs):

                proposed_rmsd = AllChem.AlignMol(
                    reference_mol, compare_mol, i, j, atomMap=atom_mapping
                )
                # test if this is optimal atom mapping
                if nr_of_mappings == 0:
                    rmsd[i, j] = proposed_rmsd
                else:
                    rmsd[i, j] = min(rmsd[i, j], proposed_rmsd)

    # count, per reference conformer, the compared conformers within 0.1 RMSD
    for i in range(nr_of_reference_confs):
        for j in range(nr_of_compare_confs):
            if rmsd[i, j] <= 0.1:
                got_hit[i] += 1

    sns.heatmap(rmsd)
    plt.show()

    print(f"Nr of clusters: {len(got_hit)}")
    print(
        f"Nr of conformations part of one cluster: {sum(got_hit)}/{mol.GetNumConformers()}"
    )
    print(f"Clusters present: {got_hit}")

    AllChem.AlignMolConformers(reference_mol)
    AllChem.AlignMolConformers(compare_mol)

    return compare_mol, minimum_traj, reference_mol, reference[1]
Пример #18
0
def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness,
                            second_embed):
    """Gets alternate ring conformations. Meant to run with the parallelizer class.

    Conformers are generated for the molecule, each non-aromatic ring is
    aligned across conformers on its own, and the per-ring RMSDs are clustered
    with k-means. The first conformer encountered in each cluster is kept.

    :param mol: The molecule to process (with non-aromatic ring(s)).
    :type mol: MyMol.MyMol
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param second_embed: Whether to try to generate 3D coordinates using an
        older algorithm if the better (default) algorithm fails. This can add
        run time, but sometimes converts certain molecules that would
        otherwise fail.
    :type second_embed: bool
    :return: A list of MyMol.MyMol objects, with alternate ring conformations,
        or None if no conformers could be generated for the molecule.
    :rtype: list or None
    """

    # For every non-aromatic ring, collect the indexes of the bonds whose two
    # end atoms both belong to that ring.
    rings_by_bond_indexes = []
    for ring_atom_indecies in mol.get_idxs_of_nonaro_rng_atms():
        ring_atoms = set(ring_atom_indecies)
        bond_indexes = set()
        for ring_atm_idx in ring_atom_indecies:
            atom = mol.rdkit_mol.GetAtomWithIdx(ring_atm_idx)
            for bond in atom.GetBonds():
                # One end of the bond is the ring atom itself, so the bond is
                # a ring bond exactly when both ends are ring atoms.
                if (bond.GetBeginAtomIdx() in ring_atoms
                        and bond.GetEndAtomIdx() in ring_atoms):
                    bond_indexes.add(bond.GetIdx())
        rings_by_bond_indexes.append(sorted(bond_indexes))

    # Generate a bunch of conformations, ordered from best energy to worst.
    # Note that this is cached. Minimizing too.
    mol.add_conformers(thoroughness * max_variants_per_compound, 0.1, True)

    if len(mol.conformers) == 0:
        # Sometimes there are no conformers if it's an impossible structure.
        # Like
        # [H]c1nc(N2C(=O)[C@@]3(C([H])([H])[H])[C@@]4([H])O[C@@]([H])(C([H])([H])C4([H])[H])[C@]3(C([H])([H])[H])C2=O)sc1[H]
        # So don't save this one anyway.
        return None

    # Get the scores (lowest energy) of these minimized conformers.
    mol.load_conformers_into_rdkit_mol()

    # Extract just the rings.
    ring_mols = [
        Chem.PathToSubmol(mol.rdkit_mol, bond_idxs)
        for bond_idxs in rings_by_bond_indexes
    ]

    # Align each ring separately and record the RMSDs relative to the first
    # conformation. Every ring gets its own, independent list.
    list_of_rmslists = []
    for ring_mol in ring_mols:
        rmslist = []
        AllChem.AlignMolConformers(ring_mol, RMSlist=rmslist)
        list_of_rmslists.append(rmslist)

    # Get points for each conformer (rmsd_ring1, rmsd_ring2, rmsd_ring3).
    # The first conformer is the alignment reference, so its row is all zeros.
    pts = numpy.array(list_of_rmslists).T
    pts = numpy.vstack((numpy.array([[0.0] * pts.shape[1]]), pts))

    # Cluster those points, get lowest-energy member of each.
    num_clusters = min(len(pts), max_variants_per_compound)

    # When kmeans2 runs on insufficient clusters, it can sometimes throw
    # an error about empty clusters. This is not necessary to throw for
    # the user and so we have supressed it here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        groups = kmeans2(pts, num_clusters, minit='points')[1]

    # Note that you have some geometrically diverse conformations here,
    # but there could be other versions (enantiomers, tautomers, etc.)
    # that also contribute similar conformations. In the end, you'll be
    # selecting from all these together, so similar ones could end up
    # together.

    # Key is group id from kmeans (int). Values are the MyMol.MyConformers
    # objects. Only the first conformer seen for each group is kept.
    best_ones = {}
    for k, grp in enumerate(groups):
        if grp not in best_ones:
            best_ones[grp] = mol.conformers[k]

    # Convert rdkit mols to MyMol.MyMol and save those MyMol.MyMol objects
    # for returning.
    results = []
    for conf in best_ones.values():
        new_mol = copy.deepcopy(mol)
        c = MyConformer(new_mol, conf.conformer(), second_embed)
        new_mol.conformers = [c]
        energy = c.energy

        new_mol.genealogy = mol.genealogy[:]
        new_mol.genealogy.append(
            new_mol.smiles(True) + " (nonaromatic ring conformer: " +
            str(energy) + " kcal/mol)")

        results.append(new_mol)

    return results
Пример #19
0
def rdkit_rms(mol):
    """Align the conformers of ``mol`` and return their RMS matrix.

    The conformers are aligned in place with ``AllChem.AlignMolConformers``;
    the matrix is then computed with ``prealigned=True`` so that
    ``GetConformerRMSMatrix`` does not align them again.
    """
    AllChem.AlignMolConformers(mol)
    return AllChem.GetConformerRMSMatrix(mol, prealigned=True)
Пример #20
0
def molconformergenerator(m, fpf, wdir, Nconf, idx):
    """Generate conformers of ``m`` and write one ``.ipt`` input file per conformer.

    All conformers are additionally appended, in XYZ-like form, to a single
    ``<wdir>dipeptide-<idx>.xyz`` file. The per-conformer input files are named
    ``<wdir>dipeptide-<idx + i>.ipt`` where ``i`` is the conformer index.

    Parameters
    ----------
    m : molecule object or None
        Molecule handed to ``generateconformations``. If ``None``, an error is
        printed and the process exits with status 1.
    fpf : str
        Label written after ``Mole:`` in the header comment of each input file.
    wdir : str
        Output directory prefix; it is concatenated directly with the file
        names, so it must end with a path separator.
    Nconf : int
        Number of conformers requested from ``generateconformations``.
    idx : int
        Base index used to number the output files.

    Raises
    ------
    SystemExit
        With code 1 when ``m`` is ``None``.
    """
    TSS = 1
    LOT = 'WB97X/6-31g*'  # High level of theory
    rdm = 'uniform'  # Random dist
    sampling_type = 'nmrandom'  # written as 'type='; avoids shadowing builtin
    Temp = '400.0'
    SCF = 'Tight'
    MEM = '2048'

    dpf = 'dipeptide-'

    #------- End Parameters ---------

    if m is None:
        print("Error: cannot open file!")
        # Same effect as the site-provided exit(1), without relying on `site`.
        raise SystemExit(1)

    m, _ids = generateconformations(m, Nconf)

    # rmslist[k] is the RMS of conformer k + 1 against conformer 0.
    rmslist = []
    AllChem.AlignMolConformers(m, RMSlist=rmslist)
    for rms in rmslist:
        print(rms)

    n_confs = m.GetNumConformers()
    n_atoms = m.GetNumAtoms()
    print(n_confs)

    DOF = 3 * n_atoms - 6

    # Context managers guarantee both output files are flushed and closed,
    # even if writing one of the conformers fails.
    with open(wdir + dpf + str(idx) + '.xyz', 'w') as confcrd:
        for i in range(n_confs):
            print("-------Conformer " + str(i) + "------")
            c = m.GetConformer(i)

            #---------- Write Input Variables ------------
            dfname = dpf + str(idx + i) + '_train.dat'
            vdfname = dpf + str(idx + i) + '_valid.dat'
            edfname = dpf + str(idx + i) + '_test.dat'

            filename = wdir + dpf + str(idx + i) + '.ipt'
            with open(filename, 'w') as f:
                f.write('TSS=' + str(int(TSS * DOF)) + ' \n')
                f.write('VSS=0 \n')
                f.write('ESS=0 \n')

                f.write('LOT=' + LOT + ' \n')
                f.write('rdm=' + rdm + '\n')
                f.write('type=' + sampling_type + '\n')
                f.write('Temp=' + Temp + '\n')
                f.write('mem=' + MEM + '\n')
                f.write('SCF=' + SCF + '\n')
                f.write('dfname=' + dfname + ' \n')
                f.write('vdfname=' + vdfname + ' \n')
                f.write('edfname=' + edfname + ' \n')
                f.write('optimize=1 \n')
                f.write('frequency=1 \n')

                f.write('\n')

                # Conformer 0 is the alignment reference and has no entry in
                # rmslist; compare by value, not identity.
                if i == 0:
                    f.write('#Conformer RMS from 0: 0.0 Mole: ' + fpf)
                else:
                    f.write('#Conformer RMS from 0: ' + str(rmslist[i - 1]) +
                            ' Mole: ' + fpf)

                f.write('\n\n')
                f.write('$coordinates\n')

                confcrd.write("\n")
                confcrd.write(str(n_atoms) + "\n")

                for j in range(n_atoms):
                    pos = c.GetAtomPosition(j)
                    typ = m.GetAtomWithIdx(j).GetSymbol()

                    f.write(' ' + str(typ) + ' ' + str(typ) + ' ' +
                            "{:.5f}".format(pos.x) + ' ' +
                            "{:.5f}".format(pos.y) + ' ' +
                            "{:.5f}".format(pos.z) + '\n')
                    confcrd.write(typ + ' ' + str(pos.x) + ' ' + str(pos.y) +
                                  ' ' + str(pos.z) + '\n')

                f.write('&\n\n')

                f.write('$connectivity\n')
                f.write('   NONE\n')
                f.write('&\n\n')

                f.write('$normalmodes\n')
                f.write(' NEED TO COMPUTE\n')
                f.write('&\n\n')
Пример #21
0
def calculate_charges(smiles, resp_type, overwrite=False):
    """Run the charge calculation for the molecule defined by smiles

    Conformers are embedded with RDKit, a random subset is geometry optimized,
    and a multi-conformer RESP fit is performed in vacuum (and additionally
    with PCM for RESP2). Results are written to disk by the helper functions;
    nothing is returned.

    Parameters
    ----------
    smiles: str
        the smiles string of the desired molecule
    resp_type: str
        the type of RESP to perform (RESP1 or RESP2), case-insensitive
    overwrite: boolean, optional, default=False
        overwrite the previous results if they exist

    Returns
    -------
    None

    Raises
    ------
    InvalidMoleculeError
        if the smiles string is invalid (presumably raised by
        ``canonicalize_smiles``; it is not raised directly here)
    RESPLibraryError
        if the molecule directory does not exist, if results already exist and
        ``overwrite`` is False, or if ``resp_type`` is not RESP1/RESP2
    ValueError
        if fewer conformers than the requested ``n_conformers`` were embedded
    AssertionError
        if the stored input disagrees with ``smiles`` or ``resp_type``
    """
    smiles = canonicalize_smiles(smiles)
    iupac_name = smiles_to_iupac(smiles)
    mol_path = Path.joinpath(LIB_PATH, iupac_name)
    if not mol_path.is_dir():
        raise RESPLibraryError(
            f"The directory for the molecule: '{smiles}' with name "
            f"{iupac_name} does not exist. Please run "
            "resp_library.prepare_charge_calculation() first.")

    # Initialize RESP stuff
    resp_type = resp_type.upper()
    resp_path = Path.joinpath(mol_path, resp_type)
    if not overwrite and Path.joinpath(resp_path, "results").is_dir():
        raise RESPLibraryError(
            "It appears that this partial charge calculation "
            "has already been attempted or performed. If you wish "
            "to re-run this calculation, please remove the 'results', "
            "'structures', and 'esp_grids' directories before proceeding.")
    inp, log = _initialize_resp(resp_path)

    # From here on the log is open: make sure it is closed on every exit
    # path, including the error paths below.
    try:
        # Molecule definition
        assert inp['smiles'] == smiles
        assert inp['resp']['type'].upper() == resp_type
        # Parse the YAML file
        conf_settings = inp['conformer_generation']
        n_conformers = conf_settings['n_conformers']
        rms_threshold = conf_settings['rms_threshold']
        energy_threshold = conf_settings['energy_threshold']  # kJ/mol
        conformer_seed = conf_settings['random_seed']
        charge_constraints = inp['resp']['charge_constraints']
        equality_constraints = inp['resp']['equality_constraints']
        point_density = inp['resp']['point_density']
        vdw_scale_factors = inp['resp']['vdw_scale_factors']

        if resp_type == "RESP1":
            esp_method = "hf"
            esp_basis_set = "6-31g*"
        elif resp_type == "RESP2":
            esp_method = "pw6b95"
            esp_basis_set = "aug-cc-pV(D+d)Z"
        else:
            raise RESPLibraryError(
                "Invalid RESP type. Only 'RESP1' and 'RESP2' are supported.")

        # Create the molecule, add H's
        rdmol = Chem.MolFromSmiles(smiles)
        rdmol = Chem.AddHs(rdmol)

        # Get the net charge
        net_charge = Chem.rdmolops.GetFormalCharge(rdmol)
        log.log(f"The net charge is {net_charge}.")

        # Get the elements
        elements = [a.GetSymbol() for a in rdmol.GetAtoms()]
        vdw_radii = {
            elem_sym: ele.element_from_symbol(elem_sym).radius_bondi
            for elem_sym in elements
        }

        # Generate conformers
        cids = AllChem.EmbedMultipleConfs(rdmol,
                                          numConfs=500,
                                          pruneRmsThresh=rms_threshold,
                                          randomSeed=conformer_seed)
        AllChem.AlignMolConformers(rdmol)
        n_embedded = len(cids)
        if n_embedded < n_conformers:
            raise ValueError(
                "Not enough conformers found. Please reduce the "
                "'rms_threshold' or the 'n_conformers'. For molecules "
                "with < 5 atoms it may be difficult to generate more "
                "than 2 conformers.")

        # Select n_conformers at random
        np.random.seed(conformer_seed)
        conformer_ids = np.random.choice(np.arange(n_embedded),
                                         size=n_conformers,
                                         replace=False)
        # A set gives constant-time membership tests while filtering.
        selected = set(conformer_ids.tolist())
        for idx in range(n_embedded):
            if idx not in selected:
                rdmol.RemoveConformer(idx)
        # Renumber conformers
        for idx, c in enumerate(rdmol.GetConformers()):
            c.SetId(idx)

        optimized_p4molecules = []
        optimized_energies = []

        # For each conformer, geometry optimize with psi4
        for conformer in rdmol.GetConformers():
            p4mol = build_p4mol(elements, conformer.GetPositions(),
                                net_charge)
            p4mol, energy = _geometry_optimize(p4mol, resp_type)
            # Save optimized structure and energy
            optimized_p4molecules.append(p4mol)
            optimized_energies.append(energy)

            # Extract optimized coordinates; update coordinates RDKIT molecule
            coords = p4mol.geometry().to_array() * BOHR_TO_ANGSTROM
            for i in range(rdmol.GetNumAtoms()):
                x, y, z = coords[i]
                conformer.SetAtomPosition(i, Point3D(x, y, z))

        # Check energies and remove high energy conformers
        _check_relative_conformer_energies(rdmol, optimized_p4molecules,
                                           optimized_energies,
                                           energy_threshold, log)

        # Align conformers for easier visual comparison
        _save_aligned_conformers(rdmol, log)

        # Save the conformers used for resp
        Path("structures/optimized_geometries.pdb").write_text(
            Chem.rdmolfiles.MolToPDBBlock(rdmol))
        log.log(
            "Wrote the final optimized gemoetries to 'optimized_geometries.pdb'.\n\n"
        )

        # Finally we do multi-conformer RESP: always in vacuum, and a second
        # time with PCM when RESP2 was requested.
        pcm = False
        charges = _perform_resp(
            optimized_p4molecules,
            charge_constraints,
            equality_constraints,
            esp_method,
            esp_basis_set,
            pcm,
            point_density,
            vdw_radii,
            vdw_scale_factors,
            log,
        )
        _write_results(elements, charges, equality_constraints, "vacuum", log)

        if resp_type == "RESP2":
            pcm = True
            charges = _perform_resp(
                optimized_p4molecules,
                charge_constraints,
                equality_constraints,
                esp_method,
                esp_basis_set,
                pcm,
                point_density,
                vdw_radii,
                vdw_scale_factors,
                log,
            )
            _write_results(elements, charges, equality_constraints, "pcm",
                           log)
    finally:
        log.close()
def superpos(file_name_prefix,
             mol_list,
             amsol_dir,
             only_planar_rings=False,
             pattern_file=None,
             debug=False):
    """Superpose conformational ensembles on ring systems or SMARTS patterns.

    For every molecule in ``mol_list`` that has an entry in the solvation
    table, the conformers are aligned on each matched atom set in turn.
    Conformers whose RMS to the current reference is within the cut-off are
    grouped with it; each group is written to its own mol2 file together with
    a re-ordered solvation table.

    ARGUMENTS:
      - file_name_prefix: prefix of file name
      - mol_list: list with ensembles of molecules (one multi-conformer
        molecule per entry)
      - amsol_dir: directory under which
        ``<file_name_prefix>_amsol_cav_CYC`` lies
      - only_planar_rings: superimpose only on planar rings (optional;
        marked "currently not working" in the code)
      - pattern_file: (optional) file with SMARTS for pattern matching
        (marked "currently not working" in the code)
      - debug: (optional) gives more output

    RETURNS:
      dict mapping each molecule title to the list of new titles generated
      for it (an empty list if the title is not in the solvation table).
      The aligned structures and solvation tables are saved as files.
    """

    #rot=DoubleArray(9)
    #trans=DoubleArray(3)

    # NOTE(review): smarts_file is opened here and never closed in this
    # function.
    if pattern_file != None:
        pattern_matching = True
        smarts_file = open(pattern_file, 'r')
        rms_cut_off = 0.001  # fewer atoms in pattern matching -> must be more accurate, otherwise mol2db screws up
        print("pattern matching")
    else:
        pattern_matching = False
        print("ring matching")
        rms_cut_off = 0.015  #used to be 0.01 with OE kits. Does 0.025 really work for mol2db?

    solv_dict = {}
    output_dict = {}

    #pattern matching is currently not working

    if pattern_matching:
        #some options from the original match.py file
        # Only atom_expr, bond_expr and max are used below; the other names
        # are assigned but never read in this function.
        # NOTE(review): `max` shadows the builtin within this function.
        usa = 1
        asym = 1
        exph = 0
        max = 0
        smartsfile = None
        verbose = 0
        qmolfile = None
        kekule = 0
        atom_expr = None
        bond_expr = None
        smartslist = None
        qmollist = None
        nowarn = 0

        smarts_list = chemistry.get_smartslist_file(smarts_file)

        patlist = chemistry.get_patlist(smarts_list, atom_expr, bond_expr, max)

    #out_file = file_name_prefix + '_om_mult_rings.mol2'
    # NOTE(review): out_file_tmp is not used again in this function.
    out_file_tmp = file_name_prefix + '_mult_rings_tmp.mol2'
    solv_table_file_name = amsol_dir + '/' + file_name_prefix + '_amsol_cav_CYC/' + file_name_prefix + '_CYC.solv'
    #print solv_table_file_name
    new_solv_table_file_name = amsol_dir + '/' + file_name_prefix + '_amsol_cav_CYC/' + file_name_prefix + '_mult_rings_CYC.solv'
    solv_table_file = open(solv_table_file_name, 'r')
    #new_solv_talbe_file = open(new_solv_table_file_name, 'w')
    #read solvation table; this might have to be changed if tables get too long to be held in memory
    solv_dict = read_solv_table(solv_table_file)
    #print solv_dict.keys()

    for mol in mol_list:  #loop over all molecule ensembles in the list
        title = mol.GetProp("_Name")
        print(title)
        output_dict[title] = []
        #go on only if the solvation energy calculation was successful
        if title in solv_dict:
            if not pattern_matching:
                #get the atoms that have to be superimposed
                if only_planar_rings:  #currently not working
                    # NOTE(review): `ringlist` is never assigned in this
                    # function, and `count` is read here before it has been
                    # assigned on the first molecule.
                    match_list, count = super_impose_only_on_planar_rings(
                        ringlist, mol, count)
                    #print count
                else:
                    match_list, count = fuse_rings(mol, debug)
            else:  #currently not working
                #print title
                match_list, count = get_matches(mol, patlist)
                #print match_list
                # NOTE(review): OECreateOEMol is not defined in the visible
                # code (presumably a leftover from the OE toolkit version);
                # refmol is not used afterwards.
                refmol = OECreateOEMol()
                super_pos_counter = 0
            #print count, ' count'

            cids = [x.GetId()
                    for x in mol.GetConformers()]  #get all conformer ids

            #if there is only one conformer, there is nothing to align

            if len(cids) == 1:
                count = 1  #no need to loop over all rings of only one conformer

            # Running number of groups written for this molecule; it is passed
            # to gen_new_title to build the per-group title.
            super_pos_counter = 0

            for i in range(0, count):  #loop over ring systems
                print(match_list, 'match_list')
                print('ring id:', i)
                # NOTE(review): `saved` is assigned but never read.
                saved = 0

                #write out amsol charges if this is easily possible

                #align molecules on best reference molecule
                # found_ref holds every conformer id that has already been
                # used as a reference or assigned to a reference's group.
                found_ref = []
                for conf_id in cids:
                    if conf_id not in found_ref:
                        found_ref.append(conf_id)
                        align_list = [conf_id]  #align onto current conformer
                        for cid_2 in cids:  #find other conformers to align
                            if cid_2 not in found_ref:
                                align_list.append(
                                    cid_2
                                )  #align these conformers in this round

                        # Align the still-unassigned conformers onto conf_id
                        # using only the atoms of the current match.
                        rms_values = []
                        AllChem.AlignMolConformers(mol,
                                                   confIds=align_list,
                                                   atomIds=match_list[i][0],
                                                   RMSlist=rms_values)
                        if debug:
                            print('cids:', cids, 'len rms_values',
                                  len(rms_values), rms_values)
                            print('align_list', align_list)
                        found_this_round = [conf_id]
                        for i_rms in range(
                                0, len(rms_values
                                       )):  #find conformers that aligned well
                            #first entry in rms_values corresponds to the alignment of the 2nd entry in align_list onto the first entry in align_list, and so on
                            if rms_values[i_rms] <= rms_cut_off:
                                found_ref.append(align_list[i_rms + 1])
                                found_this_round.append(align_list[i_rms + 1])
                        print('found this round:', found_this_round)
                        #write out aligned molecules
                        super_pos_counter = super_pos_counter + 1
                        print("super_pos_counter", super_pos_counter)
                        #for zinc, you have to adjust the titles before => don't destroy this adjustment here
                        new_title = gen_new_title(title, super_pos_counter)
                        #mol.SetProp("_Name", new_title)
                        print(mol.GetProp("_Name"), "name >----------")
                        #order atoms for mol2db
                        tmp_mol, index_list = OrderMol2db(
                            mol, match_list[i][0])
                        tmp_mol.SetProp("_Name", title)

                        print(index_list, "index_list")

                        #reorder solv table
                        #new_first_line = solv_dict[title][0].replace(title, new_title)
                        new_solv_table_file_name = amsol_dir + '/' + file_name_prefix + '_amsol_cav_CYC/' + new_title + '_mult_rings_CYC.solv'
                        new_solv_table_file = open(new_solv_table_file_name,
                                                   'w')
                        # Header line first, then the per-atom lines in the
                        # order given by index_list.
                        new_solv_table_file.write(solv_dict[title][0])
                        for index in index_list:  #sort solvation table according to atom indexes
                            new_solv_table_file.write(
                                solv_dict[title][1][index])
                        new_solv_table_file.close()

                        #problem with mol2db: alignment is not accurate enough, does not find identical atoms
                        #get coordinates from ref conformer
                        #use them for matched conformers

                        if len(
                                found_this_round
                        ) > 1:  #no need to re-sort if only one molecule in the list
                            # Positions of the matched atoms in the reference
                            # conformer (first entry of found_this_round).
                            coord_list = []
                            for atom_idx in match_list[i][0]:
                                pt1 = mol.GetConformer(
                                    found_this_round[0]).GetAtomPosition(
                                        atom_idx)
                                coord_list.append(pt1)

                        out_file = new_title + '_om_mult_rings.mol2'
                        output_dict[title].append(new_title)

                        for cid_3 in found_this_round:
                            #saved = saved + 1
                            if len(
                                    found_this_round
                            ) > 1:  #update coordinates, no need to do if only one molecule in the list
                                # Overwrite the first len(coord_list) atom
                                # positions of tmp_mol with the reference
                                # coordinates.
                                for pos in range(0, len(coord_list)
                                                 ):  #atoms are already sorted
                                    tmp_mol.GetConformer(
                                        cid_3).SetAtomPosition(
                                            pos, coord_list[pos])

                            #get amsol charges to write into mol2 file
                            for atom in tmp_mol.GetAtoms():
                                pos_unsorted_solv_table = index_list[
                                    atom.GetIdx()]
                                #print (pos_unsorted_solv_table, atom.GetIdx())
                                # The charge is the first whitespace-separated
                                # field of the atom's solvation-table line.
                                charge = solv_dict[title][1][
                                    pos_unsorted_solv_table].split()[0].strip(
                                    )
                                #print (charge)
                                atom.SetProp('_GasteigerCharge', charge)

                            Mol2Writer.MultiMolToMol2File([tmp_mol],
                                                          out_file,
                                                          confId=cid_3,
                                                          addHs=True,
                                                          append=True,
                                                          addCharges=False)

                #print ('saved: ', saved, 'ring id:', i)
                #saved = 0

    #new_solv_talbe_file.close()
    solv_table_file.close()
    print("done superpose_conf_ensemble!")
    return output_dict
Пример #23
0
def align_conformers(mol, clust_ids):
    """Align the conformers listed in ``clust_ids`` in place.

    Returns the list of RMS values that ``AllChem.AlignMolConformers`` fills
    in for the aligned conformers.
    """
    rms_values = []
    AllChem.AlignMolConformers(mol, RMSlist=rms_values, confIds=clust_ids)
    return rms_values
def confsearchsmiles(name, smiles, Ew, NCONF, cmp, eout, optd):
    """Run a conformer search for one SMILES string and write the results.

    The molecule is embedded, MMFF-optimized, reduced to one conformer per
    RMS cluster, filtered by energy, optimized again through ``cmp``, aligned,
    sorted by energy and written to ``<optd><name>.sdf``. One summary line is
    appended to ``eout``.

    Parameters
    ----------
    name : str
        Label used in messages, in the SDF file name and in the summary line.
    smiles : str
        SMILES string of the molecule.
    Ew : float
        Energy window. NOTE(review): currently unused; the window is
        hard-coded to 5.0 below.
    NCONF : int
        Number of conformers to embed.
    cmp : object
        Provides ``energy_rdkit_conformers`` and ``optimize_rdkit_molecule``.
    eout : file-like
        Open text stream that receives the energy summary line.
    optd : str
        Output directory prefix, concatenated directly with ``name``.

    Returns
    -------
    None
        Also returned early when the SMILES is invalid, the molecule has more
        than 50 heavy atoms, or no conformers remain.
    """
    # Create RDKit MOL; MolFromSmiles returns None for unparsable input.
    m = Chem.MolFromSmiles(smiles)
    if m is None:
        print('Skipping ' + name + ': invalid SMILES.')
        return

    print('Working on:', name, 'Heavyatoms(', m.GetNumHeavyAtoms(), ')')
    if m.GetNumHeavyAtoms() > 50:
        print('Skipping ' + name + ': more than 50 atoms.')
        return

    # Add hydrogens
    m = Chem.AddHs(m)

    # Embed NCONF random conformations
    cids = AllChem.EmbedMultipleConfs(m,
                                      useExpTorsionAnglePrefs=True,
                                      useBasicKnowledge=True,
                                      numConfs=NCONF)
    print('   -Confs:', len(cids), 'Total atoms:', m.GetNumAtoms())
    if len(cids) < 1:
        print('Skipping ' + name + ': not enough confs embedded.')
        return

    # Classical OPT
    for cid in cids:
        AllChem.MMFFOptimizeMolecule(m, confId=cid)

    # Locate clusters
    dmat = AllChem.GetConformerRMSMatrix(m, prealigned=False)
    rms_clusters = Butina.ClusterData(dmat,
                                      m.GetNumConformers(),
                                      0.3,
                                      isDistData=True,
                                      reordering=True)
    # Keep the first member of every cluster. In the masks below True marks
    # a conformer to drop (presumably how RemoveConformers reads the mask).
    keep_ids = [cluster[0] for cluster in rms_clusters]
    mask = np.ones(len(cids), dtype=bool)
    mask[keep_ids] = 0

    # Remove conformers
    RemoveConformers(m, mask)

    # Get new cids
    cids = [x.GetId() for x in m.GetConformers()]

    # Calculate energies relative to the minimum
    # (hdt.hatokcal is presumably the Hartree -> kcal/mol factor).
    E, V = cmp.energy_rdkit_conformers(m, cids)
    E = hdt.hatokcal * (E - E.min())

    # Build the removal mask: keep everything below 5.0 if that is more than
    # 25 conformers, otherwise keep the 25 lowest-energy ones (or all of them
    # when there are at most 25).
    mask = np.ones(E.size, dtype=bool)
    low_energy = np.where(E < 5.0)[0]

    if low_energy.size > 25:
        mask[low_energy] = 0
    elif E.size > 25:
        # Slice, not a single index: `[25]` would keep only the conformer at
        # rank 25 and drop every lower-energy one.
        mask[np.argsort(E)[:25]] = 0
    else:
        mask = np.zeros(E.size, dtype=bool)

    # Remove conformers
    RemoveConformers(m, mask)

    # Get new cids
    cids = [x.GetId() for x in m.GetConformers()]

    if len(cids) < 1:
        print('Skipping ' + name + ': not enough confs to continue.')
        return

    # ANI OPT
    ochk = np.zeros(len(cids), dtype=np.int64)
    for i, cid in enumerate(cids):
        ochk[i] = cmp.optimize_rdkit_molecule(m, cid=cid, fmax=0.001)

    # Align conformers
    rmslist = []
    AllChem.AlignMolConformers(m, RMSlist=rmslist, maxIters=200)

    # Get energies and std dev.
    E, V = cmp.energy_rdkit_conformers(m, cids)
    E = hdt.hatokcal * E
    V = hdt.hatokcal * V

    # Sort by energy (low to high)
    idx = np.argsort(E)
    X, S = pya.__convert_rdkitconfs_to_nparr__(m)
    Xs = X[idx]
    Es = E[idx]
    Vs = V[idx]

    # Write the energy-sorted coordinates back, so that the conformers are
    # stored in order of increasing energy.
    natm = m.GetNumAtoms()
    for cid, x in zip(cids, Xs):
        conf = m.GetConformer(cid)
        for i in range(natm):
            conf.SetAtomPosition(
                i, [float(x[i][0]),
                    float(x[i][1]),
                    float(x[i][2])])

    # Write out conformations; always release the writer.
    sdf = AllChem.SDWriter(optd + name + '.sdf')
    try:
        for cid in cids:
            sdf.write(m, confId=cid)
    finally:
        sdf.close()

    # Write out energy and sigma data
    # NOTE(review): Es and Vs are energy-sorted but ochk is still in the
    # pre-sort conformer order - confirm whether it should be ochk[idx].
    eout.write(name + ' ' + str(Es) + ' ' + str(Vs) + ' ' + str(ochk) + '\n')
    eout.flush()

    return