Пример #1
0
    def getRMS(self, prb_mol, ref_pos, useFF=False):
        """Align ``prb_mol`` onto reference coordinates and return the RMSD.

        A reference molecule is built by deep-copying ``prb_mol`` and giving
        the copy a single conformer whose atom positions come from
        ``ref_pos``.  ``prb_mol`` is then aligned onto it with
        ``AllChem.AlignMol``, which moves the probe's coordinates in place.

        Parameters
        ----------
        prb_mol
            RDKit molecule to align (modified in place by the alignment).
        ref_pos
            Indexable collection with one entry per atom of ``prb_mol``;
            each entry must provide ``.tolist()`` yielding the coordinates
            (presumably rows of a NumPy array -- confirm against callers).
        useFF : bool
            When true, the reference is first relaxed with MMFF; if that
            path fails for any reason the unrelaxed reference is used.

        Returns
        -------
        The value returned by ``AllChem.AlignMol`` (the alignment RMSD).
        """
        def optimizeWithFF(mol):
            """Return ``mol`` after an MMFF optimisation done with explicit Hs."""
            molf = Chem.AddHs(mol, addCoords=True)
            AllChem.MMFFOptimizeMolecule(molf)
            return Chem.RemoveHs(molf)

        n_est = prb_mol.GetNumAtoms()

        ref_cf = Chem.rdchem.Conformer(n_est)
        for k in range(n_est):
            ref_cf.SetAtomPosition(k, ref_pos[k].tolist())

        ref_mol = copy.deepcopy(prb_mol)
        # Drop *every* inherited conformer, not just id 0: any leftover
        # conformer would precede the new one and AlignMol (default conformer
        # id) would then align against the probe's own old coordinates.
        ref_mol.RemoveAllConformers()
        ref_mol.AddConformer(ref_cf)

        if useFF:
            try:
                res = AllChem.AlignMol(prb_mol, optimizeWithFF(ref_mol))
            except Exception:
                # Best-effort fallback; ``Exception`` (not a bare except) so
                # KeyboardInterrupt / SystemExit still propagate.
                res = AllChem.AlignMol(prb_mol, ref_mol)
        else:
            res = AllChem.AlignMol(prb_mol, ref_mol)

        return res
Пример #2
0
def best_rmsd_tmp_conf(dir_tmp_conf, file_ref):
    """Find the conformer file in ``dir_tmp_conf`` that best matches a reference.

    Every regular file named ``c_*.mol2`` in ``dir_tmp_conf`` is loaded,
    passed through ``remove_SiH3`` and aligned onto the molecule read from
    ``file_ref`` once per atom mapping produced by ``getGraphIsoIter``.
    After each alignment the moved molecule is written to
    ``<i><name>.pdb`` in the current working directory.  The per-file best
    RMSD is printed.

    Args:
        dir_tmp_conf: Directory containing the candidate ``.mol2`` files.
        file_ref: Path of the reference ``.mol2`` file.

    Returns:
        ``(best_rmsd, best_f)`` -- the smallest RMSD found and the bare file
        name that produced it, or ``(None, None)`` when no file yielded an
        alignment.
    """
    best_rmsd = None
    best_f = None
    for f in os.listdir(dir_tmp_conf):
        # os.listdir returns bare names; resolve them against the directory
        # so the function also works when it is not the current directory.
        path = os.path.join(dir_tmp_conf, f)
        if not (os.path.isfile(path) and f.startswith('c_')
                and f.endswith('.mol2')):
            continue

        mol = remove_SiH3(rdk.MolFromMol2File(path))
        ref = rdk.MolFromMol2File(file_ref)

        conf_bestRmsd = None
        for i, m in enumerate(getGraphIsoIter(mol, ref)):
            # .items() works on both Python 2 and 3 (iteritems is 2-only).
            atomMap = [[k, v] for k, v in m.items()]

            rmsd = chm.AlignMol(mol, ref, 0, 0, atomMap)
            w = rdk.PDBWriter(str(i) + f.replace('.mol2', '.pdb'))
            try:
                w.write(mol)
            finally:
                w.close()

            if conf_bestRmsd is None or rmsd < conf_bestRmsd:
                conf_bestRmsd = rmsd

        # Same output as the Python 2 ``print f, conf_bestRmsd`` statement.
        print("%s %s" % (f, conf_bestRmsd))

        # No mapping was found for this file: it must not compete.  Comparing
        # None with ``<`` silently wins on Python 2 and raises on Python 3.
        if conf_bestRmsd is None:
            continue
        if best_rmsd is None or conf_bestRmsd < best_rmsd:
            best_f = f
            best_rmsd = conf_bestRmsd

    return best_rmsd, best_f
def overlay(mol1, mol2):
    """Align ``mol1`` onto ``mol2`` (conformer 0 of each) and return the RMSD.

    Prints the atom and conformer counts of both molecules, sanitizes both,
    and asserts that they have identical SMILES before aligning with up to
    1000 iterations.
    """
    pair = (mol1, mol2)
    for mol in pair:
        print(len(mol.GetAtoms()), mol.GetNumConformers())
    for mol in pair:
        Chem.SanitizeMol(mol)

    probe_smiles = Chem.MolToSmiles(mol1)
    reference_smiles = Chem.MolToSmiles(mol2)
    assert (probe_smiles == reference_smiles)

    rmsd = AllChem.AlignMol(mol1, mol2, prbCid=0, refCid=0, maxIters=1000)
    return rmsd
Пример #4
0
def MCSAlignMolecules(ref_mol, ali_mol):
    """Align one Mol2 structure onto another via their maximum common substructure.

    Both files are read with hydrogens kept and *without* sanitization:
    sanitizing these inputs makes RDKit raise errors, and only the common
    substructure search is needed here, not a chemically validated molecule.

    The aligned molecule is also written to ``aligned.mol`` in the current
    working directory.

    Args:
        ref_mol: Path of the Mol2 file used as the fixed reference.
        ali_mol: Path of the Mol2 file whose coordinates are moved.

    Returns:
        The RDKit molecule read from ``ali_mol`` after alignment.

    Raises:
        SystemExit: with status 1 if ``FindMCS`` raises ``ValueError``.
    """
    # Imported locally, like the RDKit modules below, so the function does
    # not depend on a module-level ``import sys``.
    import sys

    from rdkit import Chem
    from rdkit.Chem import AllChem
    from rdkit.Chem import rdFMCS
    from rdkit.Chem.rdFMCS import AtomCompare, BondCompare

    mol1 = Chem.MolFromMol2File(ref_mol, removeHs=False, sanitize=False)
    mol2 = Chem.MolFromMol2File(ali_mol, removeHs=False, sanitize=False)
    _fmcs_params = dict(maximizeBonds=False,
                        threshold=1.0,
                        timeout=60,
                        verbose=False,
                        matchValences=True,
                        ringMatchesRingOnly=True,
                        completeRingsOnly=True,
                        atomCompare=AtomCompare.CompareAny,
                        bondCompare=BondCompare.CompareAny)
    try:
        mcs = rdFMCS.FindMCS([mol1, mol2], **_fmcs_params)
    except ValueError:
        print(
            '\n Max Common Substructure calculation \n failed for this molecule!! \n Please be judicious '
        )
        # Non-zero status: a bare sys.exit() would report success to the shell.
        sys.exit(1)

    core = Chem.MolFromSmarts(mcs.smartsString)
    match1 = mol1.GetSubstructMatch(core)
    match2 = mol2.GetSubstructMatch(core)
    # atomMap pairs are (probe atom index, reference atom index).
    AllChem.AlignMol(mol2, mol1, atomMap=list(zip(match2, match1)))
    Chem.MolToMolFile(mol2, 'aligned.mol', kekulize=False)
    return mol2
Пример #5
0
def gen_coords_rdmol(rdmol):
    """Embed coordinates for atoms that lack them, keeping the known ones.

    An atom is treated as lacking coordinates when all three components of
    its position are within 0.0001 of zero.  If any such atom exists, the
    molecule is re-embedded with the known positions as constraints and
    aligned back onto a copy of the input, retrying with seeds 1, 2, 3, ...
    until the RMSD over the known atoms is no greater than 0.1.

    Returns the list of atom indices whose coordinates were already known.
    """
    tolerance = 0.0001
    reference = rdmol.__copy__()
    conformer = rdmol.GetConformer()

    known_positions = {}
    unchanged = []
    identity_pairs = []
    for atom_idx in range(rdmol.GetNumAtoms()):
        point = conformer.GetAtomPosition(atom_idx)
        has_coordinates = not (abs(point.x) < tolerance
                               and abs(point.y) < tolerance
                               and abs(point.z) < tolerance)
        if has_coordinates:
            known_positions[atom_idx] = point
            unchanged.append(atom_idx)
            identity_pairs.append((atom_idx, atom_idx))

    # Nothing to do when every atom already has coordinates.
    if not rdmol.GetNumAtoms() > len(identity_pairs):
        return unchanged

    seed = 1
    while True:
        AllChem.EmbedMolecule(rdmol, coordMap=known_positions,
                              randomSeed=seed, useBasicKnowledge=True)
        # Bring the freshly embedded molecule back onto the original frame.
        deviation = AllChem.AlignMol(rdmol, reference, atomMap=identity_pairs)
        seed += 1
        if not deviation > 0.1:
            break
    return unchanged
def overlay_ring_atoms(mol1, mol2, mol2_conf_id):
    """Align ``mol1`` onto ``mol2`` using only the atoms from ``get_ring_atoms``.

    Conformer 0 of ``mol1`` is aligned onto conformer ``mol2_conf_id`` of
    ``mol2``; atoms are paired in the order ``get_ring_atoms`` returns them.
    Returns the value of ``AllChem.AlignMol`` (the RMSD).
    """
    probe_ring = get_ring_atoms(mol1)
    reference_ring = get_ring_atoms(mol2)
    ring_pairs = list(zip(probe_ring, reference_ring))
    rmsd = AllChem.AlignMol(mol1, mol2, prbCid=0, refCid=mol2_conf_id,
                            atomMap=ring_pairs, maxIters=1000)
    return rmsd
Пример #7
0
    def add_unit(self,
                 unit: MonomerUnit,
                 unit_tags: list = [],
                 polymer_tags: list = [],
                 replace_polymer_atoms: bool = True,
                 minimize: bool = True):
        """Attach ``unit`` to this polymer, aligning it on tagged atoms.

        Without tags on both sides, or when the polymer has no atoms yet,
        the unit is simply added.  Otherwise the unit is aligned onto the
        polymer using the paired tag indices, added, and for every pair of
        tagged atoms one atom is replaced by the other and then removed.

        ``replace_polymer_atoms`` may be one flag, one flag per tag, or a
        nested flag per tagged atom; single values are broadcast.  A true
        flag removes the polymer atom in favour of the monomer atom, a
        false flag the reverse.  ``minimize`` is not used by this method.
        """
        # Original author's note: this is slow; could try/except replace it?
        if not (unit_tags and polymer_tags and self.n_atoms):
            self.add_unit_only(unit)
            return

        monomer_tags = utils.asiterable(unit_tags)
        polymer_tags = utils.asiterable(polymer_tags)

        tag_count = len(monomer_tags)
        assert len(polymer_tags) == tag_count, (
            "Must provide same number of tags for monomer and polymer")

        # Broadcast a single replace flag over all tags.
        replace_flags = utils.asiterable(replace_polymer_atoms)
        if len(replace_flags) == 1 and tag_count != 1:
            replace_flags = replace_flags * tag_count

        monomer_indices = []
        polymer_indices = []
        swaps = []  # [atom to delete, atom that replaces it]
        for m_tag, p_tag, flag in zip(monomer_tags, polymer_tags,
                                      replace_flags):
            monomer_indices.extend(m_tag.indices)
            polymer_indices.extend(p_tag.indices)

            # Broadcast a single per-tag flag over that tag's atoms.
            atom_flags = utils.asiterable(flag)
            n_tag_atoms = len(m_tag.atoms)
            if len(atom_flags) == 1 and n_tag_atoms != 1:
                atom_flags = atom_flags * n_tag_atoms

            for m_atom, p_atom, drop_polymer in zip(m_tag.atoms, p_tag.atoms,
                                                    atom_flags):
                if drop_polymer:
                    swaps.append([p_atom, m_atom])
                else:
                    swaps.append([m_atom, p_atom])

        atom_map = tuple((int(m_idx), int(p_idx))
                         for m_idx, p_idx in zip(monomer_indices,
                                                 polymer_indices))

        AllChem.AlignMol(unit.rdmol, self.rdmol, 0, 0, atomMap=atom_map)
        self.add_unit_only(unit)

        swaps.sort(key=lambda pair: pair[0].index)
        doomed_atoms = [pair[0] for pair in swaps]

        self.remove_params_within_atoms(doomed_atoms)

        # Process from the highest index downwards.
        for old_atom, new_atom in reversed(swaps):
            self.update_atom(old_atom, new_atom)

        self.remove_atoms(doomed_atoms[::-1])
        self.clean()
Пример #8
0
def align_mols_mcs_all(system_pdbs, align_to=0):
    """Align a set of PDB structures on their maximum common substructure.

    Every PDB file is loaded (hydrogens kept) and each molecule except
    ``mols[align_to]`` is aligned, with reflection allowed, onto a
    reference using the MCS found by ``rdFMCS.FindMCS``.  All molecules are
    then written to ``align/aligned_<basename>``.

    Molecule indices 4, 10 and 12 use hard-coded MCS settings, and 10 and
    12 are aligned onto molecules 3 and 10 instead of ``align_to``
    (NOTE(review): these look specific to one data set -- confirm before
    reusing on other inputs).

    Args:
        system_pdbs: Sequence of PDB file paths.
        align_to: Index of the default reference structure.

    Returns:
        List of the written PDB paths, in the order of ``system_pdbs``.
    """
    # Load mols
    mols = [Chem.MolFromPDBFile(pdb, removeHs=False) for pdb in system_pdbs]

    # Align with mcs
    print("ref:\t", align_to, os.path.basename(system_pdbs[align_to]))

    for mol2ID, mv in enumerate(mols):
        if mol2ID == align_to:
            continue
        print("move:\t", mol2ID, os.path.basename(system_pdbs[mol2ID]))

        # Default reference; made explicit for every iteration instead of
        # relying on what the previous iteration left in ``ref``.
        ref = mols[align_to]
        if mol2ID == 4:
            mcs = rdFMCS.FindMCS([ref, mv], ringMatchesRingOnly=True)
        elif mol2ID == 10:
            ref = mols[3]
            mcs = rdFMCS.FindMCS(
                [ref, mv],
                ringMatchesRingOnly=True,
                ringCompare=rdFMCS.RingCompare.PermissiveRingFusion,
                atomCompare=rdFMCS.AtomCompare.CompareAny)
        elif mol2ID == 12:
            ref = mols[10]
            mcs = rdFMCS.FindMCS([ref, mv], ringMatchesRingOnly=True)
        else:
            mcs = rdFMCS.FindMCS(
                [ref, mv],
                ringMatchesRingOnly=True,
                atomCompare=rdFMCS.AtomCompare.CompareAnyHeavyAtom)

        patt = Chem.MolFromSmarts(mcs.smartsString)
        print("patternMol: ", mcs.smartsString)
        refMatch = ref.GetSubstructMatch(patt)
        print("refMatch:\t", refMatch)
        mvMatch = mv.GetSubstructMatch(patt)
        print("mvMatch:\t", mvMatch)

        # Errors propagate unchanged (the old try/except only re-raised).
        AllChem.AlignMol(mv, ref, atomMap=list(zip(mvMatch, refMatch)),
                         reflect=True)

    # write out
    out_dir = "align"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(out_dir, exist_ok=True)
    path_prefix = out_dir + "/aligned_"
    aligned_pdb_paths = []
    for mol, in_pdb in zip(mols, system_pdbs):
        tmp_out = path_prefix + os.path.basename(in_pdb)
        Chem.MolToPDBFile(mol, tmp_out)
        aligned_pdb_paths.append(tmp_out)

    return aligned_pdb_paths
Пример #9
0
    def bb_distortion(self, bb_conformers=None, conformer=-1):
        """
        Mean rmsd of building block cores before and after assembly.

        For every building block, its "free" core is aligned onto each
        matching core fragment found in the macromolecule, and the
        resulting rmsds are averaged over all fragments.

        According to the original documentation, functional group atoms
        and hydrogens are excluded (this is delegated to the ``core``
        helpers, which are not visible here).

        Parameters
        ----------
        bb_conformers : :class:`list` of :class:`int`
            Conformer id to use for each building block, ordered like
            :attr:`building_blocks`. If ``None``, ``-1`` is used for
            every building block.

        conformer : :class:`int`, optional
            The id of the macromolecule conformer to use.

        Returns
        -------
        :class:`float`
            The mean rmsd between the macromolecule's building block
            cores and their "free" counterparts.

        """

        if bb_conformers is None:
            bb_conformers = [-1] * len(self.building_blocks)

        # Running total of rmsds and the number of fragments compared,
        # used to form the mean at the end.
        total = 0
        fragment_count = 0
        for bb_index, building_block in enumerate(self.building_blocks):
            free_core = building_block.core()
            # Atom k of the free core corresponds to atom k of a fragment.
            identity_map = [
                (atom, atom) for atom in range(free_core.GetNumAtoms())
            ]
            for fragment in self.building_block_cores(bb_index):
                total += rdkit.AlignMol(free_core,
                                        fragment,
                                        bb_conformers[bb_index],
                                        conformer,
                                        atomMap=identity_map)
                fragment_count += 1
        return total / fragment_count
Пример #10
0
    def evaluate(self, mol, energies, opt_mol, opt_energies, min_energy):
        """
        Determines if the conformers on mol are accepted in the final set of conformers or are rejected based on energy
        difference from the minimum energy conformer and whether conformers are greater than the RMSD threshold apart
        from each other. In the latter case, if they are not, then the lowest energy conformer out of the two is kept.

        Both ``opt_mol`` and ``opt_energies`` are modified in place; nothing is returned.

        Args:
            mol (RDKit Mol): The molecule containing the candidate conformers.
            energies (list): The energies of the candidate conformers, in the order of ``mol.GetConformers()``.
            opt_mol (RDKit Mol): The molecule containing the final set of conformers.
            opt_energies: The energies of the final set of conformers, looked up, deleted and inserted by
                conformer id (presumably a dict keyed by conformer id -- confirm against callers).
            min_energy: The lowest energy in the final set of conformers.
        """

        energy_cutoff = min_energy + self.energy_diff

        for i, macro_conf in enumerate(mol.GetConformers()):

            # skip if energy is too high
            if energies[i] > energy_cutoff:
                continue

            # Snapshot the ids first: conformers are removed from opt_mol
            # inside the loop, and mutating the molecule while iterating
            # over GetConformers() directly can skip entries or fail.
            opt_conf_ids = [conf.GetId() for conf in opt_mol.GetConformers()]

            similar_confs = []
            for opt_id in opt_conf_ids:

                # remove conformer if energy is too high
                if opt_energies[opt_id] > energy_cutoff:
                    del opt_energies[opt_id]
                    opt_mol.RemoveConformer(opt_id)
                    continue

                rmsd = AllChem.AlignMol(mol,
                                        opt_mol,
                                        macro_conf.GetId(),
                                        opt_id,
                                        maxIters=self.max_iters)
                if rmsd < self.min_rmsd:
                    similar_confs.append(opt_id)

            similar_energies = [
                opt_energies[conf_id] for conf_id in similar_confs
            ]
            similar_energies.append(energies[i])
            # The candidate is last in the list: it only replaces its
            # look-alikes when it has the lowest energy of the group.
            if np.argmin(similar_energies) == len(similar_energies) - 1:
                for conf_id in similar_confs:
                    opt_mol.RemoveConformer(conf_id)
                    del opt_energies[conf_id]
                conf_id = opt_mol.AddConformer(macro_conf, assignId=True)
                opt_energies[conf_id] = energies[i]
Пример #11
0
def combAlignedOptLigHCore(core,lig,list):
    """Aligns ligand carboxylate to core carboxylate.
       Identifies which atom will need to be connected across ligand/core.
       Deletes ligand carboxylate.
       Combines ligand and core molecules to one molecule.

       core -- molecule the ligand is aligned onto and combined with.
       lig  -- ligand molecule; moved in place by the alignment.
       list -- core atom indices paired, in order, with the ligand's
               carboxylate match (name kept for compatibility; it shadows
               the builtin inside this function).

       Returns the combined molecule.
    """
    carboxylate=Chem.MolFromSmarts('[CX3](=O)[OX1H0-,OX2H1]')
    atomnums=lig.GetSubstructMatch(carboxylate)
    # AlignMol needs a real sequence; on Python 3 zip() is a lazy iterator
    # and list() is unavailable here because the parameter shadows it.
    atom_map=[(lig_idx,core_idx) for lig_idx,core_idx in zip(atomnums,list)]
    rmsd=AllChem.AlignMol(lig,core,atomMap=atom_map)
    # Same text as the Python 2 ``print "Alignment result: ", x`` statement.
    print("Alignment result:  %s" % (rmsd,))
    # The atom bonded to the carboxylate carbon is tagged so it can be found
    # again after the carboxylate has been deleted.
    connect_atom=lig.GetAtomWithIdx(lig.GetSubstructMatch(Chem.MolFromSmarts('*[CX3](=O)[OX1H0-,OX2H1]'))[0])
    connect_atom.SetProp('connect','Y')
    trunc=Chem.DeleteSubstructs(lig,Chem.MolFromSmarts('[CX3](=O)[OX2H1][H]'))
    # Nothing deleted: retry with the deprotonated carboxylate pattern.
    if trunc.GetNumAtoms() == lig.GetNumAtoms():
        trunc=Chem.DeleteSubstructs(lig,Chem.MolFromSmarts('[CX3](=O)[OX1H0-]'))
    combo=Chem.CombineMols(core,trunc)
    return combo
Пример #12
0
def EmbedAlignConstrainedScore(prbMol,refMols,core,prbNumConfs=10,refNumConfs=10,prbCharge=None,refCharges=None):
    """Calculates a constrained alignment based on a common pattern in the input molecules. Caution: Will fail if the pattern does not match.
    Calculates a shape and electrostatic potential similarity of the best alignment.

    :param prbMol: RDKit molecule for which shape and electrostatic similarities are calculated.
    :param refMols: RDKit molecule or list/tuple of RDKit molecules serving as references.
    :param core: Common pattern for the constrained embedding as embedded RDKit molecule
    :param prbNumConfs: Number of conformers to create for the probe molecule. A higher number creates better alignments but slows down the algorithm.
    :param refNumConfs: Number of conformers to create for each reference molecule. A higher number creates better alignments but slows down the algorithm.
    :param prbCharge: (optional) List or array of partial charges of the probe molecule. If not given, RDKit Gasteiger Charges are used as default.
    :param refCharges: (optional) List of list or 2D array of partial charges of the reference molecules. If not given, RDKit Gasteiger Charges are used as default.
    :return: list of shape similarities and list of ESP similarities, one entry per reference molecule.
    """

    # isinstance also accepts tuples and list subclasses, which the previous
    # exact-type check would have wrapped as if they were one molecule.
    if not isinstance(refMols, (list, tuple)):
        refMols=[refMols]

    # ``is None``: ``== None`` on a NumPy array compares element-wise and
    # cannot be used as a condition.
    if refCharges is None:
        refCharges=[None]*len(refMols)

    prbMol=ConstrainedEmbedMultipleConfs(prbMol, core, numConfs=prbNumConfs)
    # Keep the returned molecules; rebinding a loop variable discarded them.
    refMols=[ConstrainedEmbedMultipleConfs(refMol, core, numConfs=refNumConfs) for refMol in refMols]

    prbMatch = prbMol.GetSubstructMatch(core)
    allShapeDist = []
    allEspSim = []

    for idx,refMol in enumerate(refMols):
        shapeDist=1
        prbBestConf=0
        refBestConf=0
        refMatch = refMol.GetSubstructMatch(core)
        # The atom pairing is the same for every conformer pair.
        atomMap = list(zip(prbMatch,refMatch))
        for i in range(refNumConfs):
            for j in range(prbNumConfs):
                AllChem.AlignMol(prbMol,refMol,atomMap=atomMap,prbCid=j,refCid=i)
                shape = AllChem.ShapeTanimotoDist(prbMol,refMol,confId1=j,confId2=i)
                if shape<shapeDist:
                    shapeDist=shape
                    prbBestConf=j
                    refBestConf=i
        espSim=GetEspSim(prbMol,refMol,prbBestConf,refBestConf,prbCharge,refCharges[idx])
        # Tanimoto distance -> similarity.
        allShapeDist.append(1-shapeDist)
        allEspSim.append(espSim)

    return allShapeDist,allEspSim
Пример #13
0
def align_calphas(probe, reference):
    """Align ``probe`` onto ``reference`` using the C-alpha atoms they share.

    ``find_calphas`` is expected to return a mapping from residue number to
    atom index for each molecule.  Residues present in the probe but not in
    the reference are reported and left out of the atom map.  Progress and
    the final RMSD are printed.

    Args:
        probe: Molecule to move (modified in place by the alignment).
        reference: Molecule to align onto.

    Returns:
        The RMSD reported by ``AllChem.AlignMol`` (previously it was only
        printed and the function returned ``None``).
    """
    ref_calphas = find_calphas(reference)
    print('Found', len(ref_calphas), 'CAs')
    prb_calphas = find_calphas(probe)
    print('Found', len(prb_calphas), 'CAs')

    atom_map = []
    for resnum, idx in prb_calphas.items():
        if resnum in ref_calphas:
            # atomMap pairs are (probe atom index, reference atom index).
            atom_map.append((idx, ref_calphas[resnum]))
        else:
            print('WARNING: residue', resnum, 'not found in reference')

    print('Mapped', len(atom_map), 'atoms')
    rmsd = AllChem.AlignMol(probe, reference, atomMap=atom_map)

    print('RMSD:', rmsd)
    return rmsd
Пример #14
0
    def _align_conformers(self, mol, templates):
        """Collect the conformers of ``mol`` that align well onto a template.

        Each conformer is aligned onto every (template, substructure match)
        combination, largest atom maps first.  Every alignment scoring at or
        below ``self.align_rms_thresh`` is copied into the result molecule.
        Among the copies that came from one conformer, a copy is dropped
        when it lies within ``self.prune_rms_thresh`` of one already kept.

        Returns a copy of ``mol`` holding only the accepted conformers.
        """
        def _pair_matches(molecule, template):
            # One (template, atom map) entry per substructure hit.
            hits = molecule.GetSubstructMatches(template)
            template_atoms = template.GetSubstructMatch(template)
            return [(template, list(zip(hit, template_atoms)))
                    for hit in hits]

        per_template = [_pair_matches(mol, template)
                        for template in templates]
        template_maps = []
        for entries in per_template:
            template_maps.extend(entries)
        template_maps.sort(key=lambda entry: len(entry[1]), reverse=True)

        result = Chem.Mol(mol)
        result.RemoveAllConformers()

        for conformer in mol.GetConformers():
            candidates = []
            for template, constraint_map in template_maps:
                rms = AllChem.AlignMol(mol,
                                       template,
                                       prbCid=conformer.GetId(),
                                       atomMap=constraint_map)
                if rms > self.align_rms_thresh:
                    continue
                candidates.append(
                    result.AddConformer(conformer, assignId=True))

            # The first candidate is always kept; later ones must differ
            # enough from everything kept so far.
            kept = candidates[:1]
            for candidate_id in candidates[1:]:
                closest = min(
                    AllChem.GetConformerRMS(result,
                                            candidate_id,
                                            kept_id,
                                            prealigned=True)
                    for kept_id in kept)
                if closest > self.prune_rms_thresh:
                    kept.append(candidate_id)
                else:
                    result.RemoveConformer(candidate_id)

        return result
Пример #15
0
def align_mols_mcs(system_pdbs, align_to=0):
    """Align a set of PDB structures on their maximum common substructure.

    Every PDB file is loaded (hydrogens kept) and each molecule except
    ``mols[align_to]`` is aligned onto that reference using the MCS found
    by ``rdFMCS.FindMCS`` with ``ringMatchesRingOnly=True``.  All molecules
    are then written to ``align/aligned_<basename>``.

    Args:
        system_pdbs: Sequence of PDB file paths.
        align_to: Index of the reference structure.

    Returns:
        List of the written PDB paths, in the order of ``system_pdbs``.
    """
    # Load mols
    mols = [Chem.MolFromPDBFile(pdb, removeHs=False) for pdb in system_pdbs]

    # Align with mcs
    ref = mols[align_to]
    print("ref:\t", align_to, os.path.basename(system_pdbs[align_to]))

    for mol2ID, mv in enumerate(mols):
        if mol2ID == align_to:
            continue
        print("move:\t", mol2ID, os.path.basename(system_pdbs[mol2ID]))

        mcs = rdFMCS.FindMCS([ref, mv], ringMatchesRingOnly=True)

        patt = Chem.MolFromSmarts(mcs.smartsString)
        print("patternMol: ", mcs.smartsString)
        refMatch = ref.GetSubstructMatch(patt)
        print("refMatch:\t", refMatch)
        mvMatch = mv.GetSubstructMatch(patt)
        print("mvMatch:\t", mvMatch)

        # Errors propagate unchanged (the old try/except only re-raised).
        AllChem.AlignMol(mv, ref, atomMap=list(zip(mvMatch, refMatch)))

    # write out
    out_dir = "align"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(out_dir, exist_ok=True)
    path_prefix = out_dir + "/aligned_"
    aligned_pdb_paths = []
    for mol, in_pdb in zip(mols, system_pdbs):
        tmp_out = path_prefix + os.path.basename(in_pdb)
        Chem.MolToPDBFile(mol, tmp_out)
        aligned_pdb_paths.append(tmp_out)

    return aligned_pdb_paths
Пример #16
0
def make_substructure_molecule(template_mol_path, query_mol_path):
    """
    Build a mol block for the maximum common substructure (MCS) of two molecules.

    Both files are read with ``MoleculeReader`` and converted with
    ``rdkitize_ccdc_mol``.  The MCS core is cut out of the template; if it
    cannot be written as a mol block, the MCS pattern itself is embedded on
    the template's matching coordinates, UFF-optimised, aligned back onto
    the template and written instead.

    :param template_mol_path: path to the prepared template molecule (starting fragment)
    :param query_mol_path: path to the prepared query molecule (suggested followup)
    :return: mol block string of the MCS with 3D coordinates, or None when
        no common substructure or no core could be obtained
    """
    template_mol = rdkitize_ccdc_mol(MoleculeReader(template_mol_path)[0])
    query_mol = rdkitize_ccdc_mol(MoleculeReader(query_mol_path)[0])
    print(query_mol)

    # find the maximum common substructure
    mcs_result = rdFMCS.FindMCS([template_mol, query_mol],
                                threshold=0.9,
                                completeRingsOnly=True)
    smarts = mcs_result.smartsString
    if not smarts:
        return None

    patt = Chem.MolFromSmarts(smarts, mergeHs=True)

    # keep only the core of the reference molecule
    ref = AllChem.ReplaceSidechains(template_mol, patt)
    if not ref:
        return None

    core = AllChem.DeleteSubstructs(ref, Chem.MolFromSmiles('*'))
    core.UpdatePropertyCache()
    try:
        return Chem.MolToMolBlock(core)
    except Exception as exc:
        # Fallback: rebuild coordinates for the pattern from the template.
        t_match = template_mol.GetSubstructMatch(patt)
        print(exc)
        sanitize_ops = (Chem.SanitizeFlags.SANITIZE_ALL
                        ^ Chem.SanitizeFlags.SANITIZE_KEKULIZE)
        Chem.SanitizeMol(patt, sanitizeOps=sanitize_ops)
        # Pattern atom i is pinned to the template atom it matched.
        cmap = {
            patt_idx: template_mol.GetConformer().GetAtomPosition(tmpl_idx)
            for patt_idx, tmpl_idx in enumerate(t_match)
        }
        AllChem.EmbedMolecule(patt,
                              randomSeed=0xf00d,
                              coordMap=cmap,
                              maxAttempts=1000)
        AllChem.UFFOptimizeMolecule(patt)
        pattern_to_template = list(zip(range(len(t_match)), t_match))
        AllChem.AlignMol(patt, template_mol, atomMap=pattern_to_template)
        return Chem.MolToMolBlock(patt)
Пример #17
0
def alignBySiH3Core(file_mol, file_ref):
    """Align a Mol2 molecule onto a reference via their cores; report the best RMSD.

    Both files are loaded and their cores obtained with ``findCore``.  For
    every atom mapping yielded by ``getGraphIsoIter`` for the two cores, the
    whole molecule is aligned onto the reference and written to
    ``<i><file_mol with .pdb extension>``.  Mappings for which the alignment
    raises are skipped.  The two cores and the best RMSD are printed.

    Args:
        file_mol: Path of the ``.mol2`` file to align.
        file_ref: Path of the reference ``.mol2`` file.

    Returns:
        The smallest RMSD found, or ``None`` if no mapping could be aligned
        (previously the value was only printed).
    """
    mol = rdk.MolFromMol2File(file_mol)
    ref = rdk.MolFromMol2File(file_ref)
    mol_core, _ = findCore(mol)
    ref_core, _ = findCore(ref)
    # Same output as the Python 2 ``print mol_core, ref_core`` statement.
    print("%s %s" % (mol_core, ref_core))

    conf_bestRmsd = None
    for i, m in enumerate(getGraphIsoIter(mol_core, ref_core)):
        # .items() works on both Python 2 and 3 (iteritems is 2-only).
        atomMap = [[k, v] for k, v in m.items()]
        try:
            rmsd = chm.AlignMol(mol, ref, 0, 0, atomMap)
        except Exception:
            # Skip mappings RDKit cannot align; Exception rather than a bare
            # except so KeyboardInterrupt / SystemExit still propagate.
            continue

        w = rdk.PDBWriter(str(i) + file_mol.replace('.mol2', '.pdb'))
        try:
            w.write(mol)
        finally:
            w.close()

        if conf_bestRmsd is None or rmsd < conf_bestRmsd:
            conf_bestRmsd = rmsd

    print(conf_bestRmsd)
    return conf_bestRmsd
Пример #18
0
def find_conformations(mol,
                       core,
                       match,
                       coordMap,
                       useTethers=True,
                       coreConfId=-1,
                       randomseed=2342,
                       max_iters=200,
                       opt=None):
    """Generate a conformation of ``mol`` constrained to a core.

    Heavily based on ConstrainedEmbed in the RDKit.  The molecule is
    embedded with the coordinates in ``coordMap`` as constraints, energy
    minimised with a force field and aligned onto ``core``.  The final
    alignment RMSD is stored on the molecule as the ``EmbedRMS`` property.

    Args:
        mol: RDKit molecule to embed (modified in place).
        core: RDKit molecule providing the core coordinates.
        match: Atom indices of ``mol`` matching the atoms of ``core``, in
            core atom order.
        coordMap: Mapping of ``mol`` atom index to the position it is
            constrained to.
        useTethers: If true, matched atoms are pulled towards the core atom
            positions during minimisation; otherwise only the pairwise
            distances between matched atoms are constrained.
        coreConfId: Currently unused.
        randomseed: Seed passed to the embedding.
        max_iters: Maximum iterations of the first minimisation when
            ``useTethers`` is true.
        opt: ``"MMFF"`` selects the MMFF force field; any other value
            selects UFF.

    Returns:
        ``(mol, energy)`` with the force-field energy of the final
        conformation, or ``None`` if the molecule could not be embedded or
        no force field could be built.
    """
    def _build_force_field():
        """Return the MMFF force field if requested, otherwise UFF."""
        # ``==`` rather than ``is``: identity of equal strings is an
        # interpreter detail and must not decide the force field.
        if opt == "MMFF":
            try:
                mmff_mol = Chem.MolFromMolBlock(Chem.MolToMolBlock(mol),
                                                sanitize=False,
                                                removeHs=False)
                myff = Chem.rdForceFieldHelpers.SetupMMFFForceField(
                    mmff_mol, mmffVerbosity=0)
                return AllChem.MMFFGetMoleculeForceField(mol, myff, confId=0)
            # Because the newer version of RDKit has this difference
            except AttributeError:
                return AllChem.MMFFGetMoleculeForceField(
                    mol, AllChem.MMFFGetMoleculeProperties(mol))
        return AllChem.UFFGetMoleculeForceField(mol, confId=0)

    ci = AllChem.EmbedMolecule(mol,
                               coordMap=coordMap,
                               randomSeed=randomseed,
                               useRandomCoords=True)
    if ci < 0:
        # Reported rather than raised: callers receive None.
        print(Chem.MolToMolBlock(mol))
        print(Chem.MolToMolBlock(core))
        print("COULD NOT  EMBED")
        return None

    # Now make a map of the points to tether
    algMap = [(j, i) for i, j in enumerate(match)]

    if not useTethers:
        # clean up the conformation
        ff = _build_force_field()
        if ff is None:
            # Same handling as the tether branch instead of an
            # AttributeError on None further down.
            sys.stderr.write("FORCEFIELD IS NONE\n" + Chem.MolToSmiles(mol))
            return None
        for i, idxI in enumerate(match):
            for j in range(i + 1, len(match)):
                idxJ = match[j]
                d = coordMap[idxI].Distance(coordMap[idxJ])
                ff.AddDistanceConstraint(idxI, idxJ, d, d, 300.)
        ff.Initialize()
        n = 4
        more = ff.Minimize()
        while more and n:
            more = ff.Minimize()
            n -= 1
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)
    else:
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)
        ff = _build_force_field()
        if ff is None:
            sys.stderr.write("FORCEFIELD IS NONE\n" + Chem.MolToSmiles(mol))
            return None

        conf = core.GetConformer()
        for i in range(core.GetNumAtoms()):
            p = conf.GetAtomPosition(i)
            # Fixed extra point at the core atom position, with a
            # zero-length distance constraint to the matching atom.
            pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
            ff.AddDistanceConstraint(pIdx, match[i], 0, 0.0, 300.)
        ff.Initialize()
        # Do an energy minimisation
        # Forcefield parameters taken from Greg Landrum
        more = ff.Minimize(maxIts=max_iters, energyTol=1e-4, forceTol=1e-3)
        # Four extra steps of minimisation -> as prescribed in Greg's method
        n = 4
        while more and n:
            more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
            n -= 1
        # Realign
        rms = AllChem.AlignMol(mol, core, atomMap=algMap)

    mol.SetProp('EmbedRMS', str(rms))
    return (mol, ff.CalcEnergy())
Пример #19
0
def ConstrainedEmbed_Slack(mol,
                           core,
                           useTethers=True,
                           tdist=0.25,
                           coreConfId=-1,
                           randomseed=2342,
                           getForceField=UFFGetMoleculeForceField,
                           **kwargs):
    """ generates an embedding of a molecule where part of the molecule
    is constrained to have particular coordinates
    Arguments
      - mol: the molecule to embed
      - core: the molecule to use as a source of constraints
      - useTethers: (optional) if True, the final conformation will be
          optimized subject to a series of extra forces that pull the
          matching atoms to the positions of the core atoms. Otherwise
          simple distance constraints based on the core atoms will be
          used in the optimization.
      - tdist: (optional) if useTethers==True, the upper bound of the
          distance constraint between the atoms and the positions of the
          core atoms during the optimization procedure.
      - coreConfId: (optional) id of the core conformation to use; it is
          used for the coordinate constraints, the tether points and the
          alignments alike
      - randomseed: (optional) seed for the random number generator
      - getForceField: (optional) callable ``(mol, confId=...)`` returning
          the force field used for the optimization
      - kwargs: passed on to AllChem.EmbedMolecule
    Returns the embedded molecule (the same object as ``mol``) with the
    final alignment RMSD stored in its 'EmbedRMS' property.
    Raises ValueError if the molecule does not match the core or cannot be
    embedded.
    """
    match = mol.GetSubstructMatch(core)
    if not match:
        raise ValueError("molecule doesn't match the core")
    coordMap = {}
    coreConf = core.GetConformer(coreConfId)
    for i, idxI in enumerate(match):
        coordMap[idxI] = coreConf.GetAtomPosition(i)

    ci = AllChem.EmbedMolecule(mol,
                               coordMap=coordMap,
                               randomSeed=randomseed,
                               **kwargs)
    if ci < 0:
        raise ValueError('Could not embed molecule.')

    # (probe atom index, core atom index) pairs for the alignment
    algMap = [(j, i) for i, j in enumerate(match)]

    if not useTethers:
        # clean up the conformation
        ff = getForceField(mol, confId=0)
        for i, idxI in enumerate(match):
            for j in range(i + 1, len(match)):
                idxJ = match[j]
                d = coordMap[idxI].Distance(coordMap[idxJ])
                ff.AddDistanceConstraint(idxI, idxJ, d, d, 100.)
        ff.Initialize()
        n = 4
        more = ff.Minimize()
        while more and n:
            more = ff.Minimize()
            n -= 1
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, refCid=coreConfId, atomMap=algMap)
    else:
        # rotate the embedded conformation onto the core:
        rms = AllChem.AlignMol(mol, core, refCid=coreConfId, atomMap=algMap)
        ff = getForceField(mol, confId=0)
        # Tether to the conformer selected by coreConfId (previously the
        # default conformer was used here regardless of coreConfId).
        for i in range(core.GetNumAtoms()):
            p = coreConf.GetAtomPosition(i)
            pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
            ff.AddDistanceConstraint(pIdx, match[i], 0, tdist, 100.)
        ff.Initialize()
        n = 4
        more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
        while more and n:
            more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
            n -= 1
        # realign
        rms = AllChem.AlignMol(mol, core, refCid=coreConfId, atomMap=algMap)
    mol.SetProp('EmbedRMS', str(rms))
    return mol
Пример #20
0
        mol_init_1=Chem.AddHs(mol_ref)
        # remove all conformers from molecule
        mol_init_1.RemoveAllConformers()
        AllChem.EmbedMultipleConfs(mol_init_1,args.num_parallel_samples,\
                                    numThreads=args.num_threads)

        try:
            ## baseline force field part with UFF
            mol_baseUFF = copy.deepcopy(mol_init_1)
            AllChem.UFFOptimizeMoleculeConfs(mol_baseUFF, numThreads=args.num_threads, maxIters=200)
            mol_baseUFF=Chem.RemoveHs(mol_baseUFF)
            RMSlist_UFF = []
            for c in mol_baseUFF.GetConformers():
                c_id = c.GetId()
                RMS_UFF = AllChem.AlignMol(mol_baseUFF, mol_ref, prbCid=c_id, refCid=0)
                RMSlist_UFF.append(RMS_UFF)

            ttest_uff.extend(RMSlist_UFF)
        except:
            continue

        try:
            ## baseline force field part with MMFF
            mol_baseMMFF = copy.deepcopy(mol_init_1)
            AllChem.MMFFOptimizeMoleculeConfs(mol_baseMMFF, numThreads=args.num_threads, maxIters=200)
            mol_baseMMFF=Chem.RemoveHs(mol_baseMMFF)
            RMSlist_MMFF = []
            for c in mol_baseMMFF.GetConformers():
                c_id = c.GetId()
                RMS_MMFF = AllChem.AlignMol(mol_baseMMFF, mol_ref, prbCid=c_id, refCid=0)
Пример #21
0
def get_conformations(rdkit_mol, nconfs=1, name=None, forcefield=None, rms=-1):
    """
    Generates 3D conformation(s) for an rdkit_mol

    Conformers are embedded directly on ``rdkit_mol`` (it is modified in
    place) with a fixed random seed. When a force field is selected every
    conformer is optimized, the conformer ids are sorted by increasing
    energy and, if ``rms > 0``, conformers closer than ``rms`` to an
    already accepted lower-energy conformer are dropped.

    :parameter rdkit_mol: RDKit molecule
    :type rdkit_mol: rdkit.Chem.Mol
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule (stored as the 'name' property)
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of comformations. The default value None results
        in skipping of the geometry optimization step. Any other non-empty
        value raises ``KeyError``.
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations. It is also passed to the embedding
        step as ``pruneRmsThresh``.
    :return: A single molecule if nconfs == 1, otherwise a list with one
        molecule per retained conformer
    :rtype: |Molecule| or list of PLAMS Molecules
    """
    def _energy_or_penalty(ff, ff_name):
        # A failed evaluation (including ``ff`` being None because no force
        # field could be set up) yields a large penalty so that the conformer
        # sorts last instead of aborting the whole run.
        try:
            return ff.CalcEnergy()
        except Exception:
            msg = ff_name + " energy calculation failed for molecule: " + \
                Chem.MolToSmiles(rdkit_mol) + \
                "\nNo geometry optimization was performed."
            warn(msg)
            return 1e9

    def MMFFenergy(cid):
        ff = AllChem.MMFFGetMoleculeForceField(
            rdkit_mol, AllChem.MMFFGetMoleculeProperties(rdkit_mol), confId=cid)
        return _energy_or_penalty(ff, "MMFF")

    def UFFenergy(cid):
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid)
        return _energy_or_penalty(ff, "UFF")

    if name:
        rdkit_mol.SetProp('name', name)
    cids = list(AllChem.EmbedMultipleConfs(rdkit_mol, nconfs, pruneRmsThresh=rms, randomSeed=1))
    if forcefield:
        optimize_molecule, energy = {
            'uff': [AllChem.UFFOptimizeMolecule, UFFenergy],
            'mmff': [AllChem.MMFFOptimizeMolecule, MMFFenergy],
        }[forcefield]
        for cid in cids:
            optimize_molecule(rdkit_mol, confId=cid)
        cids.sort(key=energy)
        # ``cids`` is empty when the embedding produced nothing; skip the
        # pruning step in that case instead of failing on ``cids[0]``.
        if rms > 0 and cids:
            keep = [cids[0]]
            for cid in cids[1:]:
                for idx in keep:
                    try:
                        r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx)
                    except Exception:
                        # Treat an un-alignable pair as distinct conformers.
                        r = rms + 1
                        message = "Alignment failed in multiple conformation generation: "
                        message += Chem.MolToSmiles(rdkit_mol)
                        message += "\nAssuming different conformations."
                        warn(message)
                    if r < rms:
                        # Too close to an already kept conformer: discard.
                        break
                else:
                    keep.append(cid)
            cids = keep
    if nconfs == 1:
        return from_rdmol(rdkit_mol)
    else:
        return [from_rdmol(rdkit_mol, cid) for cid in cids]
Пример #22
0
            conf.SetAtomPosition(i, coords[i].tolist())

        mol_init_hs.AddConformer(conf)

        mol_init_hs = Chem.AddHs(mol_init_hs, addCoords=True)

        mol_init_embed = copy.deepcopy(mol_init_hs)
        # some weird issue
        # Can't kekulize mol.  Unkekulized atoms: 6 7 8
        try:
            mol_init_embed = Chem.RemoveHs(mol_init_embed)
        except:
            logger.info('Cant kekulize mol issue')
            continue

        RMS_EMBED = AllChem.AlignMol(mol_init_embed, mol_ref)
        pred_embed.append(mol_init_embed)
        ttest_embed.append(RMS_EMBED)

        # run MMFF/UFF on top of it
        try:
            ## baseline force field part with UFF
            mol_baseUFF = copy.deepcopy(mol_init_hs)
            uff_out = AllChem.UFFOptimizeMolecule(mol_baseUFF,
                                                  maxIters=args.max_iters)
            mol_baseUFF = Chem.RemoveHs(mol_baseUFF)
            RMS_UFF = AllChem.AlignMol(mol_baseUFF, mol_ref)
            pred_uff.append(mol_baseUFF)
            ttest_uff.append(RMS_UFF)
        except:
            continue
def align_confs(mol):
    """Align each conformer of ``mol`` against the conformer with id 0.

    The alignment is done conformer by conformer through ``AllChem.AlignMol``
    using ``mol`` as both probe and reference; nothing is returned.
    """
    conformer_ids = [conformer.GetId() for conformer in mol.GetConformers()]
    for probe_id in conformer_ids:
        AllChem.AlignMol(mol, mol, prbCid=probe_id, refCid=0)
Пример #24
0
def get_conformations(mol,
                      nconfs=1,
                      name=None,
                      forcefield=None,
                      rms=-1,
                      enforceChirality=False):
    """
    Generates 3D conformation(s) for an rdkit_mol or a PLAMS Molecule

    A PLAMS Molecule is first converted with ``to_rdmol``; an RDKit molecule
    is used as-is and therefore modified in place. Conformers are embedded
    with a fixed random seed. When a force field is selected every conformer
    is optimized, the conformer ids are sorted by increasing energy and, if
    ``rms > 0``, conformers closer than ``rms`` to an already accepted
    lower-energy conformer are dropped.

    :parameter mol: RDKit or PLAMS Molecule
    :type mol: rdkit.Chem.Mol or |Molecule|
    :parameter int nconfs: Number of conformers to be generated
    :parameter str name: A name for the molecule (stored as the 'name' property)
    :parameter str forcefield: Choose 'uff' or 'mmff' forcefield for geometry
        optimization and ranking of comformations. The default value None results
        in skipping of the geometry optimization step. Any other non-empty
        value raises ``KeyError``.
    :parameter float rms: Root Mean Square deviation threshold for removing
        similar/equivalent conformations. It is also passed to the embedding
        step as ``pruneRmsThresh``.
    :parameter bool enforceChirality: Enforce the correct chirality if chiral centers are present
    :return: A single molecule if nconfs == 1, otherwise a list with one
        molecule per retained conformer
    :rtype: |Molecule| or list of PLAMS Molecules
    """

    if isinstance(mol, Molecule):
        rdkit_mol = to_rdmol(mol, assignChirality=enforceChirality)
    else:
        rdkit_mol = mol

    def _energy_or_penalty(ff, ff_name):
        # A failed evaluation (including ``ff`` being None because no force
        # field could be set up) yields a large penalty so that the conformer
        # sorts last instead of aborting the whole run.
        try:
            return ff.CalcEnergy()
        except Exception:
            msg = ff_name + " energy calculation failed for molecule: " + \
                Chem.MolToSmiles(rdkit_mol) + \
                "\nNo geometry optimization was performed."
            warn(msg)
            return 1e9

    def MMFFenergy(cid):
        ff = AllChem.MMFFGetMoleculeForceField(
            rdkit_mol,
            AllChem.MMFFGetMoleculeProperties(rdkit_mol),
            confId=cid)
        return _energy_or_penalty(ff, "MMFF")

    def UFFenergy(cid):
        ff = AllChem.UFFGetMoleculeForceField(rdkit_mol, confId=cid)
        return _energy_or_penalty(ff, "UFF")

    if name:
        rdkit_mol.SetProp('name', name)

    try:
        cids = list(
            AllChem.EmbedMultipleConfs(rdkit_mol,
                                       nconfs,
                                       pruneRmsThresh=rms,
                                       randomSeed=1,
                                       enforceChirality=enforceChirality))
    except Exception:
        # ``useRandomCoords = True`` prevents (poorly documented) crash for large systems
        cids = list(
            AllChem.EmbedMultipleConfs(rdkit_mol,
                                       nconfs,
                                       pruneRmsThresh=rms,
                                       randomSeed=1,
                                       useRandomCoords=True,
                                       enforceChirality=enforceChirality))

    if forcefield:
        # Select the forcefield (UFF or MMFF)
        optimize_molecule, energy = {
            'uff': [AllChem.UFFOptimizeMolecule, UFFenergy],
            'mmff': [AllChem.MMFFOptimizeMolecule, MMFFenergy],
        }[forcefield]

        # Optimize and sort conformations
        for cid in cids:
            optimize_molecule(rdkit_mol, confId=cid)
        cids.sort(key=energy)

        # Remove duplicate conformations based on RMS. ``cids`` is empty when
        # the embedding produced nothing; skip pruning in that case instead
        # of failing on ``cids[0]``.
        if rms > 0 and cids:
            keep = [cids[0]]
            for cid in cids[1:]:
                for idx in keep:
                    try:
                        r = AllChem.AlignMol(rdkit_mol, rdkit_mol, cid, idx)
                    except Exception:
                        # Treat an un-alignable pair as distinct conformers.
                        r = rms + 1
                        message = "Alignment failed in multiple conformation generation: "
                        message += Chem.MolToSmiles(rdkit_mol)
                        message += "\nAssuming different conformations."
                        warn(message)
                    if r < rms:
                        # Too close to an already kept conformer: discard.
                        break
                else:
                    keep.append(cid)
            cids = keep

    if nconfs == 1:
        return from_rdmol(rdkit_mol)
    else:
        return [from_rdmol(rdkit_mol, cid) for cid in cids]
Пример #25
0
def compare_confomer_generator_and_trajectory_minimum_structures(
    results_path: str, name: str, base: str, tautomer_idx: int, thinning: int = 100
):
    """Compare minimized trajectory frames against pruned reference conformers.

    Every ``thinning``-th frame of a vacuum trajectory is minimized with the
    ANI energy function and stored as a conformer of an RDKit molecule built
    from the trajectory topology. These conformers are then compared, via
    the RMSD over a maximum common substructure, with the pruned conformers
    stored in the ANI results for the same tautomer.

    Side effects: writes ``{base}/{name}/{name}_without_dummy_{tautomer_idx}.pdb``,
    prints progress/summary lines and opens a heatmap window (``plt.show()``).

    :param results_path: directory containing ``ani_mm_results.pickle`` and
        ``exp_results.pickle``
    :param name: system name; key into both result dictionaries and part of
        the trajectory/pdb file names
    :param base: directory containing the ``{name}/`` trajectory folder
    :param tautomer_idx: which tautomer to analyse, must be 1 or 2
    :param thinning: stride used when sampling frames from the trajectory
    :return: tuple ``(compare_mol, minimum_traj, reference_mol, reference[1])``
        where ``reference`` is the result of ``prune_conformers`` for the
        selected tautomer
    """
    assert tautomer_idx == 1 or tautomer_idx == 2

    # NOTE: pickle can execute arbitrary code while loading; these files are
    # presumably produced locally by the same pipeline -- do not point this
    # at untrusted data.
    with open(f"{results_path}/ani_mm_results.pickle", "rb") as handle:
        ani_results = pickle.load(handle)
    with open(f"{results_path}/exp_results.pickle", "rb") as handle:
        exp_results = pickle.load(handle)

    # generate the tautomer object
    t1_smiles = exp_results[name]["t1-smiles"]
    t2_smiles = exp_results[name]["t2-smiles"]
    t_type, tautomers, flipped = generate_tautomer_class_stereobond_aware(
        name, t1_smiles, t2_smiles, nr_of_conformations=1, enforceChirality=True
    )

    tautomer = tautomers[0]
    print(f"Flipped: {flipped}")
    tautomer.perform_tautomer_transformation()

    tautomer_mol = prune_conformers(
        ani_results[name]["t1-confs"],
        ani_results[name]["t1-energies"],
        rmsd_threshold=0.1,
    )
    print(len(tautomer_mol[1]))

    traj_path = (
        f"{base}/{name}/{name}_lambda_{tautomer_idx-1}.0000_kappa_0.0000_in_vacuum.dcd"
    )
    pdb_path = f"{base}/{name}/{name}_0.pdb"

    # load trajectory, remove dummy atom
    traj = md.load(traj_path, top=pdb_path)
    atom_idx = [a.index for a in traj.topology.atoms]
    if (tautomer_idx - 1) == 1:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_0))
    else:
        atom_idx.remove(int(tautomer.hybrid_hydrogen_idx_at_lambda_1))

    traj = traj.atom_slice(atom_indices=atom_idx)

    # save pdb without dummy atom
    tautomer_pdb = f"{base}/{name}/{name}_without_dummy_{tautomer_idx}.pdb"
    traj[0].save_pdb(tautomer_pdb)

    # generate rdkit mol object with the same atom indices as the trajectory
    # but without the dummy atom
    mol = Chem.MolFromPDBFile(tautomer_pdb, removeHs=False)
    # remove conf of pdb
    mol.RemoveAllConformers()

    # generate energy function, use atom symbols of the rdkit mol
    from .ani import ANI_force_and_energy, ANI1ccx

    model = ANI1ccx()
    energy_function = ANI_force_and_energy(
        model=model, atoms=[a.GetSymbol() for a in mol.GetAtoms()], mol=None
    )

    # take every ``thinning``-th conformation and minimize it using ANI
    minimized_traj = []  # store min conformations in here

    # slice once instead of re-slicing the trajectory on every iteration
    thinned_traj = traj[::thinning]
    nr_of_frames = len(thinned_traj)
    for idx, conf in enumerate(thinned_traj):

        print(f"{idx}/{nr_of_frames}")
        c = (conf.xyz[0]) * unit.nanometer
        # only real atoms, therefore lambda not needed
        min_conf = energy_function.minimize(c)[0]
        minimized_traj.append(min_conf)
        new_conf = _generate_conformer(min_conf)
        # add the conformation to the rdkit mol object
        mol.AddConformer(new_conf, assignId=True)

    # generate mdtraj object with minimized confs
    minimum_traj = md.Trajectory(
        np.array([v.value_in_unit(unit.nanometer) for v in minimized_traj]),
        traj.topology,
    )

    # generate reference_mol
    reference = prune_conformers(
        ani_results[name][f"t{tautomer_idx}-confs"],
        ani_results[name][f"t{tautomer_idx}-energies"],
        rmsd_threshold=0.1,
    )

    # remove most hydrogens
    reference_mol = _remove_hydrogens(copy.deepcopy(reference[0]))
    compare_mol = _remove_hydrogens(copy.deepcopy(mol))

    # find atom indices that are compared for RMSD
    sub_m = rdFMCS.FindMCS(
        [reference_mol, compare_mol],
        bondCompare=Chem.rdFMCS.BondCompare.CompareOrder.CompareAny,
        maximizeBonds=False,
    )
    mcsp = Chem.MolFromSmarts(sub_m.smartsString, False)

    # the order of the substructure lists are the same for both
    # substructure matches => substructure_idx_m1[i] = substructure_idx_m2[i]
    substructure_idx_reference = reference_mol.GetSubstructMatches(mcsp, uniquify=False)
    substructure_idx_compare = compare_mol.GetSubstructMatches(mcsp, uniquify=False)

    nr_of_reference_confs = len(reference_mol.GetConformers())
    nr_of_compare_confs = len(compare_mol.GetConformers())

    # generate rmsd matrix
    rmsd = np.zeros(
        (reference_mol.GetNumConformers(), mol.GetNumConformers()), dtype=float
    )

    # save clusters
    got_hit = np.zeros(reference_mol.GetNumConformers(), dtype=int)

    # atom mapping
    from itertools import combinations

    # NOTE(review): the pairs are drawn from the concatenation of both match
    # lists, so a pair may consist of two matches of the same molecule;
    # kept as in the original -- confirm this is intended.
    for nr_of_mappings, (e1, e2) in enumerate(
        combinations(substructure_idx_reference + substructure_idx_compare, 2)
    ):

        atom_mapping = list(zip(e1, e2))
        # get rmsd matrix with a given set of atom mapping
        # update rmsd matrix whenever lower RMSD appears
        for i in range(nr_of_reference_confs):
            for j in range(nr_of_compare_confs):

                proposed_rmsd = AllChem.AlignMol(
                    reference_mol, compare_mol, i, j, atomMap=atom_mapping
                )
                # the first mapping initializes the matrix, later ones can
                # only lower an entry
                if nr_of_mappings == 0:
                    rmsd[i, j] = proposed_rmsd
                else:
                    rmsd[i, j] = min(rmsd[i, j], proposed_rmsd)

    # count, per reference conformer, the minimized frames within 0.1 RMSD
    for i in range(nr_of_reference_confs):
        for j in range(nr_of_compare_confs):
            if rmsd[i, j] <= 0.1:
                got_hit[i] += 1

    sns.heatmap(rmsd)
    plt.show()

    print(f"Nr of clusters: {len(got_hit)}")
    print(
        f"Nr of conformations part of one cluster: {sum(got_hit)}/{mol.GetNumConformers()}"
    )
    print(f"Clusters present: {got_hit}")

    AllChem.AlignMolConformers(reference_mol)
    AllChem.AlignMolConformers(compare_mol)

    return compare_mol, minimum_traj, reference_mol, reference[1]
Пример #26
0
def GenReaction(EdName, Pro1Name, Pro2Name):
    """Align two product fragments onto an educt and print the geometries.

    For each product, atom pairings (product atom -> educt atom with an equal
    entry in the atom lists returned by ``io.ReadMol``) are drawn at random
    10000 times; every pairing is scored with a weighted ``AllChem.AlignMol``
    (weights are the squared atomic numbers) and the pairing with the lowest
    RMSD is used for the final alignment. Educt atoms claimed by the first
    product are excluded when pairing the second one. A single-atom product
    is not aligned but simply placed on its paired educt atom.

    Side effects: prints file names and RMSD values, writes ``pro1mol.mol``
    and ``pro2mol.mol`` into the current directory (only for multi-atom
    products) and prints the reordered educt and the shifted products via
    ``io.PrintXYZ`` / ``io.PrintAimsGeo``.

    NOTE(review): ``io`` is presumably a project module (the standard library
    ``io`` has no ``ReadMol``) -- confirm against the file's imports.

    :param EdName: educt file name without the ``.mol`` extension
    :param Pro1Name: first product file name without the ``.mol`` extension
    :param Pro2Name: second product file name without the ``.mol`` extension
    :return: ``(xyzed, atomed, charge, spin)``; the coordinates and atoms are
        those read for the educt, while ``charge`` and ``spin`` come from the
        last ``io.ReadMol`` call that was executed
    """
    EdFile = EdName + ".mol"
    Pro1File = Pro1Name + ".mol"
    Pro2File = Pro2Name + ".mol"

    edmol = Chem.MolFromMolFile(EdFile, sanitize=True, removeHs=False)
    pro1mol = Chem.MolFromMolFile(Pro1File, sanitize=True, removeHs=False)
    pro2mol = Chem.MolFromMolFile(Pro2File, sanitize=True, removeHs=False)

    print(EdFile)
    print(Pro1File)
    print(Pro2File)

    xyzed, atomed, charge, spin = io.ReadMol(EdName)
    xyzpro1, atompro1, charge, spin = io.ReadMol(Pro1Name)
    xyzpro2, atompro2, charge, spin = io.ReadMol(Pro2Name)

    xyzed_new = []
    atomed_new = []

    # Align first product to molecule
    bestpair = []
    bestrmsd = 100
    ifit = 0

    while ifit < 10000:  # repeatedly tests random atom-pairlists. probably not the smartest way
        ifit += 1
        # Real lists are required: on Python 3 a ``range`` can neither be
        # shuffled nor assigned to by index.
        pairlist1 = list(range(len(atompro1)))
        atomlist = list(range(len(atompro1)))
        random.shuffle(atomlist)
        flags = [False] * len(atomed)
        weightlist = [1] * len(atompro1)

        # Pair every product atom with the first still unused educt atom of
        # the same kind; the random visiting order varies the pairing.
        for j in atomlist:
            for i in range(len(atomed)):
                if (atomed[i] != atompro1[j]):
                    continue
                elif not flags[i]:
                    pairlist1[j] = [j, i]
                    flags[i] = True
                    break

        if len(pairlist1) == 1:
            # if the fragment is a single atom rd-kits AlignMol fails. Simply move atom to corresponding location
            xyzpro1[0] = xyzed[pairlist1[0][1]]
            xyzed_new.append(xyzed[pairlist1[0][1]])
            atomed_new.append(atomed[pairlist1[0][1]])
            ifit = 1000000  # leave the search loop
        else:
            for i in range(len(pairlist1)):
                weightlist[i] = pro1mol.GetAtomWithIdx(i).GetAtomicNum()**2
            rmsd = AllChem.AlignMol(pro1mol,
                                    edmol,
                                    atomMap=pairlist1,
                                    weights=weightlist)
            if rmsd < bestrmsd:
                bestrmsd = rmsd
                bestpair = pairlist1

    if len(pairlist1) > 1:
        # realign with best pairlist. also, write to new educt geo
        pairlist1 = bestpair
        for i in range(len(pairlist1)):
            weightlist[i] = pro1mol.GetAtomWithIdx(i).GetAtomicNum()**2
            xyzed_new.append(xyzed[pairlist1[i][1]])
            atomed_new.append(atomed[pairlist1[i][1]])
        rmsd = AllChem.AlignMol(pro1mol,
                                edmol,
                                atomMap=pairlist1,
                                weights=weightlist)
        print(rmsd)
        # Close the file before it is read back so the content is flushed.
        with open('pro1mol.mol', 'w+') as handle:
            print(Chem.MolToMolBlock(pro1mol), file=handle)
        xyzpro1, atompro1, charge, spin = io.ReadMol('pro1mol')

    # Align second product to remaining framework. Same as above
    bestpair = []
    bestrmsd = 100
    ifit = 0

    while ifit < 10000:
        ifit += 1
        pairlist2 = list(range(len(atompro2)))
        atomlist = list(range(len(atompro2)))
        random.shuffle(atomlist)
        flags = [False] * len(atomed)
        weightlist = [1] * len(atompro2)

        for j in atomlist:
            for i in range(len(atomed)):
                if (atomed[i] != atompro2[j]):
                    continue
                elif any(x[1] == i for x in
                         pairlist1):  # skip atoms already used in prev step.
                    continue
                elif not flags[i]:
                    pairlist2[j] = [j, i]
                    flags[i] = True
                    break

        if len(pairlist2) == 1:
            # single-atom fragment: place it on the paired educt atom
            xyzpro2[0] = xyzed[pairlist2[0][1]]
            xyzed_new.append(xyzed[pairlist2[0][1]])
            atomed_new.append(atomed[pairlist2[0][1]])
            ifit = 1000000  # leave the search loop
        else:
            for i in range(len(pairlist2)):
                weightlist[i] = pro2mol.GetAtomWithIdx(i).GetAtomicNum()**2
            rmsd = AllChem.AlignMol(pro2mol,
                                    edmol,
                                    atomMap=pairlist2,
                                    weights=weightlist)
            if rmsd < bestrmsd:
                bestrmsd = rmsd
                bestpair = pairlist2

    if len(pairlist2) > 1:
        pairlist2 = bestpair
        for i in range(len(pairlist2)):
            weightlist[i] = pro2mol.GetAtomWithIdx(i).GetAtomicNum()**2
            xyzed_new.append(xyzed[pairlist2[i][1]])
            atomed_new.append(atomed[pairlist2[i][1]])
        rmsd = AllChem.AlignMol(pro2mol,
                                edmol,
                                atomMap=pairlist2,
                                weights=weightlist)
        print(rmsd)
        # Close the file before it is read back so the content is flushed.
        with open('pro2mol.mol', 'w+') as handle:
            print(Chem.MolToMolBlock(pro2mol), file=handle)
        xyzpro2, atompro2, charge, spin = io.ReadMol('pro2mol')

    # shift along vector connecting fragments' centers of mass
    xyzpro1s, xyzpro2s = CoM_shift(atompro1, xyzpro1, atompro2, xyzpro2, 4.0)

    # merge aligned and shifted product geometries
    atom_diss, xyz_diss, frags = io.MergeXYZ(atompro1, xyzpro1, atompro2,
                                             xyzpro2)
    atom_shift, xyz_shift, frags = io.MergeXYZ(atompro1, xyzpro1s, atompro2,
                                               xyzpro2s)

    io.PrintXYZ('Reordered Educt', atomed_new, xyzed_new)
    io.PrintXYZ('Shifted Products', atom_shift, xyz_shift)

    io.PrintAimsGeo('Reordered Educt', atomed_new, xyzed_new)
    io.PrintAimsGeo('Shifted Products', atom_shift, xyz_shift)

    return xyzed, atomed, charge, spin
        mv = mols[1]
        #mcs = rdFMCS.FindMCS([ref, mv], completeRingsOnly=True, matchValences=True, ringMatchesRingOnly=True) # G078
        mcs = rdFMCS.FindMCS([ref, mv],
                             completeRingsOnly=True,
                             ringMatchesRingOnly=True)

        smartsString = mcs.smartsString
        #from rdkit.Chem import MCS
        #smartsString = MCS.FindMCS(mols, atomCompare="any").smarts

        patt = Chem.MolFromSmarts(smartsString)  # smartsString
        refMatch = ref.GetSubstructMatch(patt)
        mvMatch = mv.GetSubstructMatch(patt)

        try:
            AllChem.AlignMol(mv, ref, atomMap=list(zip(mvMatch, refMatch)))
        except Exception as err:
            print(err.args)
            pass

        ##write out
        out_pdb_path = out_dir + "/" + out_prefix + ".pdb"
        out_text = ""
        for mol in mols:
            out_text += Chem.MolToPDBBlock(mol)

        file_out = open(out_pdb_path, "w")
        file_out.write(out_text)
        file_out.close()

        ###############################
Пример #28
0
def EmbedAlignConstrainedScore(
    prbMol,
    refMols,
    core,
    prbNumConfs=10,
    refNumConfs=10,
    prbCharge=[],
    refCharges=[],
    metric="carbo",
    integrate="gauss",
    partialCharges="gasteiger",
    renormalize=False,
    customrange=None,
    marginMC=10,
    nMC=1,
    basisPsi4='3-21G',
    methodPsi4='scf',
    gridPsi4=1,
):
    """Calculates a constrained alignment based on a common pattern in the input molecules. Caution: Will fail if the pattern does not match.
    Calculates a shape and electrostatic potential similarity of the best alignment.

    For every reference molecule all probe/reference conformer pairs are
    aligned on the atoms matching ``core``; the pair with the smallest shape
    Tanimoto distance is selected and its ESP similarity is evaluated.
    Conformer ids ``0..prbNumConfs-1`` and ``0..refNumConfs-1`` are assumed
    to exist after the embedding step.

    :param prbMol: RDKit molecule for which shape and electrostatic similarities are calculated.
    :param refMols: RDKit molecule or list of RDKit molecules serving as references.
    :param core: Common pattern for the constrained embedding as embedded RDKit molecule
    :param prbNumConfs: Number of conformers to create for the probe molecule. A higher number creates better alignments but slows down the algorithm.
    :param refNumConfs: Number of conformers to create for each reference molecule. A higher number creates better alignments but slows down the algorithm.
    :param prbCharge: (optional) List or array of partial charges of the probe molecule. If not given, RDKit Gasteiger Charges are used as default.
    :param refCharges: (optional) List of list or 2D array of partial charges of the reference molecules. If not given, RDKit Gasteiger Charges are used as default.
    :param metric:  (optional) Similarity metric.
    :param integrate: (optional) Integration method.
    :param partialCharges: (optional) Partial charge distribution.
    :param renormalize: (optional) Boolean whether to renormalize the similarity score to [0:1].
    :param customrange: (optional) Custom range to renormalize to, supply as tuple or list of two values (lower bound, upper bound).
    :param marginMC: (optional) Margin up to which to integrate (added to coordinates plus/minus their vdW radii) if MC integration is utilized.
    :param nMC: (optional) Number of grid points per 1 Angstrom**3 volume of integration vox if MC integration is utilized.
    :param basisPsi4: (optional) Basis set for Psi4 calculation.
    :param methodPsi4: (optional) Method for Psi4 calculation.
    :param gridPsi4: (optional) Integer grid point density for ESP evaluation for Psi4 calculation.
    :return: two lists with one entry per reference molecule: the shape
        similarities (1 - smallest shape Tanimoto distance) and the ESP
        similarities of the corresponding best conformer pair.
    """
    # The list defaults are kept for interface compatibility; they are never
    # mutated inside this function.
    if not isinstance(refMols, list):
        refMols = [refMols]

    # ``len`` also works for array input, where ``== []`` would be ambiguous.
    if refCharges is None or len(refCharges) == 0:
        refCharges = [[] for _ in refMols]

    prbMol = ConstrainedEmbedMultipleConfs(prbMol, core, numConfs=prbNumConfs)
    # Keep the molecules returned by the embedding helper; rebinding a loop
    # variable would silently discard them.
    refMols = [
        ConstrainedEmbedMultipleConfs(refMol, core, numConfs=refNumConfs)
        for refMol in refMols
    ]

    prbMatch = prbMol.GetSubstructMatch(core)
    allShapeDist = []
    allEspSim = []

    for idx, refMol in enumerate(refMols):
        shapeDist = 1
        prbBestConf = 0
        refBestConf = 0
        refMatch = refMol.GetSubstructMatch(core)
        # The mapping is identical for every conformer pair of this reference.
        atomMap = list(zip(prbMatch, refMatch))
        for i in range(refNumConfs):
            for j in range(prbNumConfs):
                AllChem.AlignMol(prbMol,
                                 refMol,
                                 atomMap=atomMap,
                                 prbCid=j,
                                 refCid=i)
                shape = AllChem.ShapeTanimotoDist(prbMol,
                                                  refMol,
                                                  confId1=j,
                                                  confId2=i)
                if shape < shapeDist:
                    shapeDist = shape
                    prbBestConf = j
                    refBestConf = i
        espSim = GetEspSim(prbMol, refMol, prbBestConf, refBestConf, prbCharge,
                           refCharges[idx], metric, integrate, partialCharges,
                           renormalize, customrange, marginMC, nMC, basisPsi4,
                           methodPsi4, gridPsi4)
        # report a similarity rather than a distance
        allShapeDist.append(1 - shapeDist)
        allEspSim.append(espSim)

    return allShapeDist, allEspSim
Пример #29
0
    def optimize_linear_rotamers(self, linear_mol, conf_id, cleaved_atom1,
                                 cleaved_atom2, dihedrals):
        """
        Helper function of generate() that scans pairs of dihedrals on a grid, ranks the resulting geometries by the
        distance between the two cleaved atoms and, starting from the closest ones, refines the dihedrals that
        involve the cleaved atoms. Refined conformers that do not clash and are not within ``self.min_rmsd`` of an
        already accepted conformer are collected until ``self.num_confs`` of them have been found.

        Args:
            linear_mol (RDKit Mol): The linear oligomer. Its conformer ``conf_id`` is rotated during the scan and
                reset to the initial dihedral values afterwards.
            conf_id (int): The conformer id of the conformer on the linear oligomer to optimize.
            cleaved_atom1 (int): Index of the first cleaved atom (passed to calc_distance / optimize_dihedrals).
            cleaved_atom2 (int): Index of the second cleaved atom.
            dihedrals (dict): The dict of dihedral angles that can be rotated on the linear oligomer. The keys
                'other', 'cleaved' and 'cleaved_and_Hs' are read; each dihedral is a sequence of four atom indices.

        Returns:
            list: The accepted conformer objects, each belonging to its own deep copy of ``linear_mol``.
        """

        mast_mol = deepcopy(linear_mol)
        mast_mol.RemoveAllConformers()
        optimized_linear_confs, distances = [], []
        linear_conf = linear_mol.GetConformer(conf_id)

        # generate length 2 combinations for dihedrals that don't contain cleaved atoms and get the resulting
        # distances between the two cleaved atoms after applying various angles to those dihedrals. Sort the results
        # based on distance
        for dihedral1, dihedral2 in combinations(dihedrals['other'], 2):
            ini_dihedral1 = AllChem.GetDihedralDeg(linear_conf, dihedral1[0],
                                                   dihedral1[1], dihedral1[2],
                                                   dihedral1[3])
            ini_dihedral2 = AllChem.GetDihedralDeg(linear_conf, dihedral2[0],
                                                   dihedral2[1], dihedral2[2],
                                                   dihedral2[3])
            dist = calc_distance(linear_conf, cleaved_atom1, cleaved_atom2)
            distances.append(
                [dist, ini_dihedral1, dihedral1, ini_dihedral2, dihedral2])

            angle1 = 0
            while angle1 < 360:
                AllChem.SetDihedralDeg(linear_conf, dihedral1[0], dihedral1[1],
                                       dihedral1[2], dihedral1[3], angle1)
                # restart the inner sweep for every value of angle1; without
                # the reset only angle1 == 0 would ever be scanned
                angle2 = 0
                while angle2 < 360:
                    AllChem.SetDihedralDeg(linear_conf, dihedral2[0],
                                           dihedral2[1], dihedral2[2],
                                           dihedral2[3], angle2)
                    dist = calc_distance(linear_conf, cleaved_atom1,
                                         cleaved_atom2)
                    distances.append(
                        [dist, angle1, dihedral1, angle2, dihedral2])
                    angle2 += self.large_angle_gran
                angle1 += self.large_angle_gran

            # reset dihedrals
            AllChem.SetDihedralDeg(linear_conf, dihedral1[0], dihedral1[1],
                                   dihedral1[2], dihedral1[3], ini_dihedral1)
            AllChem.SetDihedralDeg(linear_conf, dihedral2[0], dihedral2[1],
                                   dihedral2[2], dihedral2[3], ini_dihedral2)
        distances.sort(key=lambda x: x[0])

        # starting with the dihedral combinations that minimized the distance between cleaved atoms the most, find
        # the optimimum angles for dihedrals that contain cleaved atoms and no hydrogens, then for dihedrals that
        # contain cleaved atoms and hydrogens, until desired number of conformers has been generated
        for distance in distances:
            linear_mol_copy = deepcopy(linear_mol)
            linear_conf = linear_mol_copy.GetConformer(conf_id)

            # set starting dihedrals
            AllChem.SetDihedralDeg(linear_conf, distance[2][0], distance[2][1],
                                   distance[2][2], distance[2][3], distance[1])
            AllChem.SetDihedralDeg(linear_conf, distance[4][0], distance[4][1],
                                   distance[4][2], distance[4][3], distance[3])

            # if no clashes are detected continue the optimization; the check
            # has to look at the copy, which carries the dihedrals set above
            if not is_clashing(linear_mol_copy, conf_id, self.clash_threshold):
                self.optimize_dihedrals(linear_conf, cleaved_atom1,
                                        cleaved_atom2, dihedrals['cleaved'])
                self.optimize_dihedrals(linear_conf, cleaved_atom1,
                                        cleaved_atom2,
                                        dihedrals['cleaved_and_Hs'])

                # reject the candidate if it is too similar to any conformer
                # accepted so far
                for ref_conf in range(mast_mol.GetNumConformers()):
                    rms = AllChem.AlignMol(linear_mol_copy,
                                           mast_mol,
                                           conf_id,
                                           ref_conf,
                                           maxIters=self.max_iters)
                    if rms < self.min_rmsd:
                        break
                else:
                    optimized_linear_confs.append(linear_conf)
                    mast_mol.AddConformer(linear_conf, assignId=True)

                # return when num_confs valid conformers has been obtained
                if len(optimized_linear_confs) == self.num_confs:
                    break

        return optimized_linear_confs