Пример #1
0
        def search(self, mol0, mol1):
            """Run a maximum-common-substructure search between two structures.

            Hydrogen-suppressed copies of ``mol0._struc`` (the target) and
            ``mol1._struc`` (the pattern) are compared. Among all matches, the
            first one with the largest number of non-hydrogen atoms wins.

            Returns whatever ``self.deposit_to_kbase`` returns for the winning
            match, or ``None`` (implicitly) when no match was kept. The atom
            lists handed to ``deposit_to_kbase`` are 1-based indices into the
            hydrogen-suppressed copies.
            """
            struc0 = mol0._struc
            struc1 = mol1._struc

            target = struc0.CreateCopy()
            pattern = struc1.CreateCopy()
            work_copies = (target, pattern)

            # Integer atom types: 1 marks a hydrogen, 2 marks anything else.
            for work_mol in work_copies:
                for atom in work_mol.GetAtoms():
                    atom.SetIntType(1 if atom.IsHydrogen() else 2)

            # Hydrogens are suppressed before the MCS search.
            for work_mol in work_copies:
                oechem.OESuppressHydrogens(work_mol)

            search_args = [pattern, self._atom_expr, self._bond_expr]
            if self._is_approximate:
                search_args.append(oechem.OEMCSType_Approximate)
            mcss = oechem.OEMCSSearch(*search_args)

            # Accept matches as small as one atom and score them with the
            # complete-cycles scoring function.
            mcss.SetMinAtoms(1)
            mcss.SetMCSFunc(oechem.OEMCSMaxAtomsCompleteCycles(1.5))

            # Several matches may come back; keep the largest, and on ties keep
            # the one seen first (strict ">" comparison).
            best_mcs = None
            best_size = 0
            for match in mcss.Match(target, True):
                candidate = oechem.OEMol()
                oechem.OESubsetMol(candidate, match, True)
                oechem.OEFindRingAtomsAndBonds(candidate)
                heavy_count = sum(1 for atom in candidate.GetAtoms()
                                  if not atom.IsHydrogen())
                if heavy_count <= best_size:
                    continue

                best_size = heavy_count
                best_mcs = candidate
                index_pairs = [(pair.target.GetIdx() + 1,
                                pair.pattern.GetIdx() + 1)
                               for pair in match.GetAtoms()]
                atom_match0 = [target_idx for target_idx, _ in index_pairs]
                atom_match1 = [pattern_idx for _, pattern_idx in index_pairs]

            if not best_mcs:
                return None

            # Dump the search result to the kbase.
            wrapped0 = struc.OeStruc(struc0)
            wrapped1 = struc.OeStruc(struc1)
            # The MCS itself is wrapped as well, although the wrapper is not
            # used afterwards (kept to mirror the existing call sequence).
            struc.OeStruc(best_mcs)
            return self.deposit_to_kbase(wrapped0.id(), wrapped1.id(),
                                         atom_match0, atom_match1)
Пример #2
0
def LigandProteinCloseContacts(prot, lig, maxgap):
    """Collect protein/ligand neighbours closer than ``maxgap`` Angstroms.

    Both molecules are modified in place: their hydrogens are suppressed and
    ``DropLigandFromProtein(prot, lig)`` is applied before the search.

    Returns a list of the items produced by ``OENearestNbrs.GetNbrs(lig)`` for
    a neighbour search built on ``prot``.
    """
    for molecule in (prot, lig):
        oechem.OESuppressHydrogens(molecule)

    DropLigandFromProtein(prot, lig)

    neighbor_search = oechem.OENearestNbrs(prot, maxgap)
    contacts = list(neighbor_search.GetNbrs(lig))
    return contacts
Пример #3
0
 def _process_mol(mol: oechem.OEMol, explicit_H: Optional[str] = None):
     if explicit_H == 'all':
         oechem.OEAddExplicitHydrogens(mol)
     elif explicit_H == 'polar':
         oechem.OESuppressHydrogens(mol, explicit_H)
     elif explicit_H is None:
         oechem.OESuppressHydrogens(mol)
     else:
         raise ValueError
     oechem.OEAssignAromaticFlags(mol)
     oechem.OEAssignHybridization(mol)
     oechem.OEAssignFormalCharges(mol)
     mol.Sweep()
Пример #4
0
def main(argv=[__name__]):
    """Command-line driver: MCS-align every fit molecule onto a reference.

    ``argv`` must hold the program name followed by ``<refmol> <fitmol>
    <outfile>``. The first molecule of the reference file is written to the
    output as-is, then has its hydrogens suppressed and is handed, together
    with each 3D fit molecule and the output stream, to ``MCSAlign``. Fit
    molecules without 3D coordinates are skipped with a warning.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <refmol> <fitmol> <outfile>" % argv[0])

    # --- reference molecule -------------------------------------------------
    ref_path = argv[1]
    ref_stream = oechem.oemolistream()
    if not ref_stream.open(ref_path):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ref_path)
    if not oechem.OEIs3DFormat(ref_stream.GetFormat()):
        oechem.OEThrow.Fatal("Invalid input format: need 3D coordinates")
    reference = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ref_stream, reference):
        oechem.OEThrow.Fatal("Unable to read molecule in %s" % ref_path)
    if reference.GetDimension() != 3:
        oechem.OEThrow.Fatal("%s doesn't have 3D coordinates" % reference.GetTitle())

    # --- fit molecules ------------------------------------------------------
    fit_path = argv[2]
    fit_stream = oechem.oemolistream()
    if not fit_stream.open(fit_path):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % fit_path)
    if not oechem.OEIs3DFormat(fit_stream.GetFormat()):
        oechem.OEThrow.Fatal("Invalid input format: need 3D coordinates")

    # --- output -------------------------------------------------------------
    out_path = argv[3]
    out_stream = oechem.oemolostream()
    if not out_stream.open(out_path):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % out_path)
    if not oechem.OEIs3DFormat(out_stream.GetFormat()):
        oechem.OEThrow.Fatal("Invalid output format: need 3D coordinates")

    # The reference is written before its hydrogens are suppressed.
    oechem.OEWriteConstMolecule(out_stream, reference)
    oechem.OESuppressHydrogens(reference)

    for candidate in fit_stream.GetOEGraphMols():
        if candidate.GetDimension() == 3:
            MCSAlign(reference, candidate, out_stream)
        else:
            oechem.OEThrow.Warning("%s doesn't have 3D coordinates" % candidate.GetTitle())
Пример #5
0
    def prepare(self):
        """Render each molecule of ``self._molTitleList`` into a grid cell.

        Cells of ``self.__grid`` and entries of ``self._molTitleList`` are
        paired in order; rendering stops when either runs out, so a grid with
        more cells than molecules leaves the spare cells empty.

        When ``self._params["suppressHydrogens"]`` is set, a hydrogen-suppressed
        copy of each molecule is depicted and the stored molecule is left
        untouched. Otherwise the stored molecule itself is used, so its title
        and depiction coordinates are modified in place.
        """
        self.__setupImage()
        # zip() instead of indexing by cell number: indexing raised IndexError
        # as soon as the grid had more cells than there are molecules.
        for cell, (ccId, oeMol, title) in zip(self.__grid.GetCells(),
                                              self._molTitleList):
            logger.debug("Preparing %s %r", ccId, title)

            if self._params["suppressHydrogens"]:
                # OESuppressHydrogens edits the molecule in place and returns
                # a bool, so the copy must be bound before the call.
                mol = oechem.OEGraphMol(oeMol)
                oechem.OESuppressHydrogens(mol)
            else:
                mol = oeMol

            if self.__useTitle and title:
                mol.SetTitle(title)
                self._opts.SetTitleHeight(5.0)
            else:
                mol.SetTitle("")

            oedepict.OEPrepareDepiction(mol)
            self._opts.SetDimensions(cell.GetWidth(), cell.GetHeight(),
                                     oedepict.OEScale_AutoScale)

            self._assignDisplayOptions()

            disp = oedepict.OE2DMolDisplay(mol, self._opts)
            oedepict.OERenderMolecule(cell, disp)
            if self._params["cellBorders"]:
                oedepict.OEDrawBorder(cell,
                                      oedepict.OEPen(oedepict.OEBlackPen))
Пример #6
0
def test_adjacency(smiles, expected_adj):
    """Check the adjacency matrix ``Adjacency`` builds for a SMILES string.

    The molecule is parsed from ``smiles``, its hydrogens are suppressed, and
    ``Adjacency(mol=...).adj_mat.tolist()`` is compared with ``expected_adj``.
    Without an OEChem licence the check is skipped: a warning is logged and
    ``True`` is returned.
    """
    licensed = oechem.OEChemIsLicensed()
    if not licensed:
        logging.warning(
            "License for OpenEye OEChem TK is not found. Not testing featurizers."
        )
        return True

    molecule = oechem.OEMol()
    oechem.OESmilesToMol(molecule, smiles)
    oechem.OESuppressHydrogens(molecule)

    observed = Adjacency(mol=molecule).adj_mat.tolist()
    assert observed == expected_adj
Пример #7
0
def ImportMolecule(filename):
    """Read the first molecule of ``filename`` and prepare it.

    Bondi van der Waals radii are assigned first, then hydrogens are
    suppressed. A file that cannot be opened is reported through
    ``OEThrow.Fatal``. The result of the read itself is not checked, so the
    returned ``OEGraphMol`` holds whatever ``OEReadMolecule`` left in it.
    """
    stream = oechem.oemolistream()
    opened = stream.open(filename)
    if not opened:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % filename)

    molecule = oechem.OEGraphMol()
    oechem.OEReadMolecule(stream, molecule)

    # Radii are assigned while the hydrogens are still present.
    oechem.OEAssignBondiVdWRadii(molecule)
    oechem.OESuppressHydrogens(molecule)

    return molecule
Пример #8
0
def align2d(file1, file2):
    """Depict molecule pairs from two files side by side in ``output.pdf``.

    Molecules are read from ``file1`` and ``file2`` in lockstep. For each
    pair, the molecule from ``file2`` gets fresh 2D coordinates and serves as
    the substructure query against which the molecule from ``file1`` is
    aligned. Both are then rendered into consecutive report cells. A failed
    alignment is reported through ``OEThrow.Error``; the rendering calls that
    follow it are not guarded by that check.
    """
    atom_options = oechem.OEExprOpts_AtomicNumber | oechem.OEExprOpts_RingMember
    bond_options = oechem.OEExprOpts_RingMember

    stream_a = oechem.oemolistream(file1)
    stream_b = oechem.oemolistream(file2)
    stream_a.SetConfTest(oechem.OEAbsCanonicalConfTest())
    stream_b.SetConfTest(oechem.OEAbsCanonicalConfTest())

    popts, dopts, report = prep_pdf_writer()

    for aligned_mol, template_mol in zip(stream_a.GetOEMols(),
                                         stream_b.GetOEMols()):
        pair = (aligned_mol, template_mol)
        for molecule in pair:
            oechem.OESuppressHydrogens(molecule)

        # The template supplies both the 2D layout and the search pattern.
        oechem.OEGenerate2DCoordinates(template_mol)
        query = oechem.OESubSearch(template_mol, atom_options, bond_options)

        oechem.OEPrepareSearch(aligned_mol, query)
        alignment = oedepict.OEPrepareAlignedDepiction(aligned_mol, query)
        if not alignment.IsValid():
            oechem.OEThrow.Error(
                "Substructure is not found in input molecule!")

        # Cells are requested first, in order, so the pair lands side by side.
        cells = (report.NewCell(), report.NewCell())
        for molecule in pair:
            oedepict.OEPrepareDepiction(molecule, popts)
        displays = [oedepict.OE2DMolDisplay(molecule, dopts)
                    for molecule in pair]
        for cell, display in zip(cells, displays):
            oedepict.OERenderMolecule(cell, display)

    pdf_stream = oechem.oeofstream()
    if not pdf_stream.open('output.pdf'):
        oechem.OEThrow.Fatal("Cannot open output file!")
    oedepict.OEWriteReport(pdf_stream, "pdf", report)
Пример #9
0
def main(args):
    """Write the void-volume surface between a protein and a ligand.

    ``args`` must hold the program name followed by ``<protein> <ligand>
    <surface>``. Each molecule is read, stripped of hydrogens and given Bondi
    van der Waals radii. ``OEMakeVoidVolume`` fills a scalar grid from the two,
    a surface is extracted from that grid and written to the ``<surface>``
    path. Returns 0.
    """
    if len(args) != 4:
        oechem.OEThrow.Usage("%s <protein> <ligand> <surface>" % args[0])

    # Protein
    protein_stream = oechem.oemolistream(args[1])
    protein = oechem.OEGraphMol()
    oechem.OEReadMolecule(protein_stream, protein)
    oechem.OESuppressHydrogens(protein)
    oechem.OEAssignBondiVdWRadii(protein)

    # Ligand
    ligand_stream = oechem.oemolistream(args[2])
    ligand = oechem.OEGraphMol()
    oechem.OEReadMolecule(ligand_stream, ligand)
    oechem.OESuppressHydrogens(ligand)
    oechem.OEAssignBondiVdWRadii(ligand)

    # Void volume -> grid -> surface -> file
    void_grid = oegrid.OEScalarGrid()
    oespicoli.OEMakeVoidVolume(protein, ligand, void_grid, 0.5)

    surface = oespicoli.OESurface()
    oespicoli.OEMakeSurfaceFromGrid(surface, void_grid, 0.5)
    oespicoli.OEWriteSurface(args[3], surface)

    return 0
Пример #10
0
def _OEFixConnectionNH(protein):
    """Repair the hydrogen count on modeled backbone nitrogens.

    Temporary fix, thanks to Jesper!

    Every modeled nitrogen whose PDB atom name is ``N`` is expected to carry
    one hydrogen, or none when the residue is proline. Atoms with a different
    total hydrogen count have their hydrogens suppressed, the implicit count
    reset to the expected value, and the hydrogens re-added explicitly with
    3D geometry. ``protein`` is modified in place.
    """
    modeled_nitrogens = oechem.OEAndAtom(oespruce.OEIsModeledAtom(),
                                         oechem.OEIsNitrogen())
    for atom in protein.GetAtoms(modeled_nitrogens):
        if oechem.OEGetPDBAtomIndex(atom) != oechem.OEPDBAtomName_N:
            continue

        is_proline = (
            oechem.OEGetResidueIndex(atom) == oechem.OEResidueIndex_PRO)
        expected_h_count = 0 if is_proline else 1
        if atom.GetTotalHCount() == expected_h_count:
            continue

        oechem.OESuppressHydrogens(atom)
        # Use the expected count rather than a hard-coded 1, so a proline
        # nitrogen ends up with no hydrogen and matches the check above.
        atom.SetImplicitHCount(expected_h_count)
        oechem.OEAddExplicitHydrogens(protein, atom)
        for nbr in atom.GetAtoms(oechem.OEIsHydrogen()):
            oechem.OESet3DHydrogenGeom(protein, nbr)
Пример #11
0
def PrepareDepiction(mol, clearcoords=False, suppressH=True):
    """Make sure ``mol`` carries 2D depiction coordinates.

    The dimension is first derived from the existing coordinates and chirality
    is perceived. New 2D coordinates are generated when the molecule is not
    already 2D or when ``clearcoords`` is true. In that case 3D stereo is
    first transferred to bond/atom stereo for 3D input, and hydrogens are
    suppressed when ``suppressH`` is true, before depiction hydrogens are
    added. The molecule is modified in place and marked as 2D.

    Always returns ``True``.
    """
    oechem.OESetDimensionFromCoords(mol)
    oechem.OEPerceiveChiral(mol)

    needs_layout = mol.GetDimension() != 2 or clearcoords
    if needs_layout:
        # Preserve stereo information before the 3D coordinates are replaced.
        if mol.GetDimension() == 3:
            oechem.OE3DToBondStereo(mol)
            oechem.OE3DToAtomStereo(mol)

        if suppressH:
            oechem.OESuppressHydrogens(mol)
        oechem.OEAddDepictionHydrogens(mol)

        oechem.OEDepictCoordinates(mol)
        oechem.OEMDLPerceiveBondStereo(mol)

    mol.SetDimension(2)
    return True
Пример #12
0
    def prepare(self):
        """Render every molecule of ``self._molTitleList`` onto paged grids.

        Molecules are placed into the cells of a ``gridRows`` x ``gridCols``
        grid on the current image. When a grid is full, a new page is requested
        from ``self.__multi`` and a fresh grid is started on it.

        When ``self._params["suppressHydrogens"]`` is set, a hydrogen-suppressed
        copy of each molecule is depicted and the stored molecule is left
        untouched. Otherwise the stored molecule itself is used, so its title
        and depiction coordinates are modified in place.
        """
        self.__setupImage()
        rows = self._params["gridRows"]
        cols = self._params["gridCols"]
        # NOTE(review): unlike the grids of later pages, this first grid gets
        # no SetCellGap/SetMargins call here - confirm whether that is intended.
        grid = oedepict.OEImageGrid(self.__image, rows, cols)

        citer = grid.GetCells()

        for ccId, oeMol, title in self._molTitleList:
            logger.debug("Preparing %s %r", ccId, title)
            if not citer.IsValid():
                # Current page is full: go to the next page.
                self.__image = self.__multi.NewPage()
                grid = oedepict.OEImageGrid(self.__image, rows, cols)
                grid.SetCellGap(self._params["cellGap"])
                grid.SetMargins(self._params["cellMargin"])
                citer = grid.GetCells()

            cell = citer.Target()

            if self._params["suppressHydrogens"]:
                # OESuppressHydrogens edits the molecule in place and returns
                # a bool, so the copy must be bound before the call.
                mol = oechem.OEGraphMol(oeMol)
                oechem.OESuppressHydrogens(mol)
            else:
                mol = oeMol

            if self.__useTitle and title:
                mol.SetTitle(title)
                self._opts.SetTitleHeight(5.0)
            else:
                mol.SetTitle("")

            oedepict.OEPrepareDepiction(mol)
            self._opts.SetDimensions(cell.GetWidth(), cell.GetHeight(),
                                     oedepict.OEScale_AutoScale)
            self._assignDisplayOptions()

            disp = oedepict.OE2DMolDisplay(mol, self._opts)
            oedepict.OERenderMolecule(cell, disp)
            oedepict.OEDrawBorder(cell, oedepict.OEPen(oedepict.OEBlackPen))

            citer.Next()
def generate_restricted_conformers(receptor, refmol, mol, core_smarts=None):
    """
    Generate and select a conformer of the specified molecule using the reference molecule

    Conformers are built with Omega while keeping a core fragment of the
    reference molecule fixed. Each conformer is scored for clashes, docking
    score (Chemgauss4) and shape overlap with the reference. From the 10% of
    conformers with the highest overlap, the one with the lowest docking score
    is returned.

    Parameters
    ----------
    receptor : openeye.oechem.OEGraphMol
        Receptor (already prepped for docking) for identifying optimal pose
    refmol : openeye.oechem.OEGraphMol
        Reference molecule which shares some part in common with the proposed molecule
    mol : openeye.oechem.OEGraphMol
        Molecule whose conformers are to be enumerated
    core_smarts : str, optional, default=None
        If core_smarts is specified, substructure will be extracted using SMARTS.

    Returns
    -------
    openeye.oechem.OEGraphMol or None
        Single-conformer molecule carrying the SD tags ``clash_score``,
        ``docking_score``, ``overlap_score``, ``MMFF_internal_energy`` and
        ``docked_smiles``. ``None`` is returned when the core cannot be
        determined (SMARTS does not match, or the automatic core has fewer
        than 6 atoms), when no reasonable protomer is found, or when Omega
        fails.

    Notes
    -----
    The OpenEye error level is set to quiet as a side effect, and
    ``OEGetReasonableProtomer`` is applied to the ``mol`` passed in.
    """
    from openeye import oechem, oeomega

    logging.debug(
        f'mol: {oechem.OEMolToSmiles(mol)} | core_smarts: {core_smarts}')

    # Be quiet
    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Quiet)

    # Get core fragment
    if core_smarts:
        # Truncate refmol to the first SMARTS match
        ss = oechem.OESubSearch(core_smarts)
        oechem.OEPrepareSearch(refmol, ss)
        core_fragment = None
        for match in ss.Match(refmol):
            core_fragment = oechem.OEGraphMol()
            oechem.OESubsetMol(core_fragment, match)
            logging.debug(
                f'Truncated refmol to generate core_fragment: {oechem.OEMolToSmiles(core_fragment)}'
            )
            break
        if core_fragment is None:
            # Without this guard an unmatched SMARTS left core_fragment
            # unbound and crashed further down with UnboundLocalError.
            logging.warning(
                f'core_smarts {core_smarts} does not match the reference molecule')
            return None
    else:
        core_fragment = GetCoreFragment(refmol, [mol])
        oechem.OESuppressHydrogens(core_fragment)
        MIN_CORE_ATOMS = 6
        if core_fragment.NumAtoms() < MIN_CORE_ATOMS:
            return None

    # Create an Omega instance
    omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)

    # Set the fixed reference molecule
    omegaFixOpts = oeomega.OEConfFixOptions()
    omegaFixOpts.SetFixMaxMatch(10)  # allow multiple MCSS matches
    omegaFixOpts.SetFixDeleteH(True)  # only use heavy atoms
    omegaFixOpts.SetFixMol(core_fragment)
    omegaFixOpts.SetFixRMS(0.5)

    # OEExprOpts_Hybridization is deliberately not used for atoms; it caused:
    # Warning: OESubSearch::Match() is unable to match unset hybridization in
    # the target (EN300-221518_3_1) for patterns with set hybridization, call
    # OEPrepareSearch on the target first
    atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_AtomicNumber
    bondexpr = oechem.OEExprOpts_BondOrder | oechem.OEExprOpts_Aromaticity
    omegaFixOpts.SetAtomExpr(atomexpr)
    omegaFixOpts.SetBondExpr(bondexpr)
    omegaOpts.SetConfFixOptions(omegaFixOpts)

    molBuilderOpts = oeomega.OEMolBuilderOptions()
    molBuilderOpts.SetStrictAtomTypes(
        False)  # don't give up if MMFF types are not found
    omegaOpts.SetMolBuilderOptions(molBuilderOpts)

    omegaOpts.SetWarts(False)  # expand molecule title
    omegaOpts.SetStrictStereo(True)  # set strict stereochemistry
    omegaOpts.SetIncludeInput(False)  # don't include input
    omegaOpts.SetMaxConfs(1000)  # generate lots of conformers
    omegaOpts.SetEnergyWindow(20.0)  # allow high energies
    omega = oeomega.OEOmega(omegaOpts)

    # TODO: Expand protonation states and tautomers
    from openeye import oequacpac
    if not oequacpac.OEGetReasonableProtomer(mol):
        logging.warning('No reasonable protomer found')
        return None

    mol = oechem.OEMol(mol)  # multi-conformer molecule

    ret_code = omega.Build(mol)
    if (mol.GetDimension() != 3) or (ret_code !=
                                     oeomega.OEOmegaReturnCode_Success):
        msg = f'\nOmega failure for {mol.GetTitle()} : SMILES {oechem.OEMolToSmiles(mol)} : core_smarts {core_smarts} : {oeomega.OEGetOmegaError(ret_code)}\n'
        logging.warning(msg)
        return None

    # Extract poses
    class Pose(object):
        """Per-conformer container for the three scores computed below."""

        def __init__(self, conformer):
            self.conformer = conformer
            self.clash_score = None
            self.docking_score = None
            self.overlap_score = None

    poses = [Pose(conf) for conf in mol.GetConfs()]

    # Score clashes
    bump_check = BumpCheck(receptor)
    for pose in poses:
        pose.clash_score = bump_check.count(pose.conformer)

    # Score docking poses
    from openeye import oedocking
    score = oedocking.OEScore(oedocking.OEScoreType_Chemgauss4)
    score.Initialize(receptor)
    for pose in poses:
        pose.docking_score = score.ScoreLigand(pose.conformer)

    # Compute overlap scores
    from openeye import oeshape
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(refmol)
    shapeFunc = oeshape.OEExactShapeFunc()
    shapeFunc.SetupRef(refmol)
    oeshape_result = oeshape.OEOverlapResults()
    for pose in poses:
        tmpmol = oechem.OEGraphMol(pose.conformer)
        overlap_prep.Prep(tmpmol)
        shapeFunc.Overlap(tmpmol, oeshape_result)
        pose.overlap_score = oeshape_result.GetRefTversky()

    # Filter poses based on top 10% of overlap: sort ascending and keep the
    # tail, which always holds at least one pose.
    poses = sorted(poses, key=lambda pose: pose.overlap_score)
    poses = poses[int(0.9 * len(poses)):]

    # Select the best (lowest) docking score among the remaining poses
    poses = sorted(poses, key=lambda pose: pose.docking_score)
    pose = poses[0]
    mol.SetActive(pose.conformer)
    oechem.OESetSDData(mol, 'clash_score', str(pose.clash_score))
    oechem.OESetSDData(mol, 'docking_score', str(pose.docking_score))
    oechem.OESetSDData(mol, 'overlap_score', str(pose.overlap_score))

    # Convert to single-conformer molecule
    mol = oechem.OEGraphMol(mol)

    # Compute MMFF energy
    energy = mmff_energy(mol)
    oechem.OESetSDData(mol, 'MMFF_internal_energy', str(energy))

    # Store SMILES
    docked_smiles = oechem.OEMolToSmiles(mol)
    oechem.OESetSDData(mol, 'docked_smiles', docked_smiles)

    return mol
def generate_restricted_conformers(receptor, refmol, mol, core_smarts=None):
    """
    Generate and select a conformer of the specified molecule using the reference molecule

    Conformers are built with Omega while keeping a core fragment of the
    reference molecule fixed. Each conformer is scored for clashes, docking
    score (Chemgauss4) and shape overlap with the reference. From the 10% of
    conformers with the highest overlap, the one with the lowest docking score
    is returned.

    Parameters
    ----------
    receptor : openeye.oechem.OEGraphMol
        Receptor (already prepped for docking) for identifying optimal pose
    refmol : openeye.oechem.OEGraphMol
        Reference molecule which shares some part in common with the proposed molecule
    mol : openeye.oechem.OEGraphMol
        Molecule whose conformers are to be enumerated
    core_smarts : str, optional, default=None
        If core_smarts is specified, substructure will be extracted using SMARTS.
        NOTE: currently ignored - see the DEBUG override at the top of the body.

    Returns
    -------
    openeye.oechem.OEGraphMol or None
        Single-conformer molecule carrying the SD tags ``clash_score``,
        ``docking_score`` and ``overlap_score``. ``None`` is returned when the
        core cannot be determined, when no reasonable protomer is found, or
        when Omega fails.
    """
    from openeye import oechem, oeomega

    # DEBUG: For benzotriazoles, truncate refmol
    # NOTE(review): this unconditionally replaces the caller's core_smarts,
    # which also makes the GetCoreFragment branch below unreachable. Kept
    # because callers may rely on it; confirm before removing.
    # Prospective pattern (was assigned and immediately overwritten):
    #   'c1ccc(NC(=O)[C,N]n2nnc3ccccc32)cc1'
    core_smarts = 'NC(=O)[C,N]n2nnc3ccccc32' # retrospective

    # Get core fragment
    if core_smarts:
        # Truncate refmol to the first SMARTS match
        ss = oechem.OESubSearch(core_smarts)
        oechem.OEPrepareSearch(refmol, ss)
        core_fragment = None
        for match in ss.Match(refmol):
            core_fragment = oechem.OEGraphMol()
            oechem.OESubsetMol(core_fragment, match)
            break
        if core_fragment is None:
            # Without this guard an unmatched SMARTS left core_fragment
            # unbound and crashed further down with UnboundLocalError.
            print(f'core_smarts {core_smarts} does not match the reference molecule')
            return None
    else:
        core_fragment = GetCoreFragment(refmol, [mol])
        oechem.OESuppressHydrogens(core_fragment)
        MIN_CORE_ATOMS = 6
        if core_fragment.NumAtoms() < MIN_CORE_ATOMS:
            return None

    # Create an Omega instance
    omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)

    # Set the fixed reference molecule
    omegaFixOpts = oeomega.OEConfFixOptions()
    omegaFixOpts.SetFixMaxMatch(10) # allow multiple MCSS matches
    omegaFixOpts.SetFixDeleteH(True) # only use heavy atoms
    omegaFixOpts.SetFixMol(core_fragment)
    omegaFixOpts.SetFixRMS(0.5)

    atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_Hybridization
    bondexpr = oechem.OEExprOpts_BondOrder | oechem.OEExprOpts_Aromaticity
    omegaFixOpts.SetAtomExpr(atomexpr)
    omegaFixOpts.SetBondExpr(bondexpr)
    omegaOpts.SetConfFixOptions(omegaFixOpts)

    molBuilderOpts = oeomega.OEMolBuilderOptions()
    molBuilderOpts.SetStrictAtomTypes(False) # don't give up if MMFF types are not found
    omegaOpts.SetMolBuilderOptions(molBuilderOpts)

    omegaOpts.SetWarts(False) # expand molecule title
    omegaOpts.SetStrictStereo(False) # strict stereochemistry is switched off
    omegaOpts.SetIncludeInput(False) # don't include input
    omegaOpts.SetMaxConfs(1000) # generate lots of conformers
    #omegaOpts.SetEnergyWindow(10.0) # allow high energies
    omega = oeomega.OEOmega(omegaOpts)

    from openeye import oequacpac
    if not oequacpac.OEGetReasonableProtomer(mol):
        print('No reasonable protomer found')
        return None

    mol = oechem.OEMol(mol) # multi-conformer molecule

    ret_code = omega.Build(mol)
    if (mol.GetDimension() != 3) or (ret_code != oeomega.OEOmegaReturnCode_Success):
        print(f'Omega failure: {mol.GetDimension()} and {oeomega.OEGetOmegaError(ret_code)}')
        return None

    # Extract poses
    class Pose(object):
        """Per-conformer container for the three scores computed below."""

        def __init__(self, conformer):
            self.conformer = conformer
            self.clash_score = None
            self.docking_score = None
            self.overlap_score = None

    poses = [ Pose(conf) for conf in mol.GetConfs() ]

    # Score clashes
    bump_check = BumpCheck(receptor)
    for pose in poses:
        pose.clash_score = bump_check.count(pose.conformer)

    # Score docking poses
    from openeye import oedocking
    score = oedocking.OEScore(oedocking.OEScoreType_Chemgauss4)
    score.Initialize(receptor)
    for pose in poses:
        pose.docking_score = score.ScoreLigand(pose.conformer)

    # Compute overlap scores
    from openeye import oeshape
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(refmol)
    shapeFunc = oeshape.OEExactShapeFunc()
    shapeFunc.SetupRef(refmol)
    oeshape_result = oeshape.OEOverlapResults()
    for pose in poses:
        tmpmol = oechem.OEGraphMol(pose.conformer)
        overlap_prep.Prep(tmpmol)
        shapeFunc.Overlap(tmpmol, oeshape_result)
        pose.overlap_score = oeshape_result.GetRefTversky()

    # Filter poses based on top 10% of overlap: sort ascending and keep the
    # tail, which always holds at least one pose.
    poses = sorted(poses, key= lambda pose : pose.overlap_score)
    poses = poses[int(0.9*len(poses)):]

    # Select the best (lowest) docking score among the remaining poses
    poses = sorted(poses, key=lambda pose : pose.docking_score)
    pose = poses[0]
    mol.SetActive(pose.conformer)
    oechem.OESetSDData(mol, 'clash_score', str(pose.clash_score))
    oechem.OESetSDData(mol, 'docking_score', str(pose.docking_score))
    oechem.OESetSDData(mol, 'overlap_score', str(pose.overlap_score))

    # Convert to single-conformer molecule
    mol = oechem.OEGraphMol(mol)

    return mol
Пример #15
0
def main(argv=[__name__]):
    """Depict dihedral histograms of two ligand molecules in one image.

    Two hard-coded files are loaded through ``createOEMolFromSDF``, their
    hydrogens are suppressed and their atoms/bonds canonically ordered.
    Dihedral histograms with 20 bins are attached to both, and the result is
    drawn with ``depict_dihedrals`` plus a colour-gradient legend. ``argv`` is
    not used by the active code. Returns 0.

    Legacy code, disabled and kept for reference (command-line parsing and
    loading of multiple snapshots as conformers)::

        itf = oechem.OEInterface()
        oechem.OEConfigure(itf, InterfaceData)
        if not oechem.OEParseCommandLine(itf, argv):
            return 1

        oname = itf.GetString("-out")
        iname = itf.GetString("-in")

        ext = oechem.OEGetFileExtension(oname)
        if not oedepict.OEIsRegisteredImageFile(ext):
            oechem.OEThrow.Fatal("Unknown image type!")

        ofs = oechem.oeofstream()
        if not ofs.open(oname):
            oechem.OEThrow.Fatal("Cannot open output file!")


        ## INPUT PARAMETERS
        #########################################################
        #########################################################

        mm = 'tyk2/og_pdbs'
        qml = 'tyk2/forward_snapshots'
        phase = 'solvent'
        which_ligand = 'old'
        dir_name = iname
        ligand_pdbs_mm = glob.glob(f"{mm}/{dir_name}/{which_ligand}*{phase}.pdb")
        print(len(ligand_pdbs_mm))
        ligand_pdbs_qml = glob.glob(f"{qml}/{dir_name}/{which_ligand}*{phase}.pdb")
        print(len(ligand_pdbs_qml))

        #d = np.load('full_data_dict.npy', allow_pickle=True)
        from_ligand, to_ligand = iname.replace('from', '').replace('to', '').replace('lig', '')
        print(from_ligand)
        print(to_ligand)
        #key1 = (1, 8)
        #key2 = ('solvent', which_ligand)
        #########################################################
        #########################################################

        #d = d.flatten()[0]
        #work = d[key1][key2]
        #print(work)


        for i, (mm_pdb_path, ani_pdb_path) in enumerate(zip(ligand_pdbs_mm, ligand_pdbs_qml)):
            print(mm_pdb_path, ani_pdb_path)
            if i == 0:
                MM_mol = createOEMolFromSDF(mm_pdb_path, 0)
                ANI_mol = createOEMolFromSDF(ani_pdb_path, 0)
            else:
                # there absolutely must be a better/faster way of doing this because this is ugly and slow
                MM_mol.NewConf(createOEMolFromSDF(mm_pdb_path, 0))
                ANI_mol.NewConf(createOEMolFromSDF(ani_pdb_path, 0))
    """
    # NOTE(review): this stream is never opened anywhere in the function, and
    # "tor_out" has no file extension, so `ext` is presumably empty. The final
    # OEWriteImage call therefore looks unable to produce a file - confirm the
    # intended output name/format and restore the open() check from the
    # legacy code in the docstring.
    ofs = oechem.oeofstream()
    oname = f"tor_out"
    ext = oechem.OEGetFileExtension(oname)

    # Hard-coded inputs. NOTE(review): the helper is named ...FromSDF but is
    # given .pdb paths - verify it handles that format.
    mm_pdb_path = f"og_lig0_solvent.pdb"
    ani_pdb_path = f"forward_lig0.solvent.pdb"
    MM_mol = createOEMolFromSDF(mm_pdb_path, 0)
    ANI_mol = createOEMolFromSDF(ani_pdb_path, 0)

    mol = MM_mol
    mol2 = ANI_mol

    # Bring both molecules to the same hydrogen-free, canonically ordered form.
    for m in [mol, mol2]:
        oechem.OESuppressHydrogens(m)
        oechem.OECanonicalOrderAtoms(m)
        oechem.OECanonicalOrderBonds(m)
        m.Sweep()

    # No reference molecule is supplied to depict_dihedrals.
    refmol = None

    # Tag under which the dihedral histogram data is passed between helpers.
    stag = "dihedral_histogram"
    itag = oechem.OEGetTag(stag)

    # Number of histogram bins.
    nrbins = 20

    print(mol.NumConfs())
    print(mol2.NumConfs())

    get_dihedrals(mol, itag)
    set_dihedral_histograms(mol, itag, nrbins)

    get_dihedrals(mol2, itag)
    #set_weighted_dihedral_histograms(mol2, itag, work, nrbins)
    set_dihedral_histograms(mol2, itag, nrbins)

    width, height = 800, 400
    image = oedepict.OEImage(width, height)

    # Layout: molecule frame takes the left 70% of the width; the second
    # frame takes the remaining 30%, half the height, offset 30% from the top.
    moffset = oedepict.OE2DPoint(0, 0)
    mframe = oedepict.OEImageFrame(image, width * 0.70, height, moffset)
    doffset = oedepict.OE2DPoint(mframe.GetWidth(), height * 0.30)
    dframe = oedepict.OEImageFrame(image, width * 0.30, height * 0.5, doffset)

    # `flexibility` selects the colour gradient and enables the legend below.
    flexibility = True
    colorg = get_color_gradient(nrbins, flexibility)

    opts = oedepict.OE2DMolDisplayOptions(mframe.GetWidth(),
                                          mframe.GetHeight(),
                                          oedepict.OEScale_AutoScale)

    depict_dihedrals(mframe, dframe, mol, mol2, refmol, opts, itag, nrbins,
                     colorg)

    if flexibility:
        # Hover-activated legend in the top-left corner showing the gradient.
        lopts = oedepict.OELegendLayoutOptions(
            oedepict.OELegendLayoutStyle_HorizontalTopLeft,
            oedepict.OELegendColorStyle_LightBlue,
            oedepict.OELegendInteractiveStyle_Hover)
        lopts.SetButtonWidthScale(1.2)
        lopts.SetButtonHeightScale(1.2)
        lopts.SetMargin(oedepict.OEMargin_Right, 40.0)
        lopts.SetMargin(oedepict.OEMargin_Bottom, 80.0)

        legend = oedepict.OELegendLayout(image, "Legend", lopts)

        legend_area = legend.GetLegendArea()
        draw_color_gradient(legend_area, colorg)

        oedepict.OEDrawLegendLayout(legend)

    iconscale = 0.5
    oedepict.OEAddInteractiveIcon(image, oedepict.OEIconLocation_TopRight,
                                  iconscale)
    oedepict.OEDrawCurvedBorder(image, oedepict.OELightGreyPen, 10.0)

    # See the NOTE(review) on `ofs` above: the stream is not open at this point.
    oedepict.OEWriteImage(ofs, ext, image)

    return 0
Пример #16
0
def _rewrite_pdb_without_unk(pdb_path):
    """Rewrite ``pdb_path`` in place, dropping every line that contains 'UNK'."""
    with open(pdb_path, 'r') as infile:
        kept_lines = [line for line in infile if 'UNK' not in line]
    with open(pdb_path, 'wt') as outfile:
        outfile.write(''.join(kept_lines))


def prepare_receptor(complex_pdb_filename, output_basepath, dimer=False):
    """
    Prepare docking receptors (neutral and CYS145-thiolate) from a complex PDB.

    The following files are written, all prefixed with the PDB file's stem
    inside ``output_basepath``: ``-receptor.oeb.gz``,
    ``-receptor-thiolate.oeb.gz``, ``-protein.pdb``, ``-protein-thiolate.pdb``
    and ``-ligand.{mol2,pdb,sdf}``. Nothing is done when both receptor files
    already exist.

    Parameters
    ----------
    complex_pdb_filename : str
        The complex PDB file to read in
    output_basepath : str
        Base path for output
    dimer : bool, optional, default=False
        If True, generate the dimer as the biological unit

    Raises
    ------
    Exception
        If no design unit can be built from the complex.
    """
    import os
    _, filename = os.path.split(complex_pdb_filename)
    prefix, _ = os.path.splitext(filename)
    prefix = os.path.join(output_basepath, prefix)

    # Check if receptor already exists
    receptor_filename = f'{prefix}-receptor.oeb.gz'
    thiolate_receptor_filename = f'{prefix}-receptor-thiolate.oeb.gz'
    if os.path.exists(receptor_filename) and os.path.exists(
            thiolate_receptor_filename):
        return

    # Read in PDB file, skipping UNK records (covalent adducts)
    with open(complex_pdb_filename, 'r') as infile:
        pdbfile_lines = [line for line in infile if 'UNK' not in line]

    # If monomer is specified, drop crystal symmetry lines
    if not dimer:
        pdbfile_lines = [
            line for line in pdbfile_lines if 'REMARK 350' not in line
        ]

    # Reconstruct PDBFile contents
    pdbfile_contents = ''.join(pdbfile_lines)

    # Read the receptor and identify design units
    from openeye import oespruce, oechem
    from tempfile import NamedTemporaryFile
    with NamedTemporaryFile(delete=False, mode='wt', suffix='.pdb') as pdbfile:
        pdbfile.write(pdbfile_contents)
    # The file is closed (and flushed) here but still on disk (delete=False),
    # so it can be re-opened by name. Remove it once it has been read; this
    # assumes read_pdb_file loads the molecule eagerly.
    try:
        complex_mol = read_pdb_file(pdbfile.name)
    finally:
        os.remove(pdbfile.name)

    design_units = list(oespruce.OEMakeDesignUnits(complex_mol))
    if not design_units:
        raise Exception(f' * No design units found for {complex_pdb_filename}')
    # When several design units are found, the first one is used.
    design_unit = design_units[0]

    # Prepare the receptor
    from openeye import oedocking
    protein = oechem.OEGraphMol()
    design_unit.GetProtein(protein)
    ligand = oechem.OEGraphMol()
    design_unit.GetLigand(ligand)

    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)
    with oechem.oemolostream(f'{prefix}-ligand.mol2') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.sdf') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)

    # Filter out UNK from PDB files (which have covalent adducts)
    _rewrite_pdb_without_unk(f'{prefix}-protein.pdb')

    # Adjust protonation state of CYS145 to generate thiolate form
    pred = oechem.OEAtomMatchResidue(["CYS:145: :A"])
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_SG:
            oechem.OESuppressHydrogens(atom)
            atom.SetFormalCharge(-1)
            atom.SetImplicitHCount(0)

    # Re-optimize hydrogen positions, leaving CYS145 alone.
    # NOTE(review): the SG edits above are made on the local `protein` copy,
    # which is then overwritten by design_unit.GetProtein(protein) below -
    # confirm the thiolate state actually reaches the written receptor. The
    # success flag returned by OEProtonateDesignUnit is not checked, as before.
    place_hydrogens_opts = oechem.OEPlaceHydrogensOptions()
    place_hydrogens_opts.SetBypassPredicate(pred)
    protonate_opts = oespruce.OEProtonateDesignUnitOptions(
        place_hydrogens_opts)
    oespruce.OEProtonateDesignUnit(design_unit, protonate_opts)
    design_unit.GetProtein(protein)

    # Write thiolate form of receptor
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, thiolate_receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein-thiolate.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)

    # Filter out UNK from PDB files (which have covalent adducts)
    _rewrite_pdb_without_unk(f'{prefix}-protein-thiolate.pdb')
Пример #17
0
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Build the source molecule from a benzene SMILES string.
src = oechem.OEGraphMol()
oechem.OESmilesToMol(src, "c1ccccc1")

# make holes in the molecule index space and juggle things around:
# add, suppress and re-add hydrogens, then reorder the atoms canonically
for perturb in (oechem.OEAddExplicitHydrogens,
                oechem.OESuppressHydrogens,
                oechem.OEAddExplicitHydrogens,
                oechem.OECanonicalOrderAtoms):
    perturb(src)

# Lookup table handed to OECopyMol; it is indexed by source atom index below,
# so it is sized by the largest atom index rather than by the atom count.
atommap = oechem.OEAtomArray(src.GetMaxAtomIdx())

dst = oechem.OEGraphMol()
oechem.OECopyMol(dst, src, atommap)

# Report which destination atom each source atom was mapped to.
for src_atom in src.GetAtoms():
    dst_atom = atommap[src_atom.GetIdx()]
    print(src_atom.GetIdx(), " -> ", dst_atom.GetIdx())
# @ </SNIPPET>
Пример #18
0
 def suppressHydrogens(self, oeMol):
     """Return a copy of ``oeMol`` with its hydrogens suppressed.

     The input molecule is not modified; the work is done on an
     ``oechem.OEMol`` copy.  A falsy ``oeMol`` (e.g. ``None``) yields
     ``None``.  If the copy itself evaluates as falsy it is returned
     untouched.
     """
     if not oeMol:
         return None
     stripped = oechem.OEMol(oeMol)
     if stripped:
         oechem.OESuppressHydrogens(stripped)
     return stripped
                        help='if specified, will only store minimal information for each molecule (default: False)')
    parser.add_argument('--sort', dest='sort', action='store_true', default=False,
                        help='if specified, will sort according to overlap (default: False)')
    parser.add_argument('--covalent', dest='covalent', action='store_true', default=False,
                        help='if specified, will only consider those with `covalent_warhead=True` (default: False)')

    args = parser.parse_args()

    # Read the docked molecules as CSV
    print('Loading molecules...')
    docked_molecules = list()
    with oechem.oemolistream(args.docked_molecules) as ifs:
        docked_molecules = list()
        molecule = oechem.OEGraphMol()
        while oechem.OEReadMolecule(ifs, molecule):
            oechem.OESuppressHydrogens(molecule)
            docked_molecules.append( molecule.CreateCopy() )
    print(f'{len(docked_molecules)} read')

    if args.covalent:
        print('Only filtering covalent fragments')
        docked_molecules = [molecule for molecule in docked_molecules if oechem.OEGetSDData(molecule, 'covalent_warhead')=='TRUE']
        print(f'{len(docked_molecules)} remain after filtering')

    print('Loading fragments')
    filenames = glob(os.path.join(args.receptor_basedir, 'Mpro-x*-ligand.mol2'))
    fragments = dict()
    for filename in tqdm(filenames):
        with oechem.oemolistream(filename) as ifs:
            fragment = oechem.OEGraphMol()
            while oechem.OEReadMolecule(ifs, fragment):
def _drop_unk_records(pdb_path):
    """Rewrite ``pdb_path`` in place, dropping every line that contains 'UNK'."""
    with open(pdb_path, 'r') as infile:
        kept_lines = [line for line in infile if 'UNK' not in line]
    with open(pdb_path, 'wt') as outfile:
        outfile.write(''.join(kept_lines))


def prepare_receptor(complex_pdb_filename,
                     output_basepath,
                     dimer=False,
                     retain_water=False):
    """
    Prepare docking receptors (as-built and thiolate form) from a complex PDB file.

    The PDB text is filtered (lines containing 'UNK' and 'LINK' are dropped,
    'HOH' lines unless ``retain_water``, 'REMARK 350' lines unless ``dimer``),
    a SEQRES header is prepended when none is present, and the result is handed
    to Spruce to build design units.  From the first design unit the function
    writes, with ``prefix = output_basepath/<input file name without extension>``:

    * ``{prefix}-receptor.oeb.gz``, ``{prefix}-protein.pdb``
    * ``{prefix}-ligand.mol2``, ``{prefix}-ligand.pdb``, ``{prefix}-ligand.sdf``
    * ``{prefix}-receptor-thiolate.oeb.gz``, ``{prefix}-protein-thiolate.pdb``
      (after setting CYS145 SG to charge -1 and HIS41 ND1 to charge +1 and
      re-protonating the design unit)

    If both receptor files already exist the function returns immediately.

    Relies on the module-level names ``read_pdb_file`` and
    ``BIOLOGICAL_SYMMETRY_HEADER``.

    Parameters
    ----------
    complex_pdb_filename : str
        The complex PDB file to read in
    output_basepath : str
        Base path for output
    dimer : bool, optional, default=False
        If True, generate the dimer as the biological unit
    retain_water : bool, optional, default=False
        If True, will retain waters

    Raises
    ------
    Exception
        If no design units could be generated; the message carries the
        OpenEye warnings captured during generation.
    """
    # Only stdlib is needed up to the "already exists" check below, so the
    # OpenEye imports are deferred until they are actually required.
    import os
    import re

    # Check whether this is a diamond SARS-CoV-2 Mpro structure or not
    # (raw string: '\d' is not a valid escape in a normal string literal)
    is_diamond_structure = (re.search(r'-x\d+_', complex_pdb_filename)
                            is not None)

    filename = os.path.basename(complex_pdb_filename)
    prefix = os.path.join(output_basepath, os.path.splitext(filename)[0])

    # Check if receptor already exists
    receptor_filename = f'{prefix}-receptor.oeb.gz'
    thiolate_receptor_filename = f'{prefix}-receptor-thiolate.oeb.gz'
    if os.path.exists(receptor_filename) and os.path.exists(
            thiolate_receptor_filename):
        return

    # Read in PDB file, skipping UNK atoms (left over from processing covalent ligands)
    with open(complex_pdb_filename, 'r') as infile:
        pdbfile_lines = [line for line in infile if 'UNK' not in line]

    # Check if biological symmetry header is present
    has_biological_symmetry_header = any('REMARK 350' in line
                                         for line in pdbfile_lines)

    # Prepend REMARK 350 (biological symmetry) header lines for Mpro (from 5RGG) if not present
    if is_diamond_structure and (not has_biological_symmetry_header):
        pdbfile_lines = [
            line + '\n' for line in BIOLOGICAL_SYMMETRY_HEADER.split('\n')
        ] + pdbfile_lines

    # If monomer is specified, drop crystal symmetry lines
    if not dimer:
        pdbfile_lines = [
            line for line in pdbfile_lines if 'REMARK 350' not in line
        ]

    # Filter out waters
    if not retain_water:
        pdbfile_lines = [line for line in pdbfile_lines if 'HOH' not in line]

    # Filter out LINK records to covalent inhibitors so we can model non-covalent complex
    pdbfile_lines = [line for line in pdbfile_lines if 'LINK' not in line]

    # Reconstruct PDBFile contents
    pdbfile_contents = ''.join(pdbfile_lines)

    # Prepend SEQRES to all structures if they do not have it
    seqres = """\
SEQRES   1 A  306  SER GLY PHE ARG LYS MET ALA PHE PRO SER GLY LYS VAL
SEQRES   2 A  306  GLU GLY CYS MET VAL GLN VAL THR CYS GLY THR THR THR
SEQRES   3 A  306  LEU ASN GLY LEU TRP LEU ASP ASP VAL VAL TYR CYS PRO
SEQRES   4 A  306  ARG HIS VAL ILE CYS THR SER GLU ASP MET LEU ASN PRO
SEQRES   5 A  306  ASN TYR GLU ASP LEU LEU ILE ARG LYS SER ASN HIS ASN
SEQRES   6 A  306  PHE LEU VAL GLN ALA GLY ASN VAL GLN LEU ARG VAL ILE
SEQRES   7 A  306  GLY HIS SER MET GLN ASN CYS VAL LEU LYS LEU LYS VAL
SEQRES   8 A  306  ASP THR ALA ASN PRO LYS THR PRO LYS TYR LYS PHE VAL
SEQRES   9 A  306  ARG ILE GLN PRO GLY GLN THR PHE SER VAL LEU ALA CYS
SEQRES  10 A  306  TYR ASN GLY SER PRO SER GLY VAL TYR GLN CYS ALA MET
SEQRES  11 A  306  ARG PRO ASN PHE THR ILE LYS GLY SER PHE LEU ASN GLY
SEQRES  12 A  306  SER CYS GLY SER VAL GLY PHE ASN ILE ASP TYR ASP CYS
SEQRES  13 A  306  VAL SER PHE CYS TYR MET HIS HIS MET GLU LEU PRO THR
SEQRES  14 A  306  GLY VAL HIS ALA GLY THR ASP LEU GLU GLY ASN PHE TYR
SEQRES  15 A  306  GLY PRO PHE VAL ASP ARG GLN THR ALA GLN ALA ALA GLY
SEQRES  16 A  306  THR ASP THR THR ILE THR VAL ASN VAL LEU ALA TRP LEU
SEQRES  17 A  306  TYR ALA ALA VAL ILE ASN GLY ASP ARG TRP PHE LEU ASN
SEQRES  18 A  306  ARG PHE THR THR THR LEU ASN ASP PHE ASN LEU VAL ALA
SEQRES  19 A  306  MET LYS TYR ASN TYR GLU PRO LEU THR GLN ASP HIS VAL
SEQRES  20 A  306  ASP ILE LEU GLY PRO LEU SER ALA GLN THR GLY ILE ALA
SEQRES  21 A  306  VAL LEU ASP MET CYS ALA SER LEU LYS GLU LEU LEU GLN
SEQRES  22 A  306  ASN GLY MET ASN GLY ARG THR ILE LEU GLY SER ALA LEU
SEQRES  23 A  306  LEU GLU ASP GLU PHE THR PRO PHE ASP VAL VAL ARG GLN
SEQRES  24 A  306  CYS SER GLY VAL THR PHE GLN
"""
    if 'SEQRES' not in pdbfile_contents:
        pdbfile_contents = seqres + pdbfile_contents

    # Read the receptor and identify design units
    from openeye import oespruce, oechem
    from tempfile import NamedTemporaryFile

    # The filtered PDB text goes through a temporary file because
    # read_pdb_file takes a path.  The file is removed afterwards;
    # NOTE(review): this assumes read_pdb_file has fully loaded the structure
    # by the time it returns - confirm against its definition.
    pdbfile = NamedTemporaryFile(delete=False, mode='wt', suffix='.pdb')
    try:
        with pdbfile:
            pdbfile.write(pdbfile_contents)
        complex_mol = read_pdb_file(pdbfile.name)
    finally:
        os.remove(pdbfile.name)

    # Strip protons from structure to allow SpruceTK to add these back
    # See: 6wnp, 6wtj, 6wtk, 6xb2, 6xqs, 6xqt, 6xqu, 6m2n
    for atom in complex_mol.GetAtoms():
        if atom.GetAtomicNum() > 1:
            oechem.OESuppressHydrogens(atom)

    # Delete the O bonded to the C-terminal atom because Spruce causes issues with this
    # See: 6m2n 6lze
    pred = oechem.OEIsCTerminalAtom()
    for atom in complex_mol.GetAtoms():
        if pred(atom):
            for nbor in atom.GetAtoms():
                if oechem.OEGetPDBAtomIndex(nbor) == oechem.OEPDBAtomName_O:
                    complex_mol.DeleteAtom(nbor)

    # Capture OpenEye messages into an in-memory stream so they can be
    # reported together with the SUCCESS/FAILURE line below.
    errfs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errfs)
    oechem.OEThrow.Clear()
    oechem.OEThrow.SetLevel(
        oechem.OEErrorLevel_Verbose)  # capture verbose error output
    try:
        opts = oespruce.OEMakeDesignUnitOptions()
        opts.GetSplitOptions().SetMinLigAtoms(
            7)  # minimum fragment size (in heavy atoms)

        mdata = oespruce.OEStructureMetadata()
        opts.GetPrepOptions().SetStrictProtonationMode(True)

        # Both N- and C-termini should be zwitterionic
        # Mpro cleaves its own N- and C-termini
        # See https://www.pnas.org/content/113/46/12997
        opts.GetPrepOptions().GetBuildOptions().SetCapNTermini(False)
        opts.GetPrepOptions().GetBuildOptions().SetCapCTermini(False)
        # Don't allow truncation of termini, since force fields don't have parameters for this
        opts.GetPrepOptions().GetBuildOptions().GetCapBuilderOptions(
        ).SetAllowTruncate(False)
        # Build loops and sidechains
        opts.GetPrepOptions().GetBuildOptions().SetBuildLoops(True)
        opts.GetPrepOptions().GetBuildOptions().SetBuildSidechains(True)

        # Don't flip Gln189
        pred = oechem.OEAtomMatchResidue(["GLN:189:.*:.*:.*"])
        protonate_opts = opts.GetPrepOptions().GetProtonateOptions()
        place_hydrogens_opts = protonate_opts.GetPlaceHydrogensOptions()
        place_hydrogens_opts.SetNoFlipPredicate(pred)

        # Make design units
        design_units = list(
            oespruce.OEMakeDesignUnits(complex_mol, mdata, opts))
    finally:
        # Restore error stream even if design-unit generation raised
        oechem.OEThrow.SetOutputStream(oechem.oeerr)

    # Capture the warnings to a string
    captured_warnings = errfs.str().decode("utf-8")

    print('')
    print('')
    if not design_units:
        print(f'{complex_pdb_filename} : FAILURE')
        print(captured_warnings)
        msg = f'No design units found for {complex_pdb_filename}\n'
        msg += captured_warnings
        msg += '\n'
        raise Exception(msg)
    print(f'{complex_pdb_filename} : SUCCESS')
    print(captured_warnings)
    design_unit = design_units[0]

    # Prepare the receptor
    from openeye import oedocking
    protein = oechem.OEGraphMol()
    design_unit.GetProtein(protein)
    ligand = oechem.OEGraphMol()
    design_unit.GetLigand(ligand)

    # Create receptor and other files
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)
    with oechem.oemolostream(f'{prefix}-ligand.mol2') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.sdf') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)

    # Filter out UNK from PDB files (which have covalent adducts)
    _drop_unk_records(f'{prefix}-protein.pdb')

    # Adjust protonation state of CYS145 to generate thiolate form
    pred = oechem.OEAtomMatchResidue(["CYS:145:.*:.*:.*"])
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_SG:
            oechem.OESuppressHydrogens(atom)
            atom.SetFormalCharge(-1)
            atom.SetImplicitHCount(0)
    # Protonate HIS41 ND1
    # NOTE(review): this second SetBypassPredicate call presumably replaces
    # the CYS145 predicate set just above rather than adding to it - confirm
    # that bypassing only HIS41 during re-protonation is intended.
    pred = oechem.OEAtomMatchResidue(["HIS:41:.*:.*:.*"])
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_ND1:
            oechem.OESuppressHydrogens(atom)  # strip hydrogens from residue
            atom.SetFormalCharge(+1)
            atom.SetImplicitHCount(1)
    # Update the design unit with the modified protein
    oechem.OEUpdateDesignUnit(design_unit, protein,
                              oechem.OEDesignUnitComponents_Protein)

    # Re-run protonation on the updated design unit and fetch the result
    oespruce.OEProtonateDesignUnit(design_unit, protonate_opts)
    design_unit.GetProtein(protein)

    # Write thiolate form of receptor
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, thiolate_receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein-thiolate.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)

    # Filter out UNK from PDB files (which have covalent adducts)
    _drop_unk_records(f'{prefix}-protein-thiolate.pdb')
    def GetBestOverlays(self, querymolstr, options, iformat, oformat):
        """Run a shape search for the query and return the overlaid hits.

        The query is parsed from ``querymolstr``, scored against the shape
        database, and every hit conformer is written, transformed onto the
        query and annotated with its scores, to an in-memory stream whose
        contents are returned.

        querymolstr - a string containing a molecule to use as the query
        options - an instance of OEShapeDatabaseOptions
        iformat - file extension describing how to parse querymolstr.
                  Note: old clients could be passing .sq files, so when
                  iformat == '.oeb' fails to parse as a molecule the string
                  is re-read as a shape query.
        oformat - file format to write the results as

        Raises ValueError if the query cannot be opened/parsed or the output
        string stream cannot be opened.
        """
        stopwatch = oechem.OEWallTimer()

        # Block until the database load has finished.  The call is kept
        # separate from the assert so it still runs when asserts are disabled.
        is_ready = self.IsLoaded(True)
        assert is_ready

        if iformat.startswith(".sq"):
            query = ReadShapeQuery(querymolstr)
        else:
            # Parse the query as a molecule in the requested format.
            query_stream = SetupStream(oechem.oemolistream(), iformat)
            if not query_stream.openstring(querymolstr):
                raise ValueError("Unable to open input molecule string")

            query = oechem.OEGraphMol()
            parsed = oechem.OEReadMolecule(query_stream, query)
            if not parsed and iformat != ".oeb":
                raise ValueError(
                    "Unable to read a molecule from the string of format '%s'"
                    % iformat)
            if not parsed:
                # could be an old client trying to send a .sq file.
                query = ReadShapeQuery(querymolstr)

        out_stream = SetupStream(oechem.oemolostream(), oformat)
        if not out_stream.openstring():
            raise ValueError("Unable to openstring for output")

        # this is a "Write" lock to be paranoid and not overload the GPU
        self.rwlock.AcquireWriteLock()
        try:
            hits = self.shapedb.GetSortedScores(query, options)
            sys.stderr.write("%f seconds to do search\n" %
                             stopwatch.Elapsed())
        finally:
            self.rwlock.ReleaseWriteLock()

        # Restart the timer to measure the write phase separately.
        stopwatch.Start()
        for hit in hits:
            multiconf = oechem.OEMol()
            if not self.moldb.GetMolecule(multiconf, hit.GetMolIdx()):
                oechem.OEThrow.Warning(
                    "Can't retrieve molecule %i from the OEMolDatabase, "
                    "skipping..." % hit.GetMolIdx())
                continue
            # remove hydrogens to make output smaller, this also
            # ensures OEPrepareFastROCSMol will have the same output
            oechem.OESuppressHydrogens(multiconf)

            # Pull out just the conformer that produced this score.
            conf_selector = oechem.OEHasConfIdx(hit.GetConfIdx())
            pose = oechem.OEGraphMol(multiconf.GetConf(conf_selector))
            oechem.OECopySDData(pose, multiconf)

            # Pick the score triple matching the similarity function in use.
            if options.GetSimFunc() == oefastrocs.OEShapeSimFuncType_Tanimoto:
                sd_scores = (
                    ("ShapeTanimoto", hit.GetShapeTanimoto()),
                    ("ColorTanimoto", hit.GetColorTanimoto()),
                    ("TanimotoCombo", hit.GetTanimotoCombo()),
                )
            else:
                sd_scores = (
                    ("ShapeTversky", hit.GetShapeTversky()),
                    ("ColorTversky", hit.GetColorTversky()),
                    ("TverskyCombo", hit.GetTverskyCombo()),
                )
            for sd_tag, sd_score in sd_scores:
                oechem.OESetSDData(pose, sd_tag, "%.4f" % sd_score)

            # Record the starting orientation only when it is not inertial.
            start_orientation = options.GetInitialOrientation()
            if start_orientation != oefastrocs.OEFastROCSOrientation_Inertial:
                oechem.OEAddSDData(pose, "Opt. Starting Pos.",
                                   GetAltStartsString(start_orientation))

            hit.Transform(pose)

            oechem.OEWriteMolecule(out_stream, pose)

        hitlist = out_stream.GetString()
        sys.stderr.write("%f seconds to write hitlist\n" %
                         stopwatch.Elapsed())
        sys.stderr.flush()
        out_stream.close()

        return hitlist
Пример #22
0
def _expand_states(molecules, enumerate='protonation', max_states=200, suppress_hydrogen=True, reasonable=True,
                   carbon_hybridization=True, level=0, verbose=True):
    """
    Expand the state specified by enumerate variable

    Parameters
    ----------
    molecules: OEMol or list of OEMol
        molecule to expand states
    enumerate: str, optional, default='protonation'
        Kind of state to enumerate. Choice of protonation, tautomers, stereoiserms
    suppress_hydrogen: bool, optional, default=True
        If True, will suppress explicit hydrogen
    reasonable: bool, optional, default=True
        If True, will rank tautomers by the most reasonable energetically
    carbon_hybridization: bool, optional, default=True
        If True, will allow carbon to change hybridization
    max_states: int, optional, default=200
    verbose: Bool, optional, deault=TRue

    Returns
    -------
    states: list of OEMol
        enumerated states

    """
    if type(molecules) != type(list()):
        molecules = [molecules]

    states = list()
    for molecule in molecules:
        ostream = oechem.oemolostream()
        ostream.openstring()
        ostream.SetFormat(oechem.OEFormat_SDF)
        states_enumerated = 0
        if suppress_hydrogen:
            oechem.OESuppressHydrogens(molecule)
        if enumerate == 'protonation':
            formal_charge_options = oequacpac.OEFormalChargeOptions()
            formal_charge_options.SetMaxCount(max_states)
            formal_charge_options.SetVerbose(verbose)
            if verbose:
                logger().debug("Enumerating protonation states...")
            for protonation_state in oequacpac.OEEnumerateFormalCharges(molecule, formal_charge_options):
                states_enumerated += 1
                oechem.OEWriteMolecule(ostream, protonation_state)
                states.append(protonation_state)
        if enumerate == 'tautomers':
            #max_zone_size = molecule.GetMaxAtomIdx()
            tautomer_options = oequacpac.OETautomerOptions()
            tautomer_options.SetMaxTautomersGenerated(max_states)
            tautomer_options.SetLevel(level)
            tautomer_options.SetRankTautomers(reasonable)
            tautomer_options.SetCarbonHybridization(carbon_hybridization)
            #tautomer_options.SetMaxZoneSize(max_zone_size)
            tautomer_options.SetApplyWarts(True)
            if verbose:
                logger().debug("Enumerating tautomers...")
            for tautomer in oequacpac.OEEnumerateTautomers(molecule, tautomer_options):
                states_enumerated += 1
                states.append(tautomer)
        if enumerate == 'stereoisomers':
            if verbose:
                logger().debug("Enumerating stereoisomers...")
            for enantiomer in oeomega.OEFlipper(molecule, max_states, True):
                states_enumerated += 1
                enantiomer = oechem.OEMol(enantiomer)
                oechem.OEWriteMolecule(ostream, enantiomer)
                states.append(enantiomer)

    return states
Пример #23
0
def generate_fragalysis(
    series: CompoundSeriesAnalysis,
    fragalysis_config: FragalysisConfig,
    results_path: str,
) -> None:
    """
    Generate input and upload to fragalysis from fragalysis_config

    Reads the ligand poses from ``results_path``, strips hydrogens and all SD
    tags except the free-energy ones, adds the SD tags Fragalysis requires,
    prepends a header record describing each SD field, writes the result to
    ``<results_path>/fragalysis_upload/<fragalysis_sdf_filename>`` and submits
    it with ``update_cset``.

    Fragalysis spec: https://discuss.postera.ai/t/providing-computed-poses-for-others-to-look-at/1155/8?u=johnchodera

    Parameters
    ----------
    series : CompoundSeriesAnalysis
        Analysis results (not read by this function)
    fragalysis_config : FragalysisConfig
        Fragalysis input parameters
    results_path : str
        The path to the results

    Raises
    ------
    IndexError
        If no ligands could be read (there is no molecule to build the
        header record from).
    """

    import os
    from datetime import datetime
    from shutil import copyfile

    from openeye import oechem
    from rich.progress import track

    # make a directory to store fragalysis upload data
    fa_path = os.path.join(results_path, "fragalysis_upload")
    os.makedirs(fa_path, exist_ok=True)

    # When ref_pdb is "references.zip", each ligand gets its own reference PDB
    uses_reference_zip = fragalysis_config.ref_pdb == "references.zip"

    # set paths
    ligands_path = os.path.join(results_path,
                                fragalysis_config.ligands_filename)
    fa_ligands_path = os.path.join(fa_path,
                                   fragalysis_config.fragalysis_sdf_filename)

    # copy sprint generated sdf to new name for fragalysis input
    copyfile(ligands_path, fa_ligands_path)

    # Read ligand poses
    molecules = []

    with oechem.oemolistream(ligands_path) as ifs:
        oemol = oechem.OEGraphMol()
        while oechem.OEReadMolecule(ifs, oemol):
            molecules.append(oemol.CreateCopy())
    print(f"{len(molecules)} ligands read")

    # Get zipped PDB if specified
    if uses_reference_zip:
        consolidate_protein_snapshots_into_pdb(
            oemols=molecules,
            results_path=results_path,
            pdb_filename="references.pdb",
            fragalysis_input=True,
            fragalysis_path=fa_path,
        )

    descriptions = {
        "DDG (kcal/mol)":
        "Relative computed free energy difference",
        "dDDG (kcal/mol)":
        "Uncertainty in computed relative free energy difference",
        "ref_mols":
        "a comma separated list of the fragments that inspired the design of the new molecule (codes as they appear in fragalysis - e.g. x0104_0,x0692_0)",
        "ref_pdb":
        "The name of the fragment (and corresponding Mpro fragment structure) with the best scoring hybrid docking pose",
        "original SMILES":
        "the original SMILES of the compound before any computation was carried out",
    }

    # Preprocess molecules
    tags_to_retain = {"DDG (kcal/mol)", "dDDG (kcal/mol)"}
    for index, oemol in enumerate(
            track(molecules, "Preprocessing molecules for Fragalysis...")):
        # Remove hydrogens
        oechem.OESuppressHydrogens(oemol, True)
        # Get original SMILES
        original_smiles = oechem.OEGetSDData(oemol, "SMILES")
        # Remove irrelevant SD tags.  The tags are collected first so that
        # SD data is not deleted while it is being iterated over.
        stale_tags = [
            sdpair.GetTag() for sdpair in oechem.OEGetSDDataPairs(oemol)
            if sdpair.GetTag() not in tags_to_retain
        ]
        for tag in stale_tags:
            oechem.OEDeleteSDData(oemol, tag)
        # Add required SD tags
        oechem.OESetSDData(oemol, "ref_mols", fragalysis_config.ref_mols)

        # If ref_pdb is zip file, point at this ligand's entry inside it
        if uses_reference_zip:
            oechem.OESetSDData(oemol, "ref_pdb",
                               f"references/references_{index}.pdb")
        else:
            oechem.OESetSDData(oemol, "ref_pdb", fragalysis_config.ref_pdb)

        oechem.OESetSDData(oemol, "original SMILES", original_smiles)

    # Build the initial header record from a copy of the first molecule
    oemol = molecules[0].CreateCopy()
    # Replace each SD value with the description of that field.  Tags are
    # snapshotted first so SD data is not rewritten during iteration.
    header_tags = [sdpair.GetTag() for sdpair in oechem.OEGetSDDataPairs(oemol)]
    for tag in header_tags:
        oechem.OESetSDData(oemol, tag, descriptions[tag])

    # Add other fields
    oemol.SetTitle("ver_1.2")
    oechem.OESetSDData(oemol, "ref_url", fragalysis_config.ref_url)
    oechem.OESetSDData(oemol, "submitter_name",
                       fragalysis_config.submitter_name)
    oechem.OESetSDData(oemol, "submitter_email",
                       fragalysis_config.submitter_email)
    oechem.OESetSDData(oemol, "submitter_institution",
                       fragalysis_config.submitter_institution)
    oechem.OESetSDData(oemol, "generation_date",
                       datetime.today().strftime("%Y-%m-%d"))
    oechem.OESetSDData(oemol, "method", fragalysis_config.method)
    molecules.insert(0, oemol)  # make it first molecule

    # Write the header record followed by the molecules, in the order read
    # (this overwrites the plain copy made above)
    with oechem.oemolostream(fa_ligands_path) as ofs:
        for oemol in track(molecules,
                           description="Writing Fragalysis SDF file..."):
            oechem.OEWriteMolecule(ofs, oemol)

    # TODO add check SDF step here?

    # Upload to fragalysis
    print("Uploading to Fragalysis...")
    print(f"--> Target: {fragalysis_config.target_name}")

    from fragalysis_api.xcextracter.computed_set_update import update_cset, REQ_URL

    if fragalysis_config.new_upload:
        update_set = "None"  # new upload
        print("--> Uploading a new set")
    else:
        # Existing set name: submitter name and method, whitespace removed
        update_set = ("".join(fragalysis_config.submitter_name.split()) + "-" +
                      "".join(fragalysis_config.method.split()))

        print(f"--> Updating set: {update_set}")

    if uses_reference_zip:
        pdb_zip_path = os.path.join(fa_path, "references.zip")
    else:
        pdb_zip_path = None

    taskurl = update_cset(
        REQ_URL,
        target_name=fragalysis_config.target_name,
        sdf_path=fa_ligands_path,
        pdb_zip_path=pdb_zip_path,
        update_set=update_set,
        upload_key=fragalysis_config.upload_key,
        submit_choice=1,
        add=False,
    )

    print(f"Upload complete, check upload status: {taskurl}")
Пример #24
0
            'CYS145-warhead dist stddev (A)':
            'is the standard deviation (in Angstroms) of the distance between CYS145 SG and any covalent warhead heavy atom during 10 ns simulation, where large values may indicate the warhead samples a variety of distances from CYS145 (covalent inhibitors only)',
            'ref_mols':
            'a comma separated list of the fragments that inspired the design of the new molecule (codes as they appear in fragalysis - e.g. x0104_0,x0692_0)',
            'ref_pdb':
            'The name of the fragment (and corresponding Mpro fragment structure) with the best scoring hybrid docking pose',
            'original SMILES':
            'the original SMILES of the compound before any computation was carried out',
        }

        for molecule in tqdm(docked_molecules):
            for sdpair in oechem.OEGetSDDataPairs(molecule):
                tag = sdpair.GetTag()
                value = sdpair.GetValue()
                # Remove hydrogens
                oechem.OESuppressHydrogens(molecule, True)
                # Remap SD tags
                if tag == 'Hybrid2':
                    oechem.OESetSDData(molecule, 'Chemgauss4', value)
                    oechem.OEDeleteSDData(molecule, tag)
                elif tag == 'fragments':
                    value = ','.join(
                        [f'{fragment}_0' for fragment in value.split(',')])
                    oechem.OESetSDData(molecule, 'ref_mols', value)
                    oechem.OEDeleteSDData(molecule, tag)
                elif tag == 'docked_fragment':
                    oechem.OESetSDData(molecule, 'ref_pdb', value + '_0')
                    oechem.OEDeleteSDData(molecule, tag)
                elif tag == 'covalent_distance_min':
                    oechem.OESetSDData(molecule,
                                       'CYS145-warhead dist minimum (A)',
Пример #25
0
def _apply_dyad_atom_state(protein, residue_pattern, atom_name, needs_change,
                           implicit_h_count, formal_charge):
    """Set implicit H count and formal charge on matching atoms that need changing."""
    for atom in get_atoms(protein, residue_pattern, atom_name):
        if needs_change(atom):
            oechem.OESuppressHydrogens(atom)  # strip hydrogens from residue
            atom.SetImplicitHCount(implicit_h_count)
            atom.SetFormalCharge(formal_charge)


def create_dyad(
        state: str, docking_system: DockingSystem,
        design_unit: oechem.OEDesignUnit,
        options: oespruce.OEMakeDesignUnitOptions) -> oechem.OEDesignUnit:
    """Put the His41/Cys145 dyad into the requested state and re-protonate.

    The protein of ``docking_system`` is modified in place, explicit hydrogens
    are added, ``design_unit`` is updated with that protein and re-protonated
    with HIS41 and CYS145 passed to ``bypass_atoms``.

    Parameters
    ----------
    state : str
        ``'His41(0) Cys145(0)'`` for the neutral dyad, or
        ``'His41(+) Cys145(-1)'`` for the charged dyad.  The spelling
        ``'His41(+) Cys145(-)'``, which the previous error message advertised
        but the code did not match, is accepted as an alias for the charged
        dyad.
    docking_system : DockingSystem
        Provides the ``protein`` molecule to modify
    design_unit : oechem.OEDesignUnit
        Design unit to update with the modified protein
    options : oespruce.OEMakeDesignUnitOptions
        Source of the protonation / place-hydrogens options

    Returns
    -------
    oechem.OEDesignUnit
        The same ``design_unit`` object, updated

    Raises
    ------
    ValueError
        If ``state`` is not one of the accepted strings.  Previously the
        exception was constructed but never raised, so an unknown state
        silently skipped the dyad modification.
    """
    his41 = "HIS:41:.*:.*:.*"
    cys145 = "CYS:145:.*:.*:.*"

    protonate_opts = options.GetPrepOptions().GetProtonateOptions()
    place_h_opts = protonate_opts.GetPlaceHydrogensOptions()
    protein = docking_system.protein

    if state in ('His41(+) Cys145(-1)', 'His41(+) Cys145(-)'):
        # Thiolate: strip the single explicit H from SG
        _apply_dyad_atom_state(protein, cys145, "SG",
                               lambda atom: atom.GetExplicitHCount() == 1,
                               0, -1)
        # Protonate whichever imidazole nitrogen has no explicit H
        for atom_name in ("ND1", "NE2"):
            _apply_dyad_atom_state(protein, his41, atom_name,
                                   lambda atom: atom.GetExplicitHCount() == 0,
                                   1, +1)

    elif state == 'His41(0) Cys145(0)':
        # Neutral thiol: give SG one H if it has no explicit H
        _apply_dyad_atom_state(protein, cys145, "SG",
                               lambda atom: atom.GetExplicitHCount() == 0,
                               1, 0)
        # Neutralize whichever imidazole nitrogen carries the +1 charge
        for atom_name in ("ND1", "NE2"):
            _apply_dyad_atom_state(protein, his41, atom_name,
                                   lambda atom: atom.GetFormalCharge() == 1,
                                   0, 0)

    else:
        raise ValueError(
            f"dyad_state must be one of ['His41(0) Cys145(0)', "
            f"'His41(+) Cys145(-1)', 'His41(+) Cys145(-)'], got {state!r}")

    place_h_opts = bypass_atoms([his41, cys145], place_h_opts)
    oechem.OEAddExplicitHydrogens(protein)
    oechem.OEUpdateDesignUnit(design_unit, protein,
                              oechem.OEDesignUnitComponents_Protein)
    oespruce.OEProtonateDesignUnit(design_unit, protonate_opts)
    return design_unit
Пример #26
0
def strip_hydrogens(complex: oechem.OEGraphMol) -> oechem.OEGraphMol:
    """Suppress the hydrogens of every non-hydrogen atom and return the molecule.

    ``complex`` is modified in place; the returned object is the same
    molecule that was passed in.
    """
    for candidate in complex.GetAtoms():
        # Atoms with atomic number <= 1 are left alone.
        if candidate.GetAtomicNum() <= 1:
            continue
        oechem.OESuppressHydrogens(candidate)
    return complex