def keep_molecule(mol, max_heavy_atoms = 100,
        remove_smirks = list(), max_metals = 0, elements = [], check_type = None):
    """
    Decide whether a molecule passes a set of filtering criteria.

    Parameters
    ----------
    mol - molecule handed to oechem.OECount and OESubSearch.Match
    max_heavy_atoms - reject molecules with more heavy atoms than this
    remove_smirks - iterable of pattern strings; a molecule matching any of
        them is rejected. Patterns that fail to parse are skipped.
    max_metals - reject molecules with more metal atoms than this
    elements - passed to read_Elements(); skipped only when None
        (note that the default is an empty list, so the check runs by default)
    check_type - comma-separated string passed on to check_atomtype();
        skipped when None

    Returns
    -------
    boolean - False as soon as one criterion fails, otherwise the result of
        check_valence(mol)
    """
    # The default arguments are mutable objects but are never modified here.
    if oechem.OECount(mol, oechem.OEIsMetal()) > max_metals:
        return False

    # Count heavy atoms once and reuse the value for both size limits.
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_atoms > max_heavy_atoms:
        return False
    # Remove very small molecules that are not interesting
    if heavy_atoms < 5:
        return False

    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            # Unparseable patterns are ignored rather than treated as errors.
            continue
        ss = oechem.OESubSearch(qmol)
        # Only existence matters, so stop at the first match instead of
        # collecting all of them.
        if any(True for _ in ss.Match(mol, False)):
            return False

    if elements is not None:
        elements_list = read_Elements(elements)
        if not check_element(mol, elements_list):
            return False

    if check_type is not None:
        types = check_type.split(",")
        if not check_atomtype(mol, types):
            return False

    return check_valence(mol)
示例#2
0
def extract_molecule_torsion_data(parent_mol, frag_mols=None):
    """
    Collect the dihedral angles of the parent molecule for every torsion
    found in its torsion fragments.

    Each torsion is keyed by the fragment InChIKey concatenated with the
    value of get_modified_inchi_key() for the four torsion atoms.

    @param parent_mol: molecule whose torsions are measured
    @type parent_mol: oechem.OEGraphMol
    @param frag_mols: torsion fragments; when None they are obtained from
        get_molecule_torsion_fragments(parent_mol)
    @return: tuple(parent_inchi, torsion_data) where torsion_data maps each
        torsion key to a list of 5-tuples: the four parent atom indices
        followed by OEGetTorsion(...) * oechem.Rad2Deg
    """
    fragments = frag_mols
    if fragments is None:
        fragments = get_molecule_torsion_fragments(parent_mol)

    torsion_data = collections.defaultdict(list)
    for fragment in fragments:
        fragment_key = oechem.OECreateInChIKey(fragment)
        to_parent = get_fragment_to_parent_atom_mapping(parent_mol, fragment)

        # Any failure inside a fragment is logged and the fragment skipped.
        try:
            _, b, c, _ = get_torsion_oeatom_list(fragment)

            for a in b.GetAtoms(oechem.OEIsHeavy()):
                for d in c.GetAtoms(oechem.OEIsHeavy()):
                    # a must not be c, and d must not be b
                    if a.GetIdx() == c.GetIdx() or d.GetIdx() == b.GetIdx():
                        continue

                    frag_atoms = [a, b, c, d]
                    parent_atoms = [to_parent[atom] for atom in frag_atoms]

                    # Only record the torsion when every fragment atom has
                    # the same atomic number as its mapped parent atom.
                    same_elements = all(
                        frag_atom.GetAtomicNum() == parent_atom.GetAtomicNum()
                        for frag_atom, parent_atom
                        in zip(frag_atoms, parent_atoms))
                    if not same_elements:
                        continue

                    ap, bp, cp, dp = parent_atoms
                    angle = (
                        oechem.OEGetTorsion(parent_mol, ap, bp, cp, dp) *
                        oechem.Rad2Deg)
                    torsion_key = fragment_key + get_modified_inchi_key(
                        fragment, frag_atoms)

                    record = (ap.GetIdx(), bp.GetIdx(), cp.GetIdx(),
                              dp.GetIdx(), angle)
                    torsion_data[torsion_key].append(record)

        except Exception as e:
            logging.warning(e)
            continue

    return (get_modified_molecule_inchi(parent_mol), torsion_data)
def keep_molecule(mol, remove_smirks = list()):
    """
    Decide whether a molecule should be stored.

    Parameters
    ----------
    mol - OEMol
    remove_smirks - list of pattern strings that must not occur in the
        molecule; strings that cannot be parsed are skipped

    Returns
    -------
    boolean - True only when all of the following hold
            - the molecule contains no metal atoms
            - it has at most 200 heavy atoms
            - none of the patterns in remove_smirks matches
            - check_valence(mol) returns a true value
    """
    # Composition limits: metals first, then size.
    if (oechem.OECount(mol, oechem.OEIsMetal()) > 0
            or oechem.OECount(mol, oechem.OEIsHeavy()) > 200):
        return False

    # Reject on the first pattern that yields at least one match.
    for pattern in remove_smirks:
        query = oechem.OEQMol()
        if oechem.OEParseSmarts(query, pattern):
            searcher = oechem.OESubSearch(query)
            hits = list(searcher.Match(mol, False))
            if hits:
                return False

    # Final verdict comes from the valence check.
    return check_valence(mol)
示例#4
0
    def assign_canonical_idx(mol):
        """
        Store, on every heavy atom of ``mol``, the index that atom has in a
        molecule re-parsed from the SMILES string of ``mol``.

        The index is saved as generic data under CANONICAL_IDX_TAG. Map
        indices on ``mol`` are overwritten in the process.
        """
        # Clear all map indices, then number the heavy atoms starting at 1 so
        # they can be traced through the SMILES round trip.
        for atom in mol.GetAtoms():
            atom.SetMapIdx(0)
        for number, heavy_atom in enumerate(
                mol.GetAtoms(oechem.OEIsHeavy()), 1):
            heavy_atom.SetMapIdx(number)

        smiles = oechem.OEMolToSmiles(mol)
        reparsed = oechem.OEGraphMol()
        oechem.OESmilesToMol(reparsed, smiles)

        # Pair atoms through their shared map index and record the index of
        # the re-parsed atom on the original one.
        for reparsed_atom in reparsed.GetAtoms(oechem.OEIsHeavy()):
            same_map_idx = oechem.OEHasMapIdx(reparsed_atom.GetMapIdx())
            original_atom = mol.GetAtom(same_map_idx)
            original_atom.SetData(CANONICAL_IDX_TAG, reparsed_atom.GetIdx())
示例#5
0
def get_dihedrals(mol, itag):
    """
    Walk over the bonds selected by IsRotatableOrMacroCycleBond and, for
    each one, store its four dihedral atoms (and the three bonds joining
    them) on the molecule as a group with the given tag.

    A bond is skipped when either of its end atoms has no heavy neighbour
    other than the opposite end atom.

    :type mol: oechem.OEMol
    :type itag: int
    :return: Number of dihedral groups created
    :rtype: int
    """
    created = 0
    for bond in mol.GetBonds(IsRotatableOrMacroCycleBond()):
        begin, end = bond.GetBgn(), bond.GetEnd()

        # First heavy neighbour on each side that is not the other end atom.
        begin_nbr = next(
            (nbr for nbr in begin.GetAtoms(oechem.OEIsHeavy()) if nbr != end),
            None)
        end_nbr = next(
            (nbr for nbr in end.GetAtoms(oechem.OEIsHeavy()) if nbr != begin),
            None)
        if begin_nbr is None or end_nbr is None:
            continue

        atom_path = [begin_nbr, begin, end, end_nbr]
        bond_path = [
            mol.GetBond(begin_nbr, begin), bond,
            mol.GetBond(end_nbr, end)
        ]

        # Flip the direction when the begin-side neighbour has the lower
        # atom index.
        if begin_nbr.GetIdx() < end_nbr.GetIdx():
            atom_path = atom_path[::-1]
            bond_path = bond_path[::-1]

        atoms = oechem.OEAtomVector(atom_path)
        bonds = oechem.OEBondVector(bond_path)

        created += 1
        mol.NewGroup(itag, atoms, bonds)

    return created
def main(argv=[__name__]):
    """
    Split a molecule database into ``n_servers`` indexed shard files.

    Usage: ``<prog> <database> <prefix> <n_servers>``

    Every molecule is prepared with OEPrepareFastROCSMol and written to the
    shard selected by its heavy-atom count modulo ``n_servers``. Shards are
    named ``<base>_<i>.<ext>`` for i in 1..n_servers, where the extension is
    taken from the prefix if it has one and from the input file otherwise.
    After writing, a molecule database index is created for every shard.

    Returns 0 on success; problems are reported through oechem.OEThrow.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <database> <prefix> <n_servers>" % argv[0])

    # input - preserve rotor-offset-compression
    ifs = oechem.oemolistream()
    oechem.OEPreserveRotCompress(ifs)

    ifname = argv[1]
    if not ifs.open(ifname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % ifname)

    # output
    prefix = argv[2]
    ext = oechem.OEGetFileExtension(prefix)
    extstrt = len(prefix)
    if ext:
        # strip ".<ext>" from the prefix
        extstrt = -(len(ext) + 1)
    else:
        ext = oechem.OEGetFileExtension(ifname)
    base = prefix[:extstrt]

    nservers = int(argv[3])
    if nservers < 1:
        # The shard index is computed modulo nservers, so it must be positive.
        oechem.OEThrow.Fatal("n_servers must be at least 1, got %d" % nservers)

    outstrms = []
    for i in range(1, nservers + 1):
        # Format all parts together so a '%' inside the prefix cannot be
        # mistaken for a conversion specifier.
        ofname = "%s_%i.%s" % (base, i, ext)
        ofs = oechem.oemolostream()
        if not ofs.open(ofname):
            # Report the file that actually failed, not the raw prefix.
            oechem.OEThrow.Fatal("Unable to open %s for writing" % ofname)

        outstrms.append(ofs)

    dots = oechem.OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        oefastrocs.OEPrepareFastROCSMol(mol)

        nhvyatoms = oechem.OECount(mol, oechem.OEIsHeavy())

        ofs = outstrms[nhvyatoms % nservers]
        oechem.OEWriteMolecule(ofs, mol)

        dots.Update()

    dots.Total()

    # Streams are closed before indexing their files.
    for strm in outstrms:
        fname = strm.GetFileName()
        strm.close()
        oechem.OEThrow.Info("Indexing %s" % fname)
        if not oechem.OECreateMolDatabaseIdx(fname):
            oechem.OEThrow.Fatal("Failed to index %s" % fname)

    return 0
    def heavy_atom_count(self):
        """ Return how many heavy atoms ``self.mol`` contains

        Returns
        -------
        int, value of oechem.OECount for ``self.mol`` with the OEIsHeavy
        predicate
        """
        heavy_predicate = oechem.OEIsHeavy()
        return oechem.OECount(self.mol, heavy_predicate)
示例#8
0
def GetMinAndMaxBFactor(ligand, protein, maxdistance=4.0):
    """
    Return the smallest and largest residue B-factor seen over the heavy
    atoms of the ligand and the protein atoms found near them.

    Protein atoms are taken from an OENearestNbrs search on ``protein`` with
    ``maxdistance`` and only count when ConsiderResidueAtom() accepts them.
    When no atom contributes, the result is (inf, -inf).
    """
    lowest = float("inf")
    highest = float("-inf")

    # Ligand atoms
    for ligand_atom in ligand.GetAtoms(oechem.OEIsHeavy()):
        bfactor = oechem.OEAtomGetResidue(ligand_atom).GetBFactor()
        lowest = min(lowest, bfactor)
        highest = max(highest, bfactor)

    # Protein atoms close to ligand atoms
    nbr_search = oechem.OENearestNbrs(protein, maxdistance)
    for ligand_atom in ligand.GetAtoms(oechem.OEIsHeavy()):
        for contact in nbr_search.GetNbrs(ligand_atom):
            protein_atom = contact.GetBgn()
            residue = oechem.OEAtomGetResidue(protein_atom)
            if not ConsiderResidueAtom(protein_atom, residue):
                continue
            bfactor = residue.GetBFactor()
            lowest = min(lowest, bfactor)
            highest = max(highest, bfactor)

    return lowest, highest
示例#9
0
def get_labeled_mol(smiles, label='heavy'):
    """
    Build an OEMol from a SMILES string and number its heavy atoms.

    Every heavy atom receives a 1-based running index stored as generic
    data under the tag ``label``.

    Parameters
    ----------
    smiles - SMILES string to parse
    label - data tag under which the index is stored (default 'heavy')

    Returns
    -------
    the labeled OEMol, or None (after printing a message) when the SMILES
    string cannot be parsed
    """
    mol = oechem.OEMol()
    if not oechem.OESmilesToMol(mol, smiles):
        print("Couldn't parse smiles (%s) returning None" % smiles)
        return None

    # Use the caller-supplied tag; it was previously hard-coded to 'heavy',
    # which silently ignored the ``label`` argument.
    for number, atom in enumerate(mol.GetAtomIter(oechem.OEIsHeavy()), 1):
        atom.SetData(label, number)

    return mol
示例#10
0
    def MarkBridgingAtoms(BRIDGE_ATOM_IDX, mol, torsionSet):
        """
        Add and tag heavy neighbours of N/O/S atoms that carry map index 2.

        A neighbour is processed only when it is not yet in ``torsionSet``
        and its map index is 0. It is added to the set and then given map
        index 3 if its heavy degree is 1, otherwise ``BRIDGE_ATOM_IDX``.
        """
        is_n_o_or_s = oechem.OEOrAtom(
            oechem.OEOrAtom(oechem.OEIsNitrogen(), oechem.OEIsOxygen()),
            oechem.OEIsSulfur())
        anchor_filter = oechem.OEAndAtom(oechem.OEHasMapIdx(2), is_n_o_or_s)

        for anchor in mol.GetAtoms(anchor_filter):
            for neighbor in anchor.GetAtoms(oechem.OEIsHeavy()):
                if torsionSet.HasAtom(neighbor) or neighbor.GetMapIdx() != 0:
                    continue

                torsionSet.AddAtom(neighbor)
                if neighbor.GetHvyDegree() == 1:
                    neighbor.SetMapIdx(3)
                else:
                    neighbor.SetMapIdx(BRIDGE_ATOM_IDX)
示例#11
0
def SetAverageBFactorOfNearbyProteinAtoms(ligand, protein, itag, maxdistance=4.0):
    """
    Attach to every heavy ligand atom the mean residue B-factor of the
    protein atoms found near it.

    Neighbours come from an OENearestNbrs search on ``protein`` with
    ``maxdistance`` and only count when ConsiderResidueAtom() accepts them.
    The mean is stored as double data under ``itag``; it is 0.0 when no
    neighbour qualifies.
    """
    nbr_search = oechem.OENearestNbrs(protein, maxdistance)
    for ligand_atom in ligand.GetAtoms(oechem.OEIsHeavy()):
        bfactors = []
        for contact in nbr_search.GetNbrs(ligand_atom):
            protein_atom = contact.GetBgn()
            residue = oechem.OEAtomGetResidue(protein_atom)
            if ConsiderResidueAtom(protein_atom, residue):
                bfactors.append(residue.GetBFactor())

        if bfactors:
            average = sum(bfactors, 0.0) / len(bfactors)
        else:
            average = 0.0
        ligand_atom.SetDoubleData(itag, average)
示例#12
0
    def GetFuncGroups(mol):
        '''
        Collect the functional-group fragments of a molecule that are
        small, contain a hetero atom and have no ring atoms.

        :param mol: molecule passed to oemedchem.OEGetFuncGroupFragments
        :return: list of oechem.OEAtomBondSet, one per fragment that has
                 at most 5 heavy atoms, at least one hetero atom and no
                 atom in a ring
        '''
        def _is_wanted(fragment):
            # Checks run in the same order as the size, hetero, ring filters.
            return (oechem.OECount(fragment, oechem.OEIsHeavy()) <= 5
                    and oechem.OECount(fragment, oechem.OEIsHetero()) != 0
                    and oechem.OECount(fragment, oechem.OEAtomIsInRing()) <= 0)

        return [
            oechem.OEAtomBondSet(fragment)
            for fragment in oemedchem.OEGetFuncGroupFragments(mol)
            if _is_wanted(fragment)
        ]
示例#13
0
def main(argv=[__name__]):
    """
    Print one line of simple statistics for every molecule in a file.

    Usage: ``<prog> <infile>``. After a header line, each molecule is
    reported with its title ("Untitled" when empty), molecular weight,
    atom count, heavy-atom count, ring-atom count, rotor count and
    conformer count.
    """
    if len(argv) != 2:
        oechem.OEThrow.Usage("%s <infile>" % argv[0])

    ifs = oechem.oemolistream()
    if not ifs.open(argv[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    print("Title MolWt NumAtoms NumHeavyAtoms NumRingAtoms NumRotors NumConfs")

    row_format = "%s %.3f %d %d %d %d %d"
    for mol in ifs.GetOEMols():
        title = mol.GetTitle() or "Untitled"
        weight = oechem.OECalculateMolecularWeight(mol)
        num_atoms = mol.NumAtoms()
        num_heavy = oechem.OECount(mol, oechem.OEIsHeavy())
        num_ring = oechem.OECount(mol, oechem.OEAtomIsInRing())
        num_rotors = oechem.OECount(mol, oechem.OEIsRotor())
        num_confs = mol.NumConfs()
        print(row_format % (title, weight, num_atoms, num_heavy, num_ring,
                            num_rotors, num_confs))
示例#14
0
def construct_dihedral_energy_profile(torsion_conformers, num_points=24):
    """
    Merge per-angle torsion conformers into one multi-conformer molecule and
    build the relative energy profile of the scan.

    Each valid entry of ``torsion_conformers`` supplies, via SD data on its
    active conformer, a "CONFORMER_LABEL" whose last two characters are the
    angle index and a "PSI4_ENERGY" value. Entries that evaluate false are
    skipped.

    :param torsion_conformers: sequence of molecules, one per scanned angle
    :param num_points: number of angles in the profile; angle i is
        ``360 * i / num_points``
    :return: tuple(output_conformers, torsional_strain, torsion_inchi_list)
        - output_conformers: copy of the first entry holding one conformer
          per valid input, titled with the fragment label
        - torsional_strain: array with columns (angle, energy minus the
          minimum energy); angles without an energy get -1
        - torsion_inchi_list: keys built from the molecule InChIKey plus
          get_modified_inchi_key() for every heavy-atom torsion around the
          central bond
    :raises ValueError: if no entry of ``torsion_conformers`` is valid
    """
    angle_list = np.array([360 * i / num_points for i in range(num_points)])

    profile = np.full(num_points, np.nan)
    fragment_label = None
    last_valid_mol = None
    for mol in torsion_conformers:
        if not mol:
            continue
        last_valid_mol = mol
        conf = mol.GetActive()
        conf_title = get_sd_data(conf, "CONFORMER_LABEL")
        tor_atoms = get_sd_data(mol, "TORSION_ATOMS_ParentMol").split()
        parent_name = conf_title[:-3]
        dih_label = "_".join(str(x) for x in tor_atoms)
        fragment_label = parent_name + "_" + dih_label
        angle_idx = int(conf_title[-2:])

        # np.float was removed from NumPy; the builtin float is equivalent.
        energy = float(get_sd_data(conf, "PSI4_ENERGY"))
        profile[angle_idx] = energy
        logging.debug("angle_idx: %d", angle_idx)
        logging.debug("Psi4 Energy: %f", energy)

    if last_valid_mol is None:
        # Without a valid conformer there is no label, no energy and no SD
        # data to read, so fail with a clear message.
        raise ValueError("torsion_conformers contains no valid conformer")

    # check for angles where no energies are available
    # (element-wise mask; np.all() here would collapse it to a single bool)
    for angle in angle_list[np.isnan(profile)]:
        logging.warning(
            "Warning: No energies found for angle {:.1f} for fragment: {}".
            format(angle, fragment_label))

    # calculate relative energies
    min_energy = np.nanmin(profile)
    profile -= min_energy
    profile[np.isnan(profile)] = -1  # set nans to -1
    torsional_strain = np.column_stack((angle_list, profile))

    # combine conformers
    # NOTE(review): assumes the first entry is a valid molecule - confirm.
    output_conformers = oechem.OEMol(torsion_conformers[0])
    output_conformers.DeleteConfs()
    title = fragment_label
    output_conformers.SetTitle(title)

    # setup normalization
    # Read the tag from the last valid molecule rather than from whatever
    # the loop variable happened to hold after the loop.
    torsion_tag = "TORSION_ATOMS_FRAGMENT"
    torsion_atoms_in_fragment = get_sd_data(last_valid_mol,
                                            torsion_tag).split()
    logging.debug("torsion atoms in fragment: %s", torsion_atoms_in_fragment)
    # SD data holds 1-based atom numbers; convert to 0-based indices.
    dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
    dih, _ = get_dihedral(output_conformers, dihedral_atom_indices)

    for old_conf in torsion_conformers:
        if old_conf:
            new_conf = output_conformers.NewConf(old_conf)
            normalize_coordinates(new_conf, dih)
            oechem.OEClearSDData(new_conf)
            # Copy all SD data except the per-conformer title/label tags.
            for dp in oechem.OEGetSDDataPairs(old_conf.GetActive()):
                if dp.GetTag() not in ["OEConfTitle", "CONFORMER_LABEL"]:
                    oechem.OESetSDData(new_conf, dp.GetTag(), dp.GetValue())
            torsion_angle = get_sd_data(old_conf, "TORSION_ANGLE")
            title = fragment_label + ": Angle " + torsion_angle
            new_conf.SetTitle(title)

    write_energy_profile_to_sddata(output_conformers, torsional_strain.copy())

    # Calculate all possible torsion inchi keys for this fragment
    torsion_inchi_list = []
    inchi_key = oechem.OECreateInChIKey(output_conformers)
    _, b, c, _ = get_torsion_oeatom_list(output_conformers)
    for a in b.GetAtoms(oechem.OEIsHeavy()):
        for d in c.GetAtoms(oechem.OEIsHeavy()):
            if a.GetIdx() == c.GetIdx() or d.GetIdx() == b.GetIdx():
                continue

            torsion_inchi = inchi_key + get_modified_inchi_key(
                output_conformers, [a, b, c, d])
            torsion_inchi_list.append(torsion_inchi)

    return output_conformers, torsional_strain, torsion_inchi_list
示例#15
0
def IsMoleculeInHeavyAtomCountRange(min, max, mol):
    """Return IsBetween(min, max, n) where n is the heavy-atom count of mol."""
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    in_range = IsBetween(min, max, heavy_atoms)
    return in_range
示例#16
0
    def GetTorsions(mol):
        '''
        Build one fragment molecule per rotatable bond of the input molecule.

        Goes through each rotatable bond in the molecule
        and extracts torsion atoms (a-b-c-d)
        Core torsion atoms are extended by one bond
        If core or extended atoms are part of a ring,
        then entire ring is kept
        Keep ortho substitution
        Keep functional groups that have at least one atom overlap
        with the core/extended torsion atoms
        Functional group inclusion criteria:
        - <= 5 heavy atoms
        - must contain at least one hetero atom
        - non-ring
        Add methyl cap if bond involving hetero atom is broken

        Side effects on the input molecule: hybridization is assigned, every
        atom gets an "idx" data entry (atom index + 1), and map indices are
        used as scratch markers and reset to zero before returning.

        @param mol: OEGraphMol
        @type mol: OEGraphMol
        @return: list[OEGraphMol]
        '''
        # mol = OEGraphMol(input_mol)

        oechem.OEAssignHybridization(mol)
        funcGrps = TorsionGenerator.GetFuncGroups(mol)
        # central bonds that already produced a fragment
        includedTorsions = oechem.OEAtomBondSet()
        torsionMols = []
        # remember each atom's 1-based index as generic data
        for atom in mol.GetAtoms():
            atom.SetData("idx", atom.GetIdx() + 1)

        # fall back to all rotor torsions when canonical ordering is
        # unavailable (get_canonical_torsions returned None)
        torsions = get_canonical_torsions(mol)
        if torsions is None:
            torsions = oechem.OEGetTorsions(mol, oechem.OEIsRotor())

        for torsion in torsions:
            # only torsions made of four non-hydrogen atoms are considered
            if torsion.a.IsHydrogen() or torsion.b.IsHydrogen() or \
                torsion.c.IsHydrogen() or torsion.d.IsHydrogen():
                continue

            # one fragment per central bond b-c
            torsion_bond = mol.GetBond(torsion.b, torsion.c)
            if includedTorsions.HasBond(torsion_bond):
                continue
            # if includedTorsions.HasAtom(torsion.b) and \
            #     includedTorsions.HasAtom(torsion.c):
            #     continue

            # revert map idx to zero in original mol
            for atom in mol.GetAtoms():
                atom.SetMapIdx(0)

            # includedTorsions.AddAtom(torsion.b)
            # includedTorsions.AddAtom(torsion.c)
            includedTorsions.AddBond(torsion_bond)

            # NOTE(review): the set is constructed from mol.GetBonds(), i.e.
            # from all bonds of the molecule - confirm this is intended.
            torsionSet = oechem.OEAtomBondSet(mol.GetBonds())
            torsionSet.AddAtoms([torsion.a, torsion.b, torsion.c, torsion.d])
            # map idx 1 marks the atoms that are in the set at this point
            for atom in torsionSet.GetAtoms():
                atom.SetMapIdx(1)

            # extend core torsion atoms by one bond
            nbrs = TorsionGenerator.GetNbrs(torsionSet)
            torsionSet.AddAtoms(nbrs)

            # include ring atoms
            ringAtoms = TorsionGenerator.GetSameRingAtoms(mol, torsionSet)
            torsionSet.AddAtoms(ringAtoms)

            # map idx 2 marks atoms added by the neighbour/ring extension
            for atom in torsionSet.GetAtoms():
                if not atom.GetMapIdx() == 1:
                    atom.SetMapIdx(2)

            # add functional groups that overlap with torsion set
            TorsionGenerator.AddFuncGroupAtoms(funcGrps, torsionSet)

            # add relevant ring atoms (ortho substituents and ring H)
            TorsionGenerator.AddRelevantRingAtoms(mol, torsion, torsionSet)

            # special treatment for C=O
            # for every sp2 oxygen in the set, pull in the remaining heavy
            # neighbours of its in-set neighbours and mark them with idx 2
            for atom in torsionSet.GetAtoms(
                    oechem.OEAndAtom(
                        oechem.OEIsOxygen(),
                        oechem.OEIsAtomHybridization(
                            oechem.OEHybridization_sp2))):
                for nbr in atom.GetAtoms():
                    if torsionSet.HasAtom(nbr):
                        for nbr2 in nbr.GetAtoms(oechem.OEIsHeavy()):
                            if not torsionSet.HasAtom(nbr2):
                                nbr2.SetMapIdx(2)
                                torsionSet.AddAtom(nbr2)

            # mark bridging atom and cap if needed
            BRIDGE_ATOM_IDX = 4
            TorsionGenerator.MarkBridgingAtoms(BRIDGE_ATOM_IDX, mol,
                                               torsionSet)

            # tag the four torsion atoms so they can be located in the
            # extracted fragment (passed to SetSDData below)
            A_IDX = 11
            B_IDX = 12
            C_IDX = 13
            D_IDX = 14
            torsion.a.SetMapIdx(A_IDX)
            torsion.b.SetMapIdx(B_IDX)
            torsion.c.SetMapIdx(C_IDX)
            torsion.d.SetMapIdx(D_IDX)

            # extract the selected atoms and bonds into a new molecule
            torsionMol = oechem.OEGraphMol()
            oechem.OESubsetMol(torsionMol, mol, torsionSet, True)
            torsionMol.Sweep()
            torsionMols.append(torsionMol)

            # change bridge atom to Carbon
            # and add implicit hydrogens up to a total valence of four
            for atom in torsionMol.GetAtoms(
                    oechem.OEHasMapIdx(BRIDGE_ATOM_IDX)):
                atom.SetAtomicNum(oechem.OEElemNo_C)
                explicit_valence = atom.GetExplicitValence()
                if explicit_valence < 4:
                    atom.SetImplicitHCount(4 - explicit_valence)

            TorsionGenerator.SetSDData(A_IDX, B_IDX, C_IDX, D_IDX, torsion,
                                       torsionMol)

            # set map idx to zero in torsion mol
            for atom in torsionMol.GetAtoms():
                atom.SetMapIdx(0)

        # revert map idx to zero in original mol
        for atom in mol.GetAtoms():
            atom.SetMapIdx(0)

        return torsionMols
    # create subdirectory for this set
    if not os.path.exists(fileprefix):
        os.makedirs(fileprefix)
    os.chdir(fileprefix)

    # copy temporary files
    copyfile('../../frcmod.Frosst_AlkEthOH', './frcmod.Frosst_AlkEthOH')
    copyfile('../../leaprc.Frosst_AlkEthOH', './leaprc.Frosst_AlkEthOH')
    copyfile('../../' + fileprefix + '.oeb', './' + fileprefix + '.oeb')

    ifs = oechem.oemolistream(fileprefix + '.oeb')
    mol = oechem.OEMol()
    for mol in ifs.GetOEMols():
        # add atom names c0 (methane) and c1302 (water)
        if (oechem.OECount(mol, oechem.OEIsHeavy()) == 1):
            oechem.OETriposAtomNames(mol)
        # generate input files
        if hasAmberParams(mol, cmd_string):
            print('%s successful writing amber .mol2, .top, and .crd file' %
                  mol.GetTitle())

        # treat water with diff pre-existing tleap input file
        elif mol.GetTitle().split("_")[1] == 'c1302':
            copyfile('../../files_for_c1302/frcmod.tip3p', './frcmod.tip3p')
            copyfile('../../files_for_c1302/AlkEthOH_c1302_edited.leap_in',
                     './AlkEthOH_c1302_edited.leap_in')
            os.system(
                'tleap -f leaprc.Frosst_AlkEthOH -f AlkEthOH_c1302_edited.leap_in >| leap_lig.stdout'
            )
            print('%s successful writing amber .mol2, .top, and .crd file' %
示例#18
0
        if not frag in frags:
            print('{} not in {}'.format(frag, bond))
            failures[ser_bond] = frag
            continue

        idx = frags.index(frag)
        sqrt_mmd = np.sqrt(np.asarray(mmd_scores))
        norm = plt.Normalize(min(sqrt_mmd), max(sqrt_mmd))
        normed_scores = norm(sqrt_mmd)
        score = sqrt_mmd[idx]
        normed_score = normed_scores[idx]
        print(f.fragments)
        if tuple(bond) not in f.fragments:
            bond = tuple(reversed(bond))
        mol = f.fragments[tuple(bond)]
        size = oechem.OECount(mol, oechem.OEIsHeavy())
        score_size[ser_bond] = [frag, score, normed_score, size]

        if ser_bond not in frag_scores_2:
            continue
        frags_2 = frag_scores_2[ser_bond]['frags']
        mmd_scores_2 = frag_scores_2[ser_bond]['mmd_scores']
        sqrt_mmd_2 = np.sqrt(np.asarray(mmd_scores_2))
        idx_2 = frags_2.index(frag)

        score_2 = sqrt_mmd_2[idx_2]
        norm_2 = plt.Normalize(min(sqrt_mmd_2), max(sqrt_mmd_2))
        normed_scores_2 = norm_2(sqrt_mmd_2)

        normed_score_2 = normed_scores_2[idx_2]
        if tuple(bond) not in f.fragments:
示例#19
0
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED.  OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Build the example molecule from a fixed SMILES string.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2")

# Report the atom counts, one predicate at a time.
heavy_atom_count = oechem.OECount(mol, oechem.OEIsHeavy())
print("Number of heavy atoms =", heavy_atom_count)
ring_atom_count = oechem.OECount(mol, oechem.OEAtomIsInRing())
print("Number of ring atoms  =", ring_atom_count)
# @ </SNIPPET>
示例#20
0
def do(controller):
    """
    Calculate per-atom buried solvent-accessible surface areas for the
    ligands of biological assemblies and write them to disk.

    For every PDB entry returned by ``get_assembly_sets(args)`` a file
    ``binding_site_atom_surface_areas.credo`` is written below the
    ``credo_data`` directory. For each ligand with at least 7 heavy atoms,
    accessible surfaces are computed for the ligand alone, its binding site
    alone and the complex of both; atoms whose area differs between the free
    and the bound state are passed to ``write_atoms``.

    Options read from ``controller.pargs``: ``progressbar``, ``incremental``,
    ``update`` (age in days), ``quat`` and ``dry_run``.

    Only quaternary structures are supported: if ``args.quat`` is not set the
    process exits with status 1 when the first assembly is reached.
    """
    # get the controller command
    cmd = controller.command

    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(
        OEAtomHasIntData(('entity_type_bm', 0)),
        OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=[
            'PDB entries: ',
            SimpleProgress(), ' ',
            Percentage(),
            Bar()
        ],
                          maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar:
            bar.update(counter)

        # create a data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(),
                                       pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas of all atoms will be written
        surface_areas_path = os.path.join(
            struct_data_dir, 'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if args.incremental and exists(
                surface_areas_path) and getsize(surface_areas_path) > 0:
            continue
        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() -
              (args.update * 60 * 60 * 24) and getsize(surface_areas_path)):
            app.log.info("Output for PDB entry {0} exists and is more recent than {1} days. Skipped."
                         .format(pdb, args.update))
            continue

        # output file stream and CSV writer; the context manager closes the
        # file even when processing an assembly raises or exits
        with open(surface_areas_path, 'w') as atomfs:
            atomwriter = csv.writer(atomfs, dialect='tabs')

            # deal with each found assembly separately
            # some pdb entries consist of more than one
            for assembly_name in assemblyset:
                if not args.quat:
                    app.log.error("the calculation of buried ligand surface areas "
                                  "is only supported for quaternary structures.")
                    sys.exit(1)

                path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                    pdb.lower(), assembly_name)

                if not os.path.isfile(path):
                    app.log.warn("cannot calculate buried surface areas: "
                                 "file {} does not exist!".format(path))
                    # nothing to read for this assembly; do not try to open
                    # a file that is known to be missing
                    continue

                # get the quaternary structure
                ifs.open(str(path))

                try:
                    # builtin next() works on both Python 2 and Python 3
                    assembly = next(ifs.GetOEGraphMols())
                except StopIteration:
                    assembly = None

                if not assembly:
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} does not contain a valid molecule!".format(path))
                    continue

                if not assembly.GetListData('ligands'):
                    continue

                # identifier of the assembly
                assembly_serial = assembly.GetIntData('assembly_serial')

                # remove all non-polymers from assembly
                for atom in assembly.GetAtoms(nonpolymers):
                    assembly.DeleteAtom(atom)

                # ignore bizarre assemblies
                if not assembly.NumAtoms():
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} contains assembly with no atoms!".format(path))
                    continue

                # keep only the location state with the largest average occupancy
                assembly_hi_occ = oechem.OEGraphMol()
                altlocfactory = oechem.OEAltLocationFactory(assembly)
                altlocfactory.MakeCurrentAltMol(assembly_hi_occ)

                # get the ligands
                ligands = assembly_hi_occ.GetListData('ligands')

                # iterate through all ligands of the biomolecule and calculate the buried
                # surface area atom contributions for all involved atoms
                for ligand in ligands:

                    # ignore small ligands
                    if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7:
                        continue

                    entity_serial = ligand.GetIntData('entity_serial')

                    # keep only the location state with the largest average occupancy
                    altlig = oechem.OEGraphMol()
                    altlocfactory = oechem.OEAltLocationFactory(ligand)
                    altlocfactory.MakeCurrentAltMol(altlig)

                    cmplx_srf = oespicoli.OESurface()
                    ligand_srf = oespicoli.OESurface()

                    # make solvent-accessible surface of ligand
                    oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig, 0.5, 1.4)

                    # get the atom contributions of the ligand surface
                    ligand_atom_areas = get_atom_surface_areas(altlig, ligand_srf)

                    # extract the binding site of the assembly to speed up surface
                    # area calculation
                    binding_site = get_binding_site(assembly_hi_occ, altlig)

                    # make solvent-accessible surface of binding site
                    binding_site_srf = oespicoli.OESurface()
                    oespicoli.OEMakeAccessibleSurface(binding_site_srf,
                                                      binding_site, 0.5, 1.4)

                    # get the atom contributions of the binding site surface
                    binding_site_atom_areas = get_atom_surface_areas(
                        binding_site, binding_site_srf)

                    # create complex: binding site atoms first, ligand atoms after
                    cmplx = oechem.OEGraphMol()
                    oechem.OEAddMols(cmplx, binding_site)
                    oechem.OEAddMols(cmplx, altlig)

                    # make solvent-accessible surface of the complex
                    oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx, 0.5, 1.4)

                    # surface area atom contributions of the whole complex
                    cmplx_atom_areas = get_atom_surface_areas(cmplx, cmplx_srf)

                    # extract the atom surface areas in the bound state through
                    # slices; relies on the atom order used to build the complex
                    num_site_atoms = binding_site.NumAtoms()
                    binding_site_atom_areas_bound = cmplx_atom_areas[:num_site_atoms]
                    ligand_atom_areas_bound = cmplx_atom_areas[num_site_atoms:]

                    # difference between apo and bound state per atom
                    binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                    ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                    # boolean map indicating for which atom the surface area has changed
                    binding_site_atom_map = binding_site_delta != 0
                    ligand_atom_map = ligand_delta != 0

                    if args.dry_run:
                        continue

                    # only record the atoms where the solvent-accessible surface
                    # area has actually changed
                    write_atoms(atomwriter, binding_site, binding_site_atom_map,
                                pdb, assembly_serial, entity_serial,
                                binding_site_atom_areas,
                                binding_site_atom_areas_bound)

                    # only record the atoms where the solvent-accessible surface area
                    # has actually changed
                    write_atoms(atomwriter, altlig, ligand_atom_map, pdb,
                                assembly_serial, entity_serial, ligand_atom_areas,
                                ligand_atom_areas_bound)

                    app.log.debug("wrote buried surface areas for all ligands in "
                                  "biomolecule {} to {}.".format(
                                      pdb, surface_areas_path))

                atomfs.flush()

    if args.progressbar:
        bar.finish()
def main(argv=[__name__]):
    """Report the number of heavy atoms in the molecule given by ``-mol``.

    The command line in ``argv`` is parsed with an ``oechem.OEInterface``
    built from the module-level ``InterfaceData`` definition; the count is
    printed to standard output.
    """
    interface = oechem.OEInterface(InterfaceData, argv)
    molecule = interface.GetOEGraphMol("-mol")

    heavy_atom_count = oechem.OECount(molecule, oechem.OEIsHeavy())
    print("Number of heavy atoms in molecule = %d" % heavy_atom_count)
示例#22
0
def get_canonical_torsions(mol):
    '''
    Return the unique heavy-atom torsions of a molecule in canonical order.

    Only one torsion is returned for each central (b-c) bond.
    Canonical ordering is determined using the order of atoms
    in the canonical smiles representation:

    1. give every heavy atom a temporary map index, generate a canonical
       smiles from the input molecule and parse it back; the atom index in
       the re-parsed molecule is used as the canonical index of the atom
    2. create a list of
       (b_idx + c_idx, min(b_idx, c_idx), max(b_idx, c_idx),
        min(a_idx, d_idx), max(a_idx, d_idx), torsion)
    3. sort the list from step 2 on its first five entries and keep the
       first torsion encountered for each central bond

    Side effects: the map index of every atom of ``mol`` is 0 when the
    function returns, and the temporary canonical-index tag is removed from
    the heavy atoms. This cleanup is also performed when the function fails.

    :param mol: OEGraphMol
    :return: list of torsion objects as yielded by oechem.OEGetTorsions,
             or None when the canonical indices could not be assigned
    '''
    CANONICAL_IDX_TAG = 'can_idx'

    def assign_canonical_idx(mol):
        # Label heavy atoms 1..N so they can be matched after the round trip
        for atom in mol.GetAtoms():
            atom.SetMapIdx(0)
        for map_idx, atom in enumerate(mol.GetAtoms(oechem.OEIsHeavy())):
            atom.SetMapIdx(map_idx + 1)

        can_smiles = oechem.OEMolToSmiles(mol)
        can_mol = oechem.OEGraphMol()
        oechem.OESmilesToMol(can_mol, can_smiles)

        # The position of an atom in the re-parsed molecule is its canonical index
        for can_atom in can_mol.GetAtoms(oechem.OEIsHeavy()):
            atom = mol.GetAtom(oechem.OEHasMapIdx(can_atom.GetMapIdx()))
            atom.SetData(CANONICAL_IDX_TAG, can_atom.GetIdx())

    def clear_canonical_idx(mol):
        # Remove the temporary labels written by assign_canonical_idx
        for atom in mol.GetAtoms(oechem.OEIsHeavy()):
            atom.SetMapIdx(0)
            atom.DeleteData(CANONICAL_IDX_TAG)

    try:
        assign_canonical_idx(mol)
    except Exception as e:
        print('Error GetCanonicalizedTorsions. ', e)
        try:
            clear_canonical_idx(mol)
        except Exception:
            # Best effort only: the input may not be a usable molecule at
            # all, and the caller is still told about the failure via None.
            pass
        return None

    try:
        torsions = []
        for torsion in oechem.OEGetTorsions(mol, oechem.OEIsRotor()):
            atoms = (torsion.a, torsion.b, torsion.c, torsion.d)
            if any(atom.IsHydrogen() for atom in atoms):
                continue

            a_idx, b_idx, c_idx, d_idx = (
                atom.GetData(CANONICAL_IDX_TAG) for atom in atoms)
            sort_key = (b_idx + c_idx,
                        min(b_idx, c_idx), max(b_idx, c_idx),
                        min(a_idx, d_idx), max(a_idx, d_idx))
            torsions.append((sort_key, torsion))

        # The sort is stable, so torsions with equal keys keep iteration order
        torsions.sort(key=operator.itemgetter(0))

        seen_bonds = set()
        unique_torsions = []
        for _, torsion in torsions:
            bond = mol.GetBond(torsion.b, torsion.c)
            if bond is not None and bond.GetIdx() not in seen_bonds:
                seen_bonds.add(bond.GetIdx())
                unique_torsions.append(torsion)

        return unique_torsions
    finally:
        # Always strip the temporary labels, even if the loop above raised
        clear_canonical_idx(mol)
示例#23
0
def all_info_df(ffdirectorylist, all_ff_df):
    """
    Run TFD and TanimotoCombo on all molecules for every pair of forcefields.

    For each name in ``all_ff_df['MolNames']`` the file
    ``<directory>/<forcefield>/<molname>.mol2`` is read for every forcefield
    (``directory`` is a module-level name defined outside this function).
    The heavy atom count is taken from the first forcefield's file, and TFD
    and TanimotoCombo are computed for every 2-combination of forcefields.

    If anything fails for a molecule, its heavy atom count and all of its
    scores are recorded as -1 and processing continues with the next one.

    Args:
        ffdirectorylist (list) list of ff to compare
        all_ff_df (dataframe) dataframe with a 'MolNames' column.

    Returns:
        all_ff_df (dataframe) the input merged (on 'MolNames') with one
        'TFD <ff1> <ff2>' and one 'TANI <ff1> <ff2>' column per forcefield
        pair and a 'HeavyAtomCount' column.
    """
    # Value recorded for every result of a molecule that could not be processed
    failed = -1

    def read_mol(ffdir, mol_file):
        # Read the first molecule of the file; the stream is always closed,
        # even when reading raises.
        stream = oechem.oemolistream('%s/%s/%s' % (directory, ffdir, mol_file))
        try:
            mol = oechem.OEGraphMol()
            oechem.OEReadMolecule(stream, mol)
        finally:
            stream.close()
        return mol

    def merge_scores(frame, scores, column):
        # Turn a {molname: value} mapping into a named column and join it in
        tempdf = pd.DataFrame.from_dict(scores, orient='index')
        tempdf = tempdf.rename({0: column}, axis='columns')
        tempdf['MolNames'] = tempdf.index
        return frame.merge(tempdf, on='MolNames')

    # The forcefield pairs never change, so build them (and their labels) once
    ff_pairs = list(itertools.combinations(ffdirectorylist, 2))
    pair_labels = ['%s %s' % pair for pair in ff_pairs]

    heavyatomdict = {}
    TFDdict = {label: {} for label in pair_labels}
    TANIdict = {label: {} for label in pair_labels}

    # Generates all the data
    for molname in all_ff_df['MolNames']:
        print(molname)
        mol_file = '%s' % molname + '.mol2'
        try:
            refmolhev = read_mol(ffdirectorylist[0], mol_file)
            heavyatomdict[molname] = oechem.OECount(refmolhev,
                                                    oechem.OEIsHeavy())
            for (ref_ff, query_ff), label in zip(ff_pairs, pair_labels):
                refmol = read_mol(ref_ff, mol_file)
                qmol = read_mol(query_ff, mol_file)
                # Results are keyed by the raw molname (not its string form)
                # so that they line up with the 'MolNames' column on merge.
                TFDdict[label][molname] = TFD_for_oemols(refmol, qmol)
                TANIdict[label][molname] = tanimotocombo(refmol, qmol)
        except Exception as err:
            # Deliberately best-effort: flag the whole molecule as failed and
            # carry on. KeyboardInterrupt/SystemExit are not intercepted.
            print('Could not compare %s across forcefields: %s' %
                  (molname, err))
            heavyatomdict[molname] = failed
            for label in pair_labels:
                TANIdict[label][molname] = failed
                TFDdict[label][molname] = failed

    # Loads data into dataframe
    for label, scores in TFDdict.items():
        all_ff_df = merge_scores(all_ff_df, scores, 'TFD %s' % label)
    for label, scores in TANIdict.items():
        all_ff_df = merge_scores(all_ff_df, scores, 'TANI %s' % label)
    all_ff_df = merge_scores(all_ff_df, heavyatomdict, 'HeavyAtomCount')
    return all_ff_df
示例#24
0
    def process(self, record, port):
        """
        Run the psi4 energy calculation for the molecule carried by ``record``.

        The molecule is read from the record field named by
        ``self.args.in_mol_field``. The dihedral atom indices are taken from
        the ``TORSION_ATOMS_FRAGMENT`` SD tag (each value minus one). When the
        molecule contains sulfur or a negatively charged heavy atom, the
        6-31G* / 6-31G** basis sets are replaced by their diffuse
        counterparts 6-31+G* / 6-31+G**.

        Start and end time stamps are written as SD data on the molecule, or
        on the conformer whose index equals the ``SELECTED_CONFORMER`` int
        data when ``self.args.only_selected_conformer`` is set.

        On success a new OEMolRecord holding the molecule is emitted on
        ``self.success``; on any problem the input record is emitted on
        ``self.failure``.

        :param record: input record expected to hold the molecule
        :param port: not used by this method
        """
        if not record.has_value(self.args.in_mol_field):
            self.log.error("Could not find molecules in OEMolRecord")
            self.failure.emit(record)
            return
        mol = record.get_value(self.args.in_mol_field)

        parent_torsion_tag = "TORSION_ATOMS_ParentMol"
        torsion_atoms_in_parent = get_sd_data(mol, parent_torsion_tag).split()
        dih_name = mol.GetTitle() + "_" + "_".join(torsion_atoms_in_parent)

        torsion_tag = "TORSION_ATOMS_FRAGMENT"
        torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
        try:
            dihedral_atom_indices = [
                int(x) - 1 for x in torsion_atoms_in_fragment
            ]
        except ValueError:
            # A non-numeric tag value is treated like a missing torsion label
            dihedral_atom_indices = []
        # A list comprehension never yields None, so test for "no indices"
        if not dihedral_atom_indices:
            self.log.warn("Unable to find labelled torsion in %s" % dih_name)
            self.failure.emit(record)
            return

        opt_basis = self.args.opt_basis
        spe_basis = self.args.spe_basis
        only_selected = self.args.only_selected_conformer

        # If fragment contains S or a negatively charged heavy atom
        #     use 6-31+G* instead of 6-31G*
        #     use 6-31+G** instead of 6-31G**
        diffuse_reasons = []
        if oechem.OECount(mol, oechem.OEIsSulfur()) > 0:
            diffuse_reasons.append("S")
        if any(atom.GetFormalCharge() < 0
               for atom in mol.GetAtoms(oechem.OEIsHeavy())):
            diffuse_reasons.append("a negatively charged atom")

        if diffuse_reasons:
            reason = " and ".join(diffuse_reasons)
            message = "Using %s instead of %s as %s basis because fragment contains %s."
            for plain, diffuse in (("6-31G*", "6-31+G*"),
                                   ("6-31G**", "6-31+G**")):
                if opt_basis == plain:
                    self.log.warn(message % (diffuse, plain, "opt", reason))
                    opt_basis = diffuse
                if spe_basis == plain:
                    self.log.warn(message % (diffuse, plain, "spe", reason))
                    spe_basis = diffuse

        def selected_conformer():
            # Return the conformer whose index matches SELECTED_CONFORMER.
            key_conf_id = mol.GetIntData("SELECTED_CONFORMER")
            last_conf = None
            for conf in mol.GetConfs():
                if conf.GetIdx() == key_conf_id:
                    return conf
                last_conf = conf
            if last_conf is None:
                raise ValueError("%s has no conformers" % mol.GetTitle())
            # Historical behaviour was to always use the last conformer; keep
            # that as the fallback when the requested index does not exist.
            self.log.warn(
                "Conformer %s requested by SELECTED_CONFORMER not found in %s; "
                "using the last conformer." % (key_conf_id, mol.GetTitle()))
            return last_conf

        def log_and_stamp(verb, phase):
            # Log the event and record its time stamp on the molecule, or on
            # the selected conformer when only that conformer is processed.
            target = selected_conformer() if only_selected else mol
            conf_name = get_sd_data(target, "CONFORMER_LABEL")
            time_stamp = "{:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now())
            hostname = socket.gethostname()
            self.log.info("%s psi4 calculation for %s on %s at %s" %
                          (verb, conf_name, hostname, time_stamp))
            oechem.OESetSDData(target, "%s %s time" % (self.name, phase),
                               time_stamp)

        try:
            log_and_stamp("Starting", "start")

            dih, _ = get_dihedral(mol, dihedral_atom_indices)
            calculate_energy(
                mol,
                dih,
                spe_method=self.args.spe_method,
                spe_basis=spe_basis,
                geom_opt_technique=self.args.geom_opt_technique,
                opt_method=self.args.opt_method,
                opt_basis=opt_basis,
                geom_maxiter=self.args.geom_maxiter,
                only_selected_conf=only_selected,
                molden_output=self.args.molden_output,
                **self.psi4opts)

            # The conformer is looked up again after the calculation, as the
            # original code did, rather than reusing the earlier handle.
            log_and_stamp("Completed", "end")

            optimized_mol_record = OEMolRecord()
            optimized_mol_record.set_mol(mol)
            self.success.emit(optimized_mol_record)
        except Exception as e:
            print(e)
            self.log.error("Error with {} {}".format(mol.GetTitle(), e))
            self.failure.emit(record)