def PrintAltGroupInfo(mol):
    """Print a summary of the alternate location groups found in mol.

    The first line holds the molecule title and the number of groups; each
    following line holds one group's location count and location codes.
    Residue information is perceived first when the molecule has none.
    """
    residues_present = oechem.OEHasResidues(mol)
    if not residues_present:
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    # the factory gives access to the groups and their location codes
    factory = oechem.OEAltLocationFactory(mol)

    header = "%s\t(%s groups)" % (mol.GetTitle(), factory.GetGroupCount())
    print(header)

    for group in factory.GetGroups():
        n_locs = group.GetLocationCount()
        codes = factory.GetLocationCodes(group)
        print("\t%s locs:%s" % (n_locs, codes))
Exemplo n.º 2
0
def PrintGroups(mol):
    """Summarize alternate location group info.

    Prints the molecule title with its group count, then one tab-indented
    line per group giving the number of locations and their codes.
    """
    if not oechem.OEHasResidues(mol):
        # no residue data yet: perceive it, preserving whatever is present
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    alt_factory = oechem.OEAltLocationFactory(mol)

    title_fields = (mol.GetTitle(), alt_factory.GetGroupCount())
    print("%s\t(%s groups)" % title_fields)

    for alt_group in alt_factory.GetGroups():
        group_fields = (alt_group.GetLocationCount(),
                        alt_factory.GetLocationCodes(alt_group))
        print("\t%s locs:%s" % group_fields)
Exemplo n.º 3
0
def PrintResidues(mol):
    """List alternate location codes and occupancy by group and residue.

    For every alternate location group a header line is printed, followed by
    one line per residue. Each residue line lists the alternate location
    codes seen on that residue's atoms together with the mean occupancy of
    the atoms carrying that code, shown as a whole-number percentage.
    """
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    factory = oechem.OEAltLocationFactory(mol)

    title_line = "%s - %d alternate location groups:" % (
        mol.GetTitle(), factory.GetGroupCount())
    print(title_line)

    def emit_summary(codes, occ_totals, atom_counts):
        # one "code(percent%)" entry per code seen so far, then end the line
        for slot, letter in enumerate(codes):
            mean_pct = (occ_totals[slot] * 100.0) / atom_counts[slot]
            print("%c(%.0f%c) " % (letter, mean_pct, "%"), end=" ")
        print()

    for group in factory.GetGroups():
        print("%d) %d alternate locations" %
              (group.GetGroupID() + 1, group.GetLocationCount()),
              end=" ")

        # running per-residue state: codes seen, occupancy sums, atom counts
        last_res = oechem.OEResidue()
        seen_codes = ""
        occ_totals = []
        atom_counts = []

        for atom in factory.GetAltAtoms(group):
            residue = oechem.OEAtomGetResidue(atom)

            if not oechem.OESameResidue(residue, last_res):
                # a new residue starts: finish the previous line and reset
                emit_summary(seen_codes, occ_totals, atom_counts)
                seen_codes, occ_totals, atom_counts = "", [], []

                print("\t%s%d%c chain '%c': " %
                      (residue.GetName(), residue.GetResidueNumber(),
                       residue.GetInsertCode(), residue.GetChainID()),
                      end=" ")
                last_res = residue

            alt_code = residue.GetAlternateLocation()
            slot = seen_codes.find(alt_code)
            if slot >= 0:
                occ_totals[slot] += residue.GetOccupancy()
                atom_counts[slot] += 1
            else:
                seen_codes += alt_code
                occ_totals.append(residue.GetOccupancy())
                atom_counts.append(1)

        # summary for the last residue of the group
        emit_summary(seen_codes, occ_totals, atom_counts)
Exemplo n.º 4
0
def main(argv=[__name__]):
    """Read one molecule from the "-in" file and place hydrogens on it.

    The molecule is read with the PDB alternate-location flavor, reduced via
    MakePrimaryAltMol when alternate location groups exist, and then passed
    to OEPlaceHydrogens twice: once with defaults and once with custom
    options plus a details object whose description is printed.

    The "# @ <SNIPPET-...>" markers delimit excerpts; keep the statements
    inside them unchanged.

    argv -- command line arguments handed to OEParseCommandLine
    """

    itf = oechem.OEInterface(InterfaceData)

    if not oechem.OEParseCommandLine(itf, argv):
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    # @ <SNIPPET-ALTLOCFACT-FLAVOR>
    ims = oechem.oemolistream()
    ims.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_ALTLOC)
    # @ </SNIPPET-ALTLOCFACT-FLAVOR>

    inputFile = itf.GetString("-in")
    if not ims.open(inputFile):
        oechem.OEThrow.Fatal("Unable to open %s for reading." % inputFile)

    # hydrogen placement needs coordinates, so reject non-3D input formats
    if not oechem.OEIs3DFormat(ims.GetFormat()):
        oechem.OEThrow.Fatal("%s is not in a 3D format." % inputFile)

    # only the first molecule in the file is processed
    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ims, mol):
        oechem.OEThrow.Fatal("Unable to read %s." % inputFile)

    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    # @ <SNIPPET-ALTLOCFACT-PRIMARY>
    alf = oechem.OEAltLocationFactory(mol)
    if alf.GetGroupCount() != 0:
        alf.MakePrimaryAltMol(mol)
    # @ </SNIPPET-ALTLOCFACT-PRIMARY>

    # first pass: default options
    # @ <SNIPPET-PLACE-HYDROGENS-BASIC>
    if oechem.OEPlaceHydrogens(mol):
        # ...
        # @ </SNIPPET-PLACE-HYDROGENS-BASIC>
        print("success")

    # @ <SNIPPET-PLACE-HYDROGENS-OPTIONS>
    opt = oechem.OEPlaceHydrogensOptions()
    opt.SetStandardizeBondLen(False)
    # @ </SNIPPET-PLACE-HYDROGENS-OPTIONS>

    # second pass on the same molecule: custom options, details collected
    # @ <SNIPPET-PLACE-HYDROGENS-DETAILS>
    # given molecule mol and OEPlaceHydrogensOptions opt...
    details = oechem.OEPlaceHydrogensDetails()
    if oechem.OEPlaceHydrogens(mol, details, opt):
        print(details.Describe())
    # @ </SNIPPET-PLACE-HYDROGENS-DETAILS>

    ims.close()
Exemplo n.º 5
0
    def addh(self, altProcess='occupancy', processName='fullsearch',
             ihopt=T, standardize=T, badclash=0.4, flipbias=1.0, maxStates=20):
        """Place hydrogens on the molecule and store the result in self.mol.

        Work is done on a copy of ``self.mol``. For ``altProcess='compare'``
        with alternate location groups present, it is done on the alternate
        location factory's source molecule instead.

        altProcess  -- alternate location handling:
                       'occupancy' reduces the molecule with
                       MakePrimaryAltMol; 'compare' switches to the factory's
                       source molecule and requires compatible alternates
                       during placement; any other value leaves alternate
                       locations untouched.
        processName -- water processing mode: 'fullsearch', 'focused', or
                       anything else for "ignore".
        ihopt       -- when true, the settings below are applied to the
                       placement options; otherwise a default
                       OEPlaceHydrogensOptions is used.
        standardize -- passed to SetStandardizeBondLen.
        badclash    -- passed to SetBadClashOverlapDistance.
        flipbias    -- passed to SetFlipBiasScale.
        maxStates   -- passed to SetMaxSubstateCutoff.

        Reads ``self.verbose`` to decide whether placement details are
        collected and reported through OEThrow.Verbose. Failure to place
        hydrogens is reported through OEThrow.Fatal.

        NOTE(review): the error messages read ``self.inmol`` while the method
        operates on ``self.mol`` -- confirm that ``self.inmol`` exists.
        NOTE(review): the signature previously carried a trailing "#7"
        remnant, possibly an earlier maxStates default -- confirm.
        """
        imol = self.mol.CreateCopy()

        wp = oechem.OEPlaceHydrogensWaterProcessing_Ignore
        if processName == 'fullsearch':
            wp = oechem.OEPlaceHydrogensWaterProcessing_FullSearch
        elif processName == 'focused':
            wp = oechem.OEPlaceHydrogensWaterProcessing_Focused

        highestOcc = (altProcess == "occupancy")
        compareAlts = (altProcess == "compare")
        print('#1')  # progress marker on stdout
        if highestOcc or compareAlts:
            alf = oechem.OEAltLocationFactory(imol)
            if alf.GetGroupCount() != 0:
                if highestOcc:
                    oechem.OEThrow.Verbose(
                        "Dropping alternate locations from input.")
                    alf.MakePrimaryAltMol(imol)
                elif compareAlts:
                    oechem.OEThrow.Verbose("Fixing alternate location issues.")
                    imol = alf.GetSourceMol()
        omol = imol
        print('#2')  # progress marker on stdout
        oechem.OEThrow.Verbose("Adding hydrogens to complex.")

        hopt = oechem.OEPlaceHydrogensOptions()
        if ihopt:
            hopt.SetAltsMustBeCompatible(compareAlts)
            hopt.SetStandardizeBondLen(standardize)
            hopt.SetWaterProcessing(wp)
            hopt.SetBadClashOverlapDistance(badclash)
            hopt.SetFlipBiasScale(flipbias)
            hopt.SetMaxSubstateCutoff(maxStates)

        if self.verbose:
            print('#3')  # progress marker on stdout
            # verbose mode also collects and reports the placement details
            details = oechem.OEPlaceHydrogensDetails()
            if not oechem.OEPlaceHydrogens(omol, details, hopt):
                oechem.OEThrow.Fatal(
                    "Unable to place hydrogens and get details on %s." %
                    self.inmol.GetTitle())
            oechem.OEThrow.Verbose(details.Describe())
        else:
            if not oechem.OEPlaceHydrogens(omol, hopt):
                oechem.OEThrow.Fatal("Unable to place hydrogens on %s." %
                                     self.inmol.GetTitle())
        self.mol = omol
def ReadFromPDB(pdb_file, mol):
    """Read the first molecule of pdb_file and build its primary alt mol in mol.

    The file is read with the default, DATA and ALTLOC PDB input flavors.
    mol is cleared, handed to the alternate location factory's
    MakePrimaryAltMol, and returned. Open or read failures are reported
    through OEThrow.Fatal.
    """
    stream = oechem.oemolistream()
    pdb_flavor = (oechem.OEIFlavor_PDB_Default
                  | oechem.OEIFlavor_PDB_DATA
                  | oechem.OEIFlavor_PDB_ALTLOC)
    stream.SetFlavor(oechem.OEFormat_PDB, pdb_flavor)

    opened = stream.open(pdb_file)
    if not opened:
        oechem.OEThrow.Fatal("Unable to open %s for reading." % pdb_file)

    # read into a scratch molecule; the caller's mol is filled in afterwards
    scratch = oechem.OEGraphMol()
    got_molecule = oechem.OEReadMolecule(stream, scratch)
    if not got_molecule:
        oechem.OEThrow.Fatal("Unable to read molecule from %s." % pdb_file)
    stream.close()

    alt_factory = oechem.OEAltLocationFactory(scratch)
    mol.Clear()
    alt_factory.MakePrimaryAltMol(mol)
    return mol
Exemplo n.º 7
0
def main(argv=[__name__]):
    """Place hydrogens on the "-in" molecule while protecting one residue.

    The molecule is read with the PDB alternate-location flavor and reduced
    via MakePrimaryAltMol when alternate location groups exist. The residue
    of the first atom whose residue index is HIS is then handed to the
    IsSameResidue predicate (defined elsewhere) as the no-flip predicate for
    OEPlaceHydrogens.

    argv -- command line arguments handed to OEParseCommandLine
    """

    itf = oechem.OEInterface(InterfaceData)

    if not oechem.OEParseCommandLine(itf, argv):
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    ims = oechem.oemolistream()
    ims.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_ALTLOC)

    inputFile = itf.GetString("-in")
    if not ims.open(inputFile):
        oechem.OEThrow.Fatal("Unable to open %s for reading." % inputFile)

    # hydrogen placement needs coordinates, so reject non-3D input formats
    if not oechem.OEIs3DFormat(ims.GetFormat()):
        oechem.OEThrow.Fatal("%s is not in a 3D format." % inputFile)

    # only the first molecule in the file is processed
    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ims, mol):
        oechem.OEThrow.Fatal("Unable to read %s." % inputFile)

    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    alf = oechem.OEAltLocationFactory(mol)
    if alf.GetGroupCount() != 0:
        alf.MakePrimaryAltMol(mol)

    # in our example, we will select the first histidine
    # (if none is found, selectedResidue stays a default OEResidue)
    selectedResidue = oechem.OEResidue()
    for atom in mol.GetAtoms():
        res = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(res) == oechem.OEResidueIndex_HIS:
            selectedResidue = res
            break

    # @ <SNIPPET-PLACE-HYDROGENS-PRED>
    # given predicate IsSameResidue, residue selectedResidue and molecule mol...
    opt = oechem.OEPlaceHydrogensOptions()
    opt.SetNoFlipPredicate(IsSameResidue(selectedResidue))

    if oechem.OEPlaceHydrogens(mol, opt):
        # selectedResidue will not be flipped...
        # @ </SNIPPET-PLACE-HYDROGENS-PRED>
        print("success")

    ims.close()
def PrintLocations(mol, hideAtoms):
    """List alternate location codes and, unless hideAtoms is true, atom info.

    Output is the molecule title, the group count (followed by "{" when there
    are groups), one line per group, and one line per location ending in the
    number of atoms at that location. When atoms are shown, each location
    line also carries a bracketed list of per-atom descriptors. A closing
    "}" is printed when there are groups.
    """
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    factory = oechem.OEAltLocationFactory(mol)
    show_atoms = not hideAtoms

    print("%s" % mol.GetTitle())

    group_count = factory.GetGroupCount()
    print("grp-cnt=%d" % group_count, end=" ")
    # open a brace only when there is something to list; otherwise end the line
    print("{" if group_count > 0 else "")

    for group in factory.GetGroups():
        print(" grp=%d loc-cnt=%d grp-codes='%s'" %
              (group.GetGroupID(), group.GetLocationCount(),
               factory.GetLocationCodes(group)))

        for location in group.GetLocations():
            print(" grp=%d loc=%d loc-codes='%s'" %
                  (location.GetGroupID(), location.GetLocationID(),
                   factory.GetLocationCodes(location)),
                  end=" ")

            if show_atoms:
                print("[", end=" ")

            atom_total = 0
            for atom in factory.GetAltAtoms(location):
                atom_total += 1
                if not show_atoms:
                    continue
                residue = oechem.OEAtomGetResidue(atom)
                # atom-name:alt-code:residue-name/number/insert:chain/model
                print("%s:%c:%s%d%c:c%cm%d;" %
                      (atom.GetName(), residue.GetAlternateLocation(),
                       residue.GetName(), residue.GetResidueNumber(),
                       residue.GetInsertCode(), residue.GetChainID(),
                       residue.GetModelNumber()),
                      end=" ")

            if show_atoms:
                print("]", end=" ")
            print(atom_total)

    if group_count > 0:
        print("}")
Exemplo n.º 9
0
def PrintStates(mol):
    """List alternate location state information.

    Prints the molecule title and group count, then the number of states
    (the product of the per-group location counts). When the summed log10 of
    those counts exceeds 7 the states are not enumerated; otherwise the
    per-group locations are handed to EnumerateStates.
    """
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    factory = oechem.OEAltLocationFactory(mol)

    print("%s\t%d groups" % (mol.GetTitle(), factory.GetGroupCount()),
          end=" ")

    loc_counts = [group.GetLocationCount() for group in factory.GetGroups()]

    # track the exact product and its order of magnitude side by side
    state_total = 1
    log_total = 0.0
    for count in loc_counts:
        state_total *= count
        log_total += math.log10(count)

    if log_total > 7.0:
        print("\tover 10^%.0f states" % log_total)
        print("too many states to enumerate")
        return

    print("\t%d states" % state_total)
    per_group_locs = [group.GetLocations() for group in factory.GetGroups()]
    EnumerateStates(factory, per_group_locs, 0, len(per_group_locs))
Exemplo n.º 10
0
def do(controller):
    """
    Calculate per-atom buried surface areas for the ligands of biological
    assemblies and write them to one tab-dialect CSV file per PDB entry.

    For each (pdb, assemblyset) pair returned by get_assembly_sets(args),
    every assembly file is read from the 'quat_oeb' directory and stripped of
    non-polymer atoms. For each ligand with at least seven heavy atoms, the
    solvent-accessible surfaces of the ligand, its binding site and their
    complex are built. Atoms whose surface area differs between the unbound
    and the bound state are passed to write_atoms().

    Output goes to
    <credo_data>/<pdb[1:3]>/<pdb>/binding_site_atom_surface_areas.credo.

    Options read from ``controller.pargs``: progressbar, incremental, update,
    quat, dry_run. Only quaternary structures (``quat``) are supported; the
    process exits with status 1 otherwise.

    :param controller: application controller; only its ``pargs`` attribute
        is read here.
    """
    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(
        OEAtomHasIntData(('entity_type_bm', 0)),
        OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=[
            'PDB entries: ',
            SimpleProgress(), ' ',
            Percentage(),
            Bar()
        ],
                          maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar:
            bar.update(counter)

        # create a data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(),
                                       pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas of all atoms will be written
        surface_areas_path = os.path.join(
            struct_data_dir, 'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if args.incremental and exists(
                surface_areas_path) and getsize(surface_areas_path) > 0:
            continue
        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() -
              (args.update * 60 * 60 * 24) and getsize(surface_areas_path)):
            app.log.info("Output for PDB entry {0} exists and is more recent than {1} days. Skipped."\
                         .format(pdb, args.update))
            continue

        # output file stream and CSV writer; the context manager closes the
        # file even when sys.exit() or an exception interrupts the loop
        with open(surface_areas_path, 'w') as atomfs:
            atomwriter = csv.writer(atomfs, dialect='tabs')

            # deal with each found assembly separately
            # some pdb entries consist of more than one
            for assembly_file in assemblyset:
                if not args.quat:
                    app.log.error(
                        "the calculation of buried ligand surface areas "
                        "is only supported for quaternary structures.")
                    sys.exit(1)

                path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                    pdb.lower(), assembly_file)

                # a missing file cannot be processed: skip it instead of
                # trying to open it
                if not os.path.isfile(path):
                    app.log.warn("cannot calculate buried surface areas: "
                                 "file {} does not exist!".format(path))
                    continue

                # get the quaternary structure (first molecule in the file)
                ifs.open(str(path))
                assembly = next(ifs.GetOEGraphMols(), None)

                if not assembly:
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} does not contain a valid molecule!".format(
                            path))
                    continue

                if not assembly.GetListData('ligands'):
                    continue

                # identifier of the assembly
                assembly_serial = assembly.GetIntData('assembly_serial')

                # remove all non-polymers from assembly
                for atom in assembly.GetAtoms(nonpolymers):
                    assembly.DeleteAtom(atom)

                # ignore bizarre assemblies
                if not assembly.NumAtoms():
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} contains assembly with no atoms!".format(
                            path))
                    continue

                # keep only the location state with the largest average occupancy
                # NOTE(review): MakeCurrentAltMol uses the factory's current
                # state -- confirm that this is the highest-occupancy one
                assembly_hi_occ = oechem.OEGraphMol()
                altlocfactory = oechem.OEAltLocationFactory(assembly)
                altlocfactory.MakeCurrentAltMol(assembly_hi_occ)

                # get the ligands
                ligands = assembly_hi_occ.GetListData('ligands')

                # iterate through all ligands of the biomolecule and calculate the
                # buried surface area atom contributions for all involved atoms
                for ligand in ligands:

                    # ignore small ligands
                    if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7:
                        continue

                    entity_serial = ligand.GetIntData('entity_serial')

                    # keep only the location state with the largest average occupancy
                    altlig = oechem.OEGraphMol()
                    altlocfactory = oechem.OEAltLocationFactory(ligand)
                    altlocfactory.MakeCurrentAltMol(altlig)

                    cmplx_srf = oespicoli.OESurface()
                    ligand_srf = oespicoli.OESurface()

                    # make solvent-accessible surface of ligand
                    oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig,
                                                      0.5, 1.4)

                    # get the atom contributions of the ligand surface
                    ligand_atom_areas = get_atom_surface_areas(
                        altlig, ligand_srf)

                    # extract the binding site of the assembly to speed up
                    # surface area calculation
                    binding_site = get_binding_site(assembly_hi_occ, altlig)

                    # make solvent-accessible surface of binding site
                    binding_site_srf = oespicoli.OESurface()
                    oespicoli.OEMakeAccessibleSurface(binding_site_srf,
                                                      binding_site, 0.5, 1.4)

                    # get the atom contributions of the binding site surface
                    binding_site_atom_areas = get_atom_surface_areas(
                        binding_site, binding_site_srf)

                    # create complex: binding site atoms first, ligand atoms after
                    cmplx = oechem.OEGraphMol()
                    oechem.OEAddMols(cmplx, binding_site)
                    oechem.OEAddMols(cmplx, altlig)

                    # make solvent-accessible surface of the complex
                    oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx,
                                                      0.5, 1.4)

                    # surface area atom contributions of the whole complex
                    cmplx_atom_areas = get_atom_surface_areas(cmplx, cmplx_srf)

                    # extract the atom surface areas in the bound state through
                    # slices; relies on the binding site atoms coming first
                    site_atom_count = binding_site.NumAtoms()
                    binding_site_atom_areas_bound = cmplx_atom_areas[:site_atom_count]
                    ligand_atom_areas_bound = cmplx_atom_areas[site_atom_count:]

                    # difference between apo and bound state per atom
                    # (assumes the area containers support element-wise
                    # arithmetic, e.g. numpy arrays -- TODO confirm)
                    binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                    ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                    # boolean map indicating for which atom the surface area has changed
                    binding_site_atom_map = binding_site_delta != 0
                    ligand_atom_map = ligand_delta != 0

                    if args.dry_run:
                        continue

                    # only record the atoms where the solvent-accessible surface
                    # area has actually changed
                    write_atoms(atomwriter, binding_site,
                                binding_site_atom_map, pdb, assembly_serial,
                                entity_serial, binding_site_atom_areas,
                                binding_site_atom_areas_bound)

                    # only record the atoms where the solvent-accessible surface
                    # area has actually changed
                    write_atoms(atomwriter, altlig, ligand_atom_map, pdb,
                                assembly_serial, entity_serial,
                                ligand_atom_areas, ligand_atom_areas_bound)

                    app.log.debug(
                        "wrote buried surface areas for all ligands in "
                        "biomolecule {} to {}.".format(
                            pdb, surface_areas_path))

                atomfs.flush()

    if args.progressbar:
        bar.finish()
    oechem.OEThrow.Usage("%s <mol-infile>" % sys.argv[0])

# Open the molecule file named by the first command line argument.
ims = oechem.oemolistream()
if not ims.open(sys.argv[1]):
    oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[1])

# Set the PDB alternate-location input flavor on the stream
# (presumably so that alternate location atoms are kept when reading).
# @ <SNIPPET-ALTLOCFACT-MAKEALTMOL-FLAVOR>
ims.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_ALTLOC)
# @ </SNIPPET-ALTLOCFACT-MAKEALTMOL-FLAVOR>

# For every molecule in the file, report the atom index bound of the
# factory's source molecule and of a subset molecule built from location 'B'.
for mol in ims.GetOEGraphMols():
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    # @ <SNIPPET-ALTLOCFACT-MAKEALTMOL-ALF>
    alf = oechem.OEAltLocationFactory(mol)
    # @ </SNIPPET-ALTLOCFACT-MAKEALTMOL-ALF>
    print("# atoms original.. %d" % alf.GetSourceMol().GetMaxAtomIdx())

    # iterator over the alternate location atoms; invalid when there are none
    atom = alf.GetAltAtoms()
    if atom.IsValid():
        # @ <SNIPPET-ALTLOCFACT-MAKEALTMOL-SSMOL>
        # given OEAltLocationFactory alf and OEAtomBaseIter atom ...
        # (location 'B' is looked up for the first atom the iterator yields)
        loc = alf.GetLocation(atom.next(), 'B')
        ssmol = oechem.OEGraphMol()
        if alf.MakeAltMol(ssmol, loc):
            # use the subset mol...
            # @ </SNIPPET-ALTLOCFACT-MAKEALTMOL-SSMOL>
            print("# atoms subset.... %d" % ssmol.GetMaxAtomIdx())
    else:
        print("no alternate location atoms")