def PrintAltGroupInfo(mol):
    """Print the title and alternate location group summary of mol.

    Residues are perceived first when mol has none. One header line gives
    the title and the group count; one line per group gives its location
    count and location codes.
    """
    residues_present = oechem.OEHasResidues(mol)
    if not residues_present:
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    # create factory for mol
    factory = oechem.OEAltLocationFactory(mol)

    header = "%s\t(%s groups)" % (mol.GetTitle(), factory.GetGroupCount())
    print(header)

    for group in factory.GetGroups():
        location_count = group.GetLocationCount()
        location_codes = factory.GetLocationCodes(group)
        print("\t%s locs:%s" % (location_count, location_codes))
def PrintGroups(mol):
    """Summarize the alternate location groups of mol on stdout.

    Perceives residues when mol has none, then prints the title with the
    number of groups followed by the location count and codes of each group.
    """
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    alt_factory = oechem.OEAltLocationFactory(mol)

    title = mol.GetTitle()
    group_total = alt_factory.GetGroupCount()
    print("%s\t(%s groups)" % (title, group_total))

    for alt_group in alt_factory.GetGroups():
        summary = (alt_group.GetLocationCount(),
                   alt_factory.GetLocationCodes(alt_group))
        print("\t%s locs:%s" % summary)
def PrintResidues(mol):
    """List alternate location codes and mean occupancy by group and residue.

    For each alternate location group the atoms are walked in factory order.
    Whenever the residue changes, the codes collected for the previous residue
    are printed as ``code(mean occupancy %)`` and a new residue header is
    started.
    """
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    factory = oechem.OEAltLocationFactory(mol)

    print("%s - %d alternate location groups:" % (mol.GetTitle(), factory.GetGroupCount()))

    def emit_summary(codes, occupancy_totals, atom_counts):
        # one entry per location code seen so far, then terminate the line
        for slot, loc_code in enumerate(codes):
            mean_percent = (occupancy_totals[slot] * 100.0) / atom_counts[slot]
            print("%c(%.0f%c) " % (loc_code, mean_percent, "%"), end=" ")
        print()

    for group in factory.GetGroups():
        print("%d) %d alternate locations" % (group.GetGroupID() + 1,
                                              group.GetLocationCount()), end=" ")

        last_residue = oechem.OEResidue()
        seen_codes = ""
        occupancy_totals = []
        atom_counts = []

        for atom in factory.GetAltAtoms(group):
            residue = oechem.OEAtomGetResidue(atom)

            if not oechem.OESameResidue(residue, last_residue):
                # flush what was gathered for the previous residue (nothing on
                # the first atom, which just ends the group header line)
                emit_summary(seen_codes, occupancy_totals, atom_counts)
                seen_codes = ""
                occupancy_totals = []
                atom_counts = []
                header_fields = (residue.GetName(),
                                 residue.GetResidueNumber(),
                                 residue.GetInsertCode(),
                                 residue.GetChainID())
                print("\t%s%d%c chain '%c': " % header_fields, end=" ")
                last_residue = residue

            alt_code = residue.GetAlternateLocation()
            slot = seen_codes.find(alt_code)
            if slot >= 0:
                occupancy_totals[slot] += residue.GetOccupancy()
                atom_counts[slot] += 1
            else:
                seen_codes += alt_code
                occupancy_totals.append(residue.GetOccupancy())
                atom_counts.append(1)

        # summary for the last residue of the group
        emit_summary(seen_codes, occupancy_totals, atom_counts)
def main(argv=[__name__]):
    """Read one molecule from the ``-in`` file and place hydrogens on it.

    The input stream is given the PDB ALTLOC flavor. When the molecule has
    alternate location groups it is reduced with MakePrimaryAltMol before
    OEPlaceHydrogens is called, first with defaults and then with explicit
    options and a details object whose description is printed.
    """
    itf = oechem.OEInterface(InterfaceData)
    command_line_ok = oechem.OEParseCommandLine(itf, argv)
    if not command_line_ok:
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    # @ <SNIPPET-ALTLOCFACT-FLAVOR>
    ims = oechem.oemolistream()
    ims.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_ALTLOC)
    # @ </SNIPPET-ALTLOCFACT-FLAVOR>

    source_path = itf.GetString("-in")
    if not ims.open(source_path):
        oechem.OEThrow.Fatal("Unable to open %s for reading." % source_path)

    stream_format = ims.GetFormat()
    if not oechem.OEIs3DFormat(stream_format):
        oechem.OEThrow.Fatal("%s is not in a 3D format." % source_path)

    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ims, mol):
        oechem.OEThrow.Fatal("Unable to read %s." % source_path)

    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    # @ <SNIPPET-ALTLOCFACT-PRIMARY>
    alf = oechem.OEAltLocationFactory(mol)
    if alf.GetGroupCount() != 0:
        alf.MakePrimaryAltMol(mol)
    # @ </SNIPPET-ALTLOCFACT-PRIMARY>

    # @ <SNIPPET-PLACE-HYDROGENS-BASIC>
    if oechem.OEPlaceHydrogens(mol):
        # ...
        # @ </SNIPPET-PLACE-HYDROGENS-BASIC>
        print("success")

    # @ <SNIPPET-PLACE-HYDROGENS-OPTIONS>
    opt = oechem.OEPlaceHydrogensOptions()
    opt.SetStandardizeBondLen(False)
    # @ </SNIPPET-PLACE-HYDROGENS-OPTIONS>

    # @ <SNIPPET-PLACE-HYDROGENS-DETAILS>
    # given molecule mol and OEPlaceHydrogensOptions opt...
    details = oechem.OEPlaceHydrogensDetails()
    if oechem.OEPlaceHydrogens(mol, details, opt):
        print(details.Describe())
    # @ </SNIPPET-PLACE-HYDROGENS-DETAILS>

    ims.close()
def addh(self, altProcess='occupancy', processName='fullsearch',
         ihopt=T, standardize=T, badclash=0.4, flipbias=1.0,
         maxStates=20):
    """Place hydrogens on a copy of ``self.mol`` and store the result.

    Parameters:
        altProcess: alternate location handling. ``'occupancy'`` reduces the
            copy with ``MakePrimaryAltMol``; ``'compare'`` switches to the
            factory's source molecule and asks the hydrogen placement to keep
            alternates compatible; any other value skips the factory.
        processName: water processing mode, ``'fullsearch'`` or ``'focused'``;
            any other value selects the ``Ignore`` mode.
        ihopt: when false, the options object is left at its defaults and the
            five tuning parameters below are not applied.
        standardize: passed to ``SetStandardizeBondLen``.
        badclash: passed to ``SetBadClashOverlapDistance``.
        flipbias: passed to ``SetFlipBiasScale``.
        maxStates: passed to ``SetMaxSubstateCutoff``.

    Side effects:
        Prints the progress markers ``#1``, ``#2`` (and ``#3`` when
        ``self.verbose`` is set), reports through ``oechem.OEThrow`` and
        rebinds ``self.mol`` to the processed molecule.
    """
    imol = self.mol.CreateCopy()

    if processName == 'fullsearch':
        wp = oechem.OEPlaceHydrogensWaterProcessing_FullSearch
    elif processName == 'focused':
        wp = oechem.OEPlaceHydrogensWaterProcessing_Focused
    else:
        wp = oechem.OEPlaceHydrogensWaterProcessing_Ignore

    # the former `keepAlts` flag was computed here but never read; removed
    highestOcc = (altProcess == "occupancy")
    compareAlts = (altProcess == "compare")

    # NOTE(review): '#1'/'#2'/'#3' look like leftover debug markers; they are
    # kept because they are part of the current stdout output.
    print('#1')
    if highestOcc or compareAlts:
        alf = oechem.OEAltLocationFactory(imol)
        if alf.GetGroupCount() != 0:
            if highestOcc:
                oechem.OEThrow.Verbose(
                    "Dropping alternate locations from input.")
                alf.MakePrimaryAltMol(imol)
            else:
                # only reachable with compareAlts set
                oechem.OEThrow.Verbose("Fixing alternate location issues.")
                imol = alf.GetSourceMol()
    omol = imol
    print('#2')
    oechem.OEThrow.Verbose("Adding hydrogens to complex.")

    hopt = oechem.OEPlaceHydrogensOptions()
    if ihopt:
        hopt.SetAltsMustBeCompatible(compareAlts)
        hopt.SetStandardizeBondLen(standardize)
        hopt.SetWaterProcessing(wp)
        hopt.SetBadClashOverlapDistance(badclash)
        hopt.SetFlipBiasScale(flipbias)
        hopt.SetMaxSubstateCutoff(maxStates)

    # NOTE(review): error messages report the title of self.inmol although the
    # work is done on a copy of self.mol -- confirm both attributes exist.
    if self.verbose:
        print('#3')
        details = oechem.OEPlaceHydrogensDetails()
        if not oechem.OEPlaceHydrogens(omol, details, hopt):
            oechem.OEThrow.Fatal(
                "Unable to place hydrogens and get details on %s."
                % self.inmol.GetTitle())
        oechem.OEThrow.Verbose(details.Describe())
    else:
        if not oechem.OEPlaceHydrogens(omol, hopt):
            oechem.OEThrow.Fatal("Unable to place hydrogens on %s."
                                 % self.inmol.GetTitle())
    self.mol = omol
def ReadFromPDB(pdb_file, mol):
    """Read one molecule from pdb_file and store its primary alt-loc form in mol.

    The file is read with the default PDB flavor plus DATA and ALTLOC. mol is
    cleared, filled through OEAltLocationFactory.MakePrimaryAltMol and
    returned.
    """
    pdb_flavor = (oechem.OEIFlavor_PDB_Default
                  | oechem.OEIFlavor_PDB_DATA
                  | oechem.OEIFlavor_PDB_ALTLOC)

    stream = oechem.oemolistream()
    stream.SetFlavor(oechem.OEFormat_PDB, pdb_flavor)

    opened = stream.open(pdb_file)
    if not opened:
        oechem.OEThrow.Fatal("Unable to open %s for reading." % pdb_file)

    # read into a scratch molecule; the caller's mol only receives the
    # reduced structure
    scratch = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(stream, scratch):
        oechem.OEThrow.Fatal("Unable to read molecule from %s." % pdb_file)
    stream.close()

    alt_factory = oechem.OEAltLocationFactory(scratch)
    mol.Clear()
    alt_factory.MakePrimaryAltMol(mol)

    return mol
def main(argv=[__name__]):
    """Place hydrogens on the ``-in`` molecule without flipping one residue.

    The molecule is read with the PDB ALTLOC flavor and reduced with
    MakePrimaryAltMol when it has alternate location groups. The residue of
    the first atom whose residue index is HIS is then handed to the
    hydrogen placement options as a no-flip predicate.
    """
    itf = oechem.OEInterface(InterfaceData)
    command_line_ok = oechem.OEParseCommandLine(itf, argv)
    if not command_line_ok:
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    ims = oechem.oemolistream()
    ims.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_ALTLOC)

    source_path = itf.GetString("-in")
    if not ims.open(source_path):
        oechem.OEThrow.Fatal("Unable to open %s for reading." % source_path)

    stream_format = ims.GetFormat()
    if not oechem.OEIs3DFormat(stream_format):
        oechem.OEThrow.Fatal("%s is not in a 3D format." % source_path)

    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ims, mol):
        oechem.OEThrow.Fatal("Unable to read %s." % source_path)

    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    alf = oechem.OEAltLocationFactory(mol)
    if alf.GetGroupCount() != 0:
        alf.MakePrimaryAltMol(mol)

    # in our example, we will select the first histidine
    selectedResidue = oechem.OEResidue()
    for atom in mol.GetAtoms():
        candidate = oechem.OEAtomGetResidue(atom)
        if oechem.OEGetResidueIndex(candidate) != oechem.OEResidueIndex_HIS:
            continue
        selectedResidue = candidate
        break

    # @ <SNIPPET-PLACE-HYDROGENS-PRED>
    # given predicate IsSameResidue, residue selectedResidue and molecule mol...
    opt = oechem.OEPlaceHydrogensOptions()
    opt.SetNoFlipPredicate(IsSameResidue(selectedResidue))

    if oechem.OEPlaceHydrogens(mol, opt):
        # selectedResidue will not be flipped...
        # @ </SNIPPET-PLACE-HYDROGENS-PRED>
        print("success")

    ims.close()
def PrintLocations(mol, hideAtoms):
    """List alternate location groups, locations and (optionally) their atoms.

    Prints the title and group count, one line per group and one line per
    location. Each location line ends with its atom count; the bracketed
    per-atom listing is printed only when hideAtoms is false.
    """
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    factory = oechem.OEAltLocationFactory(mol)
    show_atoms = not hideAtoms

    print("%s" % mol.GetTitle())

    # the listing is wrapped in braces only when there is at least one group
    print("grp-cnt=%d" % factory.GetGroupCount(), end=" ")
    if factory.GetGroupCount() > 0:
        print("{")
    else:
        print()

    for group in factory.GetGroups():
        group_fields = (group.GetGroupID(),
                        group.GetLocationCount(),
                        factory.GetLocationCodes(group))
        print(" grp=%d loc-cnt=%d grp-codes='%s'" % group_fields)

        for location in group.GetLocations():
            location_fields = (location.GetGroupID(),
                               location.GetLocationID(),
                               factory.GetLocationCodes(location))
            print(" grp=%d loc=%d loc-codes='%s'" % location_fields, end=" ")

            if show_atoms:
                print("[", end=" ")

            atom_total = 0
            for atom in factory.GetAltAtoms(location):
                atom_total += 1
                if not show_atoms:
                    continue
                residue = oechem.OEAtomGetResidue(atom)
                atom_fields = (atom.GetName(),
                               residue.GetAlternateLocation(),
                               residue.GetName(),
                               residue.GetResidueNumber(),
                               residue.GetInsertCode(),
                               residue.GetChainID(),
                               residue.GetModelNumber())
                print("%s:%c:%s%d%c:c%cm%d;" % atom_fields, end=" ")

            if show_atoms:
                print("]", end=" ")
            print(atom_total)

    if factory.GetGroupCount() > 0:
        print("}")
def PrintStates(mol):
    """List alternate location state information for mol.

    Prints the title and group count, then the number of states (the product
    of the per-group location counts). The states are enumerated through
    EnumerateStates unless the base-10 logarithm of that product exceeds 7,
    in which case only an order-of-magnitude message is printed.
    """
    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    factory = oechem.OEAltLocationFactory(mol)

    print("%s\t%d groups" % (mol.GetTitle(), factory.GetGroupCount()), end=" ")

    # track the product exactly and, in parallel, its log10 for the size check
    state_total = 1
    log_state_total = 0.0
    for group in factory.GetGroups():
        location_count = group.GetLocationCount()
        state_total = state_total * location_count
        log_state_total = log_state_total + math.log10(location_count)

    if log_state_total > 7.0:
        print("\tover 10^%.0f states" % log_state_total)
        print("too many states to enumerate")
        return

    print("\t%d states" % state_total)
    per_group_locations = [group.GetLocations() for group in factory.GetGroups()]
    EnumerateStates(factory, per_group_locations, 0, len(per_group_locations))
def do(controller):
    """
    Calculate per-atom buried solvent-accessible surface areas for the ligands
    of each biological assembly and write them to one file per PDB entry.

    For every ``(pdb, assemblyset)`` pair returned by ``get_assembly_sets`` a
    file ``binding_site_atom_surface_areas.credo`` is written below the
    configured ``credo_data`` directory. Entries are skipped when
    ``args.incremental`` is set and a non-empty output exists, or when
    ``args.update`` is set and the output is younger than that many days.

    For each ligand with at least 7 heavy atoms the accessible surface of the
    ligand alone, of its binding site alone and of the complex are computed;
    atoms whose area differs between the unbound and bound state are passed to
    ``write_atoms``. With ``args.dry_run`` the surfaces are computed but
    nothing is written.

    Exits with status 1 if ``args.quat`` is not set, because only quaternary
    structures are supported.
    """
    # get the controller command (not used below)
    cmd = controller.command

    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(
        OEAtomHasIntData(('entity_type_bm', 0)),
        OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=[
            'PDB entries: ', SimpleProgress(), ' ', Percentage(), Bar()
        ], maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar:
            bar.update(counter)

        # create a data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(),
                                       pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas of all atoms will be written
        surface_areas_path = os.path.join(
            struct_data_dir, 'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if (args.incremental and exists(surface_areas_path)
                and getsize(surface_areas_path) > 0):
            continue

        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() - (args.update * 60 * 60 * 24)
              and getsize(surface_areas_path)):
            app.log.info("Output for PDB entry {0} exists and is more recent than {1} days. Skipped."
                         .format(pdb, args.update))
            continue

        # output file stream and CSV writer; the context manager guarantees the
        # file is flushed and closed on every exit path, including exceptions
        # and the sys.exit() below
        with open(surface_areas_path, 'w') as atomfs:
            atomwriter = csv.writer(atomfs, dialect='tabs')

            # deal with each found assembly separately
            # some pdb entries consist of more than one
            for assembly_name in assemblyset:
                if args.quat:
                    path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                        pdb.lower(), assembly_name)
                else:
                    app.log.error("the calculation of buried ligand surface areas "
                                  "is only supported for quaternary structures.")
                    sys.exit(1)

                if not os.path.isfile(path):
                    app.log.warn("cannot calculate buried surface areas: "
                                 "file {} does not exist!".format(path))
                    # nothing to read; previously execution fell through and
                    # produced a second, misleading warning
                    continue

                # get the quaternary structure
                ifs.open(str(path))

                # builtin next() works on Python 2.6+ and 3; None when the
                # stream yields no molecule
                assembly = next(ifs.GetOEGraphMols(), None)

                if not assembly:
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} does not contain a valid molecule!".format(path))
                    continue

                if not assembly.GetListData('ligands'):
                    continue

                # identifier of the assembly
                assembly_serial = assembly.GetIntData('assembly_serial')

                # remove all non-polymers from assembly
                for atom in assembly.GetAtoms(nonpolymers):
                    assembly.DeleteAtom(atom)

                # ignore bizarre assemblies
                if not assembly.NumAtoms():
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} contains assembly with no atoms!".format(path))
                    continue

                # keep only the location state with the largest average occupancy
                # NOTE(review): this relies on the factory's *current* state being
                # the highest-occupancy one -- confirm against the OEChem docs.
                assembly_hi_occ = oechem.OEGraphMol()
                assembly_factory = oechem.OEAltLocationFactory(assembly)
                assembly_factory.MakeCurrentAltMol(assembly_hi_occ)

                # get the ligands
                ligands = assembly_hi_occ.GetListData('ligands')

                # iterate through all ligands of the biomolecule and calculate the
                # buried surface area atom contributions for all involved atoms
                for ligand in ligands:

                    # ignore small ligands
                    if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7:
                        continue

                    entity_serial = ligand.GetIntData('entity_serial')

                    # same alternate location reduction for the ligand itself
                    altlig = oechem.OEGraphMol()
                    ligand_factory = oechem.OEAltLocationFactory(ligand)
                    ligand_factory.MakeCurrentAltMol(altlig)

                    cmplx_srf = oespicoli.OESurface()
                    ligand_srf = oespicoli.OESurface()

                    # make solvent-accessible surface of ligand
                    oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig, 0.5, 1.4)

                    # get the atom contributions of the ligand surface
                    ligand_atom_areas = get_atom_surface_areas(altlig, ligand_srf)

                    # extract the binding site of the assembly to speed up surface
                    # area calculation
                    binding_site = get_binding_site(assembly_hi_occ, altlig)

                    # make solvent-accessible surface of binding site
                    binding_site_srf = oespicoli.OESurface()
                    oespicoli.OEMakeAccessibleSurface(binding_site_srf,
                                                      binding_site, 0.5, 1.4)

                    # get the atom contributions of the binding site surface
                    binding_site_atom_areas = get_atom_surface_areas(
                        binding_site, binding_site_srf)

                    # create complex: binding site atoms first, ligand atoms after
                    cmplx = oechem.OEGraphMol()
                    oechem.OEAddMols(cmplx, binding_site)
                    oechem.OEAddMols(cmplx, altlig)

                    # make solvent-accessible surface of the complex
                    oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx, 0.5, 1.4)

                    # surface area atom contributions of the whole complex
                    cmplx_atom_areas = get_atom_surface_areas(cmplx, cmplx_srf)

                    # extract the atom surface areas in the bound state through
                    # slices; the split point follows the OEAddMols order above
                    site_atom_count = binding_site.NumAtoms()
                    binding_site_atom_areas_bound = cmplx_atom_areas[:site_atom_count]
                    ligand_atom_areas_bound = cmplx_atom_areas[site_atom_count:]

                    # difference between apo and bound state per atom
                    binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                    ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                    # boolean map indicating for which atom the surface area has changed
                    binding_site_atom_map = binding_site_delta != 0
                    ligand_atom_map = ligand_delta != 0

                    if args.dry_run:
                        continue

                    # only record the atoms where the solvent-accessible surface
                    # area has actually changed
                    write_atoms(atomwriter, binding_site, binding_site_atom_map,
                                pdb, assembly_serial, entity_serial,
                                binding_site_atom_areas,
                                binding_site_atom_areas_bound)

                    # only record the atoms where the solvent-accessible surface
                    # area has actually changed
                    write_atoms(atomwriter, altlig, ligand_atom_map, pdb,
                                assembly_serial, entity_serial,
                                ligand_atom_areas, ligand_atom_areas_bound)

                # NOTE(review): nesting level reconstructed from flattened source;
                # logged once per assembly after all of its ligands -- confirm.
                app.log.debug("wrote buried surface areas for all ligands in "
                              "biomolecule {} to {}.".format(
                                  pdb, surface_areas_path))

    # release the molecule stream once all entries have been processed
    ifs.close()

    if args.progressbar:
        bar.finish()
oechem.OEThrow.Usage("%s <mol-infile>" % sys.argv[0]) ims = oechem.oemolistream() if not ims.open(sys.argv[1]): oechem.OEThrow.Fatal("Unable to open %s for reading" % sys.argv[1]) # @ <SNIPPET-ALTLOCFACT-MAKEALTMOL-FLAVOR> ims.SetFlavor(oechem.OEFormat_PDB, oechem.OEIFlavor_PDB_ALTLOC) # @ </SNIPPET-ALTLOCFACT-MAKEALTMOL-FLAVOR> for mol in ims.GetOEGraphMols(): if not oechem.OEHasResidues(mol): oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All) # @ <SNIPPET-ALTLOCFACT-MAKEALTMOL-ALF> alf = oechem.OEAltLocationFactory(mol) # @ </SNIPPET-ALTLOCFACT-MAKEALTMOL-ALF> print("# atoms original.. %d" % alf.GetSourceMol().GetMaxAtomIdx()) atom = alf.GetAltAtoms() if atom.IsValid(): # @ <SNIPPET-ALTLOCFACT-MAKEALTMOL-SSMOL> # given OEAltLocationFactory alf and OEAtomBaseIter atom ... loc = alf.GetLocation(atom.next(), 'B') ssmol = oechem.OEGraphMol() if alf.MakeAltMol(ssmol, loc): # use the subset mol... # @ </SNIPPET-ALTLOCFACT-MAKEALTMOL-SSMOL> print("# atoms subset.... %d" % ssmol.GetMaxAtomIdx()) else: print("no alternate location atoms")