def keep_molecule(mol, max_heavy_atoms=100, remove_smirks=list(), max_metals=0,
                  elements=[], check_type=None):
    """
    Decide whether a molecule passes all filtering criteria.

    Parameters
    ----------
    mol - OEChem molecule to test
    max_heavy_atoms - molecules with more heavy atoms than this are rejected
    remove_smirks - iterable of substructure patterns; a molecule matching any
        of them is rejected (patterns that fail to parse are skipped)
    max_metals - molecules with more metal atoms than this are rejected
    elements - handed to read_Elements; the element check is skipped when None
    check_type - comma-separated atom types handed to check_atomtype;
        the atom type check is skipped when None

    Returns
    -------
    False as soon as one filter rejects the molecule, otherwise the result
    of check_valence(mol)

    Notes
    -----
    The list defaults are only ever read, never mutated, so sharing them
    between calls is harmless; they are kept so existing callers (and the
    default behaviour of the element check) are unaffected.
    """
    if oechem.OECount(mol, oechem.OEIsMetal()) > max_metals:
        return False

    # Count heavy atoms once and reuse the value for both size limits.
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_atoms > max_heavy_atoms:
        return False
    # Remove very small molecules that are not interesting
    if heavy_atoms < 5:
        return False

    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            continue
        ss = oechem.OESubSearch(qmol)
        # Stop at the first hit instead of materializing every match.
        if any(True for _ in ss.Match(mol, False)):
            return False

    if elements is not None:
        elements_list = read_Elements(elements)
        if not check_element(mol, elements_list):
            return False

    if check_type is not None:
        types = check_type.split(",")
        if not check_atomtype(mol, types):
            return False

    return check_valence(mol)
def extract_molecule_torsion_data(parent_mol, frag_mols=None):
    """
    Measure, in the parent molecule, the dihedral angle of every torsion
    motif found in its torsion fragments.

    Each torsion is keyed by the fragment InChIKey concatenated with the
    modified InChI key of the four torsion atoms.

    @param parent_mol: molecule whose coordinates are used for the angles
    @type parent_mol: oechem.OEGraphMol
    @param frag_mols: torsion fragments of parent_mol; computed with
        get_molecule_torsion_fragments(parent_mol) when None
    @return: tuple(str, dict[str, list[tuple]]) - the modified InChI of the
        parent and, per torsion key, a list of
        (a_idx, b_idx, c_idx, d_idx, angle_in_degrees) using parent indices
    """
    fragments = frag_mols
    if fragments is None:
        fragments = get_molecule_torsion_fragments(parent_mol)

    torsion_data = collections.defaultdict(list)
    for fragment in fragments:
        fragment_key = oechem.OECreateInChIKey(fragment)
        to_parent = get_fragment_to_parent_atom_mapping(parent_mol, fragment)
        try:
            _, b, c, _ = get_torsion_oeatom_list(fragment)
            for a in b.GetAtoms(oechem.OEIsHeavy()):
                for d in c.GetAtoms(oechem.OEIsHeavy()):
                    # a and d must be distinct from the central bond atoms
                    if a.GetIdx() == c.GetIdx() or d.GetIdx() == b.GetIdx():
                        continue

                    quartet = [a, b, c, d]
                    parents = [to_parent[atom] for atom in quartet]

                    # only trust the mapping when the elements agree
                    same_elements = all(
                        frag_atom.GetAtomicNum() == parent_atom.GetAtomicNum()
                        for frag_atom, parent_atom in zip(quartet, parents))
                    if not same_elements:
                        continue

                    ap, bp, cp, dp = parents
                    radians = oechem.OEGetTorsion(parent_mol, ap, bp, cp, dp)
                    angle = radians * oechem.Rad2Deg
                    torsion_key = fragment_key + get_modified_inchi_key(
                        fragment, quartet)
                    torsion_data[torsion_key].append(
                        (ap.GetIdx(), bp.GetIdx(), cp.GetIdx(), dp.GetIdx(),
                         angle))
        except Exception as e:
            # best effort: report the problem and move on to the next fragment
            logging.warning(e)

    parent_inchi = get_modified_molecule_inchi(parent_mol)
    return (parent_inchi, torsion_data)
def keep_molecule(mol, remove_smirks=list()):
    """
    Tell whether a molecule should be stored.

    Parameters
    ----------
    mol - OEMol
    remove_smirks - list of pattern strings that must not occur in the
        molecule (patterns that cannot be parsed are ignored)

    Returns
    -------
    boolean - False when the molecule
        - contains any metal atom,
        - has more than 200 heavy atoms, or
        - matches one of the patterns in remove_smirks;
      otherwise the outcome of check_valence(mol)
    """
    # Reject anything containing metals
    metal_total = oechem.OECount(mol, oechem.OEIsMetal())
    if metal_total > 0:
        return False

    # Reject molecules that are too large
    heavy_total = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_total > 200:
        return False

    # Reject molecules containing an unwanted pattern
    for pattern in remove_smirks:
        query = oechem.OEQMol()
        if oechem.OEParseSmarts(query, pattern):
            searcher = oechem.OESubSearch(query)
            hits = list(searcher.Match(mol, False))
            if hits:
                return False

    # Finally the valence check decides
    return check_valence(mol)
def assign_canonical_idx(mol):
    """
    Store on every heavy atom of mol the index that atom receives after a
    round trip through the SMILES produced by OEMolToSmiles.

    Heavy atoms are numbered through their map index (1, 2, ...), the
    molecule is written to SMILES and parsed back, and the index of each
    re-parsed atom is saved on the matching original atom under
    CANONICAL_IDX_TAG. Map indices of mol are overwritten in the process.
    """
    # start from a clean slate: no atom carries a map index
    for any_atom in mol.GetAtoms():
        any_atom.SetMapIdx(0)

    # number the heavy atoms 1..N so they can be found again after the round trip
    for label, heavy_atom in enumerate(mol.GetAtoms(oechem.OEIsHeavy()), start=1):
        heavy_atom.SetMapIdx(label)

    round_tripped = oechem.OEGraphMol()
    oechem.OESmilesToMol(round_tripped, oechem.OEMolToSmiles(mol))

    for new_atom in round_tripped.GetAtoms(oechem.OEIsHeavy()):
        same_label = oechem.OEHasMapIdx(new_atom.GetMapIdx())
        original_atom = mol.GetAtom(same_label)
        original_atom.SetData(CANONICAL_IDX_TAG, new_atom.GetIdx())
def get_dihedrals(mol, itag):
    """
    Walk over the bonds selected by IsRotatableOrMacroCycleBond and, for each
    one that has a heavy neighbor on both sides, register the four dihedral
    atoms (and the three bonds between them) as a group on the molecule.

    :type mol: oechem.OEMol
    :type itag: int
    :return: Number of dihedral groups that were created
    :rtype: int
    """

    def first_heavy_neighbor(center, excluded):
        # first heavy neighbor of `center` that is not `excluded`, else None
        candidates = center.GetAtoms(oechem.OEIsHeavy())
        return next((nbr for nbr in candidates if nbr != excluded), None)

    created = 0
    for bond in mol.GetBonds(IsRotatableOrMacroCycleBond()):
        bgn = bond.GetBgn()
        end = bond.GetEnd()

        bgn_nbr = first_heavy_neighbor(bgn, end)
        end_nbr = first_heavy_neighbor(end, bgn)
        if bgn_nbr is None or end_nbr is None:
            continue

        dihedral_atoms = [bgn_nbr, bgn, end, end_nbr]
        dihedral_bonds = [
            mol.GetBond(bgn_nbr, bgn),
            bond,
            mol.GetBond(end_nbr, end),
        ]
        # flip the orientation when the begin-side neighbor has the lower index
        if bgn_nbr.GetIdx() < end_nbr.GetIdx():
            dihedral_atoms.reverse()
            dihedral_bonds.reverse()

        atom_vector = oechem.OEAtomVector(dihedral_atoms)
        bond_vector = oechem.OEBondVector(dihedral_bonds)
        created += 1
        mol.NewGroup(itag, atom_vector, bond_vector)

    return created
def main(argv=[__name__]):
    """
    Split a molecule database into one file per server and index each file.

    Usage: <prog> <database> <prefix> <n_servers>

    Every molecule is prepared with OEPrepareFastROCSMol and written to the
    output stream selected by ``heavy atom count % n_servers``. Output files
    are named ``<base>_<i>.<ext>`` for i = 1..n_servers, where the extension
    comes from the prefix if it has one and from the input file otherwise.
    Each output file is indexed with OECreateMolDatabaseIdx after closing.

    Returns 0 on success; errors are reported through oechem.OEThrow.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <database> <prefix> <n_servers>" % argv[0])

    # input - preserve rotor-offset-compression
    ifs = oechem.oemolistream()
    oechem.OEPreserveRotCompress(ifs)

    ifname = argv[1]
    if not ifs.open(ifname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    # output
    prefix = argv[2]
    ext = oechem.OEGetFileExtension(prefix)
    extstrt = len(prefix)
    if ext:
        extstrt = -(len(ext) + 1)
    else:
        ext = oechem.OEGetFileExtension(ifname)
    base = prefix[:extstrt]

    nservers = int(argv[3])
    # Without at least one output stream the modulo dispatch below cannot work.
    if nservers < 1:
        oechem.OEThrow.Fatal(
            "n_servers must be a positive integer, got %s" % argv[3])

    outstrms = []
    for i in range(1, nservers + 1):
        # Build the name directly so a '%' inside the prefix cannot be
        # misread as a format directive.
        fname = "%s_%i.%s" % (base, i, ext)
        ofs = oechem.oemolostream()
        if not ofs.open(fname):
            # report the file that actually failed, not just the prefix
            oechem.OEThrow.Fatal("Unable to open %s for writing" % fname)
        outstrms.append(ofs)

    dots = oechem.OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        oefastrocs.OEPrepareFastROCSMol(mol)

        nhvyatoms = oechem.OECount(mol, oechem.OEIsHeavy())
        ofs = outstrms[nhvyatoms % nservers]
        oechem.OEWriteMolecule(ofs, mol)

        dots.Update()
    dots.Total()
    ifs.close()

    for strm in outstrms:
        fname = strm.GetFileName()
        strm.close()
        oechem.OEThrow.Info("Indexing %s" % fname)
        if not oechem.OECreateMolDatabaseIdx(fname):
            oechem.OEThrow.Fatal("Failed to index %s" % fname)

    return 0
def heavy_atom_count(self):
    """
    Return how many heavy atoms the wrapped molecule contains.

    Parameters
    ----------

    Returns
    -------
    int, count of atoms in self.mol matching oechem.OEIsHeavy
    """
    molecule = self.mol
    is_heavy = oechem.OEIsHeavy()
    return oechem.OECount(molecule, is_heavy)
def GetMinAndMaxBFactor(ligand, protein, maxdistance=4.0):
    """
    Return the (minimum, maximum) B-factor over the heavy atoms of the ligand
    and the protein atoms within maxdistance of them that pass
    ConsiderResidueAtom.

    B-factors are read from the residue record of each atom. When no atom
    contributes, (inf, -inf) is returned.
    """
    lowest = float("inf")
    highest = float("-inf")

    # Ligand atoms
    for lig_atom in ligand.GetAtoms(oechem.OEIsHeavy()):
        bfactor = oechem.OEAtomGetResidue(lig_atom).GetBFactor()
        if bfactor < lowest:
            lowest = bfactor
        if bfactor > highest:
            highest = bfactor

    # Protein atoms close to ligand atoms
    neighbor_search = oechem.OENearestNbrs(protein, maxdistance)
    for lig_atom in ligand.GetAtoms(oechem.OEIsHeavy()):
        for contact in neighbor_search.GetNbrs(lig_atom):
            prot_atom = contact.GetBgn()
            residue = oechem.OEAtomGetResidue(prot_atom)
            if not ConsiderResidueAtom(prot_atom, residue):
                continue
            bfactor = residue.GetBFactor()
            if bfactor < lowest:
                lowest = bfactor
            if bfactor > highest:
                highest = bfactor

    return lowest, highest
def get_labeled_mol(smiles, label='heavy'):
    """
    Build an OEMol from a SMILES string and number its heavy atoms.

    Every heavy atom receives generic data stored under the tag ``label``
    holding its 1-based position in the heavy-atom iteration order.

    Parameters
    ----------
    smiles - SMILES string to parse
    label - data tag used for the numbering (defaults to 'heavy')

    Returns
    -------
    the labeled OEMol, or None (after printing a message) when the SMILES
    cannot be parsed
    """
    mol = oechem.OEMol()
    if not oechem.OESmilesToMol(mol, smiles):
        print("Couldn't parse smiles (%s) returning None" % smiles)
        return None
    for idx, atom in enumerate(mol.GetAtomIter(oechem.OEIsHeavy()), start=1):
        # honor the requested tag; previously 'heavy' was always used
        atom.SetData(label, idx)
    return mol
def MarkBridgingAtoms(BRIDGE_ATOM_IDX, mol, torsionSet):
    """
    Pull the not-yet-included heavy neighbors of N/O/S atoms into the torsion
    set and tag them through their map index.

    Only N, O or S atoms of ``mol`` whose map index is 2 are considered. Each
    of their heavy neighbors that is not in ``torsionSet`` and still has map
    index 0 is added to ``torsionSet``; it gets map index 3 when it has a
    single heavy neighbor itself and ``BRIDGE_ATOM_IDX`` otherwise.

    :param BRIDGE_ATOM_IDX: map index assigned to bridging atoms
    :param mol: molecule whose atoms are inspected and tagged (modified in place)
    :param torsionSet: atom/bond set that is extended in place
    """
    # predicate matching nitrogen, oxygen or sulfur
    NorOorS = oechem.OEOrAtom(
        oechem.OEOrAtom(oechem.OEIsNitrogen(), oechem.OEIsOxygen()),
        oechem.OEIsSulfur())
    for atom in mol.GetAtoms(
            oechem.OEAndAtom(oechem.OEHasMapIdx(2), NorOorS)):
        for nbr in atom.GetAtoms(oechem.OEIsHeavy()):
            if not torsionSet.HasAtom(nbr):
                if nbr.GetMapIdx() == 0:
                    torsionSet.AddAtom(nbr)
                    # terminal heavy atom: tag with 3 and skip the bridge tag
                    if nbr.GetHvyDegree() == 1:
                        nbr.SetMapIdx(3)
                        continue
                    nbr.SetMapIdx(BRIDGE_ATOM_IDX)
def SetAverageBFactorOfNearbyProteinAtoms(ligand, protein, itag, maxdistance=4.0):
    """
    Attach to every heavy ligand atom the mean B-factor of the protein atoms
    found within maxdistance of it.

    Only protein atoms accepted by ConsiderResidueAtom contribute. The mean
    is stored as double data under ``itag``; it is 0.0 when no protein atom
    contributes.
    """
    neighbor_search = oechem.OENearestNbrs(protein, maxdistance)

    for lig_atom in ligand.GetAtoms(oechem.OEIsHeavy()):
        bfactor_total = 0.0
        contributors = 0
        for contact in neighbor_search.GetNbrs(lig_atom):
            prot_atom = contact.GetBgn()
            residue = oechem.OEAtomGetResidue(prot_atom)
            if ConsiderResidueAtom(prot_atom, residue):
                bfactor_total += residue.GetBFactor()
                contributors += 1

        mean_bfactor = bfactor_total / contributors if contributors > 0 else 0.0
        lig_atom.SetDoubleData(itag, mean_bfactor)
def GetFuncGroups(mol):
    '''
    Collect the functional group fragments of a molecule that have at most
    five heavy atoms, at least one hetero atom and no ring atoms.

    :param mol: molecule passed to oemedchem.OEGetFuncGroupFragments
    :return: list of oechem.OEAtomBondSet, one per accepted fragment
    '''
    accepted = []
    for fragment in oemedchem.OEGetFuncGroupFragments(mol):
        keep = (oechem.OECount(fragment, oechem.OEIsHeavy()) <= 5
                and oechem.OECount(fragment, oechem.OEIsHetero()) != 0
                and oechem.OECount(fragment, oechem.OEAtomIsInRing()) <= 0)
        if keep:
            accepted.append(oechem.OEAtomBondSet(fragment))
    return accepted
def main(argv=[__name__]):
    """
    Print a one-line property summary for every molecule of an input file.

    Usage: <prog> <infile>

    The columns are title, molecular weight, atom count, heavy atom count,
    ring atom count, rotor count and conformer count; molecules without a
    title are shown as "Untitled".
    """
    if len(argv) != 2:
        oechem.OEThrow.Usage("%s <infile>" % argv[0])

    infile = argv[1]
    ifs = oechem.oemolistream()
    if not ifs.open(infile):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % infile)

    print("Title MolWt NumAtoms NumHeavyAtoms NumRingAtoms NumRotors NumConfs")
    row_format = "%s %.3f %d %d %d %d %d"
    for mol in ifs.GetOEMols():
        row = (
            mol.GetTitle() or "Untitled",
            oechem.OECalculateMolecularWeight(mol),
            mol.NumAtoms(),
            oechem.OECount(mol, oechem.OEIsHeavy()),
            oechem.OECount(mol, oechem.OEAtomIsInRing()),
            oechem.OECount(mol, oechem.OEIsRotor()),
            mol.NumConfs(),
        )
        print(row_format % row)
def construct_dihedral_energy_profile(torsion_conformers, num_points=24):
    """
    Combine per-angle torsion conformers into one multi-conformer molecule
    and its relative energy profile.

    Parameters
    ----------
    torsion_conformers : sequence of molecules, one per sampled angle.
        Falsy entries are skipped. Each valid entry must carry the SD tags
        "CONFORMER_LABEL" (whose last two characters are the angle index)
        and "PSI4_ENERGY" on its active conformer, and
        "TORSION_ATOMS_ParentMol" on the molecule; "TORSION_ATOMS_FRAGMENT"
        and "TORSION_ANGLE" are read as well.
    num_points : int
        Number of equally spaced angles covering 0..360 degrees.

    Returns
    -------
    (output_conformers, torsional_strain, torsion_inchi_list)
        output_conformers : copy of the first valid entry holding one
            normalized conformer per valid input, titled with the fragment
            label and annotated with the energy profile.
        torsional_strain : array with one (angle, relative energy) row per
            point; angles without an energy are marked with -1.
        torsion_inchi_list : torsion keys for every heavy-atom a-b-c-d
            combination around the central torsion bond.

    Raises
    ------
    ValueError
        If torsion_conformers contains no valid (truthy) entry.
    """
    angle_list = np.array([360 * i / num_points for i in range(num_points)])

    valid_conformers = [mol for mol in torsion_conformers if mol]
    if not valid_conformers:
        # Everything below needs at least one conformer to read labels from.
        raise ValueError(
            "construct_dihedral_energy_profile: no valid torsion conformers")

    profile = np.full(num_points, np.nan)
    for mol in valid_conformers:
        conf = mol.GetActive()
        conf_title = get_sd_data(conf, "CONFORMER_LABEL")

        tor_atoms = get_sd_data(mol, "TORSION_ATOMS_ParentMol").split()
        parent_name = conf_title[:-3]
        dih_label = "_".join(str(x) for x in tor_atoms)
        fragment_label = parent_name + "_" + dih_label
        angle_idx = int(conf_title[-2:])

        # builtin float: the np.float alias no longer exists in current NumPy
        energy = float(get_sd_data(conf, "PSI4_ENERGY"))
        profile[angle_idx] = energy
        logging.debug("angle_idx: %d", angle_idx)
        logging.debug("Psi4 Energy: %f", energy)

    # check for angles where no energies are available
    # (element-wise mask; a scalar np.all() here would select all or nothing)
    for angle in angle_list[np.isnan(profile)]:
        logging.warning(
            "Warning: No energies found for angle {:.1f} for fragment: {}".
            format(angle, fragment_label))

    # calculate relative energies
    min_energy = np.nanmin(profile)
    profile -= min_energy
    profile[np.isnan(profile)] = -1  # set nans to -1
    torsional_strain = np.column_stack((angle_list, profile))

    # combine conformers, using the first valid entry as the template
    output_conformers = oechem.OEMol(valid_conformers[0])
    output_conformers.DeleteConfs()
    title = fragment_label
    output_conformers.SetTitle(title)

    # setup normalization (tag read from the last valid entry)
    torsion_tag = "TORSION_ATOMS_FRAGMENT"
    torsion_atoms_in_fragment = get_sd_data(valid_conformers[-1],
                                            torsion_tag).split()
    logging.debug("torsion atoms in fragment: %s", torsion_atoms_in_fragment)
    dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
    dih, _ = get_dihedral(output_conformers, dihedral_atom_indices)

    for old_conf in valid_conformers:
        new_conf = output_conformers.NewConf(old_conf)
        normalize_coordinates(new_conf, dih)
        oechem.OEClearSDData(new_conf)
        for dp in oechem.OEGetSDDataPairs(old_conf.GetActive()):
            if dp.GetTag() not in ["OEConfTitle", "CONFORMER_LABEL"]:
                oechem.OESetSDData(new_conf, dp.GetTag(), dp.GetValue())
        torsion_angle = get_sd_data(old_conf, "TORSION_ANGLE")
        title = fragment_label + ": Angle " + torsion_angle
        new_conf.SetTitle(title)

    write_energy_profile_to_sddata(output_conformers, torsional_strain.copy())

    # Calculate all possible torsion inchi keys for this fragment
    torsion_inchi_list = []
    inchi_key = oechem.OECreateInChIKey(output_conformers)
    _, b, c, _ = get_torsion_oeatom_list(output_conformers)
    for a in b.GetAtoms(oechem.OEIsHeavy()):
        for d in c.GetAtoms(oechem.OEIsHeavy()):
            if a.GetIdx() == c.GetIdx() or d.GetIdx() == b.GetIdx():
                continue

            torsion_inchi = inchi_key + get_modified_inchi_key(
                output_conformers, [a, b, c, d])
            torsion_inchi_list.append(torsion_inchi)

    return output_conformers, torsional_strain, torsion_inchi_list
def IsMoleculeInHeavyAtomCountRange(min, max, mol):
    """
    Return IsBetween(min, max, n) where n is the number of heavy atoms of mol.
    """
    return IsBetween(min, max, oechem.OECount(mol, oechem.OEIsHeavy()))
def GetTorsions(mol):
    '''
    Goes through each rotatable bond in the molecule and extracts torsion
    atoms (a-b-c-d) together with their chemical environment as a fragment

    Core torsion atoms are extended by one bond
    If core or extended atoms are part of a ring, then entire ring is kept
    Keep ortho substitution
    Keep functional groups that have at least one atom overlap with the
    core/extended torsion atoms
    Functional group inclusion criteria:
    - <= 5 heavy atoms
    - must contain at least one hetero atom
    - non-ring
    Add methyl cap if bond involving hetero atom is broken

    Atom map indices of mol are used as scratch space while a fragment is
    built (1 = core torsion atoms, 2 = atoms added around the core,
    BRIDGE_ATOM_IDX = bridging atoms, 11-14 = torsion atoms a-d) and are
    reset to zero before returning. Every atom of mol additionally receives
    generic data "idx" holding its 1-based atom index, and hybridization is
    assigned on mol.

    @param mol: OEGraphMol
    @type mol: OEGraphMol
    @return: list[OEGraphMol], one fragment per distinct central bond
    '''
    # mol = OEGraphMol(input_mol)
    oechem.OEAssignHybridization(mol)
    funcGrps = TorsionGenerator.GetFuncGroups(mol)
    # central bonds for which a fragment has already been generated
    includedTorsions = oechem.OEAtomBondSet()
    torsionMols = []

    # remember the 1-based parent atom index on every atom
    for atom in mol.GetAtoms():
        atom.SetData("idx", atom.GetIdx() + 1)

    # fall back to the plain rotor torsions when canonical ordering fails
    torsions = get_canonical_torsions(mol)
    if torsions is None:
        torsions = oechem.OEGetTorsions(mol, oechem.OEIsRotor())

    for torsion in torsions:
        if torsion.a.IsHydrogen() or torsion.b.IsHydrogen() or \
                torsion.c.IsHydrogen() or torsion.d.IsHydrogen():
            continue

        # only one fragment per central bond
        torsion_bond = mol.GetBond(torsion.b, torsion.c)
        if includedTorsions.HasBond(torsion_bond):
            continue
        # if includedTorsions.HasAtom(torsion.b) and \
        #     includedTorsions.HasAtom(torsion.c):
        #     continue

        # revert map idx to zero in original mol
        for atom in mol.GetAtoms():
            atom.SetMapIdx(0)

        # includedTorsions.AddAtom(torsion.b)
        # includedTorsions.AddAtom(torsion.c)
        includedTorsions.AddBond(torsion_bond)

        # the set is constructed from all bonds of mol; atoms are added selectively
        torsionSet = oechem.OEAtomBondSet(mol.GetBonds())
        torsionSet.AddAtoms([torsion.a, torsion.b, torsion.c, torsion.d])
        # map idx 1 marks the four core torsion atoms
        for atom in torsionSet.GetAtoms():
            atom.SetMapIdx(1)

        # extend core torsion atoms by one bond
        nbrs = TorsionGenerator.GetNbrs(torsionSet)
        torsionSet.AddAtoms(nbrs)

        # include ring atoms
        ringAtoms = TorsionGenerator.GetSameRingAtoms(mol, torsionSet)
        torsionSet.AddAtoms(ringAtoms)

        # map idx 2 marks everything added around the core so far
        for atom in torsionSet.GetAtoms():
            if not atom.GetMapIdx() == 1:
                atom.SetMapIdx(2)

        # add functional groups that overlap with torsion set
        TorsionGenerator.AddFuncGroupAtoms(funcGrps, torsionSet)

        # add relevant ring atoms (ortho substituents and ring H)
        TorsionGenerator.AddRelevantRingAtoms(mol, torsion, torsionSet)

        # special treatment for C=O
        # for each sp2 oxygen in the set, also include the heavy neighbors
        # of its in-set neighbors
        # NOTE(review): torsionSet is extended while its atoms are being
        # iterated - confirm this is safe for OEAtomBondSet iterators
        for atom in torsionSet.GetAtoms(
                oechem.OEAndAtom(
                    oechem.OEIsOxygen(),
                    oechem.OEIsAtomHybridization(
                        oechem.OEHybridization_sp2))):
            for nbr in atom.GetAtoms():
                if torsionSet.HasAtom(nbr):
                    for nbr2 in nbr.GetAtoms(oechem.OEIsHeavy()):
                        if not torsionSet.HasAtom(nbr2):
                            nbr2.SetMapIdx(2)
                            torsionSet.AddAtom(nbr2)

        # mark bridging atom and cap if needed
        BRIDGE_ATOM_IDX = 4
        TorsionGenerator.MarkBridgingAtoms(BRIDGE_ATOM_IDX, mol, torsionSet)

        # tag the four torsion atoms so they can be located in the fragment
        A_IDX = 11
        B_IDX = 12
        C_IDX = 13
        D_IDX = 14
        torsion.a.SetMapIdx(A_IDX)
        torsion.b.SetMapIdx(B_IDX)
        torsion.c.SetMapIdx(C_IDX)
        torsion.d.SetMapIdx(D_IDX)

        # copy the selected atoms/bonds into a new molecule
        torsionMol = oechem.OEGraphMol()
        oechem.OESubsetMol(torsionMol, mol, torsionSet, True)
        torsionMol.Sweep()
        torsionMols.append(torsionMol)

        # change bridge atom to Carbon
        # and fill its valence up to four with implicit hydrogens
        for atom in torsionMol.GetAtoms(
                oechem.OEHasMapIdx(BRIDGE_ATOM_IDX)):
            atom.SetAtomicNum(oechem.OEElemNo_C)
            explicit_valence = atom.GetExplicitValence()
            if explicit_valence < 4:
                atom.SetImplicitHCount(4 - explicit_valence)

        TorsionGenerator.SetSDData(A_IDX, B_IDX, C_IDX, D_IDX, torsion,
                                   torsionMol)

        # set map idx to zero in torsion mol
        for atom in torsionMol.GetAtoms():
            atom.SetMapIdx(0)

    # revert map idx to zero in original mol
    for atom in mol.GetAtoms():
        atom.SetMapIdx(0)

    return torsionMols
# create subdirectory for this set if not os.path.exists(fileprefix): os.makedirs(fileprefix) os.chdir(fileprefix) # copy temporary files copyfile('../../frcmod.Frosst_AlkEthOH', './frcmod.Frosst_AlkEthOH') copyfile('../../leaprc.Frosst_AlkEthOH', './leaprc.Frosst_AlkEthOH') copyfile('../../' + fileprefix + '.oeb', './' + fileprefix + '.oeb') ifs = oechem.oemolistream(fileprefix + '.oeb') mol = oechem.OEMol() for mol in ifs.GetOEMols(): # add atom names c0 (methane) and c1302 (water) if (oechem.OECount(mol, oechem.OEIsHeavy()) == 1): oechem.OETriposAtomNames(mol) # generate input files if hasAmberParams(mol, cmd_string): print('%s successful writing amber .mol2, .top, and .crd file' % mol.GetTitle()) # treat water with diff pre-existing tleap input file elif mol.GetTitle().split("_")[1] == 'c1302': copyfile('../../files_for_c1302/frcmod.tip3p', './frcmod.tip3p') copyfile('../../files_for_c1302/AlkEthOH_c1302_edited.leap_in', './AlkEthOH_c1302_edited.leap_in') os.system( 'tleap -f leaprc.Frosst_AlkEthOH -f AlkEthOH_c1302_edited.leap_in >| leap_lig.stdout' ) print('%s successful writing amber .mol2, .top, and .crd file' %
if not frag in frags: print('{} not in {}'.format(frag, bond)) failures[ser_bond] = frag continue idx = frags.index(frag) sqrt_mmd = np.sqrt(np.asarray(mmd_scores)) norm = plt.Normalize(min(sqrt_mmd), max(sqrt_mmd)) normed_scores = norm(sqrt_mmd) score = sqrt_mmd[idx] normed_score = normed_scores[idx] print(f.fragments) if tuple(bond) not in f.fragments: bond = tuple(reversed(bond)) mol = f.fragments[tuple(bond)] size = oechem.OECount(mol, oechem.OEIsHeavy()) score_size[ser_bond] = [frag, score, normed_score, size] if ser_bond not in frag_scores_2: continue frags_2 = frag_scores_2[ser_bond]['frags'] mmd_scores_2 = frag_scores_2[ser_bond]['mmd_scores'] sqrt_mmd_2 = np.sqrt(np.asarray(mmd_scores_2)) idx_2 = frags_2.index(frag) score_2 = sqrt_mmd_2[idx_2] norm_2 = plt.Normalize(min(sqrt_mmd_2), max(sqrt_mmd_2)) normed_scores_2 = norm_2(sqrt_mmd_2) normed_score_2 = normed_scores_2[idx_2] if tuple(bond) not in f.fragments:
#!/usr/bin/env python # (C) 2017 OpenEye Scientific Software Inc. All rights reserved. # # TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is # provided to current licensees or subscribers of OpenEye products or # SaaS offerings (each a "Customer"). # Customer is hereby permitted to use, copy, and modify the Sample Code, # subject to these terms. OpenEye claims no rights to Customer's # modifications. Modification of Sample Code is at Customer's sole and # exclusive risk. Sample Code may require Customer to have a then # current license or subscription to the applicable OpenEye offering. # THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT # NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be # liable for any damages or liability in connection with the Sample Code # or its use. # @ <SNIPPET> from __future__ import print_function from openeye import oechem mol = oechem.OEGraphMol() oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2") print("Number of heavy atoms =", oechem.OECount(mol, oechem.OEIsHeavy())) print("Number of ring atoms =", oechem.OECount(mol, oechem.OEAtomIsInRing())) # @ </SNIPPET>
def do(controller):
    """
    Calculate, per atom, how much solvent-accessible surface area is buried
    when each ligand of a biological assembly binds, and write the result to
    one ``binding_site_atom_surface_areas.credo`` file per PDB entry.

    For every ligand with at least seven heavy atoms the surface areas of the
    isolated ligand, of the isolated binding site and of their complex are
    computed; atoms whose area differs between the isolated and the complexed
    state are handed to write_atoms.

    controller provides the command (``controller.command``) and the parsed
    command line arguments (``controller.pargs``). The arguments read here
    are ``progressbar``, ``incremental``, ``update`` (age in days), ``quat``
    and ``dry_run``.

    The process exits with status 1 when ``quat`` is not set.
    """
    # get the controller command
    # NOTE(review): cmd is not used anywhere below in this function
    cmd = controller.command

    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(
        OEAtomHasIntData(('entity_type_bm', 0)),
        OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    # one input stream is reused for all assembly files
    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=[
            'PDB entries: ', SimpleProgress(), ' ', Percentage(), Bar()
        ], maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar:
            bar.update(counter)

        # create a data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(),
                                       pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas of all atoms will be written
        surface_areas_path = os.path.join(
            struct_data_dir, 'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if args.incremental and exists(
                surface_areas_path) and getsize(surface_areas_path) > 0:
            continue

        # in update mode skip non-empty output younger than args.update days
        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() -
              (args.update * 60 * 60 * 24) and getsize(surface_areas_path)):
            app.log.info("Output for PDB entry {0} exists and is more recent than {1} days. Skipped."\
                         .format(pdb, args.update))
            continue

        # output file stream and CSV writer
        # NOTE(review): the file is opened (and truncated) before the dry_run
        # check further down, and is not closed if an exception is raised
        atomfs = open(surface_areas_path, 'w')
        atomwriter = csv.writer(atomfs, dialect='tabs')

        # deal with each found assembly separately
        # some pdb entries consist of more than one
        for assembly in assemblyset:
            if args.quat:
                path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                    pdb.lower(), assembly)

            else:
                app.log.error("the calculation of buried ligand surface areas "
                              "is only supported for quaternary structures.")
                sys.exit(1)

            # NOTE(review): there is no continue after this warning, so a
            # missing file falls through to the open/read below and is then
            # rejected by the empty-molecule check
            if not os.path.isfile(path):
                app.log.warn("cannot calculate buried surface areas: "
                             "file {} does not exist!".format(path))

            # get the quaternary structure
            # (from here on the name `assembly` refers to the molecule, not the file name)
            ifs.open(str(path))
            # NOTE(review): .next() is the Python 2 iterator protocol -
            # confirm before running this under Python 3
            try:
                assembly = ifs.GetOEGraphMols().next()
            except StopIteration:
                assembly = None

            if not assembly:
                app.log.warn(
                    "cannot calculate buried surface areas: "
                    "file {} does not contain a valid molecule!".format(path))
                continue

            # nothing to do for assemblies without ligands
            if not assembly.GetListData('ligands'):
                continue

            # identifier of the assembly
            assembly_serial = assembly.GetIntData('assembly_serial')

            # remove all non-polymers from assembly
            for atom in assembly.GetAtoms(nonpolymers):
                assembly.DeleteAtom(atom)

            # ignore bizarre assemblies
            if not assembly.NumAtoms():
                app.log.warn(
                    "cannot calculate buried surface areas: "
                    "file {} contains assembly with no atoms!".format(path))
                continue

            # keep only the location state with the largest average occupancy
            assembly_hi_occ = oechem.OEGraphMol()
            altlocfactory = oechem.OEAltLocationFactory(assembly)
            altlocfactory.MakeCurrentAltMol(assembly_hi_occ)

            # get the ligands
            ligands = assembly_hi_occ.GetListData('ligands')

            # iterate through all ligands of the biomolecule and calculate the buried
            # surface area atom contributions for all involved atoms
            for ligand in ligands:

                # ignore small ligands
                if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7:
                    continue

                entity_serial = ligand.GetIntData('entity_serial')

                # keep only the location state with the largest average occupancy
                altlig = oechem.OEGraphMol()
                altlocfactory = oechem.OEAltLocationFactory(ligand)
                altlocfactory.MakeCurrentAltMol(altlig)

                cmplx_srf = oespicoli.OESurface()
                ligand_srf = oespicoli.OESurface()

                # make solvent-accessible surface of ligand
                oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig, 0.5, 1.4)

                # get the atom contributions of the isolated ligand surface
                ligand_atom_areas = get_atom_surface_areas(altlig, ligand_srf)

                # extract the binding site of the assembly to speed up surface
                # area calculation
                binding_site = get_binding_site(assembly_hi_occ, altlig)

                # make solvent-accessible surface of binding site
                binding_site_srf = oespicoli.OESurface()
                oespicoli.OEMakeAccessibleSurface(binding_site_srf,
                                                  binding_site, 0.5, 1.4)

                # get the atom contributions of the isolated binding site surface
                binding_site_atom_areas = get_atom_surface_areas(
                    binding_site, binding_site_srf)

                # create complex
                # (binding site atoms are added first, ligand atoms second)
                cmplx = oechem.OEGraphMol()
                oechem.OEAddMols(cmplx, binding_site)
                oechem.OEAddMols(cmplx, altlig)

                # make solvent-accessible surface of the complex
                oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx, 0.5, 1.4)

                # surface area atom contributions of the whole complex
                cmplx_atom_areas = get_atom_surface_areas(cmplx, cmplx_srf)

                ## extract the atom surface areas in the bound state through slices
                ## (the first binding_site.NumAtoms() entries are taken as the
                ## binding site, the remainder as the ligand)
                binding_site_atom_areas_bound = cmplx_atom_areas[:binding_site.NumAtoms()]
                ligand_atom_areas_bound = cmplx_atom_areas[binding_site.NumAtoms():]

                # difference between apo and bound state per polymer atom
                binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                # boolean map indicating for which atom the surface area has changed
                binding_site_atom_map = binding_site_delta != 0
                ligand_atom_map = ligand_delta != 0

                # a dry run computes everything but writes no rows
                if args.dry_run:
                    continue

                # only record the atoms where the solvent-accessible surface
                # area has actually changed
                write_atoms(atomwriter, binding_site, binding_site_atom_map,
                            pdb, assembly_serial, entity_serial,
                            binding_site_atom_areas,
                            binding_site_atom_areas_bound)

                # only record the atoms where the solvent-accessible surface area
                # has actually changed
                write_atoms(atomwriter, altlig, ligand_atom_map, pdb,
                            assembly_serial, entity_serial, ligand_atom_areas,
                            ligand_atom_areas_bound)

            app.log.debug("wrote buried surface areas for all ligands in "
                          "biomolecule {} to {}.".format(
                              pdb, surface_areas_path))

        atomfs.flush()
        atomfs.close()

    if args.progressbar:
        bar.finish()
def main(argv=[__name__]):
    """
    Print the number of heavy atoms of the molecule supplied through the
    "-mol" option of the command line interface described by InterfaceData.
    """
    interface = oechem.OEInterface(InterfaceData, argv)
    molecule = interface.GetOEGraphMol("-mol")
    heavy_total = oechem.OECount(molecule, oechem.OEIsHeavy())
    print("Number of heavy atoms in molecule = %d" % heavy_total)
def get_canonical_torsions(mol):
    '''
    Return one torsion per rotatable bond, in a canonical order.

    The order is derived from the atom order of the SMILES written by
    OEMolToSmiles:
    1. every heavy atom is given the index it has after a round trip of the
       molecule through that SMILES
    2. each all-heavy-atom rotor torsion gets the sort key
       (b + c, min(b, c), max(b, c), min(a, d), max(a, d)) built from those
       indices
    3. torsions are sorted by that key and only the first torsion of every
       central bond is kept

    Map indices of mol are used as scratch space and are zero on heavy atoms
    afterwards.

    :param mol: OEGraphMol
    :return: list of torsions as yielded by oechem.OEGetTorsions, or None
             when the canonical indices could not be assigned
    '''
    CANONICAL_IDX_TAG = 'can_idx'

    def assign_canonical_idx(mol):
        # wipe all map indices, then number the heavy atoms 1..N
        for any_atom in mol.GetAtoms():
            any_atom.SetMapIdx(0)
        for label, heavy_atom in enumerate(
                mol.GetAtoms(oechem.OEIsHeavy()), start=1):
            heavy_atom.SetMapIdx(label)

        # round trip through SMILES; the map indices link old and new atoms
        round_tripped = oechem.OEGraphMol()
        oechem.OESmilesToMol(round_tripped, oechem.OEMolToSmiles(mol))

        for new_atom in round_tripped.GetAtoms(oechem.OEIsHeavy()):
            original = mol.GetAtom(oechem.OEHasMapIdx(new_atom.GetMapIdx()))
            original.SetData(CANONICAL_IDX_TAG, new_atom.GetIdx())

    def canonical_index(atom):
        return atom.GetData(CANONICAL_IDX_TAG)

    try:
        assign_canonical_idx(mol)
    except Exception as e:
        print('Error GetCanonicalizedTorsions. ', e)
        return None

    ranked = []
    for torsion in oechem.OEGetTorsions(mol, oechem.OEIsRotor()):
        quartet = (torsion.a, torsion.b, torsion.c, torsion.d)
        if any(atom.IsHydrogen() for atom in quartet):
            continue

        idx_b = canonical_index(torsion.b)
        idx_c = canonical_index(torsion.c)
        idx_a = canonical_index(torsion.a)
        idx_d = canonical_index(torsion.d)
        sort_key = (idx_b + idx_c,
                    min(idx_b, idx_c), max(idx_b, idx_c),
                    min(idx_a, idx_d), max(idx_a, idx_d))
        ranked.append((sort_key, torsion))

    # stable sort on the key only
    ranked.sort(key=operator.itemgetter(0))

    seen_bonds = set()
    unique_torsions = []
    for _, torsion in ranked:
        central_bond = mol.GetBond(torsion.b, torsion.c)
        if central_bond is None:
            continue
        bond_idx = central_bond.GetIdx()
        if bond_idx in seen_bonds:
            continue
        seen_bonds.add(bond_idx)
        unique_torsions.append(torsion)

    # revert mol to original state
    for heavy_atom in mol.GetAtoms(oechem.OEIsHeavy()):
        heavy_atom.SetMapIdx(0)
        heavy_atom.DeleteData(CANONICAL_IDX_TAG)

    return unique_torsions
def all_info_df(ffdirectorylist, all_ff_df):
    """
    Run TFD and TanimotoCombo on every molecule for every pair of force
    fields and append the scores, plus the heavy atom count, to the
    dataframe of molecule names.

    Molecules are read from '<directory>/<forcefield>/<molname>.mol2', where
    ``directory`` is a module-level name. If anything goes wrong for a
    molecule, its heavy atom count and all of its scores are set to -1 and
    the error is printed.

    Args:
        ffdirectorylist (list) list of ff to compare
        all_ff_df (dataframe) dataframe with a 'MolNames' column, as
                created by the make_molname_df func.

    Returns:
        all_ff_df (dataframe) the input merged (on 'MolNames') with one
                'TFD <ff1> <ff2>' and one 'TANI <ff1> <ff2>' column per
                force field pair and a 'HeavyAtomCount' column.
    """

    def _read_mol(path):
        # Read the first molecule of a file; the stream is always closed.
        stream = oechem.oemolistream(path)
        try:
            mol = oechem.OEGraphMol()
            oechem.OEReadMolecule(stream, mol)
            return mol
        finally:
            stream.close()

    def _merge_scores(frame, scores, column):
        # Turn a {molname: value} dict into a column and merge it on MolNames.
        tempdf = pd.DataFrame.from_dict(scores, 'index')
        tempdf = tempdf.rename({0: column}, axis='columns')
        tempdf['MolNames'] = tempdf.index
        return frame.merge(tempdf, on='MolNames')

    # Force field pairs and their dictionary keys, computed once
    ff_pairs = list(itertools.combinations(ffdirectorylist, 2))
    pair_keys = ['%s %s' % (i, j) for i, j in ff_pairs]

    # Dictionaries that heavy atom counts, TFD and TANI scores go in
    heavyatomdict = {}
    TFDdict = {key: {} for key in pair_keys}
    TANIdict = {key: {} for key in pair_keys}

    # Generates all the data
    for molname in all_ff_df['MolNames']:
        print(molname)
        mol_file = '%s' % molname + '.mol2'
        try:
            refmolhev = _read_mol(
                '%s/%s/%s' % (directory, ffdirectorylist[0], mol_file))
            heavyatomdict[molname] = oechem.OECount(refmolhev,
                                                    oechem.OEIsHeavy())

            # Gets TanimotoCombo and TFD values
            for (i, j), key in zip(ff_pairs, pair_keys):
                refmol = _read_mol('%s/%s/%s' % (directory, i, mol_file))
                qmol = _read_mol('%s/%s/%s' % (directory, j, mol_file))

                # the same key (molname) is used on success and on failure
                TFDdict[key][molname] = TFD_for_oemols(refmol, qmol)
                TANIdict[key][molname] = tanimotocombo(refmol, qmol)

        except Exception as err:
            # Best effort: flag the molecule with -1 everywhere and go on.
            # Nothing needs closing here; _read_mol owns its streams.
            print('Failed to score %s: %s' % (molname, err))
            heavyatomdict[molname] = -1
            for key in pair_keys:
                TANIdict[key][molname] = -1
                TFDdict[key][molname] = -1

    # Loads data into dataframe
    for key in TFDdict:
        all_ff_df = _merge_scores(all_ff_df, TFDdict[key], 'TFD %s' % key)
    for key in TANIdict:
        all_ff_df = _merge_scores(all_ff_df, TANIdict[key], 'TANI %s' % key)
    all_ff_df = _merge_scores(all_ff_df, heavyatomdict, 'HeavyAtomCount')

    return all_ff_df
def process(self, record, port):
    """
    Run the psi4 energy calculation for the torsion fragment in ``record``.

    The molecule is read from the field ``self.args.in_mol_field``. Its
    torsion atoms come from the SD tags "TORSION_ATOMS_ParentMol" and
    "TORSION_ATOMS_FRAGMENT" (1-based indices). When the fragment contains
    sulfur or a negatively charged heavy atom, 6-31G* / 6-31G** basis sets
    are replaced by their diffuse variants. Start and end times are stored
    as SD data on the selected conformer (``only_selected_conformer``) or on
    the molecule.

    On success a new OEMolRecord holding the molecule is emitted on
    ``self.success``; on any problem the original record is emitted on
    ``self.failure``. ``port`` is not used.
    """
    if not record.has_value(self.args.in_mol_field):
        self.log.error("Could not find molecules in OEMolRecord")
        self.failure.emit(record)
        return
    mol = record.get_value(self.args.in_mol_field)

    parent_torsion_tag = "TORSION_ATOMS_ParentMol"
    torsion_atoms_in_parent = get_sd_data(mol, parent_torsion_tag).split()
    dih_name = mol.GetTitle() + "_" + "_".join(torsion_atoms_in_parent)

    torsion_tag = "TORSION_ATOMS_FRAGMENT"
    torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
    dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
    # A list comprehension is never None; an empty list is the real
    # "no labelled torsion" case.
    if not dihedral_atom_indices:
        self.log.warn("Unable to find labelled torsion in %s" % dih_name)
        self.failure.emit(record)
        return

    # Diffuse functions are needed for sulfur and for anionic atoms:
    # use 6-31+G* instead of 6-31G*
    # use 6-31+G** instead of 6-31G**
    need_diffuse = oechem.OECount(mol, oechem.OEIsSulfur()) > 0 or any(
        atom.GetFormalCharge() < 0
        for atom in mol.GetAtoms(oechem.OEIsHeavy()))

    diffuse_variant = {"6-31G*": "6-31+G*", "6-31G**": "6-31+G**"}

    def with_diffuse(basis, role):
        # Swap in the diffuse basis set when required, logging the change.
        if need_diffuse and basis in diffuse_variant:
            replacement = diffuse_variant[basis]
            self.log.warn(
                "Using %s instead of %s as %s basis because fragment "
                "contains S or a negatively charged atom." %
                (replacement, basis, role))
            return replacement
        return basis

    opt_basis = with_diffuse(self.args.opt_basis, "opt")
    spe_basis = with_diffuse(self.args.spe_basis, "spe")

    def sd_data_target():
        # Object carrying the label and timestamps: the selected conformer
        # when only_selected_conformer is set, otherwise the molecule.
        if not self.args.only_selected_conformer:
            return mol
        key_conf_id = mol.GetIntData("SELECTED_CONFORMER")
        for conf in mol.GetConfs():
            # return the matching conformer itself, not whichever conformer
            # happens to be iterated last
            if conf.GetIdx() == key_conf_id:
                return conf
        raise ValueError("Selected conformer %s not found in %s" %
                         (key_conf_id, mol.GetTitle()))

    def log_and_stamp(message, tag_template):
        # Log the event with host and time, then store the time as SD data.
        target = sd_data_target()
        conf_name = get_sd_data(target, "CONFORMER_LABEL")
        time_stamp = "{:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now())
        hostname = socket.gethostname()
        self.log.info(message % (conf_name, hostname, time_stamp))
        oechem.OESetSDData(target, tag_template % self.name, time_stamp)

    try:
        log_and_stamp("Starting psi4 calculation for %s on %s at %s",
                      "%s start time")

        dih, _ = get_dihedral(mol, dihedral_atom_indices)
        calculate_energy(
            mol,
            dih,
            spe_method=self.args.spe_method,
            spe_basis=spe_basis,
            geom_opt_technique=self.args.geom_opt_technique,
            opt_method=self.args.opt_method,
            opt_basis=opt_basis,
            geom_maxiter=self.args.geom_maxiter,
            only_selected_conf=self.args.only_selected_conformer,
            molden_output=self.args.molden_output,
            **self.psi4opts)

        # the conformer is looked up again after the calculation
        log_and_stamp("Completed psi4 calculation for %s on %s at %s",
                      "%s end time")

        optimized_mol_record = OEMolRecord()
        optimized_mol_record.set_mol(mol)
        self.success.emit(optimized_mol_record)
    except Exception as e:
        print(e)
        # traceback.print_stack()
        self.log.error("Error with {} {}".format(mol.GetTitle(), e))
        self.failure.emit(record)