def main(argv=[__name__]):
    """Print each input molecule's title and its "Lipinski violations" value.

    The command line in *argv* is parsed against ``InterfaceData`` extended
    with the standard filter parameters. The filter is switched into table
    mode writing to an in-memory stream; the first row it emits is used as
    the column headers, and each molecule pushed through the filter yields
    one further row that is matched against those headers.
    """
    interface = oechem.OEInterface(InterfaceData)
    oemolprop.OEConfigureFilterParams(interface)
    if not oechem.OEParseCommandLine(interface, argv):
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    input_name = interface.GetString("-in")
    mol_stream = oechem.oemolistream()
    if not mol_stream.open(input_name):
        oechem.OEThrow.Fatal("Cannot open input file!")

    mol_filter = oemolprop.OEFilter(oemolprop.OEGetFilterType(interface))

    oechem.OEThrow.SetLevel(interface.GetInt("-verbose"))

    table = oechem.oeosstream()
    filter_pwnd = False
    mol_filter.SetTable(table, filter_pwnd)

    # The header row is kept as raw bytes, hence the bytes key used below.
    column_names = table.str().split(b'\t')
    table.clear()  # remove the header row from the stream

    for mol in mol_stream.GetOEGraphMols():
        mol_filter(mol)

        row_text = table.str().decode("UTF-8")
        table.clear()  # remove this row from the stream

        row = dict(zip(column_names, row_text.split('\t')))
        print(mol.GetTitle(), row[b"Lipinski violations"])
def __init__(self, *args, **kwargs):
    """Initialise the mixin and create its reusable molecule streams.

    All arguments are forwarded to the next ``__init__`` in the MRO.
    ``self._ifs`` is an input stream configured for gzipped OEB, and
    ``self._ofs`` is a string-backed output stream configured the same way.
    """
    super(MoleculeSerializerMixin, self).__init__(*args, **kwargs)

    # Reader side: gzipped OEB.
    reader = oemolistream()
    reader.SetFormat(OEFormat_OEB)
    reader.Setgz(True)
    self._ifs = reader

    # Writer side: built on an in-memory stream, then opened in string mode.
    # NOTE(review): the meaning of the second constructor argument (False)
    # is not visible here -- confirm against the OEChem docs.
    sink = oeosstream()
    writer = oemolostream(sink, False)
    writer.openstring()
    writer.Setgz(True)
    writer.SetFormat(OEFormat_OEB)
    self._ofs = writer
def depict(mol, width=500, height=200):
    """Render *mol* as a PNG and return it as an IPython ``Image``.

    Parameters
    ----------
    mol
        Molecule to draw. Its depiction coordinates are prepared in place
        with a horizontal orientation.
    width, height : int
        Size of the rendered image; the drawing is auto-scaled to fit.

    Returns
    -------
    IPython.display.Image
        Image object wrapping the rendered PNG data.
    """
    from IPython.display import Image

    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    oedepict.OEPrepareDepiction(mol, dopt)

    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)

    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()

    # Pass the stream contents through unchanged. On Python 3 they are bytes,
    # and the previous "".join(...) over them raised TypeError; on Python 2
    # joining a str character by character reproduced the same str anyway.
    return Image(data=ofs.str())
def _oe_capture_warnings():  # pragma: no cover
    """Redirect OpenEye messages into a private stream for one ``yield``.

    The OEThrow output is pointed at a fresh in-memory stream and the handler
    is cleared before control is yielded to the caller. Afterwards the output
    is pointed back at ``oechem.oeerr``.

    NOTE(review): this generator is presumably wrapped by
    ``contextlib.contextmanager`` outside the visible code -- confirm.
    """
    from openeye import oechem

    output_stream = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(output_stream)
    oechem.OEThrow.Clear()

    try:
        yield
    finally:
        # Restore the default stream even when the wrapped body raises;
        # otherwise OEThrow would stay redirected to the private stream.
        oechem.OEThrow.SetOutputStream(oechem.oeerr)
def depict(mol, width=500, height=200):
    """Render *mol* as a PNG and return it as an IPython ``Image``.

    Parameters
    ----------
    mol
        Molecule to draw. Its depiction coordinates are prepared in place
        with a horizontal orientation.
    width, height : int
        Size of the rendered image; the drawing is auto-scaled to fit.

    Returns
    -------
    IPython.display.Image
        Image object wrapping the rendered PNG data.
    """
    from IPython.display import Image

    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    oedepict.OEPrepareDepiction(mol, dopt)

    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)

    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()

    # Pass the stream contents through unchanged. On Python 3 they are bytes,
    # and the previous "".join(...) over them raised TypeError; on Python 2
    # joining a str character by character reproduced the same str anyway.
    return Image(data=ofs.str())
def filter_molecules(input_molstream,
                     output_molstream,
                     allow_repeats=False,
                     allow_warnings=False,
                     max_heavy_atoms=100,
                     remove_smirks=list(),
                     max_metals=0,
                     explicitHs=True,
                     elements=None,
                     check_type=None):
    """
    Takes input file and removes molecules using given criteria then
    writes a new output file

    Parameters
    ----------
    input_molstream
        Open molecule input stream to read from.
    output_molstream
        Open molecule output stream that kept molecules are written to.
    allow_repeats : bool
        If False, a molecule whose isomeric SMILES has already been saved
        is skipped.
    allow_warnings : bool
        If False, a molecule whose parsing produced a message containing
        "warning" (case-insensitive) is skipped.
    max_heavy_atoms, remove_smirks, max_metals, elements, check_type
        Passed unchanged to ``keep_molecule``.
    explicitHs : bool
        If True, explicit hydrogens are added to the copy that is checked
        and written.

    A summary of the counts is printed at the end. While the function runs,
    OpenEye messages are captured in a private stream; on exit the output is
    pointed at ``oechem.oeerr``.
    """
    errs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errs)

    molecule = oechem.OECreateOEGraphMol()
    # A set gives O(1) duplicate checks; only membership is ever tested.
    seen_smiles = set()
    count = 0
    warnings = 0
    smile_count = 0
    saved = 0

    try:
        while oechem.OEReadMolecule(input_molstream, molecule):
            count += 1

            # The captured text is bytes on Python 3, so the needle must be
            # bytes as well (a str needle raises TypeError).
            if (b"warning" in errs.str().lower()) and not allow_warnings:
                warnings += 1
                errs.clear()
                continue

            smi = oechem.OECreateIsoSmiString(molecule)
            mol_copy = oechem.OEMol(molecule)
            if explicitHs:
                oechem.OEAddExplicitHydrogens(mol_copy)

            new_smile = smi not in seen_smiles
            if not new_smile:
                smile_count += 1

            if new_smile or allow_repeats:
                keep = keep_molecule(mol_copy, max_heavy_atoms, remove_smirks,
                                     max_metals, elements, check_type)
                if keep:
                    seen_smiles.add(smi)
                    oechem.OEWriteMolecule(output_molstream, mol_copy)
                    saved += 1

            errs.clear()
    finally:
        # Do not leave the global handler attached to this local stream.
        oechem.OEThrow.SetOutputStream(oechem.oeerr)

    print(f"{count} molecules in input stream")
    print(f"{warnings} molecules resulted in warnings when parsing")
    print(f"{smile_count} molecules were had repeated isomeric SMILES")
    print(f"{saved} molecules saved")
def depictMatch(mol, match, width=500, height=200):
    """Render *mol* as a PNG with *match* highlighted in light blue.

    Parameters
    ----------
    mol
        Molecule to draw. Its depiction coordinates are prepared in place
        with a horizontal orientation and hydrogens suppressed.
    match
        The match to highlight; handed to ``oedepict.OEAddHighlighting``.
    width, height : int
        Size of the rendered image; the drawing is auto-scaled to fit.

    Returns
    -------
    IPython.display.Image
        Image object wrapping the rendered PNG data.
    """
    from IPython.display import Image

    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    dopt.SetSuppressHydrogens(True)
    oedepict.OEPrepareDepiction(mol, dopt)

    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)

    hstyle = oedepict.OEHighlightStyle_Color
    hcolor = oechem.OEColor(oechem.OELightBlue)
    oedepict.OEAddHighlighting(disp, hcolor, hstyle, match)

    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()

    # Pass the stream contents through unchanged. On Python 3 they are bytes,
    # and the previous "".join(...) over them raised TypeError; on Python 2
    # joining a str character by character reproduced the same str anyway.
    return Image(data=ofs.str())
def call_openeye(
    oe_callable: Callable[[T], bool],
    *args: T,
    exception_type: Type[RechargeException] = RuntimeError,
    exception_kwargs: Dict[str, Any] = None,
):
    """Wraps a call to an OpenEye function, either capturing the output in an
    exception if the function does not complete successfully, or redirecting it
    to the logger.

    Parameters
    ----------
    oe_callable
        The OpenEye function to call.
    args
        The arguments to pass to the OpenEye function.
    exception_type:
        The type of exception to raise when the function does not
        successfully complete.
    exception_kwargs
        The keyword arguments to pass to the exception.

    Raises
    ------
    exception_type
        When ``oe_callable`` returns a falsy status. The captured OpenEye
        output, prefixed with a newline, is the first positional argument.
    """
    from openeye import oechem

    if exception_kwargs is None:
        exception_kwargs = {}

    output_stream = oechem.oeosstream()

    oechem.OEThrow.SetOutputStream(output_stream)
    oechem.OEThrow.Clear()

    try:
        status = oe_callable(*args)
    finally:
        # Restore the default stream even if the OpenEye call itself raises,
        # so later messages are not written into this discarded buffer.
        oechem.OEThrow.SetOutputStream(oechem.oeerr)

    # Tidy the captured text: drop the "Warning: " prefix, any leading
    # ": " on a line, and a single trailing newline.
    output_string = output_stream.str().decode("UTF-8")
    output_string = output_string.replace("Warning: ", "")
    output_string = re.sub("^: +", "", output_string, flags=re.MULTILINE)
    output_string = re.sub("\n$", "", output_string)

    if not status:
        # noinspection PyArgumentList
        raise exception_type("\n" + output_string, **exception_kwargs)
    elif len(output_string) > 0:
        logging.debug(output_string)
def depictMatch(mol, match, width=500, height=200):
    """Render *mol* as a PNG with *match* highlighted in light blue.

    Parameters
    ----------
    mol
        Molecule to draw. Its depiction coordinates are prepared in place
        with a horizontal orientation and hydrogens suppressed.
    match
        The match to highlight; handed to ``oedepict.OEAddHighlighting``.
    width, height : int
        Size of the rendered image; the drawing is auto-scaled to fit.

    Returns
    -------
    IPython.display.Image
        Image object wrapping the rendered PNG data.
    """
    from IPython.display import Image

    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    dopt.SetSuppressHydrogens(True)
    oedepict.OEPrepareDepiction(mol, dopt)

    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)

    hstyle = oedepict.OEHighlightStyle_Color
    hcolor = oechem.OEColor(oechem.OELightBlue)
    oedepict.OEAddHighlighting(disp, hcolor, hstyle, match)

    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()

    # Pass the stream contents through unchanged. On Python 3 they are bytes,
    # and the previous "".join(...) over them raised TypeError; on Python 2
    # joining a str character by character reproduced the same str anyway.
    return Image(data=ofs.str())
def GetUrlSDF2SMI(url, fout, ntries=20, poll_wait=10):
    '''Get PubChem SDF.GZ and convert it to SMILES lines.

    The file at *url* is downloaded (via GetUrl) into a temporary file, read
    as gzipped SDF, and for every molecule one line "<smiles> <cid>" is
    written to *fout*, using the SD tags PUBCHEM_OPENEYE_ISO_SMILES and
    PUBCHEM_COMPOUND_CID.  OpenEye messages other than warnings are echoed
    to stderr.

    Returns the total length of the text written to *fout*.  The temporary
    file is removed and the OpenEye error stream is pointed at oechem.oeerr
    on exit, whether or not an error occurred.
    '''
    import openeye.oechem as oechem

    def HandleOEErrors(oeerrs, nowarn):
        '''Echo captured OpenEye messages to stderr, then empty the buffer.'''
        errstr = oeerrs.str()
        # The stream yields bytes on Python 3; decode once so the str-based
        # split and regex below work there as well.
        if not isinstance(errstr, str):
            errstr = errstr.decode('utf-8', 'replace')
        for line in errstr.split('\n'):
            if not line.rstrip():
                continue
            if nowarn and re.search('Warning', line, re.I):
                continue
            sys.stderr.write("%s\n" % line)
        oeerrs.clear()

    fout_tmp = tempfile.NamedTemporaryFile(prefix='pubchem_ftp_',
                                           suffix='.sdf.gz', delete=False)
    fpath_tmp = fout_tmp.name
    nbytes = 0
    try:
        GetUrl(url, fout_tmp, ntries, poll_wait)
        logging.debug('fpath_tmp = %s' % fpath_tmp)
        fout_tmp.close()

        ims = oechem.oemolistream(fpath_tmp)
        ims.SetFormat(oechem.OEFormat_SDF)
        ims.Setgz(True)
        mol = oechem.OEGraphMol()

        oeerrs = oechem.oeosstream()
        oechem.OEThrow.SetOutputStream(oeerrs)
        try:
            while oechem.OEReadMolecule(ims, mol):
                cid = oechem.OEGetSDData(mol, 'PUBCHEM_COMPOUND_CID')
                isosmi = oechem.OEGetSDData(mol, 'PUBCHEM_OPENEYE_ISO_SMILES')
                buff = ("%s %s\n" % (isosmi, cid))
                fout.write(buff)
                nbytes += len(buff)
                HandleOEErrors(oeerrs, True)
            # Report anything emitted by the final (terminating) read.
            HandleOEErrors(oeerrs, True)
        finally:
            # Do not leave the global handler attached to this local stream.
            oechem.OEThrow.SetOutputStream(oechem.oeerr)
            ims.close()
    finally:
        # Closing twice is harmless; the file must be closed before removal.
        fout_tmp.close()
        os.remove(fpath_tmp)
    return nbytes
def SeqAlign(ref, fit, ofs):
    """Align *fit* onto *ref* by sequence, report it, and write *fit* out.

    The sequence alignment of the two structures is printed (method, gap and
    extend values, score, and the alignment itself), followed by the RMSD
    computed on that alignment. The rotation and translation produced by the
    RMSD calculation are then applied to *fit*, which is written to *ofs*.
    """
    alignment = oechem.OEGetAlignment(ref, fit)

    print()
    print("Alignment of %s to %s" % (fit.GetTitle(), ref.GetTitle()))
    print()
    method_name = oechem.OEGetAlignmentMethodName(alignment.GetMethod())
    print("  Method: %s" % method_name)
    print("  Gap   : %d" % alignment.GetGap())
    print("  Extend: %d" % alignment.GetExtend())
    print("  Score : %d" % alignment.GetScore())
    print()

    # Dump the alignment into memory so it can be printed as text.
    buffer = oechem.oeosstream()
    oechem.OEWriteAlignment(buffer, alignment)
    print(buffer.str().decode("UTF-8"))

    rotation = oechem.OEDoubleArray(9)
    translation = oechem.OEDoubleArray(3)
    only_calpha, do_overlay = True, True
    rmsd = oechem.OERMSD(ref, fit, alignment, only_calpha, do_overlay,
                         rotation, translation)
    print("  RMSD = %.1f" % rmsd)

    # Move the fit structure using the transform from the RMSD call.
    oechem.OERotate(fit, rotation)
    oechem.OETranslate(fit, translation)
    oechem.OEWriteMolecule(ofs, fit)
def enumerate_conformations(name, smiles=None, pdbname=None, user_mol2=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str
       Isomeric SMILES string
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')
    user_mol2 : str
       User prepared mol2 file that is read in place of the mol2 file
       converted from the Epik output.

    Raises
    ------
    Exception
        If neither ``pdbname`` nor ``smiles`` is given.
    IOError
        If ``user_mol2`` is given but is not an existing file.

    Notes
    -----
    Output files are written under ``<output_dir>/<name>/``. States that fail
    charging are written, with their error text, to ``Failed_molecules/``.
    Returns ``None`` early if the initial charging of a SMILES-built
    molecule raises ``RuntimeError``.
    """
    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    elif smiles:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return

        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    else:
        raise Exception('Must provide SMILES string or pdbname')

    # Route OpenEye messages into an in-memory stream so they can be stored
    # alongside molecules that fail charging.
    oehandler = oechem.OEThrow
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Allow user to provide custom file instead, if located in the right location.
    if user_mol2 is not None:
        if os.path.isfile(user_mol2):
            output_mol2_filename = user_mol2
        else:
            raise IOError("No such file: {}".format(user_mol2))

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges. The SDF and mol2 streams are read in lockstep.
    failed_molecules = dict()
    charged_molecules = list()
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

            index += 1
            print("Charging molecule %d" % (index))
            try:
                # Start each state with a clean handler and an empty buffer,
                # so a failure report holds only this state's messages.
                oehandler.Clear()
                oss.clear()
                # fix bonds
                oechem.OEAssignAromaticFlags(mol2_molecule)
                # Assign Tripos types
                oechem.OETriposAtomTypeNames(mol2_molecule)
                oechem.OETriposBondTypeNames(mol2_molecule)
                # Charge molecule.
                charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800,
                                                       strictStereo=False, normalize=True,
                                                       keep_confs=None, legacy=True)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)
            except Exception as e:
                identifier = "{:s}_{:04d}".format(name, index)
                # Read the buffer contents; str(oss) would only give the
                # object's representation, not the captured messages.
                oe_output = oss.str().decode("UTF-8")
                failed_molecules[identifier] = (deepcopy(mol2_molecule),
                                                oe_output + "\n" + str(e))
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
                oehandler.Clear()
    finally:
        # Clean up
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties.
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            # Get Epik data. Every tag is still parsed, in the original
            # order, so a missing or malformed value raises as before; only
            # the state penalty is written out.
            for tag in ("r_epik_Ionization_Penalty",
                        "r_epik_Ionization_Penalty_Charging",
                        "r_epik_Ionization_Penalty_Neutral"):
                float(oechem.OEGetSDData(charged_molecule, tag))
            epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = (oechem.OEOFlavor_PDB_CurrentResidues
                  | oechem.OEOFlavor_PDB_ELEMENT
                  | oechem.OEOFlavor_PDB_BONDS
                  | oechem.OEOFlavor_PDB_HETBONDS
                  | oechem.OEOFlavor_PDB_BOTH)
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)

    # Store every failed state together with the messages it produced.
    os.makedirs("Failed_molecules", exist_ok=True)
    for name_state, (state_oemol, error_message) in failed_molecules.items():
        write_mol2_preserving_atomnames("Failed_molecules/{}.mol2".format(name_state),
                                        state_oemol, name_state)
        with open("Failed_molecules/{}.err".format(name_state), 'w') as error_file:
            error_file.write(error_message)
def enumerate_conformations(name, pdbfile=None, smiles=None, pdbname=None, pH=7.4):
    """Run Epik to get protonation states using PDB residue templates for naming.

    Parameters
    ----------
    name : str
       Common name of molecule (used as prefix for all output file names)
    pdbfile : str
       Currently unused; input from a PDB file is disabled.
    smiles : str
       Currently unused; input from a SMILES string is disabled.
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')
    pH : float
       pH passed to Epik.

    Returns
    -------
    log : str
        Text log of the run; it is also written to ``log.txt``.
    success_status : bool
        False if any protonation state failed charging.

    Raises
    ------
    Exception
        If ``pdbname`` is not provided.
    """
    oehandler = openeye.oechem.OEThrow
    # String stream output
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)
    log = "New run:\nPDB code: {pdbname}; Molecule: {name}; pH {pH}\n".format(
        pdbname=pdbname, name=name, pH=pH)
    success_status = True

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            log += "Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0])
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (
            pdbname[0], pdbname, pdbname)
        pdb_filename = name + '-rcsb_download.pdb'
        log += "Retrieving PDB structure from RCSB ligand expo: {}.\n".format(
            pdb_filename)
        retrieve_url(url, pdb_filename)
        log += "Parsing PDB file.\n"
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (
            pdbname[0], pdbname, pdbname)
        sdf_filename = name + '-rcsb_download.sdf'
        log += "Retrieving SDF structure from RCSB ligand expo: {}.\n".format(
            sdf_filename)
        retrieve_url(url, sdf_filename)
        log += "Parsing SDF file.\n"
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        log += "Canonicalizing atom names.\n"
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        log += "Assign atom type names.\n"
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    else:
        # For the moment, SMILES and PDB-file input are disabled, so a PDB
        # code is the only accepted way to specify the molecule.
        raise Exception('Must provide SMILES string or pdbname, or pdbfile')

    # Save mol2 file, preserving atom names
    log += "Running Epik.\n"
    mol2_file_path = name + '-before_epik.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = name + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=50,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=pH)

    log += "Epik run completed.\n"
    # Convert maestro file to sdf and mol2
    output_sdf_filename = name + '-after_epik.sdf'
    output_mol2_filename = name + '-after_epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.

    # reset count of error handler
    oehandler.Clear()
    log += "Assigning charges to protonation states.\n"

    charged_molecules = list()
    index = 0
    failed_states = set()
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

            index += 1
            log += "State {0:d}\n".format(index)

            # Reset per-state data so the failure handler can never pick up
            # conformers or messages left over from a previous state.
            charged_molecule_conformers = None
            oss.clear()
            try:
                # Charge molecule.
                charged_molecule_conformers = omtoe.get_charges(
                    mol2_molecule, max_confs=800, strictStereo=False,
                    normalize=True, keep_confs=-1)

                log += "Charging stage output:\n"
                # Read the buffer contents; str(oss) would only give the
                # object's representation, leaving the log parser below
                # with nothing meaningful to inspect.
                OEOutput = oss.str().decode("UTF-8")
                log += OEOutput
                log += "\nCharging state completed.\n"

                # Restore coordinates to original
                charged_molecule = select_conformers(charged_molecule_conformers,
                                                     mol2_molecule, keep_confs=None)

                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)

                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)

                # Store molecule
                charged_molecules.append(charged_molecule)

                # Check for failure in the log
                # NOTE(review): this runs after the molecule has been stored,
                # so a state rejected here is still written out -- confirm
                # that this ordering is intended.
                openeye_charge_log_parser(OEOutput, True)

                oehandler.Clear()

            except Exception as e:
                failed_states.add(index)
                logging.info(e)
                log += "State failed charging.\n"
                log += str(e)
                log += "\n"
                filename_failure = name + '-conformers-failed-state-{}-.mol2'.format(
                    index)
                if charged_molecule_conformers is None:
                    # Charging itself raised: there is nothing to store.
                    log += "Could not store result, most likely failed during Omega step!\n"
                else:
                    try:
                        write_mol2_preserving_atomnames(filename_failure,
                                                        charged_molecule_conformers,
                                                        residue_name)
                    except Exception:
                        log += "Could not store result, most likely failed during Omega step!\n"

                success_status = False
                oehandler.Clear()
    finally:
        # Clean up
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties.
    with open(name + '-state-penalties.out', 'w') as outfile:
        for (index, charged_molecule) in enumerate(charged_molecules):
            # Get Epik data. Every tag is still parsed, in the original
            # order, so a missing or malformed value raises as before; only
            # the state penalty is written out.
            log += "Writing Epik data for state {:d}\n".format(index + 1)
            for tag in ("r_epik_Ionization_Penalty",
                        "r_epik_Ionization_Penalty_Charging",
                        "r_epik_Ionization_Penalty_Neutral"):
                float(oechem.OEGetSDData(charged_molecule, tag))
            epik_State_Penalty = float(
                oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = name + '-charged_output.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = (oechem.OEOFlavor_PDB_CurrentResidues
                  | oechem.OEOFlavor_PDB_ELEMENT
                  | oechem.OEOFlavor_PDB_BONDS
                  | oechem.OEOFlavor_PDB_HETBONDS
                  | oechem.OEOFlavor_PDB_BOTH)
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = name + '-charged_output.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)
    log += "Run completed.\n"
    if success_status:
        log += "Status: Success\n"
    else:
        log += "Status: Failure\n"
        log += "Failed states: {}\n".format(" ".join(
            [str(state) for state in sorted(list(failed_states))]))

    with open("log.txt", 'w') as logfile:
        logfile.write(log)

    return log, success_status
import sys

from openeye import oechem as oe

import common


class MyAromaticSmilesWriter(common.AromaticSmilesWriter):
    """Aromatic SMILES writer backed by OEChem."""

    def getoutput(self, smi):
        """Parse *smi*, assign aromatic flags, and return the SMILES string
        created with flavor 0. Parsing is asserted to succeed."""
        parsed = oe.OEGraphMol()
        parse_ok = oe.OEParseSmiles(parsed, smi)
        assert parse_ok
        oe.OEAssignAromaticFlags(parsed)
        smiles_out = oe.OECreateSmiString(parsed, 0)
        return smiles_out


# OEChem messages are collected here so the parser's complaints can be read.
msgstream = oe.oeosstream()
oe.OEThrow.SetOutputStream(msgstream)


class MyHydrogenCounter(common.HydrogenCounter):
    """Hydrogen counter backed by OEChem."""

    def getoutput(self, smi):
        """Parse *smi*; on failure return ``(None, reason)``.

        The reason is "Kekulization_failure" when the captured OEChem
        message mentions "Kekul", and "Parse_error" otherwise.
        """
        parsed = oe.OEGraphMol()
        # Empty the shared buffer so only this parse's messages are seen.
        msgstream.clear()
        parse_ok = oe.OEParseSmiles(parsed, smi)
        if not parse_ok:
            captured = msgstream.str().decode("utf-8")
            reason = "Kekulization_failure" if "Kekul" in captured else "Parse_error"
            return None, reason
skip_oechem = False
from openeye import oechem

if has_oechem:
    from chemfp.commandline import oe2fps
    import chemfp.openeye
    chemfp.openeye._USE_SELECT = False  # Grrr. Needed to automate testing.

real_stdout = sys.stdout
real_stderr = sys.stderr

PUBCHEM_SDF = support.fullpath("pubchem.sdf")
PUBCHEM_SDF_GZ = support.fullpath("pubchem.sdf.gz")
PUBCHEM_ANOTHER_EXT = support.fullpath("pubchem.should_be_sdf_but_is_not")

# Capture every OEChem message so the tests can inspect them.
oeerrs = oechem.oeosstream()
oechem.OEThrow.SetOutputStream(oeerrs)


def _check_for_oe_errors():
    """Raise AssertionError for any captured OEChem message that is not one
    of the two known, tolerated warnings.

    The capture buffer is only read here; it is not cleared.
    """
    captured = oeerrs.str()
    # The stream yields bytes on Python 3; decode so the str prefixes below
    # can be compared (bytes.startswith(str) raises TypeError).
    if not isinstance(captured, str):
        captured = captured.decode("utf-8", "replace")
    for line in captured.splitlines():
        if line.startswith(
                "Warning: Stereochemistry corrected on atom number"):
            continue
        if line.startswith("Warning: Unknown file format set in input stream"):
            # There's a bug in OEChem where it generates this warning on unknown
            # file extensions even after SetFormat has been called
            continue
        raise AssertionError("Unexpected message from OEChem: %r" % (line, ))
def prepare_receptor(complex_pdb_filename, output_basepath, dimer=False, retain_water=False):
    """
    Prepare docking receptor files from a complex PDB file.

    Two receptors are written under ``output_basepath`` using the input file
    name (without extension) as prefix: ``<prefix>-receptor.oeb.gz`` and
    ``<prefix>-receptor-thiolate.oeb.gz`` (CYS145 SG given a -1 formal charge,
    HIS41 ND1 given a +1 formal charge). The protein is also written as PDB
    for both forms and the ligand as mol2, PDB and SDF. If both receptor
    files already exist the function returns without doing anything.

    Parameters
    ----------
    complex_pdb_filename : str
        The complex PDB file to read in
    output_basepath : str
        Base path for output
    dimer : bool, optional, default=False
        If True, generate the dimer as the biological unit
    retain_water : bool, optional, default=False
        If True, will retain waters

    Raises
    ------
    Exception
        If no design units could be made from the structure; the message
        includes the captured OpenEye output.
    """
    import os
    import re

    def _drop_unk_lines(path):
        """Rewrite the text file at *path* without lines containing 'UNK'."""
        with open(path, 'r') as infile:
            kept = [line for line in infile if 'UNK' not in line]
        with open(path, 'wt') as outfile:
            outfile.write(''.join(kept))

    # Check whether this is a diamond SARS-CoV-2 Mpro structure or not
    # (raw string: '\d' in a plain literal is an invalid escape sequence)
    is_diamond_structure = re.search(r'-x\d+_', complex_pdb_filename) is not None

    _, filename = os.path.split(complex_pdb_filename)
    prefix, _ = os.path.splitext(filename)
    prefix = os.path.join(output_basepath, prefix)

    # Check if receptor already exists
    receptor_filename = f'{prefix}-receptor.oeb.gz'
    thiolate_receptor_filename = f'{prefix}-receptor-thiolate.oeb.gz'
    if os.path.exists(receptor_filename) and os.path.exists(thiolate_receptor_filename):
        return

    # Read in PDB file, skipping UNK atoms (left over from processing covalent ligands)
    with open(complex_pdb_filename, 'r') as infile:
        pdbfile_lines = [line for line in infile if 'UNK' not in line]

    # Check if biological symmetry header is present
    has_biological_symmetry_header = any('REMARK 350' in line for line in pdbfile_lines)

    # Prepend REMARK 350 (biological symmetry) header lines for Mpro (from 5RGG) if not present
    if is_diamond_structure and (not has_biological_symmetry_header):
        pdbfile_lines = [
            line + '\n' for line in BIOLOGICAL_SYMMETRY_HEADER.split('\n')
        ] + pdbfile_lines

    # If monomer is specified, drop crystal symmetry lines
    if not dimer:
        pdbfile_lines = [line for line in pdbfile_lines if 'REMARK 350' not in line]

    # Filter out waters
    if not retain_water:
        pdbfile_lines = [line for line in pdbfile_lines if 'HOH' not in line]

    # Filter out LINK records to covalent inhibitors so we can model non-covalent complex
    pdbfile_lines = [line for line in pdbfile_lines if 'LINK' not in line]

    # Reconstruct PDBFile contents
    pdbfile_contents = ''.join(pdbfile_lines)

    # Append SEQRES to all structures if they do not have it
    seqres = """\
SEQRES   1 A  306  SER GLY PHE ARG LYS MET ALA PHE PRO SER GLY LYS VAL
SEQRES   2 A  306  GLU GLY CYS MET VAL GLN VAL THR CYS GLY THR THR THR
SEQRES   3 A  306  LEU ASN GLY LEU TRP LEU ASP ASP VAL VAL TYR CYS PRO
SEQRES   4 A  306  ARG HIS VAL ILE CYS THR SER GLU ASP MET LEU ASN PRO
SEQRES   5 A  306  ASN TYR GLU ASP LEU LEU ILE ARG LYS SER ASN HIS ASN
SEQRES   6 A  306  PHE LEU VAL GLN ALA GLY ASN VAL GLN LEU ARG VAL ILE
SEQRES   7 A  306  GLY HIS SER MET GLN ASN CYS VAL LEU LYS LEU LYS VAL
SEQRES   8 A  306  ASP THR ALA ASN PRO LYS THR PRO LYS TYR LYS PHE VAL
SEQRES   9 A  306  ARG ILE GLN PRO GLY GLN THR PHE SER VAL LEU ALA CYS
SEQRES  10 A  306  TYR ASN GLY SER PRO SER GLY VAL TYR GLN CYS ALA MET
SEQRES  11 A  306  ARG PRO ASN PHE THR ILE LYS GLY SER PHE LEU ASN GLY
SEQRES  12 A  306  SER CYS GLY SER VAL GLY PHE ASN ILE ASP TYR ASP CYS
SEQRES  13 A  306  VAL SER PHE CYS TYR MET HIS HIS MET GLU LEU PRO THR
SEQRES  14 A  306  GLY VAL HIS ALA GLY THR ASP LEU GLU GLY ASN PHE TYR
SEQRES  15 A  306  GLY PRO PHE VAL ASP ARG GLN THR ALA GLN ALA ALA GLY
SEQRES  16 A  306  THR ASP THR THR ILE THR VAL ASN VAL LEU ALA TRP LEU
SEQRES  17 A  306  TYR ALA ALA VAL ILE ASN GLY ASP ARG TRP PHE LEU ASN
SEQRES  18 A  306  ARG PHE THR THR THR LEU ASN ASP PHE ASN LEU VAL ALA
SEQRES  19 A  306  MET LYS TYR ASN TYR GLU PRO LEU THR GLN ASP HIS VAL
SEQRES  20 A  306  ASP ILE LEU GLY PRO LEU SER ALA GLN THR GLY ILE ALA
SEQRES  21 A  306  VAL LEU ASP MET CYS ALA SER LEU LYS GLU LEU LEU GLN
SEQRES  22 A  306  ASN GLY MET ASN GLY ARG THR ILE LEU GLY SER ALA LEU
SEQRES  23 A  306  LEU GLU ASP GLU PHE THR PRO PHE ASP VAL VAL ARG GLN
SEQRES  24 A  306  CYS SER GLY VAL THR PHE GLN
"""
    has_seqres = 'SEQRES' in pdbfile_contents
    if not has_seqres:
        pdbfile_contents = seqres + pdbfile_contents

    # Read the receptor and identify design units
    from openeye import oespruce, oechem
    from tempfile import NamedTemporaryFile

    # The edited PDB text goes through a temporary file because
    # read_pdb_file takes a file name; the file is removed after reading.
    with NamedTemporaryFile(delete=False, mode='wt', suffix='.pdb') as pdbfile:
        pdbfile.write(pdbfile_contents)
    try:
        complex_mol = read_pdb_file(pdbfile.name)
    finally:
        os.remove(pdbfile.name)

    # Strip protons from structure to allow SpruceTK to add these back
    # See: 6wnp, 6wtj, 6wtk, 6xb2, 6xqs, 6xqt, 6xqu, 6m2n
    for atom in complex_mol.GetAtoms():
        if atom.GetAtomicNum() > 1:
            oechem.OESuppressHydrogens(atom)

    # Delete and rebuild C-terminal residue because Spruce causes issues with this
    # See: 6m2n 6lze
    is_c_terminal = oechem.OEIsCTerminalAtom()
    for atom in complex_mol.GetAtoms():
        if is_c_terminal(atom):
            for nbor in atom.GetAtoms():
                if oechem.OEGetPDBAtomIndex(nbor) == oechem.OEPDBAtomName_O:
                    complex_mol.DeleteAtom(nbor)

    # Produce zero design units if we fail to protonate

    # Log warnings
    errfs = oechem.oeosstream()  # create a stream that writes internally to a stream
    oechem.OEThrow.SetOutputStream(errfs)
    oechem.OEThrow.Clear()
    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Verbose)  # capture verbose error output

    try:
        opts = oespruce.OEMakeDesignUnitOptions()
        opts.GetSplitOptions().SetMinLigAtoms(7)  # minimum fragment size (in heavy atoms)

        mdata = oespruce.OEStructureMetadata()
        opts.GetPrepOptions().SetStrictProtonationMode(True)

        # Both N- and C-termini should be zwitterionic
        # Mpro cleaves its own N- and C-termini
        # See https://www.pnas.org/content/113/46/12997
        build_opts = opts.GetPrepOptions().GetBuildOptions()
        build_opts.SetCapNTermini(False)
        build_opts.SetCapCTermini(False)
        # Don't allow truncation of termini, since force fields don't have parameters for this
        build_opts.GetCapBuilderOptions().SetAllowTruncate(False)
        # Build loops and sidechains
        build_opts.SetBuildLoops(True)
        build_opts.SetBuildSidechains(True)

        # Don't flip Gln189
        pred = oechem.OEAtomMatchResidue(["GLN:189:.*:.*:.*"])
        protonate_opts = opts.GetPrepOptions().GetProtonateOptions()
        place_hydrogens_opts = protonate_opts.GetPlaceHydrogensOptions()
        place_hydrogens_opts.SetNoFlipPredicate(pred)

        # Make design units
        design_units = list(oespruce.OEMakeDesignUnits(complex_mol, mdata, opts))
    finally:
        # Restore error stream, even if design-unit generation raised
        oechem.OEThrow.SetOutputStream(oechem.oeerr)

    # Capture the warnings to a string
    warnings = errfs.str().decode("utf-8")

    if not design_units:
        print('')
        print('')
        print(f'{complex_pdb_filename} : FAILURE')
        print(warnings)
        msg = f'No design units found for {complex_pdb_filename}\n'
        msg += warnings
        msg += '\n'
        raise Exception(msg)

    design_unit = design_units[0]
    print('')
    print('')
    print(f'{complex_pdb_filename} : SUCCESS')
    print(warnings)

    # Prepare the receptor
    from openeye import oedocking

    protein = oechem.OEGraphMol()
    design_unit.GetProtein(protein)

    ligand = oechem.OEGraphMol()
    design_unit.GetLigand(ligand)

    # Create receptor and other files
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)
    with oechem.oemolostream(f'{prefix}-ligand.mol2') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.sdf') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)

    # Filter out UNK from PDB files (which have covalent adducts)
    _drop_unk_lines(f'{prefix}-protein.pdb')

    # Adjust protonation state of CYS145 to generate thiolate form
    pred = oechem.OEAtomMatchResidue(["CYS:145:.*:.*:.*"])
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_SG:
            oechem.OESuppressHydrogens(atom)
            atom.SetFormalCharge(-1)
            atom.SetImplicitHCount(0)

    # Protonate HIS41
    # NOTE(review): this second SetBypassPredicate call presumably replaces
    # the CYS145 predicate set just above -- confirm both residues are
    # meant to be bypassed.
    pred = oechem.OEAtomMatchResidue(["HIS:41:.*:.*:.*"])
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_ND1:
            oechem.OESuppressHydrogens(atom)  # strip hydrogens from residue
            atom.SetFormalCharge(+1)
            atom.SetImplicitHCount(1)

    # Update the design unit with the modified formal charge for CYS 145 SG
    oechem.OEUpdateDesignUnit(design_unit, protein, oechem.OEDesignUnitComponents_Protein)

    # Adjust protonation states
    # NOTE(review): the status returned by OEProtonateDesignUnit is not
    # checked, matching the previous behaviour.
    oespruce.OEProtonateDesignUnit(design_unit, protonate_opts)
    design_unit.GetProtein(protein)

    # Write thiolate form of receptor
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, thiolate_receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein-thiolate.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)

    # Filter out UNK from PDB files (which have covalent adducts)
    _drop_unk_lines(f'{prefix}-protein-thiolate.pdb')
def MMPTransform(itf):
    """Apply the transforms of a matched-pair index to the input molecules.

    Options are read from ``itf``: ``-input`` (structures to transform),
    ``-mmpindex`` (matched-pair index to load), ``-output`` (destination for
    the transformed structures), ``-context`` (first character selects the
    bond context: 0, 1, 2, 3 or A), ``-minpairs``, ``-verbose`` and
    ``-nowarnings``.

    Every problem with the inputs is reported through ``OEThrow.Fatal``.
    A summary line with the number of records read and molecules written is
    printed at the end.

    Returns 0.
    """
    throw = oechem.OEThrow

    # input structure(s) to process
    source = oechem.oemolistream()
    if not source.open(itf.GetString("-input")):
        throw.Fatal("Unable to open %s for reading" % itf.GetString("-input"))

    # the index must both look like an index file and actually load
    index_path = itf.GetString("-mmpindex")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(index_path):
        throw.Fatal(
            'Not a valid matched pair index input file, {}'.format(index_path))

    analyzer = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEReadMatchedPairAnalyzer(index_path, analyzer):
        throw.Fatal("Unable to load index {}".format(index_path))

    # an index without molecules or without pairs cannot transform anything
    if not analyzer.NumMols():
        throw.Fatal(
            'No records in loaded MMP index file: {}'.format(index_path))
    if not analyzer.NumMatchedPairs():
        throw.Fatal(
            'No matched pairs found in MMP index file, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # output (transformed) structure(s)
    sink = oechem.oemolostream()
    if not sink.open(itf.GetString("-output")):
        throw.Fatal("Unable to open %s for writing" % itf.GetString("-output"))

    # only the first character of -context is significant
    context_by_key = {
        '0': oemedchem.OEMatchedPairContext_Bond0,
        '1': oemedchem.OEMatchedPairContext_Bond1,
        '2': oemedchem.OEMatchedPairContext_Bond2,
        '3': oemedchem.OEMatchedPairContext_Bond3,
        'a': oemedchem.OEMatchedPairContext_AllBonds,
        'A': oemedchem.OEMatchedPairContext_AllBonds,
    }
    context_key = itf.GetString("-context")[:1]
    if context_key in context_by_key:
        context = context_by_key[context_key]
    else:
        # 0-bond context stays selected while the error is reported
        context = oemedchem.OEMatchedPairContext_Bond0
        throw.Fatal("Invalid context specified: " +
                    context_key + ", only 0|1|2|3|A allowed")

    verbose = itf.GetBool("-verbose")

    # return some status information
    if verbose:
        throw.Info("{}: molecules: {:d}, matched pairs: {:,d}".format(
            index_path, analyzer.NumMols(), analyzer.NumMatchedPairs()))

    min_pairs = itf.GetInt("-minpairs")
    if verbose and min_pairs > 1:
        throw.Info(
            'Requiring at least %d matched pairs to apply transformations'
            % min_pairs)

    # with -nowarnings, OEThrow output is diverted into an in-memory stream
    captured = None
    if itf.GetBool("-nowarnings"):
        captured = oechem.oeosstream()
        throw.SetOutputStream(captured)

    records_read = 0
    mols_written = 0
    for mol in source.GetOEGraphMols():
        records_read += 1
        transformed = oemedchem.OEMatchedPairApplyTransforms(
            mol, analyzer, context, min_pairs)

        if transformed.IsValid():
            # drop whatever was captured so far, write, then drop again
            if captured is not None:
                captured.clear()

            for product in transformed:
                mols_written += 1
                oechem.OEWriteMolecule(sink, product)

            if captured is not None:
                captured.clear()
        elif verbose:
            # as minpairs increases, fewer transformed mols are generated -
            # report the silent records when asked to
            label = mol.GetTitle() or 'Record ' + str(records_read)
            throw.Info("%s did not produce any output" % label)

    if not records_read:
        throw.Fatal('No records in input structure file to transform')

    if not mols_written:
        throw.Warning('No transformed structures generated')

    print("Input molecules={} Output molecules={}".format(
        records_read, mols_written))

    return 0
def MMPIndex(itf):
    """Create a matched-pair index file from a structure file.

    Options are read from ``itf``: ``-input`` (structures to index),
    ``-output`` (index file to create), ``-maxrec``, ``-threads``,
    ``-exportcompress``, ``-nowarnings``, ``-verbose`` and ``-vverbose``
    (which implies ``-verbose``). Further indexing options are taken from
    ``itf`` by ``OESetupMatchedPairIndexOptions``.

    Problems with the inputs or with the indexing result are reported
    through ``OEThrow.Fatal``. With ``-nowarnings`` the messages emitted
    while indexing are captured, scanned for duplicate-molecule notices and
    only replayed (as Info) when verbose.

    Returns 0.
    """
    # checking input structures: only verify the file can be opened here,
    # the indexer reads it by name later
    ifsindex = oechem.oemolistream()
    if not ifsindex.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open {} for reading".format(
            itf.GetString("-input")))
    ifsindex.close()

    verbose = itf.GetBool("-verbose")
    vverbose = itf.GetBool("-vverbose")
    if vverbose:
        verbose = True

    # output index file
    mmpindexfile = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpindexfile):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which leaked the source indentation into the message.
        oechem.OEThrow.Fatal(
            "Output file is not a matched pair index type - "
            "needs .mmpidx extension: {}".format(mmpindexfile))

    # create options class with defaults
    mmpopts = oemedchem.OEMatchedPairAnalyzerOptions()
    # set up options from command line
    if not oemedchem.OESetupMatchedPairIndexOptions(mmpopts, itf):
        oechem.OEThrow.Fatal("Error setting matched pair indexing options!")

    if verbose:
        if not mmpopts.HasIndexableFragmentHeavyAtomRange():
            oechem.OEThrow.Info("Indexing all fragments")
        else:
            oechem.OEThrow.Info(
                "Limiting fragment cores to {0:.2f}-{1:.2f}% of input molecules"
                .format(mmpopts.GetIndexableFragmentRangeMin(),
                        mmpopts.GetIndexableFragmentRangeMax()))

    if itf.GetInt("-maxrec") and verbose:
        oechem.OEThrow.Info("Indexing a maximum of {} records".format(
            itf.GetInt("-maxrec")))

    if itf.GetBool("-exportcompress"):
        if verbose:
            oechem.OEThrow.Info("Removing singleton index nodes from index")
        if not mmpopts.SetOptions(
                mmpopts.GetOptions() |
                oemedchem.OEMatchedPairOptions_ExportCompression):
            oechem.OEThrow.Warning("Error enabling export compression!")

    # set indexing options
    indexopts = oemedchem.OECreateMMPIndexOptions(mmpopts)

    # set requested verbosity setting
    if vverbose:
        indexopts.SetVerbose(2)
    elif verbose:
        indexopts.SetVerbose(1)

    # limit number of records to process
    indexopts.SetMaxRecord(itf.GetInt("-maxrec"))

    # set number of threads to use
    indexopts.SetNumThreads(itf.GetInt("-threads"))
    if verbose:
        if not indexopts.GetNumThreads():
            oechem.OEThrow.Info(
                "Using the maximum number of threads available")
        else:
            oechem.OEThrow.Info("Limiting indexing to {} thread(s)".format(
                indexopts.GetNumThreads()))

    # with -nowarnings, divert OEThrow output into an in-memory stream
    errs = None
    if itf.GetBool("-nowarnings"):
        errs = oechem.oeosstream()
        oechem.OEThrow.SetOutputStream(errs)

    if verbose:
        oechem.OEThrow.Info(
            "Threaded indexing of {}, all SD data will be preserved".format(
                itf.GetString("-input")))

    # create index
    indexstatus = oemedchem.OECreateMMPIndexFile(mmpindexfile,
                                                 itf.GetString("-input"),
                                                 indexopts)

    dupes = 0
    if errs is not None:
        # NOTE(review): the handler is pointed at oeout here, not oeerr, so
        # later messages go to standard output - confirm this is intended.
        oechem.OEThrow.SetOutputStream(oechem.oeout)
        # replay the captured messages and count duplicate notices
        for err in errs.str().decode().split('\n'):
            err = err.rstrip()
            if not err:
                continue

            if verbose:
                oechem.OEThrow.Info(err)

            if 'ignoring duplicate molecule,' in err:
                dupes += 1

    if not indexstatus.IsValid():
        oechem.OEThrow.Fatal('Invalid status returned from indexing!')

    if not indexstatus.GetTotalMols():
        oechem.OEThrow.Fatal('No records in index structure file: {}'.format(
            itf.GetString("-input")))

    if dupes:
        oechem.OEThrow.Info(
            'Found {} duplicate structures during indexing'.format(dupes))

    if not indexstatus.GetNumMatchedPairs():
        oechem.OEThrow.Fatal(
            'No matched pairs found from indexing, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # return some status information
    oechem.OEThrow.Info(
        "Records: {}, Indexed: {}, matched pairs: {:,d}".format(
            indexstatus.GetTotalMols(),
            indexstatus.GetNumMols(),
            indexstatus.GetNumMatchedPairs()))

    return 0
def eMolecules_filtering(input_f, current_smiles=None):
    """
    Filter a molecule file (written for the eMolecules and
    eMolecules_incremental databases) into numbered output files.

    Molecules accepted by ``keep_molecule`` are written, with explicit
    hydrogens and a generated title, to ``<set_name>_<index>.sdf``; a new
    sdf file is started after every 1000 saved molecules, with the index
    starting at 1000. Each saved molecule's title and isomeric SMILES are
    appended to ``<set_name>_<index>.txt``; a new text file is started
    whenever the sdf index reaches a multiple of 100. ``<set_name>`` is the
    input path up to the first '.' of its file name.

    OEThrow output is redirected to an in-memory stream and is not restored
    when the function returns.

    Parameter
    ---------
    input_f : string "path/to/inputfile.sdf"
    current_smiles : iterable of strings, optional
        isomeric SMILES already in your molecule sets; molecules whose
        SMILES is in it are skipped. Defaults to nothing being skipped.

    Raises
    ------
    Exception
        if the input file or an output sdf file cannot be opened.
    """
    import os

    # A set makes the per-molecule membership test O(1); None replaces the
    # former mutable default argument.
    known_smiles = set(current_smiles) if current_smiles is not None else set()

    # Strip the extension from the file name only, so that dots in the
    # directory part (e.g. "./data/x.sdf") do not truncate the path.
    directory, filename = os.path.split(input_f)
    set_name = os.path.join(directory, filename.split('.')[0])
    output_f = set_name+"_%i.sdf"
    smiles_base = set_name+"_%i.txt"
    molecule_name = set_name+"_%i_%i"

    # Load and check input file
    ifs = oechem.oemolistream(input_f)
    if not ifs.IsValid():
        raise Exception("Error: input_file (%s) was not valid" % input_f)

    # NOTE(review): the redirect is never undone - confirm callers expect
    # OEThrow to stay silenced afterwards.
    errs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errs)

    molecule = oechem.OECreateOEGraphMol()
    count = 0
    smile_count = 0
    saved = 0

    switch = False
    current_letter = 1000
    ofs = None
    add_smiles = None
    try:
        # first output file
        ofs_file = output_f % current_letter
        ofs = oechem.oemolostream(ofs_file)
        if not ofs.IsValid():
            raise Exception("output file %s is not valid" % ofs_file)
        add_smiles = open(smiles_base % current_letter, 'a')

        while oechem.OEReadMolecule(ifs, molecule):
            # count input file molecules
            count += 1

            if switch:  # If True create new output file
                switch = False
                ofs.close()
                current_letter += 1
                ofs_file = output_f % current_letter
                # Load and check output file
                ofs = oechem.oemolostream(ofs_file)
                if not ofs.IsValid():
                    raise Exception("output file %s is not valid" % ofs_file)
                print("Switching to file %s, currently saved %i molecules"
                      % (ofs_file, saved))
                if current_letter % 100 == 0:
                    add_smiles.close()
                    add_smiles = open(smiles_base % current_letter, 'a')

            # IF smiles in current list skip the molecule
            smi = oechem.OECreateIsoSmiString(molecule)
            if smi in known_smiles:
                smile_count += 1
                continue

            # Make copy of molecule before making changes
            mol_copy = oechem.OEMol(molecule)
            oechem.OEAddExplicitHydrogens(mol_copy)
            # if the molecule meets our requirements save to current output
            if keep_molecule(mol_copy):
                mol_title = molecule_name % (current_letter, count)
                mol_copy.SetTitle(mol_title)
                add_smiles.write("%s\t\t%s\n" % (mol_title, smi))
                oechem.OEWriteMolecule(ofs, mol_copy)
                saved += 1
                if saved % 1000 == 0:
                    # the next molecule read starts a new sdf file
                    switch = True
    finally:
        # Release every stream, including the SMILES text file, even when
        # an output file could not be opened part-way through.
        if add_smiles is not None:
            add_smiles.close()
        if ofs is not None:
            ofs.close()
        ifs.close()

    print("%i molecules in input file" % (count))
    print("%i molecules were had repeated isomeric SMILES" % smile_count)
    print("%i molecules saved to output files" % (saved))
if has_oechem:
    from chemfp.commandline import oe2fps
    import chemfp.openeye
    OEGRAPHSIM_API_VERSION = chemfp.openeye.OEGRAPHSIM_API_VERSION
    chemfp.openeye._USE_SELECT = False # Grrr. Needed to automate testing.

    # Send OEThrow messages to an in-memory stream. These two statements
    # use oechem directly, so they sit with the other guarded setup
    # (presumably has_oechem records whether oechem is usable - confirm).
    oeerrs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(oeerrs)

# Keep references to the interpreter's original standard streams.
real_stdout = sys.stdout
real_stderr = sys.stderr

# Test data files, resolved through the support module.
PUBCHEM_SDF = support.fullpath("pubchem.sdf")
PUBCHEM_SDF_GZ = support.fullpath("pubchem.sdf.gz")
PUBCHEM_ANOTHER_EXT = support.fullpath("pubchem.should_be_sdf_but_is_not")


def convert_v1_atom_names_to_v2(s):
    """Return *s* with the v1 atom type names replaced by their v2 names.

    The substitutions are applied one after another in the listed order;
    "DefaultAtom" is expanded last, into the '|'-joined v2 names.
    """
    renames = (
        ("Aromaticity", "Arom"),
        ("AtomicNumber", "AtmNum"),
        ("EqAromatic", "EqArom"),
        ("EqHalogen", "EqHalo"),
        ("FormalCharge", "FCharge"),
        ("HvyDegree", "HvyDeg"),
        ("Hybridization", "Hyb"),
        ("DefaultAtom", "Arom|AtmNum|Chiral|EqHalo|FCharge|HvyDeg|Hyb"),
    )
    converted = s
    for v1_name, v2_name in renames:
        converted = converted.replace(v1_name, v2_name)
    return converted


def convert_v1_bond_names_to_v2(s):
    """Return *s* with the v1 bond type names replaced by their v2 names.

    "DefaultBond" is expanded before "BondOrder" is shortened.
    """
    renames = (
        ("DefaultBond", "Order|Chiral"),
        ("BondOrder", "Order"),
    )
    converted = s
    for v1_name, v2_name in renames:
        converted = converted.replace(v1_name, v2_name)
    return converted