def enumerate_conformations(name, smiles):
    """Generate a geometry for a molecule and enumerate its states with Epik.

    Parameters
    ----------
    name : str
        Molecule name. Used for the output subdirectory, as the prefix of all
        output files, and (letters only, upper-cased, first three characters)
        as the residue name written to the input mol2 file.
    smiles : str
        SMILES string the molecule is generated from.

    Notes
    -----
    Output is written below ``os.path.join(output_dir, name)`` using the file
    prefix ``name``: ``-input.mol2``, ``-epik.mae``, ``-epik.sdf`` and
    ``-epik.mol2``.

    If charging raises ``RuntimeError`` the traceback is printed and the
    molecule is skipped: the function returns ``None`` before any output
    directory or file is created.
    """
    # Generate molecule geometry with OpenEye
    print("Generating molecule {}".format(name))
    oe_molecule = openeye.smiles_to_oemol(smiles)
    try:
        oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    except RuntimeError:
        # Best-effort pipeline: report the failure and move on.
        traceback.print_exc()
        print("Skipping molecule " + name)
        return

    # Create output subfolder; files inside it are prefixed with the name too.
    molecule_dir = os.path.join(output_dir, name)
    if not os.path.isdir(molecule_dir):
        os.mkdir(molecule_dir)
    output_basepath = os.path.join(molecule_dir, name)

    # Save mol2 file with residue name = first three uppercase letters
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    openeye.molecule_to_mol2(oe_molecule, mol2_file_path, residue_name=residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=True,
                         max_structures=32, ph_tolerance=10.0)

    # Convert maestro file to sdf and mol2
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.sdf')
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.mol2')
def enumerate_conformations(name, smiles=None, pdbname=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The input molecule is taken from RCSB Ligand Expo when `pdbname` is given,
    otherwise it is generated from `smiles`. Epik is run at pH 7.4, every
    resulting state is charged, and the charged states are written out
    together with their Epik state penalties.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str, optional
       Isomeric SMILES string. Only used when `pdbname` is not given.
    pdbname : str, optional
       Three-letter PDB code (e.g. 'DB8'). If several space-separated codes
       are given, only the first one is used.

    Raises
    ------
    ValueError
        If neither `pdbname` nor `smiles` is provided.

    Notes
    -----
    Files are written below ``os.path.join(output_dir, name)`` with the prefix
    ``name``: ``-input.pdb``/``-input.sdf`` (PDB route only), ``-input.mol2``,
    ``-epik.mae``, ``-epik.sdf``, ``-epik.mol2``, ``-state-penalties.out``,
    ``-epik-charged.pdb`` and ``-epik-charged.mol2``.

    On the SMILES route, a ``RuntimeError`` during the initial charging prints
    the traceback and returns ``None`` early. States that fail charging after
    Epik are reported and skipped.
    """
    # Validate before touching the filesystem.
    if not pdbname and not smiles:
        raise ValueError('Must provide SMILES string or pdbname')

    # Create output subfolder
    molecule_dir = os.path.join(output_dir, name)
    if not os.path.isdir(molecule_dir):
        os.mkdir(molecule_dir)
    output_basepath = os.path.join(molecule_dir, name)

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF.
        # NOTE(review): atoms are paired purely by iteration order and zip()
        # stops at the shorter molecule -- confirm both files always list the
        # same atoms in the same order.
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())

        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)
        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    else:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # The SDF copy carries the Epik tags, the MOL2 copy is the one charged.
    # Both files are read in lockstep, one record per state.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    charged_molecules = []
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
            index += 1
            print("Charging molecule %d" % (index))
            try:
                # Charge molecule.
                charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800,
                                                       strictStereo=False, normalize=True,
                                                       keep_confs=None)
                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)
            except Exception as e:
                # Deliberate best effort: one bad state must not abort the run.
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
    finally:
        # Clean up, also when reading fails unexpectedly.
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties (one value per successfully charged state).
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = (oechem.OEOFlavor_PDB_CurrentResidues
                  | oechem.OEOFlavor_PDB_ELEMENT
                  | oechem.OEOFlavor_PDB_BONDS
                  | oechem.OEOFlavor_PDB_HETBONDS
                  | oechem.OEOFlavor_PDB_BOTH)
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)
# Script fragment: ensures ./mae_files exists, runs Epik (scan=True, pH 7.0) on <key>.mol2 from mol2_directory_path for every key of eMolID_oemol_dict, then ensures ./sdf_files exists; the final loop's body lies outside this excerpt.
mae_directory_path = "./mae_files" if not os.path.exists(mae_directory_path): os.makedirs(mae_directory_path) print("{} directory created.".format(mae_directory_path)) # Sequential pKa calculation method is used starting from pH 7.0. for key in eMolID_oemol_dict.keys(): print("Running Epik for molecule {} ...".format(key)) mol2_file_path = mol2_directory_path + "/" + str(key) + ".mol2" mae_file_path = mae_directory_path + "/" + str(key) + ".mae" schrodinger.run_epik(mol2_file_path, mae_file_path, max_structures=100, ph=7.0, ph_tolerance=None, tautomerize=True, extract_range=None, max_atoms=150, scan=True) print("\n") ##### CONVERT EPIK OUTPUT (.MAE FILE) TO SDF ##### sdf_directory_path = "./sdf_files" if not os.path.exists(sdf_directory_path): os.makedirs(sdf_directory_path) print("{} directory created.".format(sdf_directory_path)) for key in eMolID_oemol_dict.keys():
def enumerate_conformations(name, pdbfile=None, smiles=None, pdbname=None, pH=7.4):
    """Run Epik to get protonation states using PDB residue templates for naming.

    Parameters
    ----------
    name : str
       Common name of molecule (used as prefix of all files written to the
       current working directory)
    pdbfile : str, optional
       Currently ignored; this input route is disabled.
    smiles : str, optional
       Isomeric SMILES string. Currently ignored; this input route is disabled.
    pdbname : str
       Three-letter PDB code (e.g. 'DB8'). If several space-separated codes
       are given, only the first one is used.
    pH : float, optional, default=7.4
       pH passed to Epik.

    Returns
    -------
    log : str
        Text log of the run. The same text is written to ``log.txt``.
    success_status : bool
        False if any state raised during charging, True otherwise.

    Raises
    ------
    ValueError
        If `pdbname` is not provided.

    Notes
    -----
    While the function runs, OpenEye ``OEThrow`` messages are captured in an
    in-memory stream so they can be copied into the log. The stream is reset
    to ``oechem.oeerr`` on exit so the handler never keeps pointing at a
    stream that has gone out of scope.
    """
    # Only the pdbname route is enabled for the moment. The smiles and pdbfile
    # routes (build/read the molecule, assign Tripos types, charge it, derive
    # the residue name from `name`) are disabled.
    if not pdbname:
        raise ValueError('Must provide SMILES string or pdbname, or pdbfile')

    oehandler = openeye.oechem.OEThrow
    # String stream output
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)
    try:
        return _enumerate_pdb_ligand_states(name, pdbname, pH, oehandler, oss)
    finally:
        # `oss` is local to this call; do not leave the global handler
        # attached to it.
        oehandler.SetOutputStream(oechem.oeerr)


def _enumerate_pdb_ligand_states(name, pdbname, pH, oehandler, oss):
    """Download the ligand, run Epik, charge every state and write the results."""
    log = ["New run:\nPDB code: {pdbname}; Molecule: {name}; pH {pH}\n".format(
        pdbname=pdbname, name=name, pH=pH)]
    success_status = True

    # Make sure to only use one entry if there are multiple
    if ' ' in pdbname:
        pdbnames = pdbname.split(' ')
        log.append("Splitting '%s' into first entry only: '%s'\n" % (pdbname, pdbnames[0]))
        pdbname = pdbnames[0]

    # Retrieve PDB (for atom names)
    url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (
        pdbname[0], pdbname, pdbname)
    pdb_filename = name + '-rcsb_download.pdb'
    log.append("Retrieving PDB structure from RCSB ligand expo: {}.\n".format(pdb_filename))
    retrieve_url(url, pdb_filename)
    log.append("Parsing PDB file.\n")
    pdb_molecule = read_molecule(pdb_filename)

    # Retrieve SDF (for everything else)
    url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (
        pdbname[0], pdbname, pdbname)
    sdf_filename = name + '-rcsb_download.sdf'
    log.append("Retrieving SDF structure from RCSB ligand expo: {}.\n".format(sdf_filename))
    retrieve_url(url, sdf_filename)
    log.append("Parsing SDF file.\n")
    sdf_molecule = read_molecule(sdf_filename)

    # Replace atom names in SDF.
    # NOTE(review): atoms are paired purely by iteration order and zip() stops
    # at the shorter molecule -- confirm both downloads list the same atoms in
    # the same order.
    log.append("Canonicalizing atom names.\n")
    for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
        sdf_atom.SetName(pdb_atom.GetName())

    # Assign Tripos atom types
    log.append("Assign atom type names.\n")
    oechem.OETriposAtomTypeNames(sdf_molecule)
    oechem.OETriposBondTypeNames(sdf_molecule)
    oe_molecule = sdf_molecule

    # We already know the residue name
    residue_name = pdbname

    # Save mol2 file, preserving atom names
    log.append("Running Epik.\n")
    mol2_file_path = name + '-before_epik.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = name + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=50,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=pH)
    log.append("Epik run completed.\n")

    # Convert maestro file to sdf and mol2
    output_sdf_filename = name + '-after_epik.sdf'
    output_mol2_filename = name + '-after_epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # The SDF copy carries the Epik tags, the MOL2 copy is the one charged.
    # Both files are read in lockstep, one record per state.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    # reset count of error handler
    oehandler.Clear()
    log.append("Assigning charges to protonation states.\n")
    charged_molecules = []
    failed_states = set()
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
            index += 1
            log.append("State {0:d}\n".format(index))
            # Reset per state so the failure handler can never pick up the
            # conformers of a previous state.
            charged_molecule_conformers = None
            try:
                # Charge molecule.
                charged_molecule_conformers = omtoe.get_charges(
                    mol2_molecule, max_confs=800, strictStereo=False,
                    normalize=True, keep_confs=-1)

                log.append("Charging stage output:\n")
                # NOTE(review): `oss` itself is never reset in this function;
                # confirm whether this re-includes output from earlier states.
                OEOutput = str(oss)
                log.append(OEOutput)
                log.append("\nCharging state completed.\n")

                # Restore coordinates to original
                charged_molecule = select_conformers(charged_molecule_conformers,
                                                     mol2_molecule, keep_confs=None)

                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)

                # Check for failure in the log.
                # NOTE(review): the state has already been stored at this
                # point, so a state rejected here is both written out and
                # reported as failed -- confirm this is intended.
                openeye_charge_log_parser(OEOutput, True)
                oehandler.Clear()

            except Exception as e:
                failed_states.add(index)
                logging.info(e)
                log.append("State failed charging.\n")
                log.append(str(e))
                log.append("\n")
                if charged_molecule_conformers is None:
                    # get_charges itself failed: there is nothing to save.
                    log.append("Could not store result, most likely failed during Omega step!\n")
                else:
                    filename_failure = name + '-conformers-failed-state-{}-.mol2'.format(index)
                    try:
                        write_mol2_preserving_atomnames(
                            filename_failure, charged_molecule_conformers, residue_name)
                    except Exception:
                        log.append("Could not store result, most likely failed during Omega step!\n")
                success_status = False
                oehandler.Clear()
    finally:
        # Clean up
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties (one value per stored state).
    with open(name + '-state-penalties.out', 'w') as outfile:
        for (state_index, charged_molecule) in enumerate(charged_molecules):
            log.append("Writing Epik data for state {:d}\n".format(state_index + 1))
            epik_State_Penalty = float(
                oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = name + '-charged_output.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = (oechem.OEOFlavor_PDB_CurrentResidues
                  | oechem.OEOFlavor_PDB_ELEMENT
                  | oechem.OEOFlavor_PDB_BONDS
                  | oechem.OEOFlavor_PDB_HETBONDS
                  | oechem.OEOFlavor_PDB_BOTH)
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = name + '-charged_output.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)

    log.append("Run completed.\n")
    if success_status:
        log.append("Status: Success\n")
    else:
        log.append("Status: Failure\n")
        log.append("Failed states: {}\n".format(
            " ".join(str(state) for state in sorted(failed_states))))

    log_text = "".join(log)
    with open("log.txt", 'w') as logfile:
        logfile.write(log_text)

    return log_text, success_status
def run_epik(name, filename, residue_name, perceive_bonds=False):
    """Generate conformer with OpenEye omega, protonation states with Schrodinger Epik, and charges with OpenEye AM1-BCC.

    Parameters
    ----------
    name : str
       The name of the output directory to generate.
    filename : str
       The mol2, PDB, or SDF file to read in.
    residue_name : str
       Three uppercase letters to name residue.
       NOTE(review): this value is accepted but not applied anywhere; the
       input mol2 is written with ``OEWriteMol2File`` without renaming
       residues.
    perceive_bonds : bool, optional, default=False
       If True, will use geometry to perceive connectivity.
       This is necessary for PDB files.

    Notes
    -----
    Files are written below ``os.path.join(output_dir, name)`` with the prefix
    ``name``: ``-input.mol2``, ``-epik.mae``, ``-epik.sdf``, ``-epik.mol2``,
    ``-epik-charged.mol2`` and ``-state-penalties.out``. States that fail
    charging are reported and skipped.
    """
    # A function-scope import makes `oechem` a local name for the WHOLE
    # function, so it has to be bound before the first use below; importing
    # it further down raised UnboundLocalError when perceive_bonds=True.
    from openeye import oechem

    # Generate molecule geometry with OpenEye
    print("Generating molecule %s from %s" % (name, filename))
    # NOTE(review): read_molecules() is used further down as returning a
    # sized collection; confirm what it returns for a single-molecule file.
    oe_molecule = read_molecules(filename)
    if perceive_bonds:
        oechem.OEDetermineConnectivity(oe_molecule)

    # Assign geometry and charges with Omega
    oe_molecule = openeye.get_charges(oe_molecule, max_confs=1, strictStereo=False,
                                      normalize=True, keep_confs=1)

    # Create output subfolder
    molecule_dir = os.path.join(output_dir, name)
    if not os.path.isdir(molecule_dir):
        os.mkdir(molecule_dir)
    output_basepath = os.path.join(molecule_dir, name)

    # Save the input molecule as mol2
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    ofs = oechem.oemolostream(mol2_file_path)
    try:
        oechem.OEWriteMol2File(ofs, oe_molecule, True, False)
    finally:
        ofs.close()

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100, min_probability=np.exp(-6), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Only needed for the "i / N" progress message below.
    uncharged_molecules = read_molecules(output_sdf_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    charged_molecules = []
    index = 0
    try:
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            # The mol2 stream is advanced in lockstep, but it is the SDF
            # record that gets charged here.
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
            index += 1
            print("Charging molecule %d / %d" % (index, len(uncharged_molecules)))
            try:
                # Charge molecule.
                charged_molecule = openeye.get_charges(sdf_molecule, max_confs=800,
                                                       strictStereo=False, normalize=True,
                                                       keep_confs=None)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                charged_molecules.append(charged_molecule)
            except Exception as e:
                # Deliberate best effort: one bad state must not abort the run.
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
    finally:
        # Clean up
        ifs_sdf.close()
        ifs_mol2.close()

    # Write molecules.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    ofs = oechem.oemolostream(charged_mol2_filename)
    try:
        for charged_molecule in charged_molecules:
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write state penalties (one value per successfully charged state).
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            outfile.write('%16.8f\n' % epik_State_Penalty)