示例#1
0
def enumerate_conformations(name, smiles):
    """Generate geometry and run epik."""
    # Generate molecule geometry with OpenEye
    print "Generating molecule {}".format(name)
    oe_molecule = openeye.smiles_to_oemol(smiles)
    try:
        oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    except RuntimeError as e:
        traceback.print_exc()
        print "Skipping molecule " + name
        return

    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    # Save mol2 file with residue name = first three uppercase letters
    print "Running epik on molecule {}".format(name)
    mol2_file_path = output_basepath + '-input.mol2'
    residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    openeye.molecule_to_mol2(oe_molecule, mol2_file_path, residue_name=residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=True,
                         max_structures=32, ph_tolerance=10.0)

    # Convert maestro file to sdf and mol2
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.sdf')
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.mol2')
def enumerate_conformations(name, smiles=None, pdbname=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str
       Isomeric SMILES string
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')
    """
    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    if pdbname:
        # Make sure to only use one entry if there are mutliple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    elif smiles:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError as e:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    else:
        raise Exception('Must provide SMILES string or pdbname')

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100, min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    charged_molecules = list()
    index = 0
    while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
        oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

        index += 1
        print("Charging molecule %d" % (index))
        try:
            # Charge molecule.
            charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800, strictStereo=False, normalize=True, keep_confs=None)
            # Assign Tripos types
            oechem.OETriposAtomTypeNames(charged_molecule)
            oechem.OETriposBondTypeNames(charged_molecule)
            # Store tags.
            oechem.OECopySDData(charged_molecule, sdf_molecule)
            # Store molecule
            charged_molecules.append(charged_molecule)
        except Exception as e:
            print(e)
            print("Skipping protomer/tautomer because of failed charging.")

    # Clean up
    ifs_sdf.close()
    ifs_mol2.close()

    # Write state penalites.
    outfile = open(output_basepath + '-state-penalties.out', 'w')
    for (index, charged_molecule) in enumerate(charged_molecules):
        # Get Epik data.
        epik_Ionization_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty"))
        epik_Ionization_Penalty_Charging = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Charging"))
        epik_Ionization_Penalty_Neutral = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Neutral"))
        epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
        epik_Tot_Q = int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

        outfile.write('%16.8f\n' % epik_State_Penalty)
    outfile.close()

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    flavor = oechem.OEOFlavor_PDB_CurrentResidues | oechem.OEOFlavor_PDB_ELEMENT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_HETBONDS | oechem.OEOFlavor_PDB_BOTH
    ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
    for (index, charged_molecule) in enumerate(charged_molecules):
        # Fix residue names
        for atom in charged_molecule.GetAtoms():
            residue = oechem.OEAtomGetResidue(atom)
            residue.SetName(residue_name)
            oechem.OEAtomSetResidue(atom, residue)

        #oechem.OEWritePDBFile(ofs, charged_molecule, flavor)
        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)
示例#3
0
mae_directory_path = "./mae_files"
if not os.path.exists(mae_directory_path):
    os.makedirs(mae_directory_path)
    print("{} directory created.".format(mae_directory_path))

# Sequencial pKa calculation method is used starting form pH 7.0.

for key in eMolID_oemol_dict.keys():
    print("Running Epik for molecule {} ...".format(key))
    mol2_file_path = mol2_directory_path + "/" + str(key) + ".mol2"
    mae_file_path = mae_directory_path + "/" + str(key) + ".mae"
    schrodinger.run_epik(mol2_file_path,
                         mae_file_path,
                         max_structures=100,
                         ph=7.0,
                         ph_tolerance=None,
                         tautomerize=True,
                         extract_range=None,
                         max_atoms=150,
                         scan=True)

print("\n")

##### CONVERT EPIK OUTPUT (.MAE FILE) TO SDF #####

sdf_directory_path = "./sdf_files"
if not os.path.exists(sdf_directory_path):
    os.makedirs(sdf_directory_path)
    print("{} directory created.".format(sdf_directory_path))

for key in eMolID_oemol_dict.keys():
示例#4
0
def enumerate_conformations(name,
                            pdbfile=None,
                            smiles=None,
                            pdbname=None,
                            pH=7.4):
    """Run Epik to get protonation states using PDB residue templates for naming.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str
       Isomeric SMILES string
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')
    """
    # Create output subfolder
    # output_basepath = os.path.join(output_dir, name)
    # if not os.path.isdir(output_basepath):
    #     os.mkdir(output_basepath)
    # output_basepath = os.path.join(output_basepath, name)

    oehandler = openeye.oechem.OEThrow
    # String stream output
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)
    log = "New run:\nPDB code: {pdbname}; Molecule: {name}; pH {pH}\n".format(
        **locals())
    success_status = True

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            log += "Splitting '%s' into first entry only: '%s'" % (pdbname,
                                                                   pdbnames[0])
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (
            pdbname[0], pdbname, pdbname)
        pdb_filename = name + '-rcsb_download.pdb'
        log += "Retrieving PDB structure from RCSB ligand expo: {}.\n".format(
            pdb_filename)
        retrieve_url(url, pdb_filename)
        log += "Parsing PDB file.\n"
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (
            pdbname[0], pdbname, pdbname)
        sdf_filename = name + '-rcsb_download.sdf'
        log += "Retrieving SDF structure from RCSB ligand expo: {}.\n".format(
            sdf_filename)
        retrieve_url(url, sdf_filename)
        log += "Parsing SDF file.\n"
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        log += "Canonicalizing atom names.\n"
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(),
                                        pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        log += "Assign atom type names.\n"
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname

    # For the moment, disabling these two types of input
    # elif smiles:
    #     # Generate molecule geometry with OpenEye
    #     logging.info(("Generating molecule {}".format(name)))
    #     oe_molecule = openeye.smiles_to_oemol(smiles)
    #     # Assign Tripos atom types
    #     oechem.OETriposAtomTypeNames(oe_molecule)
    #     oechem.OETriposBondTypeNames(oe_molecule)
    #     try:
    #         logging.info("Charging initial")
    #         write_mol2_preserving_atomnames(name + '-debug.mol2', oe_molecule, 'debug')
    #         oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    #     except RuntimeError as e:
    #         traceback.print_exc()
    #         logging.info(("Skipping molecule " + name))
    #         return
    #     residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    #     logging.info("resname = %s", residue_name)
    #     oe_molecule.SetTitle(residue_name) # fix iupac name issue with mol2convert
    # elif pdbfile:
    #     residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    #     logging.info("Loading molecule molecule {0} from {1}".format(name, pdbfile))
    #     oe_molecule = read_molecule(pdbfile)
    #     # Assign Tripos atom types
    #     oechem.OETriposAtomTypeNames(oe_molecule)
    #     oechem.OETriposBondTypeNames(oe_molecule)
    #     try:
    #         logging.info("Charging initial")
    #         write_mol2_preserving_atomnames(name + '-debug.mol2', oe_molecule, 'debug')
    #         oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    #     except RuntimeError as e:
    #         traceback.print_exc()
    #         logging.info(("Skipping molecule " + name))
    #         return
    else:
        raise Exception('Must provide SMILES string or pdbname, or pdbfile')

    # Save mol2 file, preserving atom names
    log += "Running Epik.\n"
    mol2_file_path = name + '-before_epik.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = name + '-epik.mae'
    schrodinger.run_epik(mol2_file_path,
                         mae_file_path,
                         tautomerize=False,
                         max_structures=50,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY),
                         ph=pH)

    log += "Epik run completed.\n"
    # Convert maestro file to sdf and mol2
    output_sdf_filename = name + '-after_epik.sdf'
    output_mol2_filename = name + '-after_epik.mol2'
    # logging.info("Creating sdf")
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    # logging.info("Creating mol2")
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.

    # reset count of error handler
    oehandler.Clear()
    log += "Assigning charges to protonation states.\n"
    charged_molecules = list()
    index = 0
    failed_states = set()
    while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
        oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

        index += 1
        log += "State {0:d}\n".format(index)
        try:
            # Charge molecule.
            charged_molecule_conformers = omtoe.get_charges(mol2_molecule,
                                                            max_confs=800,
                                                            strictStereo=False,
                                                            normalize=True,
                                                            keep_confs=-1)

            log += "Charging stage output:\n"
            OEOutput = str(oss)
            log += OEOutput
            log += "\nCharging state completed.\n"

            # Restore coordinates to original
            charged_molecule = select_conformers(charged_molecule_conformers,
                                                 mol2_molecule,
                                                 keep_confs=None)

            # Assign Tripos types
            oechem.OETriposAtomTypeNames(charged_molecule)
            oechem.OETriposBondTypeNames(charged_molecule)
            # Store tags.
            oechem.OECopySDData(charged_molecule, sdf_molecule)
            # Store molecule
            charged_molecules.append(charged_molecule)

            # Check for failure in the log
            openeye_charge_log_parser(OEOutput, True)

            oehandler.Clear()

        except Exception as e:
            failed_states.add(index)
            logging.info(e)
            log += "State failed charging.\n"
            log += str(e)
            log += "\n"

            filename_failure = name + '-conformers-failed-state-{}-.mol2'.format(
                index)
            try:
                write_mol2_preserving_atomnames(filename_failure,
                                                charged_molecule_conformers,
                                                residue_name)
            except:
                log += "Could not store result, most likely failed during Omega step!\n"

            success_status = False
            oehandler.Clear()

    # Clean up
    ifs_sdf.close()
    ifs_mol2.close()

    # Write state penalties.
    outfile = open(name + '-state-penalties.out', 'w')
    for (index, charged_molecule) in enumerate(charged_molecules):
        # Get Epik data.
        log += "Writing Epik data for state {:d}\n".format(index + 1)
        epik_Ionization_Penalty = float(
            oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty"))
        epik_Ionization_Penalty_Charging = float(
            oechem.OEGetSDData(charged_molecule,
                               "r_epik_Ionization_Penalty_Charging"))
        epik_Ionization_Penalty_Neutral = float(
            oechem.OEGetSDData(charged_molecule,
                               "r_epik_Ionization_Penalty_Neutral"))
        epik_State_Penalty = float(
            oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
        epik_Tot_Q = int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

        outfile.write('%16.8f\n' % epik_State_Penalty)
    outfile.close()

    # Write as PDB
    charged_pdb_filename = name + '-charged_output.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    flavor = oechem.OEOFlavor_PDB_CurrentResidues | oechem.OEOFlavor_PDB_ELEMENT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_HETBONDS | oechem.OEOFlavor_PDB_BOTH
    ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
    for (index, charged_molecule) in enumerate(charged_molecules):
        # Fix residue names
        for atom in charged_molecule.GetAtoms():
            residue = oechem.OEAtomGetResidue(atom)
            residue.SetName(residue_name)
            oechem.OEAtomSetResidue(atom, residue)
        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = name + '-charged_output.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules,
                                    residue_name)
    log += "Run completed.\n"
    if success_status:
        log += "Status: Success\n"
    else:
        log += "Status: Failure\n"
        log += "Failed states: {}\n".format(" ".join(
            [str(state) for state in sorted(list(failed_states))]))

    with open("log.txt", 'w') as logfile:
        logfile.write(log)

    return log, success_status
示例#5
0
def run_epik(name, filename, residue_name, perceive_bonds=False):
    """Generate conformer with OpenEye omega, protonation states with Schrodinger Epik, and charges with OpenEye AM1-BCC.

    Parameters
    ----------
    name : str
       The name of the output directory to generate.
    filename : str
       The mol2, PDB, or SDF file to read in.
    residue_name : str
       Three uppercase letters to name residue.
    perceive_bonds : bool, optional, default=False
       If True, will use geometry to perceive connectivity.
       This is necessary for PDB files.

    """
    # Generate molecule geometry with OpenEye
    print("Generating molecule %s from %s" % (name, filename))
    oe_molecule = read_molecules(filename)
    if perceive_bonds:
        oechem.OEDetermineConnectivity(oe_molecule)

    # Assign geometry and charges with Omega
    oe_molecule = openeye.get_charges(oe_molecule, max_confs=1, strictStereo=False, normalize=True, keep_confs=1)

    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    output_basepath = os.path.join(output_basepath, name)

    # Save mol2 file with residue name = first three uppercase letters
    print "Running epik on molecule {}".format(name)
    mol2_file_path = output_basepath + '-input.mol2'
    residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    #openeye.molecule_to_mol2(oe_molecule, mol2_file_path, residue_name=residue_name)
    from openeye import oechem
    ofs = oechem.oemolostream(mol2_file_path)
    oechem.OEWriteMol2File(ofs, oe_molecule, True, False)
    ofs.close()

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100, min_probability=np.exp(-6), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEMol()
    uncharged_molecules = read_molecules(output_sdf_filename)

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()
    uncharged_molecules = read_molecules(output_sdf_filename)

    # Assign charges.
    charged_molecules = list()
    index = 0
    while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
        molecule = oechem.OEReadMolecule(ifs_mol2, mol2_molecule)
        index += 1
        print "Charging molecule %d / %d" % (index, len(uncharged_molecules))
        try:
            # Charge molecule.
            charged_molecule = openeye.get_charges(sdf_molecule, max_confs=800, strictStereo=False, normalize=True, keep_confs=None)

            # Store tags.
            oechem.OECopySDData(charged_molecule, sdf_molecule)

            charged_molecules.append(charged_molecule)
        except Exception as e:
            print(e)
            print("Skipping protomer/tautomer because of failed charging.")

    # Clean up
    ifs_sdf.close()
    ifs_mol2.close()

    # Write molecules.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    ofs = oechem.oemolostream(charged_mol2_filename)
    for (index, charged_molecule) in enumerate(charged_molecules):
        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write state penalites.
    outfile = open(output_basepath + '-state-penalties.out', 'w')
    for (index, charged_molecule) in enumerate(charged_molecules):

        # Get Epik data.
        epik_Ionization_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty"))
        epik_Ionization_Penalty_Charging = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Charging"))
        epik_Ionization_Penalty_Neutral = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Neutral"))
        epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
        epik_Tot_Q = int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

        outfile.write('%16.8f\n' % epik_State_Penalty)
    outfile.close()