Exemplo n.º 1
0
def main(argv=[__name__]):
    """Print every input molecule's title next to its Lipinski violations.

    The command line is parsed through an ``OEInterface`` built from
    ``InterfaceData``. Each molecule read from the ``-in`` file is passed to
    an ``OEFilter`` whose table output is captured in an in-memory stream, so
    that the row produced for the molecule can be looked up by column header.
    """
    interface = oechem.OEInterface(InterfaceData)
    oemolprop.OEConfigureFilterParams(interface)

    if not oechem.OEParseCommandLine(interface, argv):
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    input_name = interface.GetString("-in")

    molstream = oechem.oemolistream()
    if not molstream.open(input_name):
        oechem.OEThrow.Fatal("Cannot open input file!")

    mol_filter = oemolprop.OEFilter(oemolprop.OEGetFilterType(interface))

    oechem.OEThrow.SetLevel(interface.GetInt("-verbose"))

    # Attach an in-memory table to the filter; SetTable's second argument
    # is passed as False, as in the original call.
    table = oechem.oeosstream()
    mol_filter.SetTable(table, False)

    # The header row is kept as bytes, so the lookup key below is bytes too.
    column_names = table.str().split(b'\t')
    table.clear()  # drop the header row from the stream

    for mol in molstream.GetOEGraphMols():
        mol_filter(mol)

        row_text = table.str().decode("UTF-8")
        table.clear()  # drop this molecule's row from the stream

        row = dict(zip(column_names, row_text.split('\t')))
        print(mol.GetTitle(), row[b"Lipinski violations"])
Exemplo n.º 2
0
 def __init__(self, *args, **kwargs):
     """Create the molecule streams used by this serializer mixin.

     All positional and keyword arguments are forwarded to the next
     ``__init__`` in the MRO. Afterwards an input stream and a
     string-backed output stream are created, both set to gzipped OEB.
     """
     super(MoleculeSerializerMixin, self).__init__(*args, **kwargs)
     # Input stream: OEB format with gzip enabled.
     self._ifs = oemolistream()
     self._ifs.SetFormat(OEFormat_OEB)
     self._ifs.Setgz(True)
     # NOTE(review): ``errs`` is only handed to the oemolostream constructor
     # and never read here - presumably it receives the stream's messages;
     # confirm against the OEChem API.
     errs = oeosstream()
     self._ofs = oemolostream(errs, False)
     # Output stream writes to an in-memory string; gzip and format are set
     # after openstring(), in this order.
     self._ofs.openstring()
     self._ofs.Setgz(True)
     self._ofs.SetFormat(OEFormat_OEB)
def depict(mol, width=500, height=200):
    """Render ``mol`` as a PNG and wrap it in an ``IPython.display.Image``.

    Parameters
    ----------
    mol
        Molecule to draw. It is passed to ``OEPrepareDepiction`` (with a
        horizontal depiction orientation) before rendering.
    width, height : int
        Size of the image handed to ``OE2DMolDisplayOptions``.

    Returns
    -------
    IPython.display.Image
        Image built from the PNG data written by ``OERenderMolecule``.
    """
    from IPython.display import Image
    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    oedepict.OEPrepareDepiction(mol, dopt)
    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)
    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()
    # ofs.str() already is the complete PNG payload. The former
    # "".join(ofs.str()) raised TypeError on Python 3, where the payload is
    # a bytes object (iterating it yields ints), and was a no-op on Python 2.
    return Image(data=ofs.str())
Exemplo n.º 4
0
def _oe_capture_warnings():  # pragma: no cover
    """Redirect OpenEye messages into an in-memory stream around a ``yield``.

    Before yielding, ``OEThrow`` is pointed at a fresh ``oeosstream`` and
    cleared; afterwards its output is sent back to ``oechem.oeerr``.

    NOTE(review): this generator is presumably wrapped by
    ``contextlib.contextmanager`` outside the lines shown here - confirm.
    """

    from openeye import oechem

    output_stream = oechem.oeosstream()

    oechem.OEThrow.SetOutputStream(output_stream)
    oechem.OEThrow.Clear()

    try:
        yield
    finally:
        # Restore the default stream even when the code running at the yield
        # point raises; otherwise OEThrow would stay attached to the
        # temporary stream for the rest of the process.
        oechem.OEThrow.SetOutputStream(oechem.oeerr)
def depict(mol, width=500, height=200):
    """Render ``mol`` as a PNG and wrap it in an ``IPython.display.Image``.

    Parameters
    ----------
    mol
        Molecule to draw. It is passed to ``OEPrepareDepiction`` (with a
        horizontal depiction orientation) before rendering.
    width, height : int
        Size of the image handed to ``OE2DMolDisplayOptions``.

    Returns
    -------
    IPython.display.Image
        Image built from the PNG data written by ``OERenderMolecule``.
    """
    from IPython.display import Image
    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    oedepict.OEPrepareDepiction(mol, dopt)
    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)
    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()
    # ofs.str() already is the complete PNG payload. The former
    # "".join(ofs.str()) raised TypeError on Python 3, where the payload is
    # a bytes object (iterating it yields ints), and was a no-op on Python 2.
    return Image(data=ofs.str())
Exemplo n.º 6
0
def filter_molecules(input_molstream,
                     output_molstream,
                     allow_repeats=False,
                     allow_warnings=False,
                     max_heavy_atoms=100,
                     remove_smirks=None,
                     max_metals=0,
                     explicitHs=True,
                     elements=None,
                     check_type=None):
    """
    Takes input file and removes molecules using given criteria then
    writes a new output file

    Parameters
    ----------
    input_molstream
        Stream molecules are read from with ``OEReadMolecule``.
    output_molstream
        Stream kept molecules are written to with ``OEWriteMolecule``.
    allow_repeats : bool
        If False, a molecule whose isomeric SMILES was already saved is
        skipped without being evaluated.
    allow_warnings : bool
        If False, a molecule whose read produced a message containing
        "warning" is skipped.
    max_heavy_atoms, remove_smirks, max_metals, elements, check_type
        Forwarded unchanged to ``keep_molecule``. ``remove_smirks`` defaults
        to a new empty list per call.
    explicitHs : bool
        If True, explicit hydrogens are added to the copy that is evaluated
        and written.

    A four-line summary (read, warned, repeated, saved) is printed at the end.

    NOTE(review): the OEThrow output stream is left pointing at the local
    capture stream when this function returns, as before - confirm whether
    it should be restored to ``oechem.oeerr``.
    """
    # A fresh list per call instead of one default list shared by all calls.
    if remove_smirks is None:
        remove_smirks = []

    errs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errs)

    molecule = oechem.OECreateOEGraphMol()
    # SMILES of every saved molecule; a set keeps the repeat check O(1).
    seen_smiles = set()

    count = 0
    warnings = 0
    smile_count = 0
    saved = 0

    while oechem.OEReadMolecule(input_molstream, molecule):
        count += 1

        # The captured messages come back as bytes on Python 3; testing a
        # str against bytes with ``in`` raises TypeError, so decode first.
        messages = errs.str()
        if isinstance(messages, bytes):
            messages = messages.decode("utf-8", errors="replace")
        errs.clear()

        if not allow_warnings and "warning" in messages.lower():
            warnings += 1
            continue

        smi = oechem.OECreateIsoSmiString(molecule)
        mol_copy = oechem.OEMol(molecule)
        if explicitHs:
            oechem.OEAddExplicitHydrogens(mol_copy)

        new_smile = smi not in seen_smiles
        if not new_smile:
            smile_count += 1

        if new_smile or allow_repeats:
            keep = keep_molecule(mol_copy, max_heavy_atoms, remove_smirks,
                                 max_metals, elements, check_type)
            if keep:
                seen_smiles.add(smi)
                oechem.OEWriteMolecule(output_molstream, mol_copy)
                saved += 1

        # Drop anything emitted while evaluating this molecule so it is not
        # attributed to the next read.
        errs.clear()

    print(f"{count} molecules in input stream")
    print(f"{warnings} molecules resulted in warnings when parsing")
    print(f"{smile_count} molecules had repeated isomeric SMILES")
    print(f"{saved} molecules saved")
def depictMatch(mol, match, width=500, height=200):
    """Render ``mol`` as a PNG with ``match`` highlighted in light blue.

    Parameters
    ----------
    mol
        Molecule to draw. It is passed to ``OEPrepareDepiction`` with a
        horizontal orientation and hydrogen suppression enabled.
    match
        Object handed to ``OEAddHighlighting`` to select what is highlighted.
    width, height : int
        Size of the image handed to ``OE2DMolDisplayOptions``.

    Returns
    -------
    IPython.display.Image
        Image built from the PNG data written by ``OERenderMolecule``.
    """
    from IPython.display import Image
    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    dopt.SetSuppressHydrogens(True)
    oedepict.OEPrepareDepiction(mol, dopt)
    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)
    hstyle = oedepict.OEHighlightStyle_Color
    hcolor = oechem.OEColor(oechem.OELightBlue)
    oedepict.OEAddHighlighting(disp, hcolor, hstyle, match)
    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()
    # ofs.str() already is the complete PNG payload. The former
    # "".join(ofs.str()) raised TypeError on Python 3, where the payload is
    # a bytes object (iterating it yields ints), and was a no-op on Python 2.
    return Image(data=ofs.str())
Exemplo n.º 8
0
def call_openeye(
    oe_callable: Callable[[T], bool],
    *args: T,
    exception_type: Type[RechargeException] = RuntimeError,
    exception_kwargs: Dict[str, Any] = None,
):
    """Wraps a call to an OpenEye function, either capturing the output in an
    exception if the function does not complete successfully, or redirecting it
    to the logger.

    While ``oe_callable`` runs, ``OEThrow`` writes to an in-memory stream.
    The default ``oechem.oeerr`` stream is restored afterwards, including when
    ``oe_callable`` itself raises.

    Parameters
    ----------
    oe_callable
        The OpenEye function to call.
    args
        The arguments to pass to the OpenEye function.
    exception_type:
        The type of exception to raise when the function does not
        successfully complete.
    exception_kwargs
        The keyword arguments to pass to the exception.

    Raises
    ------
    exception_type
        If ``oe_callable`` returns a falsy status. The message is a newline
        followed by the cleaned-up captured output.
    """
    from openeye import oechem

    if exception_kwargs is None:
        exception_kwargs = {}

    output_stream = oechem.oeosstream()

    oechem.OEThrow.SetOutputStream(output_stream)
    oechem.OEThrow.Clear()

    try:
        status = oe_callable(*args)
    finally:
        # Without this, an exception escaping oe_callable would leave OEThrow
        # attached to the temporary stream for the rest of the process.
        oechem.OEThrow.SetOutputStream(oechem.oeerr)

    output_string = output_stream.str().decode("UTF-8")

    # Strip the "Warning: " prefix, any leading ": " left at the start of a
    # line, and one trailing newline.
    output_string = output_string.replace("Warning: ", "")
    output_string = re.sub("^: +", "", output_string, flags=re.MULTILINE)
    output_string = re.sub("\n$", "", output_string)

    if not status:

        # noinspection PyArgumentList
        raise exception_type("\n" + output_string, **exception_kwargs)

    elif len(output_string) > 0:
        logging.debug(output_string)
Exemplo n.º 9
0
def depictMatch(mol, match, width=500, height=200):
    """Render ``mol`` as a PNG with ``match`` highlighted in light blue.

    Parameters
    ----------
    mol
        Molecule to draw. It is passed to ``OEPrepareDepiction`` with a
        horizontal orientation and hydrogen suppression enabled.
    match
        Object handed to ``OEAddHighlighting`` to select what is highlighted.
    width, height : int
        Size of the image handed to ``OE2DMolDisplayOptions``.

    Returns
    -------
    IPython.display.Image
        Image built from the PNG data written by ``OERenderMolecule``.
    """
    from IPython.display import Image
    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation(oedepict.OEDepictOrientation_Horizontal)
    dopt.SetSuppressHydrogens(True)
    oedepict.OEPrepareDepiction(mol, dopt)
    opts = oedepict.OE2DMolDisplayOptions(width, height,
                                          oedepict.OEScale_AutoScale)
    disp = oedepict.OE2DMolDisplay(mol, opts)
    hstyle = oedepict.OEHighlightStyle_Color
    hcolor = oechem.OEColor(oechem.OELightBlue)
    oedepict.OEAddHighlighting(disp, hcolor, hstyle, match)
    ofs = oechem.oeosstream()
    oedepict.OERenderMolecule(ofs, 'png', disp)
    ofs.flush()
    # ofs.str() already is the complete PNG payload. The former
    # "".join(ofs.str()) raised TypeError on Python 3, where the payload is
    # a bytes object (iterating it yields ints), and was a no-op on Python 2.
    return Image(data=ofs.str())
Exemplo n.º 10
0
def GetUrlSDF2SMI(url, fout, ntries=20, poll_wait=10):
    '''Get PubChem SDF.GZ and write one "SMILES CID" line per molecule to fout.

    The SMILES is read from the SD tag PUBCHEM_OPENEYE_ISO_SMILES and the name
    from PUBCHEM_COMPOUND_CID. The download goes to a temporary .sdf.gz file,
    which is removed again before returning, also when an error occurs.

    url: passed to GetUrl together with ntries and poll_wait.
    fout: writable text file object receiving the SMILES lines.

    Returns the total number of characters written to fout.

    NOTE(review): the OEThrow output stream stays redirected to the local
    capture stream after this function returns, as before - confirm whether
    it should be restored.
    '''
    import openeye.oechem as oechem

    def HandleOEErrors(oeerrs, nowarn):
        '''Forward captured OEChem messages to stderr, optionally dropping warnings.'''
        errstr = oeerrs.str()
        # The stream content is bytes on Python 3; splitting bytes on a str
        # separator raises TypeError, so decode before processing.
        if isinstance(errstr, bytes):
            errstr = errstr.decode('utf-8', 'replace')
        for line in errstr.split('\n'):
            if not line.rstrip(): continue
            if re.search('Warning', line, re.I) and nowarn: continue
            sys.stderr.write("%s\n" % line)
        oeerrs.clear()

    fout_tmp = tempfile.NamedTemporaryFile(prefix='pubchem_ftp_',
                                           suffix='.sdf.gz',
                                           delete=False)
    fpath_tmp = fout_tmp.name
    nbytes = 0
    try:
        try:
            GetUrl(url, fout_tmp, ntries, poll_wait)
            logging.debug('fpath_tmp = %s' % fpath_tmp)
        finally:
            fout_tmp.close()

        ims = oechem.oemolistream(fpath_tmp)
        try:
            ims.SetFormat(oechem.OEFormat_SDF)
            ims.Setgz(True)

            mol = oechem.OEGraphMol()
            oeerrs = oechem.oeosstream()
            oechem.OEThrow.SetOutputStream(oeerrs)
            while oechem.OEReadMolecule(ims, mol):
                cid = oechem.OEGetSDData(mol, 'PUBCHEM_COMPOUND_CID')
                isosmi = oechem.OEGetSDData(mol, 'PUBCHEM_OPENEYE_ISO_SMILES')
                buff = ("%s %s\n" % (isosmi, cid))
                fout.write(buff)
                nbytes += len(buff)
                HandleOEErrors(oeerrs, True)
        finally:
            # Release the file before it is deleted below.
            ims.close()
    finally:
        # delete=False above means the temporary download must be removed
        # explicitly, including on failure.
        os.remove(fpath_tmp)
    return nbytes
Exemplo n.º 11
0
def SeqAlign(ref, fit, ofs):
    """Align ``fit`` onto ``ref`` by sequence, report it, and write ``fit``.

    Prints the alignment parameters, the alignment itself and the RMSD, then
    applies the rotation and translation filled in by ``OERMSD`` to ``fit``
    and writes the moved molecule to ``ofs``.
    """
    alignment = oechem.OEGetAlignment(ref, fit)

    print()
    print("Alignment of %s to %s" % (fit.GetTitle(), ref.GetTitle()))
    print()
    method_name = oechem.OEGetAlignmentMethodName(alignment.GetMethod())
    print("  Method: %s" % method_name)
    print("  Gap   : %d" % alignment.GetGap())
    print("  Extend: %d" % alignment.GetExtend())
    print("  Score : %d" % alignment.GetScore())
    print()

    # Dump the alignment into a string stream so it can be printed.
    text_stream = oechem.oeosstream()
    oechem.OEWriteAlignment(text_stream, alignment)
    alignment_text = text_stream.str().decode("UTF-8")
    print(alignment_text)

    # Output buffers filled in by OERMSD: 9 doubles for the rotation and
    # 3 for the translation.
    rotation = oechem.OEDoubleArray(9)
    translation = oechem.OEDoubleArray(3)
    only_c_alpha = True
    do_overlay = True
    rmsd = oechem.OERMSD(ref, fit, alignment, only_c_alpha, do_overlay,
                         rotation, translation)
    print("  RMSD = %.1f" % rmsd)

    oechem.OERotate(fit, rotation)
    oechem.OETranslate(fit, translation)
    oechem.OEWriteMolecule(ofs, fit)
Exemplo n.º 12
0
def enumerate_conformations(name, smiles=None, pdbname=None, user_mol2=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The starting molecule is taken from RCSB Ligand Expo when ``pdbname`` is
    given, otherwise it is built from ``smiles``. Epik is run on it, every
    resulting state is charged, and the results are written next to the input
    under ``<output_dir>/<name>/<name>-*``. States that fail charging are
    written to the ``Failed_molecules`` directory (relative to the current
    working directory) together with an ``.err`` file.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str
       Isomeric SMILES string
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')
    user_mol2 : str
        User prepared mol2 file to use as input instead of PDB retrieved files

    Raises
    ------
    Exception
        If neither ``pdbname`` nor ``smiles`` is provided.
    IOError
        If ``user_mol2`` is given but is not an existing file.

    Notes
    -----
    Returns ``None``. When the initial charging of a SMILES-built molecule
    raises ``RuntimeError`` the traceback is printed and the function returns
    early without running Epik.
    """
    # Create output subfolder. makedirs(exist_ok=True) replaces the former
    # isdir()/mkdir() pair, which could race with a concurrent creation.
    output_folder = os.path.join(output_dir, name)
    os.makedirs(output_folder, exist_ok=True)
    output_basepath = os.path.join(output_folder, name)

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    elif smiles:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return
        # Residue name: the first three letters of the upper-cased name.
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    else:
        raise Exception('Must provide SMILES string or pdbname')

    # Handling of OpenEye output
    oehandler = oechem.OEThrow
    # String stream output
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100, min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Allow user to provide custom file instead, if located in the right location.
    if user_mol2 is not None:
        if os.path.isfile(user_mol2):
            output_mol2_filename = user_mol2
        else:
            raise IOError("No such file: {}".format(user_mol2))

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges. The SDF stream drives the loop; one mol2 record is read
    # alongside each SDF record.
    failed_molecules = dict()
    charged_molecules = list()
    index = 0
    while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
        oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

        index += 1
        print("Charging molecule %d" % (index))
        try:
            # Charge molecule.
            oehandler.Clear()
            # fix bonds
            oechem.OEAssignAromaticFlags(mol2_molecule)
            # Assign Tripos types
            oechem.OETriposAtomTypeNames(mol2_molecule)
            oechem.OETriposBondTypeNames(mol2_molecule)

            charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800, strictStereo=False, normalize=True, keep_confs=None, legacy=True)

            # Store tags.
            oechem.OECopySDData(charged_molecule, sdf_molecule)
            # Store molecule
            charged_molecules.append(charged_molecule)
        except Exception as e:
            identifier = "{:s}_{:04d}".format(name, index)
            # NOTE(review): str(oss) is the string conversion of the stream
            # object itself; if the captured message text is wanted here,
            # oss.str() may be what was intended - confirm.
            error_message = str(oss) + "\n" + str(e)
            failed_molecules[identifier] = (deepcopy(mol2_molecule), error_message)
            print(e)
            print("Skipping protomer/tautomer because of failed charging.")
    oehandler.Clear()
    # Clean up
    ifs_sdf.close()
    ifs_mol2.close()

    # Write state penalties. The context manager closes the file even when a
    # tag conversion below raises.
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            # Get Epik data. Only the state penalty is written; the other
            # tags are still converted, so a non-numeric value raises
            # ValueError exactly as before.
            epik_Ionization_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty"))
            epik_Ionization_Penalty_Charging = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Charging"))
            epik_Ionization_Penalty_Neutral = float(oechem.OEGetSDData(charged_molecule, "r_epik_Ionization_Penalty_Neutral"))
            epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            epik_Tot_Q = int(oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    flavor = oechem.OEOFlavor_PDB_CurrentResidues | oechem.OEOFlavor_PDB_ELEMENT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_HETBONDS | oechem.OEOFlavor_PDB_BOTH
    ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
    for charged_molecule in charged_molecules:
        # Fix residue names
        for atom in charged_molecule.GetAtoms():
            residue = oechem.OEAtomGetResidue(atom)
            residue.SetName(residue_name)
            oechem.OEAtomSetResidue(atom, residue)

        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)

    # The directory is created even when nothing failed, as before.
    os.makedirs("Failed_molecules", exist_ok=True)
    for name_state, (state_oemol, error_message) in failed_molecules.items():
        write_mol2_preserving_atomnames("Failed_molecules/{}.mol2".format(name_state), state_oemol, name_state)
        with open("Failed_molecules/{}.err".format(name_state), 'w') as error_file:
            error_file.write(error_message)
Exemplo n.º 13
0
def enumerate_conformations(name,
                            pdbfile=None,
                            smiles=None,
                            pdbname=None,
                            pH=7.4):
    """Run Epik to get protonation states using PDB residue templates for naming.

    The ligand is downloaded from RCSB Ligand Expo, Epik is run at ``pH``,
    every resulting state is charged, and the results are written to files
    prefixed with ``name`` in the current working directory. A textual log of
    the run is written to ``log.txt`` and also returned.

    Parameters
    ----------
    name : str
       Common name of molecule (used as prefix of all output files)
    pdbfile : str
       Accepted but currently unused; input from a PDB file is disabled.
    smiles : str
       Isomeric SMILES string. Accepted but currently unused; input from
       SMILES is disabled.
    pdbname : str
       Three-letter PDB code (e.g. 'DB8')
    pH : float
       pH value passed to Epik.

    Returns
    -------
    log : str
        The accumulated log text (the same text written to ``log.txt``).
    success_status : bool
        False if at least one state failed charging, True otherwise.

    Raises
    ------
    Exception
        If ``pdbname`` is not provided.
    """
    oehandler = openeye.oechem.OEThrow
    # String stream output
    oss = oechem.oeosstream()
    oehandler.SetOutputStream(oss)
    log = "New run:\nPDB code: {pdbname}; Molecule: {name}; pH {pH}\n".format(
        pdbname=pdbname, name=name, pH=pH)
    success_status = True

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            log += "Splitting '%s' into first entry only: '%s'" % (pdbname,
                                                                   pdbnames[0])
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (
            pdbname[0], pdbname, pdbname)
        pdb_filename = name + '-rcsb_download.pdb'
        log += "Retrieving PDB structure from RCSB ligand expo: {}.\n".format(
            pdb_filename)
        retrieve_url(url, pdb_filename)
        log += "Parsing PDB file.\n"
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (
            pdbname[0], pdbname, pdbname)
        sdf_filename = name + '-rcsb_download.sdf'
        log += "Retrieving SDF structure from RCSB ligand expo: {}.\n".format(
            sdf_filename)
        retrieve_url(url, sdf_filename)
        log += "Parsing SDF file.\n"
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        log += "Canonicalizing atom names.\n"
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(),
                                        pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        log += "Assign atom type names.\n"
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    else:
        # For the moment the SMILES and PDB-file inputs are disabled, so
        # pdbname is the only supported way to specify the molecule. The
        # message is kept unchanged for callers that may match on it.
        raise Exception('Must provide SMILES string or pdbname, or pdbfile')

    # Save mol2 file, preserving atom names
    log += "Running Epik.\n"
    mol2_file_path = name + '-before_epik.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = name + '-epik.mae'
    schrodinger.run_epik(mol2_file_path,
                         mae_file_path,
                         tautomerize=False,
                         max_structures=50,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY),
                         ph=pH)

    log += "Epik run completed.\n"
    # Convert maestro file to sdf and mol2
    output_sdf_filename = name + '-after_epik.sdf'
    output_mol2_filename = name + '-after_epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.

    # reset count of error handler
    oehandler.Clear()
    log += "Assigning charges to protonation states.\n"
    charged_molecules = list()
    index = 0
    failed_states = set()
    could_not_store = "Could not store result, most likely failed during Omega step!\n"
    while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
        oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

        index += 1
        log += "State {0:d}\n".format(index)
        # Reset per state: the failure handler below must never see the
        # conformers of a previous state, otherwise it would store them under
        # the failing state's file name.
        charged_molecule_conformers = None
        try:
            # Charge molecule.
            charged_molecule_conformers = omtoe.get_charges(mol2_molecule,
                                                            max_confs=800,
                                                            strictStereo=False,
                                                            normalize=True,
                                                            keep_confs=-1)

            log += "Charging stage output:\n"
            # NOTE(review): str(oss) is the string conversion of the stream
            # object itself; if the captured message text is wanted here,
            # oss.str() may be what was intended - confirm.
            OEOutput = str(oss)
            log += OEOutput
            log += "\nCharging state completed.\n"

            # Restore coordinates to original
            charged_molecule = select_conformers(charged_molecule_conformers,
                                                 mol2_molecule,
                                                 keep_confs=None)

            # Assign Tripos types
            oechem.OETriposAtomTypeNames(charged_molecule)
            oechem.OETriposBondTypeNames(charged_molecule)
            # Store tags.
            oechem.OECopySDData(charged_molecule, sdf_molecule)
            # Store molecule
            charged_molecules.append(charged_molecule)

            # Check for failure in the log
            openeye_charge_log_parser(OEOutput, True)

            oehandler.Clear()

        except Exception as e:
            failed_states.add(index)
            logging.info(e)
            log += "State failed charging.\n"
            log += str(e)
            log += "\n"

            filename_failure = name + '-conformers-failed-state-{}-.mol2'.format(
                index)
            if charged_molecule_conformers is None:
                # get_charges itself failed, so there is nothing to store.
                log += could_not_store
            else:
                try:
                    write_mol2_preserving_atomnames(filename_failure,
                                                    charged_molecule_conformers,
                                                    residue_name)
                except Exception:
                    # Storing the failed state is best effort; record it in
                    # the log and carry on with the next state.
                    log += could_not_store

            success_status = False
            oehandler.Clear()

    # Clean up
    ifs_sdf.close()
    ifs_mol2.close()

    # Write state penalties. The context manager closes the file even when a
    # tag conversion below raises.
    with open(name + '-state-penalties.out', 'w') as outfile:
        for state_number, charged_molecule in enumerate(charged_molecules,
                                                        start=1):
            # Get Epik data. Only the state penalty is written; the other
            # tags are still converted, so a non-numeric value raises
            # ValueError exactly as before.
            log += "Writing Epik data for state {:d}\n".format(state_number)
            epik_Ionization_Penalty = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty"))
            epik_Ionization_Penalty_Charging = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty_Charging"))
            epik_Ionization_Penalty_Neutral = float(
                oechem.OEGetSDData(charged_molecule,
                                   "r_epik_Ionization_Penalty_Neutral"))
            epik_State_Penalty = float(
                oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            epik_Tot_Q = int(
                oechem.OEGetSDData(charged_molecule, "i_epik_Tot_Q"))

            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = name + '-charged_output.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    flavor = oechem.OEOFlavor_PDB_CurrentResidues | oechem.OEOFlavor_PDB_ELEMENT | oechem.OEOFlavor_PDB_BONDS | oechem.OEOFlavor_PDB_HETBONDS | oechem.OEOFlavor_PDB_BOTH
    ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
    for charged_molecule in charged_molecules:
        # Fix residue names
        for atom in charged_molecule.GetAtoms():
            residue = oechem.OEAtomGetResidue(atom)
            residue.SetName(residue_name)
            oechem.OEAtomSetResidue(atom, residue)
        oechem.OEWriteMolecule(ofs, charged_molecule)
    ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = name + '-charged_output.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules,
                                    residue_name)
    log += "Run completed.\n"
    if success_status:
        log += "Status: Success\n"
    else:
        log += "Status: Failure\n"
        log += "Failed states: {}\n".format(" ".join(
            str(state) for state in sorted(failed_states)))

    with open("log.txt", 'w') as logfile:
        logfile.write(log)

    return log, success_status
Exemplo n.º 14
0
import sys
from openeye import oechem as oe

import common


class MyAromaticSmilesWriter(common.AromaticSmilesWriter):
    """Aromatic SMILES writer that produces its output with OEChem."""

    def getoutput(self, smi):
        """Return the SMILES OEChem creates for ``smi`` after aromaticity assignment.

        ``smi`` is parsed into a fresh molecule (asserting that the parse
        succeeded), aromatic flags are assigned, and the molecule is written
        back out with ``OECreateSmiString`` using ``0`` as second argument.
        """
        molecule = oe.OEGraphMol()
        parsed = oe.OEParseSmiles(molecule, smi)
        assert parsed
        oe.OEAssignAromaticFlags(molecule)
        smiles = oe.OECreateSmiString(molecule, 0)
        return smiles


# Send OEThrow output to an in-memory stream for the whole module, so that
# code in this file can read back the messages OEChem emits.
msgstream = oe.oeosstream()
oe.OEThrow.SetOutputStream(msgstream)


class MyHydrogenCounter(common.HydrogenCounter):
    # Hydrogen counter whose SMILES parsing is done by OEChem.
    def getoutput(self, smi):
        """Parse ``smi`` with OEChem and classify a parse failure.

        When parsing fails, returns ``(None, "Kekulization_failure")`` if the
        captured OEChem message mentions "Kekul", otherwise
        ``(None, "Parse_error")``.

        NOTE(review): no code for the successful-parse case is visible in
        this snippet (the method then falls through and returns ``None``);
        the snippet is possibly cut off - confirm against the full file.
        """
        mol = oe.OEGraphMol()
        # Start from an empty message stream so only this parse's messages
        # are inspected below.
        msgstream.clear()
        ok = oe.OEParseSmiles(mol, smi)
        if not ok:
            msg = msgstream.str().decode("utf-8")
            if "Kekul" in msg:
                return None, "Kekulization_failure"
            else:
                return None, "Parse_error"
Exemplo n.º 15
0
        skip_oechem = False
        from openeye import oechem

# Module-level test setup that is only performed when has_oechem is true.
if has_oechem:
    from chemfp.commandline import oe2fps
    import chemfp.openeye
    chemfp.openeye._USE_SELECT = False  # Grrr. Needed to automate testing.

    # References to the stdout/stderr objects in place at import time
    # (presumably so tests that replace sys.stdout/sys.stderr can restore
    # them - confirm against the rest of the file).
    real_stdout = sys.stdout
    real_stderr = sys.stderr

    # Paths of the PubChem test inputs, resolved through support.fullpath.
    PUBCHEM_SDF = support.fullpath("pubchem.sdf")
    PUBCHEM_SDF_GZ = support.fullpath("pubchem.sdf.gz")
    PUBCHEM_ANOTHER_EXT = support.fullpath("pubchem.should_be_sdf_but_is_not")

    # Capture OEChem messages in memory; _check_for_oe_errors() reads this
    # stream.
    oeerrs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(oeerrs)


def _check_for_oe_errors():
    """Raise AssertionError for the first captured OEChem message that is not ignorable.

    Messages are read line by line from the module-level ``oeerrs`` stream.
    Lines starting with one of the known warning prefixes are skipped; any
    other line raises.
    """
    # Stereochemistry corrections are tolerated. The unknown-format warning
    # is tolerated too: there's a bug in OEChem where it generates this
    # warning on unknown file extensions even after SetFormat has been called.
    ignorable_prefixes = (
        "Warning: Stereochemistry corrected on atom number",
        "Warning: Unknown file format set in input stream",
    )
    for message in oeerrs.str().splitlines():
        if message.startswith(ignorable_prefixes):
            continue
        raise AssertionError("Unexpected message from OEChem: %r" % (message, ))
def prepare_receptor(complex_pdb_filename,
                     output_basepath,
                     dimer=False,
                     retain_water=False):
    """
    Build docking receptors from a protein-ligand complex PDB file.

    The PDB text is cleaned up first (lines containing UNK and LINK are
    dropped, HOH and REMARK 350 lines are dropped depending on the flags, and
    a SEQRES header is prepended when none is present). Design units are then
    made with Spruce and the first one is written out twice: once as
    produced, and once after CYS145 SG has been set to formal charge -1 and
    HIS41 ND1 to +1 and the design unit re-protonated (the "thiolate" form).

    Nothing is done when both receptor files already exist.

    Files written, each named ``<output_basepath>/<input file stem>`` plus:
    ``-receptor.oeb.gz``, ``-receptor-thiolate.oeb.gz``, ``-protein.pdb``,
    ``-protein-thiolate.pdb``, ``-ligand.mol2``, ``-ligand.pdb`` and
    ``-ligand.sdf``. A success/failure line and the captured OEChem log are
    printed to stdout.

    Parameters
    ----------
    complex_pdb_filename : str
        The complex PDB file to read in
    output_basepath : str
        Base path for output
    dimer : bool, optional, default=False
        If True, generate the dimer as the biological unit
    retain_water : bool, optional, default=False
        If True, will retain waters

    Raises
    ------
    Exception
        If no design units are produced; the message includes the captured
        OEChem log.
    """
    import os
    import re

    # Check whether this is a diamond SARS-CoV-2 Mpro structure or not.
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    is_diamond_structure = (re.search(r'-x\d+_', complex_pdb_filename)
                            is not None)

    filename = os.path.basename(complex_pdb_filename)
    prefix = os.path.join(output_basepath, os.path.splitext(filename)[0])

    # Check if receptor already exists
    receptor_filename = f'{prefix}-receptor.oeb.gz'
    thiolate_receptor_filename = f'{prefix}-receptor-thiolate.oeb.gz'
    if os.path.exists(receptor_filename) and os.path.exists(
            thiolate_receptor_filename):
        return

    # Read in PDB file, skipping UNK atoms (left over from processing covalent ligands)
    with open(complex_pdb_filename, 'r') as infile:
        pdbfile_lines = [line for line in infile if 'UNK' not in line]

    # Check if biological symmetry header is present
    has_biological_symmetry_header = any('REMARK 350' in line
                                         for line in pdbfile_lines)

    # Prepend REMARK 350 (biological symmetry) header lines for Mpro (from 5RGG) if not present
    if is_diamond_structure and (not has_biological_symmetry_header):
        pdbfile_lines = [
            line + '\n' for line in BIOLOGICAL_SYMMETRY_HEADER.split('\n')
        ] + pdbfile_lines

    # If monomer is specified, drop the biological symmetry (REMARK 350) lines
    if not dimer:
        pdbfile_lines = [
            line for line in pdbfile_lines if 'REMARK 350' not in line
        ]

    # Filter out waters
    if not retain_water:
        pdbfile_lines = [line for line in pdbfile_lines if 'HOH' not in line]

    # Filter out LINK records to covalent inhibitors so we can model non-covalent complex
    pdbfile_lines = [line for line in pdbfile_lines if 'LINK' not in line]

    # Reconstruct PDBFile contents
    pdbfile_contents = ''.join(pdbfile_lines)

    # Append SEQRES to all structures if they do not have it
    seqres = """\
SEQRES   1 A  306  SER GLY PHE ARG LYS MET ALA PHE PRO SER GLY LYS VAL
SEQRES   2 A  306  GLU GLY CYS MET VAL GLN VAL THR CYS GLY THR THR THR
SEQRES   3 A  306  LEU ASN GLY LEU TRP LEU ASP ASP VAL VAL TYR CYS PRO
SEQRES   4 A  306  ARG HIS VAL ILE CYS THR SER GLU ASP MET LEU ASN PRO
SEQRES   5 A  306  ASN TYR GLU ASP LEU LEU ILE ARG LYS SER ASN HIS ASN
SEQRES   6 A  306  PHE LEU VAL GLN ALA GLY ASN VAL GLN LEU ARG VAL ILE
SEQRES   7 A  306  GLY HIS SER MET GLN ASN CYS VAL LEU LYS LEU LYS VAL
SEQRES   8 A  306  ASP THR ALA ASN PRO LYS THR PRO LYS TYR LYS PHE VAL
SEQRES   9 A  306  ARG ILE GLN PRO GLY GLN THR PHE SER VAL LEU ALA CYS
SEQRES  10 A  306  TYR ASN GLY SER PRO SER GLY VAL TYR GLN CYS ALA MET
SEQRES  11 A  306  ARG PRO ASN PHE THR ILE LYS GLY SER PHE LEU ASN GLY
SEQRES  12 A  306  SER CYS GLY SER VAL GLY PHE ASN ILE ASP TYR ASP CYS
SEQRES  13 A  306  VAL SER PHE CYS TYR MET HIS HIS MET GLU LEU PRO THR
SEQRES  14 A  306  GLY VAL HIS ALA GLY THR ASP LEU GLU GLY ASN PHE TYR
SEQRES  15 A  306  GLY PRO PHE VAL ASP ARG GLN THR ALA GLN ALA ALA GLY
SEQRES  16 A  306  THR ASP THR THR ILE THR VAL ASN VAL LEU ALA TRP LEU
SEQRES  17 A  306  TYR ALA ALA VAL ILE ASN GLY ASP ARG TRP PHE LEU ASN
SEQRES  18 A  306  ARG PHE THR THR THR LEU ASN ASP PHE ASN LEU VAL ALA
SEQRES  19 A  306  MET LYS TYR ASN TYR GLU PRO LEU THR GLN ASP HIS VAL
SEQRES  20 A  306  ASP ILE LEU GLY PRO LEU SER ALA GLN THR GLY ILE ALA
SEQRES  21 A  306  VAL LEU ASP MET CYS ALA SER LEU LYS GLU LEU LEU GLN
SEQRES  22 A  306  ASN GLY MET ASN GLY ARG THR ILE LEU GLY SER ALA LEU
SEQRES  23 A  306  LEU GLU ASP GLU PHE THR PRO PHE ASP VAL VAL ARG GLN
SEQRES  24 A  306  CYS SER GLY VAL THR PHE GLN
"""
    if 'SEQRES' not in pdbfile_contents:
        pdbfile_contents = seqres + pdbfile_contents

    # Read the receptor and identify design units
    from openeye import oespruce, oechem
    from tempfile import NamedTemporaryFile
    # delete=False so the file can be reopened by name once it is closed;
    # it is removed explicitly as soon as it has been read back.
    with NamedTemporaryFile(delete=False, mode='wt', suffix='.pdb') as pdbfile:
        pdbfile.write(pdbfile_contents)
    try:
        # assumes read_pdb_file loads the whole molecule before returning
        complex_mol = read_pdb_file(pdbfile.name)
    finally:
        os.remove(pdbfile.name)

    # Strip protons from structure to allow SpruceTK to add these back
    # See: 6wnp, 6wtj, 6wtk, 6xb2, 6xqs, 6xqt, 6xqu, 6m2n
    for atom in complex_mol.GetAtoms():
        if atom.GetAtomicNum() > 1:
            oechem.OESuppressHydrogens(atom)

    # Delete and rebuild C-terminal residue because Spruce causes issues with this
    # See: 6m2n 6lze
    pred = oechem.OEIsCTerminalAtom()
    for atom in complex_mol.GetAtoms():
        if pred(atom):
            for nbor in atom.GetAtoms():
                if oechem.OEGetPDBAtomIndex(nbor) == oechem.OEPDBAtomName_O:
                    complex_mol.DeleteAtom(nbor)

    # Log warnings: route OEChem output into an in-memory stream while the
    # design units are made, and always hand the stream back afterwards.
    errfs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errfs)
    try:
        oechem.OEThrow.Clear()
        oechem.OEThrow.SetLevel(
            oechem.OEErrorLevel_Verbose)  # capture verbose error output

        opts = oespruce.OEMakeDesignUnitOptions()
        opts.GetSplitOptions().SetMinLigAtoms(
            7)  # minimum fragment size (in heavy atoms)

        mdata = oespruce.OEStructureMetadata()
        # Produce zero design units if we fail to protonate
        opts.GetPrepOptions().SetStrictProtonationMode(True)

        # Both N- and C-termini should be zwitterionic
        # Mpro cleaves its own N- and C-termini
        # See https://www.pnas.org/content/113/46/12997
        build_opts = opts.GetPrepOptions().GetBuildOptions()
        build_opts.SetCapNTermini(False)
        build_opts.SetCapCTermini(False)
        # Don't allow truncation of termini, since force fields don't have parameters for this
        build_opts.GetCapBuilderOptions().SetAllowTruncate(False)
        # Build loops and sidechains
        build_opts.SetBuildLoops(True)
        build_opts.SetBuildSidechains(True)

        # Don't flip Gln189
        pred = oechem.OEAtomMatchResidue(["GLN:189:.*:.*:.*"])
        protonate_opts = opts.GetPrepOptions().GetProtonateOptions()
        place_hydrogens_opts = protonate_opts.GetPlaceHydrogensOptions()
        place_hydrogens_opts.SetNoFlipPredicate(pred)

        # Make design units
        design_units = list(
            oespruce.OEMakeDesignUnits(complex_mol, mdata, opts))
    finally:
        # Restore error stream, even if design-unit creation raised
        oechem.OEThrow.SetOutputStream(oechem.oeerr)

    # Capture the warnings to a string
    warnings = errfs.str().decode("utf-8")

    print('')
    print('')
    if not design_units:
        print(f'{complex_pdb_filename} : FAILURE')
        print(warnings)
        msg = f'No design units found for {complex_pdb_filename}\n'
        msg += warnings
        msg += '\n'
        raise Exception(msg)
    print(f'{complex_pdb_filename} : SUCCESS')
    print(warnings)
    design_unit = design_units[0]

    # Prepare the receptor
    from openeye import oedocking
    protein = oechem.OEGraphMol()
    design_unit.GetProtein(protein)
    ligand = oechem.OEGraphMol()
    design_unit.GetLigand(ligand)

    # Create receptor and other files
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)
    with oechem.oemolostream(f'{prefix}-ligand.mol2') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)
    with oechem.oemolostream(f'{prefix}-ligand.sdf') as ofs:
        oechem.OEWriteMolecule(ofs, ligand)

    # Filter out UNK from PDB files (which have covalent adducts)
    with open(f'{prefix}-protein.pdb', 'r') as infile:
        pdbfile_lines = [line for line in infile if 'UNK' not in line]
    with open(f'{prefix}-protein.pdb', 'wt') as outfile:
        outfile.write(''.join(pdbfile_lines))

    # Adjust protonation state of CYS145 to generate thiolate form
    pred = oechem.OEAtomMatchResidue(["CYS:145:.*:.*:.*"])
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_SG:
            oechem.OESuppressHydrogens(atom)
            atom.SetFormalCharge(-1)
            atom.SetImplicitHCount(0)
    # Protonate HIS41 on ND1
    pred = oechem.OEAtomMatchResidue(["HIS:41:.*:.*:.*"])
    # NOTE(review): this presumably replaces the CYS145 bypass predicate set
    # just above rather than adding to it - confirm that is intended.
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_ND1:
            oechem.OESuppressHydrogens(atom)  # strip hydrogens from residue
            atom.SetFormalCharge(+1)
            atom.SetImplicitHCount(1)
    # Update the design unit with the modified formal charges
    oechem.OEUpdateDesignUnit(design_unit, protein,
                              oechem.OEDesignUnitComponents_Protein)

    # Re-optimize hydrogen positions for the adjusted protonation states.
    # The return value was never inspected by the original code either.
    oespruce.OEProtonateDesignUnit(design_unit, protonate_opts)
    design_unit.GetProtein(protein)

    # Write thiolate form of receptor
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, thiolate_receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein-thiolate.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)

    # Filter out UNK from PDB files (which have covalent adducts)
    with open(f'{prefix}-protein-thiolate.pdb', 'r') as infile:
        pdbfile_lines = [line for line in infile if 'UNK' not in line]
    with open(f'{prefix}-protein-thiolate.pdb', 'wt') as outfile:
        outfile.write(''.join(pdbfile_lines))
Exemplo n.º 17
0
def MMPTransform(itf):
    """Apply the transforms of a matched-pair index to each input molecule.

    Options read from *itf*: ``-input``, ``-mmpindex``, ``-output``,
    ``-context`` (first character one of 0|1|2|3|A), ``-verbose``,
    ``-minpairs`` and ``-nowarnings``. Every transformed molecule is written
    to the output stream and a one-line summary is printed at the end.
    Problems are reported through ``oechem.OEThrow``.

    Returns:
        0
    """
    # input structure(s) to process
    input_path = itf.GetString("-input")
    ifsmols = oechem.oemolistream()
    if not ifsmols.open(input_path):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % input_path)

    # check MMP index
    mmpimport = itf.GetString("-mmpindex")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpimport):
        oechem.OEThrow.Fatal(
            'Not a valid matched pair index input file, {}'.format(mmpimport))

    # load MMP index
    mmp = oemedchem.OEMatchedPairAnalyzer()
    if not oemedchem.OEReadMatchedPairAnalyzer(mmpimport, mmp):
        oechem.OEThrow.Fatal("Unable to load index {}".format(mmpimport))

    if not mmp.NumMols():
        oechem.OEThrow.Fatal(
            'No records in loaded MMP index file: {}'.format(mmpimport))

    if not mmp.NumMatchedPairs():
        oechem.OEThrow.Fatal(
            'No matched pairs found in MMP index file, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # output (transformed) structure(s)
    output_path = itf.GetString("-output")
    ofs = oechem.oemolostream()
    if not ofs.open(output_path):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % output_path)

    # request a specific context for the transform activity; the first
    # character of -context selects it and 0-bonds is the fallback value
    context_by_flag = {
        '0': oemedchem.OEMatchedPairContext_Bond0,
        '1': oemedchem.OEMatchedPairContext_Bond1,
        '2': oemedchem.OEMatchedPairContext_Bond2,
        '3': oemedchem.OEMatchedPairContext_Bond3,
        'a': oemedchem.OEMatchedPairContext_AllBonds,
        'A': oemedchem.OEMatchedPairContext_AllBonds,
    }
    askcontext = itf.GetString("-context")[:1]
    chemctxt = oemedchem.OEMatchedPairContext_Bond0
    if askcontext in context_by_flag:
        chemctxt = context_by_flag[askcontext]
    else:
        oechem.OEThrow.Fatal("Invalid context specified: " + askcontext +
                             ", only 0|1|2|3|A allowed")

    verbose = itf.GetBool("-verbose")

    # return some status information
    if verbose:
        oechem.OEThrow.Info("{}: molecules: {:d}, matched pairs: {:,d}".format(
            mmpimport, mmp.NumMols(), mmp.NumMatchedPairs()))

    minpairs = itf.GetInt("-minpairs")
    if verbose and minpairs > 1:
        oechem.OEThrow.Info(
            'Requiring at least %d matched pairs to apply transformations' %
            minpairs)

    # with -nowarnings, OEThrow output is captured in a throw-away stream
    errs = None
    if itf.GetBool("-nowarnings"):
        errs = oechem.oeosstream()
        oechem.OEThrow.SetOutputStream(errs)

    records_read = 0
    mols_written = 0
    for mol in ifsmols.GetOEGraphMols():
        records_read += 1
        transformed = oemedchem.OEMatchedPairApplyTransforms(
            mol, mmp, chemctxt, minpairs)
        if not transformed.IsValid():
            if verbose:
                # as minpairs increases, fewer transformed mols are generated - output if requested
                name = mol.GetTitle() or 'Record ' + str(records_read)
                oechem.OEThrow.Info("%s did not produce any output" % name)
            continue

        # discard captured messages both before and after writing
        if errs is not None:
            errs.clear()
        for outmol in transformed:
            mols_written += 1
            oechem.OEWriteMolecule(ofs, outmol)
        if errs is not None:
            errs.clear()

    if not records_read:
        oechem.OEThrow.Fatal('No records in input structure file to transform')

    if not mols_written:
        oechem.OEThrow.Warning('No transformed structures generated')

    print("Input molecules={} Output molecules={}".format(
        records_read, mols_written))

    return 0
def MMPIndex(itf):
    """Create a matched-pair index file from an input structure file.

    Options read from *itf*: ``-input``, ``-output``, ``-verbose``,
    ``-vverbose`` (implies verbose), ``-maxrec``, ``-exportcompress``,
    ``-threads`` and ``-nowarnings``, plus whatever
    ``OESetupMatchedPairIndexOptions`` consumes. Problems and status are
    reported through ``oechem.OEThrow``.

    With ``-nowarnings`` the messages emitted during indexing are captured;
    afterwards they are echoed only when verbose, and lines mentioning an
    ignored duplicate molecule are counted.

    Returns:
        0
    """
    input_path = itf.GetString("-input")

    # checking input structures
    ifsindex = oechem.oemolistream()
    if not ifsindex.open(input_path):
        oechem.OEThrow.Fatal(
            "Unable to open {} for reading".format(input_path))
    ifsindex.close()

    vverbose = itf.GetBool("-vverbose")
    verbose = itf.GetBool("-verbose") or vverbose

    # output index file
    mmpindexfile = itf.GetString("-output")
    if not oemedchem.OEIsMatchedPairAnalyzerFileType(mmpindexfile):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the next line's indentation in the message.
        oechem.OEThrow.Fatal(
            "Output file is not a matched pair index type - "
            "needs .mmpidx extension: {}".format(mmpindexfile))

    # create options class with defaults
    mmpopts = oemedchem.OEMatchedPairAnalyzerOptions()
    # set up options from command line
    if not oemedchem.OESetupMatchedPairIndexOptions(mmpopts, itf):
        oechem.OEThrow.Fatal("Error setting matched pair indexing options!")

    if verbose:
        if not mmpopts.HasIndexableFragmentHeavyAtomRange():
            oechem.OEThrow.Info("Indexing all fragments")
        else:
            oechem.OEThrow.Info(
                "Limiting fragment cores to {0:.2f}-{1:.2f}% of input molecules"
                .format(mmpopts.GetIndexableFragmentRangeMin(),
                        mmpopts.GetIndexableFragmentRangeMax()))

    maxrec = itf.GetInt("-maxrec")
    if maxrec and verbose:
        oechem.OEThrow.Info(
            "Indexing a maximum of {} records".format(maxrec))

    if itf.GetBool("-exportcompress"):
        if verbose:
            oechem.OEThrow.Info("Removing singleton index nodes from index")
        if not mmpopts.SetOptions(
                mmpopts.GetOptions()
                | oemedchem.OEMatchedPairOptions_ExportCompression):
            oechem.OEThrow.Warning("Error enabling export compression!")

    # set indexing options
    indexopts = oemedchem.OECreateMMPIndexOptions(mmpopts)

    # set requested verbosity setting
    if vverbose:
        indexopts.SetVerbose(2)
    elif verbose:
        indexopts.SetVerbose(1)

    # limit number of records to process
    indexopts.SetMaxRecord(maxrec)

    # set number of threads to use
    indexopts.SetNumThreads(itf.GetInt("-threads"))
    if verbose:
        if not indexopts.GetNumThreads():
            oechem.OEThrow.Info(
                "Using the maximum number of threads available")
        else:
            oechem.OEThrow.Info("Limiting indexing to {} thread(s)".format(
                indexopts.GetNumThreads()))

    # with -nowarnings, capture everything OEThrow emits from here on
    errs = None
    if itf.GetBool("-nowarnings"):
        errs = oechem.oeosstream()
        oechem.OEThrow.SetOutputStream(errs)

    if verbose:
        oechem.OEThrow.Info(
            "Threaded indexing of {}, all SD data will be preserved".format(
                input_path))

    # create index
    indexstatus = oemedchem.OECreateMMPIndexFile(mmpindexfile, input_path,
                                                 indexopts)

    dupes = 0
    if errs is not None:
        # NOTE(review): output is handed back to oeout rather than oeerr;
        # kept as in the original - confirm this is the intended stream.
        oechem.OEThrow.SetOutputStream(oechem.oeout)
        for err in errs.str().decode().split('\n'):
            err = err.rstrip()
            if not err:
                continue
            if verbose:
                oechem.OEThrow.Info(err)
            if 'ignoring duplicate molecule,' in err:
                dupes += 1

    if not indexstatus.IsValid():
        oechem.OEThrow.Fatal('Invalid status returned from indexing!')

    if not indexstatus.GetTotalMols():
        oechem.OEThrow.Fatal(
            'No records in index structure file: {}'.format(input_path))

    if dupes:
        oechem.OEThrow.Info(
            'Found {} duplicate structures during indexing'.format(dupes))

    if not indexstatus.GetNumMatchedPairs():
        oechem.OEThrow.Fatal(
            'No matched pairs found from indexing, ' +
            'use -fragGe,-fragLe options to extend indexing range')

    # return some status information
    oechem.OEThrow.Info(
        "Records: {}, Indexed: {}, matched pairs: {:,d}".format(
            indexstatus.GetTotalMols(), indexstatus.GetNumMols(),
            indexstatus.GetNumMatchedPairs()))

    return 0
Exemplo n.º 19
0
def eMolecules_filtering(input_f, current_smiles=None):
    """
    Filter a molecule file into numbered SDF chunks plus ID-to-SMILES text files.

    This function was used to filter the eMolecules database and the
    eMolecules_incremental database. Each input molecule whose isomeric
    SMILES is not already in `current_smiles` is copied, given explicit
    hydrogens and passed to `keep_molecule`; accepted molecules are titled
    ``<set_name>_<file index>_<input record number>`` and written out.

    Output files are named after `input_f` up to its first '.':
    ``<set_name>_<i>.sdf`` starting at i=1000, with a new file started after
    every 1000 saved molecules, and ``<set_name>_<i>.txt`` (opened in append
    mode) holding "title<TAB><TAB>smiles" lines, with a new file started
    whenever the SDF index reaches a multiple of 100.

    NOTE(review): the original description said 1,000,000 IDs per text file;
    the rotation rule in the code gives 100 SDF files per text file.

    OEChem messages are redirected to an in-memory stream and the redirect
    is left in place when the function returns.

    Parameter
    ---------
    input_f : string "path/to/inputfile.sdf"
    current_smiles : iterable of strings, optional
        smiles already in your molecule sets; defaults to none

    Raises
    ------
    Exception
        If the input file or any output SDF file cannot be opened.
    """
    # One membership test per input molecule: use a set so it is O(1).
    known_smiles = set(current_smiles) if current_smiles is not None else set()

    set_name = input_f.split('.')[0]
    output_f = set_name+"_%i.sdf"
    smiles_base = set_name+"_%i.txt"
    molecule_name = set_name+"_%i_%i"

    # Load and check input file
    ifs = oechem.oemolistream(input_f)
    if not ifs.IsValid():
        raise Exception("Error: input_file (%s) was not valid" % input_f)

    errs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errs)

    molecule = oechem.OECreateOEGraphMol()
    count = 0
    smile_count = 0
    saved = 0
    switch = False

    # first output file
    current_letter = 1000
    ofs_file = output_f % current_letter
    ofs = oechem.oemolostream(ofs_file)
    if not ofs.IsValid():
        raise Exception("output file %s is not valid" % ofs_file)
    add_smiles = open(smiles_base % current_letter, 'a')

    try:
        while oechem.OEReadMolecule(ifs, molecule):
            # count input file molecules
            count += 1

            if switch:  # If True create new output file
                switch = False
                ofs.close()
                current_letter += 1
                ofs_file = output_f % current_letter
                # Load and check output file
                ofs = oechem.oemolostream(ofs_file)
                if not ofs.IsValid():
                    raise Exception("output file %s is not valid" % ofs_file)
                print("Switching to file %s, currently saved %i molecules"
                      % (ofs_file, saved))
                if current_letter % 100 == 0:
                    add_smiles.close()
                    add_smiles = open(smiles_base % current_letter, 'a')

            # IF smiles in current list skip the molecule
            smi = oechem.OECreateIsoSmiString(molecule)
            if smi in known_smiles:
                smile_count += 1
                continue

            # Make copy of molecule before making changes
            mol_copy = oechem.OEMol(molecule)
            oechem.OEAddExplicitHydrogens(mol_copy)
            # if the molecule meets our requirements save to current output
            if keep_molecule(mol_copy):
                mol_title = molecule_name % (current_letter, count)
                mol_copy.SetTitle(mol_title)
                add_smiles.write("%s\t\t%s\n" % (mol_title, smi))
                oechem.OEWriteMolecule(ofs, mol_copy)
                saved += 1
                if saved % 1000 == 0:
                    switch = True
    finally:
        # The SMILES text file used to be left open; close everything, also
        # when the loop exits through an exception.
        add_smiles.close()
        ofs.close()
        ifs.close()

    print("%i molecules in input file" % (count))
    print("%i molecules were had repeated isomeric SMILES" % smile_count)
    print("%i molecules saved to output files" % (saved))
Exemplo n.º 20
0
# Module-level setup that only runs when `has_oechem` is truthy.
if has_oechem:
    from chemfp.commandline import oe2fps
    import chemfp.openeye
    # Local alias for the OEGraphSim API version that chemfp.openeye exposes.
    OEGRAPHSIM_API_VERSION = chemfp.openeye.OEGRAPHSIM_API_VERSION
    chemfp.openeye._USE_SELECT = False # Grrr. Needed to automate testing.

    # Remember the stdout/stderr objects that are current at import time.
    real_stdout = sys.stdout
    real_stderr = sys.stderr

    # Full paths to the PubChem input files, resolved via support.fullpath().
    PUBCHEM_SDF = support.fullpath("pubchem.sdf")
    PUBCHEM_SDF_GZ = support.fullpath("pubchem.sdf.gz")
    PUBCHEM_ANOTHER_EXT = support.fullpath("pubchem.should_be_sdf_but_is_not")


    # Collect OEChem messages in an in-memory stream instead of the default output.
    oeerrs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(oeerrs)

def convert_v1_atom_names_to_v2(s):
    """Return *s* with the long (v1) atom field names replaced by the v2 ones.

    The substitutions are plain substring replacements applied in the order
    listed; "DefaultAtom" expands to the full v2 default field list.
    """
    renames = (
        ("Aromaticity", "Arom"),
        ("AtomicNumber", "AtmNum"),
        ("EqAromatic", "EqArom"),
        ("EqHalogen", "EqHalo"),
        ("FormalCharge", "FCharge"),
        ("HvyDegree", "HvyDeg"),
        ("Hybridization", "Hyb"),
        ("DefaultAtom", "Arom|AtmNum|Chiral|EqHalo|FCharge|HvyDeg|Hyb"),
    )
    converted = s
    for old_name, new_name in renames:
        converted = converted.replace(old_name, new_name)
    return converted

def convert_v1_bond_names_to_v2(s):
    """Return *s* with the v1 bond field names replaced by the v2 ones.

    "DefaultBond" is expanded to "Order|Chiral" first, then every remaining
    "BondOrder" is shortened to "Order".
    """
    expanded = s.replace("DefaultBond", "Order|Chiral")
    return expanded.replace("BondOrder", "Order")