Exemplo n.º 1
0
def get_iupac(molecule):
    """
    Generate IUPAC name

    Parameters
    ----------
    molecule :
        `oechem.OEMol`

    Returns
    -------
    str:
        iupac name

    Raises
    ------
    ImportError
        If the module-level ``has_openeye`` flag is falsy, or if
        ``oeiupac.OEIUPACIsLicensed()`` reports that no OEIUPAC license is
        available.

    Notes
    -----
    Will only be generated if has openeye license

    """
    if not has_openeye:
        # The two literals are joined by implicit concatenation, so the first
        # one must end with its own sentence separator.
        raise ImportError(
            "OpenEye is not installed. You can use the canonicalization='rdkit' to use the RDKit backend. "
            "The Conda recipe for cmiles installs rdkit")

    # Imported lazily so this module can still be imported without OpenEye.
    from openeye import oeiupac
    if not oeiupac.OEIUPACIsLicensed():
        raise ImportError("Must have OEIUPAC license!")
    return oeiupac.OECreateIUPACName(molecule)
Exemplo n.º 2
0
    def _create_implicit_solvent_openmm(self, mol):
        """
        Build an implicit-solvent OpenMM system and positions for one molecule.

        The molecule is written to a Tripos mol2 file named after its IUPAC
        name, run through antechamber and tleap via ``openmoltools``, and the
        resulting AMBER prmtop/inpcrd files are loaded with ``app``.

        Parameters
        ----------
        mol : oemol
            oemol to be turned into system, positions

        Returns
        -------
        system : simtk.openmm.System
            openmm system corresponding to molecule
        positions : np.array, Quantity nm
           array of atomic positions
        """
        iupac_name = oeiupac.OECreateIUPACName(mol)

        # NOTE(review): the return value is discarded here; if
        # enter_temp_directory is a context manager this call alone may not
        # change directory -- confirm against openmoltools.
        openmoltools.openeye.enter_temp_directory()

        # Only the second element (the mol2 path) of the result is used.
        _, mol2_path = openmoltools.openeye.molecule_to_mol2(
            mol,
            tripos_mol2_filename=iupac_name + '.tripos.mol2',
            conformer=0,
            residue_name='MOL')

        # Parameterize with antechamber, then assemble AMBER inputs with tleap.
        gaff_mol2_path, frcmod_path = openmoltools.amber.run_antechamber(
            iupac_name, mol2_path)
        prmtop_path, inpcrd_path = openmoltools.amber.run_tleap(
            iupac_name, gaff_mol2_path, frcmod_path)

        topology_file = app.AmberPrmtopFile(prmtop_path)
        coordinate_file = app.AmberInpcrdFile(inpcrd_path)

        system = topology_file.createSystem(
            implicitSolvent=self.implicit_solvent,
            constraints=self.constraints,
            removeCMMotion=False,
        )
        return system, coordinate_file.getPositions(asNumpy=True)
Exemplo n.º 3
0
def generate_molecule_from_smiles(smiles, name=None):
    """
    Build a titled, explicit-hydrogen OEMol from a SMILES string.

    Parameters
    ----------
    smiles : str
       The canonical isomeric SMILES string.
    name : str, optional, default=None
       If specified, the molecule title will be set to this; if not, the IUPAC name will be assigned.

    Returns
    -------
    molecule : OEMol
       The parsed molecule with aromaticity assigned, explicit hydrogens
       added, a title set, and atom names filled in if any were empty.

    Raises
    ------
    ValueError
       If ``oechem.OEParseSmiles`` reports that the SMILES could not be parsed.

    """
    mol = oechem.OEMol()
    parsed_ok = oechem.OEParseSmiles(mol, smiles)
    if not parsed_ok:
        raise ValueError("The supplied SMILES '%s' could not be parsed." %
                         smiles)

    # Perceive aromaticity, then make the hydrogens explicit.
    oechem.OEAssignAromaticFlags(mol, oechem.OEAroModelOpenEye)
    oechem.OEAddExplicitHydrogens(mol)

    # Fall back to the generated IUPAC name when the caller gave no title.
    title = oeiupac.OECreateIUPACName(mol) if name is None else name
    mol.SetTitle(title)

    # A single unnamed atom triggers renaming of every atom.
    has_unnamed_atom = any(atom.GetName() == '' for atom in mol.GetAtoms())
    if has_unnamed_atom:
        oechem.OETriposAtomNames(mol)

    return mol
Exemplo n.º 4
0
def normalize_molecule(molecule):
    """Return a normalized copy of the molecule; the input is left untouched.

    Normalization assigns aromaticity, adds explicit hydrogens, sets the title
    to the generated IUPAC name, and reassigns all atom names if any is empty.

    Parameters
    ----------
    molecule : OEMol
        the molecule to be normalized.

    Returns
    -------
    molcopy : OEMol
        A (copied) version of the normalized molecule
    """
    normalized = oechem.OEMol(molecule)

    # Aromaticity first, then explicit hydrogens.
    oechem.OEAssignAromaticFlags(normalized, oechem.OEAroModelOpenEye)
    oechem.OEAddExplicitHydrogens(normalized)

    # The title is always replaced by the IUPAC name of the copy.
    normalized.SetTitle(oeiupac.OECreateIUPACName(normalized))

    # A single unnamed atom triggers renaming of every atom.
    unnamed_present = any(
        atom.GetName() == '' for atom in normalized.GetAtoms())
    if unnamed_present:
        oechem.OETriposAtomNames(normalized)

    return normalized
Exemplo n.º 5
0
    def __makeChemCompIdentifierCategory(self, ccId, oeMol):
        """Build the pdbx_chem_comp_identifier rows for one component.

        Three rows are returned, in this order: "SYSTEMATIC NAME" (style
        "systematic"), "COMMON" (style "traditional") and "SYNONYM" (style
        "acdname"). Each identifier is the OEIUPAC name of ``oeMol`` in that
        style, passed through ``oeiupac.OEToUTF8``.

        Example of the target category:

        loop_
        _pdbx_chem_comp_identifier.comp_id
        _pdbx_chem_comp_identifier.type
        _pdbx_chem_comp_identifier.program
        _pdbx_chem_comp_identifier.program_version
        _pdbx_chem_comp_identifier.identifier
        ATP "SYSTEMATIC NAME" ACDLabs              10.04
        ;adenosine 5'-(tetrahydrogen triphosphate)
        ;
        ATP "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.5.0 "[[(2R,3S,4R,5R)-5-(6-aminopurin-9-yl)-..."
        #
        """
        # (row type, OEIUPAC style name) in output order.
        typeStylePairs = (
            ("SYSTEMATIC NAME", "systematic"),
            ("COMMON", "traditional"),
            ("SYNONYM", "acdname"),
        )
        return [
            {
                "comp_id": ccId,
                "type": idType,
                "program": "OpenEye OEToolkits",
                "program_version": self.__oeVersion,
                "identifier": oeiupac.OEToUTF8(
                    oeiupac.OECreateIUPACName(oeMol, oeiupac.OEGetIUPACNamStyle(styleName))
                ),
            }
            for idType, styleName in typeStylePairs
        ]
Exemplo n.º 6
0
 def __makeChemCompCategory(self, ccId, oeMol, site="RCSB", missingModelXyz=False, skipAnnotations=False):
     """Build the chem_comp category row for one component as a dictionary.

     Formula, formal charge and molecular weight are computed from ``oeMol``.
     The name is ``ccId`` when ``skipAnnotations`` is true, otherwise the
     systematic OEIUPAC name. Any computed value that is None is written as
     "?". Both date fields are set to today's local date.
     """
     today = time.strftime("%Y-%m-%d", time.localtime())
     formula = oechem.OEMolecularFormula(oeMol)
     charge = self.__getFormalCharge(oeMol)
     weight = oechem.OECalculateMolecularWeight(oeMol)
     #
     if skipAnnotations:
         name = ccId
     else:
         name = oeiupac.OEToUTF8(
             oeiupac.OECreateIUPACName(oeMol, oeiupac.OEGetIUPACNamStyle("systematic"))
         )
     #
     # Key order below is the order in which the row is populated.
     return {
         "id": ccId,
         "name": name if name is not None else "?",
         "type": "NON-POLYMER",
         "pdbx_type": "?",
         "formula": formula if formula is not None else "?",
         "mon_nstd_parent_comp_id": "?",
         "pdbx_formal_charge": charge if charge is not None else "?",
         "pdbx_ambiguous_flag": "N",
         "pdbx_initial_date": today,
         "pdbx_modified_date": today,
         "pdbx_release_status": "HOLD",
         "pdbx_replaced_by": "?",
         "pdbx_replaces": "?",
         "formula_weight": "%0.3f" % weight if weight is not None else "?",
         "one_letter_code": "?",
         # The three-letter code is whatever precedes the first underscore.
         "three_letter_code": ccId.split("_")[0],
         "pdbx_model_coordinates_details": "?",
         "pdbx_ideal_coordinates_details": "?",
         "pdbx_model_coordinates_missing_flag": "Y" if missingModelXyz else "N",
         "pdbx_model_coordinates_db_code": "?",
         "pdbx_processing_site": site,
         "pdbx_subcomponent_list": "?",
     }
Exemplo n.º 7
0
def Mol2Nam(itf):
    """Name every molecule in the input file and print or write the result.

    Options are read from ``itf``: ``-in`` (input file), ``-out`` (optional
    output file), ``-language``, ``-encoding``, ``-style``, ``-capitalize``,
    and the optional ``-delim`` and ``-tag``.

    When no output file is given, each name is printed. Otherwise the name
    becomes the molecule title (prefixed by the old title and ``-delim`` when
    that option is set), is also stored as SD data under ``-tag`` when that
    option is set, and the molecule is written to the output stream.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-in")):
        oechem.OEThrow.Fatal("Unable to open '%s' for reading" % itf.GetString("-in"))

    ofs = oechem.oemolostream()
    outname = None
    if itf.HasString("-out"):
        outname = itf.GetString("-out")
        if not ofs.open(outname):
            # This is the output stream, so the failure is a write failure.
            oechem.OEThrow.Fatal("Unable to open '%s' for writing" % outname)

    language = oeiupac.OEGetIUPACLanguage(itf.GetString("-language"))
    charset = oeiupac.OEGetIUPACCharSet(itf.GetString("-encoding"))
    style = oeiupac.OEGetIUPACNamStyle(itf.GetString("-style"))

    for mol in ifs.GetOEGraphMols():
        name = oeiupac.OECreateIUPACName(mol, style)

        # Translation is skipped when the language code is not positive.
        if language > 0:
            name = oeiupac.OEToLanguage(name, language)
        if itf.GetBool("-capitalize"):
            name = oeiupac.OECapitalizeName(name)

        # Re-encode for the requested character set; an unrecognized charset
        # leaves the name unchanged.
        if charset == oeiupac.OECharSet_ASCII:
            name = oeiupac.OEToAscii(name)
        elif charset == oeiupac.OECharSet_UTF8:
            name = oeiupac.OEToUTF8(name)
        elif charset == oeiupac.OECharSet_HTML:
            name = oeiupac.OEToHTML(name)
        elif charset == oeiupac.OECharSet_SJIS:
            name = oeiupac.OEToSJIS(name)
        elif charset == oeiupac.OECharSet_EUCJP:
            name = oeiupac.OEToEUCJP(name)

        if outname:
            if itf.HasString("-delim"):
                title = mol.GetTitle()
                name = title + itf.GetString("-delim") + name

            # The tag receives the name after any -delim prefixing above.
            if itf.HasString("-tag"):
                oechem.OESetSDData(mol, itf.GetString("-tag"), name)

            mol.SetTitle(name)
            oechem.OEWriteMolecule(ofs, mol)

        else:
            print(name)
Exemplo n.º 8
0
    def _state_transition_to_iupac(self, state_transition):
        """
        Translate each SMILES string of a state transition to its IUPAC name.

        Parameters
        ----------
        state_transition : (str, str)
            Pair of smiles strings for the state transition

        Returns
        -------
        state_transition_iupac : [str, str]
            The pair of molecules in IUPAC names
        """
        def _name_from_smiles(smiles):
            # The result of OESmilesToMol is not checked; whatever molecule
            # it leaves behind is the one that gets named.
            parsed = oechem.OEMol()
            oechem.OESmilesToMol(parsed, smiles)
            return oeiupac.OECreateIUPACName(parsed)

        return [_name_from_smiles(smiles) for smiles in state_transition]
Exemplo n.º 9
0
        template_script.format(yaml_filename)


if __name__ == "__main__":
    # Script entry point: read every molecule in the SMILES file, name it,
    # and write one YAML file per ordered pair of distinct names, each built
    # from a fresh copy of the template.
    template_script_file_eq = "submit-eq.sh"
    template_script_file_neq = "submit-neq.sh"

    substituted_benzene_smilefile = "filtered_database.smi"

    substituted_benzenes_iupac = []

    istream = oechem.oemolistream(substituted_benzene_smilefile)

    for mol in istream.GetOEMols():
        # Name a copy of each molecule yielded by the stream.
        mol_copy = oechem.OEMol(mol)
        substituted_benzenes_iupac.append(oeiupac.OECreateIUPACName(mol_copy))

    with open("rj_neq_template.yaml", "r") as yamlfile:
        # safe_load: yaml.load() without an explicit Loader fails on
        # PyYAML >= 6 and can construct arbitrary objects on older versions.
        template_yamldict = yaml.safe_load(yamlfile)

    for pair in itertools.permutations(substituted_benzenes_iupac, 2):
        # permutations() works by position, so duplicate names in the input
        # can still produce a pair of identical names; skip those.
        if pair[0] == pair[1]:
            continue

        # Deep-copy so each pair gets its own copy of the template.
        new_yaml_dict = create_yaml_file(pair[0], pair[1],
                                         copy.deepcopy(template_yamldict))

        new_yaml_filename = "{}_{}_rjneq.yaml".format(pair[0], pair[1])

        with open(new_yaml_filename, 'w') as yaml_outfile:
            yaml.dump(new_yaml_dict, yaml_outfile)
Exemplo n.º 10
0
def generate_fragments(molecule, generate_visualization=False, strict_stereo=False, combinatorial=True, MAX_ROTORS=2,
                       remove_map=True, json_filename=None):
    """
    This function generates fragments from molecules. The output is a dictionary that maps SMILES of molecules to SMILES
    for fragments. Parent and fallback SMILES are produced by ``mol_to_smiles`` with ``isomeric=True``.

    Parameters
    ----------
    molecule: OEMol to fragment, or an iterable of OEMols
        Anything that ``list()`` cannot iterate is treated as a single molecule.
    generate_visualization: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    strict_stereo: bool
        Note: This applies to the molecule being fragmented. Not the fragments.
        If True, omega will generate conformation with stereochemistry defined in the SMILES string for charging.
    remove_map: bool
        If True, the index tags will be removed. This will remove duplicate fragments. Default True
    json_filename: str
        filename for JSON. If provided, will save the returned dictionary to a JSON file. Default is None

    Returns
    -------
    fragments: dict
        mapping of SMILES from the parent molecule to the SMILES of the fragments
    """
    fragments = dict()

    # Accept either one molecule or an iterable of molecules.
    try:
        molecules = list(molecule)
    except TypeError:
        molecules = [molecule]

    for mol in molecules:
        # normalize molecule
        mol = normalize_molecule(mol, mol.GetTitle())
        if remove_map:
            # Remove tags from smiles. This is done to make it easier to find duplicate fragments
            for atom in mol.GetAtoms():
                atom.SetMapIdx(0)
        frags = _generate_fragments(mol, strict_stereo=strict_stereo)
        if not frags:
            logger().warning('Skipping {}, SMILES: {}'.format(mol.GetTitle(), oechem.OECreateSmiString(mol)))
            continue
        # First element is the charged molecule, last is the fragment mapping.
        charged = frags[0]
        frags = frags[-1]
        frag_list = list(frags.values())
        if combinatorial:
            smiles = smiles_with_combined(frag_list, charged, MAX_ROTORS)
        else:
            smiles = frag_to_smiles(frag_list, charged)

        parent_smiles = mol_to_smiles(mol, isomeric=True, explicit_hydrogen=False, mapped=False)
        if smiles:
            fragments[parent_smiles] = list(smiles.keys())
        else:
            # Add molecule where no fragments were found for terminal torsions and / or rings and non rotatable bonds
            fragments[parent_smiles] = [mol_to_smiles(mol, isomeric=True, explicit_hydrogen=True, mapped=False)]

        if generate_visualization:
            iupac_name = oeiupac.OECreateIUPACName(mol)
            name = mol.GetTitle()
            # When the title is just the IUPAC name, name the pdf after an
            # identifier derived from the SMILES instead.
            if iupac_name == name:
                name = make_python_identifier(oechem.OEMolToSmiles(mol))[0]
            oname = '{}.pdf'.format(name)
            ToPdf(charged, oname, frags)
        # Drop the references before the next molecule is processed.
        del charged, frags

    if json_filename:
        # The context manager closes the file even if serialization fails.
        with open(json_filename, 'w') as json_file:
            json.dump(fragments, json_file, indent=2, sort_keys=True)

    return fragments