Exemplo n.º 1
0
    def create_cmiles_metadata(self, molecule: off.Molecule) -> Dict[str, str]:
        """
        Build the dictionary of Cmiles identifiers describing a molecule.

        Parameters:
            molecule: The molecule whose identifiers should be generated.

        Returns:
            A dictionary mapping each Cmiles identifier name to the value produced by the molecule.

        Note:
            The returned dictionary contains, in this order:

            - `canonical_smiles`
            - `canonical_isomeric_smiles`
            - `canonical_explicit_hydrogen_smiles`
            - `canonical_isomeric_explicit_hydrogen_smiles`
            - `canonical_isomeric_explicit_hydrogen_mapped_smiles`
            - `molecular_formula`
            - `standard_inchi`
            - `inchi_key`
        """

        # Each row is (identifier name, isomeric, explicit_hydrogens, mapped);
        # the row order fixes the key order of the resulting dictionary.
        smiles_flavours = (
            ("canonical_smiles", False, False, False),
            ("canonical_isomeric_smiles", True, False, False),
            ("canonical_explicit_hydrogen_smiles", False, True, False),
            ("canonical_isomeric_explicit_hydrogen_smiles", True, True, False),
            ("canonical_isomeric_explicit_hydrogen_mapped_smiles", True, True, True),
        )

        identifiers = {
            name: molecule.to_smiles(
                isomeric=isomeric,
                explicit_hydrogens=explicit_hydrogens,
                mapped=mapped,
            )
            for name, isomeric, explicit_hydrogens, mapped in smiles_flavours
        }

        # The non-SMILES identifiers come after all of the SMILES variants.
        identifiers["molecular_formula"] = molecule.hill_formula
        identifiers["standard_inchi"] = molecule.to_inchi(fixed_hydrogens=False)
        identifiers["inchi_key"] = molecule.to_inchikey(fixed_hydrogens=False)

        return identifiers
Exemplo n.º 2
0
def check_missing_stereo(molecule: off.Molecule) -> bool:
    """
    Report whether the molecule is missing stereochemistry information.

    The molecule is written to an isomeric, explicit hydrogen SMILES and read back with
    undefined stereochemistry disallowed; a stereochemistry error on the way back in means
    that some stereo is missing. The RDKit backend is requested explicitly for the read, as
    this avoids nitrogen stereochemistry issues with the toolkit.

    Parameters
    ----------
    molecule: off.Molecule
        The molecule to check for stereo issues.

    Returns
    -------
    bool
        `True` if the round trip raised an undefined stereochemistry error, otherwise `False`.
    """
    try:
        round_trip_smiles = molecule.to_smiles(isomeric=True, explicit_hydrogens=True)
        # Only the success or failure of the parse matters; the parsed molecule is discarded.
        off.Molecule.from_smiles(
            smiles=round_trip_smiles,
            hydrogens_are_explicit=True,
            allow_undefined_stereo=False,
            toolkit_registry=RDKitToolkitWrapper(),
        )
    except UndefinedStereochemistryError:
        return True
    return False
Exemplo n.º 3
0
    def create_index(self, molecule: off.Molecule) -> str:
        """
        Generate the dataset index for a molecule.

        Parameters:
            molecule: The molecule which needs a dataset index.

        Returns:
            The isomeric smiles of the molecule, without explicit hydrogens or atom map indices,
            which is used as the dataset index.

        Important:
            Indexing schemes can differ between datasets depending on the data. In this basic dataset each
            conformer of a molecule is expanded into its own separately indexed entry. That expansion is
            handled by the dataset, so only a general index for the molecule is generated here.
        """

        return molecule.to_smiles(
            isomeric=True,
            explicit_hydrogens=False,
            mapped=False,
        )
Exemplo n.º 4
0
    def create_index(self, molecule: off.Molecule) -> str:
        """
        Create a specific torsion index for the molecule, this will use the atom map on the molecule.

        Parameters:
            molecule: The molecule for which the dataset index will be generated. Its `properties` must
                contain an `atom_map` entry with either 4 or 8 items.

        Returns:
            The canonical mapped isomeric smiles with explicit hydrogens, where the mapped indices are on
            the atoms in the torsion.

        Raises:
            AssertionError: If the molecule has no `atom_map` property, or the atom map does not contain
                4 or 8 entries.

        Important:
            This dataset uses a non-standard indexing with 4 atom mapped indices representing the atoms in the torsion
            to be rotated. An atom map with 8 entries is also accepted (presumably two torsions - confirm
            against the callers).
        """

        # Validate with explicit raises rather than `assert` statements so the checks are not
        # stripped when Python runs with -O; AssertionError is kept so existing callers are unaffected.
        if "atom_map" not in molecule.properties:
            raise AssertionError(
                "The molecule must have an 'atom_map' property to create a torsion index."
            )

        atom_map = molecule.properties["atom_map"]
        if len(atom_map) not in (4, 8):
            raise AssertionError(
                f"The 'atom_map' property must contain 4 or 8 entries, found {len(atom_map)}."
            )

        return molecule.to_smiles(
            isomeric=True,
            explicit_hydrogens=True,
            mapped=True,
        )