示例#1
0
def all_atom_coords(mol: rdkit.Mol, conformer=-1):
    """
    Yields the id and the position of every atom in `mol`.

    Parameters
    ----------
    mol : :class:`rdkit.Mol`
        The molecule whose atoms are iterated over.
    conformer : :class:`int`, optional
        The id of the conformer the positions are read from.

    Yields
    ------
    :class:`tuple`
        A pair of the form

        .. code-block:: python

            (32, numpy.array([12, 34, 3]))

        holding the atom id followed by an array with the
        coordinates of that atom.

    """

    # Every position is read from this single conformer.
    selected_conformer = mol.GetConformer(conformer)

    # The conformer is queried by atom id, so the id is taken off
    # each atom and then used to look the position up.
    for index in (atom.GetIdx() for atom in mol.GetAtoms()):
        position = selected_conformer.GetAtomPosition(index)
        yield index, np.array(list(position))
示例#2
0
def mol_to_atom_feats_and_adjacency_list(mol: AllChem.Mol,
                                         atom_map_to_index_map=None,
                                         params: AtomFeatParams = None):
    """
    Convert a molecule into a node-feature matrix plus one adjacency list per bond name.

    Every atom of `mol` is looked up by its ``molAtomMapNumber`` property, so all atoms must carry one.

    :param mol: the molecule to convert.
    :param atom_map_to_index_map: dict from atom-map number to node index. If you pass this in, the given index is
    used for each atom and the values must be exactly ``0 .. num_atoms - 1``. Otherwise rdkit default indexing is
    used and a fresh mapping is built internally.
    :param params: feature settings; ``AtomFeatParams()`` is created when this is ``None``.
    :return: a ``graph_as_adj_list.GraphAsAdjList`` constructed from (1) the ``(num_atoms, atom_feature_length)``
    float32 feature matrix, (2) a dict mapping each name in ``params.bond_names`` to the transposed array of
    ``(begin_index, end_index)`` pairs of the bonds with that name, and (3) an all-zero array of length ``num_atoms``.
    :raises AssertionError: if a supplied mapping's values are not exactly ``0 .. num_atoms - 1``.
    :raises KeyError: if an atom has no ``molAtomMapNumber`` property or its map number is missing from a supplied
    mapping.
    """
    params = AtomFeatParams() if params is None else params
    atoms = mol.GetAtoms()
    num_atoms = len(atoms)

    node_feats = np.zeros((num_atoms, params.atom_feature_length),
                          dtype=np.float32)

    use_supplied_idx_flg = atom_map_to_index_map is not None
    if use_supplied_idx_flg:
        # The supplied mapping has to be a permutation of the node indices.
        assert set(atom_map_to_index_map.values()) == set(range(num_atoms)), \
            "if give pre supplied ordering it must be the same size as the molecules trying to order"
    else:
        # No mapping given: build one from rdkit's own atom indices below.
        atom_map_to_index_map = {}

    # First we create the atom features and (if needed) the mapping.
    for atom in atoms:
        am = atom.GetPropsAsDict()['molAtomMapNumber']  # the atom mapping in the file
        if use_supplied_idx_flg:
            idx = atom_map_to_index_map[am]
        else:
            idx = atom.GetIdx()  # goes from 0 to A-1
            atom_map_to_index_map[am] = idx
        node_feats[idx, :] = get_atom_features(atom, params)

    # Now we go through the bonds and create the adjacency lists, translating
    # both end atoms to node indices through their atom-map numbers.
    adjacency_lists = {k: [] for k in params.bond_names}
    for bond in mol.GetBonds():
        am_b = bond.GetBeginAtom().GetPropsAsDict()['molAtomMapNumber']
        am_e = bond.GetEndAtom().GetPropsAsDict()['molAtomMapNumber']
        ix_b = atom_map_to_index_map[am_b]
        ix_e = atom_map_to_index_map[am_e]

        bond_name = params.get_bond_name(bond)
        adjacency_lists[bond_name].append((ix_b, ix_e))

    # Finally we pack all the results together.
    res = graph_as_adj_list.GraphAsAdjList(
        node_feats, {k: np.array(v).T
                     for k, v in adjacency_lists.items()},
        np.zeros(node_feats.shape[0], dtype=data_types.INT))

    return res
示例#3
0
def get_cavity_size(mol: rdkit.Mol, origin, conformer):
    """Calculates the cavity size of a conformer as seen from `origin`.

    For every atom, the distance between `origin` and the atom's
    position is reduced by the atom's entry in `atom_vdw_radii`. The
    smallest of these corrected distances is multiplied by -2 and
    returned.

    Args:
        mol: Molecule to measure.
        origin: Coordinates of the position from which
        the cavity is measured. Any array-like holding a single
        point is accepted.
        conformer: ID of the conformer to use.
    Returns:
        (float): Cavity size of the molecule.
    """
    conf = mol.GetConformer(conformer)
    atom_vdw = np.array(
        [atom_vdw_radii[x.GetSymbol()] for x in mol.GetAtoms()])
    # The query point is passed as a plain (1, n) ndarray rather than a
    # `numpy.matrix`: NumPy discourages the matrix class, and
    # scikit-learn's input validation (assuming `euclidean_distances`
    # comes from scikit-learn) no longer accepts it.
    origin_row = np.asarray(origin, dtype=float).reshape(1, -1)
    distances = euclidean_distances(conf.GetPositions(), origin_row)
    distances = distances.flatten() - atom_vdw
    return -2 * distances.min()
示例#4
0
def standardize(compound: AllChem.Mol,
                add_hs=True,
                remove_stereo=True,
                thorough=False) -> AllChem.Mol:
    """
    Runs a fixed sequence of cleanup and sanitization steps on an RDKit molecule.

    The steps run in this order: cleanup, sanitization and
    stereochemistry assignment; optional isotope reset and charge
    neutralization (`thorough`); optional stereochemistry removal
    (`remove_stereo`); the ``_commute_inchi`` step; optional removal of
    small fragments (`thorough`); a final charge neutralization, which
    is applied regardless of `thorough`; and optional hydrogen addition
    (`add_hs`). The molecule is re-sanitized after the fragment and
    neutralization stages.

    Parameters
    ----------
    compound : rdkit.Chem.rdchem.Mol
        A chemical compound. NOTE(review): the first steps operate
        directly on this object, so the caller's instance is presumably
        modified - confirm before relying on it being untouched.
    add_hs : bool
        If True, hydrogens are added to the compound as the last step.
    remove_stereo : bool
        If True, stereochemistry info is removed from the compound.
    thorough : bool
        If True, isotopes are reset to 0, an additional early charge
        neutralization is run, and small fragments are stripped.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        The standardized compound.
    """

    def _sanitize(mol):
        # Full sanitization; errors are raised, not returned.
        Chem.SanitizeMol(mol,
                         sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL,
                         catchErrors=False)

    # Stage 1: basic cleanup and stereo perception.
    Chem.Cleanup(compound)
    _sanitize(compound)
    AllChem.AssignStereochemistry(compound,
                                  cleanIt=True,
                                  force=True,
                                  flagPossibleStereoCenters=True)

    # Stage 2 (thorough only): drop isotope labels, neutralize charge.
    if thorough:
        for atom in compound.GetAtoms():
            atom.SetIsotope(0)
        compound = _neutralize_charge(compound)
        _sanitize(compound)

    # Stage 3: optionally discard stereochemistry.
    if remove_stereo:
        Chem.RemoveStereochemistry(compound)

    # Stage 4: pass the compound through the InChI helper.
    compound = _commute_inchi(compound)

    # Stage 5: keep only the biggest fragment when asked to, then
    # sanitize in either case.
    if thorough:
        compound = _strip_small_fragments(compound)
    _sanitize(compound)

    # Stage 6: neutralize charge (always).
    compound = _neutralize_charge(compound)
    _sanitize(compound)

    # Stage 7: optionally add protons.
    if not add_hs:
        return compound
    return Chem.AddHs(compound, explicitOnly=False, addCoords=True)
示例#5
0
def apply_shift(mol: rdkit.Mol, shift, conformer=-1):
    """
    Returns a copy of `mol` with every atom translated by `shift`.

    The molecule passed in is left as it is; all changes are made on
    copies of the conformer and of the molecule.

    Parameters
    ----------
    mol : :class:`rdkit.Mol`
        The molecule to be shifted.
    shift : :class:`numpy.array`
        A numpy array holding the value of the shift along each
        axis.
    conformer : :class:`int`, optional
        The id of the conformer to use.

    Returns
    -------
    :class:`rdkit.Chem.rdchem.Mol`
        A copy of the molecule holding a single conformer, in which
        the coordinates have been shifted by `shift`.

    """

    # Copy the chosen conformer so that the one owned by `mol` is
    # never written to.
    shifted_conformer = rdkit.Conformer(mol.GetConformer(conformer))

    # Move the atoms one at a time: read the position by atom id,
    # add the shift, and write it back as an rdkit geometry point.
    for index in (atom.GetIdx() for atom in mol.GetAtoms()):
        moved = np.array(shifted_conformer.GetAtomPosition(index)) + shift
        shifted_conformer.SetAtomPosition(index, Point3D(*moved))

    # The copied molecule arrives with copies of the original
    # conformers. They are dropped so that the shifted conformer is
    # the only one the returned molecule holds.
    result = rdkit.Mol(mol)
    result.RemoveAllConformers()
    result.AddConformer(shifted_conformer)
    return result
示例#6
0
    def _init_from_rdkit_mol(
        self,
        molecule: rdkit.Mol,
        functional_groups: typing.Iterable[typing.Union[
            FunctionalGroup, FunctionalGroupFactory]],
        placer_ids: typing.Optional[tuple[int, ...]],
    ) -> None:
        """
        Initialize from an :mod:`rdkit` molecule.

        Atoms, bonds and the position matrix are read from `molecule`
        and passed to the parent initializer. Functional groups,
        *placer* ids and core ids are set up afterwards.

        Parameters:

            molecule:
                The molecule. Positions are taken from its default
                conformer.

            functional_groups:
                An :class:`iterable` holding
                :class:`.FunctionalGroup` instances,
                :class:`.FunctionalGroupFactory` instances, or a mix
                of the two. :class:`.FunctionalGroup` instances are
                added to the building block directly, while
                :class:`.FunctionalGroupFactory` instances are used
                to create the :class:`.FunctionalGroup` instances the
                building block should hold. Functional groups
                identify which atoms are modified during
                :class:`.ConstructedMolecule` construction.

            placer_ids:
                The ids of *placer* atoms, which are the atoms used
                for calculating the position of the building block.
                Which ids end up being used depends on this argument
                and on the functional groups of the building block:

                #. `placer_ids` is passed to the initializer: the
                   passed *placer* ids are used.

                #. `placer_ids` is ``None`` and the building block has
                   functional groups: the *placer* ids of the
                   functional groups are used.

                #. `placer_ids` is ``None`` and `functional_groups` is
                   empty: all atoms of the molecule are used.

        """

        def bond_order(rdkit_bond):
            # Dative bonds are stored with the order 9, every other
            # bond keeps the order rdkit reports for it.
            if rdkit_bond.GetBondType() == rdkit.BondType.DATIVE:
                return 9
            return rdkit_bond.GetBondTypeAsDouble()

        atoms = tuple(
            Atom(
                rdkit_atom.GetIdx(),
                rdkit_atom.GetAtomicNum(),
                rdkit_atom.GetFormalCharge(),
            )
            for rdkit_atom in molecule.GetAtoms()
        )
        bonds = tuple(
            Bond(
                atom1=atoms[rdkit_bond.GetBeginAtomIdx()],
                atom2=atoms[rdkit_bond.GetEndAtomIdx()],
                order=bond_order(rdkit_bond),
            )
            for rdkit_bond in molecule.GetBonds()
        )
        positions = molecule.GetConformer().GetPositions()

        super().__init__(atoms, bonds, positions)

        # NOTE(review): `_with_functional_groups` presumably sets
        # `self._functional_groups`, which is read below - confirm.
        extracted_groups = self._extract_functional_groups(
            functional_groups=functional_groups,
        )
        self._with_functional_groups(extracted_groups)

        self._placer_ids = self._normalize_placer_ids(
            placer_ids=placer_ids,
            functional_groups=self._functional_groups,
        )
        core_ids = self._get_core_ids(
            functional_groups=self._functional_groups,
        )
        self._core_ids = frozenset(core_ids)
    def _init_from_rdkit_mol(
        self,
        molecule: rdkit.Mol,
        functional_groups: _FunctionalGroups,
        placer_ids: typing.Optional[abc.Iterable[int]],
    ) -> None:
        """
        Initialize from an :mod:`rdkit` molecule.

        Atoms, bonds and the position matrix are read from `molecule`
        and handed to ``Molecule.__init__``. Functional groups,
        *placer* ids and core ids are set up afterwards.

        Parameters:

            molecule:
                The molecule. Positions are taken from its default
                conformer.

            functional_groups:
                The :class:`.FunctionalGroup` instances the building
                block should have, and / or
                :class:`.FunctionalGroupFactory` instances used for
                creating them.

            placer_ids:
                The ids of *placer* atoms, which are the atoms used
                for calculating the position of the building block.
                Which ids end up being used depends on this argument
                and on the functional groups of the building block:

                #. `placer_ids` is passed to the initializer: the
                   passed *placer* ids are used.

                #. `placer_ids` is ``None`` and the building block has
                   functional groups: the *placer* ids of the
                   functional groups are used.

                #. `placer_ids` is ``None`` and `functional_groups` is
                   empty: all atoms of the molecule are used.

        """

        atoms = tuple(
            Atom(
                id=rdkit_atom.GetIdx(),
                atomic_number=rdkit_atom.GetAtomicNum(),
                charge=rdkit_atom.GetFormalCharge(),
            )
            for rdkit_atom in molecule.GetAtoms()
        )

        collected_bonds = []
        for rdkit_bond in molecule.GetBonds():
            begin_atom = atoms[rdkit_bond.GetBeginAtomIdx()]
            end_atom = atoms[rdkit_bond.GetEndAtomIdx()]
            # Dative bonds are stored with the order 9, every other
            # bond keeps the order rdkit reports for it.
            if rdkit_bond.GetBondType() == rdkit.BondType.DATIVE:
                order = 9
            else:
                order = rdkit_bond.GetBondTypeAsDouble()
            collected_bonds.append(
                Bond(atom1=begin_atom, atom2=end_atom, order=order)
            )
        bonds = tuple(collected_bonds)

        Molecule.__init__(
            self=self,
            atoms=atoms,
            bonds=bonds,
            position_matrix=molecule.GetConformer().GetPositions(),
        )

        # NOTE(review): `_with_functional_groups` presumably sets
        # `self._functional_groups`, which is read below - confirm.
        extracted_groups = self._extract_functional_groups(
            functional_groups=functional_groups,
        )
        self._with_functional_groups(extracted_groups)

        self._placer_ids = self._normalize_placer_ids(
            placer_ids=placer_ids,
            functional_groups=self._functional_groups,
        )
        core_ids = self._get_core_ids(
            functional_groups=self._functional_groups,
        )
        self._core_ids = frozenset(core_ids)