示例#1
0
def all_atom_coords(mol: rdkit.Mol, conformer=-1):
    """
    Yield the id and coordinates of every atom in `mol`.

    Parameters
    ----------
    mol : :class:`rdkit.Mol`
        The molecule whose atoms are iterated over.

    conformer : :class:`int`, optional
        The id of the conformer to be used. It is passed straight
        to ``mol.GetConformer``.

    Yields
    ------
    :class:`tuple`
        A pair of the form

        .. code-block:: python

            (32, numpy.array([12, 34, 3]))

        where the first element is the atom id and the second
        element is an array holding the coordinates of the atom.

    """

    # Every position is read from this one conformer.
    positions = mol.GetConformer(conformer)

    # The generator is lazy, so each atom id is looked up just before
    # its position is requested.
    for atom_id in (atom.GetIdx() for atom in mol.GetAtoms()):
        # Unpacking the position object gives its coordinates.
        coords = [*positions.GetAtomPosition(atom_id)]
        yield atom_id, np.array(coords)
示例#2
0
def mol_to_atom_feats_and_adjacency_list(mol: AllChem.Mol,
                                         atom_map_to_index_map=None,
                                         params: AtomFeatParams = None):
    """
    Convert a molecule into a node-feature matrix plus one adjacency list per bond type.

    Every atom of `mol` must carry the ``molAtomMapNumber`` property; it is the key used to look up
    (or record) the node index of the atom.

    :param mol: the molecule to convert.
    :param atom_map_to_index_map: if you pass this in it will use the defined indices for each atom
    (a dict from atom-map number to node index, whose values must be exactly ``0 .. num_atoms - 1``).
    Otherwise will use rdkit default indexing. A supplied dict is only read, never modified.
    :param params: feature settings; a default ``AtomFeatParams()`` is created when ``None``.
    :return: a ``graph_as_adj_list.GraphAsAdjList`` built from the node features, the adjacency lists
    (each stored as the transpose of an array of ``(begin, end)`` index pairs) and an all-zero integer
    vector with one entry per node.
    :raises AssertionError: if a supplied `atom_map_to_index_map` does not cover exactly the indices
    ``0 .. num_atoms - 1``.
    :raises KeyError: if an atom has no ``molAtomMapNumber`` property, or its map number is missing
    from a supplied `atom_map_to_index_map`.
    """
    params = AtomFeatParams() if params is None else params
    atoms = mol.GetAtoms()
    num_atoms = len(atoms)

    node_feats = np.zeros((num_atoms, params.atom_feature_length),
                          dtype=np.float32)

    use_supplied_idx_flg = atom_map_to_index_map is not None
    if use_supplied_idx_flg:
        # Raised explicitly (rather than with an ``assert`` statement) so the check still runs
        # under ``python -O``; the exception type is the one callers saw before.
        if set(atom_map_to_index_map.values()) != set(range(num_atoms)):
            raise AssertionError(
                "if give pre supplied ordering it must be the same size as the molecules trying to order")
    else:
        # No mapping given: we build it ourselves from the rdkit atom indices.
        atom_map_to_index_map = {}

    # First we will create the atom features and the mappings
    for atom in atoms:
        am = atom.GetPropsAsDict()['molAtomMapNumber']  # the atom mapping in the file
        if use_supplied_idx_flg:
            idx = atom_map_to_index_map[am]
        else:
            idx = atom.GetIdx()  # goes from 0 to A-1
            atom_map_to_index_map[am] = idx
        node_feats[idx, :] = get_atom_features(atom, params)

    # Now we will go through and create the adjacency lists
    adjacency_lists = {k: [] for k in params.bond_names}
    for bond in mol.GetBonds():
        am_b = bond.GetBeginAtom().GetPropsAsDict()['molAtomMapNumber']
        am_e = bond.GetEndAtom().GetPropsAsDict()['molAtomMapNumber']
        ix_b = atom_map_to_index_map[am_b]
        ix_e = atom_map_to_index_map[am_e]

        bond_name = params.get_bond_name(bond)
        adjacency_lists[bond_name].append((ix_b, ix_e))

    # Finally we pack all the results together
    # NOTE(review): the all-zero vector is presumably a per-node graph id - confirm against
    # GraphAsAdjList.
    res = graph_as_adj_list.GraphAsAdjList(
        node_feats, {k: np.array(v).T
                     for k, v in adjacency_lists.items()},
        np.zeros(node_feats.shape[0], dtype=data_types.INT))

    return res
示例#3
0
def set_position(mol: rdkit.Mol, position, conformer=-1):
    """
    Move a conformer of `mol` so that its centroid lies at `position`.

    Parameters
    ----------
    mol : :class:`rdkit.Mol`
        The molecule to move. It is modified in place.

    position : :class:`numpy.array`
        This array holds the position on which the centroid of the
        molecule should be placed.

    conformer : :class:`int`, optional
        The id of the conformer to be used.

    Returns
    -------
    :class:`rdkit.Chem.rdchem.Mol`
        The ``rdkit`` molecule with the centroid placed at
        `position`. This is the same instance as `mol`.

    """

    # Resolve the actual id, so that the default of -1 is turned into
    # the id of the conformer rdkit selects for it.
    target_id = mol.GetConformer(conformer).GetId()

    # The translation which takes the current centroid to `position`.
    displacement = position - get_centroid(mol, target_id)

    # `apply_shift` works on a copy, so take the translated conformer
    # out of that copy and give it the id of the one it replaces.
    moved_conf = apply_shift(mol, displacement, target_id).GetConformer()
    moved_conf.SetId(target_id)

    # Swap the old conformer for the translated one.
    mol.RemoveConformer(target_id)
    mol.AddConformer(moved_conf)

    return mol
示例#4
0
文件: data.py 项目: Xiangyan93/AIMS
 def calc_features_mol(mol: Chem.Mol,
                       features_generator: FeaturesGenerator):
     """Compute the feature vector of `mol` with `features_generator`.

     Returns ``features_generator(mol)`` as an array when `mol` has at
     least one heavy atom. For a molecule without heavy atoms (e.g. H2)
     a zero vector is returned whose length is that of the features
     generated for methane. When `mol` is ``None`` the result is
     ``np.asarray(None)``.
     """
     if mol is None:
         return np.asarray(None)

     heavy_atoms = mol.GetNumHeavyAtoms()
     if heavy_atoms > 0:
         return np.asarray(features_generator(mol))

     if heavy_atoms == 0:
         # Not all feature vectors are equally long, so methane is used
         # as a dummy molecule to find the length for this generator.
         methane_features = features_generator(Chem.MolFromSmiles('C'))
         return np.asarray(np.zeros(len(methane_features)))

     return np.asarray(None)
def make_entry(
    mol: rdkit.Mol, sa_scorer: SyntheticAccesibilityScorer,
):
    """Build one result row for `mol`.

    Hydrogens are added to the molecule, every function in
    ``sa_scorer.sa_funcs`` is evaluated on it (exactly three are expected)
    and the name of the first entry of ``fg_names`` whose SMARTS pattern
    matches the molecule is looked up.

    Args:
        mol: Molecule to score.
        sa_scorer: Scorer providing ``sa_funcs`` and ``calculate_sa``.

    Returns:
        (tuple): ``(smiles, functional_group_name, score_1, score_2,
        score_3)`` where the SMILES is that of the molecule with
        hydrogens added, the name is ``""`` when no pattern matches (or
        the lookup fails), and the scores are floats in the order of
        ``sa_scorer.sa_funcs``.

    Raises:
        ValueError: If ``sa_scorer.sa_funcs`` does not hold exactly three
            functions.
    """
    # Ensure hydrogens are added to molecule.
    mol = rdkit.AddHs(mol)
    sascore, scscore, rfmodel = [
        sa_scorer.calculate_sa(mol, func) for func in sa_scorer.sa_funcs
    ]

    # The functional group lookup is deliberately best-effort: any failure
    # (e.g. a pattern which cannot be parsed) results in an empty name.
    # ``Exception`` is caught instead of using a bare ``except`` so that
    # KeyboardInterrupt and SystemExit are no longer swallowed.
    try:
        # ``next`` stops at the first matching pattern and falls back to
        # "" when nothing matches.
        fg_name = next(
            (
                name
                for name in fg_names
                if len(
                    mol.GetSubstructMatch(
                        rdkit.MolFromSmarts(fg_names[name])
                    )
                ) != 0
            ),
            "",
        )
    except Exception:
        fg_name = ""

    return (
        rdkit.MolToSmiles(mol),
        str(fg_name),
        float(sascore),
        float(scscore),
        float(rfmodel),
    )
示例#6
0
def calculate_normalizers(mol: Chem.Mol,
                          num_confs: int = 200,
                          pruning_thresh: float = 0.05) -> Tuple[float, float]:
    """Calculates the :math:`E_0` and :math:`Z_0` normalizing constants for a molecule used in the TorsionNet [1]_ paper.

    Conformers are embedded into `mol`, optimized, pruned with :func:`prune_conformers`
    and their energies are read with :func:`get_conformer_energies`.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule of interest. Conformers are embedded into it in place.
    num_confs : int
        The number of conformers to generate when calculating the constants. Should equal
        the number of steps for each episode of the environment containing this molecule.
    pruning_thresh : float
        TFD threshold for pruning the conformers of `mol`.

    Returns
    -------
    E0 : float
        The lowest energy among the pruned conformers.
    Z0 : float
        The sum of ``exp(-(E - E0))`` over the energies ``E`` of the pruned conformers.

    Raises
    ------
    Exception
        If no conformer could be embedded.

    References
    ----------
    .. [1] `TorsionNet paper <https://arxiv.org/abs/2006.07078>`_
    """
    Chem.MMFFSanitizeMolecule(mol)
    embedded_ids = Chem.EmbedMultipleConfs(mol, numConfs=num_confs)
    if len(embedded_ids) < 1:
        raise Exception('Unable to embed molecule with conformer using rdkit')
    Chem.MMFFOptimizeMoleculeConfs(mol)

    pruned = prune_conformers(mol, pruning_thresh)
    conformer_energies = get_conformer_energies(pruned)
    E0 = conformer_energies.min()
    Z0 = np.sum(np.exp(-(conformer_energies - E0)))

    # The conformers are removed from the molecule returned by
    # `prune_conformers`, which is only the same object as the `mol`
    # argument when no pruning took place.
    pruned.RemoveAllConformers()

    return E0, Z0
示例#7
0
def prune_last_conformer(
        mol: Chem.Mol, tfd_thresh: float,
        energies: List[float]) -> Tuple[Chem.Mol, List[float]]:
    """Prunes the molecule with respect to its last conformer.

    The TFD (Torsional Fingerprint Deviation) between the last conformer and every other
    conformer is calculated. If any conformer which sorts before the last one by energy has a
    TFD below `tfd_thresh`, the last conformer is removed from `mol`. Otherwise the last conformer
    is kept, placed at its energy-sorted position, and every conformer sorting after it with a
    TFD below `tfd_thresh` is left out.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule to be pruned. All conformers except the last should be ordered by ascending
        energy, and conformer ids are used as positions ``0 .. n - 1``.
    tfd_thresh : float
        The minimum threshold for TFD between conformers. A negative value disables pruning.
    energies : list of float
        A list of all the energies of the conformers in `mol`, in conformer order.

    Returns
    -------
    mol : RDKit Mol
        The molecule after pruning. This is `mol` itself when nothing was done or when the last
        conformer was removed, and a new molecule otherwise.
    energies : list of float
        The energies of the conformers of the returned molecule, in the same order.
    """
    if tfd_thresh < 0 or mol.GetNumConformers() <= 1:
        return mol, energies

    last = mol.GetNumConformers() - 1

    # Position the last conformer would take among the already sorted ones.
    insert_at = bisect.bisect(energies[:-1], energies[-1])

    # TFD of every earlier conformer with respect to the last one.
    tfd = np.array(
        TorsionFingerprints.GetTFDBetweenConformers(mol,
                                                    range(0, last), [last],
                                                    useWeights=False))

    # A conformer sorting before the new one is within the threshold,
    # so the new conformer is dropped.
    if not np.all(tfd[:insert_at] >= tfd_thresh):
        mol.RemoveConformer(last)
        return mol, energies[:-1]

    # Otherwise the new conformer is kept and the conformers sorting
    # after it survive only if they are far enough away from it.
    keep = [
        *range(0, insert_at),
        last,
        *(later for later in range(insert_at, last)
          if tfd[later] >= tfd_thresh),
    ]

    pruned = Chem.Mol(mol)
    pruned.RemoveAllConformers()
    for conf_id in keep:
        pruned.AddConformer(mol.GetConformer(conf_id), assignId=True)

    return pruned, [energies[conf_id] for conf_id in keep]
示例#8
0
def get_max_diameter(mol: rdkit.Mol, conf=-1) -> float:
    """Gets the largest distance between two atoms in a molecule.

    The distance between the two atoms which are furthest apart is
    extended by the van der Waals radius of each of them.

    Args:
        mol: Molecule to calculate diameter of.
        conf: Conformer to use to calculate diameter

    Returns:
        maxd: Maximum diameter.
    """
    atom_pairs = it.combinations(range(mol.GetNumAtoms()), 2)
    first_id, second_id = max(
        atom_pairs,
        key=lambda pair: atom_distance(mol, *pair, conf),
    )

    first_radius = atom_vdw_radii[mol.GetAtomWithIdx(first_id).GetSymbol()]
    second_radius = atom_vdw_radii[mol.GetAtomWithIdx(second_id).GetSymbol()]

    maxd = atom_distance(mol, first_id, second_id, conf)
    maxd += (first_radius + second_radius)
    return maxd
示例#9
0
def prune_conformers(mol: Chem.Mol, tfd_thresh: float) -> Chem.Mol:
    """Prunes all the conformers in the molecule.

    Conformers are visited in order of increasing energy and one is kept only if its
    TFD (torsional fingerprint deviation) to every conformer kept so far is at least
    `tfd_thresh`. The lowest energy conformer is therefore always kept.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule to be pruned.
    tfd_thresh : float
        The minimum threshold for TFD between conformers. A negative value disables pruning.

    Returns
    -------
    mol : RDKit Mol
        `mol` itself when the threshold is negative or there is at most one conformer.
        Otherwise a new molecule holding the kept conformers, ordered by ascending energy.
    """
    if tfd_thresh < 0 or mol.GetNumConformers() <= 1:
        return mol

    energies = get_conformer_energies(mol)
    tfd = tfd_matrix(mol)

    kept = []
    for candidate in np.argsort(energies):  # increasing energy
        # With nothing kept yet this selection is empty, so the first
        # (lowest energy) candidate always passes the check below.
        tfd_to_kept = tfd[candidate][np.asarray(kept, dtype=int)]
        if np.all(tfd_to_kept >= tfd_thresh):
            kept.append(candidate)

    # A new molecule is used to hold the chosen conformers, which gives
    # them fresh conformer ids in energy order.
    pruned = Chem.Mol(mol)
    pruned.RemoveAllConformers()
    for conf_index in kept:
        pruned.AddConformer(mol.GetConformer(int(conf_index)), assignId=True)

    return pruned
示例#10
0
def get_conformer_energy(mol: Chem.Mol, confId: int = None) -> float:
    """Returns the energy of the conformer with `confId` in `mol`.

    When `confId` is ``None`` the number of conformers minus one is used as the id.
    The energy is calculated with the MMFF force field of `mol`.
    """
    target_id = mol.GetNumConformers() - 1 if confId is None else confId

    Chem.MMFFSanitizeMolecule(mol)
    force_field = Chem.MMFFGetMoleculeForceField(
        mol, Chem.MMFFGetMoleculeProperties(mol), confId=target_id)

    return force_field.CalcEnergy()
示例#11
0
def get_cavity_size(mol: rdkit.Mol, origin, conformer):
    """Calculates diameter of the conformer from `origin`.

    The cavity is measured by finding the atom nearest to
    `origin`, correcting for its van der Waals radius and multiplying
    by -2.

    Args:
        mol: Molecule to calculate diameter of.
        origin: Coordinates of the position from which
            the cavity is measured.
        conformer: ID of the conformer to use.
    Returns:
        (float): Cavity size of the molecule.
    """
    conf = mol.GetConformer(conformer)
    atom_vdw = np.array(
        [atom_vdw_radii[x.GetSymbol()] for x in mol.GetAtoms()])

    # The origin is passed as a plain (1, n) array. ``np.matrix`` is
    # discouraged by NumPy and is not accepted as input by recent
    # scikit-learn releases.
    # NOTE(review): assumes `euclidean_distances` is the scikit-learn
    # pairwise function - confirm against the file's imports.
    origin_row = np.asarray(origin, dtype=float).reshape(1, -1)
    distances = euclidean_distances(conf.GetPositions(), origin_row)

    # Distance from `origin` to the van der Waals surface of each atom.
    distances = distances.flatten() - atom_vdw
    return -2 * distances.min()
示例#12
0
def get_conformer_energies(mol: Chem.Mol) -> np.ndarray:
    """Returns the energy of each conformer in `mol`.

    The energies are calculated with the MMFF force field of `mol`.

    Parameters
    ----------
    mol : RDKit Mol
        The molecule whose conformers are evaluated.

    Returns
    -------
    numpy.ndarray
        A one dimensional float array holding one energy per conformer, in the
        order of ``mol.GetConformers()``. It is empty if `mol` has no conformers.
    """
    Chem.MMFFSanitizeMolecule(mol)
    # The MMFF properties depend only on the molecule, so they are
    # calculated once and shared by every conformer.
    mmff_props = Chem.MMFFGetMoleculeProperties(mol)
    energies = [
        Chem.MMFFGetMoleculeForceField(mol,
                                       mmff_props,
                                       confId=conf.GetId()).CalcEnergy()
        for conf in mol.GetConformers()
    ]

    return np.asarray(energies, dtype=float)
示例#13
0
def change_mol_bond(mol: AllChem.Mol, diff_mode: chem_details.ElectronMode,
                    bond_to_change_indcs: typing.Tuple[int, int]):
    """
    Return a copy of a molecule in which a pair of electrons has been added to or removed from a bond.

    :param mol: the molecule to start from; it is not modified, the edit happens on a copy.
    :param diff_mode: ``ElectronMode.REMOVE`` lowers the bond order by one (deleting the bond when nothing is left),
    ``ElectronMode.ADD`` raises it by one (creating a single bond when none exists, and leaving a bond whose
    raised order is not in ``NUM_TO_BOND`` as it is).
    :param bond_to_change_indcs: the indices of the two atoms of the bond.
    :return: the modified molecule.
    :raises RuntimeError: if `diff_mode` is neither ``REMOVE`` nor ``ADD``.
    """
    editable = Chem.RWMol(mol)
    begin_idx = bond_to_change_indcs[0]
    end_idx = bond_to_change_indcs[1]
    # Looked up on the untouched input molecule, so it stays usable after the copy is edited.
    current_bond = mol.GetBondBetweenAtoms(begin_idx, end_idx)

    if diff_mode is chem_details.ElectronMode.REMOVE:
        # The bond is always taken out first...
        editable.RemoveBond(begin_idx, end_idx)

        # ...and put back with one pair of electrons fewer, unless that leaves no bond at all.
        if current_bond:
            lowered_order = current_bond.GetBondTypeAsDouble() - 1
            if lowered_order != 0:
                editable.AddBond(begin_idx,
                                 end_idx,
                                 order=NUM_TO_BOND[lowered_order])

    elif diff_mode is chem_details.ElectronMode.ADD:
        if current_bond:
            # An existing bond is swapped for one with an extra pair of electrons.
            present_order = current_bond.GetBondTypeAsDouble()
            raised_order = present_order + 1
            if raised_order in NUM_TO_BOND:
                replacement = NUM_TO_BOND[raised_order]
            else:
                # already at maximum, so it is left as it is (we do not deal with aromatic)
                replacement = NUM_TO_BOND[present_order]
            editable.RemoveBond(begin_idx, end_idx)
            editable.AddBond(begin_idx, end_idx, order=replacement)
        else:
            # No bond yet, so a single bond is created.
            editable.AddBond(*bond_to_change_indcs, order=NUM_TO_BOND[1])

    else:
        raise RuntimeError("Invalid mode: {}".format(diff_mode))

    return editable.GetMol()
示例#14
0
def atom_distance(mol: rdkit.Mol,
                  atom1_id: int,
                  atom2_id: int,
                  conf=-1) -> float:
    """Gets distance between two atoms.

    Args:
        mol: Molecule containing the atoms.
        atom1_id: ID of first atom.
        atom2_id: ID of second atom.
        conf: ID of the conformer of `mol` to use.

    Returns:
        (float): Euclidean distance between two atoms.
    """

    # Both positions are read from the same conformer and handed to
    # the scipy function which measures their Euclidean separation.
    conformer = mol.GetConformer(conf)
    first_position = conformer.GetAtomPosition(atom1_id)
    second_position = conformer.GetAtomPosition(atom2_id)
    separation = euclidean(first_position, second_position)
    return separation
示例#15
0
def apply_shift(mol: rdkit.Mol, shift, conformer=-1):
    """
    Shift the coordinates of all atoms by `shift`.

    This does not modify the molecule. A modified copy is returned.

    Parameters
    ----------
    mol : :class:`rdkit.Mol`
        The molecule whose conformer is shifted.

    shift : :class:`numpy.array`
        A numpy array holding the value of the shift along each
        axis.

    conformer : :class:`int`, optional
        The id of the conformer to use.

    Returns
    -------
    :class:`rdkit.Chem.rdchem.Mol`
        A copy of the molecule where the coordinates have been
        shifted by `shift`. The shifted conformer is the only
        conformer the copy holds.

    """

    # Work on a copy of the conformer, so that the one held by `mol`
    # is left untouched.
    shifted_conf = rdkit.Conformer(mol.GetConformer(conformer))

    for atom in mol.GetAtoms():
        atom_id = atom.GetIdx()

        # The position object is turned into an array, so that the
        # shift can be added along every axis at once.
        shifted = np.array(shifted_conf.GetAtomPosition(atom_id)) + shift

        # rdkit stores positions in geometry instances, so the new
        # coordinates are wrapped in one before being written back.
        shifted_conf.SetAtomPosition(atom_id, Point3D(*shifted))

    # The copy of the molecule comes with copies of the conformers of
    # `mol`. These are removed first, so that the shifted conformer is
    # the only one the returned molecule holds.
    shifted_mol = rdkit.Mol(mol)
    shifted_mol.RemoveAllConformers()
    shifted_mol.AddConformer(shifted_conf)
    return shifted_mol
    def _init_from_rdkit_mol(
        self,
        molecule: rdkit.Mol,
        functional_groups: _FunctionalGroups,
        placer_ids: typing.Optional[abc.Iterable[int]],
    ) -> None:
        """
        Initialize from an :mod:`rdkit` molecule.

        Atoms, bonds and the position matrix are read from `molecule`
        (using its default conformer), after which the functional
        groups, *placer* ids and core ids are set.

        Parameters:

            molecule:
                The molecule.

            functional_groups:
                The :class:`.FunctionalGroup` instances the building
                block should have, and / or
                :class:`.FunctionalGroupFactory` instances used for
                creating them.

            placer_ids:
                The ids of *placer* atoms. These are the atoms which
                should be used for calculating the position of the
                building block. Depending on the values passed to
                `placer_ids`, and the functional groups in the building
                block, different *placer* ids will be used by the
                building block.

                #. `placer_ids` is passed to the initializer: the
                   passed *placer* ids will be used by the building
                   block.

                #. `placer_ids` is ``None`` and the building block has
                   functional groups: The *placer* ids of the
                   functional groups will be used as the *placer* ids
                   of the building block.

                #. `placer_ids` is ``None`` and `functional_groups` is
                   empty. All atoms of the molecule will be used for
                   *placer* ids.

        """

        def get_order(rdkit_bond):
            # Dative bonds are given the order 9, every other bond
            # keeps the order reported by rdkit.
            if rdkit_bond.GetBondType() == rdkit.BondType.DATIVE:
                return 9
            return rdkit_bond.GetBondTypeAsDouble()

        atom_list = []
        for rdkit_atom in molecule.GetAtoms():
            atom_list.append(
                Atom(
                    id=rdkit_atom.GetIdx(),
                    atomic_number=rdkit_atom.GetAtomicNum(),
                    charge=rdkit_atom.GetFormalCharge(),
                ))
        atoms = tuple(atom_list)

        bond_list = []
        for rdkit_bond in molecule.GetBonds():
            bond_list.append(
                Bond(
                    atom1=atoms[rdkit_bond.GetBeginAtomIdx()],
                    atom2=atoms[rdkit_bond.GetEndAtomIdx()],
                    order=get_order(rdkit_bond),
                ))
        bonds = tuple(bond_list)

        position_matrix = molecule.GetConformer().GetPositions()

        Molecule.__init__(
            self=self,
            atoms=atoms,
            bonds=bonds,
            position_matrix=position_matrix,
        )

        extracted_groups = self._extract_functional_groups(
            functional_groups=functional_groups,
        )
        self._with_functional_groups(extracted_groups)

        self._placer_ids = self._normalize_placer_ids(
            placer_ids=placer_ids,
            functional_groups=self._functional_groups,
        )

        core_ids = self._get_core_ids(
            functional_groups=self._functional_groups,
        )
        self._core_ids = frozenset(core_ids)
示例#17
0
def standardize(compound: AllChem.Mol,
                add_hs=True,
                remove_stereo=True,
                thorough=False) -> AllChem.Mol:
    """
    Standardizes an RDKit molecule by running various cleanup and sanitization operations.

    Parameters
    ----------
    compound : rdkit.Chem.rdchem.Mol
        A chemical compound.
    add_hs : bool
        If True, adds hydrogens to the compound.
    remove_stereo : bool
        If True, removes stereochemistry info from the compound.
    thorough : bool
        If True, additionally resets isotopes and strips small fragments from the compound.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        The standardized compound.
    """

    def _sanitize(target):
        # Full sanitization which raises on failure instead of reporting it.
        Chem.SanitizeMol(target,
                         sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL,
                         catchErrors=False)

    # Basic cleanup.
    Chem.Cleanup(compound)
    _sanitize(compound)
    AllChem.AssignStereochemistry(compound,
                                  cleanIt=True,
                                  force=True,
                                  flagPossibleStereoCenters=True)

    # Thorough mode: reset isotopes and neutralize the charge up front.
    if thorough:
        for atom in compound.GetAtoms():
            atom.SetIsotope(0)
        compound = _neutralize_charge(compound)
        _sanitize(compound)

    # Drop stereochemistry when requested.
    if remove_stereo:
        Chem.RemoveStereochemistry(compound)

    # Pass the compound through the InChI helper.
    compound = _commute_inchi(compound)

    # Thorough mode: keep the biggest fragment only. The sanitization
    # that follows is run in either mode.
    if thorough:
        compound = _strip_small_fragments(compound)
    _sanitize(compound)

    # The charge is neutralized in either mode.
    compound = _neutralize_charge(compound)
    _sanitize(compound)

    # Add protons when requested.
    if not add_hs:
        return compound
    return Chem.AddHs(compound, explicitOnly=False, addCoords=True)
示例#18
0
    def _init_from_rdkit_mol(
        self,
        molecule: rdkit.Mol,
        functional_groups: typing.Iterable[typing.Union[
            FunctionalGroup, FunctionalGroupFactory]],
        placer_ids: typing.Optional[tuple[int, ...]],
    ) -> None:
        """
        Initialize from an :mod:`rdkit` molecule.

        Atoms, bonds and the position matrix are read from `molecule`
        (using its default conformer), after which the functional
        groups, *placer* ids and core ids are set.

        Parameters:

            molecule:
                The molecule.

            functional_groups:
                An :class:`iterable` of :class:`.FunctionalGroup` or
                :class:`.FunctionalGroupFactory` or both.
                :class:`.FunctionalGroup` instances are added to the
                building block and :class:`.FunctionalGroupFactory`
                instances are used to create :class:`.FunctionalGroup`
                instances the building block should hold.
                :class:`.FunctionalGroup` instances are used to
                identify which atoms are modified during
                :class:`.ConstructedMolecule` construction.

            placer_ids:
                The ids of *placer* atoms. These are the atoms which
                should be used for calculating the position of the
                building block. Depending on the values passed to
                `placer_ids`, and the functional groups in the building
                block, different *placer* ids will be used by the
                building block.

                #. `placer_ids` is passed to the initializer: the
                   passed *placer* ids will be used by the building
                   block.

                #. `placer_ids` is ``None`` and the building block has
                   functional groups: The *placer* ids of the
                   functional groups will be used as the *placer* ids
                   of the building block.

                #. `placer_ids` is ``None`` and `functional_groups` is
                   empty. All atoms of the molecule will be used for
                   *placer* ids.

        """

        def get_order(rdkit_bond):
            # Dative bonds are given the order 9, every other bond
            # keeps the order reported by rdkit.
            if rdkit_bond.GetBondType() == rdkit.BondType.DATIVE:
                return 9
            return rdkit_bond.GetBondTypeAsDouble()

        atom_list = []
        for rdkit_atom in molecule.GetAtoms():
            atom_list.append(
                Atom(
                    rdkit_atom.GetIdx(),
                    rdkit_atom.GetAtomicNum(),
                    rdkit_atom.GetFormalCharge(),
                ))
        atoms = tuple(atom_list)

        bond_list = []
        for rdkit_bond in molecule.GetBonds():
            bond_list.append(
                Bond(
                    atom1=atoms[rdkit_bond.GetBeginAtomIdx()],
                    atom2=atoms[rdkit_bond.GetEndAtomIdx()],
                    order=get_order(rdkit_bond),
                ))
        bonds = tuple(bond_list)

        position_matrix = molecule.GetConformer().GetPositions()

        super().__init__(atoms, bonds, position_matrix)

        extracted_groups = self._extract_functional_groups(
            functional_groups=functional_groups,
        )
        self._with_functional_groups(extracted_groups)

        self._placer_ids = self._normalize_placer_ids(
            placer_ids=placer_ids,
            functional_groups=self._functional_groups,
        )

        core_ids = self._get_core_ids(
            functional_groups=self._functional_groups,
        )
        self._core_ids = frozenset(core_ids)