示例#1
0
    def get_conformer_rmsd(mol: RDKitMol) -> np.ndarray:
        """Build the symmetric matrix of pairwise conformer RMSD values.

        Each unordered pair of conformers is evaluated once with
        ``AllChem.GetBestRMS`` and the value is mirrored across the diagonal.
        The diagonal itself is left at zero.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object

        Returns
        -------
        rmsd: np.ndarray
            Conformer-conformer RMSD values. The shape is
            `(NumConformers, NumConformers)`.

        Raises
        ------
        ValueError
            If RDKit cannot be imported.
        """
        try:
            from rdkit.Chem import AllChem
        except ModuleNotFoundError:
            raise ValueError("This function requires RDKit to be installed.")

        n_conformers = mol.GetNumConformers()
        pairwise = np.zeros((n_conformers, n_conformers), dtype=float)
        conformer_ids = [conformer.GetId() for conformer in mol.GetConformers()]

        # Walk the strict upper triangle in row-major order and mirror each
        # value into the lower triangle.
        for row, ref_id in enumerate(conformer_ids):
            for col in range(row + 1, len(conformer_ids)):
                pairwise[row, col] = AllChem.GetBestRMS(mol, mol, ref_id,
                                                        conformer_ids[col])
                pairwise[col, row] = pairwise[row, col]
        return pairwise
示例#2
0
    def prune_conformers(self, mol: RDKitMol) -> RDKitMol:
        """Prune conformers from a molecule using an RMSD threshold.

        Conformers are visited in order of increasing energy. The lowest-energy
        conformer is always kept; each later one is kept only if its RMSD to
        every conformer kept so far is at least ``self.rmsd_threshold``.
        Selection stops once ``self.max_conformers`` conformers are kept.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object

        Returns
        -------
        new_mol: rdkit.Chem.rdchem.Mol
            A new rdkit.Chem.rdchem.Mol containing the chosen conformers,
            sorted by increasing energy. The input ``mol`` itself is returned
            unchanged when ``self.rmsd_threshold`` is negative or the molecule
            has at most one conformer.

        Raises
        ------
        ValueError
            If RDKit cannot be imported.
        """
        try:
            from rdkit import Chem
        except ModuleNotFoundError:
            raise ValueError("This function requires RDKit to be installed.")

        if self.rmsd_threshold < 0 or mol.GetNumConformers() <= 1:
            return mol
        energies = self.get_conformer_energies(mol)
        rmsd = self.get_conformer_rmsd(mol)

        # Positions (not conformer IDs) of the conformers to retain, in order
        # of increasing energy.
        keep: List[int] = []
        for i in np.argsort(energies):
            # always keep lowest-energy conformer
            if not keep:
                keep.append(i)
                continue

            # every remaining conformer would be discarded once the cap is
            # reached, so there is nothing left to decide
            if len(keep) >= self.max_conformers:
                break

            # keep only conformers that are at least rmsd_threshold away from
            # every conformer selected so far
            this_rmsd = rmsd[i][np.asarray(keep, dtype=int)]
            if np.all(this_rmsd >= self.rmsd_threshold):
                keep.append(i)

        # create a new molecule to hold the chosen conformers
        # this ensures proper conformer IDs and energy-based ordering
        new_mol = Chem.Mol(mol)
        new_mol.RemoveAllConformers()
        conf_ids = [conf.GetId() for conf in mol.GetConformers()]
        for i in keep:
            new_mol.AddConformer(mol.GetConformer(conf_ids[i]), assignId=True)
        return new_mol
示例#3
0
    def coulomb_matrix(self, mol: RDKitMol) -> np.ndarray:
        """Generate Coulomb matrices for each conformer of the given molecule.

        Off-diagonal entries are ``z_i * z_j / d_ij`` and diagonal entries are
        ``0.5 * z_i ** 2.4``. Every matrix is padded with ``pad_array`` to
        ``self.max_atoms``. When ``self.randomize`` is set, the matrices
        yielded by ``self.randomize_coulomb_matrix`` are stored instead of the
        plain one.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object. If it has no conformers, hydrogens are added and
            one conformer is embedded with ETKDG.

        Returns
        -------
        np.ndarray
            The coulomb matrices of the given molecule

        Raises
        ------
        ImportError
            If RDKit cannot be imported.
        """
        try:
            from rdkit import Chem
            from rdkit.Chem import AllChem
        except ModuleNotFoundError:
            raise ImportError("This class requires RDKit to be installed.")

        # Check whether num_confs >=1 or not
        if len(mol.GetConformers()) == 0:
            mol = Chem.AddHs(mol)
            AllChem.EmbedMolecule(mol, AllChem.ETKDG())

        if self.remove_hydrogens:
            mol = Chem.RemoveHs(mol)

        z = [atom.GetAtomicNum() for atom in mol.GetAtoms()]
        diag = np.arange(mol.GetNumAtoms())
        # Both terms depend only on the atomic numbers, so compute them once
        # rather than once per conformer.
        charge_products = np.outer(z, z)
        self_terms = 0.5 * np.array(z)**2.4

        rval = []
        for conf in mol.GetConformers():
            # Work on a private float copy so the helper's array is not mutated.
            # NOTE(review): assumes get_interatomic_distances returns a square
            # (n_atoms, n_atoms) array - confirm against the helper.
            d = np.array(self.get_interatomic_distances(conf), dtype=float)
            # The self-distance on the diagonal would trigger a divide-by-zero
            # RuntimeWarning; the diagonal is overwritten just below, so use a
            # harmless placeholder. Off-diagonal entries are left untouched.
            d[diag, diag] = 1.0
            m = charge_products / d
            m[diag, diag] = self_terms
            if self.randomize:
                for random_m in self.randomize_coulomb_matrix(m):
                    rval.append(pad_array(random_m, self.max_atoms))
            else:
                rval.append(pad_array(m, self.max_atoms))
        return np.asarray(rval)
示例#4
0
    def _featurize(self, datapoint: RDKitMol, **kwargs) -> np.ndarray:
        """Calculate atomic coordinates.

        Coordinates are read from the conformer with ID 0. If the molecule has
        no conformers, hydrogens are added, one conformer is embedded with
        ETKDG, and the hydrogens are removed again before reading positions.

        Parameters
        ----------
        datapoint: rdkit.Chem.rdchem.Mol
            RDKit Mol object
        **kwargs
            Only the deprecated ``mol`` keyword is inspected. When present it
            replaces ``datapoint`` and a ``DeprecationWarning`` is issued.

        Returns
        -------
        np.ndarray
            A numpy array of atomic coordinates. The shape is `(n_atoms, 3)`.

        Raises
        ------
        ImportError
            If RDKit cannot be imported.
        """
        try:
            from rdkit import Chem
            from rdkit.Chem import AllChem
        except ModuleNotFoundError:
            raise ImportError("This class requires RDKit to be installed.")
        if 'mol' in kwargs:
            import warnings

            datapoint = kwargs.get("mol")
            # Warn rather than raise: raising made the assignment above dead
            # code and turned the deprecated keyword into a hard failure.
            warnings.warn(
                'Mol is being phased out as a parameter, please pass "datapoint" instead.',
                DeprecationWarning,
                stacklevel=2)

        # Check whether num_confs >=1 or not
        if len(datapoint.GetConformers()) == 0:
            datapoint = Chem.AddHs(datapoint)
            AllChem.EmbedMolecule(datapoint, AllChem.ETKDG())
            datapoint = Chem.RemoveHs(datapoint)

        n_atoms = datapoint.GetNumAtoms()
        if n_atoms == 0:
            # Nothing to read; avoids looking up a conformer that is not needed.
            return np.zeros((0, 3))

        conformer = datapoint.GetConformer(0)
        positions = (conformer.GetAtomPosition(i) for i in range(n_atoms))
        coords = np.array([[p.x, p.y, p.z] for p in positions], dtype=float)

        # RDKit stores atomic coordinates in Angstrom. Atomic unit of length is the
        # bohr (1 bohr = 0.529177 Angstrom). Converting units makes gradient calculation
        # consistent with most QM software packages.
        if self.use_bohr:
            coords /= 0.52917721092

        return coords
示例#5
0
    def minimize_conformers(self, mol: RDKitMol) -> None:
        """Run a force-field minimization on every conformer of ``mol``.

        A force field is obtained from ``self.get_molecule_force_field`` for
        each conformer ID and its ``Minimize`` method is called. Nothing is
        returned.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object with embedded conformers.
        """
        conformer_ids = [conformer.GetId() for conformer in mol.GetConformers()]
        for conformer_id in conformer_ids:
            force_field = self.get_molecule_force_field(mol,
                                                        conf_id=conformer_id)
            force_field.Minimize()
示例#6
0
    def _featurize(self, mol: RDKitMol) -> np.ndarray:
        """Calculate atomic coordinates.

        Coordinates are read from the conformer with ID 0. If the molecule has
        no conformers, hydrogens are added, one conformer is embedded with
        ETKDG, and the hydrogens are removed again before reading positions.

        Parameters
        ----------
        mol: rdkit.Chem.rdchem.Mol
            RDKit Mol object

        Returns
        -------
        np.ndarray
            A numpy array of atomic coordinates. The shape is `(n_atoms, 3)`.

        Raises
        ------
        ImportError
            If RDKit cannot be imported.
        """
        try:
            from rdkit import Chem
            from rdkit.Chem import AllChem
        except ModuleNotFoundError:
            raise ImportError("This class requires RDKit to be installed.")

        # Check whether num_confs >=1 or not
        if len(mol.GetConformers()) == 0:
            mol = Chem.AddHs(mol)
            AllChem.EmbedMolecule(mol, AllChem.ETKDG())
            mol = Chem.RemoveHs(mol)

        n_atoms = mol.GetNumAtoms()
        if n_atoms == 0:
            # Nothing to read; avoids looking up a conformer that is not needed.
            return np.zeros((0, 3))

        # Look the conformer up once instead of once per atom.
        conformer = mol.GetConformer(0)
        positions = (conformer.GetAtomPosition(i) for i in range(n_atoms))
        coords = np.array([[p.x, p.y, p.z] for p in positions], dtype=float)

        # RDKit stores atomic coordinates in Angstrom. Atomic unit of length is the
        # bohr (1 bohr = 0.529177 Angstrom). Converting units makes gradient calculation
        # consistent with most QM software packages.
        if self.use_bohr:
            # Plain array division replaces the explicit call to the
            # Python-2-style ``__idiv__`` dunder on each point.
            coords /= 0.52917721092

        return coords
示例#7
0
  def get_conformer_energies(self, mol: RDKitMol) -> np.ndarray:
    """
    Evaluate the force-field energy of every conformer.

    For each conformer a force field is obtained from
    ``self.get_molecule_force_field`` and its ``CalcEnergy`` result is
    collected, in the order the conformers are returned by the molecule.

    Parameters
    ----------
    mol: rdkit.Chem.rdchem.Mol
      RDKit Mol object with embedded conformers.

    Returns
    -------
    energies : np.ndarray
      One float energy per conformer.
    """
    per_conformer = [
        self.get_molecule_force_field(mol,
                                      conf_id=conformer.GetId()).CalcEnergy()
        for conformer in mol.GetConformers()
    ]
    return np.asarray(per_conformer, dtype=float)