Пример #1
0
    def _transform_mol(self, mol):
        """Compute the Morgan fingerprint of a single skchem molecule.

        This is the private per-molecule worker. Use `transform` for the
        public method, which genericizes the argument to iterables of mols.

        Args:
            mol (skchem.Mol): Molecule to calculate fingerprint for.

        Returns:
            np.array or dict:
                An array when ``n_feats`` is positive; otherwise the non-zero
                elements of the unhashed fingerprint (values reduced to 0/1
                when ``as_bits`` is set).
        """

        # Keyword options common to all three RDKit fingerprint calls below.
        shared_opts = dict(useFeatures=self.use_features,
                           useBondTypes=self.use_bond_types,
                           useChirality=self.use_chirality)

        # Case 1: fixed-length bit vector, returned as a uint8 array.
        if self.as_bits and self.n_feats > 0:
            bit_vect = GetMorganFingerprintAsBitVect(
                mol, self.radius, nBits=self.n_feats, **shared_opts)

            # The array is handed to ConvertToNumpyArray and read back
            # afterwards, i.e. it is populated by the call itself.
            dense = np.array(0)
            ConvertToNumpyArray(bit_vect, dense)
            return dense.astype(np.uint8)

        # Case 2: no fixed length requested -> sparse result keyed by element.
        if self.n_feats <= 0:
            sparse_fp = GetMorganFingerprint(mol, self.radius, **shared_opts)
            nonzero = sparse_fp.GetNonzeroElements()
            if not self.as_bits:
                return nonzero
            return {key: int(count > 0) for key, count in nonzero.items()}

        # Case 3: fixed-length hashed fingerprint, returned as an array.
        hashed_fp = GetHashedMorganFingerprint(
            mol, self.radius, nBits=self.n_feats, **shared_opts)
        return np.array(list(hashed_fp))
Пример #2
0
    def test__hashed_binary_fingerprints__ecfp(self) -> None:
        """Binary-mode output must set exactly the entries RDKit reports.

        The featurizer is run on both the SMILES strings and the Mol objects
        of the fixture; both result matrices are checked for shape and, row by
        row, against the non-zero elements of an RDKit Morgan fingerprint.
        """
        featurizer = CircularFPFeaturizer(fp_mode="binary")

        mat_from_smiles = featurizer.fit_transform(self.smis)  # using SMILES
        mat_from_mols = featurizer.fit_transform(self.mols)  # using Mol objects

        # Output shape: one row per molecule, one column per hash value.
        for matrix in (mat_from_smiles, mat_from_mols):
            self.assertEqual(matrix.shape[0], self.n_mols)
            self.assertEqual(matrix.shape[1], featurizer.max_hash_value_)

        # Fingerprint matrix structure
        for row, mol in enumerate(self.mols):
            reference = GetMorganFingerprint(
                mol,
                radius=featurizer.radius,
                useFeatures=featurizer.use_features_,
                useChirality=featurizer.use_chirality,
                useCounts=featurizer.use_counts_,
            )

            # Every element present in the reference must be set in both rows.
            for hash_idx in reference.GetNonzeroElements():
                self.assertTrue(mat_from_smiles[row, hash_idx])
                self.assertTrue(mat_from_mols[row, hash_idx])

            # No other elements are set: the stored values of the row must
            # sum to the number of reference elements.
            n_expected = len(reference.GetNonzeroElements())
            self.assertEqual(np.sum(mat_from_smiles[row, :].data), n_expected)
            self.assertEqual(np.sum(mat_from_mols[row, :].data), n_expected)
Пример #3
0
    def transform_mol(self,
                      molecule: Chem.rdchem.Mol) -> Tuple[np.ndarray, bool]:
        """Compute the Morgan fingerprint of ``molecule``.

        Args:
            molecule: RDKit molecule to fingerprint.

        Returns:
            A pair ``(fingerprint, True)``. The fingerprint is whatever
            ``rdkit_sparse_array_to_np`` builds from the non-zero elements of
            the RDKit fingerprint; the second item is always ``True`` here.
        """
        # Looked up in the instance dict directly, so an instance without a
        # 'use_chirality' entry silently falls back to False.
        chirality_flag = self.__dict__.get('use_chirality', False)

        sparse_fp = GetMorganFingerprint(
            molecule,
            radius=self.radius,
            useFeatures=self.use_features,
            useCounts=self.use_counts,
            useChirality=chirality_flag,
            **self.fingerprint_extra_args,
        )

        nonzero_items = sparse_fp.GetNonzeroElements().items()
        converted = rdkit_sparse_array_to_np(nonzero_items,
                                             use_counts=self.use_counts,
                                             fp_size=self.fp_size)

        return converted, True
Пример #4
0
    def test__string_output_format(self) -> None:
        """Check the ``sparse_string`` output against RDKit reference fingerprints.

        Each string produced by the featurizer is wrapped in braces and parsed
        as a dict literal; every non-zero element of the RDKit Morgan
        fingerprint must then appear in it with the same value.
        """
        # Function-scope import: only this test needs the literal parser.
        import ast

        fprintr = CircularFPFeaturizer(output_format="sparse_string")

        fps_str = fprintr.fit_transform(self.smis)  # using SMILES

        # Output shape: one string per molecule.
        self.assertEqual(self.n_mols, len(fps_str))

        # Fingerprint matrix structure
        for i, mol in enumerate(self.mols):
            fps_ref = GetMorganFingerprint(
                mol,
                radius=fprintr.radius,
                useFeatures=fprintr.use_features_,
                useChirality=fprintr.use_chirality,
                useCounts=fprintr.use_counts_,
            )

            # literal_eval only accepts Python literals, so a malformed or
            # unexpected string fails the test instead of being executed
            # as arbitrary code (which eval() would do).
            fp_i_from_str = ast.literal_eval("{" + fps_str[i] + "}")

            for hash_idx, cnt in fps_ref.GetNonzeroElements().items():
                self.assertEqual(fp_i_from_str[hash_idx], cnt)
Пример #5
0
    def test__hashed_counting_fingerprints__fcfp(self) -> None:
        """FCFP output must match the values of RDKit's non-zero elements.

        The featurizer is run on both the SMILES strings and the Mol objects
        of the fixture; both result matrices are checked for shape and, row by
        row, against an RDKit Morgan fingerprint computed with the
        featurizer's own settings.
        """
        featurizer = CircularFPFeaturizer(fp_type="FCFP")

        mat_from_smiles = featurizer.fit_transform(self.smis)  # using SMILES
        mat_from_mols = featurizer.fit_transform(self.mols)  # using Mol objects

        # Output shape: one row per molecule, one column per hash value.
        for matrix in (mat_from_smiles, mat_from_mols):
            self.assertEqual(matrix.shape[0], self.n_mols)
            self.assertEqual(matrix.shape[1], featurizer.max_hash_value_)

        # Fingerprint matrix structure
        for row, mol in enumerate(self.mols):
            reference = GetMorganFingerprint(
                mol,
                radius=featurizer.radius,
                useFeatures=featurizer.use_features_,
                useChirality=featurizer.use_chirality,
                useCounts=featurizer.use_counts_,
            )
            expected = reference.GetNonzeroElements()

            # Each reference element must carry the same value in both rows.
            for hash_idx, expected_value in expected.items():
                self.assertEqual(mat_from_smiles[row, hash_idx], expected_value)
                self.assertEqual(mat_from_mols[row, hash_idx], expected_value)