Пример #1
0
    def fit(self, X, y=None, **fit_params):
        """
        Fit the configured topic model to ``X``.

        Parameters
        ----------
        X: sparse matrix of shape (n_docs, n_words)
            The data matrix used to find the low-rank effects.

        y: Ignored

        fit_params:
            Optional keyword arguments; they are forwarded to the model
            constructor (not to the model's own ``fit`` call).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``model_type`` is neither "pLSA" nor "EnsTop".

        """
        # Choose the model class first so construction and fitting are
        # written only once below.
        kind = self.model_type
        if kind == "pLSA":
            model_cls = enstop.PLSA
        elif kind == "EnsTop":
            model_cls = enstop.EnsembleTopics
        else:
            raise ValueError("model_type is not supported")

        topic_model = model_cls(n_components=self.n_components, **fit_params)
        self.model_ = topic_model.fit(X)
        return self
Пример #2
0
    def fit(self, X, y=None, **fit_params):
        """
        Fit the configured topic model to a binarized copy of ``X``.

        Parameters
        ----------
        X: sparse matrix of shape (n_docs, n_words)
            The data matrix. It is binarized (non-zero entries become 1.0,
            stored as float32) and the model is fit to that indicator matrix.

        y: Ignored

        fit_params:
            Optional keyword arguments; they are forwarded to the model
            constructor (not to the model's own ``fit`` call).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``model_type`` is neither "pLSA" nor "EnsTop".

        """
        # Presence/absence indicator: 1.0 wherever X holds a non-zero value.
        indicator = (X != 0).astype(np.float32)

        kind = self.model_type
        if kind == "pLSA":
            model_cls = enstop.PLSA
        elif kind == "EnsTop":
            model_cls = enstop.EnsembleTopics
        else:
            raise ValueError("model_type is not supported")

        topic_model = model_cls(n_components=self.n_components, **fit_params)
        self.model_ = topic_model.fit(indicator)
        return self
Пример #3
0
    def fit(self, X, y=None, **fit_params):
        """
        Resolve the information function and fit the configured topic model.

        Parameters
        ----------
        X: sparse matrix of shape (n_docs, n_words)
            The data matrix the model is fit to. When ``binarize_matrix`` is
            truthy the model sees a float32 indicator matrix (1.0 wherever
            ``X`` is non-zero); otherwise it sees ``X`` cast to float32.

        y: Ignored

        fit_params:
            Optional keyword arguments; they are forwarded to the model
            constructor (not to the model's own ``fit`` call).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``information_function`` is neither callable nor a known name,
            or if ``model_type`` is neither "pLSA" nor "EnsTop".

        Notes
        -----
        Sets ``_information_function`` and ``model_``. ``token_counts_`` (the
        row sums of the indicator matrix) is only set when the matrix is
        binarized. The named information functions overwrite
        ``binarize_matrix``; a callable leaves it as configured.

        """
        # Accept either a user-supplied callable or the name of a built-in.
        if callable(self.information_function):
            self._information_function = self.information_function
        elif self.information_function in _INFORMATION_FUNCTIONS:
            self._information_function = _INFORMATION_FUNCTIONS[
                self.information_function]
        else:
            raise ValueError(
                "Unrecognized information_function; should be callable or "
                f"one of {list(_INFORMATION_FUNCTIONS.keys())}"
            )

        # The built-in information functions dictate whether the input is
        # binarized; any other value keeps the current setting.
        # NOTE(review): this overwrites a constructor-style attribute during
        # fit -- confirm that callers expect that.
        if self.information_function in ["idf", "average idf"]:
            self.binarize_matrix = True
        elif self.information_function in ["column KL", "Bernoulli KL"]:
            self.binarize_matrix = False

        # Validate model_type before touching X so a bad configuration fails
        # immediately instead of after a full-matrix conversion.
        if self.model_type == "pLSA":
            model_class = enstop.PLSA
        elif self.model_type == "EnsTop":
            model_class = enstop.EnsembleTopics
        else:
            raise ValueError("model_type is not supported")

        binarize = bool(self.binarize_matrix)
        if binarize:
            training_matrix = (X != 0).astype(np.float32)
        else:
            training_matrix = X.astype(np.float32)

        self.model_ = model_class(
            n_components=self.n_components, **fit_params
        ).fit(training_matrix)

        if binarize:
            # ravel() yields a 1-D vector whether sum(axis=1) returned an
            # (n_docs, 1) matrix or an already 1-D array.
            self.token_counts_ = np.asarray(
                training_matrix.sum(axis=1)).ravel()

        return self