def fit(self, X, y=None, **fit_params):
    """
    Parameters
    ----------
    X: sparse matrix of shape (n_docs, n_words)
        The data matrix used to find the low-rank effects

    y: Ignored

    fit_params: optional model params

    Returns
    -------
    self
    """
    if self.model_type == "pLSA":
        self.model_ = enstop.PLSA(n_components=self.n_components, **fit_params).fit(X)
    elif self.model_type == "EnsTop":
        self.model_ = enstop.EnsembleTopics(
            n_components=self.n_components, **fit_params
        ).fit(X)
    else:
        raise ValueError("model_type is not supported")

    return self
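
# A minimal usage sketch for the fit method above. It assumes (hypothetically) that the
# method lives on a transformer class named InformationWeightTransformer whose constructor
# takes n_components and model_type; neither name is confirmed by this code. It also assumes
# the fitted enstop model exposes the usual components_ attribute, which is an assumption
# about the enstop API rather than something shown here.
def _basic_fit_example():
    import numpy as np
    import scipy.sparse

    # Small random document-word count matrix standing in for real data.
    X = scipy.sparse.random(50, 300, density=0.05, format="csr", dtype=np.float32)

    transformer = InformationWeightTransformer(n_components=5, model_type="pLSA")
    transformer.fit(X)

    # Hypothetical: the topic-word matrix learned by the low-rank model.
    return transformer.model_.components_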
def fit(self, X, y=None, **fit_params):
    """
    Parameters
    ----------
    X: sparse matrix of shape (n_docs, n_words)
        The data matrix that gets binarized and that the model is attempting to fit to.

    y: Ignored

    fit_params: optional model params

    Returns
    -------
    self
    """
    binary_indicator_matrix = (X != 0).astype(np.float32)
    if self.model_type == "pLSA":
        self.model_ = enstop.PLSA(n_components=self.n_components, **fit_params).fit(
            binary_indicator_matrix
        )
    elif self.model_type == "EnsTop":
        self.model_ = enstop.EnsembleTopics(
            n_components=self.n_components, **fit_params
        ).fit(binary_indicator_matrix)
    else:
        raise ValueError("model_type is not supported")

    return self
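
# A standalone sketch of the binarization step used above, to show what the topic model
# actually sees: every non-zero count is mapped to 1.0 and the sparsity pattern is
# preserved. This is an illustration only, not part of the transformer itself.
def _binarization_example():
    import numpy as np
    import scipy.sparse

    X = scipy.sparse.csr_matrix(
        np.array([[3, 0, 1],
                  [0, 2, 0]], dtype=np.float32)
    )
    binary = (X != 0).astype(np.float32)
    # binary.toarray() == [[1., 0., 1.],
    #                      [0., 1., 0.]]
    return binary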
def fit(self, X, y=None, **fit_params):
    """
    Parameters
    ----------
    X: sparse matrix of shape (n_docs, n_words)
        The data matrix (potentially binarized) that the model is attempting to fit to.

    y: Ignored

    fit_params: optional model params

    Returns
    -------
    self
    """
    if callable(self.information_function):
        self._information_function = self.information_function
    elif self.information_function in _INFORMATION_FUNCTIONS:
        self._information_function = _INFORMATION_FUNCTIONS[self.information_function]
    else:
        raise ValueError(
            f"Unrecognized information_function; should be callable or one of {_INFORMATION_FUNCTIONS.keys()}"
        )

    if self.information_function in ["idf", "average idf"]:
        self.binarize_matrix = True
    elif self.information_function in ["column KL", "Bernoulli KL"]:
        self.binarize_matrix = False

    if self.binarize_matrix:
        binary_indicator_matrix = (X != 0).astype(np.float32)
        if self.model_type == "pLSA":
            self.model_ = enstop.PLSA(
                n_components=self.n_components, **fit_params
            ).fit(binary_indicator_matrix)
        elif self.model_type == "EnsTop":
            self.model_ = enstop.EnsembleTopics(
                n_components=self.n_components, **fit_params
            ).fit(binary_indicator_matrix)
        else:
            raise ValueError("model_type is not supported")
        self.token_counts_ = np.array(binary_indicator_matrix.sum(axis=1)).T[0]
    else:
        if self.model_type == "pLSA":
            self.model_ = enstop.PLSA(
                n_components=self.n_components, **fit_params
            ).fit(X.astype(np.float32))
        elif self.model_type == "EnsTop":
            self.model_ = enstop.EnsembleTopics(
                n_components=self.n_components, **fit_params
            ).fit(X.astype(np.float32))
        else:
            raise ValueError("model_type is not supported")

    return self
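
# A minimal end-to-end sketch of calling the final fit above. The class name
# InformationWeightTransformer and its constructor signature are assumptions made for
# illustration; the information_function values ("idf", "average idf", "column KL",
# "Bernoulli KL") and model_type values ("pLSA", "EnsTop") come directly from the code.
def _fit_example():
    import numpy as np
    import scipy.sparse

    # Random sparse document-word count matrix standing in for real data.
    X = scipy.sparse.random(200, 1000, density=0.02, format="csr", dtype=np.float32)

    # "idf" routes to the binarized branch above; "column KL" would fit on raw counts.
    transformer = InformationWeightTransformer(
        n_components=10, model_type="pLSA", information_function="idf"
    )
    transformer.fit(X)
    return transformer.model_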