def _get_sliceXarray(self, row: slice, col: Sequence[int]) -> ss.csc_matrix:
    """Return the columns selected by ``col``, restricted to ``row``, as CSC.

    Parameters
    ----------
    row
        Slice applied to the rows of the assembled matrix.
    col
        Column selector: either integer positions or a boolean mask.

    Returns
    -------
    A ``csc_matrix`` built with ``self.shape[0]`` rows and one column per
    selected index, then sliced by ``row``.
    """
    idxs = np.asarray(col)
    if idxs.dtype == bool:
        # A bare ``np.where(mask)`` yields a *tuple* of index arrays, so
        # ``len()`` of it is the number of mask dimensions rather than the
        # number of selected columns. Convert the mask to a flat array of
        # positions instead.
        idxs = np.flatnonzero(idxs)
    # NOTE(review): get_compressed_vectors is defined elsewhere; presumably it
    # returns the (data, indices, indptr) triple for the requested columns.
    compressed = get_compressed_vectors(self, idxs)
    return ss.csc_matrix(compressed, shape=(self.shape[0], len(idxs)))[row, :]
def sparse_matrix_from_args(type, arg1, *args, **kwargs):
    """Construct a sparse matrix of the requested format.

    Parameters
    ----------
    type
        Format name; ``"coo"``, ``"csr"``, ``"csc"`` and ``"dia"`` are
        recognised.
    arg1, *args, **kwargs
        Forwarded unchanged to the matching ``_sp`` matrix constructor.

    Returns
    -------
    The constructed matrix, or ``None`` when ``type`` matches none of the
    recognised format names.
    """
    constructors = (
        ("coo", _sp.coo_matrix),
        ("csr", _sp.csr_matrix),
        ("csc", _sp.csc_matrix),
        ("dia", _sp.dia_matrix),
    )
    for format_name, constructor in constructors:
        if type == format_name:
            return constructor(arg1, *args, **kwargs)
    # Unrecognised format names fall through without raising.
    return None
def _get_missing_features_info(self, X):
    """Build the missing-value mask of ``X`` and locate the affected features.

    Parameters
    ----------
    X : {ndarray or sparse matrix}, shape (n_samples, n_features)
        The input data with missing values. ``X`` is expected to have been
        checked by ``fit`` and ``transform`` before this method is called.

    Returns
    -------
    imputer_mask : {ndarray or sparse matrix}, shape (n_samples, n_features)
        The imputer mask of the original data.

    features_with_missing : ndarray, shape (n_features_with_missing)
        The features containing missing values.
    """
    if not sparse.issparse(X):
        imputer_mask = _get_mask(X, self.missing_values)

        if self.features == 'missing-only':
            n_missing = imputer_mask.sum(axis=0)

        if self.sparse is True:
            imputer_mask = sparse.csc_matrix(imputer_mask)
    else:
        stored_mask = _get_mask(X.data, self.missing_values)

        # Build the mask in the same sparse format as X, reusing copies of
        # X's index arrays.
        if X.format == 'csr':
            make_mask = sparse.csr_matrix
        else:
            make_mask = sparse.csc_matrix

        # float32 is a temporary stand-in for bool, which cupy cannot
        # operate with as of now.
        imputer_mask = make_mask(
            (stored_mask, X.indices.copy(), X.indptr.copy()),
            shape=X.shape,
            dtype=np.float32,
        )

        # Count per-feature hits before the mask is densified or converted.
        if self.features == 'missing-only':
            n_missing = imputer_mask.sum(axis=0)

        if self.sparse is False:
            imputer_mask = imputer_mask.toarray()
        elif imputer_mask.format == 'csr':
            imputer_mask = imputer_mask.tocsc()

    if self.features == 'all':
        return imputer_mask, np.arange(X.shape[1])

    return imputer_mask, np.flatnonzero(n_missing)
def _get_sliceXint(self, row: slice, col: int) -> ss.csc_matrix:
    """Return column ``col``, restricted to ``row``, as a one-column CSC matrix.

    Parameters
    ----------
    row
        Slice applied to the rows of the single-column matrix.
    col
        Position of the column to extract.
    """
    # NOTE(review): get_compressed_vector is defined elsewhere; presumably it
    # yields constructor input describing the single requested column.
    compressed = get_compressed_vector(self, col)
    single_column = ss.csc_matrix(compressed, shape=(self.shape[0], 1))
    return single_column[row, :]