def test_monotonic_validate_invert_labels(arr_type, dtype, copy):
    """Exercise make_monotonic, check_labels and invert_labels end to end.

    The labels ``[0, 15, 10, 50, 20, 50]`` are mapped to monotonic indices,
    validated against an incomplete and a complete class set, and finally
    mapped back to their original values.  When the input is a device array
    (``arr_type == "cp"``) the test also checks whether ``copy`` left the
    input untouched or caused it to be updated in place.
    """
    on_device = arr_type == "cp"
    expected_monotonic = np.array([0, 2, 1, 4, 3, 4])

    labels = np.array([0, 15, 10, 50, 20, 50], dtype=dtype)
    original = labels.copy()
    if on_device:
        labels = cp.asarray(labels, dtype=dtype)
        labels_before = labels.copy()

    monotonic, _ = make_monotonic(labels, copy=copy)
    cp.cuda.Stream.null.synchronize()
    assert array_equal(monotonic.get(), expected_monotonic)

    # In-place updating is only checked when the data is on device.
    if on_device and copy:
        assert array_equal(labels_before.get(), labels.get())
    elif on_device:
        assert array_equal(labels.get(), monotonic.get())

    # Class set lacking indices 3 and 4: validation has to fail.
    incomplete_classes = cp.asarray([0, 1, 2], dtype=dtype)
    is_valid = check_labels(monotonic.get(), classes=incomplete_classes)
    cp.cuda.Stream.null.synchronize()
    assert not is_valid

    # Class set covering every monotonic index: validation has to pass.
    complete_classes = cp.asarray([0, 1, 2, 3, 4], dtype=dtype)
    is_valid = check_labels(monotonic.get(), classes=complete_classes)
    cp.cuda.Stream.null.synchronize()
    assert is_valid

    if on_device:
        monotonic_before = monotonic.copy()

    source_classes = cp.asarray([0, 10, 15, 20, 50], dtype=dtype)
    inverted = invert_labels(monotonic, classes=source_classes, copy=copy)
    cp.cuda.Stream.null.synchronize()

    if on_device and copy:
        assert array_equal(monotonic_before.get(), monotonic.get())
    elif on_device:
        assert array_equal(monotonic.get(), labels_before.get())

    assert array_equal(inverted.get(), original)
def _partial_fit(self, X, y, sample_weight=None,
                 _classes=None) -> "MultinomialNB":
    """Fold one batch of samples into the model's counters.

    Parameters
    ----------
    X : dense array-like or scipy / cupyx sparse matrix
        Training samples.  Sparse input is rebuilt as a cupyx COO matrix
        on device; anything else goes through ``input_to_cupy_array``.
    y : array-like
        Labels for ``X``; converted with ``input_to_cupy_array`` and then
        mapped through ``make_monotonic``.
    sample_weight : optional
        Accepted for signature compatibility; not read by this method.
    _classes : array-like, optional
        Class set to register on the first call.  When omitted, the
        classes returned by ``make_monotonic`` are registered instead.

    Returns
    -------
    self
    """
    if has_scipy():
        from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix
    else:
        from cuml.common.import_utils import (
            dummy_function_always_false as scipy_sparse_isspmatrix)

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    is_sparse = (scipy_sparse_isspmatrix(X)
                 or cupyx.scipy.sparse.isspmatrix(X))
    if not is_sparse:
        X = input_to_cupy_array(X, order='K').array
    else:
        # Move the COO triplet to the device, keeping each buffer's dtype.
        coo = X.tocoo()
        dev_rows = cp.asarray(coo.row, dtype=coo.row.dtype)
        dev_cols = cp.asarray(coo.col, dtype=coo.col.dtype)
        dev_vals = cp.asarray(coo.data, dtype=coo.data.dtype)
        X = cupyx.scipy.sparse.coo_matrix(
            (dev_vals, (dev_rows, dev_cols)), shape=coo.shape)

    y = input_to_cupy_array(y).array
    Y, label_classes = make_monotonic(y, copy=True)

    # NOTE(review): the value returned by check_labels is discarded in both
    # branches below - confirm whether a failed check should be reported.
    if self.fit_called_:
        check_labels(Y, self.classes_)
    else:
        # First batch: register the classes and allocate the counters.
        self.fit_called_ = True
        if _classes is None:
            self.classes_ = label_classes
        else:
            _classes, *_ = input_to_cuml_array(_classes, order='K')
            check_labels(Y, _classes)
            self.classes_ = _classes

        self._n_classes_ = self.classes_.shape[0]
        self._n_features_ = X.shape[1]
        self._init_counters(self._n_classes_, self._n_features_, X.dtype)

    self._count(X, Y)
    self._update_feature_log_prob(self.alpha)
    self._update_class_log_prior(class_prior=self._class_prior_)
    return self
def _partial_fit(self, X, y, sample_weight=None, _classes=None):
    """Fold one batch of samples into the model's counters.

    Parameters
    ----------
    X : numpy / cupy ndarray, or scipy / cupyx sparse matrix
        Training samples.  ndarrays are moved to the device with their
        dtype preserved; sparse matrices are rebuilt as a cupyx COO matrix
        on device.  Any other type is passed through unchanged.
    y : numpy / cupy ndarray or other
        Labels for ``X``.  ndarrays are moved to the device; the result is
        mapped through ``make_monotonic``.
    sample_weight : optional
        Accepted for signature compatibility; not read by this method.
    _classes : optional
        Class set to register on the first call.  When omitted, the
        classes returned by ``make_monotonic`` are registered instead.

    Returns
    -------
    self
    """
    # ``cupy.sparse`` was a deprecated alias of ``cupyx.scipy.sparse`` and
    # is not available in current CuPy releases.  Import the real module
    # locally so this block does not rely on a module-level cupyx import.
    import cupyx.scipy.sparse

    if isinstance(X, (np.ndarray, cp.ndarray)):
        X = cp.asarray(X, X.dtype)
    elif scipy.sparse.isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
        # Move the COO triplet to the device, keeping each buffer's dtype.
        X = X.tocoo()
        rows = cp.asarray(X.row, dtype=X.row.dtype)
        cols = cp.asarray(X.col, dtype=X.col.dtype)
        data = cp.asarray(X.data, dtype=X.data.dtype)
        X = cupyx.scipy.sparse.coo_matrix((data, (rows, cols)),
                                          shape=X.shape)

    if isinstance(y, (np.ndarray, cp.ndarray)):
        y = cp.asarray(y, y.dtype)

    Y, label_classes = make_monotonic(y, copy=True)

    if not self.fit_called_:
        # First batch: register the classes and allocate the counters.
        self.fit_called_ = True
        if _classes is not None:
            check_labels(Y, _classes)
            self.classes_ = _classes
        else:
            self.classes_ = label_classes

        self.n_classes_ = self.classes_.shape[0]
        self.n_features_ = X.shape[1]
        self._init_counters(self.n_classes_, self.n_features_, X.dtype)
    else:
        check_labels(Y, self.classes_)

    self._count(X, Y)
    self._update_feature_log_prob(self.alpha)
    self._update_class_log_prior(class_prior=self.class_prior)
    return self
def label_binarize(y, classes, neg_label=0, pos_label=1,
                   sparse_output=False) -> SparseCumlArray:
    """
    Stateless dummy (one-hot) encoding of multi-class labels.

    Parameters
    ----------
    y : array-like of size [n_samples,]
        labels to encode; ``y.dtype`` is read, so the object must expose it.
        NOTE(review): earlier documentation also listed
        [n_samples, n_classes]; the body only reads ``shape[0]`` and builds
        one entry per sample - confirm whether 2-D input is supported.
    classes : the set of unique classes in the input
        ``classes.dtype`` is read, so the object must expose it
    neg_label : integer
        value written wherever the dense result equals 0; only applied
        when ``sparse_output`` is false
    pos_label : integer
        value stored at each sample's class column
    sparse_output : bool
        whether to return sparse array

    Returns
    -------
    The CSR form of the encoded matrix when ``sparse_output`` is true,
    otherwise a dense array cast to ``y.dtype``.  The matrix is built with
    shape (number of samples, number of classes).

    Raises
    ------
    ValueError
        when ``check_labels`` reports that ``y`` is not covered by
        ``classes``
    """
    classes = cp.asarray(classes, dtype=classes.dtype)
    labels = cp.asarray(y, dtype=y.dtype)

    if not check_labels(labels, classes):
        raise ValueError("Unseen classes encountered in input")

    # One entry per sample: row = sample index, column = class index.
    row_idx = cp.arange(0, labels.shape[0], 1, dtype=y.dtype)
    class_idx, _ = make_monotonic(labels, classes, copy=True)

    # Convert from CumlArray to cupy
    col_idx = cp.asarray(class_idx)

    values = cp.full(row_idx.shape[0], pos_label, dtype=y.dtype)
    out_shape = (col_idx.shape[0], classes.shape[0])
    encoded = cupyx.scipy.sparse.coo_matrix(
        (values, (row_idx, col_idx)), shape=out_shape, dtype=cp.float32)

    cp.cuda.Stream.null.synchronize()

    if sparse_output:
        return encoded.tocsr()

    dense = encoded.toarray().astype(y.dtype)
    dense[dense == 0] = neg_label
    return dense
def _partial_fit(self, X, y, sample_weight=None, _classes=None,
                 convert_dtype=True) -> "MultinomialNB":
    """Fold one batch of samples into the model's counters.

    Parameters
    ----------
    X : dense array-like or scipy / cupyx sparse matrix
        Training samples.  Sparse input is converted with
        ``_convert_x_sparse``; dense input goes through
        ``input_to_cupy_array`` with float32 / float64 / int32 accepted.
    y : array-like
        Labels for ``X``.  The expected dtype is int32 when ``X.dtype`` is
        float32 or int32, and int64 otherwise.
    sample_weight : optional
        Accepted for signature compatibility; not read by this method.
    _classes : array-like, optional
        Class set to register on the first call.  When omitted, the
        classes returned by ``make_monotonic`` are registered instead.
    convert_dtype : bool
        When true, ``y`` and ``_classes`` are converted to the expected
        label dtype; when false, ``convert_to_dtype=False`` is passed on.

    Returns
    -------
    self
    """
    if has_scipy():
        from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix
    else:
        from cuml.common.import_utils import (
            dummy_function_always_false as scipy_sparse_isspmatrix)

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    if scipy_sparse_isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
        X = _convert_x_sparse(X)
        # TODO: Expanded this since sparse kernel doesn't
        # actually require the scipy sparse container format.
    else:
        X = input_to_cupy_array(
            X, order='K',
            check_dtype=[cp.float32, cp.float64, cp.int32]).array

    expected_y_dtype = (cp.int32 if X.dtype in [cp.float32, cp.int32]
                        else cp.int64)
    # Same conversion target for both the labels and the class set.
    target_dtype = expected_y_dtype if convert_dtype else False

    y = input_to_cupy_array(y, convert_to_dtype=target_dtype,
                            check_dtype=expected_y_dtype).array

    Y, label_classes = make_monotonic(y, copy=True)

    if not self.fit_called_:
        # First batch: register the classes and allocate the counters.
        self.fit_called_ = True
        if _classes is not None:
            _classes, *_ = input_to_cuml_array(
                _classes, order='K', convert_to_dtype=target_dtype)
            check_labels(Y, _classes)
            self.classes_ = _classes
        else:
            self.classes_ = label_classes

        self._n_classes_ = self.classes_.shape[0]
        self._n_features_ = X.shape[1]
        self._init_counters(self._n_classes_, self._n_features_, X.dtype)
    else:
        check_labels(Y, self.classes_)

    # Use cupyx.scipy.sparse here as well: ``cupy.sparse`` was a deprecated
    # alias that current CuPy releases no longer provide, and this check is
    # reached for every input, dense or sparse.
    if cupyx.scipy.sparse.isspmatrix(X):
        self._count_sparse(X.row, X.col, X.data, X.shape, Y)
    else:
        self._count(X, Y)

    self._update_feature_log_prob(self.alpha)
    self._update_class_log_prior(class_prior=self._class_prior_)
    return self