def _check_X_y(X, y):
    """Validate ``X`` and ``y`` without forcing a dtype on ``X``.

    Overrides the default checking so that strings used for categorical
    features are let through (``dtype=None`` is passed to ``check_X_y``).

    Returns
    -------
    X, y : the pair returned by ``check_X_y``.
    binarize_y : the second value returned by ``check_target_type`` when
        called with ``indicate_one_vs_all=True``.
    """
    # The target is checked first; the checked target is then validated
    # together with X.
    target, binarize_y = check_target_type(y, indicate_one_vs_all=True)
    X_checked, y_checked = check_X_y(
        X,
        target,
        accept_sparse=['csr', 'csc'],
        dtype=None,
    )
    return X_checked, y_checked, binarize_y
def fit(self, X, y):
    """Find the classes statistics before performing sampling.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : array-like, shape (n_samples,)
        Corresponding label for each sample in X.

    Returns
    -------
    self : object,
        Return self.

    """
    X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
    y = check_target_type(y)
    self.X_hash_, self.y_hash_ = hash_X_y(X, y)
    self.ratio_ = check_ratio(self.ratio, y, self._sampling_type)

    # Cluster the input space with the estimator stored at clusterer[0][1].
    cluster_estimator = self.clusterer[0][1]
    self.clustering_labels_ = cluster_estimator.fit_predict(X, y)

    # The majority label is the first label whose entry in ratio_ is 0;
    # every other label is treated as a minority label.
    # NOTE(review): indexing [0] raises IndexError when no entry is 0.
    zero_sample_labels = [
        label for label, n_samples in self.ratio_.items() if n_samples == 0
    ]
    majority_label = zero_sample_labels[0]
    minority_labels = [
        label for label in self.ratio_.keys() if label != majority_label
    ]

    # Placeholder for the clusters imbalance ratios; like minority_labels it
    # is not used further in the visible code.
    weights = pd.DataFrame()

    return self
def fit(self, X, y):
    """Validate the inputs, store their hashes and delegate to ``_fit``.

    Parameters
    ----------
    X : array-like or sparse matrix (CSR/CSC accepted)
        Data passed to ``check_X_y``.

    y : array-like
        Target passed to ``check_X_y`` and ``check_target_type``.

    Returns
    -------
    self : object
        Return self.
    """
    X_valid, y_valid = check_X_y(X, y, accept_sparse=['csr', 'csc'])
    y_valid = check_target_type(y_valid)

    # Hashes of the validated data are kept on the instance.
    self.X_hash_, self.y_hash_ = hash_X_y(X_valid, y_valid)

    self._fit(X_valid, y_valid)
    return self
def _check_X_y(self, X, y):
    """Validate ``X`` and ``y`` without forcing a dtype on ``X``.

    Overrides the default checking so that strings used for categorical
    features are let through (``dtype=None`` is passed to
    ``self._validate_data``).

    Returns
    -------
    X, y : the pair returned by ``self._validate_data``.
    binarize_y : the second value returned by ``check_target_type`` when
        called with ``indicate_one_vs_all=True``.
    """
    target, binarize_y = check_target_type(y, indicate_one_vs_all=True)
    X_checked, y_checked = self._validate_data(
        X,
        target,
        accept_sparse=["csr", "csc"],
        dtype=None,
        reset=True,
    )
    return X_checked, y_checked, binarize_y
def fit(self, X, y):
    """Find the classes statistics to perform sampling.

    Parameters
    ----------
    X : 2d ndarray or scipy sparse matrix, shape [n_samples, n_features]
        Matrix containing the data which have to be sampled.

    y : 1d ndarray, shape [n_samples]
        Corresponding label for each sample in X.

    Returns
    -------
    self

    """
    X_valid, y_valid = check_X_y(X, y, accept_sparse=['csr', 'csc'])
    y_valid = check_target_type(y_valid)

    # Keep hashes of the validated data on the instance.
    self.X_hash_, self.y_hash_ = hash_X_y(X_valid, y_valid)

    # Resolve the user-provided ratio against the validated target.
    self.ratio_ = check_ratio(self.ratio, y_valid)
    return self
def fit(self, X, y):
    """Find the classes statistics before performing sampling.

    For every label found in ``y`` a target count is derived from
    ``self.min_freq`` and ``self.max_freq``; these counts are used to build
    a ``RandomUnderSampler`` and a ``RandomOverSampler`` that are stored on
    the instance (no resampling is done here).

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Matrix containing the data which have to be sampled.

    y : array-like, shape (n_samples,)
        Corresponding label for each sample in X.

    Returns
    -------
    self : object,
        Return self.

    """
    X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'], dtype=None)
    y = check_target_type(y)
    self.ratio_ = self.ratio
    self.X_hash_, self.y_hash_ = hash_X_y(X, y)

    # Count each label directly. ``np.bincount`` indexed by label only works
    # for non-negative integer labels, whereas ``np.unique`` handles any
    # sortable label type (negative ints, floats, strings) and yields the
    # same counts for non-negative integers.
    labels, counts = np.unique(y, return_counts=True)

    under_dict = {}
    over_dict = {}
    for lbl, count in zip(labels, counts):
        if count < self.min_freq:
            # Rare class: keep everything when under-sampling, then grow it
            # up to min_freq when over-sampling.
            under_dict[lbl] = count
            over_dict[lbl] = self.min_freq
        elif count > self.max_freq:
            # Frequent class: cut down to max_freq and keep it there.
            under_dict[lbl] = self.max_freq
            over_dict[lbl] = self.max_freq
        else:
            # Already within [min_freq, max_freq]: leave untouched.
            under_dict[lbl] = count
            over_dict[lbl] = count

    self.under_sampler = RandomUnderSampler(
        ratio=under_dict, random_state=self.random_state)
    self.over_sampler = RandomOverSampler(
        ratio=over_dict, random_state=self.random_state)
    return self
def test_check_target_type_ova(target, output_target, is_ova):
    """Check both values returned with ``indicate_one_vs_all=True``.

    The integer-cast ``target`` must be converted into the integer-cast
    ``output_target`` and the returned flag must equal ``is_ova``.
    """
    int_target = target.astype(int)
    result = check_target_type(int_target, indicate_one_vs_all=True)
    converted_target, binarize_target = result

    expected_target = output_target.astype(int)
    assert_array_equal(converted_target, expected_target)
    assert binarize_target == is_ova
def test_check_target_type(target, output_target):
    """Check that the integer-cast ``target`` converts to ``output_target``."""
    int_target = target.astype(int)
    converted_target = check_target_type(int_target)

    expected_target = output_target.astype(int)
    assert_array_equal(converted_target, expected_target)
def fit(self, X, y):
    """Validate the inputs and set a placeholder ``sampling_strategy_``.

    ``X`` and ``y`` are validated first (any sparse format accepted), then
    the target type is checked. The validated values are not stored; only
    the literal placeholder attribute is set.

    Returns
    -------
    self : object
        Return self.
    """
    X_checked, y_checked = check_X_y(X, y, accept_sparse=True)
    # The one-vs-all flag returned alongside the target is ignored.
    y_checked, _ = check_target_type(y_checked, indicate_one_vs_all=True)

    self.sampling_strategy_ = 'sampling_strategy_'
    return self
def fit(self, X, y):
    """Check the target type, validate ``X`` and ``y``, and return self.

    The target type is checked before the joint validation (any sparse
    format accepted). Nothing is stored on the instance.

    Returns
    -------
    self : object
        Return self.
    """
    # The one-vs-all flag returned alongside the target is ignored.
    target, _ = check_target_type(y, indicate_one_vs_all=True)
    X_checked, y_checked = check_X_y(X, target, accept_sparse=True)
    return self
def _check_X_y(X, y):
    """Check the target type, then validate ``X`` and ``y`` together.

    ``check_X_y`` is called with ``dtype=None`` and accepts CSR/CSC sparse
    input.

    Returns
    -------
    X, y : the pair returned by ``check_X_y``.
    binarize_y : the second value returned by ``check_target_type`` when
        called with ``indicate_one_vs_all=True``.
    """
    checked_target, binarize_y = check_target_type(
        y, indicate_one_vs_all=True
    )
    validated = check_X_y(
        X, checked_target, accept_sparse=['csr', 'csc'], dtype=None
    )
    X_out, y_out = validated
    return X_out, y_out, binarize_y
def test_check_target_type_ova(target, output_target, is_ova):
    """Verify the converted target and the one-vs-all indicator.

    ``check_target_type`` is called on the integer-cast ``target`` with
    ``indicate_one_vs_all=True``; its two results are compared with the
    integer-cast ``output_target`` and with ``is_ova`` respectively.
    """
    outcome = check_target_type(
        target.astype(int),
        indicate_one_vs_all=True,
    )
    converted, flagged_ova = outcome

    assert_array_equal(converted, output_target.astype(int))
    assert flagged_ova == is_ova
def fit(self, X, y):
    """Validate dense inputs and set a placeholder ``sampling_strategy_``.

    The target type is checked first, then ``X`` and ``y`` are validated
    with ``accept_sparse=False``. The validated values are not stored; only
    the literal placeholder attribute is set.

    Returns
    -------
    self : object
        Return self.
    """
    # The one-vs-all flag returned alongside the target is ignored.
    target, _ = check_target_type(y, indicate_one_vs_all=True)
    X_checked, y_checked = check_X_y(X, target, accept_sparse=False)

    self.sampling_strategy_ = 'sampling_strategy_'
    return self
def test_check_target_warning():
    """Check that a 2x2 integer target triggers the expected UserWarning."""
    two_dim_target = np.arange(4).reshape((2, 2))
    expected_message = 'should be of types'

    with pytest.warns(UserWarning, match=expected_message):
        check_target_type(two_dim_target)
def _check_X_y(X, y):
    """Run the target-type check followed by the joint ``X``/``y`` check.

    No dtype is imposed on ``X`` (``dtype=None``) and sparse input is
    accepted in CSR or CSC format.

    Returns
    -------
    tuple
        ``(X, y, binarize_y)`` where ``X`` and ``y`` come from
        ``check_X_y`` and ``binarize_y`` is the second value returned by
        ``check_target_type(..., indicate_one_vs_all=True)``.
    """
    sparse_formats = ['csr', 'csc']
    y_typed, binarize_y = check_target_type(y, indicate_one_vs_all=True)
    X_valid, y_valid = check_X_y(
        X, y_typed, accept_sparse=sparse_formats, dtype=None
    )
    return X_valid, y_valid, binarize_y
def test_check_target_warning():
    """A target of shape (2, 2) must make ``check_target_type`` warn."""
    # Values 0..3 arranged as a 2x2 array.
    target = np.arange(4).reshape((2, 2))

    warning_check = pytest.warns(UserWarning, match='should be of types')
    with warning_check:
        check_target_type(target)
def test_check_target_type(target, output_target):
    """Compare ``check_target_type`` output with the expected target.

    Both ``target`` and ``output_target`` are cast to ``int`` before use.
    """
    converted = check_target_type(target.astype(int))
    expected = output_target.astype(int)
    assert_array_equal(converted, expected)
def fit(self, X, y):
    """Run the input checks and return the estimator unchanged.

    The target goes through ``check_target_type`` (one-vs-all flag
    discarded) and is then validated together with ``X`` by ``check_X_y``
    with sparse input accepted. No attribute is set.

    Returns
    -------
    self : object
        Return self.
    """
    checked = check_target_type(y, indicate_one_vs_all=True)
    y_typed, _ = checked
    X_valid, y_valid = check_X_y(X, y_typed, accept_sparse=True)
    return self