def _check_X_y(X, y):
    """Validate ``X`` and ``y`` so that string categorical features pass.

    The target goes through ``check_target_type`` first; both inputs are
    then handed to ``check_X_y`` with ``dtype=None``.

    Returns
    -------
    tuple
        ``(X, y, binarize_y)``, where ``binarize_y`` is the second value
        returned by ``check_target_type(..., indicate_one_vs_all=True)``.
    """
    checked_target, binarize_y = check_target_type(
        y, indicate_one_vs_all=True
    )
    checked_data, checked_target = check_X_y(
        X, checked_target, accept_sparse=['csr', 'csc'], dtype=None
    )
    return checked_data, checked_target, binarize_y
示例#2
0
    def fit(self, X, y):
        """Compute the class statistics needed before sampling.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.

        y : array-like, shape (n_samples,)
            Corresponding label for each sample in X.

        Returns
        -------
        self : object
            The fitted instance.

        """
        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
        y = check_target_type(y)
        x_hash, y_hash = hash_X_y(X, y)
        self.X_hash_ = x_hash
        self.y_hash_ = y_hash
        self.ratio_ = check_ratio(self.ratio, y, self._sampling_type)

        # Assign every sample to a cluster using the configured clusterer.
        clusterer = self.clusterer[0][1]
        self.clustering_labels_ = clusterer.fit_predict(X, y)

        # The majority class is the first label whose entry in ``ratio_`` is
        # zero; every other label is treated as a minority class.
        zero_count_labels = []
        for label, n_samples in self.ratio_.items():
            if n_samples == 0:
                zero_count_labels.append(label)
        majority_label = zero_count_labels[0]
        minority_labels = [
            label for label in self.ratio_.keys() if label != majority_label
        ]

        # Container for the per-cluster imbalance ratios. It is created but
        # not populated in this method (nor are the labels above used).
        weights = pd.DataFrame()

        return self
    def fit(self, X, y):
        """Validate the inputs, store their hashes and delegate to ``_fit``.

        Parameters
        ----------
        X : {array-like, sparse matrix}
            Data passed to ``check_X_y`` (CSR and CSC sparse formats are
            accepted).
        y : array-like
            Target passed to ``check_X_y`` and ``check_target_type``.

        Returns
        -------
        self : object
            The fitted instance.
        """
        X_checked, y_checked = check_X_y(X, y, accept_sparse=['csr', 'csc'])
        y_checked = check_target_type(y_checked)

        x_hash, y_hash = hash_X_y(X_checked, y_checked)
        self.X_hash_ = x_hash
        self.y_hash_ = y_hash

        self._fit(X_checked, y_checked)
        return self
示例#4
0
    def _check_X_y(self, X, y):
        """Validate ``X`` and ``y`` so that string categorical features pass.

        The target is checked with ``check_target_type`` first; both inputs
        are then validated through ``self._validate_data`` with
        ``dtype=None`` and ``reset=True``.

        Returns
        -------
        tuple
            ``(X, y, binarize_y)``, where ``binarize_y`` is the second value
            returned by ``check_target_type(..., indicate_one_vs_all=True)``.
        """
        checked_target, binarize_y = check_target_type(
            y, indicate_one_vs_all=True
        )
        validated = self._validate_data(
            X,
            checked_target,
            reset=True,
            dtype=None,
            accept_sparse=["csr", "csc"],
        )
        checked_data, checked_target = validated
        return checked_data, checked_target, binarize_y
    def fit(self, X, y):
        """Validate the inputs, store their hashes and resolve the ratio.

        Parameters
        ----------
        X : 2d ndarray or scipy sparse matrix, shape [n_samples, n_features]
            Matrix containing the data which have to be sampled.

        y : 1d ndarray, shape [n_samples]
            Corresponding label for each sample in X.

        Returns
        -------
        self
            The fitted instance, with ``X_hash_``, ``y_hash_`` and ``ratio_``
            set.
        """
        X_checked, y_checked = check_X_y(X, y, accept_sparse=['csr', 'csc'])
        y_checked = check_target_type(y_checked)

        x_hash, y_hash = hash_X_y(X_checked, y_checked)
        self.X_hash_ = x_hash
        self.y_hash_ = y_hash

        self.ratio_ = check_ratio(self.ratio, y_checked)
        return self
示例#6
0
    def fit(self, X, y):
        """Find the class statistics and configure the two samplers.

        Each class count is compared with ``self.min_freq`` and
        ``self.max_freq`` to build two per-class target dictionaries:

        * count below ``min_freq``: the under-sampling target keeps the
          count and the over-sampling target is ``min_freq``;
        * count above ``max_freq``: both targets are ``max_freq``;
        * otherwise: both targets keep the observed count.

        The dictionaries are passed as ``ratio`` to ``RandomUnderSampler``
        and ``RandomOverSampler``, stored on ``self.under_sampler`` and
        ``self.over_sampler``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Matrix containing the data which have to be sampled.
        y : array-like, shape (n_samples,)
            Corresponding label for each sample in X.

        Returns
        -------
        self : object
            The fitted instance.
        """
        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'], dtype=None)
        y = check_target_type(y)
        self.ratio_ = self.ratio
        self.X_hash_, self.y_hash_ = hash_X_y(X, y)

        # Count per label with np.unique rather than np.bincount: bincount
        # only accepts non-negative integers and is indexed by position, so
        # string, negative or float labels would raise or be mis-counted.
        labels, counts = np.unique(y, return_counts=True)

        under_dict = {}
        over_dict = {}
        for lbl, count in zip(labels, counts):
            if count < self.min_freq:
                under_dict[lbl] = count
                over_dict[lbl] = self.min_freq
            elif count > self.max_freq:
                under_dict[lbl] = self.max_freq
                over_dict[lbl] = self.max_freq
            else:
                under_dict[lbl] = count
                over_dict[lbl] = count

        self.under_sampler = RandomUnderSampler(ratio=under_dict,
                                                random_state=self.random_state)
        self.over_sampler = RandomOverSampler(ratio=over_dict,
                                              random_state=self.random_state)
        return self
def test_check_target_type_ova(target, output_target, is_ova):
    """Check both values returned with ``indicate_one_vs_all=True``.

    The converted target must equal ``output_target`` (both cast to int)
    and the second returned value must equal ``is_ova``.
    """
    result = check_target_type(target.astype(int), indicate_one_vs_all=True)
    converted_target, binarize_target = result

    expected_target = output_target.astype(int)
    assert_array_equal(converted_target, expected_target)
    assert binarize_target == is_ova
def test_check_target_type(target, output_target):
    """Check that the converted target equals ``output_target`` (as int)."""
    int_target = target.astype(int)
    converted_target = check_target_type(int_target)

    expected_target = output_target.astype(int)
    assert_array_equal(converted_target, expected_target)
示例#9
0
 def fit(self, X, y):
     """Validate ``X`` and ``y``, then set a placeholder strategy attribute.

     ``check_X_y`` runs first (any sparse format accepted), followed by
     ``check_target_type``. ``sampling_strategy_`` is set to the literal
     string ``'sampling_strategy_'``.

     Returns
     -------
     self
     """
     X_checked, y_checked = check_X_y(X, y, accept_sparse=True)
     y_checked, _binarize_y = check_target_type(
         y_checked, indicate_one_vs_all=True
     )
     self.sampling_strategy_ = 'sampling_strategy_'
     return self
示例#10
0
 def fit(self, X, y):
     """Validate the target and then ``X``/``y``; store nothing.

     ``check_target_type`` runs first, followed by ``check_X_y`` with any
     sparse format accepted. The validated values are discarded.

     Returns
     -------
     self
     """
     y_checked, _binarize_y = check_target_type(y, indicate_one_vs_all=True)
     _X_checked, y_checked = check_X_y(X, y_checked, accept_sparse=True)
     return self
示例#11
0
 def _check_X_y(X, y):
     """Validate ``X`` and ``y`` with no dtype requested for ``X``.

     Returns
     -------
     tuple
         ``(X, y, binarize_y)``, where ``binarize_y`` is the second value
         returned by ``check_target_type(..., indicate_one_vs_all=True)``.
     """
     checked_target, binarize_y = check_target_type(
         y, indicate_one_vs_all=True
     )
     checked_data, checked_target = check_X_y(
         X, checked_target, accept_sparse=['csr', 'csc'], dtype=None
     )
     return checked_data, checked_target, binarize_y
示例#12
0
def test_check_target_type_ova(target, output_target, is_ova):
    """Check both values returned with ``indicate_one_vs_all=True``.

    The converted target must equal ``output_target`` (both cast to int)
    and the second returned value must equal ``is_ova``.
    """
    result = check_target_type(target.astype(int), indicate_one_vs_all=True)
    converted_target, binarize_target = result

    expected_target = output_target.astype(int)
    assert_array_equal(converted_target, expected_target)
    assert binarize_target == is_ova
 def fit(self, X, y):
     """Validate the target and dense ``X``, then set a placeholder attribute.

     ``check_target_type`` runs first, followed by ``check_X_y`` with
     ``accept_sparse=False``. ``sampling_strategy_`` is set to the literal
     string ``'sampling_strategy_'``.

     Returns
     -------
     self
     """
     y_checked, _binarize_y = check_target_type(y, indicate_one_vs_all=True)
     _X_checked, y_checked = check_X_y(X, y_checked, accept_sparse=False)
     self.sampling_strategy_ = 'sampling_strategy_'
     return self
def test_check_target_warning():
    """A 2x2 integer target must emit the 'should be of types' UserWarning."""
    flat_values = np.arange(4)
    target = flat_values.reshape((2, 2))

    expected_warning = pytest.warns(UserWarning, match='should be of types')
    with expected_warning:
        check_target_type(target)
示例#15
0
 def _check_X_y(X, y):
     """Validate ``X`` and ``y`` with no dtype requested for ``X``.

     Returns
     -------
     tuple
         ``(X, y, binarize_y)``, where ``binarize_y`` is the second value
         returned by ``check_target_type(..., indicate_one_vs_all=True)``.
     """
     checked_target, binarize_y = check_target_type(
         y, indicate_one_vs_all=True
     )
     checked_data, checked_target = check_X_y(
         X, checked_target, accept_sparse=['csr', 'csc'], dtype=None
     )
     return checked_data, checked_target, binarize_y
def test_check_target_warning():
    """A 2x2 integer target must emit the 'should be of types' UserWarning."""
    flat_values = np.arange(4)
    target = flat_values.reshape((2, 2))

    expected_warning = pytest.warns(UserWarning, match='should be of types')
    with expected_warning:
        check_target_type(target)
示例#17
0
def test_check_target_type(target, output_target):
    """Check that the converted target equals ``output_target`` (as int)."""
    int_target = target.astype(int)
    converted_target = check_target_type(int_target)

    expected_target = output_target.astype(int)
    assert_array_equal(converted_target, expected_target)
 def fit(self, X, y):
     """Validate the target and then ``X``/``y``; store nothing.

     ``check_target_type`` runs first, followed by ``check_X_y`` with any
     sparse format accepted. The validated values are discarded.

     Returns
     -------
     self
     """
     y_checked, _binarize_y = check_target_type(y, indicate_one_vs_all=True)
     _X_checked, y_checked = check_X_y(X, y_checked, accept_sparse=True)
     return self