def test_cluster_centroids_n_jobs():
    # check that we deprecate the `n_jobs` parameter.
    cc = ClusterCentroids(n_jobs=1)
    with pytest.warns(FutureWarning) as record:
        cc.fit_resample(X, Y)
    assert len(record) == 1
    assert "'n_jobs' was deprecated" in record[0].message.args[0]
def test_fit_resample_check_voting():
    cc = ClusterCentroids(random_state=RND_SEED)
    cc.fit_resample(X, Y)
    assert cc.voting_ == 'soft'
    cc = ClusterCentroids(random_state=RND_SEED)
    cc.fit_resample(sparse.csr_matrix(X), Y)
    assert cc.voting_ == 'hard'
def under_sampling(X, y, method):
    if method == 'ClusterCentroids':
        model = ClusterCentroids()
        X_resampled, y_resampled = model.fit_resample(X, y)
    elif method == 'RandomUnderSampler':
        model = RandomUnderSampler()
        X_resampled, y_resampled = model.fit_resample(X, y)
    elif method == 'NearMiss':
        model = NearMiss()
        X_resampled, y_resampled = model.fit_resample(X, y)
    elif method == 'EditedNearestNeighbours':
        model = EditedNearestNeighbours()
        X_resampled, y_resampled = model.fit_resample(X, y)
    elif method == 'RepeatedEditedNearestNeighbours':
        model = RepeatedEditedNearestNeighbours()
        X_resampled, y_resampled = model.fit_resample(X, y)
    elif method == 'AllKNN':
        model = AllKNN()
        X_resampled, y_resampled = model.fit_resample(X, y)
    elif method == 'NeighbourhoodCleaningRule':
        model = NeighbourhoodCleaningRule()
        X_resampled, y_resampled = model.fit_resample(X, y)
    elif method == 'OneSidedSelection':
        model = OneSidedSelection()
        X_resampled, y_resampled = model.fit_resample(X, y)
    return X_resampled, y_resampled
def test_fit_resample_error():
    sampling_strategy = 'auto'
    cluster = 'rnd'
    cc = ClusterCentroids(sampling_strategy=sampling_strategy,
                          random_state=RND_SEED,
                          estimator=cluster)
    with raises(ValueError, match="has to be a KMeans clustering"):
        cc.fit_resample(X, Y)

    voting = 'unknown'
    cc = ClusterCentroids(sampling_strategy=sampling_strategy,
                          voting=voting,
                          random_state=RND_SEED)
    with raises(ValueError, match="needs to be one of"):
        cc.fit_resample(X, Y)
def test_fit_resample_half():
    sampling_strategy = {0: 3, 1: 6}
    cc = ClusterCentroids(sampling_strategy=sampling_strategy,
                          random_state=RND_SEED)
    X_resampled, y_resampled = cc.fit_resample(X, Y)
    assert X_resampled.shape == (9, 2)
    assert y_resampled.shape == (9, )
def test_fit_resample_auto():
    sampling_strategy = "auto"
    cc = ClusterCentroids(sampling_strategy=sampling_strategy,
                          random_state=RND_SEED)
    X_resampled, y_resampled = cc.fit_resample(X, Y)
    assert X_resampled.shape == (6, 2)
    assert y_resampled.shape == (6, )
def test_cluster_centroids_hard_target_class():
    # check that the samples selected by hard voting correspond to the
    # targeted class
    # non-regression test for:
    # https://github.com/scikit-learn-contrib/imbalanced-learn/issues/738
    X, y = make_classification(
        n_samples=1000,
        n_features=2,
        n_informative=1,
        n_redundant=0,
        n_repeated=0,
        n_clusters_per_class=1,
        weights=[0.3, 0.7],
        class_sep=0.01,
        random_state=0,
    )
    cc = ClusterCentroids(voting="hard", random_state=0)
    X_res, y_res = cc.fit_resample(X, y)
    minority_class_indices = np.flatnonzero(y == 0)
    X_minority_class = X[minority_class_indices]
    resampled_majority_class_indices = np.flatnonzero(y_res == 1)
    X_res_majority = X_res[resampled_majority_class_indices]
    sample_from_minority_in_majority = [
        np.all(np.isclose(selected_sample, minority_sample))
        for selected_sample in X_res_majority
        for minority_sample in X_minority_class
    ]
    assert sum(sample_from_minority_in_majority) == 0
def fix_imbalance(X, y):
    """Fix imbalanced data in features X with labels y.

    This is an important step because an over-representation of a label
    means that it is easy to score high by always guessing that label.
    """
    cluster_centroids = ClusterCentroids()
    return cluster_centroids.fit_resample(X, y)
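# A minimal usage sketch for fix_imbalance above (illustrative, not from the
# original source): the synthetic dataset, its 9:1 class weights, and the
# names X_demo/y_demo are assumptions used only for demonstration.
from collections import Counter

from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(
    n_samples=1000, n_features=4, weights=[0.9, 0.1], random_state=0)
print(Counter(y_demo))   # imbalanced, roughly 9:1

X_bal, y_bal = fix_imbalance(X_demo, y_demo)
print(Counter(y_bal))    # both classes reduced to the minority class count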
def Resampling(train_x, train_y, resampling_method):
    train_y.data = LabelEncoder().fit_transform(train_y.data)

    # summarize distribution
    # uncomment the line below to display a pie chart of the class
    # distribution before resampling
    # plotGraphics.piePlot(train_y, "Before Resampling")

    # ---- UNDER-SAMPLING ------ #
    if resampling_method == "ClusterCentroids":
        resample = ClusterCentroids(voting='hard', random_state=42)
    if resampling_method == "CondensedNearestNeighbour":
        resample = CondensedNearestNeighbour(n_neighbors=7, random_state=42)
    if resampling_method == "EditedNearestNeighbours":
        resample = EditedNearestNeighbours(n_neighbors=7, kind_sel='mode', n_jobs=-1)
    if resampling_method == "RepeatedEditedNearestNeighbours":
        resample = RepeatedEditedNearestNeighbours(n_neighbors=7, kind_sel='mode', n_jobs=-1)
    if resampling_method == "AllKNN":
        resample = AllKNN(n_neighbors=7, kind_sel='mode', allow_minority=True, n_jobs=-1)
    if resampling_method == "NearMiss":
        resample = NearMiss(n_neighbors=7, n_jobs=-1)
    if resampling_method == "NeighbourhoodCleaningRule":
        resample = NeighbourhoodCleaningRule(n_neighbors=7, kind_sel='all')
    if resampling_method == "RandomUnderSampler":
        resample = RandomUnderSampler(random_state=42)
    if resampling_method == "TomekLinks":
        resample = TomekLinks(n_jobs=-1)

    # ---- OVER-SAMPLING ------ #
    if resampling_method == "BorderlineSMOTE":
        resample = BorderlineSMOTE(random_state=42, n_jobs=-1)
    if resampling_method == "KMeansSMOTE":
        resample = KMeansSMOTE(random_state=42)
    if resampling_method == "RandomOverSampler":
        resample = RandomOverSampler(random_state=42)
    if resampling_method == "SMOTE":
        resample = SMOTE(random_state=42, n_jobs=-1)

    # transform the dataset
    train_x.data, train_y.data = resample.fit_resample(train_x.data, train_y.data)
def perform_Under_ClusterCentroids(self):
    print('Under sampling with ClusterCentroids, preserves information')
    cc = ClusterCentroids(random_state=0)
    X_resampled, y_resampled = cc.fit_resample(self.X, self.y)
    return X_resampled, y_resampled
def test_multiclass_fit_resample():
    y = Y.copy()
    y[5] = 2
    y[6] = 2
    cc = ClusterCentroids(random_state=RND_SEED)
    X_resampled, y_resampled = cc.fit_resample(X, y)
    count_y_res = Counter(y_resampled)
    assert count_y_res[0] == 2
    assert count_y_res[1] == 2
    assert count_y_res[2] == 2
def undersample(X, y):
    cc = ClusterCentroids(random_state=12)
    rX, rY = cc.fit_resample(X, y)
    if isinstance(X, pd.DataFrame):
        rX = pd.DataFrame(data=rX, columns=X.columns)
    elif isinstance(X, pd.Series):
        rX = pd.Series(data=rX)
    if isinstance(y, pd.Series):
        rY = pd.Series(data=rY)
    return rX, rY
def test_multiclass_fit_resample():
    y = Y.copy()
    y[5] = 2
    y[6] = 2
    cc = ClusterCentroids(random_state=RND_SEED)
    _, y_resampled = cc.fit_resample(X, y)
    count_y_res = Counter(y_resampled)
    assert count_y_res[0] == 2
    assert count_y_res[1] == 2
    assert count_y_res[2] == 2
def _under_sampling(table, label_col, sampling_strategy='not majority', seed=None,
                    estimator='KMeans', n_clusters=8, voting='auto', n_jobs=1):
    # Separate features and label
    features = table.drop([label_col], axis=1)
    y = table[label_col]

    if sklearn_utils.multiclass.type_of_target(y) == 'continuous':
        raise_error('0718', 'label_col')

    # Initialize the label encoder
    lab_encoder = preprocessing.LabelEncoder()

    # Filter out categorical columns in features
    categorical_cols = [col for col in features.columns
                        if features[col].dtypes == 'object']

    # Transform categorical columns and add to the original features
    for cate_col in categorical_cols:
        features_encoder = lab_encoder.fit_transform(features[cate_col])
        features[cate_col] = features_encoder

    # Transform label column with object type
    if y.dtypes == 'object':
        y_encoder = lab_encoder.fit_transform(y)
    else:
        y_encoder = y

    if estimator == 'KMeans':
        estimator_model = KMeans(n_clusters=n_clusters)
    else:
        estimator_model = None

    # Process under-sampling
    sm = ClusterCentroids(sampling_strategy=sampling_strategy, random_state=seed,
                          estimator=estimator_model, voting=voting, n_jobs=n_jobs)
    X_res, y_res = sm.fit_resample(features, y_encoder)

    # Invert to original data
    if y.dtypes == 'object':
        y_decoder = lab_encoder.inverse_transform(y_res)
    else:
        y_decoder = y_res

    df = pd.DataFrame(data=X_res, columns=features.columns)
    for cate_col in categorical_cols:
        df[cate_col] = lab_encoder.inverse_transform(df[cate_col].astype('int32'))
    df1 = pd.DataFrame(data=y_decoder, columns=[label_col])

    # Output result
    out_table = df.join(df1)
    return {'out_table': out_table}
def test_fit_resample_auto():
    sampling_strategy = 'auto'
    cc = ClusterCentroids(sampling_strategy=sampling_strategy,
                          random_state=RND_SEED)
    X_resampled, y_resampled = cc.fit_resample(X, Y)
    X_gt = np.array([[0.92923648, 0.76103773],
                     [0.47104475, 0.44386323],
                     [0.13347175, 0.12167502],
                     [0.06738818, -0.529627],
                     [0.17901516, 0.69860992],
                     [0.094035, -2.55298982]])
    y_gt = np.array([0, 0, 0, 1, 1, 1])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
def test_fit_resample_object():
    sampling_strategy = "auto"
    cluster = KMeans(random_state=RND_SEED)
    cc = ClusterCentroids(
        sampling_strategy=sampling_strategy,
        random_state=RND_SEED,
        estimator=cluster,
    )
    X_resampled, y_resampled = cc.fit_resample(X, Y)
    assert X_resampled.shape == (6, 2)
    assert y_resampled.shape == (6, )
def cluster_centroids(X, y, visualize=False, pca2d=True, pca3d=True,
                      tsne=True, pie_evr=True):
    cc = ClusterCentroids(random_state=42)
    X_res, y_res = cc.fit_resample(X, y)
    if visualize:
        hist_over_and_undersampling(y_res)
        pca_general(X_res, y_res, d2=pca2d, d3=pca3d, pie_evr=pie_evr)
    return X_res, y_res
def test_fit_resample_half():
    sampling_strategy = {0: 3, 1: 6}
    cc = ClusterCentroids(sampling_strategy=sampling_strategy,
                          random_state=RND_SEED)
    X_resampled, y_resampled = cc.fit_resample(X, Y)
    X_gt = np.array([[0.92923648, 0.76103773],
                     [0.13347175, 0.12167502],
                     [0.47104475, 0.44386323],
                     [0.09125309, -0.85409574],
                     [0.19220316, 0.32337101],
                     [0.094035, -2.55298982],
                     [0.20792588, 1.49407907],
                     [0.04352327, -0.20515826],
                     [0.12372842, 0.6536186]])
    y_gt = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1])
    print(X_resampled)
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
def clusterCentroidsUnderSample(x, y, label='class'):
    print('Balancing with ClusterCentroids')
    print('Current x state: ', x.shape)
    x_columns = x.columns.values
    sampler = ClusterCentroids(random_state=0)
    x, y = sampler.fit_resample(x, y)
    print('Resampled dataset shape %s' % Counter(y))
    x_bal = pd.DataFrame(x, columns=x_columns)
    y_bal = pd.DataFrame(y, columns=[label])
    return x_bal, y_bal
def resample(X, Y, resampling):
    X_resampled, y_resampled = X, Y
    if resampling == 'oversampling':
        from imblearn.over_sampling import RandomOverSampler
        ros = RandomOverSampler(random_state=0)
        X_resampled, y_resampled = ros.fit_resample(X, Y)
    if resampling == 'undersampling':
        from imblearn.under_sampling import ClusterCentroids
        cc = ClusterCentroids(random_state=0)
        X_resampled, y_resampled = cc.fit_resample(X, Y)
    if resampling == 'smote':
        from imblearn.over_sampling import BorderlineSMOTE
        # from imblearn.over_sampling import SMOTE
        X_resampled, y_resampled = BorderlineSMOTE().fit_resample(X, Y)
    return X_resampled.fillna(0), y_resampled.fillna(0)
def test_fit_hard_voting():
    sampling_strategy = "auto"
    voting = "hard"
    cluster = KMeans(random_state=RND_SEED)
    cc = ClusterCentroids(
        sampling_strategy=sampling_strategy,
        random_state=RND_SEED,
        estimator=cluster,
        voting=voting,
    )
    X_resampled, y_resampled = cc.fit_resample(X, Y)
    assert X_resampled.shape == (6, 2)
    assert y_resampled.shape == (6, )
    for x in X_resampled:
        assert np.any(np.all(x == X, axis=1))
def CENTROID_us(X_train, Y_train, seed, sampling_strategy):
    if not isinstance(sampling_strategy, str):
        sampling_strategy = compute_sampling_strategy(sampling_strategy,
                                                      Y_train, 'undersampling')
    cc = ClusterCentroids(random_state=seed, n_jobs=-1,
                          sampling_strategy=sampling_strategy)
    print('Before Cluster Centroid undersampling : ', sorted(Counter(Y_train).items()))
    X_train_resampled, Y_train_resampled = cc.fit_resample(X_train, Y_train)
    print('After Cluster Centroid undersampling : ', sorted(Counter(Y_train_resampled).items()))
    X_train_resampled, Y_train_resampled = shuffle_dataset(
        X_train_resampled, Y_train_resampled, seed)
    return X_train_resampled, Y_train_resampled
def main():
    X, y = make_classification(n_samples=5000, n_features=2, n_informative=2,
                               n_redundant=0, n_repeated=0, n_classes=3,
                               n_clusters_per_class=1,
                               weights=[0.01, 0.04, 0.95],
                               class_sep=0.8, random_state=42)
    print(sorted(Counter(y).items()))
    cc = ClusterCentroids(random_state=42)
    X_resampled, y_resampled = cc.fit_resample(X, y)
    print(sorted(Counter(y_resampled).items()))
    print('DONE')
def test_fit_hard_voting():
    sampling_strategy = 'auto'
    voting = 'hard'
    cluster = KMeans(random_state=RND_SEED)
    cc = ClusterCentroids(sampling_strategy=sampling_strategy,
                          random_state=RND_SEED,
                          estimator=cluster,
                          voting=voting)
    X_resampled, y_resampled = cc.fit_resample(X, Y)
    X_gt = np.array([[0.92923648, 0.76103773],
                     [0.47104475, 0.44386323],
                     [0.13347175, 0.12167502],
                     [0.09125309, -0.85409574],
                     [0.12372842, 0.6536186],
                     [0.094035, -2.55298982]])
    y_gt = np.array([0, 0, 0, 1, 1, 1])
    assert_allclose(X_resampled, X_gt, rtol=R_TOL)
    assert_array_equal(y_resampled, y_gt)
    for x in X_resampled:
        assert np.any(np.all(x == X, axis=1))
def sampling(X_train, y_train, smpl):
    if smpl == 'ROS':
        ros = RandomOverSampler(random_state=0)
        X_train, y_train = ros.fit_resample(X_train, y_train)
    elif smpl == 'SMOTE':
        X_train, y_train = SMOTE().fit_resample(X_train, y_train)
    elif smpl == 'ADASYN':
        X_train, y_train = ADASYN().fit_resample(X_train, y_train)
    elif smpl == 'CC':
        cc = ClusterCentroids(random_state=0)
        X_train, y_train = cc.fit_resample(X_train, y_train)
    elif smpl == 'RUS':
        rus = RandomUnderSampler(random_state=0)
        X_train, y_train = rus.fit_resample(X_train, y_train)
    return X_train, y_train
def cluster(df, drop, target):
    # split the table into features and outcomes
    x_cols = [i for i in df.columns if i not in drop]
    X = df[x_cols]
    y = df[target]
    # split features and outcomes into train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    cc = ClusterCentroids(random_state=1)
    X_resampled, y_resampled = cc.fit_resample(X_train, y_train)
    model = LogisticRegression(solver='lbfgs', random_state=1)
    model.fit(X_resampled, y_resampled)
    y_predictions = model.predict(X_test)
    # Calculate the balanced accuracy score.
    acc_score = balanced_accuracy_score(y_test, y_predictions)
    return acc_score * 100
for i, v in enumerate(importance):
    print('Feature: %0d, Score: %.5f' % (i, v))

# plot feature importance
plt.bar([x for x in range(len(importance))], importance)
plt.show()

# LOGISTIC REGRESSION WITH ClusterCentroids
from imblearn.under_sampling import ClusterCentroids

cc = ClusterCentroids(random_state=1)
X_train_cc, Y_train_cc = cc.fit_resample(X_train, Y_train)
Counter(Y_train_cc)

# Train the Logistic Regression model using the resampled data
cluster_model = LogisticRegression(solver='saga', random_state=1, max_iter=1000)
cluster_model.fit(X_train_cc, Y_train_cc)

Y_pred_LR_cc = cluster_model.predict(X_test)
print(__doc__)

# Generate the dataset
X, y = make_classification(n_classes=2, class_sep=2, weights=[0.1, 0.9],
                           n_informative=3, n_redundant=1, flip_y=0,
                           n_features=20, n_clusters_per_class=1,
                           n_samples=50, random_state=10)

# Instantiate a PCA object for the sake of easy visualisation
pca = PCA(n_components=2)
# Fit and transform x to visualise inside a 2D feature space
X_vis = pca.fit_transform(X)

# Apply Cluster Centroids
cc = ClusterCentroids()
X_resampled, y_resampled = cc.fit_resample(X, y)
X_res_vis_soft = pca.transform(X_resampled)

# Use hard voting instead of soft voting
cc = ClusterCentroids(voting='hard')
X_resampled, y_resampled = cc.fit_resample(X, y)
X_res_vis_hard = pca.transform(X_resampled)

# Three subplots, unpack the axes array immediately
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))

c0 = ax1.scatter(X_vis[y == 0, 0], X_vis[y == 0, 1], label="Class #0",
                 alpha=0.5)
c1 = ax1.scatter(X_vis[y == 1, 0], X_vis[y == 1, 1], label="Class #1",
                 alpha=0.5)
ax1.set_title('Original set')
def test_fit_resample_check_voting(X, expected_voting):
    cc = ClusterCentroids(random_state=RND_SEED)
    cc.fit_resample(X, Y)
    assert cc.voting_ == expected_voting
def test_fit_resample_error(cluster_centroids_params, err_msg):
    cc = ClusterCentroids(**cluster_centroids_params)
    with pytest.raises(ValueError, match=err_msg):
        cc.fit_resample(X, Y)
def undersample(self, X, y):
    cc = ClusterCentroids(random_state=12)
    return cc.fit_resample(X, y)