def test_pipeline_sample():
    """Exercise ``sample`` and ``fit_sample`` on a pipeline ending in a sampler.

    Two scenarios are covered:

    * a pipeline holding only the under-sampler must give the same output
      through ``fit(...).sample(...)``, through ``fit_sample(...)`` and
      through the bare sampler's own ``fit_sample(...)``;
    * a pipeline with a PCA step in front of the sampler must match applying
      ``pca.fit_transform`` by hand and then resampling the projected data.
    """
    dataset_params = dict(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0,
    )
    X, y = make_classification(**dataset_params)

    # --- sampler as the only pipeline step ---------------------------------
    rus = RandomUnderSampler(random_state=0)
    pipeline = Pipeline([('rus', rus)])

    fitted_pipeline = pipeline.fit(X, y)
    X_via_sample, y_via_sample = fitted_pipeline.sample(X, y)
    X_via_fit_sample, y_via_fit_sample = pipeline.fit_sample(X, y)
    X_direct, y_direct = rus.fit_sample(X, y)

    # Every route must yield the same resampled features ...
    for X_other in (X_via_fit_sample, X_direct):
        assert_array_almost_equal(X_via_sample, X_other)
    # ... and the same resampled targets.
    for y_other in (y_via_fit_sample, y_direct):
        assert_array_almost_equal(y_via_sample, y_other)

    # --- PCA followed by the sampler ---------------------------------------
    pca = PCA()
    pipeline = Pipeline([('pca', pca), ('rus', rus)])

    fitted_pipeline = pipeline.fit(X, y)
    X_piped, y_piped = fitted_pipeline.sample(X, y)

    # Reproduce the two pipeline steps manually with the same estimators.
    X_projected = pca.fit_transform(X)
    X_manual, y_manual = rus.fit_sample(X_projected, y)

    assert_array_almost_equal(X_piped, X_manual)
    assert_array_almost_equal(y_piped, y_manual)
def test_pipeline_sample():
    """Verify that a pipeline whose last step is a sampler resamples correctly.

    First the sampler is the only step: ``pipeline.fit(...).sample(...)``,
    ``pipeline.fit_sample(...)`` and the sampler's own ``fit_sample(...)`` are
    compared against each other. Then a PCA step is placed in front of the
    sampler and the pipeline output is compared with a by-hand
    ``fit_transform`` + ``fit_sample`` using a separate PCA instance.
    """
    X, y = make_classification(
        n_classes=2,
        class_sep=2,
        weights=[0.1, 0.9],
        n_informative=3,
        n_redundant=1,
        flip_y=0,
        n_features=20,
        n_clusters_per_class=1,
        n_samples=5000,
        random_state=0,
    )

    # Sampler alone inside the pipeline.
    rus = RandomUnderSampler(random_state=0)
    pipeline = Pipeline([('rus', rus)])

    fitted = pipeline.fit(X, y)
    X_sampled, y_sampled = fitted.sample(X, y)
    X_fit_sampled, y_fit_sampled = pipeline.fit_sample(X, y)
    X_reference, y_reference = rus.fit_sample(X, y)

    assert_allclose(X_sampled, X_fit_sampled, rtol=R_TOL)
    assert_allclose(X_sampled, X_reference, rtol=R_TOL)
    assert_allclose(y_sampled, y_fit_sampled, rtol=R_TOL)
    assert_allclose(y_sampled, y_reference, rtol=R_TOL)

    # PCA in front of the sampler. The pipeline gets its own PCA instance;
    # ``reference_pca`` is fitted independently for the manual comparison.
    reference_pca = PCA()
    pipeline = Pipeline([('pca', PCA()), ('rus', rus)])

    fitted = pipeline.fit(X, y)
    X_piped, y_piped = fitted.sample(X, y)

    X_projected = reference_pca.fit_transform(X)
    X_manual, y_manual = rus.fit_sample(X_projected, y)

    # Entries strictly inside (-R_TOL, R_TOL) are set to exactly zero on both
    # sides before comparing. The original author noted that PCA seemed to
    # have some issue with values near zero; only a relative tolerance is
    # passed to assert_allclose below.
    piped_near_zero = (X_piped < R_TOL) & (X_piped > -R_TOL)
    X_piped[piped_near_zero] = 0
    manual_near_zero = (X_manual < R_TOL) & (X_manual > -R_TOL)
    X_manual[manual_near_zero] = 0

    assert_allclose(X_piped, X_manual, rtol=R_TOL)
    assert_allclose(y_piped, y_manual, rtol=R_TOL)
def test_pipeline_sample():
    """Test ``Pipeline.sample`` / ``Pipeline.fit_sample`` with a final sampler.

    Part one compares three ways of resampling with a sampler-only pipeline.
    Part two prepends a PCA step and checks the pipeline against performing
    the projection and the resampling manually.
    """

    def _clamp_near_zero(values):
        # In-place: zero out entries strictly between -R_TOL and R_TOL.
        values[np.bitwise_and(values < R_TOL, values > -R_TOL)] = 0

    X, y = make_classification(
        n_classes=2, class_sep=2, weights=[0.1, 0.9],
        n_informative=3, n_redundant=1, flip_y=0,
        n_features=20, n_clusters_per_class=1,
        n_samples=5000, random_state=0)

    # Part one: the sampler is the sole step of the pipeline.
    rus = RandomUnderSampler(random_state=0)
    pipeline = Pipeline([('rus', rus)])

    X_a, y_a = pipeline.fit(X, y).sample(X, y)
    X_b, y_b = pipeline.fit_sample(X, y)
    X_c, y_c = rus.fit_sample(X, y)

    for X_expected in (X_b, X_c):
        assert_allclose(X_a, X_expected, rtol=R_TOL)
    for y_expected in (y_b, y_c):
        assert_allclose(y_a, y_expected, rtol=R_TOL)

    # Part two: PCA followed by the sampler. The pipeline receives a fresh
    # PCA(); the standalone ``pca`` is fitted separately as the reference.
    pca = PCA()
    steps = [('pca', PCA()), ('rus', rus)]
    pipeline = Pipeline(steps)

    X_from_pipeline, y_from_pipeline = pipeline.fit(X, y).sample(X, y)
    X_by_hand, y_by_hand = rus.fit_sample(pca.fit_transform(X), y)

    # Near-zero values are rounded to zero before the comparison; per the
    # original note, PCA seemed to have some issue with such values.
    for resampled in (X_from_pipeline, X_by_hand):
        _clamp_near_zero(resampled)

    assert_allclose(X_from_pipeline, X_by_hand, rtol=R_TOL)
    assert_allclose(y_from_pipeline, y_by_hand, rtol=R_TOL)