def test_kneighbors_regressor():
    # Chaining a (K/Radius)NeighborsTransformer into a neighbors regressor
    # with metric='precomputed' must match the stand-alone regressor.
    rng = np.random.RandomState(0)
    X = 2 * rng.rand(40, 5) - 1
    X2 = 2 * rng.rand(40, 5) - 1
    y = rng.rand(40, 1)

    n_neighbors = 12
    radius = 1.5
    # We precompute more neighbors than necessary, to have equivalence between
    # k-neighbors estimator after radius-neighbors transformer, and vice-versa.
    factor = 2

    k_trans = KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance')
    k_trans_factor = KNeighborsTransformer(
        n_neighbors=int(n_neighbors * factor), mode='distance')

    r_trans = RadiusNeighborsTransformer(radius=radius, mode='distance')
    r_trans_factor = RadiusNeighborsTransformer(
        radius=int(radius * factor), mode='distance')

    k_reg = KNeighborsRegressor(n_neighbors=n_neighbors)
    r_reg = RadiusNeighborsRegressor(radius=radius)

    for transformer, regressor in [(k_trans, k_reg),
                                   (k_trans_factor, r_reg),
                                   (r_trans, r_reg),
                                   (r_trans_factor, k_reg)]:
        # compare the chained version and the compact version
        compact = clone(regressor)
        precomp = clone(regressor)
        precomp.set_params(metric='precomputed')
        chained = make_pipeline(clone(transformer), precomp)

        assert_array_almost_equal(chained.fit(X, y).predict(X2),
                                  compact.fit(X, y).predict(X2))
def test_dbscan():
    # Chaining RadiusNeighborsTransformer into DBSCAN(metric='precomputed')
    # must produce the same labels as plain DBSCAN on raw coordinates.
    radius = 0.3
    n_clusters = 3
    X = generate_clustered_data(n_clusters=n_clusters)

    # compare the chained version and the compact version
    pipeline = make_pipeline(
        RadiusNeighborsTransformer(radius=radius, mode='distance'),
        DBSCAN(metric='precomputed', eps=radius))
    reference = DBSCAN(eps=radius)

    assert_array_almost_equal(pipeline.fit_predict(X),
                              reference.fit_predict(X))
def test_transformer_result():
    # Validate shape, number of stored values, sparse format, and per-row
    # data ordering of the graphs built by both neighbors transformers.
    n_neighbors = 5
    n_samples_fit = 20
    n_queries = 18
    n_features = 10
    rng = np.random.RandomState(42)
    X = rng.randn(n_samples_fit, n_features)
    X2 = rng.randn(n_queries, n_features)
    radius = np.percentile(euclidean_distances(X), 10)

    def _check_graph(graph, n_rows, knn_nnz, exact):
        # `knn_nnz` is the entry count a k-neighbors graph would have;
        # the radius graph is expected NOT to match it (exact=False).
        assert graph.shape == (n_rows, n_samples_fit)
        if exact:
            assert graph.data.shape == (knn_nnz,)
        else:
            assert not graph.data.shape == (knn_nnz,)
        assert graph.format == 'csr'
        assert _is_sorted_by_data(graph)

    for mode in ['distance', 'connectivity']:
        # In 'distance' mode each sample is also returned as its own neighbor.
        extra = 1 if mode == 'distance' else 0

        # with n_neighbors: exactly n_neighbors (+ self) entries per row
        knt = KNeighborsTransformer(n_neighbors=n_neighbors, mode=mode)
        _check_graph(knt.fit_transform(X), n_samples_fit,
                     n_samples_fit * (n_neighbors + extra), exact=True)
        _check_graph(knt.transform(X2), n_queries,
                     n_queries * (n_neighbors + extra), exact=True)

        # with radius: a variable entry count that differs from the k-NN one
        rnt = RadiusNeighborsTransformer(radius=radius, mode=mode)
        _check_graph(rnt.fit_transform(X), n_samples_fit,
                     n_samples_fit * (n_neighbors + extra), exact=False)
        _check_graph(rnt.transform(X2), n_queries,
                     n_queries * (n_neighbors + extra), exact=False)