def test_pipeline_new_with_params():
    """Exercise ``Pipeline.new_with_params`` on a PCA + KMeans pipeline.

    A pipeline ending in ``KMeans(n_clusters=4)`` is fit and used to
    predict, and its final step must expose 4 cluster centers.  The
    pipeline returned by ``new_with_params`` (7 clusters, 2 PCA
    components) must raise ``NotFittedError`` on ``predict`` until it has
    been fit itself, after which its final step must expose 7 centers.
    """
    step_list = [
        steps.SelectCanvas('band_1'),
        steps.Flatten(),
        ('pca', steps.Transform(IncrementalPCA(n_components=3))),
        ('kmeans', KMeans(n_clusters=4)),
    ]
    original = Pipeline(step_list)
    original.fit(random_elm_store())
    original.predict(random_elm_store())
    original_kmeans = original.steps[-1][-1]
    assert original_kmeans.cluster_centers_.shape[0] == 4

    # Build the derived pipeline with overridden hyperparameters.
    overrides = {'kmeans__n_clusters': 7, 'pca__n_components': 2}
    derived = original.new_with_params(**overrides)

    # The derived pipeline has not been fit yet, so predict must fail.
    with pytest.raises(NotFittedError):
        derived.predict(random_elm_store())

    derived.fit(random_elm_store())
    derived_kmeans = derived.steps[-1][-1]
    assert derived_kmeans.cluster_centers_.shape[0] == 7
def test_poly():
    """Grow a pipeline from ``flat_poly_var_kmeans`` one step at a time.

    Pipelines are built from the first one, two and three steps and then
    from the full step list; the output of ``fit_transform`` / ``fit`` /
    ``predict`` is checked at each stage.
    """
    all_steps = flat_poly_var_kmeans

    # First step only: the transformed X must expose a ``flat`` attribute.
    one_step = Pipeline(all_steps[:1])
    flat_only, _, _ = one_step.fit_transform(**data_source)
    assert hasattr(flat_only, 'flat')

    # First two steps: the result must have more columns than before.
    two_steps = Pipeline(all_steps[:2])
    expanded, _, _ = two_steps.fit_transform(**data_source)
    n_expanded_cols = expanded.flat.shape[1]
    n_flat_cols = flat_only.flat.shape[1]
    assert n_expanded_cols > n_flat_cols

    # First three steps: fit_transform must still hand back a tuple.
    three_steps = Pipeline(all_steps[:3])
    selected = three_steps.fit_transform(**data_source)
    assert isinstance(selected, tuple)

    # Full pipeline, through KMeans.
    full = Pipeline(all_steps)
    # fit should always return a Pipeline instance (self after fitting)
    fitted = full.fit(**data_source)
    assert isinstance(fitted, Pipeline)
    assert isinstance(fitted.steps[-1][-1], KMeans)
    n_centers = fitted._estimator.cluster_centers_.shape[0]
    n_clusters_param = fitted.get_params()['kmeans__n_clusters']
    assert n_centers == n_clusters_param

    # predict should return KMeans's predict output
    predictions = full.predict(**data_source)

    # fit_transform here should return the transform of the KMeans,
    # the distances in each dimension to the cluster centers.
    result = full.fit_transform(**data_source)
    assert isinstance(result, tuple)
    assert len(result) == 3
    transformed_x, _, _ = result
    assert transformed_x.shape[0] == predictions.size
def test_predict():
    """Fit ``flat_poly_var_kmeans`` on a sample and check predict's type.

    The sample produced by ``create_sample`` is unpacked positionally into
    both ``fit`` and ``predict``; the prediction must be a numpy array.
    """
    pipeline = Pipeline(flat_poly_var_kmeans)

    # The sample is the (X, y, sample_weight) triple.
    x_y_weight = pipeline.create_sample(**data_source)

    # fit hands back a Pipeline instance (self after fitting).
    fitted_pipeline = pipeline.fit(*x_y_weight)

    # The prediction should be a numpy array.
    predictions = fitted_pipeline.predict(*x_y_weight)
    assert isinstance(predictions, np.ndarray)