def test_pipeline_ducktyping():
    """Check which methods a pipeline exposes depending on its steps.

    Attributes that must exist are simply accessed (an ``AttributeError``
    fails the test); attributes that must be absent are checked with
    ``hasattr``.
    """
    # Pipeline built from Mult: predict, transform and inverse_transform
    # must all be reachable.
    pipe = make_pipeline(Mult(5))
    for attr in ('predict', 'transform', 'inverse_transform'):
        getattr(pipe, attr)

    # Pipeline built from Transf only: no predict, but both transforms.
    pipe = make_pipeline(Transf())
    assert not hasattr(pipe, 'predict')
    for attr in ('transform', 'inverse_transform'):
        getattr(pipe, attr)

    # A lone 'passthrough' step is named after itself and behaves the same.
    pipe = make_pipeline('passthrough')
    assert pipe.steps[0] == ('passthrough', 'passthrough')
    assert not hasattr(pipe, 'predict')
    for attr in ('transform', 'inverse_transform'):
        getattr(pipe, attr)

    # A NoInvTransf step after Transf removes inverse_transform.
    pipe = make_pipeline(Transf(), NoInvTransf())
    assert not hasattr(pipe, 'predict')
    getattr(pipe, 'transform')
    assert not hasattr(pipe, 'inverse_transform')

    # ... and so does a NoInvTransf step before Transf.
    pipe = make_pipeline(NoInvTransf(), Transf())
    assert not hasattr(pipe, 'predict')
    getattr(pipe, 'transform')
    assert not hasattr(pipe, 'inverse_transform')
def test_classes_property():
    """Check when ``classes_`` is available on a pipeline.

    A fitted regression pipeline must raise ``AttributeError`` for
    ``classes_``; a classification pipeline must raise before fitting and
    expose the unique target values afterwards.
    """
    features, target = iris.data, iris.target

    # Regressor: no classes_ even once fitted.
    regressor = make_pipeline(SelectKBest(k=1), LinearRegression())
    regressor.fit(features, target)
    assert_raises(AttributeError, getattr, regressor, "classes_")

    # Classifier: classes_ only appears after fit.
    classifier = make_pipeline(
        SelectKBest(k=1),
        LogisticRegression(random_state=0),
    )
    assert_raises(AttributeError, getattr, classifier, "classes_")
    classifier.fit(features, target)
    assert_array_equal(classifier.classes_, np.unique(target))
def test_make_pipeline_memory():
    """Check that ``make_pipeline`` forwards the ``memory`` argument.

    With ``memory=...`` the resulting pipeline must hold that exact object;
    without it ``pipeline.memory`` must be ``None``. The two-step pipeline
    must also report a length of 2.
    """
    cachedir = mkdtemp()
    # try/finally guarantees the temporary cache directory is removed even
    # when one of the assertions below fails.
    try:
        if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
            # Deal with change of API in joblib
            memory = joblib.Memory(cachedir=cachedir, verbose=10)
        else:
            memory = joblib.Memory(location=cachedir, verbose=10)

        pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
        assert pipeline.memory is memory

        pipeline = make_pipeline(DummyTransf(), SVC())
        assert pipeline.memory is None
        assert len(pipeline) == 2
    finally:
        shutil.rmtree(cachedir)
def test_noise_sim(sim):
    """Fit and predict with ``sim`` as the first step of a pipeline.

    The same check is run twice: once with an explicitly named
    ``Pipeline`` and once with one built by ``make_pipeline``. Uses the
    module-level ``X`` and ``y``.
    """
    # Explicit step names.
    named_pipe = Pipeline([('s', sim), ('c', DecisionTreeClassifier())])
    named_pipe.fit(X, y)
    named_pipe.predict(X)

    # Auto-generated step names.
    auto_pipe = make_pipeline(sim, DecisionTreeClassifier())
    auto_pipe.fit(X, y)
    auto_pipe.predict(X)
def test_make_pipeline():
    """Check step naming and keyword validation of ``make_pipeline``.

    Repeated estimator types get numbered lowercase names, a unique type
    gets its plain lowercase name, and an unknown keyword argument raises
    ``TypeError`` with a specific message.
    """
    t1 = Transf()
    t2 = Transf()

    # Two steps of the same type are numbered.
    pipe = make_pipeline(t1, t2)
    assert isinstance(pipe, Pipeline)
    for position, expected_name in enumerate(("transf-1", "transf-2")):
        assert pipe.steps[position][0] == expected_name

    # A third step of a different type keeps its un-numbered name.
    pipe = make_pipeline(t1, t2, FitParamT())
    assert isinstance(pipe, Pipeline)
    for position, expected_name in enumerate(
            ("transf-1", "transf-2", "fitparamt")):
        assert pipe.steps[position][0] == expected_name

    # Unknown keyword arguments are rejected.
    expected_message = 'Unknown keyword arguments: "random_parameter"'
    assert_raise_message(
        TypeError,
        expected_message,
        make_pipeline,
        t1,
        t2,
        random_parameter='rnd',
    )
def test_score_samples_on_pipeline_without_score_samples():
    """Check ``score_samples`` is unavailable when the last step lacks it.

    A pipeline does not have a ``score_samples`` method when the final
    step of the pipeline does not define one; calling it must raise
    ``AttributeError`` naming the final estimator.
    """
    features = np.array([[1], [2]])
    labels = np.array([1, 2])

    pipe = make_pipeline(LogisticRegression())
    pipe.fit(features, labels)

    expected_message = ("'LogisticRegression' object has no attribute "
                        "'score_samples'")
    with pytest.raises(AttributeError, match=expected_message):
        pipe.score_samples(features)
def test_n_features_in_pipeline():
    """Make sure pipelines delegate ``n_features_in_`` to the first step."""
    samples = [[1, 2], [3, 4], [5, 6]]
    labels = [0, 1, 2]

    # Before fitting, the pipeline has no n_features_in_; after fitting it
    # matches the first step.
    scaler = StandardScaler()
    booster = HistGradientBoostingClassifier()
    pipe = make_pipeline(scaler, booster)
    assert not hasattr(pipe, 'n_features_in_')
    pipe.fit(samples, labels)
    assert pipe.n_features_in_ == scaler.n_features_in_ == 2

    # If the first step has the n_features_in attribute then the pipeline
    # also has it, even though the pipeline itself isn't fitted.
    scaler = StandardScaler()
    booster = HistGradientBoostingClassifier()
    pipe = make_pipeline(scaler, booster)
    scaler.fit(samples, labels)
    assert pipe.n_features_in_ == scaler.n_features_in_ == 2
    assert not hasattr(booster, 'n_features_in_')
def test_example_two():
    """Run the noisy-label example end to end and print the mean CV score.

    Builds a scale -> feature-selection -> detector -> handler pipeline,
    wraps it in a grid search over the detector's ``n_neighbors``, puts a
    label-noise step in front of it and cross-validates the result.
    """
    from skclean.simulate_noise import UniformNoise
    from skclean.detectors import KDN
    from skclean.handlers import Filter
    # Importing from skclean, not sklearn
    from skclean.pipeline import Pipeline, make_pipeline
    from skclean.utils import load_data

    X, y = load_data('breast_cancer')

    steps = [
        ('scale', StandardScaler()),           # Scale features
        ('feat_sel', VarianceThreshold(.2)),   # Feature selection
        ('detector', KDN()),                   # Detect mislabeled samples
        # Filter out likely mislabeled samples and then train a SVM
        ('handler', Filter(SVC())),
    ]
    base_clf = Pipeline(steps)

    param_grid = {'detector__n_neighbors': [2, 5, 10]}
    searched_clf = GridSearchCV(base_clf, param_grid)

    # Create label noise at the very first step
    noisy_clf = make_pipeline(UniformNoise(.3), searched_clf)

    # 5-fold cross validation
    scores = cross_val_score(noisy_clf, X, y, cv=5)
    print(scores.mean())
def test_pipeline_param_error():
    """Check that passing ``sample_weight`` to ``Pipeline.fit`` fails.

    The call must raise ``ValueError`` with a message stating that
    ``Pipeline.fit`` does not accept the ``sample_weight`` parameter.
    """
    pipe = make_pipeline(LogisticRegression())
    expected_message = ("Pipeline.fit does not accept "
                        "the sample_weight parameter")
    with pytest.raises(ValueError, match=expected_message):
        pipe.fit([[0], [0]], [0, 1], sample_weight=[1, 1])
('d', KDN()), ('e', skclean.handlers.SampleWeight(dummy)) ]) # Inside Pipeline tmp_Handlers = [] for h in NOISE_HANDLERS: if h.iterative: # Exclude iterative handlers continue ch = clone(h) ch.detector = None tmp_Handlers.append(ch) preli_steps = [UniformNoise(.2, random_state=SEED), StandardScaler()] all_comb = product(NOISE_DETECTORS, tmp_Handlers) INSIDE_PIPE = [make_pipeline(*preli_steps + list(comb)) for comb in all_comb] # Outside Pipe OUTSIDE_PIPE = [] for h in NOISE_HANDLERS: for d in NOISE_DETECTORS: ch, d = clone(h), clone(d) ch.detector = d if 'random_state' in ch.get_params(): # trying to avoid flaky tests ch.set_params(random_state=42) OUTSIDE_PIPE.append(ch) ALL_COMBS = INSIDE_PIPE + OUTSIDE_PIPE ALL_ESTIMATORS = NOISE_SIMULATORS + NOISE_DETECTORS + ALL_COMBS + [PIPELINE]