def test_forcehandleidentity_does_not_crash(tmpdir):
    """Smoke test: a pipeline holding only ``ForceHandleIdentity`` must go
    through fit, fit_transform and transform without raising."""
    pipeline = Pipeline([ForceHandleIdentity()])
    inputs = np.array([0, 1, 2, 3])
    targets = inputs * 2

    # No assertions on purpose: the test passes as long as nothing raises.
    pipeline.fit(inputs, targets)
    pipeline.fit_transform(inputs, targets)
    pipeline.transform(data_inputs=inputs)
def main():
    """Fit and then transform a one-step pipeline built around
    ``ForceAlwaysAlwaysHandleMixinStep``."""
    steps = [
        ForceAlwaysAlwaysHandleMixinStep(),
    ]
    pipeline = Pipeline(steps)

    pipeline = pipeline.fit(np.array([0, 1]), np.array([0, 1]))
    # The name is rebound to the transform output, which is then discarded.
    pipeline = pipeline.transform(np.array([0, 1]))
def test_model_stacking_transform():
    """Fit a ``ModelStacking`` pipeline and check the shape of its output.

    Two wrapped scikit-learn estimators are stacked, joined with
    ``NumpyTranspose`` and judged by a wrapped ``Ridge``; the transformed
    output must have the same shape as the expected outputs.
    """
    boosted_trees = SKLearnWrapper(
        GradientBoostingRegressor(),
        HyperparameterSpace({
            "n_estimators": RandInt(50, 600),
            "max_depth": RandInt(1, 10),
            "learning_rate": LogUniform(0.07, 0.7)
        }))
    clustering = SKLearnWrapper(
        KMeans(),
        HyperparameterSpace({"n_clusters": RandInt(5, 10)}))
    joiner = NumpyTranspose()
    judge = SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.7, 1.4),
            "fit_intercept": Boolean()
        }))

    model_stacking = Pipeline([
        ModelStacking([boosted_trees, clustering], joiner=joiner, judge=judge)
    ])

    expected_outputs_shape = (379, 1)
    data_inputs_shape = (379, 13)
    # Inputs are created before targets, in the same order as before.
    data_inputs = _create_data(data_inputs_shape)
    expected_outputs = _create_data(expected_outputs_shape)

    model_stacking = model_stacking.fit(data_inputs, expected_outputs)
    predictions = model_stacking.transform(data_inputs)

    assert predictions.shape == expected_outputs_shape
def test_pipeline_fit_then_transform(steps_list, pipeline_runner):
    """Fitting then transforming must turn ``AN_INPUT`` into ``AN_EXPECTED_OUTPUT``.

    :param steps_list: steps handed to the ``Pipeline`` constructor
    :param pipeline_runner: callable producing the pipeline runner to use
    """
    inputs = [AN_INPUT]
    targets = [AN_EXPECTED_OUTPUT]

    pipeline = Pipeline(steps_list, pipeline_runner=pipeline_runner())
    fitted = pipeline.fit(inputs, targets)
    transformed = fitted.transform(inputs)

    assert tuple(transformed) == tuple(targets)
def main():
    """Fit and transform a pipeline mixing a non-fittable step, a
    non-transformable step and ``Identity``."""
    steps = [
        NonFittableStep(),
        NonTransformableStep(),
        # Note: Identity does nothing: it inherits from both
        # NonFittableMixin and NonTransformableMixin.
        Identity(),
    ]
    pipeline = Pipeline(steps)

    pipeline = pipeline.fit(np.array([0, 1]), np.array([0, 1]))
    out = pipeline.transform(np.array([0, 1]))
def test_should_fit_each_steps(steps: List[BaseStep], expected_tape: List[str]):
    """Fitting the pipeline must record ``expected_tape`` on the shared tape
    and return a ``Pipeline``.

    :param steps: steps the pipeline is built from
    :param expected_tape: names expected on the tape after fitting
    """
    # Reset the module-level tape so earlier runs do not leak into this one.
    tape.data = []
    tape.name_tape = []

    fitted_pipeline = Pipeline(steps=steps).fit(data_inputs, expected_outputs)
    recorded_names = tape.get_name_tape()

    assert isinstance(fitted_pipeline, Pipeline)
    assert recorded_names == expected_tape
def test_should_transform_each_steps(steps: List[BaseStep], expected_tape: List[str]):
    """Transforming must record ``expected_tape`` on the shared tape and
    return the data inputs unchanged.

    :param steps: steps the pipeline is built from
    :param expected_tape: names expected on the tape after transforming
    """
    fitted_pipeline = Pipeline(steps=steps).fit(data_inputs)

    # Clear the tape after fitting so only transform calls are recorded.
    tape.data = []
    tape.name_tape = []

    transformed = fitted_pipeline.transform(data_inputs)
    recorded_names = tape.get_name_tape()

    assert recorded_names == expected_tape
    assert np.array_equal(transformed, data_inputs)
def test_sklearn_wrapper_transform_partial_fit_with_predict():
    """After many partial-fit passes, the wrapped ``SGDRegressor`` must map
    0..9 to 10..19 within an absolute tolerance of 0.1."""
    model = SKLearnWrapper(SGDRegressor(), use_partial_fit=True)
    pipeline = Pipeline([DataShuffler(), model])

    # Column vectors: inputs 0..9 and targets 10..19.
    data_inputs = np.expand_dims(np.arange(10), axis=-1)
    expected_outputs = np.expand_dims(np.arange(10, 20), axis=-1)

    n_passes = 2000
    for _ in range(n_passes):
        pipeline = pipeline.fit(data_inputs, expected_outputs)

    # Transform through the wrapped model directly, bypassing the shuffler.
    outputs = model.transform(data_inputs)

    closeness = [
        np.isclose(expected, predicted, atol=0.1)
        for expected, predicted in zip(expected_outputs, outputs)
    ]
    assert all(closeness)
def test_fit_for_each_should_fit_all_steps_for_each_data_inputs_expected_outputs():
    """``ForEachDataInput`` must fit every wrapped step once per
    (data input, expected output) pair, in order."""
    tape = TapeCallbackFunction()
    inner_pipeline = Pipeline([
        FitCallbackStep(tape.callback, ["1"]),
        FitCallbackStep(tape.callback, ["2"]),
    ])
    pipeline = Pipeline([ForEachDataInput(inner_pipeline)])

    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    pipeline = pipeline.fit(data_inputs, expected_outputs)

    assert isinstance(pipeline, Pipeline)
    # Both steps run for the first pair, then both for the second pair.
    assert tape.get_name_tape() == ["1", "2", "1", "2"]
    assert tape.data == [
        ([0, 1], [2, 3]),
        ([0, 1], [2, 3]),
        ([1, 2], [4, 5]),
        ([1, 2], [4, 5]),
    ]
def main():
    """Train a stacked regression pipeline on the Boston dataset and score it.

    The data is shuffled with a fixed seed, split 75/25 without further
    shuffling, run through feature augmentation (PCA + FastICA) and a
    ridge-judged model stacking, then evaluated with the R2 score on both
    splits.

    :return: tuple ``(y_train_predicted, y_test_predicted, score_train, score_test)``
    """
    # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2; this
    # example needs an older scikit-learn or another dataset loader.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    p = Pipeline([
        NumpyShapePrinter(),
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        NumpyShapePrinter(),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            GradientBoostingRegressor(n_estimators=500),
            GradientBoostingRegressor(max_depth=5),
            KMeans(),
        ]),
        NumpyShapePrinter(),
    ])

    print("Fitting on train:")
    p = p.fit(X_train, y_train)
    print("")

    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")

    # r2_score expects (y_true, y_pred); the score is not symmetric, so the
    # ground truth has to come first.
    print("Evaluating transformed train:")
    score_train = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_train)
    print("")

    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)

    # Sanity checks on the example's outputs.
    assert y_train_predicted.shape == (379, )
    assert y_test_predicted.shape == (127, )
    assert isinstance(score_train, float)
    assert isinstance(score_test, float)

    return y_train_predicted, y_test_predicted, score_train, score_test
def test_sklearn_wrapper_transform_partial_fit_classifier():
    """A partially fitted, wrapped ``SGDClassifier`` must output one label per
    sample, and every label must belong to the declared classes."""
    data_inputs = np.array([
        [0, 1], [0, 0], [3, -2], [-1, 1], [-2, 1],
        [2, 0], [2, -1], [4, -2], [-3, 1], [-1, 0],
    ])
    # Label for each row: first column + 2 * second column + 1.
    labels = data_inputs[:, 0] + 2 * data_inputs[:, 1] + 1
    expected_outputs = np.ravel(np.expand_dims(labels, axis=-1))
    classes = np.array([0, 1, 2, 3])

    model = SKLearnWrapper(
        SGDClassifier(),
        use_partial_fit=True,
        partial_fit_kwargs={'classes': classes})
    pipeline = Pipeline([DataShuffler(), model])

    n_passes = 2000
    for _ in range(n_passes):
        pipeline = pipeline.fit(data_inputs, expected_outputs)

    # Transform through the wrapped model directly, bypassing the shuffler.
    outputs = model.transform(data_inputs)

    assert outputs.shape == (10, )
    unexpected_labels = set(outputs) - set(classes)
    assert len(unexpected_labels) == 0
def train_neuraxle(X_train, X_test, y_train, y_test, mtype, common_name_model,
                   problemtype, classes, default_featurenames, transform_model,
                   settings, model_session):
    """Train a Neuraxle stacking pipeline for regression and pickle it.

    Only ``X_train``, ``y_train``, ``mtype`` and ``common_name_model`` are read
    here; the remaining parameters are accepted but unused in this function.

    :param X_train: training features passed to ``Pipeline.fit``
    :param y_train: training targets passed to ``Pipeline.fit``
    :param mtype: ``'regression'``/``'r'`` trains and saves a model;
        ``'classification'``/``'c'`` only prints a notice
    :param common_name_model: base name; the pickle is ``<base>.pickle``
    :return: tuple ``(model_name, model_dir, files)`` where ``model_dir`` is
        the current working directory and ``files`` lists the files written
        (empty when nothing was trained)
    """
    # NOTE(review): the original indentation was lost; the save step is placed
    # inside the regression branch because ``pipeline`` only exists there.
    model_name = common_name_model + '.pickle'
    files = []

    if mtype in ['classification', 'c']:
        print('neuraxle currently does not support classsification...')

    elif mtype in ['regression', 'r']:
        p = Pipeline([
            NumpyShapePrinter(),
            AddFeatures([
                PCA(n_components=2),
                FastICA(n_components=2),
            ]),
            NumpyShapePrinter(),
            RidgeModelStacking([
                GradientBoostingRegressor(),
                GradientBoostingRegressor(n_estimators=500),
                GradientBoostingRegressor(max_depth=5),
                KMeans(),
            ]),
            NumpyShapePrinter(),
        ])

        # Fitting and evaluating the pipeline.
        # X_train data shape: (batch, different_lengths, n_feature_columns)
        # y_train data shape: (batch, different_lengths)
        pipeline = p.fit(X_train, y_train)

        # Export the fitted pipeline; the context manager closes the file
        # even if pickling raises.
        print('saving model - %s' % (model_name))
        with open(model_name, 'wb') as f:
            pickle.dump(pipeline, f)

        files.append(model_name)

    model_dir = os.getcwd()
    return model_name, model_dir, files
def main():
    """Fit and transform a pipeline mixing a non-fittable step, a
    non-transformable step and ``Identity``, and check that the data comes
    out unchanged."""
    steps = [
        NonFittableStep(),
        NonTransformableStep(),
        # Note: Identity does nothing: it inherits from both
        # NonFittableMixin and NonTransformableMixin.
        Identity(),
    ]
    pipeline = Pipeline(steps)

    some_data = np.array([0, 1])

    pipeline = pipeline.fit(some_data)
    # Out:
    #     NonFittableStep: I transformed.
    #     NonTransformableStep: I fitted.

    out = pipeline.transform(some_data)
    # Out:
    #     NonFittableStep: I transformed.

    assert np.array_equal(out, some_data)
def test_expand_dim_fit():
    """Fitting through ``ExpandDim`` must reach only the wrapped step's fit
    handler, with inputs and targets wrapped in one extra outer dimension and
    ids taken from the injected summary hasher."""
    handle_fit_callback = TapeCallbackFunction()
    handle_transform_callback = TapeCallbackFunction()
    handle_fit_transform_callback = TapeCallbackFunction()
    wrapped_step = HandleCallbackStep(
        handle_fit_callback,
        handle_transform_callback,
        handle_fit_transform_callback)
    pipeline = Pipeline([ExpandDim(wrapped_step)])
    # Swap in a fake hasher so the ids seen by the wrapped step are known.
    pipeline['ExpandDim'].hashers = [SomeSummaryHasher(fake_summary_id=SUMMARY_ID)]

    pipeline = pipeline.fit(np.array(range(10)), np.array(range(10)))

    # Only the fit handler may have been called.
    assert handle_transform_callback.data == []
    assert handle_fit_transform_callback.data == []

    received = handle_fit_callback.data[0][0]
    expanded = np.array([np.array(range(10))])
    assert received.current_ids == [SUMMARY_ID]
    assert received.summary_id == SUMMARY_ID
    assert np.array_equal(np.array(received.data_inputs), expanded)
    assert np.array_equal(np.array(received.expected_outputs), expanded)
def test_expand_dim_fit():
    """Fitting through ``ExpandDim`` must reach only the wrapped step's fit
    handler, with inputs and targets wrapped in one extra outer dimension and
    a single, fixed current id."""
    handle_fit_callback = TapeCallbackFunction()
    handle_transform_callback = TapeCallbackFunction()
    handle_fit_transform_callback = TapeCallbackFunction()
    wrapped_step = HandleCallbackStep(
        handle_fit_callback,
        handle_transform_callback,
        handle_fit_transform_callback)
    pipeline = Pipeline([ExpandDim(wrapped_step)])

    pipeline = pipeline.fit(np.array(range(10)), np.array(range(10)))

    # Only the fit handler may have been called.
    assert handle_transform_callback.data == []
    assert handle_fit_transform_callback.data == []

    received = handle_fit_callback.data[0][0]
    expanded = np.array([np.array(range(10))])
    assert received.current_ids == ['781e5e245d69b566979b86e28d23f2c7']
    assert np.array_equal(np.array(received.data_inputs), expanded)
    assert np.array_equal(np.array(received.expected_outputs), expanded)
def main():
    """Train a stacked regression pipeline and wrap it in a Flask REST app.

    The pipeline is fitted on a 75/25 split of the Boston dataset, scored with
    R2 on both splits, then exposed through ``FlaskRestApiWrapper`` with a
    custom JSON decoder and encoder. The app is built but not started.

    :return: the Flask application wrapping the fitted pipeline
    """
    # NOTE(review): ``load_boston`` was removed in scikit-learn 1.2; this
    # example needs an older scikit-learn or another dataset loader.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    pipeline = Pipeline([
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            KMeans(),
        ]),
    ])

    print("Fitting on train:")
    pipeline = pipeline.fit(X_train, y_train)
    print("")

    print("Transforming train and test:")
    y_train_predicted = pipeline.transform(X_train)
    y_test_predicted = pipeline.transform(X_test)
    print("")

    # r2_score expects (y_true, y_pred); the score is not symmetric, so the
    # ground truth has to come first.
    print("Evaluating transformed train:")
    score = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score)
    print("")

    print("Evaluating transformed test:")
    score = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score)

    print("Deploying the application by routing data to the transform method:")

    class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
        """This is a custom JSON decoder class that precedes the pipeline's transformation."""

        def decode(self, data_inputs):
            """
            Transform a JSON list object into an np.array object.

            :param data_inputs: json object
            :return: np array for data inputs
            """
            return np.array(data_inputs)

    class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
        """This is a custom JSON response encoder class for converting the pipeline's transformation outputs."""

        def encode(self, data_inputs) -> dict:
            """
            Convert predictions to a dict for creating a JSON Response object.

            :param data_inputs: the pipeline's transformation outputs
            :return: dict with the outputs as a list under ``'predictions'``
            """
            return {'predictions': list(data_inputs)}

    app = FlaskRestApiWrapper(
        json_decoder=CustomJSONDecoderFor2DArray(),
        wrapped=pipeline,
        json_encoder=CustomJSONEncoderOfOutputs()).get_app()

    print("Finally, run the app by uncommenting this next line of code:")

    # app.run(debug=False, port=5000)

    print("You can now call your pipeline over HTTP with a (JSON) REST API.")

    # test_predictions = requests.post(
    #     url='http://127.0.0.1:5000/',
    #     json=X_test.tolist()
    # )
    # print(test_predictions)
    # print(test_predictions.content)

    assert isinstance(app, Flask)

    return app
# %% # Spline features make it possible for the linear model to successfully # leverage the periodic time-related features and reduce the error from ~14% to # ~10% of the maximum demand, which is similar to what we observed with the # one-hot encoded features. # # Qualitative analysis of the impact of features on linear model predictions # -------------------------------------------------------------------------- # # Here, we want to visualize the impact of the feature engineering choices on # the time related shape of the predictions. # # To do so we consider an arbitrary time-based split to compare the predictions # on a range of held out data points. naive_linear_pipeline.fit(X.iloc[train_0], y.iloc[train_0]) naive_linear_predictions = naive_linear_pipeline.predict(X.iloc[test_0]) one_hot_linear_pipeline.fit(X.iloc[train_0], y.iloc[train_0]) one_hot_linear_predictions = one_hot_linear_pipeline.predict(X.iloc[test_0]) cyclic_cossin_linear_pipeline.fit(X.iloc[train_0], y.iloc[train_0]) cyclic_cossin_linear_predictions = cyclic_cossin_linear_pipeline.predict( X.iloc[test_0]) cyclic_spline_linear_pipeline.fit(X.iloc[train_0], y.iloc[train_0]) cyclic_spline_linear_predictions = cyclic_spline_linear_pipeline.predict( X.iloc[test_0]) # %% # We visualize those predictions by zooming on the last 96 hours (4 days) of
AddFeatures([ SKLearnWrapper(PCA(n_components=2)), SKLearnWrapper(FastICA(n_components=2)), ]), NumpyShapePrinter(), RidgeModelStacking([ SKLearnWrapper(GradientBoostingRegressor()), SKLearnWrapper(GradientBoostingRegressor(n_estimators=500)), SKLearnWrapper(GradientBoostingRegressor(max_depth=5)), SKLearnWrapper(KMeans()), ]), NumpyShapePrinter(), ]) print("Fitting on train:") p = p.fit(X_train, y_train) print("") print("Transforming train and test:") y_train_predicted = p.transform(X_train) y_test_predicted = p.transform(X_test) print("") print("Evaluating transformed train:") score = r2_score(y_train_predicted, y_train) print('R2 regression score:', score) print("") print("Evaluating transformed test:") score = r2_score(y_test_predicted, y_test) print('R2 regression score:', score)