示例#1
0
def test_forcehandleidentity_does_not_crash(tmpdir):
    """Smoke test: fit, fit_transform and transform all run on a ForceHandleIdentity pipeline."""
    inputs = np.array([0, 1, 2, 3])
    targets = inputs * 2
    pipeline = Pipeline([ForceHandleIdentity()])

    # Return values are deliberately ignored: the test only checks that
    # none of the three entry points raises.
    pipeline.fit(inputs, targets)
    pipeline.fit_transform(inputs, targets)
    pipeline.transform(data_inputs=inputs)
示例#2
0
def main():
    """Fit a one-step pipeline on a two-element array, then transform the same values."""
    steps = [ForceAlwaysAlwaysHandleMixinStep()]
    pipeline = Pipeline(steps)

    # fit() returns the pipeline to keep using; the transformed output is discarded.
    fitted_pipeline = pipeline.fit(np.array([0, 1]), np.array([0, 1]))
    fitted_pipeline.transform(np.array([0, 1]))
示例#3
0
def test_model_stacking_transform():
    """Fit a ModelStacking pipeline on generated data and check the shape of its transform output."""
    gradient_boosting = SKLearnWrapper(
        GradientBoostingRegressor(),
        HyperparameterSpace({
            "n_estimators": RandInt(50, 600),
            "max_depth": RandInt(1, 10),
            "learning_rate": LogUniform(0.07, 0.7),
        }),
    )
    k_means = SKLearnWrapper(
        KMeans(),
        HyperparameterSpace({"n_clusters": RandInt(5, 10)}),
    )
    ridge_judge = SKLearnWrapper(
        Ridge(),
        HyperparameterSpace({
            "alpha": LogUniform(0.7, 1.4),
            "fit_intercept": Boolean(),
        }),
    )
    stacking_step = ModelStacking(
        [gradient_boosting, k_means],
        joiner=NumpyTranspose(),
        judge=ridge_judge,
    )
    pipeline = Pipeline([stacking_step])

    n_samples, n_features = 379, 13
    # Inputs are generated before targets, in the same order as the data is consumed below.
    inputs = _create_data((n_samples, n_features))
    targets = _create_data((n_samples, 1))

    predictions = pipeline.fit(inputs, targets).transform(inputs)

    assert predictions.shape == (n_samples, 1)
示例#4
0
def test_pipeline_fit_then_transform(steps_list, pipeline_runner):
    """
    Fit the parametrized pipeline, transform the same input, and compare with the expected output.

    :param steps_list: steps handed to the Pipeline constructor
    :param pipeline_runner: callable producing the runner passed as ``pipeline_runner``
    """
    inputs = [AN_INPUT]
    targets = [AN_EXPECTED_OUTPUT]
    runner = pipeline_runner()
    pipeline = Pipeline(steps_list, pipeline_runner=runner)

    fitted_pipeline = pipeline.fit(inputs, targets)
    transformed = fitted_pipeline.transform(inputs)

    # Compare as tuples so the container type of the result does not matter.
    assert tuple(transformed) == tuple(targets)
示例#5
0
def main():
    """Fit, then transform, a pipeline made of a non-fittable, a non-transformable and an identity step."""
    steps = [
        NonFittableStep(),
        NonTransformableStep(),
        # Note: Identity does nothing: it inherits from both NonFittableMixin and NonTransformableMixin.
        Identity(),
    ]
    pipeline = Pipeline(steps)

    pipeline = pipeline.fit(np.array([0, 1]), np.array([0, 1]))

    # The transformed output is computed but not used any further.
    pipeline.transform(np.array([0, 1]))
示例#6
0
def test_should_fit_each_steps(steps: List[BaseStep],
                               expected_tape: List[str]):
    """
    Check that fitting a pipeline records the expected step names on the shared tape.

    :param steps: steps the pipeline is built from
    :param expected_tape: names expected on the tape after ``fit``
    """
    # Clear the shared tape so only this test's calls are recorded.
    tape.data = []
    tape.name_tape = []

    fitted = Pipeline(steps=steps).fit(data_inputs, expected_outputs)
    recorded_names = tape.get_name_tape()

    assert isinstance(fitted, Pipeline)
    assert recorded_names == expected_tape
示例#7
0
def test_should_transform_each_steps(steps: List[BaseStep],
                                     expected_tape: List[str]):
    """
    Check that transforming records the expected step names and returns the inputs unchanged.

    :param steps: steps the pipeline is built from
    :param expected_tape: names expected on the tape after ``transform``
    """
    fitted = Pipeline(steps=steps).fit(data_inputs)

    # Clear the shared tape after fitting so that only transform calls are recorded.
    tape.data = []
    tape.name_tape = []

    transformed = fitted.transform(data_inputs)
    recorded_names = tape.get_name_tape()

    assert recorded_names == expected_tape
    assert np.array_equal(transformed, data_inputs)
示例#8
0
def test_sklearn_wrapper_transform_partial_fit_with_predict():
    """Repeatedly fit a partial-fit SGDRegressor wrapper and check its outputs are close to the targets."""
    regressor = SKLearnWrapper(SGDRegressor(), use_partial_fit=True)
    pipeline = Pipeline([DataShuffler(), regressor])
    # Column vectors: inputs 0..9, targets 10..19.
    inputs = np.array(list(range(10)))[:, np.newaxis]
    targets = np.array(list(range(10, 20)))[:, np.newaxis]

    n_rounds = 2000
    for _ in range(n_rounds):
        pipeline = pipeline.fit(inputs, targets)

    # The wrapped model is queried directly, bypassing the shuffler step.
    predictions = regressor.transform(inputs)

    assert all(
        np.isclose(expected, predicted, atol=0.1)
        for expected, predicted in zip(targets, predictions)
    )
示例#9
0
def test_fit_for_each_should_fit_all_steps_for_each_data_inputs_expected_outputs():
    """Check that ForEachDataInput fits every inner step once per (data_input, expected_output) pair."""
    recorder = TapeCallbackFunction()
    inner_pipeline = Pipeline([
        FitCallbackStep(recorder.callback, ["1"]),
        FitCallbackStep(recorder.callback, ["2"]),
    ])
    pipeline = Pipeline([ForEachDataInput(inner_pipeline)])
    inputs = [[0, 1], [1, 2]]
    targets = [[2, 3], [4, 5]]

    fitted = pipeline.fit(inputs, targets)

    assert isinstance(fitted, Pipeline)
    # Both inner steps run for the first pair, then both for the second pair.
    assert recorder.get_name_tape() == ["1", "2", "1", "2"]
    assert recorder.data == [
        ([0, 1], [2, 3]),
        ([0, 1], [2, 3]),
        ([1, 2], [4, 5]),
        ([1, 2], [4, 5]),
    ]
def main():
    """
    Fit a feature-augmented model-stacking pipeline on the Boston data and score it.

    The data is shuffled with a fixed seed, cast to float32 and split 75/25
    without further shuffling. The pipeline is fitted on the train split,
    predictions are made for both splits, and the R2 score of each is printed.

    :return: tuple ``(y_train_predicted, y_test_predicted, score_train, score_test)``
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        shuffle=False)

    p = Pipeline([
        NumpyShapePrinter(),
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        NumpyShapePrinter(),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            GradientBoostingRegressor(n_estimators=500),
            GradientBoostingRegressor(max_depth=5),
            KMeans(),
        ]),
        NumpyShapePrinter(),
    ])

    print("Fitting on train:")
    p = p.fit(X_train, y_train)
    print("")
    print("Transforming train and test:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")
    print("Evaluating transformed train:")
    # r2_score takes (y_true, y_pred); the metric is not symmetric, so the
    # ground truth must come first.
    score_train = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score_train)
    print("")
    print("Evaluating transformed test:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score_test)

    # Sanity checks on the split sizes and on the score types.
    assert y_train_predicted.shape == (379, )
    assert y_test_predicted.shape == (127, )
    assert isinstance(score_train, float)
    assert isinstance(score_test, float)

    return y_train_predicted, y_test_predicted, score_train, score_test
示例#11
0
def test_sklearn_wrapper_transform_partial_fit_classifier():
    """Repeatedly fit a partial-fit SGDClassifier wrapper and check its outputs stay within the known classes."""
    inputs = np.array([
        [0, 1], [0, 0], [3, -2], [-1, 1], [-2, 1],
        [2, 0], [2, -1], [4, -2], [-3, 1], [-1, 0],
    ])
    # Label of each row: first column + 2 * second column + 1, as a flat vector.
    labels = (inputs[:, 0] + 2 * inputs[:, 1] + 1).ravel()
    classes = np.array([0, 1, 2, 3])
    classifier = SKLearnWrapper(
        SGDClassifier(),
        use_partial_fit=True,
        partial_fit_kwargs={'classes': classes},
    )
    pipeline = Pipeline([DataShuffler(), classifier])

    n_rounds = 2000
    for _ in range(n_rounds):
        pipeline = pipeline.fit(inputs, labels)

    # The wrapped model is queried directly, bypassing the shuffler step.
    outputs = classifier.transform(inputs)

    assert outputs.shape == (10, )
    # Every predicted value must be one of the declared classes.
    assert set(outputs).issubset(set(classes))
示例#12
0
def train_neuraxle(X_train, X_test, y_train, y_test, mtype, common_name_model,
                   problemtype, classes, default_featurenames, transform_model,
                   settings, model_session):
    """
    Train a stacking pipeline for regression and pickle it into the working directory.

    For ``mtype`` in ``('regression', 'r')`` a pipeline is fitted on
    ``X_train`` / ``y_train`` and dumped to ``<common_name_model>.pickle``.
    For ``mtype`` in ``('classification', 'c')`` only a notice is printed.
    Any other ``mtype`` does nothing. The remaining parameters are accepted
    for signature compatibility and are not used here.

    :param X_train: training inputs passed to ``Pipeline.fit``
    :param X_test: unused
    :param y_train: training targets passed to ``Pipeline.fit``
    :param y_test: unused
    :param mtype: model type selector (see above)
    :param common_name_model: base name of the pickle file (without extension)
    :param problemtype: unused
    :param classes: unused
    :param default_featurenames: unused
    :param transform_model: unused
    :param settings: unused
    :param model_session: unused
    :return: tuple ``(model_name, model_dir, files)`` where ``model_dir`` is the
        current working directory and ``files`` lists the files written
        (empty when nothing was trained)
    """
    model_name = common_name_model + '.pickle'
    files = []

    if mtype in ['classification', 'c']:
        print('neuraxle currently does not support classsification...')

    elif mtype in ['regression', 'r']:

        p = Pipeline([
            NumpyShapePrinter(),
            AddFeatures([
                PCA(n_components=2),
                FastICA(n_components=2),
            ]),
            NumpyShapePrinter(),
            RidgeModelStacking([
                GradientBoostingRegressor(),
                GradientBoostingRegressor(n_estimators=500),
                GradientBoostingRegressor(max_depth=5),
                KMeans(),
            ]),
            NumpyShapePrinter(),
        ])

        # Fit the pipeline on the training data.
        pipeline = p.fit(X_train, y_train)

        # Export the fitted pipeline; the context manager closes the file
        # even if pickling fails part-way.
        print('saving model - %s' % (model_name))
        with open(model_name, 'wb') as f:
            pickle.dump(pipeline, f)

        files.append(model_name)

    model_dir = os.getcwd()

    return model_name, model_dir, files
def main():
    """Fit and transform a three-step pipeline and check the data comes out unchanged."""
    steps = [
        NonFittableStep(),
        NonTransformableStep(),
        # Note: Identity does nothing: it inherits from both NonFittableMixin and NonTransformableMixin.
        Identity(),
    ]
    pipeline = Pipeline(steps)
    some_data = np.array([0, 1])

    fitted_pipeline = pipeline.fit(some_data)
    # Out:
    #     NonFittableStep: I transformed.
    #     NonTransformableStep: I fitted.

    transformed = fitted_pipeline.transform(some_data)
    # Out:
    #     NonFittableStep: I transformed.

    assert np.array_equal(transformed, some_data)
示例#14
0
def test_expand_dim_fit():
    """
    Check what the step wrapped by ExpandDim receives on fit when a fake summary hasher is installed.

    Only the fit callback may be called. It must see the fake summary id, and
    data inputs / expected outputs wrapped in one extra outer dimension.
    """
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    fit_transform_tape = TapeCallbackFunction()
    wrapped_step = HandleCallbackStep(fit_tape, transform_tape, fit_transform_tape)
    pipeline = Pipeline([ExpandDim(wrapped_step)])
    pipeline['ExpandDim'].hashers = [SomeSummaryHasher(fake_summary_id=SUMMARY_ID)]

    pipeline = pipeline.fit(np.array(range(10)), np.array(range(10)))

    assert transform_tape.data == []
    assert fit_transform_tape.data == []

    # First positional argument of the first recorded fit call.
    received = fit_tape.data[0][0]
    expected_batch = np.array([np.array(range(10))])
    assert received.current_ids == [SUMMARY_ID]
    assert received.summary_id == SUMMARY_ID
    assert np.array_equal(np.array(received.data_inputs), expected_batch)
    assert np.array_equal(np.array(received.expected_outputs), expected_batch)
示例#15
0
def test_expand_dim_fit():
    """
    Check what the step wrapped by ExpandDim receives on fit.

    Only the fit callback may be called. It must see the single expected
    current id, and data inputs / expected outputs wrapped in one extra
    outer dimension.
    """
    fit_tape = TapeCallbackFunction()
    transform_tape = TapeCallbackFunction()
    fit_transform_tape = TapeCallbackFunction()
    wrapped_step = HandleCallbackStep(fit_tape, transform_tape, fit_transform_tape)
    pipeline = Pipeline([ExpandDim(wrapped_step)])

    pipeline = pipeline.fit(np.array(range(10)), np.array(range(10)))

    assert transform_tape.data == []
    assert fit_transform_tape.data == []

    # First positional argument of the first recorded fit call.
    received = fit_tape.data[0][0]
    expected_batch = np.array([np.array(range(10))])
    assert received.current_ids == ['781e5e245d69b566979b86e28d23f2c7']
    assert np.array_equal(np.array(received.data_inputs), expected_batch)
    assert np.array_equal(np.array(received.expected_outputs), expected_batch)
示例#16
0
def main():
    """
    Fit a stacking pipeline on the Boston data, score it, and wrap it in a Flask REST app.

    The data is shuffled with a fixed seed, cast to float32 and split 75/25
    without further shuffling. After fitting and printing the R2 scores for
    both splits, the pipeline is wrapped with ``FlaskRestApiWrapper`` using a
    custom JSON decoder and encoder. The app is built but not started.

    :return: the Flask application wrapping the fitted pipeline
    """
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.25,
                                                        shuffle=False)

    pipeline = Pipeline([
        AddFeatures([
            PCA(n_components=2),
            FastICA(n_components=2),
        ]),
        RidgeModelStacking([
            GradientBoostingRegressor(),
            KMeans(),
        ]),
    ])

    print("Fitting on train:")
    pipeline = pipeline.fit(X_train, y_train)
    print("")
    print("Transforming train and test:")
    y_train_predicted = pipeline.transform(X_train)
    y_test_predicted = pipeline.transform(X_test)
    print("")
    print("Evaluating transformed train:")
    # r2_score takes (y_true, y_pred); the metric is not symmetric, so the
    # ground truth must come first.
    score = r2_score(y_train, y_train_predicted)
    print('R2 regression score:', score)
    print("")
    print("Evaluating transformed test:")
    score = r2_score(y_test, y_test_predicted)
    print('R2 regression score:', score)
    print("Deploying the application by routing data to the transform method:")

    class CustomJSONDecoderFor2DArray(JSONDataBodyDecoder):
        """This is a custom JSON decoder class that precedes the pipeline's transformation."""
        def decode(self, data_inputs):
            """
            Transform a JSON list object into an np.array object.

            :param data_inputs: json object
            :return: np array for data inputs
            """
            return np.array(data_inputs)

    class CustomJSONEncoderOfOutputs(JSONDataResponseEncoder):
        """This is a custom JSON response encoder class for converting the pipeline's transformation outputs."""
        def encode(self, data_inputs) -> dict:
            """
            Convert predictions to a dict for creating a JSON Response object.

            :param data_inputs: outputs of the pipeline's transformation
            :return: dict with the outputs as a list under the ``'predictions'`` key
            """
            return {'predictions': list(data_inputs)}

    app = FlaskRestApiWrapper(
        json_decoder=CustomJSONDecoderFor2DArray(),
        wrapped=pipeline,
        json_encoder=CustomJSONEncoderOfOutputs()).get_app()

    print("Finally, run the app by uncommenting this next line of code:")

    # app.run(debug=False, port=5000)

    print("You can now call your pipeline over HTTP with a (JSON) REST API.")

    # test_predictions = requests.post(
    #     url='http://127.0.0.1:5000/',
    #     json=X_test.tolist()
    # )
    # print(test_predictions)
    # print(test_predictions.content)

    assert isinstance(app, Flask)

    return app
示例#17
0
# %%
# Spline features make it possible for the linear model to successfully
# leverage the periodic time-related features and reduce the error from ~14% to
# ~10% of the maximum demand, which is similar to what we observed with the
# one-hot encoded features.
#
# Qualitative analysis of the impact of features on linear model predictions
# --------------------------------------------------------------------------
#
# Here, we want to visualize the impact of the feature engineering choices on
# the time related shape of the predictions.
#
# To do so we consider an arbitrary time-based split to compare the predictions
# on a range of held out data points.
def _fit_then_predict(pipeline):
    """Fit ``pipeline`` on the ``train_0`` rows and return its predictions for the ``test_0`` rows."""
    pipeline.fit(X.iloc[train_0], y.iloc[train_0])
    return pipeline.predict(X.iloc[test_0])


# Each pipeline is fitted and evaluated on the same split, one after the other.
naive_linear_predictions = _fit_then_predict(naive_linear_pipeline)

one_hot_linear_predictions = _fit_then_predict(one_hot_linear_pipeline)

cyclic_cossin_linear_predictions = _fit_then_predict(
    cyclic_cossin_linear_pipeline)

cyclic_spline_linear_predictions = _fit_then_predict(
    cyclic_spline_linear_pipeline)

# %%
# We visualize those predictions by zooming on the last 96 hours (4 days) of
示例#18
0
    AddFeatures([
        SKLearnWrapper(PCA(n_components=2)),
        SKLearnWrapper(FastICA(n_components=2)),
    ]),
    NumpyShapePrinter(),
    RidgeModelStacking([
        SKLearnWrapper(GradientBoostingRegressor()),
        SKLearnWrapper(GradientBoostingRegressor(n_estimators=500)),
        SKLearnWrapper(GradientBoostingRegressor(max_depth=5)),
        SKLearnWrapper(KMeans()),
    ]),
    NumpyShapePrinter(),
])

# Fit the pipeline on the training split; fit() returns the pipeline to keep using.
print("Fitting on train:")
p = p.fit(X_train, y_train)
print("")

# Produce predictions for both splits with the fitted pipeline.
print("Transforming train and test:")
y_train_predicted = p.transform(X_train)
y_test_predicted = p.transform(X_test)
print("")

# r2_score takes (y_true, y_pred); the metric is not symmetric, so the
# ground truth must come first.
print("Evaluating transformed train:")
score = r2_score(y_train, y_train_predicted)
print('R2 regression score:', score)
print("")

print("Evaluating transformed test:")
score = r2_score(y_test, y_test_predicted)
print('R2 regression score:', score)