示例#1
0
    def test_Pipegraph__ex_3_inject(self):
        """Check that chained ``inject`` calls build the expected connections.

        A ``PipeGraphRegressor`` is created from five named steps without any
        connections, so both ``fit_connections`` and ``predict_connections``
        are expected to be ``None``. After wiring the steps with a chain of
        ``inject`` calls and fitting, both connection dictionaries must equal
        the same expected mapping of ``sink -> {sink_var: (source,
        source_var)}``.

        The expected mapping also pins what this test assumes about omitted
        arguments: a missing ``source_var`` shows up as ``'predict'`` and a
        missing ``source`` shows up as ``'_External'``.
        """
        import numpy as np
        import pandas as pd
        from sklearn.preprocessing import MinMaxScaler
        from sklearn.preprocessing import PolynomialFeatures
        from sklearn.linear_model import LinearRegression
        from pipegraph.base import PipeGraphRegressor
        from pipegraph.demo_blocks import CustomPower

        X = pd.DataFrame(
            dict(X=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
                 sample_weight=np.array([
                     0.01, 0.95, 0.10, 0.95, 0.95, 0.10, 0.10, 0.95, 0.95,
                     0.95, 0.01
                 ])))
        y = np.array([10, 4, 20, 16, 25, -60, 85, 64, 81, 100, 150])

        scaler = MinMaxScaler()
        polynomial_features = PolynomialFeatures()
        linear_model = LinearRegression()
        custom_power = CustomPower()
        selector = ColumnSelector(mapping={
            'X': slice(0, 1),
            'sample_weight': slice(1, 2)
        })

        steps = [('selector', selector), ('custom_power', custom_power),
                 ('scaler', scaler),
                 ('polynomial_features', polynomial_features),
                 ('linear_model', linear_model)]

        pgraph = PipeGraphRegressor(steps=steps)

        # No connections were passed to the constructor.
        self.assertIsNone(pgraph._pipegraph.fit_connections)
        self.assertIsNone(pgraph._pipegraph.predict_connections)

        # The calls are chained on each other's return value, one connection
        # per line.
        (pgraph
         .inject(sink='selector', sink_var='X',
                 source='_External', source_var='X')
         .inject('custom_power', 'X', 'selector', 'sample_weight')
         .inject('scaler', 'X', 'selector', 'X')
         .inject('polynomial_features', 'X', 'scaler')
         .inject('linear_model', 'X', 'polynomial_features')
         .inject('linear_model', 'y', source_var='y')
         .inject('linear_model', 'sample_weight', 'custom_power'))

        self.assertIsNotNone(pgraph._pipegraph.fit_connections)
        self.assertIsNotNone(pgraph._pipegraph.predict_connections)
        pgraph.fit(X, y)

        # Fit and predict connections are expected to be identical, so the
        # expected mapping is written once and compared against both.
        expected_connections = {
            'selector': {
                'X': ('_External', 'X')
            },
            'custom_power': {
                'X': ('selector', 'sample_weight')
            },
            'scaler': {
                'X': ('selector', 'X')
            },
            'polynomial_features': {
                'X': ('scaler', 'predict')
            },
            'linear_model': {
                'X': ('polynomial_features', 'predict'),
                'y': ('_External', 'y'),
                'sample_weight': ('custom_power', 'predict')
            }
        }

        self.assertEqual(pgraph._pipegraph.fit_connections,
                         expected_connections)
        self.assertEqual(pgraph._pipegraph.predict_connections,
                         expected_connections)
示例#2
0
 def test_ColumnSelector__pick_three_columns(self):
     """Selecting ``slice(0, 3)`` as ``'X'`` must give back the fixture frame.

     ``fit()`` is expected to return the selector itself, and the ``'X'``
     entry of ``predict`` must equal ``self.X`` unchanged.
     """
     X = self.X
     selector = ColumnSelector(mapping={'X': slice(0, 3)})
     # assertIs reports both objects on failure, unlike assertTrue(a is b).
     self.assertIs(selector.fit(), selector)
     assert_frame_equal(selector.predict(X)['X'], X)
示例#3
0
 def test_ColumnSelector__pick_two_columns(self):
     """Selecting ``slice(0, 2)`` as ``'X'`` must give columns V1 and V2.

     ``fit()`` is expected to return the selector itself, and the ``'X'``
     entry of ``predict`` must equal the ``["V1", "V2"]`` columns of
     ``self.X``.
     """
     X = self.X
     selector = ColumnSelector(mapping={'X': slice(0, 2)})
     # assertIs reports both objects on failure, unlike assertTrue(a is b).
     self.assertIs(selector.fit(), selector)
     assert_frame_equal(selector.predict(X)['X'], X.loc[:, ["V1", "V2"]])
示例#4
0
 def test_ColumnSelector__pick_one_column_last(self):
     """Selecting ``slice(2, 3)`` as ``'y'`` must give column V3 as a frame.

     ``fit()`` is expected to return the selector itself, and the ``'y'``
     entry of ``predict`` must equal the single-column frame
     ``self.X.loc[:, ["V3"]]``.
     """
     X = self.X
     selector = ColumnSelector(mapping={'y': slice(2, 3)})
     # assertIs reports both objects on failure, unlike assertTrue(a is b).
     self.assertIs(selector.fit(), selector)
     assert_frame_equal(selector.predict(X)['y'], X.loc[:, ["V3"]])
示例#5
0
 def test_ColumnSelector__mapping_is_None(self):
     """A selector built without a mapping must pass the frame through.

     ``fit()`` is expected to return the selector itself, and the output is
     expected under the ``'predict'`` key, equal to ``self.X`` unchanged.
     """
     X = self.X
     selector = ColumnSelector()
     # assertIs reports both objects on failure, unlike assertTrue(a is b).
     self.assertIs(selector.fit(), selector)
     assert_frame_equal(selector.predict(X)['predict'], X)
# Toy data set: one feature column plus a per-sample weight column, and the
# target values as a separate array.
X = pd.DataFrame({
    'X': np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
    'sample_weight': np.array(
        [0.01, 0.95, 0.10, 0.95, 0.95, 0.10, 0.10, 0.95, 0.95, 0.95, 0.01]),
})
y = np.array([10, 4, 20, 16, 25, -60, 85, 64, 81, 100, 150])

###############################################################################
# Next we define the steps and we use :class:`PipeGraphRegressor` as estimator for :class:`GridSearchCV`.

# The selector maps output names to column slices of the input frame:
# column 0 is exposed as 'X' and column 1 as 'sample_weight'.
selector = ColumnSelector(mapping={
    'X': slice(0, 1),
    'sample_weight': slice(1, 2),
})
custom_power = CustomPower()
scaler = MinMaxScaler()
polynomial_features = PolynomialFeatures()
linear_model = LinearRegression()

# Each step is registered under the name later used to wire connections.
steps = [
    ('selector', selector),
    ('custom_power', custom_power),
    ('scaler', scaler),
    ('polynomial_features', polynomial_features),
    ('linear_model', linear_model),
]

pgraph = PipeGraph(steps=steps)

(pgraph.inject(
    sink='selector', sink_var='X', source='_External', source_var='X').inject(
        'custom_power', 'X', 'selector',
        'sample_weight').inject('scaler', 'X', 'selector', 'X').inject(
            'polynomial_features', 'X', 'scaler').inject(
                'linear_model', 'X',
                'polynomial_features').inject('linear_model',