def test_Pipegraph__ex_3_inject(self):
    """Wire a five-step regressor graph via chained ``inject`` and check it.

    The test asserts that:

    * before any ``inject`` call both ``fit_connections`` and
      ``predict_connections`` of the wrapped ``_pipegraph`` are ``None``;
    * after the chained ``inject`` calls both are populated;
    * after ``fit`` both equal the same explicit mapping, in which an
      omitted ``source_var`` appears as ``'predict'`` and an omitted
      ``source`` appears as ``'_External'``.
    """
    import numpy as np
    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn.linear_model import LinearRegression
    from pipegraph.base import PipeGraphRegressor
    from pipegraph.demo_blocks import CustomPower

    # Two-column frame: column 0 holds the feature, column 1 the weights
    # that the selector splits off below.
    X = pd.DataFrame(
        dict(X=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
             sample_weight=np.array([
                 0.01, 0.95, 0.10, 0.95, 0.95, 0.10,
                 0.10, 0.95, 0.95, 0.95, 0.01
             ])))
    y = np.array([10, 4, 20, 16, 25, -60, 85, 64, 81, 100, 150])

    scaler = MinMaxScaler()
    polynomial_features = PolynomialFeatures()
    linear_model = LinearRegression()
    custom_power = CustomPower()
    selector = ColumnSelector(mapping={
        'X': slice(0, 1),
        'sample_weight': slice(1, 2)
    })
    steps = [('selector', selector),
             ('custom_power', custom_power),
             ('scaler', scaler),
             ('polynomial_features', polynomial_features),
             ('linear_model', linear_model)]
    pgraph = PipeGraphRegressor(steps=steps)

    # No connections have been declared yet.
    self.assertIsNone(pgraph._pipegraph.fit_connections)
    self.assertIsNone(pgraph._pipegraph.predict_connections)

    # Each inject() call is chained on the result of the previous one.
    (pgraph
     .inject(sink='selector', sink_var='X',
             source='_External', source_var='X')
     .inject('custom_power', 'X', 'selector', 'sample_weight')
     .inject('scaler', 'X', 'selector', 'X')
     .inject('polynomial_features', 'X', 'scaler')
     .inject('linear_model', 'X', 'polynomial_features')
     .inject('linear_model', 'y', source_var='y')
     .inject('linear_model', 'sample_weight', 'custom_power'))

    self.assertIsNotNone(pgraph._pipegraph.fit_connections)
    self.assertIsNotNone(pgraph._pipegraph.predict_connections)

    pgraph.fit(X, y)

    # The fit and predict wiring are expected to be identical, so a single
    # expected mapping is compared against both.
    expected_connections = {
        'selector': {
            'X': ('_External', 'X')
        },
        'custom_power': {
            'X': ('selector', 'sample_weight')
        },
        'scaler': {
            'X': ('selector', 'X')
        },
        'polynomial_features': {
            'X': ('scaler', 'predict')
        },
        'linear_model': {
            'X': ('polynomial_features', 'predict'),
            'y': ('_External', 'y'),
            'sample_weight': ('custom_power', 'predict')
        }
    }
    self.assertEqual(pgraph._pipegraph.fit_connections,
                     expected_connections)
    self.assertEqual(pgraph._pipegraph.predict_connections,
                     expected_connections)
def test_ColumnSelector__pick_three_columns(self):
    """Check that mapping ``'X'`` to ``slice(0, 3)`` reproduces ``self.X``.

    ``self.X`` is a fixture prepared outside this method (presumably a
    three-column frame; confirm against the test-case setup).
    """
    X = self.X
    selector = ColumnSelector(mapping={'X': slice(0, 3)})
    # fit() is called without data and must hand back the same object.
    self.assertIs(selector.fit(), selector)
    assert_frame_equal(selector.predict(X)['X'], X)
def test_ColumnSelector__pick_two_columns(self):
    """Check that mapping ``'X'`` to ``slice(0, 2)`` yields columns V1, V2.

    The prediction stored under key ``'X'`` is compared with
    ``self.X.loc[:, ["V1", "V2"]]``; ``self.X`` is a fixture prepared
    outside this method.
    """
    X = self.X
    selector = ColumnSelector(mapping={'X': slice(0, 2)})
    # fit() is called without data and must hand back the same object.
    self.assertIs(selector.fit(), selector)
    assert_frame_equal(selector.predict(X)['X'], X.loc[:, ["V1", "V2"]])
def test_ColumnSelector__pick_one_column_last(self):
    """Check that mapping ``'y'`` to ``slice(2, 3)`` yields column V3 only.

    The prediction stored under key ``'y'`` is compared with the
    single-column frame ``self.X.loc[:, ["V3"]]``; ``self.X`` is a
    fixture prepared outside this method.
    """
    X = self.X
    selector = ColumnSelector(mapping={'y': slice(2, 3)})
    # fit() is called without data and must hand back the same object.
    self.assertIs(selector.fit(), selector)
    assert_frame_equal(selector.predict(X)['y'], X.loc[:, ["V3"]])
def test_ColumnSelector__mapping_is_None(self):
    """Check that a selector built without a mapping passes ``self.X`` through.

    With no ``mapping`` argument the prediction is read from key
    ``'predict'`` and must equal the input frame unchanged.
    """
    X = self.X
    selector = ColumnSelector()
    # fit() is called without data and must hand back the same object.
    self.assertIs(selector.fit(), selector)
    assert_frame_equal(selector.predict(X)['predict'], X)
X = pd.DataFrame( dict(X=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), sample_weight=np.array([ 0.01, 0.95, 0.10, 0.95, 0.95, 0.10, 0.10, 0.95, 0.95, 0.95, 0.01 ]))) y = np.array([10, 4, 20, 16, 25, -60, 85, 64, 81, 100, 150]) ############################################################################### # Next we define the steps and we use :class:`PipeGraphRegressor` as estimator for :class:`GridSearchCV`. scaler = MinMaxScaler() polynomial_features = PolynomialFeatures() linear_model = LinearRegression() custom_power = CustomPower() selector = ColumnSelector(mapping={ 'X': slice(0, 1), 'sample_weight': slice(1, 2) }) steps = [('selector', selector), ('custom_power', custom_power), ('scaler', scaler), ('polynomial_features', polynomial_features), ('linear_model', linear_model)] pgraph = PipeGraph(steps=steps) (pgraph.inject( sink='selector', sink_var='X', source='_External', source_var='X').inject( 'custom_power', 'X', 'selector', 'sample_weight').inject('scaler', 'X', 'selector', 'X').inject( 'polynomial_features', 'X', 'scaler').inject( 'linear_model', 'X', 'polynomial_features').inject('linear_model',