Пример #1
0
    def setUp(self):
        """Build the fixture: blob data plus a clustering -> classification graph.

        Generates 10000 samples with 5 features around 10 centers and stores
        them as ``self.X`` / ``self.y``.  A two-step :class:`PipeGraph` is
        then wired so that both steps receive the external ``X`` and the
        classifier receives the clustering step's ``predict`` output as its
        ``y``.  The resulting graph is stored as ``self.pgraph``.
        """
        self.X, self.y = datasets.make_blobs(n_samples=10000,
                                             n_features=5,
                                             centers=10)

        kmeans_step = KMeans(n_clusters=10)
        lda_step = LinearDiscriminantAnalysis()
        graph = PipeGraph(steps=[('clustering', kmeans_step),
                                 ('classification', lda_step)])

        # (sink, sink_var, source, source_var) for every edge of the graph,
        # injected in the same order as listed here.
        connections = (
            ('clustering', 'X', '_External', 'X'),
            ('classification', 'X', '_External', 'X'),
            ('classification', 'y', 'clustering', 'predict'),
        )
        for sink, sink_var, source, source_var in connections:
            graph.inject(sink=sink,
                         sink_var=sink_var,
                         source=source,
                         source_var=source_var)

        self.pgraph = graph
    'X': slice(0, 1),
    'sample_weight': slice(1, 2)
})

# Steps of the graph, given as (name, estimator) pairs.
steps = [
    ('selector', selector),
    ('custom_power', custom_power),
    ('scaler', scaler),
    ('polynomial_features', polynomial_features),
    ('linear_model', linear_model),
]

pgraph = PipeGraph(steps=steps)

# Wire the graph. Every ``inject`` call is chained on the value returned by
# the previous one; arguments are (sink, sink_var, source, source_var), with
# trailing arguments left to ``inject``'s defaults where omitted.
(
    pgraph
    .inject(sink='selector', sink_var='X', source='_External', source_var='X')
    .inject('custom_power', 'X', 'selector', 'sample_weight')
    .inject('scaler', 'X', 'selector', 'X')
    .inject('polynomial_features', 'X', 'scaler')
    .inject('linear_model', 'X', 'polynomial_features')
    .inject('linear_model', 'y', source_var='y')
    .inject('linear_model', 'sample_weight', 'custom_power')
)

###############################################################################
# Next we define ``param_grid`` in the form expected by :class:`GridSearchCV`,
# exploring a few values for each of the varying parameters. Keys follow the
# ``<step name>__<parameter>`` convention.
param_grid = {
    'polynomial_features__degree': range(1, 3),
    'linear_model__fit_intercept': [True, False],
    'custom_power__power': [1, 5, 10, 20, 30],
}
Пример #3
0
mlp = MLPClassifier()
concatenator = Concatenator()

# Steps of the graph, given as (name, estimator) pairs.
steps = [
    ('scaler', scaler),
    ('gaussian_nb', gaussian_nb),
    ('svc', svc),
    ('concat', concatenator),
    ('mlp', mlp),
]

###############################################################################
# The steps are assembled into a :class:`PipeGraph` and wired so that the
# scaler output feeds both classifiers, the three outputs are concatenated,
# and the concatenation feeds the final MLP.
# NOTE(review): this comment previously said a :class:`PipeGraphClassifier`
# is used (to get Scikit-Learn's default scoring for classifiers), but the
# code below instantiates :class:`PipeGraph` -- confirm which is intended.

pgraph = PipeGraph(steps=steps)
(
    pgraph
    .inject(sink='scaler', sink_var='X', source='_External', source_var='X')
    .inject('gaussian_nb', 'X', 'scaler')
    .inject('gaussian_nb', 'y', source_var='y')
    .inject('svc', 'X', 'scaler')
    .inject('svc', 'y', source_var='y')
    .inject('concat', 'X1', 'scaler')
    .inject('concat', 'X2', 'gaussian_nb')
    .inject('concat', 'X3', 'svc')
    .inject('mlp', 'X', 'concat')
    .inject('mlp', 'y', source_var='y')
)

# Parameter grid keyed by ``<step name>__<parameter>``.
param_grid = {
    'svc__C': [0.1, 0.5, 1.0],
    'mlp__hidden_layer_sizes': [(3,), (6,), (9,)],
    'mlp__max_iter': [5000, 10000],
}
Пример #4
0
    def test_Pipegraph__ex_3_inject(self):
        """Check that chained ``inject`` calls build the expected connections.

        A five-step graph (selector, custom_power, scaler,
        polynomial_features, linear_model) is created without connections,
        so ``fit_connections`` and ``predict_connections`` start as ``None``.
        After the ``inject`` chain both must be populated, and after fitting
        both must equal the same expected mapping of
        ``sink -> {sink_var: (source, source_var)}``.
        """
        import numpy as np
        import pandas as pd
        from sklearn.preprocessing import MinMaxScaler
        from sklearn.preprocessing import PolynomialFeatures
        from sklearn.linear_model import LinearRegression
        # Not used in the visible part of this test; kept in case later
        # statements rely on it.
        from sklearn.model_selection import GridSearchCV
        from pipegraph.base import PipeGraph
        from pipegraph.demo_blocks import CustomPower

        X = pd.DataFrame(
            dict(X=np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
                 sample_weight=np.array([
                     0.01, 0.95, 0.10, 0.95, 0.95, 0.10, 0.10, 0.95, 0.95,
                     0.95, 0.01
                 ])))
        y = np.array([10, 4, 20, 16, 25, -60, 85, 64, 81, 100, 150])

        scaler = MinMaxScaler()
        polynomial_features = PolynomialFeatures()
        linear_model = LinearRegression()
        custom_power = CustomPower()
        # NOTE(review): ColumnSelector is not imported locally; presumably it
        # comes from a module-level import -- confirm.
        selector = ColumnSelector(mapping={
            'X': slice(0, 1),
            'sample_weight': slice(1, 2)
        })

        steps = [('selector', selector), ('custom_power', custom_power),
                 ('scaler', scaler),
                 ('polynomial_features', polynomial_features),
                 ('linear_model', linear_model)]

        # NOTE(review): the original carried a "PipeGraphRegressor" remark
        # here, presumably naming an alternative class -- confirm.
        pgraph = PipeGraph(steps=steps)

        # No connections have been declared yet.
        self.assertIsNone(pgraph.fit_connections)
        self.assertIsNone(pgraph.predict_connections)

        # Arguments are (sink, sink_var, source, source_var); omitted ones
        # fall back to inject's defaults, which the expected mapping below
        # shows as source '_External' and source_var 'predict'.
        (
            pgraph
            .inject(sink='selector', sink_var='X',
                    source='_External', source_var='X')
            .inject('custom_power', 'X', 'selector', 'sample_weight')
            .inject('scaler', 'X', 'selector', 'X')
            .inject('polynomial_features', 'X', 'scaler')
            .inject('linear_model', 'X', 'polynomial_features')
            .inject('linear_model', 'y', source_var='y')
            .inject('linear_model', 'sample_weight', 'custom_power')
        )

        self.assertIsNotNone(pgraph.fit_connections)
        self.assertIsNotNone(pgraph.predict_connections)
        pgraph.fit(X, y)

        # Fit and predict connections are expected to be identical here.
        expected_connections = {
            'selector': {
                'X': ('_External', 'X')
            },
            'custom_power': {
                'X': ('selector', 'sample_weight')
            },
            'scaler': {
                'X': ('selector', 'X')
            },
            'polynomial_features': {
                'X': ('scaler', 'predict')
            },
            'linear_model': {
                'X': ('polynomial_features', 'predict'),
                'y': ('_External', 'y'),
                'sample_weight': ('custom_power', 'predict')
            }
        }

        self.assertEqual(pgraph.fit_connections, expected_connections)
        self.assertEqual(pgraph.predict_connections, expected_connections)