示例#1
0
class Pipe2(Pipe):
    """Standardise the float list ``x`` with a mean and variance set by fit."""
    requirements = {'a2'}

    # Declared input schemas of fit() and transform().
    fit_requires = {'x': types.List(float)}
    transform_requires = {'x': types.List(float)}

    # Declared schema of the key that transform() overwrites.
    transform_modifies = {'x': types.List(float)}

    # Declared fit parameter; fit() below never reads it.
    fit_parameters = {'unused': float}

    # Values that fit() stores on the pipe.
    fitted_parameters = {'mean': float, 'var': float}

    def fit(self, data: dict, parameters: dict = None):
        """Store the mean and the variance of ``data['x']`` on the pipe.

        The variance is computed as mean-of-squares minus squared mean.
        ``parameters`` is accepted but not used.
        """
        values = data['x']
        count = len(values)
        self['mean'] = sum(values) / count
        mean_of_squares = sum(value**2 for value in values) / count
        self['var'] = mean_of_squares - self['mean']**2

    def transform(self, data: dict):
        """Replace ``data['x']`` by its standardised values and return ``data``.

        Each item becomes ``(item - mean) / var**0.5`` using the stored
        ``mean`` and ``var``.
        """
        def standardise(value):
            return (value - self['mean']) / self['var']**0.5

        data['x'] = list(map(standardise, data['x']))
        return data
示例#2
0
class Pipe(pipe.Pipe):
    """Fit a Lasso regression in ``fit`` and expose the model in ``transform``."""
    requirements = {'sklearn'}

    # variables required by fit (supervised learning)
    fit_requires = {
        # (arbitrary items, arbitrary features)
        'x': types.Array(np.float64, shape=(None, None)),
        'y': types.List(float)
    }

    transform_requires = {'x': types.List(float)}

    # parameter passed to fit()
    fit_parameters = {'alpha': float}

    # parameter assigned in fit()
    fitted_parameters = {'model': object}

    # key written by transform() and its declared type
    transform_modifies = {'model': object}

    def fit(self, data, parameters=None):
        """Create a Lasso model with the given ``alpha`` and fit it.

        :param data: mapping holding the features under ``'x'`` and the
            targets under ``'y'``.
        :param parameters: mapping holding the regularisation strength under
            ``'alpha'``. It is required even though the signature defaults
            it to ``None``.
        :raises TypeError: if ``parameters`` is ``None``.
        :raises KeyError: if ``parameters`` is a dict without ``'alpha'``.
        """
        import sklearn.linear_model

        # The original subscripted None here and failed with an opaque
        # "'NoneType' object is not subscriptable"; keep the exception type
        # but say what is actually missing.
        if parameters is None:
            raise TypeError(
                "fit() requires parameters with an 'alpha' entry, got None")

        self['model'] = sklearn.linear_model.Lasso(alpha=parameters['alpha'])

        self['model'].fit(data['x'], data['y'])

    def transform(self, data):
        """Store the fitted model under ``data['model']`` and return ``data``."""
        data['model'] = self['model']
        return data
示例#3
0
class Pipe4(Pipe):
    """Declaration-only pipe whose fit needs ``x1`` and transform needs ``x``."""
    # Declared input schema of transform().
    transform_requires = {'x': types.List(float)}

    # Declared input schema of fit(); note the key differs from transform's.
    fit_requires = {'x1': types.List(float)}

    # Declared fitted value; this class defines no fit() that sets it.
    fitted_parameters = {'mean': float}
示例#4
0
    def test_transform_schema(self):
        """A schema with ``x`` gains ``model``; a schema without it is rejected."""
        subject = Pipe()

        # Valid input: 'x' is kept and the 'model' entry is added.
        resulting_schema = subject.transform_schema({'x': types.List(float)})
        expected_schema = {'x': types.List(float), 'model': object}
        self.assertEqual(resulting_schema, expected_schema)

        # Invalid input: 'x' is absent, and the error names the transform step.
        with self.assertRaises(exceptions.WrongSchema) as caught:
            subject.transform_schema({'y': types.List(float)})
        message = str(caught.exception)
        self.assertIn('in transform', message)
示例#5
0
    def test_two_fit_schema(self):
        """Pipeline schemas when two pipes declare different fit inputs."""
        # Pipe4's fit needs 'x1' and Pipe2's fit needs 'x' (as floats), so the
        # pipeline's fit_requires must list both keys, each with the type it
        # has at its first occurrence.
        pipeline = Pipeline([Pipe1(), Pipe4(), Pipe2()])

        self.assertEqual(pipeline.transform_requires, {'x': types.List(str)})

        actual_fit_requires = pipeline.fit_requires
        expected_fit_requires = {
            'x': types.List(str),
            'x1': types.List(float),
        }
        self.assertEqual(actual_fit_requires, expected_fit_requires)

        self.assertEqual(pipeline.transform_modifies,
                         {'x': types.List(float)})
示例#6
0
class Pipe1(Pipe):
    """Convert the string list ``x`` into a list of floats."""
    requirements = {'a1'}

    # Declared input schema of transform().
    transform_requires = {'x': types.List(str)}

    # Declared schema of the key that transform() overwrites.
    transform_modifies = {'x': types.List(float)}

    def transform(self, data: dict):
        """Replace ``data['x']`` by its items passed through ``float``.

        Returns the same ``data`` mapping that was passed in.
        """
        data['x'] = list(map(float, data['x']))
        return data
示例#7
0
    def test_two_transform_data(self):
        """Pipeline schemas when a later pipe needs an extra transform key."""
        # Pipe1 needs 'x'; the second pipe (Pipe3) additionally needs 'x1'.
        pipeline = Pipeline([Pipe1(), Pipe3(), Pipe2()])

        actual_transform_requires = pipeline.transform_requires
        self.assertEqual(
            actual_transform_requires,
            {'x': types.List(str), 'x1': types.List(str)})

        actual_fit_requires = pipeline.fit_requires
        self.assertEqual(
            actual_fit_requires,
            {'x': types.List(str), 'x1': types.List(str)})

        self.assertEqual(pipeline.transform_modifies,
                         {'x': types.List(float)})
示例#8
0
class PipeWrongTransform(Pipe):
    """
    Declares that ``x`` becomes a list of floats, but produces ints instead.
    """
    requirements = {'a1'}

    # Declared input schema of transform().
    transform_requires = {'x': types.List(str)}

    # Declared output type; transform() below does not honour it.
    transform_modifies = {'x': types.List(float)}

    def transform(self, data: dict):
        """Replace ``data['x']`` by its items passed through ``int``.

        The mismatch with ``transform_modifies`` is this class's stated
        purpose. Returns the same ``data`` mapping that was passed in.
        """
        data['x'] = list(map(int, data['x']))
        return data
示例#9
0
    def test_basic(self):
        """Check schemas, then fit and transform a Pipe1 -> Pipe2 pipeline."""
        pipeline = Pipeline([Pipe1(), Pipe2()])

        # Fitted parameters are reported per pipe, keyed '0' and '1'.
        self.assertEqual(
            pipeline.fitted_parameters,
            {'0': {}, '1': {'mean': float, 'var': float}})
        self.assertEqual(pipeline.transform_requires, {'x': types.List(str)})
        self.assertEqual(pipeline.fit_requires, {'x': types.List(str)})

        # Parameters are passed per pipe as well; only pipe '1' receives any.
        pipeline.fit({'x': ['1', '2', '3']}, {'1': {'unused': 1.0}})
        transformed = pipeline.transform({'x': ['1', '2', '3']})

        # std([1,2,3]) == 0.816496580927726
        expected_x = [-1.2247448713915887, 0.0, 1.2247448713915887]
        self.assertEqual(transformed['x'], expected_x)
示例#10
0
class FillNaN(Pipe):
    """Fill missing values in ``x`` and ``x_categorical`` with fitted values.

    ``x`` is filled with per-column means and ``x_categorical`` with the
    per-column most frequent value.
    """
    # One schema object is shared by the three declarations.
    fit_requires = transform_modifies = transform_requires = {
        'x': sf_types.PandasDataFrame(schema={}),
        'x_categorical': sf_types.PandasDataFrame(schema={}),
    }

    # NOTE(review): fit() stores the raw results of DataFrame.mean/mode;
    # confirm that the List declarations below are what is intended.
    fitted_parameters = {
        'means': sf_types.List(float),
        'most_frequent': sf_types.List(str),
    }

    def fit(self, data: dict, parameters: dict = None):
        """Store the column means of ``x`` and the modes of ``x_categorical``.

        ``parameters`` is accepted but not used.
        """
        numeric_frame = data['x']
        categorical_frame = data['x_categorical']
        self['means'] = numeric_frame.mean(axis=0)
        self['most_frequent'] = categorical_frame.mode(axis=0)

    def transform(self, data: dict):
        """Fill the missing entries of both frames and return ``data``.

        ``data['x']`` is replaced by the result of ``fillna``, whereas
        ``data['x_categorical']`` is modified in place.
        """
        data['x'] = data['x'].fillna(self['means'])

        categorical_frame = data['x_categorical']
        for name in categorical_frame.columns:
            # Entry 0 of the column's fitted modes is the replacement value.
            replacement = self['most_frequent'][name][0]
            is_missing = categorical_frame[name].isnull()
            categorical_frame.loc[is_missing, name] = replacement
        return data
示例#11
0
class SplitNumericCategorical(Pipe):
    """Split ``x`` into its numeric columns and the remaining columns.

    After transform, ``x`` holds the numeric columns found during fit and
    ``x_categorical`` holds every other column.
    """
    # One schema object is shared by fit() and transform() inputs.
    fit_requires = transform_requires = {
        'x': sf_types.PandasDataFrame(schema={}),
    }

    # Keys written by transform().
    transform_modifies = {
        'x_categorical': sf_types.PandasDataFrame(schema={}),
        'x': sf_types.PandasDataFrame(schema={}),
    }

    fitted_parameters = {'numeric_columns': sf_types.List(str)}

    def fit(self, data: dict, parameters: dict = None):
        """Store the labels of the ``np.number`` columns of ``data['x']``.

        ``parameters`` is accepted but not used.
        """
        numeric_part = data['x'].select_dtypes(include=[np.number])
        self['numeric_columns'] = list(numeric_part.columns)

    def transform(self, data: dict):
        """Write the two column subsets into ``data`` and return it."""
        frame = data['x']
        numeric_columns = self['numeric_columns']
        # Both subsets come from the original frame, before 'x' is rebound.
        data['x_categorical'] = frame.drop(columns=numeric_columns)
        data['x'] = frame.loc[:, numeric_columns]
        return data
示例#12
0
    def test_transform_schema(self):
        """transform_schema output and its error message per failing pipe."""
        # Pipe1 needs 'x'; the second pipe (Pipe3) additionally needs 'x1'.
        pipeline = Pipeline([Pipe1(), Pipe3(), Pipe2()])

        # 'x1' is passed along without modification
        full_schema = {'x': types.List(str), 'x1': types.List(str)}
        resulting_schema = pipeline.transform_schema(full_schema)
        self.assertEqual(
            resulting_schema,
            {'x': types.List(float), 'x1': types.List(str)})

        # Without 'x' the first pipe (index '0') is reported.
        with self.assertRaises(exceptions.WrongSchema) as caught:
            pipeline.transform_schema({'y': types.List(str)})
        message = str(caught.exception)
        self.assertIn('in transform of pipe \'0\' of Pipeline', message)

        # With 'x' but without 'x1' the second pipe (index '1') is reported.
        with self.assertRaises(exceptions.WrongSchema) as caught:
            pipeline.transform_schema({'x': types.List(str)})
        message = str(caught.exception)
        self.assertIn('in transform of pipe \'1\' of Pipeline', message)
示例#13
0
class Pipe3(Pipe):
    # Declares two transform inputs: 'x1' as a list of str and 'x' as a list
    # of float. No methods are visible in this part of the file.
    transform_requires = {
        'x1': types.List(str),
        'x': types.List(float),
    }