示例#1
0
    def test_get_outputs_str_named(self):
        """Asking ``get_outputs`` for a named output returns the specs stored under it."""
        default_spec = {
            'name': 'a_name',
            'variable': 'a_variable',
            'type': 'a_type',
        }
        debug_spec = {
            'name': 'another_name',
            'variable': 'another_variable',
        }
        outputs = {'default': [default_spec], 'debug': [debug_spec]}
        pipeline = MLPipeline(['a_primitive', 'another_primitive'], outputs=outputs)

        returned = pipeline.get_outputs('debug')

        # Only the entries registered under 'debug' are expected back.
        assert returned == [
            {
                'name': 'another_name',
                'variable': 'another_variable',
            }
        ]
def test_fit_predict_args_in_init():
    """A produce argument given via ``init_params`` is combined with one given to ``predict``."""

    def add(a, b):
        return a + b

    produce_args = [
        {'name': 'a', 'type': 'float'},
        {'name': 'b', 'type': 'float'},
    ]
    produce_output = [{'type': 'float', 'name': 'out'}]
    primitive = {
        'name': 'add',
        'primitive': add,
        'produce': {
            'args': produce_args,
            'output': produce_output,
        },
    }

    # ``b`` is supplied at construction time; only ``a`` is passed to predict.
    pipeline = MLPipeline([primitive], init_params={'add': {'b': 10}})

    out = pipeline.predict(a=3)

    assert out == 13
示例#3
0
    def test_fit_debug_str(self):
        """``fit(debug='tm')`` returns a dict whose key structure matches the expected layout."""
        mlpipeline = MLPipeline(['a_primitive'])
        mlpipeline.blocks['a_primitive#1'].fit_args = [
            {'name': 'fit_input', 'type': 'whatever'},
        ]

        # Only the keys are compared below; the zero values are placeholders.
        expected_return = {
            'debug': 'tm',
            'fit': {
                'a_primitive#1': {
                    'time': 0,
                    'memory': 0,
                },
            },
        }

        returned = mlpipeline.fit(debug='tm')

        assert isinstance(returned, dict)
        # Top-level sections.
        assert set(returned.keys()) == set(expected_return.keys())
        # Block names inside the 'fit' section.
        assert set(returned['fit'].keys()) == set(expected_return['fit'].keys())

        # Measurement names reported for each block.
        for block_name, expected_block in expected_return['fit'].items():
            assert set(returned['fit'][block_name].keys()) == set(expected_block.keys())
示例#4
0
    def test__get_block_args(self):
        """``_get_block_args`` reads values from the context, following ``input_names`` aliases."""
        pipeline = MLPipeline([], input_names={'a_block': {'arg_3': 'arg_3_alt'}})

        block_args = [
            {'name': 'arg_1'},
            {'name': 'arg_2', 'default': 'arg_2_value'},
            {'name': 'arg_3'},
            {'name': 'arg_4', 'required': False},
        ]
        # 'arg_3' is only present in the context under its alias 'arg_3_alt'.
        context = {
            'arg_1': 'arg_1_value',
            'arg_3_alt': 'arg_3_value',
        }

        args = pipeline._get_block_args('a_block', block_args, context)

        # arg_2 (has a default) and arg_4 (not required) are missing from the
        # context and are expected to be absent from the result.
        assert args == {
            'arg_1': 'arg_1_value',
            'arg_3': 'arg_3_value',
        }
示例#5
0
    def test_get_diagram_fit(self):
        """Compare ``str(get_diagram())`` with the stored fixture for a block with fit args."""
        # Read the fixture inside a ``with`` block so the file handle is closed
        # even if reading raises.
        with open('tests/data/diagrams/diagram_fit.txt', 'r') as fixture:
            # The last character of the file is dropped (presumably a trailing newline).
            expected = fixture.read()[:-1]

        output = [
            {
                'name': 'output_variable',
                'type': 'another_whatever',
                'variable': 'a_primitive#1.output_variable'
            }
        ]

        pipeline = MLPipeline(['a_primitive'], outputs={'default': output})
        pipeline.blocks['a_primitive#1'].produce_args = [
            {
                'name': 'input_variable',
                'type': 'whatever'
            }
        ]
        pipeline.blocks['a_primitive#1'].fit_args = [
            {
                'name': 'input_variable',
                'type': 'whatever'
            }
        ]
        # The block's produce output is the same list used as the pipeline's default output.
        pipeline.blocks['a_primitive#1'].produce_output = output

        assert str(pipeline.get_diagram()) == expected
示例#6
0
    def test_get_hyperparameters(self):
        """``get_hyperparameters`` returns each block's hyperparameters keyed by block name."""
        first_block = Mock()
        first_block.get_hyperparameters.return_value = {'a': 'a'}
        second_block = Mock()
        second_block.get_hyperparameters.return_value = {'b': 'b', 'c': 'c'}

        mlpipeline = MLPipeline([])
        # Swap in the mocked blocks in place of whatever the constructor built.
        mlpipeline.blocks = OrderedDict([
            ('a.primitive.Name#1', first_block),
            ('a.primitive.Name#2', second_block),
        ])

        hyperparameters = mlpipeline.get_hyperparameters()

        expected = {
            'a.primitive.Name#1': {'a': 'a'},
            'a.primitive.Name#2': {'b': 'b', 'c': 'c'},
        }
        assert hyperparameters == expected
        # Each block must have been queried exactly once, with no arguments.
        first_block.get_hyperparameters.assert_called_once_with()
        second_block.get_hyperparameters.assert_called_once_with()
    def test_get_hyperparameters_flat(self):
        """With ``flat=True`` the result is keyed by ``(block name, hyperparameter name)`` tuples."""
        first_block = Mock()
        first_block.get_hyperparameters.return_value = {'a': 'a'}
        second_block = Mock()
        second_block.get_hyperparameters.return_value = {'b': 'b', 'c': 'c'}

        mlpipeline = MLPipeline(['a_primitive'])
        # Swap in the mocked blocks in place of whatever the constructor built.
        mlpipeline.blocks = OrderedDict([
            ('a.primitive.Name#1', first_block),
            ('a.primitive.Name#2', second_block),
        ])

        hyperparameters = mlpipeline.get_hyperparameters(flat=True)

        expected = {
            ('a.primitive.Name#1', 'a'): 'a',
            ('a.primitive.Name#2', 'b'): 'b',
            ('a.primitive.Name#2', 'c'): 'c',
        }
        assert hyperparameters == expected
        # Each block must have been queried exactly once, with no arguments.
        first_block.get_hyperparameters.assert_called_once_with()
        second_block.get_hyperparameters.assert_called_once_with()
示例#8
0
    def test__get_block_variables_is_str(self):
        """When the block attribute is a string, the variables come from that instance method."""
        pipeline = MLPipeline(['a_primitive'])
        block = pipeline.blocks['a_primitive#1']
        # The attribute holds a method name; that method supplies the variable specs.
        block.produce_outputs = 'get_produce_outputs'
        block.instance.get_produce_outputs.return_value = [
            {'name': 'output_from_function', 'type': 'test'},
        ]

        outputs = pipeline._get_block_variables(
            'a_primitive#1',
            'produce_outputs',
            {'output': 'name_output'},
        )

        # The names mapping only mentions 'output', so the returned key is
        # expected to stay 'output_from_function'.
        assert outputs == {
            'output_from_function': {
                'name': 'output_from_function',
                'type': 'test',
            }
        }
        block.instance.get_produce_outputs.assert_called_once_with()
    def test_fit_start(self):
        """Fitting with ``start_`` set to the second block must not call ``fit`` on the first."""
        pipeline = MLPipeline([
            'sklearn.preprocessing.StandardScaler',
            'sklearn.linear_model.LogisticRegression',
        ])

        # Replace the first block with a mock so any call on it can be detected.
        scaler_mock = Mock()
        pipeline.blocks['sklearn.preprocessing.StandardScaler#1'] = scaler_mock

        context = {'X': self.X, 'y': self.y}

        # The starting block is given first by position, then by name.
        for start in (1, 'sklearn.linear_model.LogisticRegression#1'):
            pipeline.fit(start_=start, **context)

        scaler_mock.fit.assert_not_called()
示例#10
0
    def test_get_diagram_multiple_blocks(self):
        """Compare ``str(get_diagram())`` with the stored fixture for a two-block pipeline."""
        # Read the fixture inside a ``with`` block so the file handle is closed
        # even if reading raises.
        with open('tests/data/diagrams/diagram_multiple_blocks.txt', 'r') as fixture:
            # The last character of the file is dropped (presumably a trailing newline).
            expected = fixture.read()[:-1]

        first_output = [
            {
                'name': 'output_variable_a',
                'type': 'another_whatever',
                'variable': 'a_primitive#1.output_variable_a'
            }
        ]
        second_output = [
            {
                'name': 'output_variable_b',
                'type': 'another_whatever',
                'variable': 'b_primitive#1.output_variable_b'
            }
        ]

        pipeline = MLPipeline(['a_primitive', 'b_primitive'], outputs={'default': second_output})
        pipeline.blocks['a_primitive#1'].produce_args = [
            {
                'name': 'input_variable',
                'type': 'whatever'
            }
        ]
        # The first block's output is wired in as the second block's input.
        pipeline.blocks['a_primitive#1'].produce_output = first_output
        pipeline.blocks['b_primitive#1'].produce_args = first_output
        pipeline.blocks['b_primitive#1'].produce_output = second_output

        assert str(pipeline.get_diagram()) == expected
示例#11
0
    def test_predict_debug(self):
        """``predict(debug=True)`` returns ``(outputs, debug_info)`` with the expected key layout."""
        outputs = {
            'default': [{
                'name': 'a_name',
                'variable': 'a_primitive#1.a_variable',
                'type': 'a_type',
            }]
        }
        mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)
        mlpipeline.blocks['a_primitive#1'].produce_args = [{
            'name': 'input',
            'type': 'whatever'
        }]

        mlpipeline.blocks['a_primitive#1'].produce_output = [{
            'name': 'a_name',
            'type': 'a_type'
        }]

        # Only the keys are compared below; the values are placeholders.
        expected_return = {
            "a_primitive#1": {
                "elapsed": 0,
                "input": {"whatever"},
                "output": {"whatever"}
            }
        }
        returned, debug_returned = mlpipeline.predict(debug=True)
        # ``[returned]`` is a one-element list, so this compares 1 with the
        # number of declared default outputs.
        assert len([returned]) == len(outputs["default"])
        assert isinstance(debug_returned, dict)
        assert set(debug_returned.keys()) == set(expected_return.keys())

        for block_name, dictionary in expected_return.items():
            assert set(debug_returned[block_name].keys()) == set(
                dictionary.keys())
示例#12
0
    def test_predict_no_debug(self):
        """``predict(debug=False)`` returns one value per declared default output."""
        default_outputs = [
            {
                'name': 'a_name',
                'variable': 'a_primitive#1.a_variable',
                'type': 'a_type',
            },
            {
                'name': 'b_name',
                'variable': 'a_primitive#1.b_variable',
                'type': 'b_type',
            },
        ]
        outputs = {'default': default_outputs}
        mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)

        block = mlpipeline.blocks['a_primitive#1']
        block.produce_args = [{'name': 'input', 'type': 'whatever'}]
        block.produce_output = [
            {'name': 'a_name', 'type': 'a_type'},
            {'name': 'b_name', 'type': 'b_type'},
        ]

        returned = mlpipeline.predict(debug=False)

        assert len(returned) == len(outputs["default"])
        # Each returned value is compared with the matching spec's 'variable' string.
        for returned_output, expected_output in zip(returned, outputs["default"]):
            assert returned_output == expected_output["variable"]
示例#13
0
    def test_fit_debug(self):
        """``fit(debug=True)`` returns a dict whose key structure matches the expected layout."""
        mlpipeline = MLPipeline(['a_primitive'])
        mlpipeline.blocks['a_primitive#1'].fit_args = [{
            'name': 'fit_input',
            'type': 'whatever'
        }]

        # Only the keys are compared below; the values are placeholders.
        expected_return = {
            "fit": {
                "a_primitive#1": {
                    "elapsed": 0,
                    "input": {"whatever"}
                }
            }
        }

        returned = mlpipeline.fit(debug=True)

        assert isinstance(returned, dict)
        assert set(returned.keys()) == set(
            expected_return.keys())  # fit / produce
        assert set(returned["fit"].keys()) == set(
            expected_return["fit"].keys())  # block name

        for block_name, dictionary in expected_return["fit"].items():
            assert set(returned["fit"][block_name].keys()) == set(
                dictionary.keys())
示例#14
0
    def test_get_predict_args(self):
        """``get_predict_args`` reports the block's produce args and leaves out its fit args."""
        pipeline = MLPipeline(['a_primitive'])
        block = pipeline.blocks['a_primitive#1']
        block.produce_args = [{'name': 'input', 'type': 'whatever'}]
        block.fit_args = [{'name': 'fit_input', 'type': 'whatever'}]
        block.produce_output = [{'name': 'output', 'type': 'another_whatever'}]

        outputs = pipeline.get_predict_args()

        assert outputs == [
            {
                'name': 'input',
                'type': 'whatever'
            }
        ]
示例#15
0
    def test_fit_produce_debug_str(self):
        """``fit(output_='default', debug='tm')`` returns outputs plus fit and produce debug info."""
        outputs = {
            'default': [
                {
                    'name': 'a_name',
                    'variable': 'a_primitive#1.a_variable',
                    'type': 'a_type',
                }
            ]
        }
        mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)

        block = mlpipeline.blocks['a_primitive#1']
        block.fit_args = [{'name': 'fit_input', 'type': 'whatever'}]
        block.produce_args = [{'name': 'input', 'type': 'whatever'}]
        block.produce_output = [{'name': 'a_name', 'type': 'a_type'}]

        # Only the keys are compared below; the zero values are placeholders.
        expected_return = {
            'debug': 'tm',
            'fit': {
                'a_primitive#1': {
                    'time': 0,
                    'memory': 0,
                },
            },
            'produce': {
                'a_primitive#1': {
                    'time': 0,
                    'memory': 0,
                },
            },
        }

        returned, debug_returned = mlpipeline.fit(output_='default', debug='tm')

        # ``[returned]`` is a one-element list, so this compares 1 with the
        # number of declared default outputs.
        assert len([returned]) == len(outputs['default'])
        assert isinstance(debug_returned, dict)
        # Top-level sections.
        assert set(debug_returned.keys()) == set(expected_return.keys())

        # Block names inside each section.
        for section in ('fit', 'produce'):
            assert set(debug_returned[section].keys()) == set(expected_return[section].keys())

        # Measurement names reported for each block.
        for section in ('fit', 'produce'):
            for block_name, expected_block in expected_return[section].items():
                reported = debug_returned[section][block_name]
                assert set(reported.keys()) == set(expected_block.keys())
示例#16
0
    def test_get_tunable_hyperparameters(self):
        """``get_tunable_hyperparameters`` returns an equal object that is not the stored one."""
        tunable = {}
        mlpipeline = MLPipeline(['a_primitive'])
        mlpipeline._tunable_hyperparameters = tunable

        returned = mlpipeline.get_tunable_hyperparameters()

        # Same content, but a different object than the internal attribute.
        assert returned == tunable
        assert returned is not tunable
示例#17
0
    def test_get_outputs_invalid(self):
        """Requesting a variable the block does not declare raises ``ValueError``."""
        pipeline = MLPipeline(['a_primitive'])
        declared_outputs = [{'name': 'output', 'type': 'whatever'}]
        pipeline.blocks['a_primitive#1'].produce_output = declared_outputs

        # 'invalid' is not among the produce outputs declared above.
        with pytest.raises(ValueError):
            pipeline.get_outputs('a_primitive#1.invalid')
示例#18
0
    def test_fit_no_debug(self):
        """``fit(debug=False)`` returns ``None``."""
        mlpipeline = MLPipeline(['a_primitive'])
        fit_args = [{'name': 'fit_input', 'type': 'whatever'}]
        mlpipeline.blocks['a_primitive#1'].fit_args = fit_args

        result = mlpipeline.fit(debug=False)

        assert result is None
    def test_get_outputs_int(self):
        """An integer passed to ``get_outputs`` selects a block by position (``-1`` is the last)."""
        pipeline = MLPipeline(['a_primitive', 'another_primitive'])

        returned = pipeline.get_outputs(-1)

        # Both 'name' and 'variable' are expected to carry the block name.
        last_block_spec = {
            'name': 'another_primitive#1',
            'variable': 'another_primitive#1',
        }
        assert returned == [last_block_spec]
示例#20
0
    def test_get_outputs_invalid(self, mlblock_mock):
        """``get_outputs`` with a list mixing named outputs, a block index and a variable name.

        NOTE(review): ``mlblock_mock`` is presumably injected by a patch decorator
        that is not visible here. The method name says "invalid", but the body
        checks a successful combined lookup; confirm the intended name.
        """
        outputs = {
            'default': [
                {
                    'name': 'a_name',
                    'variable': 'a_variable',
                    'type': 'a_type',
                }
            ],
            'debug': [
                {
                    'name': 'another_name',
                    'variable': 'another_variable',
                }
            ],
        }
        # One fresh mock per primitive handed to the pipeline.
        mlblock_mock.side_effect = [MagicMock(), MagicMock()]
        pipeline = MLPipeline(['a_primitive', 'another_primitive'], outputs=outputs)

        blocks = pipeline.blocks
        blocks['a_primitive#1'].produce_output = [{'name': 'output', 'type': 'whatever'}]
        blocks['another_primitive#1'].produce_output = [{'name': 'something'}]

        requested = ['default', 'debug', -1, 'a_primitive#1.output']
        returned = pipeline.get_outputs(requested)

        # One expected entry per requested item, in request order.
        assert returned == [
            {
                'name': 'a_name',
                'variable': 'a_variable',
                'type': 'a_type'
            },
            {
                'name': 'another_name',
                'variable': 'another_variable',
            },
            {
                'name': 'something',
                'variable': 'another_primitive#1.something',
            },
            {
                'name': 'output',
                'type': 'whatever',
                'variable': 'a_primitive#1.output'
            },
        ]
示例#21
0
    def test_get_outputs_combination(self):
        """``get_outputs`` with a list mixing named outputs, a block index and a variable name."""
        outputs = {
            'default': [
                {
                    'name': 'a_name',
                    'variable': 'a_variable',
                    'type': 'a_type',
                }
            ],
            'debug': [
                {
                    'name': 'another_name',
                    'variable': 'another_variable',
                }
            ],
        }
        pipeline = MLPipeline(['a_primitive', 'another_primitive'], outputs=outputs)

        blocks = pipeline.blocks
        blocks['a_primitive#1'].produce_output = [{'name': 'output', 'type': 'whatever'}]
        blocks['another_primitive#1'].produce_output = [{'name': 'something'}]

        requested = ['default', 'debug', -1, 'a_primitive#1.output']
        returned = pipeline.get_outputs(requested)

        # One expected entry per requested item, in request order.
        assert returned == [
            {
                'name': 'a_name',
                'variable': 'a_variable',
                'type': 'a_type'
            },
            {
                'name': 'another_name',
                'variable': 'another_variable',
            },
            {
                'name': 'another_primitive#1',
                'variable': 'another_primitive#1',
            },
            {
                'name': 'output',
                'type': 'whatever',
                'variable': 'a_primitive#1.output'
            },
        ]
示例#22
0
    def test_get_tunable_hyperparameters_flat(self):
        """With ``flat=True`` the tunable specs are keyed by ``(block, hyperparameter)`` tuples."""
        mlpipeline = MLPipeline(['a_primitive'])
        mlpipeline._tunable_hyperparameters = {
            'block_1': {
                'hp_1': {'type': 'int', 'range': [1, 10]},
            },
            'block_2': {
                'hp_1': {'type': 'str', 'default': 'a', 'values': ['a', 'b', 'c']},
                'hp_2': {'type': 'bool', 'default': True},
            },
        }

        returned = mlpipeline.get_tunable_hyperparameters(flat=True)

        # The expected specs are written out again as independent literals so the
        # comparison does not share objects with the input above.
        assert returned == {
            ('block_1', 'hp_1'): {'type': 'int', 'range': [1, 10]},
            ('block_2', 'hp_1'): {'type': 'str', 'default': 'a', 'values': ['a', 'b', 'c']},
            ('block_2', 'hp_2'): {'type': 'bool', 'default': True},
        }
示例#23
0
    def test_get_inputs_fit(self):
        """``get_inputs()`` lists fit and produce args that no earlier block produces."""
        pipeline = MLPipeline(['a_primitive', 'another_primitive'])

        first_block = pipeline.blocks['a_primitive#1']
        first_block.produce_args = [{'name': 'input', 'type': 'whatever'}]
        first_block.fit_args = [{'name': 'fit_input', 'type': 'whatever'}]
        first_block.produce_output = [{'name': 'output', 'type': 'another_whatever'}]

        # The second block consumes the first block's 'output' plus one new input.
        pipeline.blocks['another_primitive#1'].produce_args = [
            {'name': 'output', 'type': 'another_whatever'},
            {'name': 'another_input', 'type': 'another_whatever'},
        ]

        inputs = pipeline.get_inputs()

        # 'output' is produced by the first block, so it is not expected here.
        assert inputs == {
            'input': {
                'name': 'input',
                'type': 'whatever',
            },
            'fit_input': {
                'name': 'fit_input',
                'type': 'whatever',
            },
            'another_input': {
                'name': 'another_input',
                'type': 'another_whatever',
            },
        }
示例#24
0
    def test_get_output_variables(self):
        """``get_output_variables`` returns the 'variable' values of the default outputs."""
        default_spec = {
            'name': 'a_name',
            'variable': 'a_variable',
            'type': 'a_type',
        }
        pipeline = MLPipeline(['a_primitive'], outputs={'default': [default_spec]})

        variable_names = pipeline.get_output_variables()

        assert variable_names == ['a_variable']
    def test_get_outputs_str_variable(self):
        """A ``'block.variable'`` string resolves to that block's declared output spec."""
        pipeline = MLPipeline(['a_primitive', 'another_primitive'])
        declared_outputs = [{'name': 'output', 'type': 'whatever'}]
        pipeline.blocks['a_primitive#1'].produce_output = declared_outputs

        returned = pipeline.get_outputs('a_primitive#1.output')

        # The returned spec is expected to carry the full variable name as well.
        assert returned == [
            {
                'name': 'output',
                'type': 'whatever',
                'variable': 'a_primitive#1.output',
            }
        ]
示例#26
0
    def test_set_hyperparameters(self):
        """Nested hyperparameters are forwarded only to the block they are keyed by."""
        untouched_block = get_mlblock_mock()
        target_block = get_mlblock_mock()

        mlpipeline = MLPipeline(['a_primitive'])
        # Swap in the mocked blocks in place of whatever the constructor built.
        mlpipeline.blocks = OrderedDict([
            ('a.primitive.Name#1', untouched_block),
            ('a.primitive.Name#2', target_block),
        ])

        mlpipeline.set_hyperparameters({'a.primitive.Name#2': {'some': 'arg'}})

        untouched_block.set_hyperparameters.assert_not_called()
        target_block.set_hyperparameters.assert_called_once_with({'some': 'arg'})
示例#27
0
    def test_set_hyperparameters_flat(self):
        """Tuple-keyed hyperparameters reach the named block as a plain ``{name: value}`` dict."""
        untouched_block = Mock()
        target_block = Mock()

        mlpipeline = MLPipeline([])
        # Swap in the mocked blocks in place of whatever the constructor built.
        mlpipeline.blocks = OrderedDict([
            ('a.primitive.Name#1', untouched_block),
            ('a.primitive.Name#2', target_block),
        ])

        mlpipeline.set_hyperparameters({('a.primitive.Name#2', 'some'): 'arg'})

        untouched_block.set_hyperparameters.assert_not_called()
        target_block.set_hyperparameters.assert_called_once_with({'some': 'arg'})
    def test__get_block_variables_is_dict(self):
        """When the block attribute is a list of specs, the names mapping renames the keys."""
        pipeline = MLPipeline(['a_primitive'])
        declared_outputs = [{'name': 'output', 'type': 'whatever'}]
        pipeline.blocks['a_primitive#1'].produce_outputs = declared_outputs

        names = {'output': 'name_output'}
        outputs = pipeline._get_block_variables('a_primitive#1', 'produce_outputs', names)

        # The key is the mapped name; the spec itself keeps its original 'name'.
        assert outputs == {
            'name_output': {
                'name': 'output',
                'type': 'whatever',
            }
        }
示例#29
0
    def test__get_outputs_defaults(self):
        """Explicit outputs are returned as given and the block outputs are never queried."""
        # NOTE(review): the ``autospec`` keyword is passed straight to MagicMock;
        # confirm whether ``spec=MLPipeline`` was intended.
        self_ = MagicMock(autospec=MLPipeline)

        returned = MLPipeline._get_outputs(self_, {}, {'default': ['some', 'outputs']})

        assert returned == {'default': ['some', 'outputs']}
        self_._get_block_outputs.assert_not_called()
示例#30
0
    def test_fit_pending_one_primitive(self):
        """Call ``MLPipeline.fit`` on a mock whose ``_last_fit_block`` is the first block.

        As written, the only effective check is that ``_produce_block`` was
        never called (see the review note on ``_fit_block`` below).
        """
        block_1 = get_mlblock_mock()
        block_2 = get_mlblock_mock()
        blocks = OrderedDict((
            ('a.primitive.Name#1', block_1),
            ('a.primitive.Name#2', block_2),
        ))

        # NOTE(review): the ``autospec`` keyword is passed straight to MagicMock;
        # confirm whether ``spec=MLPipeline`` was intended.
        self_ = MagicMock(autospec=MLPipeline)
        self_.blocks = blocks
        self_._last_fit_block = 'a.primitive.Name#1'

        # Invoke the real ``fit`` implementation with the mock standing in for ``self``.
        MLPipeline.fit(self_)

        expected = [
            call('a.primitive.Name#1'),
        ]
        # NOTE(review): this ASSIGNS to ``call_args_list`` instead of comparing
        # against it, so nothing about the ``_fit_block`` calls is verified. An
        # ``assert ... == expected`` was presumably intended; confirm the real
        # ``_fit_block`` call signature before changing it.
        self_._fit_block.call_args_list = expected

        assert not self_._produce_block.called