Example #1
    def test_predict_start(self):
        # Set up variables
        primitives = [
            'sklearn.preprocessing.StandardScaler',
            'sklearn.linear_model.LogisticRegression'
        ]
        pipeline = MLPipeline(primitives)
        pipeline.fit(self.X_train, self.y_train)

        # Mock the first block
        block_mock = Mock()
        pipeline.blocks['sklearn.preprocessing.StandardScaler#1'] = block_mock

        # Run predict starting after the first block
        context = {
            'X': self.X_train,
        }
        int_start = 1
        str_start = 'sklearn.linear_model.LogisticRegression#1'
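        # start_ accepts either a block index or a block name; earlier blocks are skipped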

        pipeline.predict(start_=int_start, **context)
        pipeline.predict(start_=str_start, **context)

        # Assert that mock has not been called
        block_mock.predict.assert_not_called()
Example #2
def test_fit_predict_args_in_init():
    def add(a, b):
        return a + b

    primitive = {
        'name': 'add',
        'primitive': add,
        'produce': {
            'args': [
                {
                    'name': 'a',
                    'type': 'float',
                },
                {
                    'name': 'b',
                    'type': 'float',
                },
            ],
            'output': [{
                'type': 'float',
                'name': 'out'
            }]
        }
    }

    primitives = [primitive]
    init_params = {'add': {'b': 10}}
    pipeline = MLPipeline(primitives, init_params=init_params)
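    # init_params fixes 'b' to 10, so predict only needs 'a'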

    out = pipeline.predict(a=3)

    assert out == 13
Example #3
    def test_predict_debug(self):
        outputs = {
            'default': [{
                'name': 'a_name',
                'variable': 'a_primitive#1.a_variable',
                'type': 'a_type',
            }]
        }
        mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)
        mlpipeline.blocks['a_primitive#1'].produce_args = [{
            'name': 'input',
            'type': 'whatever'
        }]

        mlpipeline.blocks['a_primitive#1'].produce_output = [{
            'name': 'a_name',
            'type': 'a_type'
        }]

        expected_return = {
            "a_primitive#1": {
                "elapsed": 0,
                "input": {"whatever"},
                "output": {"whatever"}
            }
        }
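        # With debug=True, predict also returns per-block debug info (elapsed time, inputs, outputs)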
        returned, debug_returned = mlpipeline.predict(debug=True)
        assert len([returned]) == len(outputs["default"])
        assert isinstance(debug_returned, dict)
        assert set(debug_returned.keys()) == set(expected_return.keys())

        for block_name, dictionary in expected_return.items():
            assert set(debug_returned[block_name].keys()) == set(
                dictionary.keys())
Example #4
    def test_predict_no_debug(self):
        outputs = {
            'default': [
                {
                    'name': 'a_name',
                    'variable': 'a_primitive#1.a_variable',
                    'type': 'a_type',
                },
                {
                    'name': 'b_name',
                    'variable': 'a_primitive#1.b_variable',
                    'type': 'b_type',
                },
            ]
        }
        mlpipeline = MLPipeline(['a_primitive'], outputs=outputs)
        mlpipeline.blocks['a_primitive#1'].produce_args = [{
            'name': 'input',
            'type': 'whatever'
        }]

        mlpipeline.blocks['a_primitive#1'].produce_output = [{
            'name': 'a_name',
            'type': 'a_type'
        }, {
            'name': 'b_name',
            'type': 'b_type'
        }]

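        # Without debug, predict returns only the requested output variables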
        returned = mlpipeline.predict(debug=False)
        assert len(returned) == len(outputs["default"])
        for returned_output, expected_output in zip(returned,
                                                    outputs["default"]):
            assert returned_output == expected_output["variable"]
Example #5
import featuretools as ft
import pandas as pd

from sklearn.metrics import f1_score

# MLPipeline, MLHyperparam and make_entity_set are imported/defined elsewhere in the original script.


def run():

    print("============================================")
    print("Testing Multi Table Pipeline")
    print("============================================")

    orders = pd.read_csv("data/Retail/orders.csv")
    order_products = pd.read_csv("data/Retail/order_products.csv")
    label_times = pd.read_csv("data/Retail/label_times.csv")

    X_train = label_times.sample(frac=0.8)
    X_test = label_times.drop(X_train.index)
    y_train = X_train["label"]
    y_test = X_test["label"]

    entity_set = make_entity_set(orders, order_products)

    multitable = MLPipeline(['dfs', 'random_forest_classifier'])

    updated_hyperparam = MLHyperparam('max_depth', 'int', [1, 10])
    updated_hyperparam.block_name = 'dfs'
    # multitable.update_tunable_hyperparams([updated_hyperparam])

    # Check that the hyperparameters are correct.
    for hyperparam in multitable.get_tunable_hyperparams():
        print(hyperparam)

    # Check that the blocks are correct.
    expected_blocks = {'dfs', 'rf_classifier'}
    blocks = set(multitable.blocks.keys())
    assert expected_blocks == blocks

    # Check that we can score properly.
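    # fit_params and produce_params are keyed by (block_name, argument_name) tuples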
    produce_params = {
        ('dfs', 'entityset'): entity_set,
        ('dfs', 'cutoff_time_in_index'): True
    }
    print("\nFitting pipeline...")
    fit_params = {
        ('dfs', 'entityset'): entity_set,
        ('dfs', 'target_entity'): "users",
        ('dfs', 'training_window'): ft.Timedelta("60 days")
    }
    multitable.fit(X_train,
                   y_train,
                   fit_params=fit_params,
                   produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    predicted_y_val = multitable.predict(X_test, predict_params=produce_params)
    score = f1_score(predicted_y_val, y_test, average='micro')
    print("\nf1 micro score: %f" % score)

    return score
Example #6
import glob
import os

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# MLPipeline and load_and_segment are imported/defined elsewhere in the original script.


def run(train_size=160, test_size=40):

    print("============================================")
    print("Testing Audio Pipeline")
    print("============================================")

    # Data loading.
    classes = [
        'street_music', 'siren', 'jackhammer', 'gun_shot', 'engine_idling',
        'drilling', 'dog_bark', 'children_playing', 'car_horn',
        'air_conditioner'
    ]

    labels = []
    all_filepaths = []
    for label_class in classes:
        for filepath in glob.glob(
                os.path.join('data/UrbanSound/data', label_class, '*.wav')):
            all_filepaths.append(filepath)
            labels.append(label_class)

    filepaths, filepaths_test, y, y_test = train_test_split(
        all_filepaths, labels, train_size=train_size, test_size=test_size)

    audio_pipeline = MLPipeline([
        'audio_featurizer', 'audio_padder', 'pca', 'random_forest_classifier'
    ])

    # Check that the hyperparameters are correct.
    for hyperparam in audio_pipeline.get_tunable_hyperparams():
        print(hyperparam)

    # Check that the blocks are correct.
    expected_blocks = {
        'audio_featurizer', 'audio_padder', 'pca', 'rf_classifier'
    }
    blocks = set(audio_pipeline.blocks.keys())
    assert expected_blocks == blocks

    # Check that we can score properly.
    print("\nFitting pipeline...")
    X, sample_freqs = load_and_segment(filepaths)
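    # The sample frequencies returned by load_and_segment are forwarded to the audio_featurizer block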
    produce_params = {('audio_featurizer', 'sample_freqs'): sample_freqs}
    audio_pipeline.fit(X, y, produce_params=produce_params)
    print("\nFit pipeline.")

    print("\nScoring pipeline...")
    X_test, sample_freqs_test = load_and_segment(filepaths_test)
    predict_params = {('audio_featurizer', 'sample_freqs'): sample_freqs_test}
    predicted_y_val = audio_pipeline.predict(X_test, predict_params)
    score = f1_score(predicted_y_val, y_test, average='micro')
    print("\nf1 micro score: %f" % score)

    return score