Exemplo n.º 1
0
def create_test_case_invalid_step_choosen():
    """
    Build a NeuraxleTestCase whose hyperparams enable a step named 'c'.

    The ChooseOneOrManyStepsOf below only declares steps 'a' and 'b'. The
    hyperparams disable 'b' and enable 'c', which is not one of the declared
    steps -- presumably the "invalid step" the function name refers to.

    :return: the NeuraxleTestCase describing the pipeline and expectations
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()

    step_a = TransformCallbackStep(
        a_callback, transform_function=lambda di: di * 2)
    step_b = TransformCallbackStep(
        b_callback, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneOrManyStepsOf([('a', step_a), ('b', step_b)]),
    ])

    hyperparams = {
        'ChooseOneOrManyStepsOf__c__enabled': True,
        'ChooseOneOrManyStepsOf__b__enabled': False,
    }
    hyperparams_space = {
        'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
        'ChooseOneOrManyStepsOf__b__enabled': Boolean(),
    }

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[a_callback, b_callback],
        expected_callbacks_data=[DATA_INPUTS, DATA_INPUTS],
        hyperparams=hyperparams,
        hyperparams_space=hyperparams_space,
        # even numbers 0, 2, ..., 18
        expected_processed_outputs=np.arange(0, 20, 2),
    )
Exemplo n.º 2
0
def create_test_case_fit_multiple_steps_choosen():
    """
    Build a NeuraxleTestCase where both steps 'a' and 'b' are enabled.

    For each step, the first callback passed to FitTransformCallbackStep is
    expected to record nothing, and the second one is expected to record
    (DATA_INPUTS, EXPECTED_OUTPUTS).

    :return: the NeuraxleTestCase describing the pipeline and expectations
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()
    c_callback = TapeCallbackFunction()
    d_callback = TapeCallbackFunction()

    step_a = FitTransformCallbackStep(
        a_callback, c_callback, transform_function=lambda di: di * 2)
    step_b = FitTransformCallbackStep(
        b_callback, d_callback, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneOrManyStepsOf([('a', step_a), ('b', step_b)]),
    ])

    recorded_fit_call = (DATA_INPUTS, EXPECTED_OUTPUTS)
    enabled_flags = {
        'ChooseOneOrManyStepsOf__a__enabled': True,
        'ChooseOneOrManyStepsOf__b__enabled': True,
    }
    flags_space = {
        'ChooseOneOrManyStepsOf__a__enabled': Boolean(),
        'ChooseOneOrManyStepsOf__b__enabled': Boolean(),
    }

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[a_callback, c_callback, b_callback, d_callback],
        expected_callbacks_data=[[], recorded_fit_call, [], recorded_fit_call],
        hyperparams=enabled_flags,
        hyperparams_space=flags_space,
        # the even numbers 0..18, repeated twice back to back
        expected_processed_outputs=np.tile(np.arange(0, 20, 2), 2),
    )
Exemplo n.º 3
0
def test_pipeline_nested_mutate_inverse_transform_without_identities():
    """
    This test was required for a strange bug at the border of the pipelines
    that happened when the identities were not used.

    The tape must read "1".."7" after fit_transform, followed by "7".."1"
    after transforming with the reversed pipeline.
    """
    tape = TapeCallbackFunction()
    forward_names = [str(number) for number in range(1, 8)]
    expected_tape = forward_names + forward_names[::-1]

    def named_step(name):
        # every step shares the same tape and records its own name
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        named_step("1"),
        named_step("2"),
        Pipeline([named_step("3"), named_step("4"), named_step("5")]),
        named_step("6"),
        named_step("7"),
    ])

    # first pass: adds "1" to "7" to the tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating, inversing, and calling each inverse_transform]")
    # second pass: adds "7" down to "1", calling the inverse_transforms.
    inverted_pipeline = reversed(p)
    inverted_pipeline.transform(np.ones((1, 1)))

    actual_tape = tape.get_name_tape()
    print(expected_tape)
    print(actual_tape)
    assert expected_tape == actual_tape
Exemplo n.º 4
0
def test_pipeline_nested_mutate_inverse_transform():
    """
    Mutate a nested pipeline so that transform calls inverse_transform.

    The tape must read "1".."7" after fit_transform, followed by "7".."1"
    after transforming with the mutated pipeline.
    """
    tape = TapeCallbackFunction()
    forward_names = [str(number) for number in range(1, 8)]
    expected_tape = forward_names + forward_names[::-1]

    def named_step(name):
        # every step shares the same tape and records its own name
        return TransformCallbackStep(tape.callback, [name])

    p = Pipeline([
        Identity(),
        named_step("1"),
        named_step("2"),
        Pipeline([
            Identity(),
            named_step("3"),
            named_step("4"),
            named_step("5"),
            Identity(),
        ]),
        named_step("6"),
        named_step("7"),
        Identity(),
    ])

    # first pass: adds "1" to "7" to the tape.
    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(
        new_method="inverse_transform",
        method_to_assign_to="transform",
    )

    # second pass: adds "7" down to "1" to the tape.
    p.transform(np.ones((1, 1)))

    actual_tape = tape.get_name_tape()
    print(expected_tape)
    print(actual_tape)
    assert expected_tape == actual_tape
Exemplo n.º 5
0
def test_when_hyperparams_and_saved_same_pipeline_should_load_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit an identically configured pipeline twice in the same tmpdir.

    The tape observed by the second pipeline must match
    EXPECTED_TAPE_AFTER_CHECKPOINT, and its outputs must equal data_inputs.
    """
    # Given
    tape = TapeCallbackFunction()

    def build_pipeline(pipeline_tape):
        # both pipelines share tmpdir and the same hyperparameters
        return create_pipeline(
            tmpdir=tmpdir,
            pickle_checkpoint_step=DefaultCheckpoint(),
            tape=pipeline_tape,
            hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
        )

    # When
    pipeline_save = build_pipeline(TapeCallbackFunction())
    pipeline_save.fit_transform(data_inputs, expected_outputs)

    pipeline_load = build_pipeline(tape)
    pipeline_load, actual_data_inputs = pipeline_load.fit_transform(
        data_inputs, expected_outputs)

    # Then
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert tape.get_name_tape() == EXPECTED_TAPE_AFTER_CHECKPOINT
Exemplo n.º 6
0
def test_load_full_dump_from_path(tmpdir):
    """
    Save a fitted pipeline with full_dump=True, then load only 'step_b'.

    The loaded OutputTransformerWrapper must still carry the data recorded
    by the callbacks of the step it wraps.
    """
    # Given
    # NOTE(review): other call sites in this file pass the transform tape
    # first and the fit tape second -- the local names follow that order.
    transform_tape = TapeCallbackFunction()
    fit_tape = TapeCallbackFunction()
    step_a = ('step_a', Identity())
    step_b = (
        'step_b',
        OutputTransformerWrapper(
            FitTransformCallbackStep(transform_tape, fit_tape)),
    )
    pipeline = Pipeline([step_a, step_b], cache_folder=tmpdir)
    pipeline = pipeline.set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    step_b_path = os.path.join(PIPELINE_NAME, 'step_b')
    loaded_pipeline = ExecutionContext(tmpdir).load(step_b_path)

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    wrapped_step = loaded_pipeline.wrapped

    first_transform_call = wrapped_step.transform_callback_function.data[0]
    assert np.array_equal(first_transform_call, EXPECTED_OUTPUTS)

    first_fit_call = wrapped_step.fit_callback_function.data[0]
    assert np.array_equal(first_fit_call[0], EXPECTED_OUTPUTS)
    assert np.array_equal(first_fit_call[1], [None] * len(EXPECTED_OUTPUTS))
Exemplo n.º 7
0
def test_when_hyperparams_and_saved_no_pipeline_should_not_load_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit a first pipeline built with save_pipeline=False, then a second one.

    The second pipeline uses a DefaultCheckpoint; its tape must read
    ["1", "2", "3"] and its outputs must equal data_inputs.
    """
    # Given
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = DefaultCheckpoint()

    # When
    first_pipeline_kwargs = dict(
        tmpdir=tmpdir,
        pickle_checkpoint_step=Identity(),
        tape=TapeCallbackFunction(),
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
        different=True,
        save_pipeline=False,
    )
    pipeline_save = create_pipeline(**first_pipeline_kwargs)
    pipeline_save.fit_transform(data_inputs, expected_outputs)

    second_pipeline_kwargs = dict(
        tmpdir=tmpdir,
        pickle_checkpoint_step=pickle_checkpoint_step,
        tape=tape,
        hyperparameters=HyperparameterSamples({"a__learning_rate": 1}),
    )
    pipeline_load = create_pipeline(**second_pipeline_kwargs)
    pipeline_load, actual_data_inputs = pipeline_load.fit_transform(
        data_inputs, expected_outputs)

    # Then
    assert np.array_equal(actual_data_inputs, data_inputs)
    assert tape.get_name_tape() == ["1", "2", "3"]
def test_validation_split_wrapper_should_split_data(tmpdir):
    """
    Check that ValidationSplitWrapper(test_size=0.1) splits 100 rows 90/10.

    The fit callback must see the first 90 rows. The transform callback must
    see the first 90 rows, then the last 10, then the whole data set.
    """
    transform_callback = TapeCallbackFunction()
    fit_callback = TapeCallbackFunction()
    doubling_step = FitTransformCallbackStep(
        transform_callback_function=transform_callback,
        fit_callback_function=fit_callback,
        transform_function=lambda di: di * 2)
    random_search = RandomSearch(
        ValidationSplitWrapper(doubling_step, test_size=0.1))
    data_inputs = np.random.randint(1, 100, (100, 5))
    expected_outputs = np.random.randint(1, 100, (100, 5))

    random_search, outputs = random_search.fit_transform(
        data_inputs, expected_outputs)

    assert np.array_equal(outputs, data_inputs * 2)

    # fit happens on the train split (first 90 rows)
    first_fit_call = fit_callback.data[0]
    assert np.array_equal(first_fit_call[0], data_inputs[0:90])
    assert np.array_equal(first_fit_call[1], expected_outputs[0:90])

    # transform is recorded on the train split, then on the test split
    assert np.array_equal(transform_callback.data[0], data_inputs[0:90])
    assert np.array_equal(transform_callback.data[1], data_inputs[90:])

    # and finally on all of the data
    assert np.array_equal(transform_callback.data[2], data_inputs)

    score_attributes = (
        'scores_train',
        'scores_validation',
        'scores_train_mean',
        'scores_validation_mean',
        'scores_train_std',
        'scores_validation_std',
    )
    for attribute_name in score_attributes:
        assert getattr(random_search.best_model, attribute_name) is not None
Exemplo n.º 9
0
def choose_one_step_single_step_chosen_transform():
    """
    Build a NeuraxleTestCase in which ChooseOneStepOf is told to pick 'a'.

    Only the first callback of step 'a' is expected to record DATA_INPUTS;
    the three other callbacks are expected to record nothing.

    :return: the NeuraxleTestCase describing the pipeline and expectations
    """
    a_callback = TapeCallbackFunction()
    b_callback = TapeCallbackFunction()
    c_callback = TapeCallbackFunction()
    d_callback = TapeCallbackFunction()

    step_a = FitTransformCallbackStep(
        a_callback, c_callback, transform_function=lambda di: di * 2)
    step_b = FitTransformCallbackStep(
        b_callback, d_callback, transform_function=lambda di: di * 2)
    pipeline = Pipeline([
        ChooseOneStepOf([('a', step_a), ('b', step_b)]),
    ])

    # NOTE(review): the key is prefixed with 'ChooseOneOrManyStepsOf' even
    # though the step is a ChooseOneStepOf -- confirm it matches the step name.
    chosen_step = {'ChooseOneOrManyStepsOf__choice': 'a'}

    return NeuraxleTestCase(
        pipeline=pipeline,
        callbacks=[a_callback, c_callback, b_callback, d_callback],
        expected_callbacks_data=[DATA_INPUTS, [], [], []],
        hyperparams=chosen_step,
        # even numbers 0, 2, ..., 18
        expected_processed_outputs=np.arange(0, 20, 2),
    )
def test_when_hyperparams_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit a pipeline with hyperparameters and a DefaultCheckpoint.

    Afterwards the 'di' and 'eo' checkpoint folders must each contain the
    two expected hash-named pickle files.
    """
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = DefaultCheckpoint()
    hyperparameters = HyperparameterSamples({"a__learning_rate": 1})
    pipeline = create_pipeline(
        tmpdir, pickle_checkpoint_step, tape, hyperparameters)

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs)

    assert np.array_equal(actual_data_inputs, data_inputs)
    assert tape.get_name_tape() == ["1", "2", "3"]

    pickle_file_names = (
        '44f9d6dd8b6ccae571ca04525c3eaffa.pickle',
        '898a67b2f5eeae6393ca4b3162ba8e3d.pickle',
    )
    # 'di' and 'eo' are the two sub folders written by the checkpoint
    for sub_folder in ('di', 'eo'):
        for file_name in pickle_file_names:
            assert os.path.exists(
                os.path.join(tmpdir, 'ResumablePipeline', 'pickle_checkpoint',
                             sub_folder, file_name))
Exemplo n.º 11
0
def test_transform_should_transform_all_steps_for_each_data_inputs_expected_outputs():
    """
    Transform two data inputs through a ForEachDataInput-wrapped pipeline.

    Both wrapped steps must be called once per data input, in order, so the
    name tape reads "1", "2", "1", "2".
    """
    tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([
        TransformCallbackStep(tape.callback, ["1"]),
        TransformCallbackStep(tape.callback, ["2"]),
    ])
    p = Pipeline([ForEachDataInput(wrapped_pipeline)])
    data_inputs = [[0, 1], [1, 2]]

    p.transform(data_inputs)

    assert tape.get_name_tape() == ["1", "2"] * 2
Exemplo n.º 12
0
def create_checkpoint_test_case(tmpdir):
    """
    Build a CheckpointTest around a three-step ResumablePipeline.

    The pipeline is step1 -> DefaultCheckpoint -> step2, cached in tmpdir.
    Each callback step gets its own transform tape and fit tape.

    :param tmpdir: folder passed to the pipeline as cache_folder
    :return: CheckpointTest holding the four tapes and the pipeline
    """
    tape_transform_1 = TapeCallbackFunction()
    tape_fit_1 = TapeCallbackFunction()
    tape_transform_2 = TapeCallbackFunction()
    tape_fit_2 = TapeCallbackFunction()

    steps = [
        ('step1', FitTransformCallbackStep(tape_transform_1, tape_fit_1)),
        ('checkpoint', DefaultCheckpoint()),
        ('step2', FitTransformCallbackStep(tape_transform_2, tape_fit_2)),
    ]
    pipeline = ResumablePipeline(steps, cache_folder=tmpdir)

    return CheckpointTest(
        tape_transform_1,
        tape_fit_1,
        tape_transform_2,
        tape_fit_2,
        pipeline,
    )
Exemplo n.º 13
0
def test_predict_should_predict_in_test_mode():
    """
    predict must only apply the TestOnlyWrapper branch.

    With inputs [1, 1], the expected output is [2, 2]: the multiply-by-2
    step ran and the TrainOnlyWrapper multiply-by-4 step did not.
    """
    tape_fit = TapeCallbackFunction()
    tape_transform = TapeCallbackFunction()
    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit))
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit))
    p = Pipeline([test_only_step, train_only_step])

    outputs = p.predict(np.array([1, 1]))

    expected = np.array([2, 2])
    assert np.array_equal(outputs, expected)
Exemplo n.º 14
0
def test_predict_should_transform_with_initial_is_train_mode_after_predict():
    """
    A transform issued after predict must not stay in test mode.

    With inputs [1, 1], the transform following predict is expected to
    output [4, 4], i.e. only the TrainOnlyWrapper multiply-by-4 step ran.
    """
    tape_fit = TapeCallbackFunction()
    tape_transform = TapeCallbackFunction()
    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit))
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit))
    p = Pipeline([test_only_step, train_only_step])

    # the result of predict is irrelevant here; only what follows matters
    p.predict(np.array([1, 1]))
    outputs = p.transform(np.array([1, 1]))

    expected = np.array([4, 4])
    assert np.array_equal(outputs, expected)
Exemplo n.º 15
0
def test_when_no_hyperparams_should_save_checkpoint_pickle(tmpdir: LocalPath):
    """
    Fit a pipeline without hyperparameters and with a DefaultCheckpoint.

    Afterwards the 'di' and 'eo' checkpoint folders must each contain
    '0.pickle' and '1.pickle'.
    """
    tape = TapeCallbackFunction()
    pickle_checkpoint_step = DefaultCheckpoint()
    pipeline = create_pipeline(tmpdir, pickle_checkpoint_step, tape)

    pipeline, actual_data_inputs = pipeline.fit_transform(
        data_inputs, expected_outputs)

    assert np.array_equal(actual_data_inputs, data_inputs)
    assert tape.get_name_tape() == ["1", "2", "3"]

    # 'di' and 'eo' are the two sub folders written by the checkpoint
    for sub_folder in ('di', 'eo'):
        for file_name in ('0.pickle', '1.pickle'):
            checkpoint_file = os.path.join(
                tmpdir, 'ResumablePipeline', 'pickle_checkpoint',
                sub_folder, file_name)
            assert os.path.exists(checkpoint_file)
Exemplo n.º 16
0
def test_handle_predict_should_predict_in_test_mode():
    """
    handle_predict must only apply the TestOnlyWrapper branch.

    The returned data container must hold [2, 2] as data inputs when fed
    [1, 1]: the multiply-by-2 step ran, the multiply-by-4 step did not.
    """
    tape_fit = TapeCallbackFunction()
    tape_transform = TapeCallbackFunction()
    test_only_step = TestOnlyWrapper(
        CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit))
    train_only_step = TrainOnlyWrapper(
        CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit))
    p = Pipeline([test_only_step, train_only_step])

    incoming_container = DataContainer(
        data_inputs=np.array([1, 1]),
        expected_outputs=np.array([1, 1]))
    execution_context = ExecutionContext()
    data_container = p.handle_predict(
        data_container=incoming_container,
        context=execution_context)

    expected = np.array([2, 2])
    assert np.array_equal(data_container.data_inputs, expected)
Exemplo n.º 17
0
def test_fit_transform_should_fit_then_use_cache(tmpdir):
    """
    fit_transform through a JoblibValueCachingWrapper with repeated inputs.

    The inputs [1, 1, 2, 2] must produce exactly two recorded transform
    calls ([1] and [2]) and a single recorded fit call with the full data.
    """
    tape_transform = TapeCallbackFunction()
    tape_fit = TapeCallbackFunction()
    log_step = LogFitTransformCallbackStep(
        tape_transform, tape_fit, transform_function=np.log)
    p = Pipeline([JoblibValueCachingWrapper(log_step, tmpdir)])

    p, outputs = p.fit_transform([1, 1, 2, 2], [2, 2, 4, 4])

    assert outputs == EXPECTED_OUTPUTS
    # one transform call per distinct value
    assert tape_transform.data == [[1], [2]]
    # a single fit call with all inputs and all expected outputs
    assert tape_fit.data == [([1, 1, 2, 2], [2, 2, 4, 4])]
Exemplo n.º 18
0
def test_step_cloner_should_fit_transform():
    """
    fit_transform a StepClonerForEachDataInput on two data inputs.

    Each of the two resulting steps must be a Pipeline whose first step
    recorded the matching data input and expected output, and the processed
    outputs must equal the data inputs multiplied by 2.
    """
    # Given
    tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(tape), MultiplyByN(2)])
    p = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))

    # When
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    # Then
    for index in (0, 1):
        assert isinstance(p.steps[index], Pipeline)
        # first recorded call of the first sub step of this clone
        first_fit_call = p.steps[index][0].callback_function.data[0]
        assert np.array_equal(first_fit_call[0], data_inputs[index])
        assert np.array_equal(first_fit_call[1], expected_outputs[index])

    assert np.array_equal(processed_outputs, data_inputs * 2)
Exemplo n.º 19
0
def test_should_flush_cache_on_every_fit(tmpdir):
    """
    Values written to the cache before fitting must not survive the fit.

    The cache is pre-filled for inputs 1 and 2; after fit_transform the
    transform callback must still have been called for both values.
    """
    tape_transform = TapeCallbackFunction()
    tape_fit = TapeCallbackFunction()
    log_step = LogFitTransformCallbackStep(
        tape_transform, tape_fit, transform_function=np.log)
    wrapper = JoblibValueCachingWrapper(log_step, cache_folder=tmpdir)
    p = Pipeline([wrapper])

    # pre-fill the cache with values for the inputs used below
    wrapper.create_checkpoint_path()
    for cached_input, cached_output in ((1, 10), (2, 20)):
        wrapper.write_cache(cached_input, cached_output)

    p, outputs = p.fit_transform([1, 1, 2, 2], [2, 2, 4, 4])

    assert outputs == EXPECTED_OUTPUTS
    assert tape_transform.data == [[1], [2]]
    assert tape_fit.data == [([1, 1, 2, 2], [2, 2, 4, 4])]
Exemplo n.º 20
0
def test_data_shuffling_should_shuffle_data_inputs_and_expected_outputs():
    """
    A DataShuffler placed before a callback step must reorder the data.

    After fit_transform, neither the pipeline outputs nor the data recorded
    by the fit and transform callbacks may equal the original, ordered
    data inputs / expected outputs.
    """
    callback_fit = TapeCallbackFunction()
    callback_transform = TapeCallbackFunction()
    data_shuffler = Pipeline([
        DataShuffler(seed=42, increment_seed_after_each_fit=True),
        FitTransformCallbackStep(callback_transform, callback_fit)
    ])
    data_inputs = np.array(range(10))
    expected_outputs = np.array(range(10, 20))

    # fit_transform returns (fitted_pipeline, outputs). Without unpacking,
    # the comparison below would be made against the tuple and could
    # never fail.
    data_shuffler, outputs = data_shuffler.fit_transform(
        data_inputs, expected_outputs)

    assert not np.array_equal(outputs, data_inputs)
    assert not np.array_equal(callback_fit.data[0][0], data_inputs)
    assert not np.array_equal(callback_fit.data[0][1], expected_outputs)
    # .data holds one entry per recorded call; compare the first call's
    # data inputs rather than the whole list of calls.
    assert not np.array_equal(callback_transform.data[0], data_inputs)
Exemplo n.º 21
0
def create_callback_step(tape_step_name, hyperparams):
    """
    Create a named step tuple wrapping a TransformCallbackStepWithMockHasher.

    :param tape_step_name: step name, also passed to the callback as its
        only additional argument
    :param hyperparams: wrapped into HyperparameterSamples for the step
    :return: (tape_step_name, step) tuple
    """
    # each step gets the callback of its own, fresh TapeCallbackFunction
    callback_step = TransformCallbackStepWithMockHasher(
        callback_function=TapeCallbackFunction().callback,
        more_arguments=[tape_step_name],
        hyperparams=HyperparameterSamples(hyperparams),
    )
    return tape_step_name, callback_step
Exemplo n.º 22
0
def test_fit_for_each_should_fit_all_steps_for_each_data_inputs_expected_outputs():
    """
    Fit two data inputs through a ForEachDataInput-wrapped pipeline.

    Both wrapped steps must be fitted once per (data input, expected output)
    pair, so the tape records each pair twice, in order.
    """
    tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([
        FitCallbackStep(tape.callback, ["1"]),
        FitCallbackStep(tape.callback, ["2"]),
    ])
    p = Pipeline([ForEachDataInput(wrapped_pipeline)])
    data_inputs = [[0, 1], [1, 2]]
    expected_outputs = [[2, 3], [4, 5]]

    p = p.fit(data_inputs, expected_outputs)

    assert isinstance(p, Pipeline)
    assert tape.get_name_tape() == ["1", "2"] * 2

    first_pair = ([0, 1], [2, 3])
    second_pair = ([1, 2], [4, 5])
    assert tape.data == [first_pair, first_pair, second_pair, second_pair]
Exemplo n.º 23
0
def test_input_and_output_transformer_wrapper_should_fit_with_data_inputs_and_expected_outputs_as_data_inputs():
    """
    Fit a FitCallbackStep wrapped in InputAndOutputTransformerWrapper.

    The first argument of the first recorded fit call must be a pair made of
    the original data inputs and the original expected outputs.
    """
    tape = TapeCallbackFunction()
    wrapped_step = FitCallbackStep(tape)
    p = InputAndOutputTransformerWrapper(wrapped_step)
    data_inputs, expected_outputs = _create_data_source((10, 10))

    p.fit(data_inputs, expected_outputs)

    # what the wrapped step received as its data inputs
    received_data_inputs = tape.data[0][0]
    assert np.array_equal(received_data_inputs[0], data_inputs)
    assert np.array_equal(received_data_inputs[1], expected_outputs)
def test_step_cloner_should_save_sub_steps(tmpdir):
    """
    A full dump of a fitted StepClonerForEachDataInput must save every step.

    Joblib files are expected for the two cloned pipelines ('Pipeline[0]'
    and 'Pipeline[1]'), for the 'Pipeline' folder, for each of their sub
    steps, and for the step cloner itself.
    """
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(Pipeline(
        [FitCallbackStep(tape), MultiplyByN(2)]),
                                   cache_folder_when_no_handle=tmpdir)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    p.save(ExecutionContext(tmpdir), full_dump=True)

    # Each expected file is listed exactly once, relative to tmpdir.
    expected_relative_paths = [
        'StepClonerForEachDataInput/Pipeline[0]/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline[0]/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline[0]/Pipeline[0].joblib',
        'StepClonerForEachDataInput/Pipeline[1]/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline[1]/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline[1]/Pipeline[1].joblib',
        'StepClonerForEachDataInput/Pipeline/FitCallbackStep/FitCallbackStep.joblib',
        'StepClonerForEachDataInput/Pipeline/MultiplyByN/MultiplyByN.joblib',
        'StepClonerForEachDataInput/Pipeline/Pipeline.joblib',
        'StepClonerForEachDataInput/StepClonerForEachDataInput.joblib',
    ]

    # Use a dedicated loop variable so the fitted step `p` is not rebound,
    # and report the offending path when an assertion fails.
    for relative_path in expected_relative_paths:
        saved_path = os.path.join(tmpdir, relative_path)
        assert os.path.exists(saved_path), saved_path
Exemplo n.º 25
0
def test_step_cloner_should_transform():
    """
    Transform two data inputs with a StepClonerForEachDataInput.

    The step cloner must expose two Pipeline steps afterwards, and the
    processed outputs must equal the data inputs multiplied by 2.
    """
    tape = TapeCallbackFunction()
    wrapped_pipeline = Pipeline([FitCallbackStep(tape), MultiplyByN(2)])
    p = StepClonerForEachDataInput(wrapped_pipeline)
    data_inputs = _create_data((2, 2))

    processed_outputs = p.transform(data_inputs)

    for index in (0, 1):
        assert isinstance(p.steps[index], Pipeline)
    assert np.array_equal(processed_outputs, data_inputs * 2)
def test_minibatch_sequential_pipeline_change_batch_size_works():
    """
    Changing the batch size between two fit_transform calls must be honoured.

    The first call (20 items, Joiner batch_size=10) must be recorded as two
    batches of 10; after set_batch_size(5), the second call (10 items) must
    be recorded as two batches of 5.
    """
    # Given
    tape1 = TapeCallbackFunction()
    tape1_fit = TapeCallbackFunction()
    tape2 = TapeCallbackFunction()
    tape2_fit = TapeCallbackFunction()

    p = MiniBatchSequentialPipeline([
        MultiplyBy2FitTransformCallbackStep(tape1, tape1_fit, ["1"]),
        Joiner(batch_size=10),
        MultiplyBy2FitTransformCallbackStep(tape2, tape2_fit, ["2"]),
        Joiner(batch_size=10),
    ])

    # When
    p, outputs = p.fit_transform(list(range(20)), list(range(20)))
    p.set_batch_size(5)
    p, outputs = p.fit_transform(list(range(20, 30)), list(range(20, 30)))

    # Then
    # batches seen by the first step: 2 x 10 items, then 2 x 5 items
    batches = [
        list(range(0, 10)),
        list(range(10, 20)),
        list(range(20, 25)),
        list(range(25, 30)),
    ]
    # the second step receives the first step's output, i.e. doubled values
    doubled_batches = [[2 * value for value in batch] for batch in batches]

    assert tape1.data == batches
    assert tape1_fit.data == [(batch, batch) for batch in batches]
    assert tape1.name_tape == ["1"] * 4

    assert tape2.data == doubled_batches
    # the expected outputs are recorded as given, not doubled
    assert tape2_fit.data == list(zip(doubled_batches, batches))
    assert tape2.name_tape == ["2"] * 4
Exemplo n.º 27
0
def test_pipeline_simple_mutate_inverse_transform():
    """
    Mutate a flat pipeline so that transform calls inverse_transform.

    The tape must read "1".."4" after fit_transform, followed by "4".."1"
    after transforming with the mutated pipeline.
    """
    tape = TapeCallbackFunction()
    forward_names = ["1", "2", "3", "4"]
    expected_tape = forward_names + forward_names[::-1]

    callback_steps = [
        TransformCallbackStep(tape.callback, [name])
        for name in forward_names
    ]
    p = Pipeline([Identity(), *callback_steps, Identity()])

    p, _ = p.fit_transform(np.ones((1, 1)))

    print("[mutating]")
    p = p.mutate(
        new_method="inverse_transform",
        method_to_assign_to="transform",
    )

    p.transform(np.ones((1, 1)))

    assert expected_tape == tape.get_name_tape()
def test_mini_batch_sequential_pipeline_should_transform_steps_sequentially_for_each_barrier_for_each_batch():
    """
    Transform 20 items through four multiply-by-2 steps in batches of 10.

    Each step must be called once per batch (two calls each), receiving the
    values already doubled by the steps before it. The final outputs are the
    inputs multiplied by 16.
    """
    # Given
    tape1 = TapeCallbackFunction()
    tape2 = TapeCallbackFunction()
    tape3 = TapeCallbackFunction()
    tape4 = TapeCallbackFunction()
    p = MiniBatchSequentialPipeline([
        MultiplyBy2TransformCallbackStep(tape1, ["1"]),
        MultiplyBy2TransformCallbackStep(tape2, ["2"]),
        Joiner(batch_size=10),
        MultiplyBy2TransformCallbackStep(tape3, ["3"]),
        MultiplyBy2TransformCallbackStep(tape4, ["4"]),
        Joiner(batch_size=10),
    ])

    # When
    outputs = p.transform(list(range(20)))

    # Then
    assert outputs == [16 * value for value in range(20)]

    # (tape, name, factor already applied to the values the step receives)
    expectations = (
        (tape1, "1", 1),
        (tape2, "2", 2),
        (tape3, "3", 4),
        (tape4, "4", 8),
    )
    for step_tape, step_name, factor in expectations:
        first_batch = [factor * value for value in range(0, 10)]
        second_batch = [factor * value for value in range(10, 20)]
        assert step_tape.data == [first_batch, second_batch]
        assert step_tape.name_tape == [step_name, step_name]
Exemplo n.º 29
0
def test_expand_dim_fit():
    """
    Fitting through ExpandDim must only trigger the wrapped fit handler.

    The data container recorded by the fit callback must carry the expected
    current id, and data inputs / expected outputs wrapped in one extra
    outer dimension.

    NOTE(review): a second function with this exact name is defined later
    in this file and replaces this one at import time.
    """
    handle_fit_callback = TapeCallbackFunction()
    handle_transform_callback = TapeCallbackFunction()
    handle_fit_transform_callback = TapeCallbackFunction()
    callback_step = HandleCallbackStep(
        handle_fit_callback,
        handle_transform_callback,
        handle_fit_transform_callback)
    p = Pipeline([ExpandDim(callback_step)])

    p = p.fit(np.array(range(10)), np.array(range(10)))

    assert handle_transform_callback.data == []
    assert handle_fit_transform_callback.data == []

    # first argument of the first recorded fit call
    fitted_container = handle_fit_callback.data[0][0]
    assert fitted_container.current_ids == [
        '781e5e245d69b566979b86e28d23f2c7'
    ]

    # the original data with one extra leading dimension
    expanded = np.array([np.array(range(10))])
    assert np.array_equal(np.array(fitted_container.data_inputs), expanded)
    assert np.array_equal(
        np.array(fitted_container.expected_outputs), expanded)
Exemplo n.º 30
0
def test_expand_dim_fit():
    """
    Fitting through ExpandDim with a fixed summary hasher.

    The ExpandDim step's hashers are replaced by a SomeSummaryHasher built
    with SUMMARY_ID. The data container recorded by the fit callback must
    then carry SUMMARY_ID both as its only current id and as its summary
    id, and data wrapped in one extra outer dimension.

    NOTE(review): an earlier function with this exact name exists in this
    file; this definition replaces it at import time.
    """
    handle_fit_callback = TapeCallbackFunction()
    handle_transform_callback = TapeCallbackFunction()
    handle_fit_transform_callback = TapeCallbackFunction()
    callback_step = HandleCallbackStep(
        handle_fit_callback,
        handle_transform_callback,
        handle_fit_transform_callback)
    p = Pipeline([ExpandDim(callback_step)])
    p['ExpandDim'].hashers = [SomeSummaryHasher(fake_summary_id=SUMMARY_ID)]

    p = p.fit(np.array(range(10)), np.array(range(10)))

    assert handle_transform_callback.data == []
    assert handle_fit_transform_callback.data == []

    # first argument of the first recorded fit call
    fitted_container = handle_fit_callback.data[0][0]
    assert fitted_container.current_ids == [SUMMARY_ID]
    assert fitted_container.summary_id == SUMMARY_ID

    # the original data with one extra leading dimension
    expanded = np.array([np.array(range(10))])
    assert np.array_equal(np.array(fitted_container.data_inputs), expanded)
    assert np.array_equal(
        np.array(fitted_container.expected_outputs), expanded)