Example no. 1
def test_load_full_dump_from_path(tmpdir):
    # Given
    tape_fit_callback_function = TapeCallbackFunction()
    tape_transform_callback_function = TapeCallbackFunction()
    pipeline = Pipeline(
        [('step_a', Identity()),
         ('step_b',
          OutputTransformerWrapper(
              FitTransformCallbackStep(tape_fit_callback_function,
                                       tape_transform_callback_function)))],
        cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    loaded_pipeline = ExecutionContext(tmpdir).load(
        os.path.join(PIPELINE_NAME, 'step_b'))

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    loaded_step_b_wrapped_step = loaded_pipeline.wrapped
    assert np.array_equal(
        loaded_step_b_wrapped_step.transform_callback_function.data[0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][1],
        [None] * len(EXPECTED_OUTPUTS))
Example no. 2
def test_queued_pipeline_saving(tmpdir):
    # Given
    p = ParallelQueuedFeatureUnion([
        ('1', FitTransformCallbackStep()),
        ('2', FitTransformCallbackStep()),
        ('3', FitTransformCallbackStep()),
        ('4', FitTransformCallbackStep()),
    ], n_workers_per_step=1, max_queue_size=10, batch_size=10)

    # When
    p, outputs = p.fit_transform(list(range(100)), list(range(100)))
    p.save(ExecutionContext(tmpdir))
    p.apply('clear_callbacks')

    # Then

    assert len(p[0].wrapped.transform_callback_function.data) == 0
    assert len(p[0].wrapped.fit_callback_function.data) == 0
    assert len(p[1].wrapped.transform_callback_function.data) == 0
    assert len(p[1].wrapped.fit_callback_function.data) == 0
    assert len(p[2].wrapped.transform_callback_function.data) == 0
    assert len(p[2].wrapped.fit_callback_function.data) == 0
    assert len(p[3].wrapped.transform_callback_function.data) == 0
    assert len(p[3].wrapped.fit_callback_function.data) == 0

    p = p.load(ExecutionContext(tmpdir))

    assert len(p[0].wrapped.transform_callback_function.data) == 10
    assert len(p[0].wrapped.fit_callback_function.data) == 10
    assert len(p[1].wrapped.transform_callback_function.data) == 10
    assert len(p[1].wrapped.fit_callback_function.data) == 10
    assert len(p[2].wrapped.transform_callback_function.data) == 10
    assert len(p[2].wrapped.fit_callback_function.data) == 10
    assert len(p[3].wrapped.transform_callback_function.data) == 10
    assert len(p[3].wrapped.fit_callback_function.data) == 10
Example no. 3
def test_step_with_context_should_only_save_wrapped_step(tmpdir):
    context = ExecutionContext(root=tmpdir)
    service = SomeService()
    context.set_service_locator({BaseService: service})
    p = Pipeline([SomeStep().assert_has_services(BaseService)
                  ]).with_context(context=context)

    p.save(context, full_dump=True)

    p: Pipeline = ExecutionContext(root=tmpdir).load(
        os.path.join('StepWithContext', 'Pipeline'))
    assert isinstance(p, Pipeline)
Example no. 4
def test_tensorflowv2_saver(tmpdir):
    dataset = toy_dataset()
    model = Pipeline([create_model_step(tmpdir)])
    loss_first_fit = evaluate_model_on_dataset(model, dataset)

    model.save(ExecutionContext(root=tmpdir))

    loaded = Pipeline([create_model_step(tmpdir)
                       ]).load(ExecutionContext(root=tmpdir))
    loss_second_fit = evaluate_model_on_dataset(loaded, dataset)

    assert loss_second_fit < (loss_first_fit / 2)
Example no. 5
    def fit_data_container(self, data_container):
        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(self.cache_folder, ExecutionMode.FIT)
        context = context.push(self)
        new_self = self._fit_data_container(data_container, context)

        return new_self
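
For orientation, a minimal hedged sketch of how a handler method like this one is driven; the DataContainer construction mirrors the tests above, and `pipeline` stands for any step exposing `fit_data_container` (illustrative, not library-prescribed):

# Hedged sketch: call the handler directly with a DataContainer.
data_container = DataContainer(data_inputs=[1, 2, 3], expected_outputs=[2, 4, 6])
pipeline = pipeline.fit_data_container(data_container)  # returns the fitted step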
Example no. 6
    def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None) -> Trial:
        """
        Train pipeline using the validation splitter.
        Track training and validation metrics for each epoch.
        Note: this method is just a shortcut to the `execute_trial` method with less boilerplate code needed.
        Refer to `execute_trial` for full flexibility.

        :param pipeline: pipeline to train on
        :param data_inputs: data inputs
        :param expected_outputs: expected outputs to fit on
        :return: executed trial
        """
        validation_splits: List[Tuple[DataContainer, DataContainer]] = self.validation_split_function.split_data_container(
            DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs)
        )

        repo_trial: Trial = Trial(
            pipeline=pipeline,
            hyperparams=pipeline.get_hyperparams(),
            main_metric_name=self.get_main_metric_name()
        )

        self.execute_trial(
            pipeline=pipeline,
            trial_number=1,
            repo_trial=repo_trial,
            context=ExecutionContext(),
            validation_splits=validation_splits,
            n_trial=1,
            delete_pipeline_on_completion=False
        )

        return repo_trial
Example no. 7
def test_logger():
    file_path = "test.log"

    if os.path.exists(file_path):
        os.remove(file_path)

    # Given
    logger = logging.getLogger('test')
    file_handler = logging.FileHandler(file_path)
    file_handler.setLevel('DEBUG')
    logger.addHandler(file_handler)
    logger.setLevel('DEBUG')
    context = ExecutionContext(logger=logger)
    pipeline = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        LoggingStep()
    ])

    # When
    data_container = DataContainer(
        data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
    pipeline.handle_fit(data_container, context)

    # Then
    assert os.path.exists(file_path)
    with open(file_path) as f:
        log_content = f.read()
    assert len(log_content) > 0

    # Teardown
    file_handler.close()
    os.remove(file_path)
Example no. 8
def test_validation_splitter_should_split_data_properly():
    # Given
    data_inputs = np.random.random((4, 2, 2048, 6)).astype(np.float32)
    expected_outputs = np.random.random((4, 2, 2048, 1)).astype(np.float32)
    splitter = ValidationSplitter(test_size=0.2)

    # When
    validation_splits = splitter.split_data_container(
        data_container=DataContainer(data_inputs=data_inputs,
                                     expected_outputs=expected_outputs),
        context=ExecutionContext())
    train_di, train_eo, validation_di, validation_eo = extract_validation_split_data(
        validation_splits)

    train_di = train_di[0]
    train_eo = train_eo[0]

    validation_di = validation_di[0]
    validation_eo = validation_eo[0]

    # Then
    assert len(train_di) == 3
    assert np.array_equal(np.array(train_di), data_inputs[0:3])
    assert len(train_eo) == 3
    assert np.array_equal(np.array(train_eo), expected_outputs[0:3])

    assert len(validation_di) == 1
    assert np.array_equal(validation_di[0], data_inputs[-1])
    assert len(validation_eo) == 1
    assert np.array_equal(validation_eo[0], expected_outputs[-1])
Example no. 9
def test_kfold_cross_validation_should_split_data_properly_bug():
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = np.array([0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40])
    data_container = DataContainer(data_inputs=data_inputs,
                                   expected_outputs=expected_outputs)
    splitter = KFoldCrossValidationSplitter(k_fold=2)

    # When
    validation_splits = splitter.split_data_container(data_container,
                                                      ExecutionContext())

    train_di, train_eo, validation_di, validation_eo = extract_validation_split_data(
        validation_splits)

    # Then
    assert len(train_di[0]) == 6
    assert np.array_equal(np.array(train_di[0]), data_inputs[5:])
    assert len(train_eo[0]) == 6
    assert np.array_equal(np.array(train_eo[0]), expected_outputs[5:])

    assert len(train_di[1]) == 5
    assert np.array_equal(np.array(train_di[1]), data_inputs[:5])
    assert len(train_eo[1]) == 5
    assert np.array_equal(np.array(train_eo[1]), expected_outputs[:5])

    assert len(validation_di[0]) == 5
    assert np.array_equal(np.array(validation_di[0]), data_inputs[:5])
    assert len(validation_eo[0]) == 5
    assert np.array_equal(np.array(validation_eo[0]), expected_outputs[:5])

    assert len(validation_di[1]) == 6
    assert np.array_equal(np.array(validation_di[1]), data_inputs[5:])
    assert len(validation_eo[1]) == 6
    assert np.array_equal(validation_eo[1], expected_outputs[5:])
Example no. 10
def test_trainer_train():
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 4
    p = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        linear_model.LinearRegression()
    ])

    trainer: Trainer = Trainer(
        epochs=10,
        scoring_callback=ScoringCallback(mean_squared_error,
                                         higher_score_is_better=False),
        validation_splitter=ValidationSplitter(test_size=0.20))

    repo_trial: Trial = trainer.train(pipeline=p,
                                      data_inputs=data_inputs,
                                      expected_outputs=expected_outputs,
                                      context=ExecutionContext())

    trained_pipeline = repo_trial.get_trained_pipeline(split_number=0)

    outputs = trained_pipeline.transform(data_inputs)
    mse = mean_squared_error(expected_outputs, outputs)

    assert mse < 1
Example no. 11
def test_inner_concatenate_data_should_merge_1d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_1d, expected_outputs_1d = _create_data_source(SHAPE_1D)
    data_container_1d = DataContainer(data_inputs=data_inputs_1d,
                                      expected_outputs=expected_outputs_1d)
    data_container = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('1d', data_container_1d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['1d'])])

    data_container = p.handle_transform(data_container, ExecutionContext())

    # Then
    broadcasted_data_inputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.data_inputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))
    broadcasted_expected_outputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.expected_outputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))

    assert np.array_equal(data_container.data_inputs[..., -1],
                          broadcasted_data_inputs_1d)
    assert np.array_equal(data_container.expected_outputs[..., -1],
                          broadcasted_expected_outputs_1d)

    assert data_container.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                SHAPE_3D[2] + 1)
    assert data_container.expected_outputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                     SHAPE_3D[2] + 1)
Example no. 12
def test_inner_concatenate_data_should_merge_2d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D)
    data_container_2d = DataContainer(data_inputs=data_inputs_2d,
                                      expected_outputs=expected_outputs_2d)
    data_container_3d = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('2d', data_container_2d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['2d'])])

    data_container_3d = p.handle_transform(data_container_3d,
                                           ExecutionContext())

    # Then
    assert data_container_3d.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                   SHAPE_3D[2] + 1)
    assert data_container_3d.expected_outputs.shape == (SHAPE_3D[0],
                                                        SHAPE_3D[1],
                                                        SHAPE_3D[2] + 1)
    assert np.array_equal(data_container_3d.data_inputs[..., -1],
                          data_container_2d.data_inputs)
    assert np.array_equal(data_container_3d.expected_outputs[..., -1],
                          data_container_2d.expected_outputs)
Example no. 13
def test_step_with_context_saver(tmpdir):
    context = ExecutionContext(root=tmpdir)
    service = SomeService()
    pipeline_name = 'testname'
    context.set_service_locator({SomeBaseService: service})
    p = Pipeline([
        SomeStep().assert_has_services(SomeBaseService)
    ]).with_context(context=context)
    p.set_name(pipeline_name)
    p.save(context, full_dump=True)

    p: StepWithContext = ExecutionContext(root=tmpdir).load(pipeline_name)
    assert isinstance(p, StepWithContext)

    p: Pipeline = ExecutionContext(root=tmpdir).load(os.path.join(pipeline_name, 'Pipeline'))
    assert isinstance(p, Pipeline)
Example no. 14
    def fit_transform_data_container(
            self, data_container) -> Tuple['Pipeline', DataContainer]:
        context = ExecutionContext(root=self.cache_folder,
                                   execution_mode=ExecutionMode.FIT_TRANSFORM)
        new_self, data_container = self.handle_fit_transform(
            data_container, context)

        return new_self, data_container
Example no. 15
def test_step_cloner_should_load_sub_steps(tmpdir):
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(Pipeline(
        [FitCallbackStep(tape), MultiplyByN(2)]),
                                   cache_folder_when_no_handle=tmpdir)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    p.save(ExecutionContext(tmpdir), full_dump=True)

    loaded_step_cloner = ExecutionContext(tmpdir).load(
        'StepClonerForEachDataInput')
    assert isinstance(loaded_step_cloner.wrapped, Pipeline)
    assert len(loaded_step_cloner.steps_as_tuple) == len(data_inputs)
    assert isinstance(loaded_step_cloner.steps_as_tuple[0][1], Pipeline)
    assert isinstance(loaded_step_cloner.steps_as_tuple[1][1], Pipeline)
Example no. 16
def test_kfold_cross_validation_should_split_data_properly():
    # Given
    data_inputs = np.random.random((4, 2, 2048, 6)).astype(np.float32)
    expected_outputs = np.random.random((4, 2, 2048, 1)).astype(np.float32)
    splitter = KFoldCrossValidationSplitter(k_fold=4)

    # When
    validation_splits = splitter.split_data_container(
        data_container=DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs),
        context=ExecutionContext()
    )
    train_di, train_eo, validation_di, validation_eo = extract_validation_split_data(validation_splits)

    # Then
    assert len(train_di[0]) == 3
    assert np.array_equal(np.array(train_di[0]), data_inputs[1:])
    assert len(train_eo[0]) == 3
    assert np.array_equal(np.array(train_eo[0]), expected_outputs[1:])

    assert len(train_di[1]) == 3
    assert np.array_equal(np.array(train_di[1]),
                          np.concatenate((np.expand_dims(data_inputs[0], axis=0), data_inputs[2:]), axis=0))
    assert len(train_eo[1]) == 3
    assert np.array_equal(np.array(train_eo[1]),
                          np.concatenate((np.expand_dims(expected_outputs[0], axis=0), expected_outputs[2:]), axis=0))

    assert len(train_di[2]) == 3
    assert np.array_equal(np.array(train_di[2]),
                          np.concatenate((data_inputs[0:2], np.expand_dims(data_inputs[3], axis=0)), axis=0))
    assert len(train_eo[2]) == 3
    assert np.array_equal(np.array(train_eo[2]),
                          np.concatenate((expected_outputs[0:2], np.expand_dims(expected_outputs[3], axis=0)), axis=0))

    assert len(train_di[3]) == 3
    assert np.array_equal(np.array(train_di[3]), data_inputs[0:3])
    assert len(train_eo[3]) == 3
    assert np.array_equal(np.array(train_eo[3]), expected_outputs[0:3])

    assert len(validation_di[0]) == 1
    assert np.array_equal(validation_di[0][0], data_inputs[0])
    assert len(validation_eo[0]) == 1
    assert np.array_equal(validation_eo[0][0], expected_outputs[0])

    assert len(validation_di[1]) == 1
    assert np.array_equal(validation_di[1][0], data_inputs[1])
    assert len(validation_eo[1]) == 1
    assert np.array_equal(validation_eo[1][0], expected_outputs[1])

    assert len(validation_di[2]) == 1
    assert np.array_equal(validation_di[2][0], data_inputs[2])
    assert len(validation_eo[2]) == 1
    assert np.array_equal(validation_eo[2][0], expected_outputs[2])

    assert len(validation_di[3]) == 1
    assert np.array_equal(validation_di[3][0], data_inputs[3])
    assert len(validation_eo[3]) == 1
    assert np.array_equal(validation_eo[3][0], expected_outputs[3])
Example no. 17
    def fit_transform_data_container(self, data_container):
        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(root=self.cache_folder,
                                   execution_mode=ExecutionMode.FIT_TRANSFORM)
        context = context.push(self)
        new_self, data_container = self._fit_transform_data_container(
            data_container, context)

        return new_self, data_container.data_inputs
Example no. 18
    def save_model(self):
        """
        Save the fitted model in the trial hash folder.
        """
        hyperparams = self.hyperparams.to_flat_as_dict_primitive()
        trial_hash = self._get_trial_hash(hyperparams)
        self.pipeline.set_name(trial_hash).save(
            ExecutionContext(self.cache_folder), full_dump=True)
Example no. 19
def test_handle_predict_should_handle_transform_with_initial_is_train_mode_after_predict(
):
    tape_fit = TapeCallbackFunction()
    tape_transform = TapeCallbackFunction()
    p = Pipeline([
        TestOnlyWrapper(
            CallbackWrapper(MultiplyByN(2), tape_transform, tape_fit)),
        TrainOnlyWrapper(
            CallbackWrapper(MultiplyByN(4), tape_transform, tape_fit))
    ])
    data_container = DataContainer(data_inputs=np.array([1, 1]),
                                   expected_outputs=np.array([1, 1]))

    p.handle_predict(data_container=data_container.copy(),
                     context=ExecutionContext())
    data_container = p.handle_transform(data_container, ExecutionContext())

    assert np.array_equal(data_container.data_inputs, np.array([4, 4]))
Example no. 20
    def transform_data_container(self, data_container: DataContainer):
        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(root=self.cache_folder,
                                   execution_mode=ExecutionMode.TRANSFORM)
        context = context.push(self)
        data_container = self._transform_data_container(
            data_container, context)

        return data_container.data_inputs
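
Examples no. 5, 17 and 20 are the same skeleton specialized for the FIT, FIT_TRANSFORM and TRANSFORM execution modes. A hedged refactoring sketch of that shared pattern (an illustrative helper, not the library's API):

    def _run_handler(self, data_container, execution_mode, handler):
        # Shared skeleton: re-hash, build a mode-specific context,
        # push self onto it, then delegate to the private handler.
        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(root=self.cache_folder, execution_mode=execution_mode)
        return handler(data_container, context.push(self))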
Example no. 21
def test_input_and_output_transformer_wrapper_should_not_return_a_different_amount_of_data_inputs_and_expected_outputs(
):
    with pytest.raises(AssertionError):
        p = InputAndOutputTransformerWrapper(ChangeLenDataInputs())
        data_inputs, expected_outputs = _create_data_source((10, 10))

        p.handle_transform(
            DataContainer(data_inputs=data_inputs,
                          expected_outputs=expected_outputs),
            ExecutionContext())
Example no. 22
def test_add_service_assertions_should_fail_when_services_are_missing(tmpdir):
    with pytest.raises(AssertionError) as exception_info:
        context = ExecutionContext(root=tmpdir)
        p = Pipeline([SomeStep().assert_has_services(BaseService)
                      ]).with_context(context=context)
        data_inputs = np.array([0, 1, 2, 3])

        p.transform(data_inputs=data_inputs)

    assert 'BaseService dependency missing' in exception_info.value.args[0]
Example no. 23
    def get_model(self, label: str) -> BaseStep:
        """
        Load the model saved under the trial hash, from the given label's folder.
        """
        assert self.cache_folder is not None

        hyperparams = self.hyperparams.to_flat_dict()
        trial_hash = self._get_trial_hash(hyperparams)
        path = os.path.join(self.cache_folder, label)
        return ExecutionContext(path).load(trial_hash)
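
Read together with `save_model` in Example no. 18: both key the artifact by `self._get_trial_hash(...)`, so saving and loading round-trip on that hash. A hedged usage sketch (the `trial` object and the 'best' label are illustrative; note that `get_model` reads from a label subfolder under `cache_folder`):

trial.save_model()                     # writes under <cache_folder>/<trial_hash>
model = trial.get_model(label='best')  # loads <trial_hash> from <cache_folder>/best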
Example no. 24
def _run(tmpdir, phase, expected):
    context = ExecutionContext(root=tmpdir, execution_phase=phase)
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    some_step = SomeStep()
    p = IfExecutionPhaseIsThenDo(ExecutionPhase.TRAIN, some_step)
    p = p.with_context(context)

    p.fit_transform(data_inputs)
    assert some_step.did_process is expected
Example no. 25
def test_tensorflowv1_saver(tmpdir):
    data_inputs = np.array([
        3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167, 7.042,
        10.791, 5.313, 7.997, 5.654, 9.27, 3.1
    ])
    expected_outputs = np.array([
        1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221, 2.827,
        3.465, 1.65, 2.904, 2.42, 2.94, 1.3
    ])
    model = Pipeline([create_model_step()])

    for i in range(50):
        model, outputs = model.fit_transform(data_inputs, expected_outputs)

    model.save(ExecutionContext(root=tmpdir))

    model = Pipeline([create_model_step()]).load(ExecutionContext(root=tmpdir))
    model, outputs = model.fit_transform(data_inputs, expected_outputs)
    assert ((outputs - expected_outputs)**2).mean() < 0.25
Example no. 26
def test_step_cloner_should_save_sub_steps(tmpdir):
    tape = TapeCallbackFunction()
    p = StepClonerForEachDataInput(Pipeline(
        [FitCallbackStep(tape), MultiplyByN(2)]),
                                   cache_folder_when_no_handle=tmpdir)
    data_inputs = _create_data((2, 2))
    expected_outputs = _create_data((2, 2))
    p, processed_outputs = p.fit_transform(data_inputs, expected_outputs)

    p.save(ExecutionContext(tmpdir), full_dump=True)

    saved_paths = [
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline[0]/FitCallbackStep/FitCallbackStep.joblib'
        ),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline[0]/MultiplyByN/MultiplyByN.joblib'
        ),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline[0]/Pipeline[0].joblib'),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline[1]/FitCallbackStep/FitCallbackStep.joblib'
        ),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline[1]/MultiplyByN/MultiplyByN.joblib'
        ),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline[1]/Pipeline[1].joblib'),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline/FitCallbackStep/FitCallbackStep.joblib'
        ),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/Pipeline/MultiplyByN/MultiplyByN.joblib'
        ),
        os.path.join(tmpdir,
                     'StepClonerForEachDataInput/Pipeline/Pipeline.joblib'),
        os.path.join(
            tmpdir,
            'StepClonerForEachDataInput/StepClonerForEachDataInput.joblib')
    ]

    for path in saved_paths:
        assert os.path.exists(path)
Example no. 27
def test_queued_pipeline_saving(tmpdir, use_processes, use_savers):
    # Given
    p = ParallelQueuedFeatureUnion([
        ('1', 4, 10, FitTransformCallbackStep()),
        ('2', 4, 10, FitTransformCallbackStep()),
        ('3', 4, 10, FitTransformCallbackStep()),
        ('4', 4, 10, FitTransformCallbackStep()),
    ],
                                   n_workers_per_step=4,
                                   max_queue_size=10,
                                   batch_size=10,
                                   use_processes=use_processes,
                                   use_savers=use_savers).with_context(
                                       ExecutionContext(tmpdir))

    # When
    p, _ = p.fit_transform(list(range(200)), list(range(200)))
    p = p.wrapped  # clear execution context wrapper
    p.save(ExecutionContext(tmpdir))
    p.apply('clear_callbacks')

    # Then

    assert len(p[0].wrapped.transform_callback_function.data) == 0
    assert len(p[0].wrapped.fit_callback_function.data) == 0
    assert len(p[1].wrapped.transform_callback_function.data) == 0
    assert len(p[1].wrapped.fit_callback_function.data) == 0
    assert len(p[2].wrapped.transform_callback_function.data) == 0
    assert len(p[2].wrapped.fit_callback_function.data) == 0
    assert len(p[3].wrapped.transform_callback_function.data) == 0
    assert len(p[3].wrapped.fit_callback_function.data) == 0

    p = p.load(ExecutionContext(tmpdir))

    assert len(p[0].wrapped.transform_callback_function.data) == 20
    assert len(p[0].wrapped.fit_callback_function.data) == 20
    assert len(p[1].wrapped.transform_callback_function.data) == 20
    assert len(p[1].wrapped.fit_callback_function.data) == 20
    assert len(p[2].wrapped.transform_callback_function.data) == 20
    assert len(p[2].wrapped.fit_callback_function.data) == 20
    assert len(p[3].wrapped.transform_callback_function.data) == 20
    assert len(p[3].wrapped.fit_callback_function.data) == 20
Example no. 28
def test_localassert_should_assert_dependencies_properly_at_exec(tmpdir):
    data_inputs = np.array([0, 1, 2, 3])
    context = ExecutionContext(root=tmpdir)
    p = Pipeline([
        RegisterServiceDynamically(),
        SomeStep().assert_has_services_at_execution(SomeBaseService)
    ]).with_context(context=context)

    p.transform(data_inputs=data_inputs)
    service = context.get_service(SomeBaseService)
    assert np.array_equal(service.data, data_inputs)
Example no. 29
def test_input_and_output_transformer_wrapper_should_raise_an_assertion_error_if_current_ids_have_not_been_resampled_correctly(
):
    with pytest.raises(AssertionError):
        p = InputAndOutputTransformerWrapper(
            ChangeLenDataInputsAndExpectedOutputs())
        data_inputs, expected_outputs = _create_data_source((10, 10))

        p.handle_transform(
            DataContainer(data_inputs=data_inputs,
                          expected_outputs=expected_outputs),
            ExecutionContext())
Example no. 30
    def get_best_model(self):
        """
        Load the best model saved inside the best retrained model folder.

        :return: the best model, reloaded from the best retrained model folder
        """
        hyperparams: HyperparameterSamples = self.get_best_hyperparams()
        trial_hash: str = self._get_trial_hash(HyperparameterSamples(hyperparams).to_flat_as_dict_primitive())
        p: BaseStep = ExecutionContext(str(self.best_retrained_model_folder)).load(trial_hash)

        return p
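
A hedged usage sketch, assuming this method lives on an AutoML-style controller as the surrounding examples suggest (`controller` and `data_inputs` are illustrative names):

best_model = controller.get_best_model()         # reload the best retrained pipeline
predictions = best_model.transform(data_inputs)  # BaseStep API, as in the tests above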