def test_pickle_checkpoint_step_should_load_data_container(tmpdir: LocalPath):
    """
    Check the outputs of a second, identically built pipeline that shares a cache folder.

    A first ``ResumablePipeline`` (two ``MultiplyBy2OutputTransformer`` steps around a
    ``DefaultCheckpoint``) is fit-transformed with ``tmpdir`` as cache folder. A second pipeline
    built the same way then runs ``handle_transform`` on the same data, and both the data inputs
    and the expected outputs must come out multiplied by 4.

    NOTE(review): presumably the second run reads the checkpoint written by the first one;
    that mechanism is not visible in this test -- confirm against ``DefaultCheckpoint``.

    :param tmpdir: temporary directory used as cache folder and as execution context root
    """
    data_inputs = [1, 2]
    expected_outputs = [2, 3]

    def build_pipeline():
        # Every call yields brand new step instances that cache into the same folder.
        steps = [
            ('output_transformer_1', MultiplyBy2OutputTransformer()),
            ('pickle_checkpoint', DefaultCheckpoint()),
            ('output_transformer_2', MultiplyBy2OutputTransformer()),
        ]
        return ResumablePipeline(steps, cache_folder=tmpdir)

    # First run: fit and transform once with tmpdir as the cache folder.
    build_pipeline().fit_transform(data_inputs=data_inputs,
                                   expected_outputs=expected_outputs)

    # Second run: a new pipeline instance only transforms the same data.
    transformer = build_pipeline()
    input_container = DataContainer(current_ids=[0, 1],
                                    data_inputs=data_inputs,
                                    expected_outputs=expected_outputs)
    transform_context = ExecutionContext.create_from_root(
        transformer, ExecutionMode.TRANSFORM, tmpdir)
    actual_data_container = transformer.handle_transform(input_container,
                                                         transform_context)

    assert np.array_equal(actual_data_container.data_inputs, [4, 8])
    assert np.array_equal(actual_data_container.expected_outputs, [8, 12])
def test_output_transformer_should_zip_data_input_and_expected_output_in_the_transformed_output(
        tmpdir):
    """
    A single ``MultiplyBy2OutputTransformer`` should double data inputs and expected outputs.

    The pipeline is run through ``handle_fit_transform`` on a three-item data container, and
    both fields of the returned container are compared with the doubled values.

    :param tmpdir: temporary directory used as root of the execution context
    """
    pipeline = Pipeline([MultiplyBy2OutputTransformer()])

    input_container = DataContainer(current_ids=[0, 1, 2],
                                    data_inputs=[1, 2, 3],
                                    expected_outputs=[2, 3, 4])
    fit_transform_context = ExecutionContext.create_from_root(
        pipeline, ExecutionMode.FIT_TRANSFORM, tmpdir)

    # handle_fit_transform returns a (pipeline, data container) pair.
    pipeline, new_data_container = pipeline.handle_fit_transform(
        input_container, fit_transform_context)

    assert new_data_container.data_inputs == [2, 4, 6]
    assert new_data_container.expected_outputs == [4, 6, 8]
Exemplo n.º 3
0
    def transform(self, data_inputs: Any):
        """
        Transform the data inputs by delegating to ``handle_transform``.

        The inputs are wrapped in a ``DataContainer`` whose current ids are computed by
        ``self.hash``, and a TRANSFORM-mode execution context is created from this object
        with ``self.cache_folder``.

        :param data_inputs: the data input to transform
        :return: the ``data_inputs`` of the container returned by ``handle_transform``
        """
        # The container starts without ids; they are derived from the container itself.
        container = DataContainer(current_ids=None, data_inputs=data_inputs)
        hashed_ids = self.hash(container)
        container.set_current_ids(hashed_ids)

        execution_context = ExecutionContext.create_from_root(
            self, ExecutionMode.TRANSFORM, self.cache_folder)

        transformed = self.handle_transform(container, execution_context)
        return transformed.data_inputs
Exemplo n.º 4
0
    def transform(self, data_inputs: Any):
        """
        Transform the data inputs by delegating to ``_transform_core``.

        The inputs are wrapped in a ``DataContainer`` whose current ids are computed by
        ``self.hash``, and a TRANSFORM-mode execution context is created from this object
        with ``self.cache_folder``.

        NOTE(review): the previous docstring said the steps are transformed after loading the
        last checkpoint; that presumably happens inside ``_transform_core`` -- confirm there.

        :param data_inputs: the data input to transform
        :return: the ``data_inputs`` of the container returned by ``_transform_core``
        """
        # The container starts without ids; they are derived from the container itself.
        container = DataContainer(current_ids=None, data_inputs=data_inputs)
        hashed_ids = self.hash(container)
        container.set_current_ids(hashed_ids)

        execution_context = ExecutionContext.create_from_root(
            self, ExecutionMode.TRANSFORM, self.cache_folder)

        transformed = self._transform_core(container, execution_context)
        return transformed.data_inputs
Exemplo n.º 5
0
    def fit(self, data_inputs, expected_outputs=None) -> 'Pipeline':
        """
        Fit on the given data by delegating to ``handle_fit``, after calling ``setup``.

        The data is wrapped in a ``DataContainer`` whose current ids are computed by
        ``self.hash``, and an execution context is created from this object with
        ``self.cache_folder``.

        :param data_inputs: the data input to fit on
        :param expected_outputs: the expected data output to fit on
        :return: the first element of the pair returned by ``handle_fit``
        """
        self.setup()

        # The container starts without ids; they are derived from the container itself.
        container = DataContainer(current_ids=None,
                                  data_inputs=data_inputs,
                                  expected_outputs=expected_outputs)
        hashed_ids = self.hash(container)
        container.set_current_ids(hashed_ids)

        # NOTE(review): the context uses FIT_TRANSFORM mode although this method only fits --
        # confirm this is intended.
        execution_context = ExecutionContext.create_from_root(
            self, ExecutionMode.FIT_TRANSFORM, self.cache_folder)

        # handle_fit returns a pair; only its first element is handed back to the caller.
        fitted, _ = self.handle_fit(container, execution_context)
        return fitted
Exemplo n.º 6
0
    def fit(self, data_inputs, expected_outputs=None) -> 'Pipeline':
        """
        Fit on the given data by delegating to ``_fit_core``.

        The data is wrapped in a ``DataContainer`` whose current ids are computed by
        ``self.hash``, and a FIT-mode execution context is created from this object with
        ``self.cache_folder``.

        NOTE(review): the previous docstring said the steps are fitted after loading the last
        checkpoint; that presumably happens inside ``_fit_core`` -- confirm there.

        :param data_inputs: the data input to fit on
        :param expected_outputs: the expected data output to fit on
        :return: the first element of the pair returned by ``_fit_core``
        """
        # The container starts without ids; they are derived from the container itself.
        container = DataContainer(current_ids=None,
                                  data_inputs=data_inputs,
                                  expected_outputs=expected_outputs)
        hashed_ids = self.hash(container)
        container.set_current_ids(hashed_ids)

        execution_context = ExecutionContext.create_from_root(
            self, ExecutionMode.FIT, self.cache_folder)

        # _fit_core returns a pair; only its first element is handed back to the caller.
        fitted, _ = self._fit_core(container, execution_context)
        return fitted