Example no. 1
def test_load_full_dump_from_path(tmpdir):
    # Given
    tape_fit_callback_function = TapeCallbackFunction()
    tape_transform_callback_function = TapeCallbackFunction()
    pipeline = Pipeline(
        [('step_a', Identity()),
         ('step_b',
          OutputTransformerWrapper(
              FitTransformCallbackStep(tape_fit_callback_function,
                                       tape_transform_callback_function)))],
        cache_folder=tmpdir).set_name(PIPELINE_NAME)

    # When
    pipeline, outputs = pipeline.fit_transform(DATA_INPUTS, EXPECTED_OUTPUTS)
    pipeline.save(ExecutionContext(tmpdir), full_dump=True)

    # Then
    loaded_pipeline = ExecutionContext(tmpdir).load(
        os.path.join(PIPELINE_NAME, 'step_b'))

    assert isinstance(loaded_pipeline, OutputTransformerWrapper)
    loaded_step_b_wrapped_step = loaded_pipeline.wrapped
    assert np.array_equal(
        loaded_step_b_wrapped_step.transform_callback_function.data[0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][0],
        EXPECTED_OUTPUTS)
    assert np.array_equal(
        loaded_step_b_wrapped_step.fit_callback_function.data[0][1],
        [None] * len(EXPECTED_OUTPUTS))
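The full dump written above is addressed by path, so the whole pipeline can be reloaded just like the nested 'step_b' step. A minimal sketch reusing the test's PIPELINE_NAME and tmpdir (a hypothetical follow-up, not part of the original test):

    # Loading from the pipeline root instead of a nested step path (sketch).
    loaded_root = ExecutionContext(tmpdir).load(PIPELINE_NAME)
    assert isinstance(loaded_root, Pipeline)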
Example no. 2
    def _fit_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> ('BaseStep', DataContainer):
        """
        Fit Transform the given data inputs without splitting.

        :param data_container: data container to fit transform on
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: (self, data container)
        """
        train_data_container, validation_data_container = self.split_data_container(data_container)

        # add sub data container for the validation metrics calculated in MetricsWrapper
        train_data_container.add_sub_data_container(
            name=VALIDATION_SUB_DATA_CONTAINER_NAME,
            data_container=validation_data_container
        )

        self.wrapped, results_data_container = self.wrapped.handle_fit_transform(train_data_container,
                                                                                 context.push(self.wrapped))

        self._update_scores_train(results_data_container.data_inputs, results_data_container.expected_outputs)

        results_data_container = self.wrapped.handle_predict(validation_data_container, context.push(self.wrapped))

        self._update_scores_validation(results_data_container.data_inputs, results_data_container.expected_outputs)

        self.wrapped.apply('disable_metrics')
        data_container = self.wrapped.handle_predict(data_container, context.push(self.wrapped))
        self.wrapped.apply('enable_metrics')

        return self, data_container
Example no. 3
    def _fit_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> ('BaseStep', DataContainer):
        """
        Fit Transform the given data inputs without splitting.

        :param data_container: data container to fit transform on
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: (self, data container)
        """
        train_data_container, validation_data_container = self.split_data_container(
            data_container)

        self.wrapped, results_data_container = self.wrapped.handle_fit_transform(
            train_data_container, context.push(self.wrapped))

        self._update_scores_train(results_data_container.data_inputs,
                                  results_data_container.expected_outputs)

        results_data_container = self.wrapped.handle_predict(
            validation_data_container, context.push(self.wrapped))

        self._update_scores_validation(results_data_container.data_inputs,
                                       results_data_container.expected_outputs)

        self.wrapped.apply('disable_metrics')
        data_container = self.wrapped.handle_predict(
            data_container, context.push(self.wrapped))
        self.wrapped.apply('enable_metrics')

        return self, data_container
Example no. 4
    def _fit_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> ('BaseStep', DataContainer):
        """
        According to the idiom of `(1, 2, reversed(1))`, we do this, in order:

            - `1`. Fit Transform preprocessing step
            - `2`. Fit Transform postprocessing step
            - `reversed(1)`. Inverse transform preprocessing step

        :param data_container: data container to transform
        :param context: execution context
        :return: (self, data_container)
        """
        self["preprocessing_step"], data_container = self[
            "preprocessing_step"].handle_fit_transform(
                data_container, context.push(self["preprocessing_step"]))
        self["postprocessing_step"], data_container = self[
            "postprocessing_step"].handle_fit_transform(
                data_container, context.push(self["postprocessing_step"]))

        data_container = self["preprocessing_step"].handle_inverse_transform(
            data_container, context.push(self["preprocessing_step"]))

        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)

        return self, data_container
Example no. 5
    def handle_fit(self, data_container: DataContainer,
                   context: ExecutionContext):
        """
        Fit the parallel steps on the data. It will make use of some parallel processing.

        :param data_container: The input data to fit onto
        :param context: execution context
        :return: (self, data container)
        """
        # Actually fit:
        if self.n_jobs != 1:
            fitted_steps_data_containers = Parallel(
                backend=self.backend,
                n_jobs=self.n_jobs)(delayed(step.handle_fit)(
                    data_container.copy(), context.push(step))
                                    for _, step in self.steps_as_tuple)
        else:
            fitted_steps_data_containers = [
                step.handle_fit(data_container.copy(), context.push(step))
                for _, step in self.steps_as_tuple
            ]

        # Save fitted steps
        for i, (fitted_step, _) in enumerate(fitted_steps_data_containers):
            self.steps_as_tuple[i] = (self.steps_as_tuple[i][0], fitted_step)
        self._refresh_steps()

        return self, data_container
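The Parallel(...)(delayed(...)) construct in the n_jobs branch above is joblib's fan-out idiom. A minimal standalone sketch of that idiom, assuming only that joblib is installed (no Neuraxle types involved):

    from joblib import Parallel, delayed

    # Apply len() to each chunk; joblib preserves the input order of the results.
    results = Parallel(backend='threading', n_jobs=2)(
        delayed(len)(chunk) for chunk in ([1, 2], [3, 4, 5])
    )
    assert results == [2, 3]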
Example no. 6
    def save_checkpoint(self, data_container: DataContainer,
                        context: ExecutionContext) -> DataContainer:
        if self.is_for_execution_mode(context.get_execution_mode()):
            # TODO: save the context by execution mode AND data container ids / summary
            context.copy().save()

        return data_container
Example no. 7
    def handle_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
        """
        According to the idiom of `(1, 2, reversed(1))`, we do this, in order:

            - `1`. Transform preprocessing step
            - `2`. Transform postprocessing step
            - `reversed(1)`. Inverse transform preprocessing step

        :param data_container: data container to transform
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: data_container
        :rtype: DataContainer
        """
        data_container = self["preprocessing_step"].handle_transform(data_container,
                                                                     context.push(self["preprocessing_step"]))
        data_container = self["postprocessing_step"].handle_transform(data_container,
                                                                      context.push(self["postprocessing_step"]))

        data_container = self["preprocessing_step"].handle_inverse_transform(data_container,
                                                                             context.push(self["preprocessing_step"]))

        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)

        return data_container
Example no. 8
    def save_checkpoint(self, data_container: DataContainer,
                        context: ExecutionContext) -> DataContainer:
        """
        Save data container data inputs with :py:attr:`~data_input_checkpointer`.
        Save data container expected outputs with :py:attr:`~expected_output_checkpointer`.

        :param data_container: data container to checkpoint
        :type data_container: neuraxle.data_container.DataContainer
        :param context: execution context to checkpoint from
        :type context: ExecutionContext
        :return: data container
        """
        if not self.is_for_execution_mode(context.get_execution_mode()):
            return data_container

        context.mkdir()

        self.summary_checkpointer.save_summary(
            checkpoint_path=context.get_path(), data_container=data_container)

        for current_id, data_input, expected_output in data_container:
            self.data_input_checkpointer.save_checkpoint(
                checkpoint_path=self._get_data_input_checkpoint_path(context),
                current_id=current_id,
                data=data_input)

            self.expected_output_checkpointer.save_checkpoint(
                checkpoint_path=self._get_expected_output_checkpoint_path(
                    context),
                current_id=current_id,
                data=expected_output)

        return data_container
Example no. 9
    def should_resume(self, data_container: DataContainer,
                      context: ExecutionContext) -> bool:
        """
        Return whether the whole data container has been checkpointed.

        :param data_container: data container to read checkpoint for
        :type data_container: neuraxle.data_container.DataContainer
        :param context: execution context to read checkpoint from
        :type context: ExecutionContext
        :return: whether the whole data container has been checkpointed
        :rtype: bool
        """
        if not self.summary_checkpointer.checkpoint_exists(
                context.get_path(), data_container):
            return False

        current_ids = self.summary_checkpointer.read_summary(
            checkpoint_path=context.get_path(), data_container=data_container)

        for current_id in current_ids:
            if not self.data_input_checkpointer.checkpoint_exists(
                    checkpoint_path=self._get_data_input_checkpoint_path(
                        context),
                    current_id=current_id):
                return False

            if not self.expected_output_checkpointer.checkpoint_exists(
                    checkpoint_path=self._get_expected_output_checkpoint_path(
                        context),
                    current_id=current_id):
                return False

        return True
Example no. 10
    def handle_transform(self, data_container: DataContainer,
                         context: ExecutionContext):
        """
        Transform the data with the unions. It will make use of some parallel processing.

        :param data_container: data container
        :param context: execution context
        :return: the transformed data_inputs.
        """
        if self.n_jobs != 1:
            data_containers = Parallel(
                backend=self.backend,
                n_jobs=self.n_jobs)(delayed(step.handle_transform)(
                    data_container.copy(), context.push(step))
                                    for _, step in self.steps_as_tuple)
        else:
            data_containers = [
                step.handle_transform(data_container.copy(),
                                      context.push(step))
                for _, step in self.steps_as_tuple
            ]

        new_current_ids = self.hash(data_container)

        data_container = self.joiner.handle_transform(data_containers,
                                                      new_current_ids)

        return data_container
Example no. 11
    def fit_data_container(self, data_container):
        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(self.cache_folder, ExecutionMode.FIT)
        context = context.push(self)
        new_self = self._fit_data_container(data_container, context)

        return new_self
Example no. 12
def test_queued_pipeline_saving(tmpdir):
    # Given
    p = ParallelQueuedFeatureUnion([
        ('1', FitTransformCallbackStep()),
        ('2', FitTransformCallbackStep()),
        ('3', FitTransformCallbackStep()),
        ('4', FitTransformCallbackStep()),
    ], n_workers_per_step=1, max_queue_size=10, batch_size=10)

    # When
    p, outputs = p.fit_transform(list(range(100)), list(range(100)))
    p.save(ExecutionContext(tmpdir))
    p.apply('clear_callbacks')

    # Then

    assert len(p[0].wrapped.transform_callback_function.data) == 0
    assert len(p[0].wrapped.fit_callback_function.data) == 0
    assert len(p[1].wrapped.transform_callback_function.data) == 0
    assert len(p[1].wrapped.fit_callback_function.data) == 0
    assert len(p[2].wrapped.transform_callback_function.data) == 0
    assert len(p[2].wrapped.fit_callback_function.data) == 0
    assert len(p[3].wrapped.transform_callback_function.data) == 0
    assert len(p[3].wrapped.fit_callback_function.data) == 0

    p = p.load(ExecutionContext(tmpdir))

    assert len(p[0].wrapped.transform_callback_function.data) == 10
    assert len(p[0].wrapped.fit_callback_function.data) == 10
    assert len(p[1].wrapped.transform_callback_function.data) == 10
    assert len(p[1].wrapped.fit_callback_function.data) == 10
    assert len(p[2].wrapped.transform_callback_function.data) == 10
    assert len(p[2].wrapped.fit_callback_function.data) == 10
    assert len(p[3].wrapped.transform_callback_function.data) == 10
    assert len(p[3].wrapped.fit_callback_function.data) == 10
Example no. 13
    def fit_transform_data_container(self, data_container):
        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(root=self.cache_folder,
                                   execution_mode=ExecutionMode.FIT_TRANSFORM)
        context = context.push(self)
        new_self, data_container = self._fit_transform_data_container(
            data_container, context)

        return new_self, data_container.data_inputs
Example no. 14
    def transform_data_container(self, data_container: DataContainer):
        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(root=self.cache_folder,
                                   execution_mode=ExecutionMode.TRANSFORM)
        context = context.push(self)
        data_container = self._transform_data_container(
            data_container, context)

        return data_container.data_inputs
Example no. 15
def test_localassert_should_assert_dependencies_properly_at_exec(tmpdir):
    data_inputs = np.array([0, 1, 2, 3])
    context = ExecutionContext(root=tmpdir)
    p = Pipeline([
        RegisterServiceDynamically(),
        SomeStep().assert_has_services_at_execution(SomeBaseService)
    ]).with_context(context=context)

    p.transform(data_inputs=data_inputs)
    service = context.get_service(SomeBaseService)
    assert np.array_equal(service.data, data_inputs)
Example no. 16
def test_with_context_should_inject_dependencies_properly(tmpdir):
    data_inputs = np.array([0, 1, 2, 3])
    context = ExecutionContext(root=tmpdir)
    service = SomeService()
    context.set_service_locator({BaseService: service})
    p = Pipeline([SomeStep().assert_has_services(BaseService)
                  ]).with_context(context=context)

    p.transform(data_inputs=data_inputs)

    assert np.array_equal(service.data, data_inputs)
Example no. 17
def test_tensorflowv2_saver(tmpdir):
    dataset = toy_dataset()
    model = Pipeline([create_model_step(tmpdir)])
    loss_first_fit = evaluate_model_on_dataset(model, dataset)

    model.save(ExecutionContext(root=tmpdir))

    loaded = Pipeline([create_model_step(tmpdir)
                       ]).load(ExecutionContext(root=tmpdir))
    loss_second_fit = evaluate_model_on_dataset(loaded, dataset)

    assert loss_second_fit < (loss_first_fit / 2)
Example no. 18
def test_step_with_context_should_only_save_wrapped_step(tmpdir):
    context = ExecutionContext(root=tmpdir)
    service = SomeService()
    context.set_service_locator({BaseService: service})
    p = Pipeline([SomeStep().assert_has_services(BaseService)
                  ]).with_context(context=context)

    p.save(context, full_dump=True)

    p: Pipeline = ExecutionContext(root=tmpdir).load(
        os.path.join('StepWithContext', 'Pipeline'))
    assert isinstance(p, Pipeline)
Example no. 19
    def transform(self, data_inputs: Any):
        """
        After loading the last checkpoint, transform each pipeline step.

        :param data_inputs: the data input to transform
        :return: transformed data inputs
        """
        data_container = DataContainer(current_ids=None, data_inputs=data_inputs)

        data_container = self.hash_data_container(data_container)
        context = ExecutionContext(root=self.cache_folder, execution_mode=ExecutionMode.TRANSFORM)
        context = context.push(self)
        data_container = self._transform_data_container(data_container, context)

        return data_container.data_inputs
Example no. 20
def test_auto_ml_should_assert_dependecies_properly_at_exec(tmpdir):
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = data_inputs * 2
    p = Pipeline([
        RegisterServiceDynamically(),
        SomeStep().assert_has_services_at_execution(SomeBaseService),
    ])
    context = ExecutionContext(root=tmpdir)

    auto_ml: AutoML = _make_autoML_loop(tmpdir, p)
    auto_ml: StepWithContext = auto_ml.with_context(context=context)
    assert isinstance(auto_ml, StepWithContext)
    auto_ml.fit(data_inputs, expected_outputs)

    service = context.get_service(SomeBaseService)
    assert np.array_equal(service.data, data_inputs)
Example no. 21
    def join_fit_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> \
            Tuple['Any', DataContainer]:
        """
        Concatenate the pipeline fit transform output of each batch of self.batch_size together.

        :param step: pipeline to fit transform on
        :type step: Pipeline
        :param data_container: data container to fit transform on
        :type data_container: DataContainer
        :param context: execution context
        :return: fitted self, transformed data inputs
        :rtype: Tuple[Any, DataContainer]
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            include_incomplete_batch=self.include_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            step, data_container_batch = step._fit_transform_data_container(
                data_container_batch, context)
            output_data_container.concat(data_container_batch)

        return step, output_data_container
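The loop above walks the fixed-size batches produced by minibatches and concatenates each transformed batch. The batching itself is a plain slicing pattern; an illustrative sketch with ordinary lists (no Neuraxle containers, batch size assumed to be a positive int):

    def batches(items, batch_size):
        # Yield consecutive slices of at most batch_size items, keeping the last partial batch.
        for i in range(0, len(items), batch_size):
            yield items[i:i + batch_size]

    assert list(batches(list(range(5)), 2)) == [[0, 1], [2, 3], [4]]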
Example no. 22
    def join_transform(self, step: Pipeline, data_container: DataContainer,
                       context: ExecutionContext) -> DataContainer:
        """
        Concatenate the pipeline transform output of each batch of self.batch_size together.

        :param step: pipeline to transform on
        :type step: Pipeline
        :param data_container: data container to transform
        :type data_container: DataContainer
        :param context: execution context
        :return: transformed data container
        :rtype: DataContainer
        """
        context = context.push(step)
        data_container_batches = data_container.minibatches(
            batch_size=self.batch_size,
            include_incomplete_batch=self.include_incomplete_batch,
            default_value_data_inputs=self.default_value_data_inputs,
            default_value_expected_outputs=self.default_value_expected_outputs)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            output_data_container.concat(
                step._transform_data_container(data_container_batch, context))

        return output_data_container
Example no. 23
    def _load_checkpoint(
            self, data_container: DataContainer,
            context: ExecutionContext) -> Tuple[NamedTupleList, DataContainer]:
        """
        Try loading a pipeline cache with the passed data container.
        If pipeline cache loading succeeds, find steps left to do,
        and load the latest data container.

        :param data_container: the data container to resume
        :param context: the execution context to resume
        :return: tuple(steps left to do, last checkpoint data container)
        """
        new_starting_step_index, starting_step_data_container = \
            self._get_starting_step_info(data_container, context)

        loading_context = context.copy()
        loading_context.pop()
        loaded_pipeline = self.load(loading_context)

        if not self.are_steps_before_index_the_same(loaded_pipeline,
                                                    new_starting_step_index):
            return self.steps_as_tuple, data_container

        self._assign_loaded_pipeline_into_self(loaded_pipeline)

        step = self[new_starting_step_index]
        if isinstance(step, Checkpoint) or (isinstance(
                step, MetaStep) and isinstance(step.wrapped, Checkpoint)):
            starting_step_data_container = step.resume(
                starting_step_data_container, context)

        return self[new_starting_step_index:], starting_step_data_container
Example no. 24
    def read_checkpoint(self, data_container: DataContainer,
                        context: ExecutionContext) -> DataContainer:
        """
        Read data container data inputs checkpoint with :py:attr:`~data_input_checkpointer`.
        Read data container expected outputs checkpoint with :py:attr:`~expected_output_checkpointer`.

        :param data_container: data container to read checkpoint for
        :type data_container: neuraxle.data_container.DataContainer
        :param context: execution context to read checkpoint from
        :type context: ExecutionContext
        :return: data container checkpoint
        :rtype: neuraxle.data_container.DataContainer
        """
        data_container_checkpoint = ListDataContainer.empty(
            original_data_container=data_container)

        current_ids = self.summary_checkpointer.read_summary(
            checkpoint_path=context.get_path(), data_container=data_container)

        for current_id in current_ids:
            data_input = self.data_input_checkpointer.read_checkpoint(
                checkpoint_path=self._get_data_input_checkpoint_path(context),
                current_id=current_id)

            expected_output = self.expected_output_checkpointer.read_checkpoint(
                checkpoint_path=self._get_expected_output_checkpoint_path(
                    context),
                current_id=current_id)

            data_container_checkpoint.append(current_id, data_input,
                                             expected_output)

        return data_container_checkpoint
Example no. 25
def test_logger():
    file_path = "test.log"

    if os.path.exists(file_path):
        os.remove(file_path)

    # Given
    logger = logging.getLogger('test')
    file_handler = logging.FileHandler(file_path)
    file_handler.setLevel('DEBUG')
    logger.addHandler(file_handler)
    logger.setLevel('DEBUG')
    context = ExecutionContext(logger=logger)
    pipeline = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        LoggingStep()
    ])

    # When
    data_container = DataContainer(
        data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
    pipeline.handle_fit(data_container, context)

    # Then
    assert os.path.exists(file_path)
    with open(file_path) as f:
        log_content = f.read()

    # Teardown
    file_handler.close()
    os.remove(file_path)
Example no. 26
    def start(self, context: ExecutionContext):
        """
        Start multiple processes or threads with the worker function as a target.

        :param context: execution context
        :type context: ExecutionContext
        :return:
        """
        thread_safe_context = context
        thread_safe_self = self
        parallel_call = Thread

        if self.use_processes:
            # New process requires trimming the references to other processes
            # when we create many processes: https://stackoverflow.com/a/65749012
            thread_safe_context = context.thread_safe()
            parallel_call = Process

        if self.use_savers:
            _ = thread_safe_self.save(thread_safe_context, full_dump=True)  # Cannot delete queue worker self.
            del thread_safe_self.wrapped
            # del thread_safe_self.queue

        self.workers = []
        for _, worker_arguments in zip(range(self.n_workers), self.additional_worker_arguments):
            p = parallel_call(
                target=worker_function,
                args=(thread_safe_self, thread_safe_context, self.use_savers, worker_arguments)
            )
            p.daemon = True
            p.start()
            self.workers.append(p)
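The parallel_call switch above is plain standard-library Thread/Process dispatch. A self-contained sketch of that switch with a stand-in target function (not Neuraxle's worker_function):

    from threading import Thread
    from multiprocessing import Process

    def _noop_worker():
        pass

    use_processes = False  # flip to True to spawn a process instead of a thread
    parallel_call = Process if use_processes else Thread
    worker = parallel_call(target=_noop_worker)
    worker.daemon = True  # the worker dies with the parent, as in the example above
    worker.start()
    worker.join()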
Example no. 27
    def refit(self, p: BaseStep, data_container: DataContainer,
              context: ExecutionContext) -> BaseStep:
        """
        Refit the pipeline on the whole dataset (without any validation technique).

        :param p: pipeline to refit
        :param data_container: data container
        :param context: execution context

        :return: fitted pipeline
        """
        context.set_execution_phase(ExecutionPhase.TRAIN)
        for i in range(self.epochs):
            p = p.handle_fit(data_container, context)

        return p
Example no. 28
    def handle_inverse_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
        """
        Handle inverse transform by passing expected outputs to the wrapped step inverse transform method.
        Update the expected outputs with the outputs.

        :param context: execution context
        :param data_container: data container whose expected outputs will be inverse transformed
        :return: data container
        :rtype: DataContainer
        """
        new_expected_outputs_data_container = self.wrapped.handle_inverse_transform(
            DataContainer(
                current_ids=data_container.current_ids,
                data_inputs=data_container.expected_outputs,
                expected_outputs=None
            ),
            context.push(self.wrapped)
        )

        data_container.set_expected_outputs(new_expected_outputs_data_container.data_inputs)

        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)

        return data_container
Example no. 29
def worker_function(queue_worker: QueueWorker, context: ExecutionContext, use_savers: bool, additional_worker_arguments):
    """
    Worker function that transforms the items inside the queue of items to process.

    :param queue_worker: step to transform
    :param context: execution context
    :param use_savers: use savers
    :param additional_worker_arguments: any additional arguments that need to be passed to the workers
    :return:
    """
    step = queue_worker.get_step()
    if use_savers:
        saved_queue_worker: QueueWorker = context.load(queue_worker.get_name())
        step = saved_queue_worker.get_step()

    additional_worker_arguments = tuple(
        additional_worker_arguments[i: i + 2] for i in range(0, len(additional_worker_arguments), 2)
    )

    for argument_name, argument_value in additional_worker_arguments:
        step.__dict__.update({argument_name: argument_value})

    while True:
        task: QueuedPipelineTask = queue_worker.get()
        summary_id = task.data_container.summary_id
        data_container = step.handle_transform(task.data_container, context)
        data_container = data_container.set_summary_id(summary_id)
        queue_worker.notify(QueuedPipelineTask(step_name=queue_worker.name, data_container=data_container))
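The additional_worker_arguments reshaping above turns a flat list of alternating names and values into pairs before they are set on the step. The same idiom in isolation, with hypothetical argument names for illustration:

    flat_args = ['some_name', 1, 'some_other_name', 2]  # hypothetical name/value pairs
    pairs = tuple(flat_args[i: i + 2] for i in range(0, len(flat_args), 2))
    assert pairs == (['some_name', 1], ['some_other_name', 2])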
Example no. 30
    def join_fit_transform(
            self, step: Pipeline, data_container: DataContainer,
            context: ExecutionContext) -> Tuple['Any', DataContainer]:
        """
        Concatenate the pipeline fit transform output of each batch of self.batch_size together.

        :param step: pipeline to fit transform on
        :type step: Pipeline
        :param data_container: data container to fit transform on
        :type data_container: DataContainer
        :param context: execution context
        :return: fitted self, transformed data inputs
        :rtype: Tuple[Any, DataContainer]
        """
        context = context.push(step)

        data_container_batches = data_container.convolved_1d(
            stride=self.batch_size, kernel_size=self.batch_size)

        output_data_container = ListDataContainer.empty()
        for data_container_batch in data_container_batches:
            step, data_container_batch = step._fit_transform_data_container(
                data_container_batch, context)
            output_data_container.concat(data_container_batch)

        return step, output_data_container