Example #1
    def split_data_container(self, data_container: DataContainer) -> Tuple[DataContainer, DataContainer]:
        """
        Split a data container into a training set and a validation set.

        :param data_container: data container
        :type data_container: DataContainer
        :return: train_data_container, validation_data_container
        """

        train_data_inputs, train_expected_outputs, validation_data_inputs, validation_expected_outputs = \
            self.split(data_container.data_inputs, data_container.expected_outputs)

        train_ids = self.train_split(data_container.current_ids)
        train_data_container = DataContainer(
            data_inputs=train_data_inputs,
            current_ids=train_ids,
            summary_id=data_container.summary_id,
            expected_outputs=train_expected_outputs)

        validation_ids = self.validation_split(data_container.current_ids)
        validation_data_container = DataContainer(
            data_inputs=validation_data_inputs,
            current_ids=validation_ids,
            summary_id=data_container.summary_id,
            expected_outputs=validation_expected_outputs)

        return train_data_container, validation_data_container
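A minimal usage sketch of the method above. It assumes a `splitter` object that exposes this `split_data_container` (for instance a validation split wrapper configured elsewhere); the `splitter` name is illustrative, not taken from the snippet.

import numpy as np
from neuraxle.data_container import DataContainer

# `splitter` is assumed to be an already-configured step that exposes the
# split_data_container method shown above.
data_container = DataContainer(
    data_inputs=np.arange(10),
    current_ids=[str(i) for i in range(10)],
    expected_outputs=np.arange(10, 20))

train_dc, validation_dc = splitter.split_data_container(data_container)

# Both containers keep the parent's summary_id but hold disjoint slices of
# the data inputs, current ids, and expected outputs.
print(len(train_dc.data_inputs), len(validation_dc.data_inputs))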
Example #2
def test_inner_concatenate_data_should_merge_1d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_1d, expected_outputs_1d = _create_data_source(SHAPE_1D)
    data_container_1d = DataContainer(data_inputs=data_inputs_1d,
                                      expected_outputs=expected_outputs_1d)
    data_container = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('1d', data_container_1d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['1d'])])

    data_container = p.handle_transform(data_container, ExecutionContext())

    # Then
    broadcasted_data_inputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.data_inputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))
    broadcasted_expected_outputs_1d = np.broadcast_to(
        np.expand_dims(data_container_1d.expected_outputs, axis=-1),
        shape=(SHAPE_3D[0], SHAPE_3D[1]))

    assert np.array_equal(data_container.data_inputs[..., -1],
                          broadcasted_data_inputs_1d)
    assert np.array_equal(data_container.expected_outputs[..., -1],
                          broadcasted_expected_outputs_1d)

    assert data_container.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                SHAPE_3D[2] + 1)
    assert data_container.expected_outputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                     SHAPE_3D[2] + 1)
Example #3
def test_inner_concatenate_data_should_merge_2d_with_3d():
    # Given
    data_inputs_3d, expected_outputs_3d = _create_data_source(SHAPE_3D)
    data_inputs_2d, expected_outputs_2d = _create_data_source(SHAPE_2D)
    data_container_2d = DataContainer(data_inputs=data_inputs_2d,
                                      expected_outputs=expected_outputs_2d)
    data_container_3d = DataContainer(data_inputs=data_inputs_3d, expected_outputs=expected_outputs_3d) \
        .add_sub_data_container('2d', data_container_2d)

    # When
    p = Pipeline(
        [InnerConcatenateDataContainer(sub_data_container_names=['2d'])])

    data_container_3d = p.handle_transform(data_container_3d,
                                           ExecutionContext())

    # Then
    assert data_container_3d.data_inputs.shape == (SHAPE_3D[0], SHAPE_3D[1],
                                                   SHAPE_3D[2] + 1)
    assert data_container_3d.expected_outputs.shape == (SHAPE_3D[0],
                                                        SHAPE_3D[1],
                                                        SHAPE_3D[2] + 1)
    assert np.array_equal(data_container_3d.data_inputs[..., -1],
                          data_container_2d.data_inputs)
    assert np.array_equal(data_container_3d.expected_outputs[..., -1],
                          data_container_2d.expected_outputs)
Example #4
    def split_data_container(self, data_container: DataContainer) -> List[Tuple[DataContainer, DataContainer]]:
        """
        Split the data container using the wrapped validation split function.
        A validation split function takes two arguments: data inputs, and expected outputs.

        :param data_container: data container to split
        :return: a list of (train, validation) data container pairs, one pair per validation split
        """
        train_data_inputs, train_expected_outputs, validation_data_inputs, validation_expected_outputs = self.split(
            data_inputs=data_container.data_inputs,
            expected_outputs=data_container.expected_outputs
        )

        train_data_container = DataContainer(data_inputs=train_data_inputs, expected_outputs=train_expected_outputs)
        validation_data_container = DataContainer(data_inputs=validation_data_inputs,
                                                  expected_outputs=validation_expected_outputs)

        splits = []
        for (train_current_id, train_di, train_eo), (validation_current_id, validation_di, validation_eo) in zip(
                train_data_container, validation_data_container):
            train_data_container_split = DataContainer(
                summary_id=train_current_id,
                data_inputs=train_di,
                expected_outputs=train_eo
            )

            validation_data_container_split = DataContainer(
                summary_id=validation_current_id,
                data_inputs=validation_di,
                expected_outputs=validation_eo
            )

            splits.append((train_data_container_split, validation_data_container_split))

        return splits
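A short sketch of how the list of splits returned above can be consumed, reusing the `ValidationSplitter(test_size=0.2)` configuration that appears in the tests further down; the import path is an assumption and may differ between Neuraxle versions.

import numpy as np
from neuraxle.data_container import DataContainer
# Assumed import path; adjust to the Neuraxle version in use.
from neuraxle.metaopt.auto_ml import ValidationSplitter

data_inputs = np.random.random((10, 3)).astype(np.float32)
expected_outputs = np.random.random((10, 1)).astype(np.float32)

splits = ValidationSplitter(test_size=0.2).split_data_container(
    DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs))

# Each element is a (train, validation) pair of DataContainer objects.
for train_dc, validation_dc in splits:
    print(len(train_dc.data_inputs), len(validation_dc.data_inputs))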
Example #5
    def split_data_container(self, data_container: DataContainer) -> Tuple[DataContainer, DataContainer]:
        train_data_inputs, train_expected_outputs, validation_data_inputs, validation_expected_outputs = self.split(
            data_container.data_inputs,
            data_container.expected_outputs
        )

        train_data_container = DataContainer(data_inputs=train_data_inputs, expected_outputs=train_expected_outputs)
        validation_data_container = DataContainer(data_inputs=validation_data_inputs,
                                                  expected_outputs=validation_expected_outputs)

        return train_data_container, validation_data_container
Example #6
def test_zip_data_container_should_merge_two_data_sources_together():
    data_inputs_3d, expected_outputs_3d = _create_data_source((10, 10, 2))
    data_inputs_2d, expected_outputs_2d = _create_data_source((10, 10))
    data_container_2d = DataContainer(data_inputs=data_inputs_2d,
                                      expected_outputs=expected_outputs_2d)
    data_container = DataContainer(data_inputs=data_inputs_3d,
                                   expected_outputs=expected_outputs_3d)

    zip_data_container = ZipDataContainer.create_from(data_container,
                                                      data_container_2d)

    assert zip_data_container.current_ids == data_container.current_ids
    for i, di in enumerate(zip_data_container.data_inputs):
        assert np.array_equal(di[0], data_inputs_3d[i])
        assert np.array_equal(di[1], data_inputs_2d[i])
Example #7
    def train(self, pipeline: BaseStep, data_inputs, expected_outputs=None) -> Trial:
        """
        Train the pipeline using the validation splitter.
        Track training and validation metrics for each epoch.
        Note: this method is a shortcut for `execute_trial` with less boilerplate code.
        Refer to `execute_trial` for full flexibility.

        :param pipeline: pipeline to train on
        :param data_inputs: data inputs
        :param expected_outputs: expected outputs to fit on
        :return: executed trial
        """
        validation_splits: List[Tuple[DataContainer, DataContainer]] = self.validation_split_function.split_data_container(
            DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs)
        )

        repo_trial: Trial = Trial(
            pipeline=pipeline,
            hyperparams=pipeline.get_hyperparams(),
            main_metric_name=self.get_main_metric_name()
        )

        self.execute_trial(
            pipeline=pipeline,
            trial_number=1,
            repo_trial=repo_trial,
            context=ExecutionContext(),
            validation_splits=validation_splits,
            n_trial=1,
            delete_pipeline_on_completion=False
        )

        return repo_trial
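A hedged sketch of calling the `train` shortcut above. `trainer` stands for whichever object defines this method (already configured with a validation split function and a main metric); `Pipeline` and `MultiplyByN` are reused from the other snippets purely as a stand-in pipeline.

import numpy as np
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import MultiplyByN

# `trainer` is assumed to be already constructed with a validation split
# function and scoring callbacks; only the call shape is shown here.
pipeline = Pipeline([MultiplyByN(2)])
data_inputs = np.arange(10)
expected_outputs = data_inputs * 2

repo_trial = trainer.train(pipeline, data_inputs, expected_outputs)
# The returned Trial keeps the per-epoch training and validation metrics.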
Example #8
def test_data_container_batching(batch_size, include_incomplete_pass,
                                 default_value, expected_data_containers):
    data_container = DataContainer(current_ids=[str(i) for i in range(10)],
                                   data_inputs=np.array(list(range(10))),
                                   expected_outputs=np.array(
                                       list(range(10, 20))))

    # When
    data_containers = []
    for dc in data_container.minibatches(
            batch_size=batch_size,
            include_incomplete_batch=include_incomplete_pass,
            default_value_data_inputs=default_value):
        data_containers.append(dc)

    # Then
    assert len(expected_data_containers) == len(data_containers)
    for expected_data_container, actual_data_container in zip(
            expected_data_containers, data_containers):
        assert np.array_equal(expected_data_container.current_ids,
                              actual_data_container.current_ids)
        assert np.array_equal(expected_data_container.data_inputs,
                              actual_data_container.data_inputs)
        assert np.array_equal(expected_data_container.expected_outputs,
                              actual_data_container.expected_outputs)
Example #9
def test_pickle_checkpoint_step_should_load_data_container(tmpdir: LocalPath):
    initial_data_inputs = [1, 2]
    initial_expected_outputs = [2, 3]

    create_pipeline_output_transformer = lambda: ResumablePipeline([
        ('output_transformer_1', MultiplyBy2OutputTransformer()),
        ('pickle_checkpoint', DefaultCheckpoint()),
        ('output_transformer_2', MultiplyBy2OutputTransformer()),
    ], cache_folder=tmpdir)

    create_pipeline_output_transformer().fit_transform(
        data_inputs=initial_data_inputs,
        expected_outputs=initial_expected_outputs)
    transformer = create_pipeline_output_transformer()
    actual_data_container = transformer.handle_transform(
        DataContainer(current_ids=[0, 1],
                      data_inputs=initial_data_inputs,
                      expected_outputs=initial_expected_outputs),
        ExecutionContext.create_from_root(transformer, ExecutionMode.TRANSFORM,
                                          tmpdir))

    assert np.array_equal(actual_data_container.data_inputs, [4, 8])
    assert np.array_equal(actual_data_container.expected_outputs, [8, 12])
Example #10
def test_kfold_cross_validation_should_split_data_properly_bug():
    data_inputs = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    expected_outputs = np.array([0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40])
    data_container = DataContainer(data_inputs=data_inputs,
                                   expected_outputs=expected_outputs)
    splitter = KFoldCrossValidationSplitter(k_fold=2)

    # When
    validation_splits = splitter.split_data_container(data_container)

    train_di, train_eo, validation_di, validation_eo = extract_validation_split_data(
        validation_splits)

    # Then
    assert len(train_di[0]) == 6
    assert np.array_equal(np.array(train_di[0]), data_inputs[5:])
    assert len(train_eo[0]) == 6
    assert np.array_equal(np.array(train_eo[0]), expected_outputs[5:])

    assert len(train_di[1]) == 5
    assert np.array_equal(np.array(train_di[1]), data_inputs[:5])
    assert len(train_eo[1]) == 5
    assert np.array_equal(np.array(train_eo[1]), expected_outputs[:5])

    assert len(validation_di[0]) == 5
    assert np.array_equal(np.array(validation_di[0]), data_inputs[:5])
    assert len(validation_eo[0]) == 5
    assert np.array_equal(np.array(validation_eo[0]), expected_outputs[:5])

    assert len(validation_di[1]) == 6
    assert np.array_equal(np.array(validation_di[1]), data_inputs[5:])
    assert len(validation_eo[1]) == 6
    assert np.array_equal(validation_eo[1], expected_outputs[5:])
Example #11
def test_logger():
    file_path = "test.log"

    if os.path.exists(file_path):
        os.remove(file_path)

    # Given
    logger = logging.getLogger('test')
    file_handler = logging.FileHandler(file_path)
    file_handler.setLevel('DEBUG')
    logger.addHandler(file_handler)
    logger.setLevel('DEBUG')
    context = ExecutionContext(logger=logger)
    pipeline = Pipeline([
        MultiplyByN(2).set_hyperparams_space(
            HyperparameterSpace({'multiply_by': FixedHyperparameter(2)})),
        NumpyReshape(new_shape=(-1, 1)),
        LoggingStep()
    ])

    # When
    data_container = DataContainer(
        data_inputs=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))
    pipeline.handle_fit(data_container, context)

    # Then
    assert os.path.exists(file_path)
    with open(file_path) as f:
        log_content = f.read()
    # The logging steps should have written something to the file.
    assert len(log_content) > 0

    # Teardown
    file_handler.close()
    os.remove(file_path)
Example #12
def test_validation_splitter_should_split_data_properly():
    # Given
    data_inputs = np.random.random((4, 2, 2048, 6)).astype(np.float32)
    expected_outputs = np.random.random((4, 2, 2048, 1)).astype(np.float32)
    splitter = ValidationSplitter(test_size=0.2)

    # When
    validation_splits = splitter.split_data_container(
        DataContainer(data_inputs=data_inputs,
                      expected_outputs=expected_outputs))
    train_di, train_eo, validation_di, validation_eo = extract_validation_split_data(
        validation_splits)

    train_di = train_di[0]
    train_eo = train_eo[0]

    validation_di = validation_di[0]
    validation_eo = validation_eo[0]

    # Then
    assert len(train_di) == 3
    assert np.array_equal(np.array(train_di), data_inputs[0:3])
    assert len(train_eo) == 3
    assert np.array_equal(np.array(train_eo), expected_outputs[0:3])

    assert len(validation_di) == 1
    assert np.array_equal(validation_di[0], data_inputs[-1])
    assert len(validation_eo) == 1
    assert np.array_equal(validation_eo[0], expected_outputs[-1])
Example #13
def test_list_data_container_concat():
    # Given
    data_container = ListDataContainer(
        current_ids=[str(i) for i in range(100)],
        data_inputs=np.array(list(range(100))),
        expected_outputs=np.array(list(range(100, 200))))

    # When
    data_container.concat(
        DataContainer(current_ids=[str(i) for i in range(100, 200)],
                      data_inputs=np.array(list(range(100, 200))),
                      expected_outputs=np.array(list(range(200, 300)))))

    # Then
    assert np.array_equal(np.array(data_container.current_ids),
                          np.array(list(range(0, 200))).astype(str))

    expected_data_inputs = np.array(list(range(0, 200))).astype(int)
    actual_data_inputs = np.array(data_container.data_inputs).astype(int)
    assert np.array_equal(actual_data_inputs, expected_data_inputs)

    expected_expected_outputs = np.array(list(range(100, 300))).astype(int)
    assert np.array_equal(
        np.array(data_container.expected_outputs).astype(int),
        expected_expected_outputs)
Example #14
    def handle_inverse_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
        """
        Handle inverse transform by passing the expected outputs to the wrapped step's inverse transform method.
        Update the data container's expected outputs with the resulting outputs.

        :param data_container: data container whose expected outputs will be inverse transformed
        :param context: execution context
        :return: data container
        :rtype: DataContainer
        """
        new_expected_outputs_data_container = self.wrapped.handle_inverse_transform(
            DataContainer(
                current_ids=data_container.current_ids,
                data_inputs=data_container.expected_outputs,
                expected_outputs=None
            ),
            context.push(self.wrapped)
        )

        data_container.set_expected_outputs(new_expected_outputs_data_container.data_inputs)

        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)

        return data_container
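A sketch of the flow implemented above, assuming `wrapper` is a step with this `handle_inverse_transform` and that its wrapped step defines an `inverse_transform`; the names are illustrative. The point is that the container's expected outputs are fed through the wrapped step and written back as the new expected outputs, while the data inputs stay untouched.

import numpy as np
from neuraxle.base import ExecutionContext
from neuraxle.data_container import DataContainer

data_container = DataContainer(
    current_ids=[0, 1, 2],
    data_inputs=np.array([1, 2, 3]),
    expected_outputs=np.array([2, 4, 6]))

# `wrapper` is assumed to wrap a step whose inverse_transform is defined.
out = wrapper.handle_inverse_transform(data_container, ExecutionContext())

# data_inputs are unchanged; expected_outputs were replaced by the wrapped
# step's inverse_transform of the original expected_outputs.
print(out.data_inputs, out.expected_outputs)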
Example #15
def test_zip_data_container_should_concatenate_inner_features():
    data_inputs_3d, expected_outputs_3d = _create_data_source((10, 10, 2))
    data_inputs_2d, expected_outputs_2d = _create_data_source((10, 10))
    data_container_2d = DataContainer(data_inputs=data_inputs_2d,
                                      expected_outputs=expected_outputs_2d)
    data_container = DataContainer(data_inputs=data_inputs_3d,
                                   expected_outputs=expected_outputs_3d)

    zip_data_container = ZipDataContainer.create_from(data_container,
                                                      data_container_2d)
    zip_data_container.concatenate_inner_features()

    assert np.array_equal(
        np.array(zip_data_container.data_inputs)[..., -1],
        data_container_2d.data_inputs)
    assert np.array_equal(np.array(zip_data_container.expected_outputs),
                          expected_outputs_3d)
Example #16
    def transform(self, data_inputs: Any):
        """
        After loading the last checkpoint, transform each pipeline step.

        :param data_inputs: the data input to transform
        :return: transformed data inputs
        """
        data_container = self.transform_data_container(DataContainer(data_inputs=data_inputs, current_ids=None))
        return data_container.data_inputs
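For comparison, a minimal end-to-end call of such a `transform` through a `Pipeline` built from steps already used in these snippets; the pipeline wraps the raw inputs in a `DataContainer` internally, exactly as the method above does. The non-fittable steps here can be transformed without a prior fit, as in the handle_transform tests above.

import numpy as np
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import MultiplyByN

p = Pipeline([MultiplyByN(2), MultiplyByN(3)])
outputs = p.transform(np.array([1, 2, 3]))
# Each input went through both steps: 1*2*3, 2*2*3, 3*2*3.
print(outputs)  # expected: [6, 12, 18]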
Example #17
def test_kfold_cross_validation_should_split_data_properly():
    # Given
    data_inputs = np.random.random((4, 2, 2048, 6)).astype(np.float32)
    expected_outputs = np.random.random((4, 2, 2048, 1)).astype(np.float32)
    splitter = KFoldCrossValidationSplitter(k_fold=4)

    # When
    validation_splits = splitter.split_data_container(
        data_container=DataContainer(data_inputs=data_inputs, expected_outputs=expected_outputs),
        context=ExecutionContext()
    )
    train_di, train_eo, validation_di, validation_eo = extract_validation_split_data(validation_splits)

    # Then
    assert len(train_di[0]) == 3
    assert np.array_equal(np.array(train_di[0]), data_inputs[1:])
    assert len(train_eo[0]) == 3
    assert np.array_equal(np.array(train_eo[0]), expected_outputs[1:])

    assert len(train_di[1]) == 3
    assert np.array_equal(np.array(train_di[1]),
                          np.concatenate((np.expand_dims(data_inputs[0], axis=0), data_inputs[2:]), axis=0))
    assert len(train_eo[1]) == 3
    assert np.array_equal(np.array(train_eo[1]),
                          np.concatenate((np.expand_dims(expected_outputs[0], axis=0), expected_outputs[2:]), axis=0))

    assert len(train_di[2]) == 3
    assert np.array_equal(np.array(train_di[2]),
                          np.concatenate((data_inputs[0:2], np.expand_dims(data_inputs[3], axis=0)), axis=0))
    assert len(train_eo[2]) == 3
    assert np.array_equal(np.array(train_eo[2]),
                          np.concatenate((expected_outputs[0:2], np.expand_dims(expected_outputs[3], axis=0)), axis=0))

    assert len(train_di[3]) == 3
    assert np.array_equal(np.array(train_di[3]), data_inputs[0:3])
    assert len(train_eo[3]) == 3
    assert np.array_equal(np.array(train_eo[3]), expected_outputs[0:3])

    assert len(validation_di[0]) == 1
    assert np.array_equal(validation_di[0][0], data_inputs[0])
    assert len(validation_eo[0]) == 1
    assert np.array_equal(validation_eo[0][0], expected_outputs[0])

    assert len(validation_di[1]) == 1
    assert np.array_equal(validation_di[1][0], data_inputs[1])
    assert len(validation_eo[1]) == 1
    assert np.array_equal(validation_eo[1][0], expected_outputs[1])

    assert len(validation_di[2]) == 1
    assert np.array_equal(validation_di[2][0], data_inputs[2])
    assert len(validation_eo[2]) == 1
    assert np.array_equal(validation_eo[2][0], expected_outputs[2])

    assert len(validation_di[3]) == 1
    assert np.array_equal(validation_di[3][0], data_inputs[3])
    assert len(validation_eo[3]) == 1
    assert np.array_equal(validation_eo[3][0], expected_outputs[3])
Example #18
def test_data_container_iter_method_should_iterate_with_none_expected_outputs(
):
    data_container = DataContainer(current_ids=[str(i) for i in range(100)],
                                   data_inputs=np.array(list(range(100))),
                                   expected_outputs=None)

    for i, (current_id, data_input,
            expected_outputs) in enumerate(data_container):
        assert data_input == i
        assert expected_outputs is None
Example #19
def test_input_and_output_transformer_wrapper_should_not_return_a_different_amount_of_data_inputs_and_expected_outputs(
):
    with pytest.raises(AssertionError):
        p = InputAndOutputTransformerWrapper(ChangeLenDataInputs())
        data_inputs, expected_outputs = _create_data_source((10, 10))

        p.handle_transform(
            DataContainer(data_inputs=data_inputs,
                          expected_outputs=expected_outputs),
            ExecutionContext())
Example #20
    def fit(self, data_inputs, expected_outputs=None) -> 'Pipeline':
        """
        After loading the last checkpoint, fit each pipeline step.

        :param data_inputs: the data input to fit on
        :param expected_outputs: the expected data output to fit on
        :return: the pipeline itself
        """
        return self.fit_data_container(
            DataContainer(data_inputs=data_inputs, current_ids=None, expected_outputs=expected_outputs))
Example #21
def test_input_and_output_transformer_wrapper_should_raise_an_assertion_error_if_current_ids_have_not_been_resampled_correctly(
):
    with pytest.raises(AssertionError) as e:
        p = InputAndOutputTransformerWrapper(
            ChangeLenDataInputsAndExpectedOutputs())
        data_inputs, expected_outputs = _create_data_source((10, 10))

        p.handle_transform(
            DataContainer(data_inputs=data_inputs,
                          expected_outputs=expected_outputs),
            ExecutionContext())
Example #22
    def fit_transform(self, data_inputs, expected_outputs=None) -> Tuple['Pipeline', Any]:
        """
        After loading the last checkpoint, fit and transform each pipeline step.

        :param data_inputs: the data input to fit on
        :param expected_outputs: the expected data output to fit on
        :return: the pipeline itself, and the transformed data inputs
        """
        new_self, data_container = self.fit_transform_data_container(
            DataContainer(data_inputs=data_inputs, current_ids=None, expected_outputs=expected_outputs))
        return new_self, data_container.data_inputs
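A matching sketch for `fit_transform`, again with steps already used elsewhere in these examples: the call returns the (possibly updated) pipeline plus the transformed data inputs, mirroring the tuple built above.

import numpy as np
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import MultiplyByN

p = Pipeline([MultiplyByN(2)])
data_inputs = np.array([1, 2, 3])
expected_outputs = np.array([2, 4, 6])

p, outputs = p.fit_transform(data_inputs, expected_outputs)
# `p` is the fitted pipeline; `outputs` are the transformed data inputs.
print(outputs)  # expected: [2, 4, 6]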
Example #23
def test_output_transformer_wrapper_should_transform_with_data_inputs_and_expected_outputs():
    p = OutputTransformerWrapper(MultiplyByN(2))
    data_inputs, expected_outputs = _create_data_source((10, 10))

    data_container = p.handle_transform(DataContainer(
        data_inputs=data_inputs,
        expected_outputs=expected_outputs
    ), ExecutionContext())

    assert np.array_equal(data_container.data_inputs, data_inputs)
    assert np.array_equal(data_container.expected_outputs, expected_outputs * 2)
Example #24
def test_output_transformer_should_zip_data_input_and_expected_output_in_the_transformed_output(
        tmpdir: LocalPath):
    pipeline = Pipeline([MultiplyBy2OutputTransformer()])

    pipeline, new_data_container = pipeline.handle_fit_transform(
        DataContainer(data_inputs=[1, 2, 3],
                      current_ids=[0, 1, 2],
                      expected_outputs=[2, 3, 4]), ExecutionContext(tmpdir))

    assert new_data_container.data_inputs == [2, 4, 6]
    assert new_data_container.expected_outputs == [4, 6, 8]
Example #25
def test_data_container_iter_method_should_iterate_with_none_current_ids():
    data_container = DataContainer(data_inputs=np.array(list(range(100))),
                                   expected_outputs=np.array(
                                       list(range(100,
                                                  200)))).set_current_ids(None)

    for i, (current_id, data_input,
            expected_outputs) in enumerate(data_container):
        assert current_id is None
        assert data_input == i
        assert expected_outputs == i + 100
Example #26
def test_fit_transform_input_and_output_transformer_mixin_with_hashlib_md5_value_hasher():
    step, data_container = WindowTimeSeries().handle_fit_transform(
        data_container=DataContainer(
            data_inputs=np.array(list(range(10))),
            expected_outputs=np.array(list(range(10)))
        ),
        context=ExecutionContext()
    )

    assert np.array_equal(data_container.data_inputs, np.array(list(range(0, 5))))
    assert np.array_equal(data_container.expected_outputs, np.array(list(range(5, 10))))
Example #27
    def handle_fit(self, data_container: DataContainer,
                   context: ExecutionContext) -> (BaseStep, DataContainer):
        self.wrapped = self.wrapped.handle_fit(
            DataContainer(current_ids=data_container.current_ids,
                          data_inputs=data_container.expected_outputs,
                          expected_outputs=None), context.push(self.wrapped))

        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)

        return self, data_container
Example #28
def plot_predictions(data_inputs, expected_outputs, pipeline,
                     max_plotted_predictions):
    _, _, data_inputs_validation, expected_outputs_validation = \
        pipeline.get_step_by_name('ValidationSplitWrapper').split(data_inputs, expected_outputs)

    pipeline.apply('toggle_plotting')
    pipeline.apply('set_max_plotted_predictions', max_plotted_predictions)

    signal_prediction_pipeline = pipeline.get_step_by_name('SignalPrediction')
    signal_prediction_pipeline.transform_data_container(
        DataContainer(data_inputs=data_inputs_validation,
                      expected_outputs=expected_outputs_validation))
Example #29
def test_data_doubler():
    p = InputAndOutputTransformerWrapper(DoubleData())
    data_inputs, expected_outputs = _create_data_source((10, 10))

    out = p.handle_transform(
        DataContainer(data_inputs=data_inputs,
                      expected_outputs=expected_outputs), ExecutionContext())

    doubled_length = len(out.data_inputs)
    assert doubled_length == 2 * len(data_inputs)
    assert doubled_length == len(out.expected_outputs)
    assert doubled_length == len(out.current_ids)
Example #30
    def transform(self, data_inputs: Any):
        """
        :param data_inputs: the data input to transform
        :return: transformed data inputs
        """
        data_container = DataContainer(current_ids=None, data_inputs=data_inputs)

        self.hash_data_container(data_container)

        context = ExecutionContext(self.cache_folder, ExecutionMode.TRANSFORM)
        data_container = self.handle_transform(data_container, context)

        return data_container.data_inputs