Пример #1
0
    def handle_transform(self, data_container: DataContainer,
                         context: ExecutionContext):
        """
        Transform the data container through every union step, possibly in
        parallel, then join the per-step results.

        :param data_container: data container to transform
        :param context: execution context
        :return: the joined, transformed data container
        """
        if self.n_jobs == 1:
            sub_results = [
                step.handle_transform(data_container.copy(),
                                      context.push(step))
                for _, step in self.steps_as_tuple
            ]
        else:
            # Fan a copy of the container out to each joblib worker.
            sub_results = Parallel(backend=self.backend, n_jobs=self.n_jobs)(
                delayed(step.handle_transform)(data_container.copy(),
                                               context.push(step))
                for _, step in self.steps_as_tuple)

        recomputed_ids = self.hash(data_container)
        return self.joiner.handle_transform(sub_results, recomputed_ids)
Пример #2
0
 def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
     """Zip the inner data containers of ``data_container`` into one container."""
     for inner in data_container.data_inputs:
         # Every data input must itself be a DataContainer to be zippable.
         if not isinstance(inner, DataContainer):
             raise ValueError("data_inputs given to ZipFeatures must be a list of DataContainer instances")
     zipped = ZipDataContainer.create_from(*data_container.data_inputs)
     if self.concatenate_inner_features:
         zipped.concatenate_inner_features()
     return zipped
Пример #3
0
    def _transform_data_container(self, data_container: DataContainer,
                                  context: ExecutionContext) -> DataContainer:
        """
        Apply the `(1, 2, reversed(1))` idiom, in this order:

            - `1`. Transform with the preprocessing step
            - `2`. Transform with the postprocessing step
            - `reversed(1)`. Inverse transform with the preprocessing step

        :param data_container: data container to transform
        :param context: execution context
        :return: transformed data container
        """
        pre = self["preprocessing_step"]
        post = self["postprocessing_step"]

        data_container = pre.handle_transform(
            data_container, context.push(pre))
        data_container = post.handle_transform(
            data_container, context.push(post))
        data_container = pre.handle_inverse_transform(
            data_container, context.push(pre))

        data_container.set_current_ids(self.hash(data_container))
        return data_container
Пример #4
0
    def handle_fit(self, data_container: DataContainer,
                   context: ExecutionContext):
        """
        Fit every parallel step on the data, possibly using joblib parallelism.

        :param data_container: the input data to fit onto
        :param context: execution context
        :return: (self, data_container)
        """
        if self.n_jobs == 1:
            fitted = [
                step.handle_fit(data_container.copy(), context.push(step))
                for _, step in self.steps_as_tuple
            ]
        else:
            fitted = Parallel(backend=self.backend, n_jobs=self.n_jobs)(
                delayed(step.handle_fit)(data_container.copy(),
                                         context.push(step))
                for _, step in self.steps_as_tuple)

        # Re-attach each fitted step under its original name.
        for index, (fitted_step, _) in enumerate(fitted):
            name = self.steps_as_tuple[index][0]
            self.steps_as_tuple[index] = (name, fitted_step)
        self._refresh_steps()

        return self, data_container
Пример #5
0
    def handle_fit_transform(self, data_container: DataContainer, context: ExecutionContext) -> (
    'ReversiblePreprocessingWrapper', DataContainer):
        """
        Apply the `(1, 2, reversed(1))` idiom, in this order:

            - `1`. Fit transform with the preprocessing step
            - `2`. Fit transform with the postprocessing step
            - `reversed(1)`. Inverse transform with the preprocessing step

        :param data_container: data container to fit transform
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: (self, data_container)
        :rtype: (ReversiblePreprocessingWrapper, DataContainer)
        """
        self["preprocessing_step"], data_container = \
            self["preprocessing_step"].handle_fit_transform(
                data_container, context.push(self["preprocessing_step"]))
        self["postprocessing_step"], data_container = \
            self["postprocessing_step"].handle_fit_transform(
                data_container, context.push(self["postprocessing_step"]))

        data_container = self["preprocessing_step"].handle_inverse_transform(
            data_container, context.push(self["preprocessing_step"]))

        data_container.set_current_ids(self.hash(data_container))

        return self, data_container
Пример #6
0
    def _will_process(
            self, data_container: DataContainer,
            context: ExecutionContext) -> ('BaseTransformer', DataContainer):
        """
        Flatten the data container before the wrapped step processes it.

        :param data_container: data container to flatten
        :param context: execution context
        :return: (flattened data container, execution context)
        :rtype: ('BaseTransformer', DataContainer)
        """
        data_container, context = super()._will_process(
            data_container, context)

        # Fabricate NaN expected outputs when none were provided, so that
        # flattening has a matching-shape structure to work with.
        if data_container.expected_outputs is None:
            placeholder = np.empty_like(
                np.array(data_container.data_inputs))
            placeholder.fill(np.nan)
            data_container.set_expected_outputs(placeholder)

        flat_di, self.len_di = self._flatten_list(data_container.data_inputs)
        flat_eo, self.len_eo = self._flatten_list(
            data_container.expected_outputs)

        flattened = DataContainer(
            summary_id=data_container.summary_id,
            data_inputs=flat_di,
            expected_outputs=flat_eo,
            sub_data_containers=data_container.sub_data_containers)

        return flattened, context
Пример #7
0
    def handle_transform(self, data_container: DataContainer,
                         context: ExecutionContext) -> DataContainer:
        """Run the feature union, then reduce its outputs through the judge."""
        data_container = FeatureUnion.handle_transform(
            self, data_container, context)
        judged = self.judge.transform(data_container.data_inputs)
        data_container.set_data_inputs(judged)
        return data_container
Пример #8
0
    def _transform_data_container(self, data_container, context):
        """
        Join the sub data containers by transforming their data inputs together.

        :param data_container: the data container to join (its data_inputs is
            a list of sub data containers)
        :param context: execution context
        :return: transformed data container
        """
        data_inputs = self.transform([dc.data_inputs for dc in data_container.data_inputs])
        # Note: the constructor already stores data_inputs; the redundant
        # set_data_inputs(data_inputs) call that followed it was removed.
        return DataContainer(data_inputs=data_inputs,
                             current_ids=data_container.current_ids,
                             expected_outputs=data_container.expected_outputs)
Пример #9
0
    def handle_fit_transform(self, data_container: DataContainer,
                             context: ExecutionContext):
        """
        Re-hash the data container's current ids without transforming the data.

        Subclasses may override this to change the data container's shape,
        apply side effects, or alter the pipeline's execution flow.
        """
        data_container.set_current_ids(self.hash(data_container))
        return self, data_container
Пример #10
0
    def handle_transform(self, data_container: DataContainer,
                         context: ExecutionContext):
        """
        Transform the wrapped step once per data input.

        :param data_container: data container
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: output data container
        """
        collected = ListDataContainer.empty()

        for current_id, data_inputs, expected_outputs in data_container:
            item = DataContainer(current_ids=None,
                                 data_inputs=data_inputs,
                                 expected_outputs=expected_outputs)
            result = self.wrapped.handle_transform(item, context)
            collected.append(current_id, result.data_inputs,
                             result.expected_outputs)

        collected.set_current_ids(self.hash(data_container))
        return collected
Пример #11
0
    def _fit_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> Tuple[BaseStep, DataContainer]:
        """
        Fit transform the wrapped step once per (data input, expected output)
        pair.

        A raised ContinueInterrupt skips the current item; a BreakInterrupt
        stops the loop entirely.

        :param data_container: data container to fit transform
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext

        :return: self, transformed_data_container
        """
        collected: DataContainer = ListDataContainer.empty(
            original_data_container=data_container)

        for current_id, data_inputs, expected_outputs in data_container:
            try:
                self.wrapped, result = self.wrapped.handle_fit_transform(
                    DataContainer(data_inputs=data_inputs,
                                  current_ids=None,
                                  expected_outputs=expected_outputs), context)
                collected.append(current_id, result.data_inputs,
                                 result.expected_outputs)
            except ContinueInterrupt:
                continue
            except BreakInterrupt:
                break

        collected.summary_id = data_container.summary_id
        return self, collected
Пример #12
0
    def _transform_data_container(self, data_container, context):
        """
        Transform the data with the unions (every step but the last/joiner),
        possibly using joblib parallelism.

        :param data_container: data container
        :param context: execution context
        :return: a data container whose data inputs are the per-step results
        """
        union_steps = self.steps_as_tuple[:-1]
        if self.n_jobs == 1:
            sub_results = [
                step.handle_transform(data_container.copy(), context)
                for _, step in union_steps
            ]
        else:
            sub_results = Parallel(backend=self.backend, n_jobs=self.n_jobs)(
                delayed(step.handle_transform)(data_container.copy(), context)
                for _, step in union_steps)

        return DataContainer(
            data_inputs=sub_results,
            current_ids=data_container.current_ids,
            summary_id=data_container.summary_id,
            expected_outputs=data_container.expected_outputs,
            sub_data_containers=data_container.sub_data_containers)
Пример #13
0
    def _transform_data_container(self, data_container: DataContainer,
                                  context: ExecutionContext) -> DataContainer:
        """
        Transform the wrapped step once per data input.

        :param data_container: data container
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: output data container
        """
        collected: ListDataContainer = ListDataContainer.empty(
            original_data_container=data_container)

        for current_id, data_inputs, expected_outputs in data_container:
            item = DataContainer(data_inputs=data_inputs,
                                 current_ids=None,
                                 expected_outputs=expected_outputs)
            result: DataContainer = self.wrapped.handle_transform(
                item, context)
            collected.append(current_id, result.data_inputs,
                             result.expected_outputs)

        collected.summary_id = data_container.summary_id
        return collected
Пример #14
0
    def _fit_transform_data_container(self, data_container: DataContainer,
                                      context: ExecutionContext):
        """
        Fit transform the wrapped step once per (data input, expected output)
        pair.

        :param data_container: data container to fit transform
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext

        :return: self, transformed_data_container
        """
        collected = ListDataContainer.empty()

        for current_id, data_inputs, expected_outputs in data_container:
            item = DataContainer(current_ids=None,
                                 data_inputs=data_inputs,
                                 expected_outputs=expected_outputs)
            self.wrapped, result = self.wrapped.handle_fit_transform(
                item, context)
            collected.append(current_id, result.data_inputs,
                             result.expected_outputs)

        collected.summary_id = data_container.summary_id
        return self, collected
Пример #15
0
    def _create_expanded_data_container(
            self, data_container: DataContainer) -> ExpandedDataContainer:
        """
        Re-hash the data container's current ids, then expand it.

        :param data_container: data container to expand
        :type data_container: DataContainer
        :return: expanded data container
        :rtype: ExpandedDataContainer
        """
        data_container.set_current_ids(self.hash(data_container))
        return ExpandedDataContainer.create_from(data_container)
Пример #16
0
    def _transform_data_container(self, data_container: DataContainer,
                                  context: ExecutionContext) -> DataContainer:
        """
        Nullify wrapped step hyperparams, and don't transform the wrapped step.

        When the optional step is enabled, delegate to the wrapped step;
        otherwise nullify the hyperparams and return the nullified value in a
        fresh data container (mirroring handle_fit_transform).

        :param data_container: data container
        :param context: execution context
        :return: data_container
        """
        if self.hyperparams[OPTIONAL_ENABLED_HYPERPARAM]:
            return self.wrapped.handle_transform(data_container, context)

        self._nullify_hyperparams()

        # Bugfix: the incoming data_container was previously also mutated with
        # set_data_inputs(...) and then discarded — the fresh container below
        # is the only one callers see, so that surprising side effect was
        # removed (consistent with the fit_transform counterpart).
        return DataContainer(data_inputs=self.nullified_return_value,
                             current_ids=data_container.current_ids,
                             expected_outputs=self.nullified_return_value)
Пример #17
0
    def handle_fit(self, data_container: DataContainer, context: ExecutionContext) -> 'ReversiblePreprocessingWrapper':
        """
        Handle fit by fit-transforming the preprocessing step, then fitting
        the postprocessing step on its output.

        :param data_container: data container to fit on
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: self
        :rtype: ReversiblePreprocessingWrapper
        """
        # Docstring fixed: this method returns only `self`, not
        # `(self, data_container)` as previously documented.
        self["preprocessing_step"], data_container = \
            self["preprocessing_step"].handle_fit_transform(data_container, context.push(self["preprocessing_step"]))
        self["postprocessing_step"] = \
            self["postprocessing_step"].handle_fit(data_container, context.push(self["postprocessing_step"]))

        current_ids = self.hash(data_container)
        data_container.set_current_ids(current_ids)

        return self
Пример #18
0
    def _did_process(self, data_container: DataContainer,
                     context: ExecutionContext) -> DataContainer:
        """
        Reaugment (unflatten) the data container after processing, if enabled.

        :param data_container: data container to unflatten
        :param context: execution context
        :return: data container
        """
        data_container = super()._did_process(data_container, context)

        if not self.then_unflatten:
            return data_container

        data_container.set_data_inputs(
            self._reaugment_list(data_container.data_inputs, self.len_di))
        data_container.set_expected_outputs(
            self._reaugment_list(data_container.expected_outputs,
                                 self.len_eo))
        # Reset the remembered lengths now that they have been consumed.
        self.len_di = []
        self.len_eo = []

        return data_container
Пример #19
0
 def _fit_data_container(
         self, data_container: DataContainer,
         context: ExecutionContext) -> 'BaseStep':
     """
     Fit each step on its matching batch of the data container.

     The i-th item of ``data_container`` is dispatched to ``self.steps[i]``;
     the fitted steps then replace ``self.steps``.

     :param data_container: data container whose items are dispatched
         one-per-step to ``self.steps``
     :param context: execution context
     :return: self
     """
     # Annotation fixed: only `self` is returned, not (step, data container).
     fitted_steps = []
     for i, (current_ids, data_inputs,
             expected_outputs) in enumerate(data_container):
         fitted_step = self.steps[i].handle_fit(
             DataContainer(current_ids=current_ids,
                           data_inputs=data_inputs,
                           expected_outputs=expected_outputs), context)
         fitted_steps.append(fitted_step)
     self.steps = fitted_steps
     return self
Пример #20
0
    def _transform_data_container(self, data_container: DataContainer,
                                  context: ExecutionContext):

        data_containers = list(
            filter(
                lambda dc:
                (len(dc.data_inputs) > 0 and len(dc.expected_outputs) > 0),
                data_container.data_inputs))
        if len(data_containers) == 1:
            return data_containers[0]
        else:
            return DataContainer(
                data_inputs=list(map(attrgetter("data_inputs"))),
                expected_outputs=list(map(attrgetter("expected_outputs"))),
                current_ids=data_container.current_ids)
Пример #21
0
    def _inverse_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> DataContainer:
        """Inverse transform each step on its matching batch, then rejoin the results."""
        batch_results = []
        for i, (current_ids, data_inputs,
                expected_outputs) in enumerate(data_container):
            batch = DataContainer(current_ids=current_ids,
                                  data_inputs=data_inputs,
                                  expected_outputs=expected_outputs)
            batch_results.append(
                self[i].handle_inverse_transform(batch, context))

        joined = ListDataContainer.empty()
        for batch_result in batch_results:
            joined.append_data_container(batch_result)
        return joined
Пример #22
0
    def _fit_data_container(self, data_container: DataContainer,
                            context: ExecutionContext) -> BaseStep:
        """
        Fit the wrapped step once per (data input, expected output) pair.

        :param data_container: data container
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: self
        """
        for _, data_inputs, expected_outputs in data_container:
            item = DataContainer(data_inputs=data_inputs,
                                 current_ids=None,
                                 expected_outputs=expected_outputs)
            self.wrapped = self.wrapped.handle_fit(item, context)
        return self
Пример #23
0
    def _fit_transform_data_container(
            self, data_container: DataContainer,
            context: ExecutionContext) -> ('BaseStep', DataContainer):
        """Fit transform each step on its matching batch, then rejoin the outputs."""
        steps_and_results = []
        for i, (current_ids, data_inputs,
                expected_outputs) in enumerate(data_container):
            batch = DataContainer(current_ids=current_ids,
                                  data_inputs=data_inputs,
                                  expected_outputs=expected_outputs)
            steps_and_results.append(
                self.steps[i].handle_fit_transform(batch, context))

        # Keep the fitted steps, then concatenate the per-step outputs.
        self.steps = [fitted for fitted, _ in steps_and_results]

        joined = ListDataContainer.empty()
        for _, result in steps_and_results:
            joined.append_data_container(result)

        return self, joined
Пример #24
0
    def handle_fit_transform(self, data_container: DataContainer, context: ExecutionContext) -> (
            'BaseStep', DataContainer):
        """
        Nullify wrapped step hyperparams, and don't fit_transform the wrapped step.

        :param data_container: data container
        :type data_container: DataContainer
        :param context: execution context
        :type context: ExecutionContext
        :return: step, data_container
        :type: (BaseStep, DataContainer)
        """
        if not self.hyperparams[OPTIONAL_ENABLED_HYPERPARAM]:
            self._nullify_hyperparams()
            nullified = DataContainer(data_inputs=self.nullified_return_value,
                                      current_ids=data_container.current_ids,
                                      expected_outputs=self.nullified_return_value)
            return self, nullified

        self.wrapped, data_container = self.wrapped.handle_fit_transform(data_container, context)
        return self, data_container
Пример #25
0
    def _transform_data_container(self, data_container: DataContainer,
                                  context: ExecutionContext):
        """
        Join the non-empty sub data containers' data inputs into one container.

        :param data_container: the data container to join
        :param context: execution context
        :return: transformed data container
        """
        non_empty = [dc.data_inputs for dc in data_container.data_inputs
                     if len(dc.data_inputs) > 0]
        # A lone survivor is unwrapped instead of being kept as a 1-item list.
        joined_inputs = non_empty[0] if len(non_empty) == 1 else non_empty

        return DataContainer(
            data_inputs=joined_inputs,
            current_ids=data_container.current_ids,
            expected_outputs=data_container.expected_outputs)
Пример #26
0
    def handle_transform(self, data_containers,
                         context: ExecutionContext) -> DataContainer:
        """
        Join the given data containers by transforming their data inputs
        together, then re-hash the result's current ids.

        :param data_containers: the data containers to join
        :param context: execution context
        :return: transformed data container
        """
        data_inputs = self.transform(
            [dc.data_inputs for dc in data_containers])
        # Note: the constructor already stores data_inputs; the redundant
        # set_data_inputs(data_inputs) call that followed it was removed.
        data_container = DataContainer(
            current_ids=data_containers[-1].current_ids,
            data_inputs=data_inputs,
            expected_outputs=data_containers[-1].expected_outputs)

        data_container.set_current_ids(self.hash(data_container))

        return data_container
Пример #27
0
 def _will_process(
         self, data_container: DataContainer,
         context: ExecutionContext) -> (DataContainer, ExecutionContext):
     """Convert the data container to numpy arrays before any processing."""
     numpy_container = data_container.to_numpy()
     return numpy_container, context
Пример #28
0
 def _did_process(self, data_container: DataContainer,
                  context: ExecutionContext):
     """Run the parent's post-processing, then reduce the extra dimension."""
     processed = super()._did_process(data_container, context)
     return processed.reduce_dim()