def handle_transform(self, data_container: DataContainer, context: ExecutionContext):
    """
    Transform the data with the unions. It will make use of some parallel processing.

    :param data_container: data container
    :param context: execution context
    :return: the transformed data container
    """
    if self.n_jobs != 1:
        data_containers = Parallel(backend=self.backend, n_jobs=self.n_jobs)(
            delayed(step.handle_transform)(data_container.copy(), context.push(step))
            for _, step in self.steps_as_tuple
        )
    else:
        data_containers = [
            step.handle_transform(data_container.copy(), context.push(step))
            for _, step in self.steps_as_tuple
        ]

    new_current_ids = self.hash(data_container)

    data_container = self.joiner.handle_transform(data_containers, new_current_ids)

    return data_container
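# The Parallel/delayed fan-out above is plain joblib. A minimal sketch of the same
# pattern outside of any pipeline class; `slow_transform` below is a hypothetical
# stand-in for a step's `handle_transform`, and each call gets its own copy of the
# inputs, mirroring `data_container.copy()` above:
from joblib import Parallel, delayed

def slow_transform(values):
    # Hypothetical per-branch work.
    return [v * 2 for v in values]

steps = [slow_transform, slow_transform, slow_transform]
results = Parallel(backend='threading', n_jobs=2)(
    delayed(step)([1, 2, 3]) for step in steps
)
# results == [[2, 4, 6], [2, 4, 6], [2, 4, 6]]: one output per parallel branch.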
def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    if any(not isinstance(di, DataContainer) for di in data_container.data_inputs):
        raise ValueError("data_inputs given to ZipFeatures must be a list of DataContainer instances")

    data_container = ZipDataContainer.create_from(*data_container.data_inputs)

    if self.concatenate_inner_features:
        data_container.concatenate_inner_features()

    return data_container
def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    According to the idiom of `(1, 2, reversed(1))`, we do this, in order:

    - `1`. Transform preprocessing step
    - `2`. Transform postprocessing step
    - `reversed(1)`. Inverse transform preprocessing step

    :param data_container: data container to transform
    :param context: execution context
    :return: data_container
    """
    data_container = self["preprocessing_step"].handle_transform(
        data_container, context.push(self["preprocessing_step"]))
    data_container = self["postprocessing_step"].handle_transform(
        data_container, context.push(self["postprocessing_step"]))
    data_container = self["preprocessing_step"].handle_inverse_transform(
        data_container, context.push(self["preprocessing_step"]))

    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)

    return data_container
def handle_fit(self, data_container: DataContainer, context: ExecutionContext):
    """
    Fit the parallel steps on the data. It will make use of some parallel processing.

    :param data_container: the input data to fit on
    :param context: execution context
    :return: (self, data_container)
    """
    # Actually fit:
    if self.n_jobs != 1:
        fitted_steps_data_containers = Parallel(backend=self.backend, n_jobs=self.n_jobs)(
            delayed(step.handle_fit)(data_container.copy(), context.push(step))
            for _, step in self.steps_as_tuple
        )
    else:
        fitted_steps_data_containers = [
            step.handle_fit(data_container.copy(), context.push(step))
            for _, step in self.steps_as_tuple
        ]

    # Save fitted steps
    for i, (fitted_step, _) in enumerate(fitted_steps_data_containers):
        self.steps_as_tuple[i] = (self.steps_as_tuple[i][0], fitted_step)
    self._refresh_steps()

    return self, data_container
def handle_fit_transform(
        self, data_container: DataContainer, context: ExecutionContext
) -> ('ReversiblePreprocessingWrapper', DataContainer):
    """
    According to the idiom of `(1, 2, reversed(1))`, we do this, in order:

    - `1`. Fit transform preprocessing step
    - `2`. Fit transform postprocessing step
    - `reversed(1)`. Inverse transform preprocessing step

    :param data_container: data container to transform
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: (self, data_container)
    :rtype: (ReversiblePreprocessingWrapper, DataContainer)
    """
    self["preprocessing_step"], data_container = self["preprocessing_step"].handle_fit_transform(
        data_container, context.push(self["preprocessing_step"]))
    self["postprocessing_step"], data_container = self["postprocessing_step"].handle_fit_transform(
        data_container, context.push(self["postprocessing_step"]))
    data_container = self["preprocessing_step"].handle_inverse_transform(
        data_container, context.push(self["preprocessing_step"]))

    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)

    return self, data_container
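# A minimal, library-free sketch of the `(1, 2, reversed(1))` idiom used above,
# assuming an invertible preprocessing function. All names here are illustrative:
import numpy as np

def preprocess(x):
    return np.log1p(x)     # 1. forward preprocessing

def inverse_preprocess(x):
    return np.expm1(x)     # reversed(1). undo the preprocessing

def postprocess(x):
    return x * 2.0         # 2. the actual processing, done in preprocessed space

data = np.array([1.0, 10.0, 100.0])
out = inverse_preprocess(postprocess(preprocess(data)))
# The postprocessing step operated in log space, but `out` lives back in the
# original space, which is exactly what the wrapper's inverse transform achieves.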
def _will_process(
        self, data_container: DataContainer, context: ExecutionContext
) -> (DataContainer, ExecutionContext):
    """
    Flatten the data container before any processing is done on the wrapped step.

    :param data_container: data container to flatten
    :param context: execution context
    :return: (flattened data container, execution context)
    :rtype: (DataContainer, ExecutionContext)
    """
    data_container, context = super()._will_process(data_container, context)

    if data_container.expected_outputs is None:
        # Fill missing expected outputs with NaNs so that they can be flattened too.
        expected_outputs = np.empty_like(np.array(data_container.data_inputs))
        expected_outputs.fill(np.nan)
        data_container.set_expected_outputs(expected_outputs)

    di, self.len_di = self._flatten_list(data_container.data_inputs)
    eo, self.len_eo = self._flatten_list(data_container.expected_outputs)

    flattened_data_container = DataContainer(
        summary_id=data_container.summary_id,
        data_inputs=di,
        expected_outputs=eo,
        sub_data_containers=data_container.sub_data_containers
    )

    return flattened_data_container, context
def handle_transform(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    data_container = FeatureUnion.handle_transform(self, data_container, context)

    results = self.judge.transform(data_container.data_inputs)
    data_container.set_data_inputs(results)

    return data_container
def _transform_data_container(self, data_container, context):
    """
    Handle transform.

    :param data_container: the data container to join
    :param context: execution context
    :return: transformed data container
    """
    data_inputs = self.transform([dc.data_inputs for dc in data_container.data_inputs])

    data_container = DataContainer(
        data_inputs=data_inputs,
        current_ids=data_container.current_ids,
        expected_outputs=data_container.expected_outputs
    )

    return data_container
def handle_fit_transform(self, data_container: DataContainer, context: ExecutionContext):
    """
    Change the shape of the data container,
    and/or apply any side effects based on the data container,
    and/or change the execution flow of the pipeline.
    """
    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)

    return self, data_container
def handle_transform(self, data_container: DataContainer, context: ExecutionContext):
    """
    Transform the wrapped step for each data input.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: transformed data container
    """
    output_data_container = ListDataContainer.empty()

    for current_id, di, eo in data_container:
        output = self.wrapped.handle_transform(
            DataContainer(current_ids=None, data_inputs=di, expected_outputs=eo), context)
        output_data_container.append(current_id, output.data_inputs, output.expected_outputs)

    current_ids = self.hash(data_container)
    output_data_container.set_current_ids(current_ids)

    return output_data_container
def _fit_transform_data_container(
        self, data_container: DataContainer, context: ExecutionContext) -> Tuple[BaseStep, DataContainer]:
    """
    Fit transform the wrapped step for each data input and expected output.

    :param data_container: data container to fit transform
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: (self, transformed data container)
    """
    output_data_container: DataContainer = ListDataContainer.empty(
        original_data_container=data_container)

    for current_id, di, eo in data_container:
        try:
            self.wrapped, output = self.wrapped.handle_fit_transform(
                DataContainer(data_inputs=di, current_ids=None, expected_outputs=eo), context)
            output_data_container.append(current_id, output.data_inputs, output.expected_outputs)
        except ContinueInterrupt:
            continue
        except BreakInterrupt:
            break

    output_data_container.summary_id = data_container.summary_id

    return self, output_data_container
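# ContinueInterrupt and BreakInterrupt let the wrapped step steer the enclosing
# for-loop from the inside. A minimal sketch of that control flow, with
# hypothetical exception classes standing in for the real ones:
class ContinueInterrupt(Exception):
    """Skip the current data input."""

class BreakInterrupt(Exception):
    """Stop iterating over the remaining data inputs."""

def process(item):
    if item < 0:
        raise ContinueInterrupt()  # skip negatives
    if item > 100:
        raise BreakInterrupt()     # abort once values get too large
    return item * 2

outputs = []
for item in [3, -1, 7, 200, 9]:
    try:
        outputs.append(process(item))
    except ContinueInterrupt:
        continue
    except BreakInterrupt:
        break
# outputs == [6, 14]: -1 was skipped, and 200 stopped the loop before 9 was seen.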
def _transform_data_container(self, data_container, context):
    """
    Transform the data with the unions. It will make use of some parallel processing.

    :param data_container: data container
    :param context: execution context
    :return: the transformed data container
    """
    if self.n_jobs != 1:
        data_containers = Parallel(backend=self.backend, n_jobs=self.n_jobs)(
            delayed(step.handle_transform)(data_container.copy(), context)
            for _, step in self.steps_as_tuple[:-1]
        )
    else:
        data_containers = [
            step.handle_transform(data_container.copy(), context)
            for _, step in self.steps_as_tuple[:-1]
        ]

    return DataContainer(
        data_inputs=data_containers,
        current_ids=data_container.current_ids,
        summary_id=data_container.summary_id,
        expected_outputs=data_container.expected_outputs,
        sub_data_containers=data_container.sub_data_containers
    )
def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the wrapped step for each data input.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: transformed data container
    """
    output_data_container: ListDataContainer = ListDataContainer.empty(
        original_data_container=data_container)

    for current_id, di, eo in data_container:
        output: DataContainer = self.wrapped.handle_transform(
            DataContainer(data_inputs=di, current_ids=None, expected_outputs=eo), context)
        output_data_container.append(current_id, output.data_inputs, output.expected_outputs)

    output_data_container.summary_id = data_container.summary_id

    return output_data_container
def _fit_transform_data_container(self, data_container: DataContainer, context: ExecutionContext):
    """
    Fit transform the wrapped step for each data input and expected output.

    :param data_container: data container to fit transform
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: (self, transformed data container)
    """
    output_data_container = ListDataContainer.empty()

    for current_id, di, eo in data_container:
        self.wrapped, output = self.wrapped.handle_fit_transform(
            DataContainer(current_ids=None, data_inputs=di, expected_outputs=eo), context)
        output_data_container.append(current_id, output.data_inputs, output.expected_outputs)

    output_data_container.summary_id = data_container.summary_id

    return self, output_data_container
def _create_expanded_data_container(self, data_container: DataContainer) -> ExpandedDataContainer:
    """
    Create an expanded data container.

    :param data_container: data container to expand
    :type data_container: DataContainer
    :return: expanded data container
    :rtype: ExpandedDataContainer
    """
    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)

    expanded_data_container = ExpandedDataContainer.create_from(data_container)

    return expanded_data_container
def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Nullify the wrapped step's hyperparams, and don't transform the wrapped step.

    :param data_container: data container
    :param context: execution context
    :return: data_container
    """
    if self.hyperparams[OPTIONAL_ENABLED_HYPERPARAM]:
        return self.wrapped.handle_transform(data_container, context)

    self._nullify_hyperparams()
    data_container.set_data_inputs(self.nullified_return_value)

    return DataContainer(
        data_inputs=self.nullified_return_value,
        current_ids=data_container.current_ids,
        expected_outputs=self.nullified_return_value
    )
def handle_fit(self, data_container: DataContainer, context: ExecutionContext) -> 'ReversiblePreprocessingWrapper':
    """
    Handle fit by fitting the preprocessing step, then the postprocessing step.

    :param data_container: data container to fit on
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: self
    :rtype: ReversiblePreprocessingWrapper
    """
    self["preprocessing_step"], data_container = self["preprocessing_step"].handle_fit_transform(
        data_container, context.push(self["preprocessing_step"]))
    self["postprocessing_step"] = self["postprocessing_step"].handle_fit(
        data_container, context.push(self["postprocessing_step"]))

    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)

    return self
def _did_process(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Reaugment the flattened data container.

    :param data_container: data container to unflatten
    :param context: execution context
    :return: data container
    """
    data_container = super()._did_process(data_container, context)

    if self.then_unflatten:
        data_container.set_data_inputs(
            self._reaugment_list(data_container.data_inputs, self.len_di))
        data_container.set_expected_outputs(
            self._reaugment_list(data_container.expected_outputs, self.len_eo))

    self.len_di = []
    self.len_eo = []

    return data_container
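# `_flatten_list` and `_reaugment_list` are not shown above. A plausible sketch of
# what they could look like, assuming each data input is itself a list: flattening
# records the length of every sub-list so that unflattening can rebuild the nesting.
def _flatten_list(nested):
    lengths = [len(sub) for sub in nested]
    flat = [item for sub in nested for item in sub]
    return flat, lengths

def _reaugment_list(flat, lengths):
    nested, i = [], 0
    for n in lengths:
        nested.append(flat[i:i + n])
        i += n
    return nested

flat, lengths = _flatten_list([[1, 2], [3], [4, 5, 6]])
# flat == [1, 2, 3, 4, 5, 6]; lengths == [2, 1, 3]
assert _reaugment_list(flat, lengths) == [[1, 2], [3], [4, 5, 6]]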
def _fit_data_container(
        self, data_container: DataContainer, context: ExecutionContext) -> 'BaseStep':
    fitted_steps = []
    for i, (current_ids, data_inputs, expected_outputs) in enumerate(data_container):
        fitted_step = self.steps[i].handle_fit(
            DataContainer(current_ids=current_ids, data_inputs=data_inputs, expected_outputs=expected_outputs),
            context
        )
        fitted_steps.append(fitted_step)
    self.steps = fitted_steps

    return self
def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext):
    data_containers = list(filter(
        lambda dc: (len(dc.data_inputs) > 0 and len(dc.expected_outputs) > 0),
        data_container.data_inputs
    ))

    if len(data_containers) == 1:
        return data_containers[0]

    return DataContainer(
        data_inputs=list(map(attrgetter("data_inputs"), data_containers)),
        expected_outputs=list(map(attrgetter("expected_outputs"), data_containers)),
        current_ids=data_container.current_ids
    )
def _inverse_transform_data_container(
        self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    inverse_transform_results = []
    for i, (current_ids, data_inputs, expected_outputs) in enumerate(data_container):
        inverse_transform_result = self[i].handle_inverse_transform(
            DataContainer(current_ids=current_ids, data_inputs=data_inputs, expected_outputs=expected_outputs),
            context
        )
        inverse_transform_results.append(inverse_transform_result)

    output_data_container = ListDataContainer.empty()
    for data_container_batch in inverse_transform_results:
        output_data_container.append_data_container(data_container_batch)

    return output_data_container
def _fit_data_container(self, data_container: DataContainer, context: ExecutionContext) -> BaseStep:
    """
    Fit the wrapped step for each data input and expected output.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: self
    """
    for current_id, di, eo in data_container:
        self.wrapped = self.wrapped.handle_fit(
            DataContainer(data_inputs=di, current_ids=None, expected_outputs=eo), context)

    return self
def _fit_transform_data_container(
        self, data_container: DataContainer, context: ExecutionContext) -> ('BaseStep', DataContainer):
    fitted_steps_data_containers = []
    for i, (current_ids, data_inputs, expected_outputs) in enumerate(data_container):
        fitted_step_data_container = self.steps[i].handle_fit_transform(
            DataContainer(current_ids=current_ids, data_inputs=data_inputs, expected_outputs=expected_outputs),
            context
        )
        fitted_steps_data_containers.append(fitted_step_data_container)

    self.steps = [step for step, _ in fitted_steps_data_containers]

    output_data_container = ListDataContainer.empty()
    for _, data_container_batch in fitted_steps_data_containers:
        output_data_container.append_data_container(data_container_batch)

    return self, output_data_container
def handle_fit_transform(
        self, data_container: DataContainer, context: ExecutionContext
) -> ('BaseStep', DataContainer):
    """
    Nullify the wrapped step's hyperparams, and don't fit_transform the wrapped step.

    :param data_container: data container
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: (step, data_container)
    :rtype: (BaseStep, DataContainer)
    """
    if self.hyperparams[OPTIONAL_ENABLED_HYPERPARAM]:
        self.wrapped, data_container = self.wrapped.handle_fit_transform(data_container, context)
        return self, data_container

    self._nullify_hyperparams()

    return self, DataContainer(
        data_inputs=self.nullified_return_value,
        current_ids=data_container.current_ids,
        expected_outputs=self.nullified_return_value
    )
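# The enabled/disabled gating above hinges on one boolean hyperparameter: when
# disabled, the step nullifies its hyperparams and short-circuits with a fixed
# return value instead of calling the wrapped step. A library-free sketch of the
# same gate (all names here are illustrative, not the library's API):
class OptionalStep:
    def __init__(self, wrapped, enabled=True, nullified_return_value=None):
        self.wrapped = wrapped
        self.enabled = enabled
        self.nullified_return_value = nullified_return_value if nullified_return_value is not None else []

    def transform(self, data_inputs):
        if self.enabled:
            return self.wrapped(data_inputs)
        return self.nullified_return_value  # the wrapped step is never called

step = OptionalStep(wrapped=lambda di: [x + 1 for x in di], enabled=False)
assert step.transform([1, 2, 3]) == []  # disabled: fixed nullified value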
def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext):
    """
    Handle transform.

    :param data_container: the data container to join
    :param context: execution context
    :return: transformed data container
    """
    data_inputs = [dc.data_inputs for dc in data_container.data_inputs if len(dc.data_inputs) > 0]
    if len(data_inputs) == 1:
        data_inputs = data_inputs[0]

    data_container = DataContainer(
        data_inputs=data_inputs,
        current_ids=data_container.current_ids,
        expected_outputs=data_container.expected_outputs
    )

    return data_container
def handle_transform(self, data_containers, context: ExecutionContext) -> DataContainer:
    """
    Handle transform.

    :param data_containers: the data containers to join
    :param context: execution context
    :return: transformed data container
    """
    data_inputs = self.transform([dc.data_inputs for dc in data_containers])

    data_container = DataContainer(
        current_ids=data_containers[-1].current_ids,
        data_inputs=data_inputs,
        expected_outputs=data_containers[-1].expected_outputs
    )

    current_ids = self.hash(data_container)
    data_container.set_current_ids(current_ids)

    return data_container
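# The joiner's `self.transform` receives one list of data inputs per branch. A
# minimal sketch of a concatenating joiner, assuming each branch outputs a 2D
# numpy array of features for the same samples (illustrative, not the actual joiner):
import numpy as np

def join_features(branch_outputs):
    # Stack every branch's feature columns, sample-wise.
    return np.concatenate([np.asarray(b) for b in branch_outputs], axis=-1)

a = np.array([[1.0], [2.0]])             # branch 1: one feature per sample
b = np.array([[3.0, 4.0], [5.0, 6.0]])   # branch 2: two features per sample
joined = join_features([a, b])
# joined.shape == (2, 3): each sample now carries all three features.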
def _will_process(
        self, data_container: DataContainer, context: ExecutionContext) -> (DataContainer, ExecutionContext):
    return data_container.to_numpy(), context
def _did_process(self, data_container: DataContainer, context: ExecutionContext):
    data_container = super()._did_process(data_container, context)
    return data_container.reduce_dim()
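# `_will_process` / `_did_process` above form a to-numpy / reduce-dim sandwich
# around the wrapped step. A sketch of the dimension round trip, assuming
# `reduce_dim` squeezes the extra outer axis that expansion added (illustrative):
import numpy as np

data = [1, 2, 3]
expanded = np.array(data)[np.newaxis, ...]            # expand: shape (1, 3)
assert np.squeeze(expanded, axis=0).tolist() == data  # reduce-dim-style inverse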