def handle_transform(self, data_container: DataContainer, context: ExecutionContext):
    """
    Transform the data through every wrapped step, possibly in parallel.

    Each step receives its own copy of the data container; the per-step
    results are then merged by the joiner using freshly hashed current ids.

    :param data_container: data container to transform
    :param context: execution context
    :return: the joined, transformed data container
    """
    if self.n_jobs == 1:
        # Sequential path: no parallel backend involved.
        results = []
        for _, step in self.steps_as_tuple:
            results.append(step.handle_transform(data_container.copy(), context.push(step)))
    else:
        pool = Parallel(backend=self.backend, n_jobs=self.n_jobs)
        results = pool(
            delayed(step.handle_transform)(data_container.copy(), context.push(step))
            for _, step in self.steps_as_tuple
        )
    fresh_ids = self.hash(data_container)
    return self.joiner.handle_transform(results, fresh_ids)
def handle_fit(self, data_container: DataContainer, context: ExecutionContext):
    """
    Fit every wrapped step on the given data, possibly in parallel.

    Each step is fitted on its own copy of the data container; the fitted
    steps returned by the workers are then written back in place of the
    originals.

    :param data_container: the input data to fit onto
    :param context: execution context
    :return: a ``(self, data_container)`` tuple
    """
    if self.n_jobs == 1:
        # Sequential path: no parallel backend involved.
        results = []
        for _, step in self.steps_as_tuple:
            results.append(step.handle_fit(data_container.copy(), context.push(step)))
    else:
        pool = Parallel(backend=self.backend, n_jobs=self.n_jobs)
        results = pool(
            delayed(step.handle_fit)(data_container.copy(), context.push(step))
            for _, step in self.steps_as_tuple
        )

    # Store each fitted step back under its original name, then refresh.
    for index, (fitted_step, _) in enumerate(results):
        name = self.steps_as_tuple[index][0]
        self.steps_as_tuple[index] = (name, fitted_step)
    self._refresh_steps()

    return self, data_container