def transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform the data container by streaming it batch-by-batch into the queued pipeline.

    Workers (all steps but the last) are started first; each batch of size
    ``self.batch_size`` is then dispatched to the queue, and the final step
    joins all batch results back against the original container.

    :param data_container: data container to transform.
    :type data_container: DataContainer
    :param context: execution context
    :type context: ExecutionContext
    :return: transformed data container
    """
    # Tell the joiner (last step) how many batches to expect before dispatching.
    self[-1].set_n_batches(self.get_n_batches(data_container))

    # Start every worker step except the final joiner.
    for _, worker in self[:-1]:
        worker.start(context)

    # Dispatch each convolved batch to the queued pipeline, tagged by index.
    batches = data_container.convolved_1d(stride=self.batch_size, kernel_size=self.batch_size)
    for index, batch in enumerate(batches):
        self.send_batch_to_queued_pipeline(batch_index=index, data_container=batch)

    # The joiner reassembles the batch outputs in order.
    return self[-1].join(original_data_container=data_container)
def join_fit_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> Tuple['Any', DataContainer]:
    """
    Fit-transform each batch of ``self.batch_size`` through the pipeline and
    concatenate every batch output together.

    :param step: pipeline to fit transform on
    :type step: Pipeline
    :param data_container: data container to fit transform on
    :type data_container: DataContainer
    :param context: execution context
    :return: fitted step, concatenated transformed data container
    :rtype: Tuple[Any, DataContainer]
    """
    step_context = context.push(step)

    joined = ListDataContainer.empty()
    batches = data_container.convolved_1d(stride=self.batch_size, kernel_size=self.batch_size)

    # Refit on each successive batch; the step returned by the last batch wins.
    for batch in batches:
        step, fitted_batch = step._fit_transform_data_container(batch, step_context)
        joined.concat(fitted_batch)

    return step, joined
def join_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Transform each batch of ``self.batch_size`` through the pipeline and
    concatenate every batch output together.

    :param step: pipeline to transform on
    :type step: Pipeline
    :param data_container: data container to transform
    :type data_container: DataContainer
    :param context: execution context
    :return: concatenated transformed data container
    :rtype: DataContainer
    """
    step_context = context.push(step)

    joined = ListDataContainer.empty()
    batches = data_container.convolved_1d(stride=self.batch_size, kernel_size=self.batch_size)

    for batch in batches:
        transformed_batch = step._transform_data_container(batch, step_context)
        joined.concat(transformed_batch)

    return joined
def test_data_container_should_iterate_through_batches_using_convolved():
    # 100 items: data inputs 0..99, expected outputs 100..199.
    data_container = DataContainer(
        current_ids=[str(i) for i in range(100)],
        data_inputs=np.arange(100),
        expected_outputs=np.arange(100, 200)
    )

    batches = list(data_container.convolved_1d(stride=10, kernel_size=10))

    # Each batch must hold one contiguous 10-item slice, in order.
    for i, batch in enumerate(batches):
        start = i * 10
        assert np.array_equal(np.array(batch.data_inputs), np.arange(start, start + 10))
        assert np.array_equal(np.array(batch.expected_outputs), np.arange(start + 100, start + 110))