def _transform_data_container(self, data_container: DataContainer, context: ExecutionContext) -> DataContainer:
    """
    Zip the inner data containers of the given data container into one :class:`ZipDataContainer`.

    :param data_container: container whose ``data_inputs`` must all be ``DataContainer`` instances
    :param context: execution context (not used beyond the signature contract)
    :return: the zipped data container
    :raises ValueError: if any inner data input is not a ``DataContainer``
    """
    inner_containers = data_container.data_inputs
    if not all(isinstance(inner, DataContainer) for inner in inner_containers):
        raise ValueError("data_inputs given to ZipFeatures must be a list of DataContainer instances")

    zipped = ZipDataContainer.create_from(*inner_containers)
    if self.concatenate_inner_features:
        zipped.concatenate_inner_features()
    return zipped
def join_transform(self, step: TruncableSteps, data_container: DataContainer, context: ExecutionContext) -> ZipDataContainer:
    """
    Transform the data container in minibatches through the given step, then zip the per-batch outputs.

    :param step: step that each minibatch is transformed with
    :param data_container: full data container to split into minibatches
    :param context: execution context; the step is pushed onto it before transforming
    :return: a :class:`ZipDataContainer` built from the transformed batches
    """
    batch_context = context.push(step)
    batches = data_container.minibatches(
        batch_size=self.batch_size,
        keep_incomplete_batch=self.keep_incomplete_batch,
        default_value_data_inputs=self.default_value_data_inputs,
        default_value_expected_outputs=self.default_value_expected_outputs
    )

    # Transform every batch independently, then zip them back together.
    transformed_batches = [
        step._transform_data_container(batch, batch_context)
        for batch in batches
    ]
    return ZipDataContainer.create_from(*transformed_batches)
def test_zip_data_container_should_merge_two_data_sources_together():
    # Two sources with the same first dimension but different feature shapes.
    di_3d, eo_3d = _create_data_source((10, 10, 2))
    di_2d, eo_2d = _create_data_source((10, 10))
    container_3d = DataContainer(data_inputs=di_3d, expected_outputs=eo_3d)
    container_2d = DataContainer(data_inputs=di_2d, expected_outputs=eo_2d)

    zipped = ZipDataContainer.create_from(container_3d, container_2d)

    # current_ids are taken from the first container passed to create_from.
    assert zipped.current_ids == container_3d.current_ids
    # Each zipped item pairs the 3d and 2d inputs at the same index.
    for index, pair in enumerate(zipped.data_inputs):
        assert np.array_equal(pair[0], di_3d[index])
        assert np.array_equal(pair[1], di_2d[index])
def join_fit_transform(self, step: Pipeline, data_container: DataContainer, context: ExecutionContext) -> \
        Tuple['Any', DataContainer]:
    """
    Fit-transform the data container in minibatches through the given step, then zip the per-batch outputs.

    :param step: pipeline fitted and applied to each minibatch; the refitted step is returned
    :param data_container: full data container to split into minibatches
    :param context: execution context; the step is pushed onto it first
    :return: tuple of (fitted step, :class:`ZipDataContainer` of the transformed batches)
    """
    batch_context = context.push(step)
    batches = data_container.minibatches(
        batch_size=self.batch_size,
        keep_incomplete_batch=self.keep_incomplete_batch,
        default_value_data_inputs=self.default_value_data_inputs,
        default_value_expected_outputs=self.default_value_expected_outputs
    )

    batch_outputs = []
    for batch in batches:
        # The step is rebound each iteration so fitting accumulates across batches.
        step, transformed_batch = step._fit_transform_data_container(batch, batch_context)
        batch_outputs.append(transformed_batch)

    return step, ZipDataContainer.create_from(*batch_outputs)
def test_zip_data_container_should_concatenate_inner_features():
    di_3d, eo_3d = _create_data_source((10, 10, 2))
    di_2d, eo_2d = _create_data_source((10, 10))
    container_3d = DataContainer(data_inputs=di_3d, expected_outputs=eo_3d)
    container_2d = DataContainer(data_inputs=di_2d, expected_outputs=eo_2d)
    zipped = ZipDataContainer.create_from(container_3d, container_2d)

    zipped.concatenate_inner_features()

    # The 2d inputs become the last feature column of the concatenated inputs.
    assert np.array_equal(np.array(zipped.data_inputs)[..., -1], container_2d.data_inputs)
    # Expected outputs remain those of the first (3d) container.
    assert np.array_equal(np.array(zipped.expected_outputs), eo_3d)