def _check_data_format(self, x, y, validation=False, predict=False):
    """Verify that the data provides as many arrays as the model has IOs.

    Args:
        x: Input data. Either a `tf.data.Dataset` or a nested structure of
            objects exposing a `.shape` attribute.
        y: Target data as a nested structure of objects exposing `.shape`.
            Must be None when `x` is a `tf.data.Dataset`.
        validation: Whether the data is validation data. Only changes the
            wording of the error messages.
        predict: Whether the check is done for prediction, in which case
            the targets are not inspected.

    Raises:
        ValueError: If `y` is given although `x` is a `tf.data.Dataset`, or
            if the number of flattened input (target) shapes differs from
            `len(self.inputs)` (`len(self.outputs)`).
    """
    in_val = ' in validation_data' if validation else ''
    x_is_dataset = isinstance(x, tf.data.Dataset)

    if x_is_dataset and y is not None:
        raise ValueError('Expect y is None when x is '
                         'tf.data.Dataset{in_val}.'.format(in_val=in_val))

    if not x_is_dataset:
        x_shapes = [item.shape for item in nest.flatten(x)]
        if not predict:
            y_shapes = [item.shape for item in nest.flatten(y)]
    elif predict:
        # When predicting, the whole dataset shape is treated as inputs.
        x_shapes = nest.flatten(data_utils.dataset_shape(x))
    else:
        # Otherwise the dataset shape is unpacked as an (inputs, targets) pair.
        x_spec, y_spec = data_utils.dataset_shape(x)
        x_shapes = nest.flatten(x_spec)
        y_shapes = nest.flatten(y_spec)

    if len(x_shapes) != len(self.inputs):
        raise ValueError('Expect x{in_val} to have {input_num} arrays, '
                         'but got {data_num}'.format(
                             in_val=in_val,
                             input_num=len(self.inputs),
                             data_num=len(x_shapes)))

    if predict:
        return

    if len(y_shapes) != len(self.outputs):
        raise ValueError('Expect y{in_val} to have {output_num} arrays, '
                         'but got {data_num}'.format(
                             in_val=in_val,
                             output_num=len(self.outputs),
                             data_num=len(y_shapes)))
def fit_before_convert(self, dataset):
    """Infer `num_classes` and fit a label encoder on raw target data.

    Args:
        dataset: The target data. A `tf.data.Dataset`, `pd.DataFrame`,
            `pd.Series`, or an object offering `flatten()`, `shape` and
            `len()` (presumably a numpy array - confirm against callers).

    Raises:
        ValueError: If fewer than 2 classes are found or configured.
    """
    # A tf.data.Dataset must be encoded already, so no encoder is fitted;
    # only the number of classes is inferred when it is not set.
    if isinstance(dataset, tf.data.Dataset):
        if not self.num_classes:
            width = data_utils.dataset_shape(dataset)[0]
            # A single column holds 0s and 1s, i.e. two classes.
            self.num_classes = 2 if width == 1 else width
        return

    if isinstance(dataset, pd.DataFrame):
        dataset = dataset.values
    if isinstance(dataset, pd.Series):
        dataset = dataset.values.reshape(-1, 1)

    flat = dataset.flatten()

    # More values than rows: the data is not plain labels, so the number
    # of columns is taken as the number of classes and nothing is fitted.
    if len(flat) != len(dataset):
        self.num_classes = dataset.shape[1]
        return

    labels = set(flat)
    if self.num_classes is None:
        self.num_classes = len(labels)

    if self.num_classes < 2:
        raise ValueError(
            'Expect the target data for {name} to have '
            'at least 2 classes, but got {num_classes}.'.format(
                name=self.name, num_classes=self.num_classes))
    elif self.num_classes == 2:
        self.label_encoder = encoders.LabelEncoder()
    elif self.num_classes > 2:
        self.label_encoder = encoders.OneHotEncoder()

    self.label_encoder.fit(dataset)
def fit(self, dataset):
    """Infer `num_classes` from the dataset, or validate its shape.

    Args:
        dataset: The target dataset whose shape (without its first
            dimension) is inspected.

    Raises:
        ValueError: If `num_classes` is already set and the dataset shape
            does not match the shape expected for it.
    """
    super().fit(dataset)
    full_shape = data_utils.dataset_shape(dataset).as_list()
    shape = tuple(full_shape[1:])

    # No class count configured yet: infer it and skip validation.
    if not self.num_classes:
        # A single column holds 0s and 1s, i.e. two classes.
        self.num_classes = 2 if shape == (1, ) else shape[0]
        return

    # Two classes without multi-label are expected in a single column.
    single_column = self.num_classes == 2 and not self.multi_label
    expected = (1, ) if single_column else (self.num_classes, )

    if shape == expected:
        return

    raise ValueError('Expect the target data for {name} to have '
                     'shape {expected}, but got {actual}.'.format(
                         name=self.name, expected=expected, actual=shape))
def test_text_dataset_batch():
    """A batched string dataset is transformed to shape [None, 1]."""
    sentences = np.array(["a b c", "b b c"])
    batched = tf.data.Dataset.from_tensor_slices(sentences).batch(32)
    adapter = input_adapter.TextInputAdapter()

    result = adapter.transform(batched)

    result_shape = data_utils.dataset_shape(result).as_list()
    assert result_shape == [None, 1]
    assert isinstance(result, tf.data.Dataset)
def test_text_adapt_np():
    """Adapting a numpy string array yields a dataset of shape [None]."""
    sentences = np.array(["a b c", "b b c"])
    adapter = input_adapters.TextAdapter()

    result = adapter.adapt(sentences, batch_size=32)

    result_shape = data_utils.dataset_shape(result).as_list()
    assert result_shape == [None]
    assert isinstance(result, tf.data.Dataset)
def test_text_adapt_unbatched_dataset():
    """Adapting an unbatched string dataset yields shape [None]."""
    sentences = np.array(["a b c", "b b c"])
    unbatched = tf.data.Dataset.from_tensor_slices(sentences)
    adapter = input_adapters.TextAdapter()

    result = adapter.adapt(unbatched, batch_size=32)

    result_shape = data_utils.dataset_shape(result).as_list()
    assert result_shape == [None]
    assert isinstance(result, tf.data.Dataset)
def test_label_encoder_encode_to_correct_shape():
    """LabelEncoder.transform keeps a batched [None, 1] dataset shape."""
    raw_labels = [["a"], ["b"]]
    batched = tf.data.Dataset.from_tensor_slices(raw_labels).batch(32)
    encoder = encoders.LabelEncoder(["a", "b"])

    encoded = encoder.transform(batched)

    encoded_shape = data_utils.dataset_shape(encoded).as_list()
    assert encoded_shape == [None, 1]
def _check_data_format(self, dataset, validation=False, predict=False):
    """Verify that the data provides as many arrays as the model has IOs.

    Args:
        dataset: The data to check. It is unpacked as an `(x, y)` pair,
            except for validation data given as a `tf.data.Dataset`, which
            is used as `x` with `y` set to None.
        validation: Whether the data is validation data. Affects the
            unpacking described above and the wording of error messages.
        predict: Whether the check is done for prediction, in which case
            the targets are not inspected.

    Raises:
        ValueError: If `y` is given although `x` is a `tf.data.Dataset`, or
            if the number of flattened input (target) shapes differs from
            `len(self.inputs)` (`len(self.outputs)`).
    """
    in_val = " in validation_data" if validation else ""

    if validation and isinstance(dataset, tf.data.Dataset):
        x, y = dataset, None
    else:
        x, y = dataset

    x_is_dataset = isinstance(x, tf.data.Dataset)

    if x_is_dataset and y is not None:
        raise ValueError(
            "Expected y to be None when x is "
            "tf.data.Dataset{in_val}.".format(in_val=in_val)
        )

    if not x_is_dataset:
        x_shapes = [item.shape for item in nest.flatten(x)]
        if not predict:
            y_shapes = [item.shape for item in nest.flatten(y)]
    elif predict:
        # When predicting, the whole dataset shape is treated as inputs.
        x_shapes = nest.flatten(data_utils.dataset_shape(x))
    else:
        # Otherwise the dataset shape is unpacked as an (inputs, targets) pair.
        x_spec, y_spec = data_utils.dataset_shape(x)
        x_shapes = nest.flatten(x_spec)
        y_shapes = nest.flatten(y_spec)

    if len(x_shapes) != len(self.inputs):
        raise ValueError(
            "Expected x{in_val} to have {input_num} arrays, "
            "but got {data_num}".format(
                in_val=in_val,
                input_num=len(self.inputs),
                data_num=len(x_shapes),
            )
        )

    if predict:
        return

    if len(y_shapes) != len(self.outputs):
        raise ValueError(
            "Expected y{in_val} to have {output_num} arrays, "
            "but got {data_num}".format(
                in_val=in_val,
                output_num=len(self.outputs),
                data_num=len(y_shapes),
            )
        )
def test_text_np():
    """Transforming a numpy string array yields a [None, 1] dataset."""
    sentences = np.array(['a b c', 'b b c'])
    adapter = input_adapter.TextInputAdapter()

    result = adapter.transform(sentences)

    result_shape = data_utils.dataset_shape(result).as_list()
    assert result_shape == [None, 1]
    assert isinstance(result, tf.data.Dataset)
def _prepare_model_build(self, hp, dataset, validation_data=None):
    """Prepare the data pipeline before the Keras model is built.

    Builds a pipeline from `self.hyper_pipeline`, fits it on `dataset`,
    transforms the data with it, and passes the transformed dataset's
    shapes to the hypermodel via `set_io_shapes`.

    Args:
        hp: The hyperparameters passed to `self.hyper_pipeline.build`.
        dataset: The training data used to build and fit the pipeline.
        validation_data: Optional validation data. It is transformed with
            the fitted pipeline when it is not None.

    Returns:
        A tuple `(pipeline, dataset, validation_data)` holding the fitted
        pipeline and the transformed data; `validation_data` stays None
        when none was given.
    """
    pipeline = self.hyper_pipeline.build(hp, dataset)
    pipeline.fit(dataset)

    transformed = pipeline.transform(dataset)
    io_shapes = data_utils.dataset_shape(transformed)
    self.hypermodel.hypermodel.set_io_shapes(io_shapes)

    transformed_validation = (
        None if validation_data is None
        else pipeline.transform(validation_data)
    )
    return pipeline, transformed, transformed_validation
def _has_y(self, dataset):
    """Tell whether the dataset is judged to contain targets.

    Args:
        dataset: The `tf.data.Dataset` whose shape structure is inspected.

    Returns:
        False when the shape structure has at most one first-level
        element. True when any first-level element is a tuple. Otherwise
        True only when there are exactly two elements and the model has
        one input and one output.
    """
    shapes = data_utils.dataset_shape(dataset)

    # With one or no first-level element there is nothing besides x.
    if len(shapes) <= 1:
        return False

    # A tuple at the first level means the nest has two levels.
    if any(isinstance(entry, tuple) for entry in shapes):
        return True

    # One-level nest: it only matches the single-input, single-output case.
    return (
        len(shapes) == 2
        and len(self.inputs) == 1
        and len(self.outputs) == 1
    )
def _get_x(self, dataset):
    """Return a dataset with the targets dropped, if there are any.

    Args:
        dataset: The `tf.data.Dataset` to extract the inputs from.

    Returns:
        The dataset mapped to its first component when targets are judged
        to be present (or when the structure has at most one first-level
        element); otherwise the dataset unchanged.
    """
    shapes = data_utils.dataset_shape(dataset)

    # With one or no first-level element, keep the first component.
    if len(shapes) <= 1:
        return dataset.map(lambda *parts: parts[0])

    # Either the nest has two levels (a tuple at the first level), or it
    # has one level and matches the single-input, single-output case.
    if any(isinstance(entry, tuple) for entry in shapes) or (
        len(shapes) == 2
        and len(self.inputs) == 1
        and len(self.outputs) == 1
    ):
        return dataset.map(lambda features, targets: features)

    return dataset
def test_predict_tuple_x_and_tuple_y_call_model_predict_with_x(
        tuner_fn, tmp_path):
    """predict() on a (tuple x, tuple y) dataset passes only x to the model."""
    model = mock.Mock()
    tuner = mock.Mock()
    tuner.get_best_model.return_value = model
    tuner_fn.return_value.return_value = tuner

    auto_model = ak.AutoModel(
        ak.ImageInput(), ak.RegressionHead(), directory=tmp_path)

    images = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    dataset = tf.data.Dataset.from_tensor_slices(((images, ), (targets, )))

    auto_model.fit(dataset)
    auto_model.predict(dataset)

    # First positional argument of the first model.predict call.
    positional_args = model.predict.call_args_list[0][0]
    predicted_on = positional_args[0]
    assert data_utils.dataset_shape(predicted_on).as_list() == [
        None, 32, 32, 3]
def _prepare_model_build(self, hp, **kwargs):
    """Prepare the data pipeline before the Keras model is built.

    Builds a pipeline from `self.hyper_pipeline`, fits it on the training
    data, transforms the data with it, and passes the transformed
    dataset's shapes to the hypermodel via `set_io_shapes`.

    Args:
        hp: The hyperparameters passed to `self.hyper_pipeline.build`.
        **kwargs: Must contain the training data under `"x"`. If it has a
            `"validation_data"` key, that value is transformed with the
            fitted pipeline.

    Returns:
        A tuple `(pipeline, dataset, validation_data)` holding the fitted
        pipeline and the transformed data; `validation_data` is None when
        the key is absent from `kwargs`.
    """
    train_data = kwargs["x"]

    pipeline = self.hyper_pipeline.build(hp, train_data)
    pipeline.fit(train_data)

    train_data = pipeline.transform(train_data)
    io_shapes = data_utils.dataset_shape(train_data)
    self.hypermodel.hypermodel.set_io_shapes(io_shapes)

    # Only the presence of the key is checked, not the value behind it.
    validation_data = (
        pipeline.transform(kwargs["validation_data"])
        if "validation_data" in kwargs
        else None
    )
    return pipeline, train_data, validation_data
def test_time_series_input_transform():
    """SlidingWindow with lookback=2 yields a [None, 2, 32] dataset."""
    series = np.random.rand(100, 32)
    batched = tf.data.Dataset.from_tensor_slices(series).batch(32)
    window = common.SlidingWindow(lookback=2, batch_size=32)

    windowed = window.transform(batched)

    windowed_shape = data_utils.dataset_shape(windowed).as_list()
    assert windowed_shape == [None, 2, 32]
def _record_dataset_shape(self, dataset):
    """Store the dataset shape, minus its first dimension, on `self.shape`.

    Args:
        dataset: The dataset whose shape is recorded as a list.
    """
    full_shape = data_utils.dataset_shape(dataset)
    trailing_dims = full_shape[1:]
    self.shape = trailing_dims.as_list()
def convert_to_dataset(self, x):
    """Convert `x` to a dataset, reshaping rank-1 elements to a column.

    Args:
        x: The data handed to the parent class's `convert_to_dataset`.

    Returns:
        The converted dataset. When its shape has length 1, every element
        is reshaped to `[-1, 1]`; otherwise it is returned as converted.
    """
    converted = super().convert_to_dataset(x)

    if len(data_utils.dataset_shape(converted)) != 1:
        return converted

    return converted.map(lambda tensor: tf.reshape(tensor, [-1, 1]))
def record_dataset_shape(self, dataset):
    """Store the shape of `dataset` on `self.shape`.

    Args:
        dataset: The dataset whose shape is recorded.
    """
    recorded_shape = data_utils.dataset_shape(dataset)
    self.shape = recorded_shape
def test_unzip_dataset_doesnt_unzip_single_dataset():
    """Unzipping a single dataset repeatedly leaves its shape at [32, 2]."""
    dataset = tf.data.Dataset.from_tensor_slices(np.random.rand(10, 32, 2))

    # Unzip twice, taking the first result each time.
    for _ in range(2):
        dataset = data_utils.unzip_dataset(dataset)[0]

    final_shape = data_utils.dataset_shape(dataset).as_list()
    assert final_shape == [32, 2]
def test_multi_label_two_classes_has_two_columns():
    """A multi-label head adapter keeps two-column targets at [None, 2]."""
    targets = np.random.rand(10, 2)
    adapter = output_adapter.ClassificationHeadAdapter(
        name="a", multi_label=True)

    transformed = adapter.fit_transform(targets)

    transformed_shape = data_utils.dataset_shape(transformed).as_list()
    assert transformed_shape == [None, 2]