示例#1
0
    def _check_data_format(self, x, y, validation=False, predict=False):
        """Verify the data supplies one array per model input and output.

        Args:
            x: Either a `tf.data.Dataset` or a nested structure of objects
                exposing a `.shape` attribute.
            y: A nested structure of objects exposing a `.shape` attribute.
                Must be None when `x` is a `tf.data.Dataset`.
            validation: Whether the data is validation data. It only changes
                the wording of the error messages.
            predict: Whether the check is made for prediction, in which case
                the targets are not inspected.

        Raises:
            ValueError: If `y` is given together with a dataset `x`, or if
                the number of flattened inputs (targets) differs from
                `len(self.inputs)` (`len(self.outputs)`).
        """
        in_val = ' in validation_data' if validation else ''
        is_dataset = isinstance(x, tf.data.Dataset)

        if is_dataset and y is not None:
            raise ValueError('Expect y is None when x is '
                             'tf.data.Dataset{in_val}.'.format(in_val=in_val))

        # Targets are only collected when they are going to be checked.
        y_shapes = None
        if is_dataset:
            shapes = data_utils.dataset_shape(x)
            if predict:
                x_shapes = nest.flatten(shapes)
            else:
                x_part, y_part = shapes
                x_shapes = nest.flatten(x_part)
                y_shapes = nest.flatten(y_part)
        else:
            x_shapes = [array.shape for array in nest.flatten(x)]
            if not predict:
                y_shapes = [array.shape for array in nest.flatten(y)]

        input_num = len(self.inputs)
        if len(x_shapes) != input_num:
            raise ValueError('Expect x{in_val} to have {input_num} arrays, '
                             'but got {data_num}'.format(
                                 in_val=in_val,
                                 input_num=input_num,
                                 data_num=len(x_shapes)))

        if predict:
            return

        output_num = len(self.outputs)
        if len(y_shapes) != output_num:
            raise ValueError('Expect y{in_val} to have {output_num} arrays, '
                             'but got {data_num}'.format(
                                 in_val=in_val,
                                 output_num=output_num,
                                 data_num=len(y_shapes)))
示例#2
0
 def fit_before_convert(self, dataset):
     """Infer `num_classes` and, for raw labels, fit a label encoder.

     Args:
         dataset: A `tf.data.Dataset`, `pd.DataFrame`, `pd.Series`, or an
             object offering `flatten()`, `shape` and `len()`
             (presumably a numpy array; confirm against callers).

     Raises:
         ValueError: If single-column label data yields fewer than two
             classes.
     """
     if isinstance(dataset, tf.data.Dataset):
         # A dataset is treated as already encoded, so only num_classes
         # is inferred and no encoder is fitted.
         if not self.num_classes:
             first_dim = data_utils.dataset_shape(dataset)[0]
             # A single column is taken to hold 0s and 1s, i.e. two classes.
             self.num_classes = 2 if first_dim == 1 else first_dim
         return

     if isinstance(dataset, pd.DataFrame):
         dataset = dataset.values
     elif isinstance(dataset, pd.Series):
         dataset = dataset.values.reshape(-1, 1)

     flat = dataset.flatten()
     if len(flat) != len(dataset):
         # More than one value per row: the data is not a label column.
         self.num_classes = dataset.shape[1]
         return

     labels = set(flat)
     if self.num_classes is None:
         self.num_classes = len(labels)

     if self.num_classes < 2:
         raise ValueError(
             'Expect the target data for {name} to have '
             'at least 2 classes, but got {num_classes}.'.format(
                 name=self.name, num_classes=self.num_classes))

     if self.num_classes == 2:
         self.label_encoder = encoders.LabelEncoder()
     else:
         self.label_encoder = encoders.OneHotEncoder()
     self.label_encoder.fit(dataset)
示例#3
0
    def fit(self, dataset):
        """Infer `num_classes` from the targets or validate their shape.

        Args:
            dataset: The target dataset. Its shape, without the leading
                dimension, is compared with the shape implied by
                `num_classes`.

        Raises:
            ValueError: If `num_classes` is already set and the target shape
                does not match the shape expected for it.
        """
        super().fit(dataset)
        dims = data_utils.dataset_shape(dataset).as_list()
        shape = tuple(dims[1:])

        if not self.num_classes:
            # Unknown so far: a single column is taken to hold 0s and 1s.
            self.num_classes = 2 if shape == (1, ) else shape[0]
            return

        # Two classes without multi-label are expected in a single column.
        single_column = self.num_classes == 2 and not self.multi_label
        expected = (1, ) if single_column else (self.num_classes, )

        if shape == expected:
            return
        raise ValueError('Expect the target data for {name} to have '
                         'shape {expected}, but got {actual}.'.format(
                             name=self.name,
                             expected=expected,
                             actual=shape))
示例#4
0
def test_text_dataset_batch():
    """Transforming a batched text dataset yields a dataset of shape [None, 1]."""
    texts = np.array(["a b c", "b b c"])
    batched = tf.data.Dataset.from_tensor_slices(texts).batch(32)

    transformed = input_adapter.TextInputAdapter().transform(batched)

    shape = data_utils.dataset_shape(transformed)
    assert shape.as_list() == [None, 1]
    assert isinstance(transformed, tf.data.Dataset)
示例#5
0
def test_text_adapt_np():
    """Adapting a numpy array of strings yields a dataset of shape [None]."""
    texts = np.array(["a b c", "b b c"])

    adapted = input_adapters.TextAdapter().adapt(texts, batch_size=32)

    shape = data_utils.dataset_shape(adapted)
    assert shape.as_list() == [None]
    assert isinstance(adapted, tf.data.Dataset)
示例#6
0
def test_text_adapt_unbatched_dataset():
    """Adapting an unbatched text dataset yields a dataset of shape [None]."""
    texts = np.array(["a b c", "b b c"])
    unbatched = tf.data.Dataset.from_tensor_slices(texts)

    adapted = input_adapters.TextAdapter().adapt(unbatched, batch_size=32)

    shape = data_utils.dataset_shape(adapted)
    assert shape.as_list() == [None]
    assert isinstance(adapted, tf.data.Dataset)
示例#7
0
def test_label_encoder_encode_to_correct_shape():
    """The label encoder keeps a batched single-column dataset at [None, 1]."""
    label_encoder = encoders.LabelEncoder(["a", "b"])
    rows = [["a"], ["b"]]
    batched = tf.data.Dataset.from_tensor_slices(rows).batch(32)

    encoded = label_encoder.transform(batched)

    shape = data_utils.dataset_shape(encoded)
    assert shape.as_list() == [None, 1]
示例#8
0
    def _check_data_format(self, dataset, validation=False, predict=False):
        """Verify the data supplies one array per model input and output.

        Args:
            dataset: An `(x, y)` pair. For validation data it may instead be
                a single `tf.data.Dataset`, which is then used as `x` with
                `y` set to None.
            validation: Whether `dataset` is validation data. It also changes
                the wording of the error messages.
            predict: Whether the check is made for prediction, in which case
                the targets are not inspected.

        Raises:
            ValueError: If `y` is given together with a dataset `x`, or if
                the number of flattened inputs (targets) differs from
                `len(self.inputs)` (`len(self.outputs)`).
        """
        in_val = " in validation_data" if validation else ""

        # Only validation data may arrive as a bare dataset without a y part.
        if validation and isinstance(dataset, tf.data.Dataset):
            x, y = dataset, None
        else:
            x, y = dataset

        is_dataset = isinstance(x, tf.data.Dataset)
        if is_dataset and y is not None:
            raise ValueError(
                "Expected y to be None when x is "
                "tf.data.Dataset{in_val}.".format(in_val=in_val)
            )

        # Targets are only collected when they are going to be checked.
        y_shapes = None
        if is_dataset:
            shapes = data_utils.dataset_shape(x)
            if predict:
                x_shapes = nest.flatten(shapes)
            else:
                x_part, y_part = shapes
                x_shapes = nest.flatten(x_part)
                y_shapes = nest.flatten(y_part)
        else:
            x_shapes = [array.shape for array in nest.flatten(x)]
            if not predict:
                y_shapes = [array.shape for array in nest.flatten(y)]

        input_num = len(self.inputs)
        if len(x_shapes) != input_num:
            raise ValueError(
                "Expected x{in_val} to have {input_num} arrays, "
                "but got {data_num}".format(
                    in_val=in_val, input_num=input_num, data_num=len(x_shapes)
                )
            )

        if predict:
            return

        output_num = len(self.outputs)
        if len(y_shapes) != output_num:
            raise ValueError(
                "Expected y{in_val} to have {output_num} arrays, "
                "but got {data_num}".format(
                    in_val=in_val,
                    output_num=output_num,
                    data_num=len(y_shapes),
                )
            )
示例#9
0
def test_text_np():
    """Transforming a numpy array of strings yields a dataset of shape [None, 1]."""
    texts = np.array(['a b c', 'b b c'])

    transformed = input_adapter.TextInputAdapter().transform(texts)

    shape = data_utils.dataset_shape(transformed)
    assert shape.as_list() == [None, 1]
    assert isinstance(transformed, tf.data.Dataset)
示例#10
0
    def _prepare_model_build(self, hp, dataset, validation_data=None):
        """Build and fit the pipeline, then set the hypermodel's IO shapes.

        Args:
            hp: Passed to `self.hyper_pipeline.build` along with `dataset`.
            dataset: The data the pipeline is fitted on and then transformed.
            validation_data: Optional data that is only transformed by the
                fitted pipeline.

        Returns:
            A `(pipeline, dataset, validation_data)` tuple holding the fitted
            pipeline and the transformed data; `validation_data` stays None
            when none was given.
        """
        pipeline = self.hyper_pipeline.build(hp, dataset)
        pipeline.fit(dataset)
        transformed = pipeline.transform(dataset)

        # The IO shapes are taken from the transformed data, not the raw data.
        io_shapes = data_utils.dataset_shape(transformed)
        self.hypermodel.hypermodel.set_io_shapes(io_shapes)

        if validation_data is None:
            return pipeline, transformed, None
        return pipeline, transformed, pipeline.transform(validation_data)
示例#11
0
 def _has_y(self, dataset):
     """Report whether the dataset appears to carry targets next to inputs.

     Nothing is removed or modified; the dataset is only inspected.

     Args:
         dataset: The dataset whose shape structure is obtained from
             `data_utils.dataset_shape`.

     Returns:
         False if the top level of the shape structure has at most one
         entry. True if any top-level entry is a tuple (a two-level nest).
         Otherwise True only when there are exactly two top-level entries
         and the model has a single input and a single output.
     """
     shapes = data_utils.dataset_shape(dataset)
     # NOTE(review): if `dataset_shape` returns a bare shape object for a
     # single-tensor dataset, `len(shapes)` is that tensor's rank, so a
     # rank-2 input could be mistaken for an (x, y) pair -- confirm.
     # Only one or less element in the first level.
     if len(shapes) <= 1:
         return False
     # The first level has more than 1 element.
     # A tuple entry means the nest has 2 levels, i.e. (inputs, targets).
     if any(isinstance(shape, tuple) for shape in shapes):
         return True
     # The nest has one level.
     # It only counts as (x, y) in the single IO case.
     return len(shapes) == 2 and len(self.inputs) == 1 and len(self.outputs) == 1
示例#12
0
 def _get_x(self, dataset):
     """Return the dataset with the targets dropped when it carries any.

     Args:
         dataset: The dataset whose shape structure is obtained from
             `data_utils.dataset_shape`.

     Returns:
         A dataset mapped to its first component when the shape structure
         has at most one top-level entry, contains a tuple, or has exactly
         two entries while the model has one input and one output.
         Otherwise the dataset itself, unchanged.
     """
     shapes = data_utils.dataset_shape(dataset)

     # At most one top-level entry: keep the first component of each element.
     if len(shapes) <= 1:
         return dataset.map(lambda *elements: elements[0])

     # Either a two-level nest, or a flat pair in the single IO case.
     two_level = any(isinstance(shape, tuple) for shape in shapes)
     if two_level or (
             len(shapes) == 2
             and len(self.inputs) == 1
             and len(self.outputs) == 1):
         return dataset.map(lambda features, _targets: features)

     return dataset
示例#13
0
def test_predict_tuple_x_and_tuple_y_call_model_predict_with_x(
        tuner_fn, tmp_path):
    """predict() on an ((x,), (y,)) dataset passes only x to model.predict."""
    best_model = mock.Mock()
    fake_tuner = mock.Mock()
    fake_tuner.get_best_model.return_value = best_model
    tuner_fn.return_value.return_value = fake_tuner

    auto_model = ak.AutoModel(ak.ImageInput(),
                              ak.RegressionHead(),
                              directory=tmp_path)
    features = np.random.rand(100, 32, 32, 3)
    targets = np.random.rand(100, 1)
    dataset = tf.data.Dataset.from_tensor_slices(((features, ), (targets, )))

    auto_model.fit(dataset)
    auto_model.predict(dataset)

    # First positional argument of the first recorded predict() call.
    first_call = best_model.predict.call_args_list[0]
    predicted_on = first_call[0][0]
    shape = data_utils.dataset_shape(predicted_on)
    assert shape.as_list() == [None, 32, 32, 3]
示例#14
0
    def _prepare_model_build(self, hp, **kwargs):
        """Build and fit the pipeline, then set the hypermodel's IO shapes.

        Args:
            hp: Passed to `self.hyper_pipeline.build` along with the data.
            **kwargs: Must contain `"x"`, the data the pipeline is fitted on
                and then transformed. May contain `"validation_data"`, which
                is only transformed by the fitted pipeline.

        Returns:
            A `(pipeline, dataset, validation_data)` tuple holding the fitted
            pipeline and the transformed data; `validation_data` is None when
            the key is absent from `kwargs`.
        """
        raw = kwargs["x"]
        pipeline = self.hyper_pipeline.build(hp, raw)
        pipeline.fit(raw)
        transformed = pipeline.transform(raw)

        # The IO shapes are taken from the transformed data, not the raw data.
        io_shapes = data_utils.dataset_shape(transformed)
        self.hypermodel.hypermodel.set_io_shapes(io_shapes)

        if "validation_data" not in kwargs:
            return pipeline, transformed, None
        return pipeline, transformed, pipeline.transform(kwargs["validation_data"])
示例#15
0
def test_time_series_input_transform():
    """SlidingWindow with lookback=2 turns [None, 32] data into [None, 2, 32]."""
    values = np.random.rand(100, 32)
    batched = tf.data.Dataset.from_tensor_slices(values).batch(32)
    window = common.SlidingWindow(lookback=2, batch_size=32)

    windowed = window.transform(batched)

    shape = data_utils.dataset_shape(windowed)
    assert shape.as_list() == [None, 2, 32]
示例#16
0
 def _record_dataset_shape(self, dataset):
     """Store the dataset's shape, minus its first dimension, in `self.shape`."""
     full_shape = data_utils.dataset_shape(dataset)
     trailing = full_shape[1:]
     self.shape = trailing.as_list()
示例#17
0
 def convert_to_dataset(self, x):
     """Convert `x` via the parent class and reshape rank-1 elements.

     Args:
         x: The data handed to the parent's `convert_to_dataset`.

     Returns:
         The converted dataset. When its shape has length 1, every element
         is reshaped to `[-1, 1]`; otherwise it is returned as converted.
     """
     converted = super().convert_to_dataset(x)
     if len(data_utils.dataset_shape(converted)) != 1:
         return converted
     return converted.map(lambda a: tf.reshape(a, [-1, 1]))
示例#18
0
 def record_dataset_shape(self, dataset):
     """Store the result of `data_utils.dataset_shape(dataset)` in `self.shape`."""
     observed_shape = data_utils.dataset_shape(dataset)
     self.shape = observed_shape
示例#19
0
def test_unzip_dataset_doesnt_unzip_single_dataset():
    """Unzipping a single-component dataset twice leaves its shape at [32, 2]."""
    single = tf.data.Dataset.from_tensor_slices(np.random.rand(10, 32, 2))

    # Apply the unzip twice, keeping the first result each time.
    for _ in range(2):
        single = data_utils.unzip_dataset(single)[0]

    shape = data_utils.dataset_shape(single)
    assert shape.as_list() == [32, 2]
def test_multi_label_two_classes_has_two_columns():
    """A multi-label head adapter keeps two-column targets at [None, 2]."""
    targets = np.random.rand(10, 2)
    head_adapter = output_adapter.ClassificationHeadAdapter(
        name="a", multi_label=True)

    transformed = head_adapter.fit_transform(targets)

    shape = data_utils.dataset_shape(transformed)
    assert shape.as_list() == [None, 2]