Example #1
# Imports assumed by this excerpt; the `utils` import path is an
# assumption about the project layout.
import numpy as np
import tensorflow as tf
from tensorflow.python.util import nest

from autokeras import utils


def prepare_data(self, x, y, validation_data, validation_split):
    # Flatten nested inputs and targets into flat lists.
    x = nest.flatten(x)
    y = nest.flatten(y)
    # TODO: check x, y types to be numpy.ndarray or tf.data.Dataset.
    # TODO: y.reshape(-1, 1) if needed.
    y = self._label_encoding(y)
    # Split off a validation set with validation_split when only numpy
    # arrays were given and no explicit validation data was provided.
    if (all(isinstance(temp_x, np.ndarray) for temp_x in x)
            and all(isinstance(temp_y, np.ndarray) for temp_y in y)
            and validation_data is None and validation_split):
        (x, y), (x_val, y_val) = utils.split_train_to_valid(
            x, y, validation_split)
        validation_data = x_val, y_val
    # TODO: Handle other types of input, zip dataset, tensor, dict.
    # Prepare the training dataset. `x` is a list after nest.flatten,
    # so check its single element for a tf.data.Dataset passed directly.
    if len(x) == 1 and isinstance(x[0], tf.data.Dataset):
        dataset = x[0]
    else:
        dataset = utils.prepare_preprocess(x, y)
    if (validation_data is not None
            and not isinstance(validation_data, tf.data.Dataset)):
        x_val, y_val = validation_data
        validation_data = utils.prepare_preprocess(x_val, y_val)
    return dataset, validation_data
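
A minimal usage sketch of the flow above. Here `model` is a hypothetical
object exposing this `prepare_data` (e.g. an AutoModel instance) and the
data shapes are illustrative; per the `fit` docstring in Example #2,
`utils.split_train_to_valid` holds out the last fraction of the samples.

import numpy as np

x_train = np.random.rand(100, 32)         # 100 samples, 32 features
y_train = np.random.randint(0, 10, 100)   # integer class labels

# With validation_split=0.2 and no explicit validation_data, the last
# 20 samples become the validation set; both splits come back wrapped
# by utils.prepare_preprocess.
dataset, validation_data = model.prepare_data(
    x_train, y_train, validation_data=None, validation_split=0.2)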
Example #2
(Method excerpt from the AutoModel class; assumes `numpy` as `np`,
`kerastuner`, TensorFlow's `nest` utilities, and the project-local
`tuner` and `utils` modules are imported at module level.)
    def fit(self,
            x=None,
            y=None,
            validation_split=0,
            validation_data=None,
            **kwargs):
        """Search for the best model and hyperparameters for the AutoModel.

        It will search for the best model based on the performances on
        validation data.

        Args:
            x: numpy.ndarray or tf.data.Dataset. Training data x.
            y: numpy.ndarray or tf.data.Dataset. Training data y.
            validation_split: Float between 0 and 1. Fraction of the
                training data to be used as validation data. The model
                will set apart this fraction of the training data, will
                not train on it, and will evaluate the loss and any model
                metrics on this data at the end of each epoch. The
                validation data is selected from the last samples in the
                `x` and `y` data provided, before shuffling. This
                argument is not supported when `x` is a dataset.
            validation_data: Data on which to evaluate
                the loss and any model metrics at the end of each epoch.
                The model will not be trained on this data.
                `validation_data` will override `validation_split`.
                `validation_data` could be:
                  - tuple `(x_val, y_val)` of Numpy arrays or tensors
                  - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
                  - dataset or a dataset iterator
                For the first two cases, `batch_size` must be provided.
                For the last case, `validation_steps` must be provided.
        """
        # Flatten nested inputs and targets into flat lists.
        x = nest.flatten(x)
        y = nest.flatten(y)

        y = self._label_encoding(y)
        # TODO: Set the shapes only if they are not provided by the user
        #  when initiating the HyperHead or Block.
        # Infer each output head's shape from its target array.
        for y_input, output_node in zip(y, self.outputs):
            if len(y_input.shape) == 1:
                y_input = np.reshape(y_input, y_input.shape + (1,))
            output_node.shape = y_input.shape[1:]
            output_node.in_hypermodels[0].output_shape = output_node.shape

        # Split off a validation set with validation_split when only numpy
        # arrays were given and no explicit validation data was provided.
        if (all(isinstance(temp_x, np.ndarray) for temp_x in x)
                and all(isinstance(temp_y, np.ndarray) for temp_y in y)
                and validation_data is None and validation_split):
            (x, y), (x_val, y_val) = utils.split_train_to_valid(
                x, y, validation_split)
            validation_data = x_val, y_val
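        # e.g. with 100 numpy samples and validation_split=0.2, the last
        # 20 samples become (x_val, y_val) and the first 80 remain in
        # (x, y), matching the docstring's "last samples" behavior.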

        # TODO: Handle other types of input, zip dataset, tensor, dict.
        # Prepare the dataset
        x, y, validation_data = utils.prepare_preprocess(x, y, validation_data)

        # Fit the preprocessors on the training data once before search.
        self.preprocess(hp=kerastuner.HyperParameters(),
                        x=x,
                        y=y,
                        validation_data=validation_data,
                        fit=True)
        # Random search over the hyperparameter space, selecting models
        # by validation loss.
        self.tuner = tuner.RandomSearch(hypermodel=self,
                                        objective='val_loss',
                                        max_trials=self.max_trials,
                                        directory=self.directory)

        # TODO: allow early stop if epochs is not specified.
        self.tuner.search(x=x, y=y, validation_data=validation_data, **kwargs)
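
A minimal end-to-end sketch of calling this `fit`. The `ak.AutoModel`,
`ak.ImageInput`, and `ak.ClassificationHead` names are assumptions about
the surrounding API, not part of the excerpt above:

import numpy as np
import autokeras as ak  # assumed package alias

x_train = np.random.rand(100, 28, 28, 1)   # toy image data
y_train = np.random.randint(0, 10, 100)    # integer class labels

input_node = ak.ImageInput()
output_node = ak.ClassificationHead()(input_node)
auto_model = ak.AutoModel(inputs=input_node, outputs=output_node)

# Hold out the last 20% of the arrays for validation; extra keyword
# arguments such as epochs are forwarded to the tuner's search.
auto_model.fit(x_train, y_train, validation_split=0.2, epochs=2)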