def prepare_data(self, x, y, validation_data, validation_split):
    # Flatten the nested structures of x and y into lists.
    x = nest.flatten(x)
    y = nest.flatten(y)
    # TODO: check x, y types to be numpy.ndarray or tf.data.Dataset.
    # TODO: y.reshape(-1, 1) if needed.
    y = self._label_encoding(y)
    # Split off validation data with validation_split when no explicit
    # validation data is given and every input is a numpy array.
    if (all(isinstance(temp_x, np.ndarray) for temp_x in x) and
            all(isinstance(temp_y, np.ndarray) for temp_y in y) and
            validation_data is None and
            validation_split):
        (x, y), (x_val, y_val) = utils.split_train_to_valid(
            x, y, validation_split)
        validation_data = x_val, y_val
    # TODO: Handle other types of input, zip dataset, tensor, dict.
    # Prepare the dataset. nest.flatten always returns a list, so a Dataset
    # input has to be unwrapped from its single-element list.
    if len(x) == 1 and isinstance(x[0], tf.data.Dataset):
        dataset = x[0]
    else:
        dataset = utils.prepare_preprocess(x, y)
    # Guard against validation_data being None when neither validation_data
    # nor validation_split was given.
    if (validation_data is not None and
            not isinstance(validation_data, tf.data.Dataset)):
        x_val, y_val = validation_data
        validation_data = utils.prepare_preprocess(x_val, y_val)
    return dataset, validation_data
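
# NOTE: `utils.split_train_to_valid` and `utils.prepare_preprocess` are not
# shown in this excerpt. The sketch below is an illustrative assumption of
# what `split_train_to_valid` does -- holding out the last
# `validation_split` fraction of every array, as the `fit` docstring below
# describes -- not the actual `utils` implementation, and the name
# `_split_train_to_valid_sketch` is hypothetical.
def _split_train_to_valid_sketch(x, y, validation_split):
    """Hold out the last `validation_split` fraction of each array."""
    num_samples = x[0].shape[0]
    num_train = num_samples - int(num_samples * validation_split)
    x_train = [temp_x[:num_train] for temp_x in x]
    y_train = [temp_y[:num_train] for temp_y in y]
    x_val = [temp_x[num_train:] for temp_x in x]
    y_val = [temp_y[num_train:] for temp_y in y]
    return (x_train, y_train), (x_val, y_val)
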
def fit(self,
        x=None,
        y=None,
        validation_split=0,
        validation_data=None,
        **kwargs):
    """Search for the best model and hyperparameters for the AutoModel.

    It will search for the best model based on the performance on
    validation data.

    Args:
        x: numpy.ndarray or tf.data.Dataset. Training data x.
        y: numpy.ndarray or tf.data.Dataset. Training data y.
        validation_split: Float between 0 and 1. Fraction of the training
            data to be used as validation data. The model will set apart
            this fraction of the training data, will not train on it, and
            will evaluate the loss and any model metrics on this data at
            the end of each epoch. The validation data is selected from
            the last samples in the `x` and `y` data provided, before
            shuffling. This argument is not supported when `x` is a
            dataset.
        validation_data: Data on which to evaluate the loss and any model
            metrics at the end of each epoch. The model will not be
            trained on this data. `validation_data` will override
            `validation_split`. `validation_data` could be:
              - tuple `(x_val, y_val)` of Numpy arrays or tensors
              - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
              - dataset or a dataset iterator
            For the first two cases, `batch_size` must be provided.
            For the last case, `validation_steps` must be provided.
    """
    # Flatten the nested structures of x and y into lists.
    x = nest.flatten(x)
    y = nest.flatten(y)
    y = self._label_encoding(y)

    # TODO: Set the shapes only if they are not provided by the user when
    #  initiating the HyperHead or Block.
    for y_input, output_node in zip(y, self.outputs):
        if len(y_input.shape) == 1:
            # Promote 1-D labels to 2-D, e.g. shape (n,) -> (n, 1).
            y_input = np.reshape(y_input, y_input.shape + (1,))
        output_node.shape = y_input.shape[1:]
        output_node.in_hypermodels[0].output_shape = output_node.shape

    # Split off validation data with validation_split when no explicit
    # validation data is given and every input is a numpy array.
    if (all(isinstance(temp_x, np.ndarray) for temp_x in x) and
            all(isinstance(temp_y, np.ndarray) for temp_y in y) and
            validation_data is None and
            validation_split):
        (x, y), (x_val, y_val) = utils.split_train_to_valid(
            x, y, validation_split)
        validation_data = x_val, y_val
    # TODO: Handle other types of input, zip dataset, tensor, dict.

    # Prepare the dataset.
    x, y, validation_data = utils.prepare_preprocess(x, y, validation_data)

    self.preprocess(hp=kerastuner.HyperParameters(),
                    x=x,
                    y=y,
                    validation_data=validation_data,
                    fit=True)
    self.tuner = tuner.RandomSearch(hypermodel=self,
                                    objective='val_loss',
                                    max_trials=self.max_trials,
                                    directory=self.directory)
    # TODO: allow early stop if epochs is not specified.
    self.tuner.search(x=x,
                      y=y,
                      validation_data=validation_data,
                      **kwargs)
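
# Example usage (a hypothetical sketch): assuming an AutoModel instance
# `auto_model` built elsewhere with `inputs`, `outputs` and `max_trials`,
# the `fit` above could be called as its docstring describes. Extra keyword
# arguments such as `epochs` are forwarded to the tuner's search.
#
#     auto_model.fit(x_train, y_train,
#                    validation_split=0.2,  # hold out the last 20%
#                    epochs=10)
#
#     auto_model.fit(x_train, y_train,
#                    validation_data=(x_val, y_val),  # numpy tuple case
#                    batch_size=32,  # required for numpy validation data
#                    epochs=10)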