def main(path: list, xy: list, *args, **kwargs):
    """Dispatch plotting according to the number and extension of input files.

    :meta private:

    Args:
        path (list): Paths of the input files (``.csv`` or ``.json``).
        xy (list): Names of the x-axis and y-axis quantities, forwarded to the
            selected plotting function.

    Raises:
        DeephyperRuntimeError: if a single file has an unsupported extension,
            if several CSV files are given, or if several files do not all
            share the ``json`` extension.
    """
    # The extension is whatever follows the last "." of each path.
    suffixes = [p.split(".")[-1] for p in path]

    if len(path) == 1:
        single_path, suffix = path[0], suffixes[0]
        if suffix == "csv":
            plot_for_single_csv(single_path, xy)
        elif suffix == "json":
            plot_for_single_json(single_path, xy)
        else:
            raise DeephyperRuntimeError(
                f"Extension of input file '{suffix}' is not yet supported."
            )
        return

    # Comparing multiple results.csv files (different search experiments)
    if all(suffix == "csv" for suffix in suffixes):
        raise DeephyperRuntimeError(
            "Comparison of multiple experiments is not yet supported."
        )

    # Comparing multiple history.json files (different neural networks)
    if all(suffix == "json" for suffix in suffixes):
        plot_for_multiple_json(path, xy)
        return

    raise DeephyperRuntimeError(
        "Multiple input files should all have the same extension '.csv' or '.json'"
    )
def main(path: list, xy: list, *args, **kwargs):
    """Plot a single CSV file; any other input is rejected.

    Args:
        path (list): Paths of the input files; exactly one is supported.
        xy (list): Forwarded unchanged to ``plot_for_csv``.

    Raises:
        DeephyperRuntimeError: if the number of input files is not one, or if
            the single file does not have the ``csv`` extension.
    """
    if len(path) != 1:
        raise DeephyperRuntimeError(
            "Multiple input files not yet supported for quickplot."
        )

    csv_path = path[0]
    input_extension = csv_path.split(".")[-1]
    if input_extension != "csv":
        raise DeephyperRuntimeError(
            f"Extension of input file '{input_extension}' is not yet supported."
        )
    plot_for_csv(csv_path, xy)
def notebook_for_hps(path: list, output: str) -> None:
    """Generate the HPS analytics notebook for a single input.

    Args:
        path (list): Input paths; exactly one is supported.
        output (str): Name of the notebook to write. An empty string selects
            ``"dh-analytics-hps.ipynb"``.

    Raises:
        DeephyperRuntimeError: if ``path`` does not hold exactly one element.
    """
    if len(output) == 0:
        output_file = "dh-analytics-hps.ipynb"
    else:
        output_file = output

    if len(path) != 1:
        # TODO: comparative analytics across several inputs.
        raise DeephyperRuntimeError(
            "Comparative analytics for HPS is not available yet!"
        )

    hps.hps_analytics(path, output_file)
def preprocess_data(self):
    """Fit the preprocessor on train+valid data and transform both in place.

    Inputs and targets are stacked column-wise, the training and validation
    rows are stacked on top of each other, and the preprocessor returned by
    ``self.preprocessing_func()`` is fitted on the whole table. The transformed
    table is then cut back into ``train_X``/``train_Y``/``valid_X``/``valid_Y``.
    Nothing happens for generator data, when no preprocessing function is set,
    or when ``train_Y`` is not 2-dimensional.

    Raises:
        DeephyperRuntimeError: if a preprocessor has already been fitted.
    """
    if self.data_config_type == "gen":
        return

    if self.preprocessor is not None:
        raise DeephyperRuntimeError("You can only preprocess data one time.")

    if not self.preprocessing_func:
        logger.info("no preprocessing function")
        return

    logger.debug(f"preprocess_data with: {str(self.preprocessing_func)}")

    if len(np.shape(self.train_Y)) != 2:
        return

    stacked_train = np.concatenate((*self.train_X, self.train_Y), axis=1)
    stacked_valid = np.concatenate((*self.valid_X, self.valid_Y), axis=1)
    stacked = np.concatenate((stacked_train, stacked_valid), axis=0)

    self.preprocessor = self.preprocessing_func()

    n_train = np.shape(stacked_train)[0]
    widths = [np.shape(x)[1] for x in self.train_X]

    transformed = self.preprocessor.fit_transform(stacked)

    # Column interval occupied by each input; the targets take what is left.
    bounds = []
    first_target_col = 0
    for width in widths:
        bounds.append((first_target_col, first_target_col + width))
        first_target_col += width

    self.train_X = [transformed[:n_train, lo:hi] for lo, hi in bounds]
    self.train_Y = transformed[:n_train, first_target_col:]

    self.valid_X = [transformed[n_train:, lo:hi] for lo, hi in bounds]
    self.valid_Y = transformed[n_train:, first_target_col:]
def model_predict(model_path, X, batch_size=32, verbose=0):
    """Perform an inference of the model located at ``model_path``.

    :meta private:

    Args:
        model_path (str): Path to the ``h5`` file to load to perform the inference.
        X (array): array of input data for which we perform the inference.
        batch_size (int, optional): Batch size used to perform the inference. Defaults to 32.
        verbose (int, optional): Verbose option. Defaults to 0.

    Raises:
        DeephyperRuntimeError: if the model outputs a distribution that has no
            ``loc`` or no ``scale`` attribute.

    Returns:
        array: The prediction based on the provided input data, or ``None`` if
        the model could not be loaded. For a distribution output the ``loc``
        and ``scale`` values are concatenated along axis 1.
    """
    import tensorflow as tf
    import tensorflow_probability as tfp

    # GPU Configuration if available
    set_memory_growth_for_visible_gpus(True)
    tf.keras.backend.clear_session()
    model_file = model_path.split("/")[-1]

    try:
        if verbose:
            print(f"Loading model {model_file}", end="\n", flush=True)
        model = tf.keras.models.load_model(model_path, compile=False)
    except Exception:
        # Loading is best-effort, but KeyboardInterrupt/SystemExit must still
        # propagate, hence ``Exception`` rather than a bare ``except``.
        if verbose:
            print(f"Could not load model {model_file}", flush=True)
            traceback.print_exc()
        return None

    def batch_predict(dataset, convert_func=lambda x: x):
        y_list = []
        for batch in dataset:
            y = model(batch, training=False)
            y_list.append(convert_func(y))
        return np.concatenate(y_list, axis=0)

    def loc_and_scale(dist):
        return np.concatenate([dist.loc, dist.scale], axis=1)

    # A single sample is enough to test the type of the output.
    y_dist = model(X[:1], training=False)

    if not isinstance(y_dist, tfp.distributions.Distribution):
        return model.predict(X, batch_size=batch_size)

    if not (hasattr(y_dist, "loc") and hasattr(y_dist, "scale")):
        raise DeephyperRuntimeError(
            "Distribution doesn't have 'loc' or 'scale' attributes!"
        )

    # The batched dataset is only needed on the distribution path.
    dataset = tf.data.Dataset.from_tensor_slices(X).batch(batch_size)
    return batch_predict(dataset, loc_and_scale)
def plot_for_single_csv(path: str, xy: list):
    """Show a scatter plot of two columns of a CSV file.

    :meta private:

    Args:
        path (str): Path to the CSV file.
        xy (list): Column names for the x-axis and the y-axis. An empty
            ``list`` selects ``"elapsed_sec"`` and ``"objective"``.

    Raises:
        DeephyperRuntimeError: if ``xy`` has 1 or more than 2 elements.
    """
    if len(xy) not in (0, 2):
        raise DeephyperRuntimeError(
            "--xy must take two arguments such as '--xy elapsed_sec objective'"
        )
    if len(xy) == 0:
        xy = ["elapsed_sec", "objective"]

    x_name, y_name = xy[0], xy[1]
    df = pd.read_csv(path)

    plt.figure()
    plt.scatter(df[x_name], df[y_name], s=5, alpha=1.0)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.grid()
    plt.tight_layout()
    plt.show()
def _setup_optimizer(self):
    """Instantiate ``self.optimizer`` from ``self.optimizer_name``.

    Only the keyword arguments that appear in the optimizer's signature are
    passed: the learning rate (as ``lr`` or ``learning_rate``) and, when
    accepted, ``epsilon``, ``momentum`` and ``nesterov``.

    Raises:
        DeephyperRuntimeError: if the optimizer accepts neither ``lr`` nor
            ``learning_rate``.
    """
    optimizer_fn = U.selectOptimizer_keras(self.optimizer_name)
    accepted = signature(optimizer_fn).parameters

    if "lr" in accepted:
        lr_key = "lr"
    elif "learning_rate" in accepted:
        lr_key = "learning_rate"
    else:
        raise DeephyperRuntimeError(
            f"The learning_rate parameter is not found amoung optimiser arguments: {accepted}"
        )
    params = {lr_key: self.learning_rate}

    # (optimizer keyword, attribute of self holding its value)
    optional_arguments = (
        ("epsilon", "optimizer_eps"),
        ("momentum", "momentum"),
        ("nesterov", "nesterov"),
    )
    for keyword, attribute in optional_arguments:
        if keyword in accepted:
            params[keyword] = getattr(self, attribute)

    self.optimizer = optimizer_fn(**params)
def create(run_function, method="subprocess", method_kwargs=None):
    """Create evaluator with a specific backend and configuration.

    Args:
        run_function (function): the function to execute in parallel.
        method (str, optional): the backend to use; must be a key of
            ``EVALUATORS``. Defaults to "subprocess".
        method_kwargs (dict, optional): configuration dictionary of the
            corresponding backend. Keys correspond to the keyword arguments of
            the corresponding implementation. Defaults to ``None``, which is
            treated as an empty dictionary.

    Raises:
        DeephyperRuntimeError: if the ``method`` is not acceptable.

    Returns:
        Evaluator: the ``Evaluator`` with the corresponding backend and configuration.
    """
    if method not in EVALUATORS:
        val = ", ".join(EVALUATORS)
        raise DeephyperRuntimeError(
            f'The method "{method}" is not a valid method for an Evaluator!'
            f" Choose among the following evalutor types: "
            f"{val}."
        )

    # A ``None`` default avoids sharing one mutable dict across all calls.
    if method_kwargs is None:
        method_kwargs = {}

    # EVALUATORS maps the method to "<module name>.<class name>".
    mod_name, attr_name = EVALUATORS[method].split(".")
    mod = importlib.import_module(f"deephyper.evaluator.{mod_name}")
    eval_cls = getattr(mod, attr_name)
    return eval_cls(run_function, **method_kwargs)
def op(self):
    """Return the operation selected by the index of the source node.

    Raises:
        DeephyperRuntimeError: if this object and ``self.node`` do not report
            the same ``num_ops``.
    """
    if self.num_ops == self.node.num_ops:
        return self._ops[self.node._index]

    raise DeephyperRuntimeError(
        f"{str(self)} and {str(self.node)} should have the same number of opertions, when {str(self)} has {self.num_ops} and {str(self.node)} has {self.node.num_ops}!"
    )
def output_best_configuration_from_df(
    df: "pd.DataFrame", output: str, k: int, **kwargs
) -> None:
    """Output the ``k`` rows of ``df`` with the largest ``objective``.

    :meta private:

    Args:
        df (DataFrame): a Pandas DataFrame with an ``objective`` column.
        output (str): Path of the output file ending in (.csv|.yaml|.json).
            An empty string prints the result as YAML instead.
        k (int): Number of configurations to output.

    Raises:
        DeephyperRuntimeError: if the extension of ``output`` is not supported.
    """
    ranked = df.sort_values(by=["objective"], ascending=False, ignore_index=True)
    subdf = ranked.iloc[:k]

    def as_records():
        # Round-trip through JSON so YAML only receives plain Python types.
        return json.loads(subdf.to_json(orient="index"))

    if len(output) == 0:
        print(yaml.dump(as_records()))
        return

    output_extension = output.split(".")[-1]
    if output_extension == "yaml":
        # Explicit encoding keeps the file identical across platforms.
        with open(output, "w", encoding="utf-8") as f:
            yaml.dump(as_records(), f)
    elif output_extension == "csv":
        subdf.to_csv(output)
    elif output_extension == "json":
        subdf.to_json(output, orient="index")
    else:
        raise DeephyperRuntimeError(
            f"The specified output extension is not supported: {output_extension}"
        )
def notebook_for_posttrain(path: list, output: str) -> None:
    """Generate the post-training analytics notebook for a single input.

    Args:
        path (list): Input paths; exactly one is supported.
        output (str): Name of the notebook to write. An empty string selects
            ``"dh-analytics-posttrain.ipynb"``.

    Raises:
        DeephyperRuntimeError: if ``path`` does not hold exactly one element.
    """
    if len(output) == 0:
        output_file = "dh-analytics-posttrain.ipynb"
    else:
        output_file = output

    if len(path) != 1:
        # TODO: comparative analytics across several inputs.
        raise DeephyperRuntimeError(
            "Comparative analytics for Post-Train is not available yet!"
        )

    post_train.post_train_analytics(path, output_file)
def __init__(
    self,
    run_function,
    cache_key=None,
    encoder=Encoder,
    seed=None,
    num_workers=None,
    **kwargs,
):
    """Initialise the bookkeeping of an evaluator.

    Args:
        run_function (function): the function to evaluate; it must not live
            in the ``__main__`` module.
        cache_key (optional): how evaluation ids are generated. ``None`` or
            ``"to_dict"`` uses ``self.encode``, a callable is used as is, and
            ``"uuid"`` draws a fresh ``uuid.uuid4()`` for every call.
        encoder (optional): stored as ``self.encoder``. Defaults to ``Encoder``.
        seed (optional): stored as ``self.seed``.
        num_workers (optional): stored as ``self.num_workers``.
        **kwargs: accepted but not used here.

    Raises:
        DeephyperRuntimeError: if ``cache_key`` is none of the accepted
            values, or if ``run_function`` is defined in ``__main__``.
    """
    self._run_function = run_function
    self.encoder = encoder  # dict --> uuid
    self.num_workers = num_workers

    # Evaluation bookkeeping.
    self.pending_evals = {}  # uid --> Future
    self.finished_evals = OrderedDict()  # uid --> scalar
    self.requested_evals = []  # keys
    self.key_uid_map = {}  # map keys to uids
    self.uid_key_map = {}  # map uids to keys
    self.elapsed_times = {}
    self.stats = {"num_cache_used": 0}

    self.seed = seed
    self.seed_high = 2**32  # exclusive
    self.transaction_context = dummy_context
    self._start_sec = time.time()

    if cache_key is None or cache_key == "to_dict":
        self._gen_uid = lambda d: self.encode(d)
    elif callable(cache_key):
        self._gen_uid = cache_key
    elif cache_key == "uuid":
        self._gen_uid = lambda d: uuid.uuid4()
    else:
        raise DeephyperRuntimeError(
            'The "cache_key" parameter of an Evaluator must be a callable!'
        )

    moduleName = self._run_function.__module__
    logger.info(f'moduleName == {moduleName} run_function = {self._run_function}')
    if moduleName == "__main__":
        raise DeephyperRuntimeError(
            f'Evaluator will not execute function " {run_function.__name__}" because it is in the __main__ module. Please provide a function imported from an external module!'
        )
def output_best_configuration(path: str, output: str, k: int, **kwargs) -> None:
    """Output the ``k`` best configurations of a CSV file of results.

    Rows are ranked by decreasing ``objective``. When the file has no
    ``arch_seq`` column, one is built from every column except the last two,
    cast to integers.

    Args:
        path (str): Path of the CSV input file.
        output (str): Path of the output file ending in (.csv|.yaml|.json).
            An empty string prints the result as YAML instead.
        k (int): Number of configurations to output.

    Raises:
        DeephyperRuntimeError: if the input or the output extension is not
            supported.
    """
    input_extension = path.split(".")[-1]
    if input_extension != "csv":
        raise DeephyperRuntimeError(
            f"The specified input file extension '{input_extension}' is not supported."
        )

    ranked = pd.read_csv(path).sort_values(
        by=["objective"], ascending=False, ignore_index=True
    )
    subdf = ranked.iloc[:k]

    if "arch_seq" not in subdf.columns:
        arch_columns = subdf.to_numpy()[:, :-2].astype(int)
        subdf = pd.DataFrame(
            {
                "arch_seq": [str(list(row)) for row in arch_columns],
                "objective": subdf.objective.tolist(),
                "elapsed_sec": subdf.elapsed_sec.tolist(),
            }
        )

    if len(output) == 0:
        print(yaml.dump(json.loads(subdf.to_json(orient="index"))))
        return

    output_extension = output.split(".")[-1]
    if output_extension == "yaml":
        with open(output, "w") as f:
            yaml.dump(json.loads(subdf.to_json(orient="index")), f)
    elif output_extension == "csv":
        subdf.to_csv(output)
    elif output_extension == "json":
        subdf.to_json(output, orient="index")
    else:
        raise DeephyperRuntimeError(
            f"The specified output extension is not supported: {output_extension}"
        )
def check_op_list(self, space: KSearchSpace, ops: list) -> list:
    """Return a list of operations with one entry per node of ``space``.

    Args:
        space (KSearchSpace): search space providing ``num_nodes``.
        ops (list): operations to validate. When empty, one
            ``random.random()`` value is drawn per node.

    Raises:
        DeephyperRuntimeError: if ``ops`` is not empty and its length differs
            from ``space.num_nodes``.

    Returns:
        list: ``ops`` itself, or the freshly drawn list.
    """
    if len(ops) == 0:
        return [random.random() for _ in range(space.num_nodes)]

    if len(ops) != space.num_nodes:
        raise DeephyperRuntimeError(
            f"The argument list 'ops' should be of length {space.num_nodes} but is {len(ops)}!"
        )
    return ops
def main(type: str, path: list, *args, **kwargs) -> None:
    """Dispatch notebook generation according to ``type``.

    Args:
        type (str): one of ``"hps"``, ``"nas"`` or ``"posttrain"``.
        path (list): input paths forwarded to the selected generator.
        **kwargs: forwarded to the selected generator.

    Raises:
        DeephyperRuntimeError: if ``type`` is not one of the supported values.
    """
    if type not in ("hps", "nas", "posttrain"):
        raise DeephyperRuntimeError(
            f"The notebook TYPE '{type}' passed is not supported."
        )

    if type == "hps":
        notebook_for_hps(path, **kwargs)
    elif type == "nas":
        notebook_for_nas(path, **kwargs)
    else:
        notebook_for_posttrain(path, **kwargs)
def create(run_function, cache_key=None, method="subprocess", redis_address=None, **kwargs):
    """Build the evaluator implementing ``method``.

    The backend module is imported only once the method is selected, so
    unused backends do not have to be installed.

    Args:
        run_function (function): the function to evaluate.
        cache_key (optional): forwarded to the evaluator.
        method (str, optional): one of "balsam", "subprocess", "processPool",
            "threadPool", "__mpiPool" or "ray". Defaults to "subprocess".
        redis_address (optional): forwarded to the "ray" evaluator only.
        **kwargs: forwarded to the evaluator.

    Raises:
        DeephyperRuntimeError: if ``method`` is not one of the values above.

    Returns:
        The evaluator instance.
    """
    supported = (
        "balsam",
        "subprocess",
        "processPool",
        "threadPool",
        "__mpiPool",
        "ray",
    )
    if method not in supported:
        raise DeephyperRuntimeError(
            f'The method "{method}" is not a valid method for an Evaluator!'
        )

    # Keyword arguments understood by the selected backend only.
    backend_kwargs = {}

    if method == "balsam":
        from deephyper.evaluator._balsam import BalsamEvaluator as evaluator_cls
    elif method == "subprocess":
        from deephyper.evaluator._subprocess import SubprocessEvaluator as evaluator_cls
    elif method == "processPool":
        from deephyper.evaluator._processPool import ProcessPoolEvaluator as evaluator_cls
    elif method == "threadPool":
        from deephyper.evaluator._threadPool import ThreadPoolEvaluator as evaluator_cls
    elif method == "__mpiPool":
        from deephyper.evaluator._mpiWorkerPool import MPIWorkerPool as evaluator_cls
    else:  # method == "ray"
        from deephyper.evaluator.ray_evaluator import RayEvaluator as evaluator_cls

        backend_kwargs["redis_address"] = redis_address

    return evaluator_cls(run_function, cache_key=cache_key, **backend_kwargs, **kwargs)
def predict(self, dataset: str = "valid", keep_normalize: bool = False) -> tuple:
    """Run the model on a dataset and return targets next to predictions.

    Args:
        dataset (str, optional): 'valid' or 'train'. Defaults to 'valid'.
        keep_normalize (bool, optional): if False the preprocessing is
            reversed on both targets and predictions (only when a
            preprocessing function is set and the data are not a generator).
            If True nothing is reversed. Defaults to False.

    Raises:
        DeephyperRuntimeError: if ``dataset`` is neither 'valid' nor 'train'.

    Returns:
        tuple: (y_true, y_pred)
    """
    if dataset not in ("valid", "train"):
        raise DeephyperRuntimeError(
            "dataset parameter should be equal to: 'valid' or 'train'"
        )
    on_valid = dataset == "valid"

    if on_valid:
        # Round the number of steps up so the last partial batch is included.
        valid_steps = self.valid_size // self.batch_size
        if valid_steps * self.batch_size < self.valid_size:
            valid_steps += 1
        y_pred = self.model.predict(self.dataset_valid, steps=valid_steps)
    else:
        y_pred = self.model.predict(
            self.dataset_train, steps=self.train_steps_per_epoch
        )

    reverse_preprocessing = (
        self.preprocessing_func
        and not keep_normalize
        and not self.data_config_type == "gen"
    )

    if reverse_preprocessing:
        if on_valid:
            data_X, data_Y = self.valid_X, self.valid_Y
        else:
            data_X, data_Y = self.train_X, self.train_Y

        # The preprocessor works on inputs and targets stacked column-wise,
        # so both tables are rebuilt before being mapped back.
        stacked_pred = np.concatenate((*data_X, y_pred), axis=1)
        stacked_orig = np.concatenate((*data_X, data_Y), axis=1)
        restored_pred = self.preprocessor.inverse_transform(stacked_pred)
        restored_orig = self.preprocessor.inverse_transform(stacked_orig)

        # The targets are the trailing columns.
        y_orig = restored_orig[:, -np.shape(data_Y)[1] :]
        y_pred = restored_pred[:, -np.shape(data_Y)[1] :]
    elif self.data_config_type == "ndarray":
        y_orig = self.valid_Y if on_valid else self.train_Y
    else:
        gen = self.valid_gen() if on_valid else self.train_gen()
        y_orig = np.array([sample[-1] for sample in gen])

    return y_orig, y_pred
def preprocess_data(self):
    """Fit the preprocessor on the training data and transform both splits.

    Inputs and targets are concatenated along the last axis. The preprocessor
    returned by ``self.preprocessing_func()`` is fitted on the training table
    only and then applied to the validation table. Both transformed tables
    are cut back into ``train_X``/``train_Y`` and ``valid_X``/``valid_Y``.

    The step is skipped (with a log message) for generator data, when no
    preprocessing function is defined, or when some training input does not
    have the same number of dimensions as ``train_Y``.

    Raises:
        DeephyperRuntimeError: if a preprocessor has already been fitted.
    """
    logger.debug("Starting preprocess of data")

    if self.data_config_type == "gen":
        # ``warning`` replaces the deprecated ``warn`` alias.
        logger.warning("Cannot preprocess data with generator!")
        return

    if self.preprocessor is not None:
        raise DeephyperRuntimeError("You can only preprocess data one time.")

    if not self.preprocessing_func:
        logger.info("Skipped preprocess of data because no function is defined!")
        return

    logger.debug(f"preprocess_data with: {str(self.preprocessing_func)}")

    target_rank = len(np.shape(self.train_Y))
    if any(len(np.shape(tX)) != target_rank for tX in self.train_X):
        logger.warning(
            f"Skipped preprocess because shape {np.shape(self.train_Y)} is not handled!"
        )
        return

    data_train = np.concatenate((*self.train_X, self.train_Y), axis=-1)
    data_valid = np.concatenate((*self.valid_X, self.valid_Y), axis=-1)

    self.preprocessor = self.preprocessing_func()

    # Widths are read on the LAST axis because that is the axis used for both
    # the concatenation above and the slicing below. For 2-D data this is the
    # same as axis 1.
    widths = [np.shape(x)[-1] for x in self.train_X]

    preproc_data_train = self.preprocessor.fit_transform(data_train)
    preproc_data_valid = self.preprocessor.transform(data_valid)

    def split_features(table):
        """Cut ``table`` back into the list of inputs and the targets."""
        inputs, start = [], 0
        for width in widths:
            inputs.append(table[..., start : start + width])
            start += width
        return inputs, table[..., start:]

    self.train_X, self.train_Y = split_features(preproc_data_train)
    self.valid_X, self.valid_Y = split_features(preproc_data_valid)
def model_compile(self):
    """Build the Horovod-distributed optimizer and compile ``self.model``.

    The optimizer selected by ``self.optimizer_name`` receives the learning
    rate (as ``lr`` or ``learning_rate``, whichever its signature accepts),
    ``epsilon`` when accepted, and ``clipvalue`` when ``self.clipvalue`` is
    set. It is then wrapped in ``hvd.DistributedOptimizer``.
    ``loss_weights`` are passed to ``compile`` only when the losses are given
    as a dictionary.

    Raises:
        DeephyperRuntimeError: if the optimizer accepts neither ``lr`` nor
            ``learning_rate``.
    """
    optimizer_fn = U.selectOptimizer_keras(self.optimizer_name)
    opti_parameters = signature(optimizer_fn).parameters
    params = {}

    # "lr" and "learning_rate" is checked depending if Keras or Tensorflow optimizer is used
    if "lr" in opti_parameters:
        params["lr"] = self.learning_rate
    elif "learning_rate" in opti_parameters:
        params["learning_rate"] = self.learning_rate
    else:
        raise DeephyperRuntimeError(
            f"The learning_rate parameter is not found amoung optimiser arguments: {opti_parameters}"
        )

    if "epsilon" in opti_parameters:
        params["epsilon"] = self.optimizer_eps

    if self.clipvalue is not None:
        params["clipvalue"] = self.clipvalue

    self.optimizer = hvd.DistributedOptimizer(optimizer_fn(**params))

    compile_kwargs = {
        "optimizer": self.optimizer,
        "loss": self.loss_metrics,
        "metrics": self.metrics_name,
    }
    # isinstance also accepts dict subclasses such as OrderedDict.
    if isinstance(self.loss_metrics, dict):
        compile_kwargs["loss_weights"] = self.loss_weights
    self.model.compile(**compile_kwargs)
def import_callback(cb_name: str) -> Type[tf.keras.callbacks.Callback]:
    """Look up a callback class by name.

    ``tensorflow.keras.callbacks`` is searched first, then
    ``deephyper.contrib.callbacks``.

    Args:
        cb_name (str): class name of the callback to import.

    Raises:
        DeephyperRuntimeError: raised if the name is found in neither package.

    Returns:
        tensorflow.keras.callbacks.Callback: the class corresponding to the given class name.
    """
    if cb_name in dir(tf.keras.callbacks):
        return getattr(tf.keras.callbacks, cb_name)

    if cb_name in dir(deephyper.contrib.callbacks):
        return getattr(deephyper.contrib.callbacks, cb_name)

    raise DeephyperRuntimeError(
        f"Callback '{cb_name}' is not registered in tensorflow.keras and deephyper.contrib.callbacks."
    )
def output_best_configuration_from_csv(
    path: str, output: str, k: int, **kwargs
) -> None:
    """Read a CSV file and output its best configurations.

    The file is loaded with pandas and handed to
    ``output_best_configuration_from_df``.

    :meta private:

    Args:
        path (str): Path of the CSV input file.
        output (str): Path of the output file ending in (.csv|.yaml|.json).
        k (int): Number of configurations to output.

    Raises:
        DeephyperRuntimeError: if ``path`` does not end with the ``csv``
            extension.
    """
    input_extension = path.split(".")[-1]
    if input_extension != "csv":
        raise DeephyperRuntimeError(
            f"The specified input file extension '{input_extension}' is not supported."
        )

    results = pd.read_csv(path)
    output_best_configuration_from_df(results, output, k)
def load_data(self):
    """Load the data and derive the number of steps per epoch.

    The kind of data is detected with ``U.check_data_config`` and the
    matching loader is called. The training and validation step counts are
    the sizes divided by the batch size, rounded up.

    Raises:
        DeephyperRuntimeError: if the detected kind is neither ``"gen"`` nor
            ``"ndarray"``.
    """
    logger.debug("load_data")

    self.data_config_type = U.check_data_config(self.data)
    logger.debug(f"data config type: {self.data_config_type}")

    if self.data_config_type == "gen":
        self.load_data_generator()
    elif self.data_config_type == "ndarray":
        self.load_data_ndarray()
    else:
        raise DeephyperRuntimeError(
            f"Data config is not supported by this Trainer: '{self.data_config_type}'!"
        )

    # prepare number of steps for training and validation
    train_steps = self.train_size // self.batch_size
    if train_steps * self.batch_size < self.train_size:
        train_steps += 1  # one more step for the last partial batch
    self.train_steps_per_epoch = train_steps

    valid_steps = self.valid_size // self.batch_size
    if valid_steps * self.batch_size < self.valid_size:
        valid_steps += 1
    self.valid_steps_per_epoch = valid_steps
def model_compile(self):
    """Build the optimizer and compile ``self.model``.

    The optimizer selected by ``self.optimizer_name`` receives the learning
    rate (as ``lr`` or ``learning_rate``, whichever its signature accepts)
    and, when accepted, ``epsilon`` and ``decay``. The decay is
    ``learning_rate / num_epochs``, or 1 when ``num_epochs`` is not positive.
    ``loss_weights`` are passed to ``compile`` only when the losses are given
    as a dictionary.

    Raises:
        DeephyperRuntimeError: if the optimizer accepts neither ``lr`` nor
            ``learning_rate``.
    """
    optimizer_fn = U.selectOptimizer_keras(self.optimizer_name)
    opti_parameters = signature(optimizer_fn).parameters
    params = {}

    if "lr" in opti_parameters:
        params["lr"] = self.learning_rate
    elif "learning_rate" in opti_parameters:
        params["learning_rate"] = self.learning_rate
    else:
        raise DeephyperRuntimeError(
            f"The learning_rate parameter is not found amoung optimiser arguments: {opti_parameters}"
        )

    if "epsilon" in opti_parameters:
        params["epsilon"] = self.optimizer_eps

    if "decay" in opti_parameters:
        # Computed only here: the value is unused by optimizers without decay.
        params["decay"] = (
            self.learning_rate / self.num_epochs if self.num_epochs > 0 else 1
        )

    self.optimizer = optimizer_fn(**params)

    compile_kwargs = {
        "optimizer": self.optimizer,
        "loss": self.loss_metrics,
        "metrics": self.metrics_name,
    }
    # isinstance also accepts dict subclasses such as OrderedDict.
    if isinstance(self.loss_metrics, dict):
        compile_kwargs["loss_weights"] = self.loss_weights
    self.model.compile(**compile_kwargs)
def plot_for_multiple_json(path: list, xy: list):
    """Plot one quantity from several JSON history files on the same figure.

    :meta private:

    Args:
        path (list): Paths of the JSON files to compare.
        xy (list): x-axis and y-axis quantities. An empty ``list`` selects
            ``"epochs"`` and ``"val_loss"``. The x-axis must be ``"epochs"``
            or ``"time"``.

    Raises:
        DeephyperRuntimeError: if ``xy`` has 1 or more than 2 elements, or if
            the x-axis is neither ``"epochs"`` nor ``"time"``.
    """
    if len(xy) == 0:
        xy = ["epochs", "val_loss"]
    elif len(xy) != 2:
        raise DeephyperRuntimeError(
            "--xy must take two arguments such as '--xy epochs val_loss'"
        )

    xlabel, ylabel = xy

    # Reject an unknown x-axis before opening a figure; otherwise an empty
    # plot would be shown without any explanation.
    if xlabel not in ("epochs", "time"):
        raise DeephyperRuntimeError(
            f"x-axis '{xlabel}' is not supported when comparing multiple files, "
            "choose 'epochs' or 'time'."
        )

    plt.figure()

    if xlabel == "epochs":
        plot_multiple_training(path, ylabel)
    else:
        plot_multiple_objective_wrp_time(path, ylabel)

    plt.ylabel(ylabel)
    plt.grid()
    plt.tight_layout()
    plt.show()
def plot_for_single_json(path: str, xy: list):
    """Show a line plot of one quantity stored in a JSON history file.

    :meta private:

    Args:
        path (str): Path to the JSON file, read as a mapping from quantity
            name to a sequence of values.
        xy (list): x-axis and y-axis quantities. An empty ``list`` selects
            ``"epochs"`` and ``"val_loss"``. With ``"epochs"`` as x-axis the
            abscissa is the position of each y value; otherwise it is read
            from the file.

    Raises:
        DeephyperRuntimeError: if ``xy`` has 1 or more than 2 elements.
    """
    if len(xy) not in (0, 2):
        raise DeephyperRuntimeError(
            "--xy must take two arguments such as '--xy epochs val_loss'"
        )
    if len(xy) == 0:
        xy = ["epochs", "val_loss"]
    xlabel, ylabel = xy

    with open(path, "r") as f:
        history = json.load(f)

    if xlabel == "epochs":
        x = list(range(len(history[ylabel])))
    else:
        x = history[xlabel]
    y = history[ylabel]

    plt.figure()
    plt.plot(x, y)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid()
    plt.tight_layout()
    plt.show()
def train(self, num_epochs: int = None, with_pred: bool = False, last_only: bool = False):
    """Train the model.

    Args:
        num_epochs (int, optional): override the num_epochs passed to init the
            Trainer. Defaults to None, will use the num_epochs passed to init
            the Trainer.
        with_pred (bool, optional): will compute a prediction after the
            training and will add ('y_true', 'y_pred') to the output history.
            Defaults to False, will skip it (use it to save compute time).
        last_only (bool, optional): will compute metrics after the last epoch
            only. Defaults to False, will compute metrics after each training
            epoch (use it to save compute time).

    Raises:
        DeephyperRuntimeError: raised when the ``num_epochs < 0``.

    Returns:
        dict: a dictionary corresponding to the training.
    """
    if num_epochs is None:
        num_epochs = self.num_epochs

    self.init_history()

    if num_epochs < 0:
        raise DeephyperRuntimeError(
            f"Trainer: number of epochs should be >= 0: {num_epochs}"
        )

    if num_epochs > 0:
        # Keyword arguments shared by every ``fit`` call below.
        fit_kwargs = dict(
            verbose=self.verbose,
            steps_per_epoch=self.train_steps_per_epoch,
            callbacks=self.callbacks,
        )

        time_start_training = time.time()  # TIMING

        if last_only:
            logger.info(
                "Trainer is computing metrics on validation after the last training epoch."
            )
            # All epochs but the last run without validation.
            if num_epochs > 1:
                self.model.fit(self.dataset_train, epochs=num_epochs - 1, **fit_kwargs)
            history = self.model.fit(
                self.dataset_train,
                epochs=1,
                validation_data=self.dataset_valid,
                validation_steps=self.valid_steps_per_epoch,
                **fit_kwargs,
            )
        else:
            logger.info(
                "Trainer is computing metrics on validation after each training epoch."
            )
            history = self.model.fit(
                self.dataset_train,
                epochs=num_epochs,
                validation_data=self.dataset_valid,
                validation_steps=self.valid_steps_per_epoch,
                **fit_kwargs,
            )

        time_end_training = time.time()  # TIMING
        self.train_history["training_time"] = time_end_training - time_start_training
        self.train_history.update(history.history)

    if with_pred:
        time_start_predict = time.time()
        y_true, y_pred = self.predict(dataset="valid")
        time_end_predict = time.time()
        self.train_history["val_predict_time"] = time_end_predict - time_start_predict
        self.train_history["y_true"] = y_true
        self.train_history["y_pred"] = y_pred

    return self.train_history
def load_data_ndarray(self):
    """Load train/validation arrays from ``self.config`` and record sizes.

    Outputs (``train_Y``/``valid_Y``) and inputs (``train_X``/``valid_X``)
    may each be given as a single ``np.ndarray`` or as a list of
    ``np.ndarray``; the training and validation entries must be of the same
    kind. A single input array is wrapped in a list. ``train_size`` and
    ``valid_size`` are taken from the first axis of the first input.

    Raises:
        DeephyperRuntimeError: if the data are of an unsupported type, if a
            list holds something else than ``np.ndarray``, or if the inputs
            of a split do not all have the same length.
    """

    def is_ndarray(x):
        return type(x) is np.ndarray

    data = self.config[a.data]

    # Output data
    train_Y, valid_Y = data[a.train_Y], data[a.valid_Y]
    if is_ndarray(train_Y) and is_ndarray(valid_Y):
        self.train_Y = train_Y
        self.valid_Y = valid_Y
    elif type(train_Y) is list and type(valid_Y) is list:
        if not all(map(is_ndarray, train_Y)) or not all(map(is_ndarray, valid_Y)):
            raise DeephyperRuntimeError(
                "all outputs data should be of type np.ndarray !"
            )
        if len(train_Y) > 1 and len(valid_Y) > 1:
            self.train_Y = train_Y
            self.valid_Y = valid_Y
        else:
            self.train_Y = train_Y[0]
            self.valid_Y = valid_Y[0]
    else:
        # The message reports the type of valid_Y itself. It used to index
        # the config with the wrong keys, which raised KeyError instead of
        # this error.
        raise DeephyperRuntimeError(
            f"Data are of an unsupported type and should be of same type: type(self.config['data']['train_Y'])=={type(train_Y)} and type(self.config['data']['valid_Y'])=={type(valid_Y)} !"
        )

    # Input data
    train_X, valid_X = data[a.train_X], data[a.valid_X]
    if is_ndarray(train_X) and is_ndarray(valid_X):
        self.train_X = [train_X]
        self.valid_X = [valid_X]
    elif type(train_X) is list and type(valid_X) is list:
        if not all(map(is_ndarray, train_X)) or not all(map(is_ndarray, valid_X)):
            raise DeephyperRuntimeError(
                "all inputs data should be of type np.ndarray !"
            )
        if len(train_X) > 1 and len(valid_X) > 1:
            self.train_X = train_X
            self.valid_X = valid_X
        else:
            # NOTE(review): this stores the bare array, not a one-element
            # list, so the size check below reads the shape of its first row.
            # Looks unintended; behavior kept as is - confirm with callers.
            self.train_X = train_X[0]
            self.valid_X = valid_X[0]
    else:
        raise DeephyperRuntimeError(
            f"Data are of an unsupported type and should be of same type: type(self.config['data']['train_X'])=={type(train_X)} and type(self.config['data']['valid_X'])=={type(valid_X)} !"
        )

    # check data length
    self.train_size = np.shape(self.train_X[0])[0]
    if not all(np.shape(x)[0] == self.train_size for x in self.train_X):
        raise DeephyperRuntimeError(
            "All training inputs data should have same length!"
        )

    self.valid_size = np.shape(self.valid_X[0])[0]
    if not all(np.shape(x)[0] == self.valid_size for x in self.valid_X):
        raise DeephyperRuntimeError(
            "All validation inputs data should have same length!"
        )
def create(
    run_function,
    cache_key=None,
    method="subprocess",
    ray_address=None,
    ray_password=None,
    num_workers=None,
    **kwargs,
):
    """Build the evaluator implementing ``method``.

    The backend module is imported only once the method is selected, so
    unused backends do not have to be installed.

    Args:
        run_function (function): the function to evaluate.
        cache_key (optional): forwarded to the evaluator.
        method (str, optional): one of "balsam", "subprocess", "processPool",
            "threadPool", "__mpiPool", "ray" or "rayhorovod". Defaults to
            "subprocess".
        ray_address (optional): forwarded to the Ray-based evaluators only.
        ray_password (optional): forwarded to the Ray-based evaluators only.
        num_workers (int, optional): forwarded to the Ray-based evaluators,
            and set as ``num_workers`` on any evaluator when it is an ``int``.
        **kwargs: forwarded to the evaluator.

    Raises:
        DeephyperRuntimeError: if ``method`` is not one of the values above.

    Returns:
        The evaluator instance.
    """
    supported = (
        "balsam",
        "subprocess",
        "processPool",
        "threadPool",
        "__mpiPool",
        "ray",
        "rayhorovod",
    )
    if method not in supported:
        raise DeephyperRuntimeError(
            f'The method "{method}" is not a valid method for an Evaluator!'
        )

    # Keyword arguments understood by the Ray-based backends only.
    backend_kwargs = {}
    ray_kwargs = dict(
        ray_address=ray_address,
        ray_password=ray_password,
        num_workers=num_workers,
    )

    if method == "balsam":
        from deephyper.evaluator._balsam import BalsamEvaluator as evaluator_cls
    elif method == "subprocess":
        from deephyper.evaluator._subprocess import SubprocessEvaluator as evaluator_cls
    elif method == "processPool":
        from deephyper.evaluator._processPool import ProcessPoolEvaluator as evaluator_cls
    elif method == "threadPool":
        from deephyper.evaluator._threadPool import ThreadPoolEvaluator as evaluator_cls
    elif method == "__mpiPool":
        from deephyper.evaluator._mpiWorkerPool import MPIWorkerPool as evaluator_cls
    elif method == "ray":
        from deephyper.evaluator._ray_evaluator import RayEvaluator as evaluator_cls

        backend_kwargs = ray_kwargs
    else:  # method == "rayhorovod"
        from deephyper.evaluator._ray_horovod_evaluator import (
            RayHorovodEvaluator as evaluator_cls,
        )

        backend_kwargs = ray_kwargs

    evaluator = evaluator_cls(
        run_function, cache_key=cache_key, **backend_kwargs, **kwargs
    )

    # Override the number of workers if passed as an argument
    if type(num_workers) is int:
        evaluator.num_workers = num_workers

    return evaluator
def set_op(self):
    """Initialise the operation selected by the index of the source node.

    Raises:
        DeephyperRuntimeError: if the source node has no index yet.
    """
    selected = self.node._index
    if selected is None:
        raise DeephyperRuntimeError(
            f"{str(self)} cannot be initialized because its source {str(self.node)} is not initialized!"
        )

    self._ops[selected].init(self)
def train(config):
    """Run the post-training replicas described by ``config["post_train"]``.

    For each of the ``repeat`` replicas the hyperparameters are overridden by
    the post-training ones, the model is built and trained, and the training
    history is appended to ``post_training_hist_<id>.json``. A replica whose
    model cannot be created is logged and skipped.

    Args:
        config (dict): problem configuration. The keys ``"seed"``,
            ``"post_train"``, ``"hyperparameters"`` and ``"id"`` are read
            here; the remainder is consumed by the setup helpers.

    Raises:
        DeephyperRuntimeError: if ``config`` has no ``"post_train"`` entry.

    Returns:
        The model of the last replica whose creation succeeded, or ``None``
        if no model could be created.
    """
    seed = config["seed"]
    if "post_train" not in config:
        raise DeephyperRuntimeError("The post training was not define in the Problem!")
    repeat = config["post_train"]["repeat"]

    if seed is not None:
        np.random.seed(seed)
        # must be between (0, 2**32-1)
        seeds = [np.random.randint(0, 2 ** 32 - 1) for _ in range(repeat)]

    # Defined up front so the final ``return`` cannot hit an unbound name when
    # ``repeat`` is 0 or every model creation fails.
    model = None

    for rep in range(repeat):
        tf.keras.backend.clear_session()
        # Fresh copy for each replica: the entries are updated in place below.
        default_callbacks_config = copy.deepcopy(CB_CONFIG)
        if seed is not None:
            np.random.seed(seeds[rep])
            tf.random.set_seed(seeds[rep])

        logger.info(f"Training replica {rep+1}")

        # override hyperparameters with post_train hyperparameters
        for k in config["post_train"]:
            if k in config["hyperparameters"]:
                config["hyperparameters"][k] = config["post_train"][k]

        load_config(config)
        input_shape, output_shape = setup_data(config)

        search_space = setup_search_space(config, input_shape, output_shape, seed=seed)
        search_space.draw_graphviz(f'model_{config["id"]}.dot')
        logger.info("Model operations set.")

        try:
            model = search_space.create_model()
        except Exception:
            # Best-effort: log and move on to the next replica, but let
            # KeyboardInterrupt/SystemExit propagate.
            logger.info("Error: Model creation failed...")
            logger.info(traceback.format_exc())
            continue

        # Setup callbacks
        callbacks = []
        callbacks_config = config["post_train"].get("callbacks")
        if callbacks_config is not None:
            for cb_name, cb_conf in callbacks_config.items():
                if cb_name not in default_callbacks_config:
                    logger.error(f"'{cb_name}' is not an accepted callback!")
                    continue

                default_callbacks_config[cb_name].update(cb_conf)
                if cb_name == "ModelCheckpoint":
                    default_callbacks_config[cb_name][
                        "filepath"
                    ] = f'best_model_id{config["id"]}_r{rep}.h5'

                Callback = getattr(keras.callbacks, cb_name)
                callbacks.append(Callback(**default_callbacks_config[cb_name]))

                # ``Callback`` is the class itself, so its own name is logged
                # (``type(Callback).__name__`` is always "type").
                logger.info(
                    f"Adding new callback {Callback.__name__} with config: {default_callbacks_config[cb_name]}!"
                )

        trainer = TrainerTrainValid(config=config, model=model)
        trainer.callbacks.extend(callbacks)

        json_fname = f'post_training_hist_{config["id"]}.json'
        # to log the number of trainable parameters before running training
        trainer.init_history()
        try:
            with open(json_fname, "r") as f:
                fhist = json.load(f)
        except FileNotFoundError:
            # First replica: start the file with one-element lists.
            fhist = trainer.train_history
            for k, v in fhist.items():
                fhist[k] = [v]
            with open(json_fname, "w") as f:
                json.dump(fhist, f, cls=Encoder)

        hist = trainer.train(with_pred=False, last_only=False)

        # Timing of prediction for validation dataset
        t = time()  # ! TIMING - START
        trainer.predict(dataset="valid")
        hist["val_predict_time"] = time() - t  # ! TIMING - END

        for k, v in hist.items():
            fhist.setdefault(k, []).append(v)

        with open(json_fname, "w") as f:
            json.dump(fhist, f, cls=Encoder)

    return model