Example #1
def main(path: list, xy: list, *args, **kwargs):
    """
    :meta private:
    """

    extension = lambda path: path.split(".")[-1]

    if len(path) == 1:
        if extension(path[0]) == "csv":
            plot_for_single_csv(path[0], xy)
        elif extension(path[0]) == "json":
            plot_for_single_json(path[0], xy)
        else:
            raise DeephyperRuntimeError(
                f"Extension of input file '{extension(path[0])}' is not yet supported."
            )
    else:

        # Comparing multiple results.csv files (different search experiments)
        if all([extension(p) == "csv" for p in path]):
            raise DeephyperRuntimeError(
                "Comparison of multiple experiments is not yet supported.")
        # Comparing multiple history.json files (different neural networks)
        elif all([extension(p) == "json" for p in path]):
            plot_for_multiple_json(path, xy)
        else:
            raise DeephyperRuntimeError(
                "Multiple input files should all have the same extension '.csv' or '.json'"
            )
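Note: the function above dispatches on the file extension. Below is a minimal, self-contained sketch of that dispatch pattern; the `dispatch_by_extension` helper and its handlers are hypothetical stand-ins, not part of deephyper.

def dispatch_by_extension(path: str):
    # Hypothetical handlers standing in for plot_for_single_csv / plot_for_single_json.
    handlers = {
        "csv": lambda p: print(f"would plot CSV results from {p}"),
        "json": lambda p: print(f"would plot JSON history from {p}"),
    }
    extension = path.split(".")[-1]
    if extension not in handlers:
        raise RuntimeError(f"Extension of input file '{extension}' is not yet supported.")
    return handlers[extension](path)

dispatch_by_extension("results.csv")  # would plot CSV results from results.csv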
Example #2
def main(path: list, xy: list, *args, **kwargs):

    if len(path) == 1:
        input_extension = path[0].split(".")[-1]
        if input_extension == "csv":
            plot_for_csv(path[0], xy)
        else:
            raise DeephyperRuntimeError(f"Extension of input file '{input_extension}' is not yet supported.")
    else:
        raise DeephyperRuntimeError("Multiple input files not yet supported for quickplot.")
Example #3
def notebook_for_hps(path: list, output: str) -> None:
    output_file = "dh-analytics-hps.ipynb" if len(output) == 0 else output

    if len(path) == 1:
        hps.hps_analytics(path, output_file)
    else:
        raise DeephyperRuntimeError("Comparative analytics for HPS is not available yet!") # TODO
Example #4
    def preprocess_data(self):
        if self.data_config_type == "gen":
            return

        if self.preprocessor is not None:
            raise DeephyperRuntimeError("You can only preprocess data one time.")

        if self.preprocessing_func:
            logger.debug(f"preprocess_data with: {str(self.preprocessing_func)}")

            if len(np.shape(self.train_Y)) == 2:
                data_train = np.concatenate((*self.train_X, self.train_Y), axis=1)
                data_valid = np.concatenate((*self.valid_X, self.valid_Y), axis=1)
                data = np.concatenate((data_train, data_valid), axis=0)
                self.preprocessor = self.preprocessing_func()

                dt_shp = np.shape(data_train)
                tX_shp = [np.shape(x) for x in self.train_X]

                preproc_data = self.preprocessor.fit_transform(data)

                acc, self.train_X = 0, list()
                for shp in tX_shp:
                    self.train_X.append(preproc_data[: dt_shp[0], acc : acc + shp[1]])
                    acc += shp[1]
                self.train_Y = preproc_data[: dt_shp[0], acc:]

                acc, self.valid_X = 0, list()
                for shp in tX_shp:
                    self.valid_X.append(preproc_data[dt_shp[0] :, acc : acc + shp[1]])
                    acc += shp[1]
                self.valid_Y = preproc_data[dt_shp[0] :, acc:]
        else:
            logger.info("no preprocessing function")
Example #5
def model_predict(model_path, X, batch_size=32, verbose=0):
    """Perform an inference of the model located at ``model_path``.

    :meta private:

    Args:
        model_path (str): Path to the ``h5`` file to load to perform the inference.
        X (array): array of input data for which we perform the inference.
        batch_size (int, optional): Batch size used to perform the inference. Defaults to 32.
        verbose (int, optional): Verbose option. Defaults to 0.

    Returns:
        array: The prediction based on the provided input data.
    """
    import tensorflow as tf
    import tensorflow_probability as tfp

    # GPU Configuration if available
    set_memory_growth_for_visible_gpus(True)
    tf.keras.backend.clear_session()
    model_file = model_path.split("/")[-1]

    try:
        if verbose:
            print(f"Loading model {model_file}", end="\n", flush=True)
        model = tf.keras.models.load_model(model_path, compile=False)
    except Exception:
        if verbose:
            print(f"Could not load model {model_file}", flush=True)
            traceback.print_exc()
        model = None

    dataset = tf.data.Dataset.from_tensor_slices(X)
    dataset = dataset.batch(batch_size)

    def batch_predict(dataset, convert_func=lambda x: x):
        y_list = []
        for batch in dataset:
            y = model(batch, training=False)
            y_list.append(convert_func(y))
        y = np.concatenate(y_list, axis=0)
        return y

    if model:
        y_dist = model(X[:1],
                       training=False)  # just to test the type of the output
        if isinstance(y_dist, tfp.distributions.Distribution):
            if hasattr(y_dist, "loc") and hasattr(y_dist, "scale"):
                convert_func = lambda y_dist: np.concatenate(
                    [y_dist.loc, y_dist.scale], axis=1)
                y = batch_predict(dataset, convert_func)
            else:
                raise DeephyperRuntimeError(
                    f"Distribution doesn't have 'loc' or 'scale' attributes!")
        else:
            y = model.predict(X, batch_size=batch_size)
    else:
        y = None

    return y
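Note: a standalone sketch of the batched-prediction loop used above, assuming TensorFlow and NumPy are installed; the tiny Sequential model is a stand-in for the one loaded from ``model_path``.

import numpy as np
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])  # toy model, built on first call
X = np.random.rand(10, 4).astype("float32")

dataset = tf.data.Dataset.from_tensor_slices(X).batch(4)
y = np.concatenate([model(batch, training=False).numpy() for batch in dataset], axis=0)
print(y.shape)  # (10, 1)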
Example #6
def plot_for_single_csv(path: str, xy: list):
    """Generate a plot from a single CSV file.

    :meta private:

    Args:
        path (str): Path to the CSV file.
        xy (list): If empty ``list`` then it will use ``"elapsed_sec"`` for x-axis and ``"objective"`` for the y-axis.

    Raises:
        DeephyperRuntimeError: if the number of ``--xy`` arguments is neither 0 nor 2.
    """

    if len(xy) == 0:
        xy = ["elapsed_sec", "objective"]
    elif len(xy) != 2:
        raise DeephyperRuntimeError(
            "--xy must take two arguments such as '--xy elapsed_sec objective'"
        )

    df = pd.read_csv(path)

    plt.figure()

    plt.scatter(df[xy[0]], df[xy[1]], s=5, alpha=1.0)

    plt.xlabel(xy[0])
    plt.ylabel(xy[1])
    plt.grid()
    plt.tight_layout()
    plt.show()
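Note: a hypothetical usage sketch for the function above: write a tiny results file with the default column names and plot it (assumes pandas and matplotlib are installed and that ``plt.show()`` has an interactive backend).

import pandas as pd

pd.DataFrame(
    {"elapsed_sec": [1, 2, 3, 4], "objective": [0.1, 0.35, 0.4, 0.62]}
).to_csv("results.csv", index=False)

plot_for_single_csv("results.csv", [])  # defaults to elapsed_sec vs objective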
Example #7
    def _setup_optimizer(self):
        optimizer_fn = U.selectOptimizer_keras(self.optimizer_name)

        opti_parameters = signature(optimizer_fn).parameters
        params = {}

        if "lr" in opti_parameters:
            params["lr"] = self.learning_rate
        elif "learning_rate" in opti_parameters:
            params["learning_rate"] = self.learning_rate
        else:
            raise DeephyperRuntimeError(
                f"The learning_rate parameter is not found amoung optimiser arguments: {opti_parameters}"
            )

        if "epsilon" in opti_parameters:
            params["epsilon"] = self.optimizer_eps

        if "momentum" in opti_parameters:
            params["momentum"] = self.momentum

        if "nesterov" in opti_parameters:
            params["nesterov"] = self.nesterov

        self.optimizer = optimizer_fn(**params)
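Note: the helper above only forwards keyword arguments the optimizer actually accepts. A self-contained sketch of that signature check, using a toy optimizer function in place of the Keras one:

from inspect import signature

def toy_optimizer(learning_rate=0.001, epsilon=1e-7):  # stand-in for a Keras optimizer
    return {"learning_rate": learning_rate, "epsilon": epsilon}

opti_parameters = signature(toy_optimizer).parameters
params = {}
if "lr" in opti_parameters:
    params["lr"] = 0.01
elif "learning_rate" in opti_parameters:
    params["learning_rate"] = 0.01
if "epsilon" in opti_parameters:
    params["epsilon"] = 1e-8

print(toy_optimizer(**params))  # {'learning_rate': 0.01, 'epsilon': 1e-08}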
Example #8
    def create(run_function, method="subprocess", method_kwargs={}):
        """Create evaluator with a specific backend and configuration.

        Args:
            run_function (function): the function to execute in parallel.
            method (str, optional): the backend to use in ["thread", "process", "subprocess", "ray"]. Defaults to "subprocess".
            method_kwargs (dict, optional): configuration dictionary of the corresponding backend. Keys correspond to the keyword arguments of the corresponding implementation. Defaults to ``{}``.

        Raises:
            DeephyperRuntimeError: if the ``method`` is not acceptable.

        Returns:
            Evaluator: the ``Evaluator`` with the corresponding backend and configuration.
        """

        if method not in EVALUATORS:
            val = ", ".join(EVALUATORS)
            raise DeephyperRuntimeError(
                f'The method "{method}" is not a valid method for an Evaluator!'
                " Choose among the following evaluator types: "
                f"{val}.")

        # create the evaluator
        mod_name, attr_name = EVALUATORS[method].split(".")
        mod = importlib.import_module(f"deephyper.evaluator.{mod_name}")
        eval_cls = getattr(mod, attr_name)
        evaluator = eval_cls(run_function, **method_kwargs)

        return evaluator
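Note: ``create`` resolves a ``"module.ClassName"`` string from the ``EVALUATORS`` registry and instantiates it. A standalone sketch of that pattern with an illustrative registry (the entries below are not the real ``EVALUATORS`` mapping):

import importlib

REGISTRY = {"ordered": "collections.OrderedDict", "counter": "collections.Counter"}

def create_from_registry(name, **kwargs):
    if name not in REGISTRY:
        raise RuntimeError(
            f'"{name}" is not a valid name! Choose among: {", ".join(REGISTRY)}.'
        )
    mod_name, attr_name = REGISTRY[name].rsplit(".", 1)
    cls = getattr(importlib.import_module(mod_name), attr_name)
    return cls(**kwargs)

print(type(create_from_registry("counter")))  # <class 'collections.Counter'>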
Example #9
 def op(self):
     if self.num_ops != self.node.num_ops:
         raise DeephyperRuntimeError(
             f"{str(self)} and {str(self.node)} should have the same number of opertions, when {str(self)} has {self.num_ops} and {str(self.node)} has {self.node.num_ops}!"
         )
     else:
         return self._ops[self.node._index]
Example #10
File: _topk.py  Project: felker/deephyper
def output_best_configuration_from_df(df, output: str, k: int, **kwargs) -> None:
    """Output the ``k`` best configurations based on the maximal objective found in the input DataFrame.

    :meta private:

    Args:
        df (DataFrame): a Pandas DataFrame.
        output (str): Path of the output file ending in (.csv|.yaml|.json).
        k (int): Number of configurations to output.
    """

    df = df.sort_values(by=["objective"], ascending=False, ignore_index=True)
    subdf = df.iloc[:k]

    if len(output) == 0:
        print(yaml.dump(json.loads(subdf.to_json(orient="index"))))
    else:
        output_extension = output.split(".")[-1]
        if output_extension == "yaml":
            with open(output, "w") as f:
                yaml.dump(json.loads(subdf.to_json(orient="index")), f)
        elif output_extension == "csv":
            subdf.to_csv(output)
        elif output_extension == "json":
            subdf.to_json(output, orient="index")
        else:
            raise DeephyperRuntimeError(
                f"The specified output extension is not supported: {output_extension}"
            )
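Note: a self-contained sketch of the sort-then-dump logic above, assuming pandas and PyYAML are installed; the DataFrame content is made up for illustration.

import json

import pandas as pd
import yaml

df = pd.DataFrame({"objective": [0.2, 0.9, 0.5], "elapsed_sec": [3.0, 7.0, 5.0]})
df = df.sort_values(by=["objective"], ascending=False, ignore_index=True)
subdf = df.iloc[:2]  # k = 2
print(yaml.dump(json.loads(subdf.to_json(orient="index"))))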
Example #11
def notebook_for_posttrain(path: list, output: str) -> None:
    output_file = "dh-analytics-posttrain.ipynb" if len(output) == 0 else output

    if len(path) == 1:
        post_train.post_train_analytics(path, output_file)
    else:
        raise DeephyperRuntimeError("Comparative analytics for Post-Train is not available yet!") # TODO
Example #12
    def __init__(
        self,
        run_function,
        cache_key=None,
        encoder=Encoder,
        seed=None,
        num_workers=None,
        **kwargs,
    ):
        self.encoder = encoder  # dict --> uuid
        self.pending_evals = {}  # uid --> Future
        self.finished_evals = OrderedDict()  # uid --> scalar
        self.requested_evals = []  # keys
        self.key_uid_map = {}  # map keys to uids
        self.uid_key_map = {}  # map uids to keys
        self.seed = seed
        self.seed_high = 2**32  # exclusive

        self.stats = {"num_cache_used": 0}

        self.transaction_context = dummy_context
        self._start_sec = time.time()
        self.elapsed_times = {}

        self._run_function = run_function
        self.num_workers = num_workers

        if (cache_key is not None) and (cache_key != "to_dict"):
            if callable(cache_key):
                self._gen_uid = cache_key
            elif cache_key == "uuid":
                self._gen_uid = lambda d: uuid.uuid4()
            else:
                raise DeephyperRuntimeError(
                    'The "cache_key" parameter of an Evaluator must be a callable!'
                )
        else:
            self._gen_uid = lambda d: self.encode(d)

        moduleName = self._run_function.__module__
        logger.info(
            f'moduleName == {moduleName} run_function = {self._run_function}')

        if moduleName == "__main__":
            raise DeephyperRuntimeError(
                f'Evaluator will not execute function "{run_function.__name__}" because it is in the __main__ module. Please provide a function imported from an external module!'
            )
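Note: a small sketch of the two uid strategies selected in ``__init__``: a deterministic key computed from the configuration dict (enables the cache) versus a fresh ``uuid`` per evaluation (disables it). The ``uid_from_config`` helper is hypothetical.

import json
import uuid

def uid_from_config(config: dict) -> str:
    # Deterministic: identical configurations map to the same uid, so the
    # cached objective can be reused instead of re-running the evaluation.
    return json.dumps(config, sort_keys=True)

gen_uid = uid_from_config                 # cache on configuration contents
# gen_uid = lambda d: str(uuid.uuid4())   # or: never reuse cached results

print(gen_uid({"lr": 0.01, "units": 32}))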
Example #13
def output_best_configuration(path: str, output: str, k: int,
                              **kwargs) -> None:
    """Output the configuration based on the maximal objective found in the CSV input file.

    Args:
        path (str): Path of the CSV input file.
        output (str): Path of the output file ending in (.csv|.yaml|.json).
        k (int): Number of configurations to output.
    """

    input_extension = path.split(".")[-1]
    if input_extension == "csv":
        df = pd.read_csv(path)
        df = df.sort_values(by=["objective"],
                            ascending=False,
                            ignore_index=True)
        subdf = df.iloc[:k]
        if not ("arch_seq" in subdf.columns):
            subdf = pd.DataFrame({
                "arch_seq":
                [str(list(el)) for el in subdf.to_numpy()[:, :-2].astype(int)],
                "objective":
                subdf.objective.tolist(),
                "elapsed_sec":
                subdf.elapsed_sec.tolist()
            })

        if len(output) == 0:
            print(yaml.dump(json.loads(subdf.to_json(orient="index"))))
        else:
            output_extension = output.split(".")[-1]
            if output_extension == "yaml":
                with open(output, "w") as f:
                    yaml.dump(json.loads(subdf.to_json(orient="index")), f)
            elif output_extension == "csv":
                subdf.to_csv(output)
            elif output_extension == "json":
                subdf.to_json(output, orient="index")
            else:
                raise DeephyperRuntimeError(
                    f"The specified output extension is not supported: {output_extension}"
                )
    else:
        raise DeephyperRuntimeError(
            f"The specified input file extension '{input_extension}' is not supported."
        )
Example #14
 def check_op_list(self, space: KSearchSpace, ops: list) -> list:
     if len(ops) == 0:
         ops = [random.random() for _ in range(space.num_nodes)]
     else:
         if len(ops) != space.num_nodes:
             raise DeephyperRuntimeError(
                 f"The argument list 'ops' should be of length {space.num_nodes} but is {len(ops)}!"
             )
     return ops
Example #15
def main(type: str, path: list, *args, **kwargs) -> None:

    if type == "hps":
        notebook_for_hps(path, **kwargs)
    elif type == "nas":
        notebook_for_nas(path, **kwargs)
    elif type == "posttrain":
        notebook_for_posttrain(path, **kwargs)
    else:
        raise DeephyperRuntimeError(f"The notebook TYPE '{type}' passed is not supported.")
Example #16
    def create(run_function,
               cache_key=None,
               method="subprocess",
               redis_address=None,
               **kwargs):
        available_methods = [
            "balsam",
            "subprocess",
            "processPool",
            "threadPool",
            "__mpiPool",
            "ray",
        ]

        if method not in available_methods:
            raise DeephyperRuntimeError(
                f'The method "{method}" is not a valid method for an Evaluator!'
            )

        if method == "balsam":
            from deephyper.evaluator._balsam import BalsamEvaluator

            Eval = BalsamEvaluator(run_function, cache_key=cache_key, **kwargs)
        elif method == "subprocess":
            from deephyper.evaluator._subprocess import SubprocessEvaluator

            Eval = SubprocessEvaluator(run_function,
                                       cache_key=cache_key,
                                       **kwargs)
        elif method == "processPool":
            from deephyper.evaluator._processPool import ProcessPoolEvaluator

            Eval = ProcessPoolEvaluator(run_function,
                                        cache_key=cache_key,
                                        **kwargs)
        elif method == "threadPool":
            from deephyper.evaluator._threadPool import ThreadPoolEvaluator

            Eval = ThreadPoolEvaluator(run_function,
                                       cache_key=cache_key,
                                       **kwargs)
        elif method == "__mpiPool":
            from deephyper.evaluator._mpiWorkerPool import MPIWorkerPool

            Eval = MPIWorkerPool(run_function, cache_key=cache_key, **kwargs)
        elif method == "ray":
            from deephyper.evaluator.ray_evaluator import RayEvaluator

            Eval = RayEvaluator(run_function,
                                cache_key=cache_key,
                                redis_address=redis_address,
                                **kwargs)

        return Eval
Example #17
    def predict(self, dataset: str = "valid", keep_normalize: bool = False) -> tuple:
        """[summary]

        Args:
            dataset (str, optional): 'valid' or 'train'. Defaults to 'valid'.
            keep_normalize (bool, optional): if False then the preprocessing will be reversed after prediction. if True nothing will be reversed. Defaults to False.

        Raises:
            DeephyperRuntimeError: if ``dataset`` is not ``"valid"`` or ``"train"``.

        Returns:
            tuple: (y_true, y_pred)
        """
        if dataset not in ("valid", "train"):
            raise DeephyperRuntimeError(
                "dataset parameter should be equal to: 'valid' or 'train'"
            )

        if dataset == "valid":
            valid_steps = self.valid_size // self.batch_size
            if valid_steps * self.batch_size < self.valid_size:
                valid_steps += 1
            y_pred = self.model.predict(self.dataset_valid, steps=valid_steps)
        else:  # dataset == 'train'
            y_pred = self.model.predict(
                self.dataset_train, steps=self.train_steps_per_epoch
            )

        if (
            self.preprocessing_func
            and not keep_normalize
            and not self.data_config_type == "gen"
        ):
            if dataset == "valid":
                data_X, data_Y = self.valid_X, self.valid_Y
            else:  # dataset == 'train'
                data_X, data_Y = self.train_X, self.train_Y
            val_pred = np.concatenate((*data_X, y_pred), axis=1)
            val_orig = np.concatenate((*data_X, data_Y), axis=1)
            val_pred_trans = self.preprocessor.inverse_transform(val_pred)
            val_orig_trans = self.preprocessor.inverse_transform(val_orig)
            y_orig = val_orig_trans[:, -np.shape(data_Y)[1] :]
            y_pred = val_pred_trans[:, -np.shape(data_Y)[1] :]
        else:
            if self.data_config_type == "ndarray":
                y_orig = self.valid_Y if dataset == "valid" else self.train_Y
            else:
                gen = self.valid_gen() if dataset == "valid" else self.train_gen()
                y_orig = np.array([e[-1] for e in gen])

        return y_orig, y_pred
Example #18
    def preprocess_data(self):
        logger.debug("Starting preprocess of data")

        if self.data_config_type == "gen":
            logger.warn("Cannot preprocess data with generator!")
            return

        if self.preprocessor is not None:
            raise DeephyperRuntimeError(
                "You can only preprocess data one time.")

        if self.preprocessing_func:
            logger.debug(
                f"preprocess_data with: {str(self.preprocessing_func)}")
            if all([
                    len(np.shape(tX)) == len(np.shape(self.train_Y))
                    for tX in self.train_X
            ]):
                data_train = np.concatenate((*self.train_X, self.train_Y),
                                            axis=-1)
                data_valid = np.concatenate((*self.valid_X, self.valid_Y),
                                            axis=-1)
                self.preprocessor = self.preprocessing_func()

                tX_shp = [np.shape(x) for x in self.train_X]

                preproc_data_train = self.preprocessor.fit_transform(
                    data_train)
                preproc_data_valid = self.preprocessor.transform(data_valid)

                acc, self.train_X = 0, list()
                for shp in tX_shp:
                    self.train_X.append(preproc_data_train[...,
                                                           acc:acc + shp[1]])
                    acc += shp[1]
                self.train_Y = preproc_data_train[..., acc:]

                acc, self.valid_X = 0, list()
                for shp in tX_shp:
                    self.valid_X.append(preproc_data_valid[...,
                                                           acc:acc + shp[1]])
                    acc += shp[1]
                self.valid_Y = preproc_data_valid[..., acc:]
            else:
                logger.warning(
                    f"Skipped preprocess because shape {np.shape(self.train_Y)} is not handled!"
                )
        else:
            logger.info(
                "Skipped preprocess of data because no function is defined!")
Example #19
    def model_compile(self):
        optimizer_fn = U.selectOptimizer_keras(self.optimizer_name)

        decay_rate = self.learning_rate / self.num_epochs if self.num_epochs > 0 else 1

        opti_parameters = signature(optimizer_fn).parameters
        params = {}

        # "lr" and "learning_rate" is checked depending if Keras or Tensorflow optimizer is used
        if "lr" in opti_parameters:
            params["lr"] = self.learning_rate
        elif "learning_rate" in opti_parameters:
            params["learning_rate"] = self.learning_rate
        else:
            raise DeephyperRuntimeError(
                f"The learning_rate parameter is not found amoung optimiser arguments: {opti_parameters}"
            )

        if "epsilon" in opti_parameters:
            params["epsilon"] = self.optimizer_eps

        if self.clipvalue is not None:
            params["clipvalue"] = self.clipvalue

        # if "decay" in opti_parameters:
        #     decay_rate = (
        #         self.learning_rate / self.num_epochs if self.num_epochs > 0 else 1
        #     )
        #     params["decay"] = decay_rate

        self.optimizer = hvd.DistributedOptimizer(optimizer_fn(**params))

        if type(self.loss_metrics) is dict:
            self.model.compile(
                optimizer=self.optimizer,
                loss=self.loss_metrics,
                loss_weights=self.loss_weights,
                metrics=self.metrics_name,
            )
        else:
            self.model.compile(
                optimizer=self.optimizer,
                loss=self.loss_metrics,
                metrics=self.metrics_name,
            )
Example #20
def import_callback(cb_name: str) -> Type[tf.keras.callbacks.Callback]:
    """Import a callback class from its name.

    Args:
        cb_name (str): class name of the callback to import from ``tensorflow.keras.callbacks`` or ``deephyper.contrib.callbacks``.

    Raises:
        DeephyperRuntimeError: raised if the class name of the callback is not registered in corresponding packages.

    Returns:
        tensorflow.keras.callbacks.Callback: the class corresponding to the given class name.
    """
    if cb_name in dir(tf.keras.callbacks):
        return getattr(tf.keras.callbacks, cb_name)
    elif cb_name in dir(deephyper.contrib.callbacks):
        return getattr(deephyper.contrib.callbacks, cb_name)
    else:
        raise DeephyperRuntimeError(
            f"Callback '{cb_name}' is not registered in tensorflow.keras and deephyper.contrib.callbacks."
        )
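Note: ``import_callback`` looks the class name up with ``dir()``/``getattr()`` on a list of modules. A standard-library-only sketch of that lookup (the ``import_by_name`` helper is hypothetical):

import collections
import functools

def import_by_name(name: str):
    for module in (collections, functools):
        if name in dir(module):
            return getattr(module, name)
    raise RuntimeError(f"'{name}' is not registered in the searched modules.")

print(import_by_name("OrderedDict"))  # <class 'collections.OrderedDict'>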
Example #21
File: _topk.py  Project: felker/deephyper
def output_best_configuration_from_csv(
    path: str, output: str, k: int, **kwargs
) -> None:
    """Output the configuration based on the maximal objective found in the CSV input file.

    :meta private:

    Args:
        path (str): Path of the CSV input file.
        output (str): Path of the output file ending in (.csv|.yaml|.json).
        k (int): Number of configuration to output.
    """

    input_extension = path.split(".")[-1]
    if input_extension == "csv":
        df = pd.read_csv(path)
        output_best_configuration_from_df(df, output, k)
    else:
        raise DeephyperRuntimeError(
            f"The specified input file extension '{input_extension}' is not supported."
        )
Example #22
    def load_data(self):
        logger.debug("load_data")

        self.data_config_type = U.check_data_config(self.data)
        logger.debug(f"data config type: {self.data_config_type}")
        if self.data_config_type == "gen":
            self.load_data_generator()
        elif self.data_config_type == "ndarray":
            self.load_data_ndarray()
        else:
            raise DeephyperRuntimeError(
                f"Data config is not supported by this Trainer: '{self.data_config_type}'!"
            )

        # prepare number of steps for training and validation
        self.train_steps_per_epoch = self.train_size // self.batch_size
        if self.train_steps_per_epoch * self.batch_size < self.train_size:
            self.train_steps_per_epoch += 1
        self.valid_steps_per_epoch = self.valid_size // self.batch_size
        if self.valid_steps_per_epoch * self.batch_size < self.valid_size:
            self.valid_steps_per_epoch += 1
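Note: the step computation at the end of ``load_data`` is a ceiling division; a compact equivalent with illustrative numbers:

train_size, batch_size = 1050, 32

steps = train_size // batch_size
if steps * batch_size < train_size:
    steps += 1

assert steps == -(-train_size // batch_size) == 33  # ceil(1050 / 32)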
Example #23
    def model_compile(self):
        optimizer_fn = U.selectOptimizer_keras(self.optimizer_name)

        decay_rate = self.learning_rate / self.num_epochs if self.num_epochs > 0 else 1

        opti_parameters = signature(optimizer_fn).parameters
        params = {}

        if "lr" in opti_parameters:
            params["lr"] = self.learning_rate
        elif "learning_rate" in opti_parameters:
            params["learning_rate"] = self.learning_rate
        else:
            raise DeephyperRuntimeError(
                f"The learning_rate parameter is not found amoung optimiser arguments: {opti_parameters}"
            )

        if "epsilon" in opti_parameters:
            params["epsilon"] = self.optimizer_eps

        if "decay" in opti_parameters:
            decay_rate = (self.learning_rate /
                          self.num_epochs if self.num_epochs > 0 else 1)
            params["decay"] = decay_rate
        self.optimizer = optimizer_fn(**params)

        if type(self.loss_metrics) is dict:
            self.model.compile(
                optimizer=self.optimizer,
                loss=self.loss_metrics,
                loss_weights=self.loss_weights,
                metrics=self.metrics_name,
            )
        else:
            self.model.compile(
                optimizer=self.optimizer,
                loss=self.loss_metrics,
                metrics=self.metrics_name,
            )
Example #24
def plot_for_multiple_json(path: list, xy: list):
    """
    :meta private:
    """
    if len(xy) == 0:
        xy = ["epochs", "val_loss"]
    elif len(xy) != 2:
        raise DeephyperRuntimeError(
            "--xy must take two arguments such as '--xy epochs val_loss'")

    xlabel, ylabel = xy

    plt.figure()

    if xlabel == "epochs":
        plot_multiple_training(path, ylabel)
    elif xlabel == "time":
        plot_multiple_objective_wrp_time(path, ylabel)

    plt.ylabel(ylabel)
    plt.grid()
    plt.tight_layout()
    plt.show()
Example #25
def plot_for_single_json(path: str, xy: list):
    """[summary]

    :meta private:

    Args:
        path (str): Path to the JSON history file.
        xy (list): If empty ``list`` then it will use ``"epochs"`` for the x-axis and ``"val_loss"`` for the y-axis.

    Raises:
        DeephyperRuntimeError: if the number of ``--xy`` arguments is neither 0 nor 2.
    """

    if len(xy) == 0:
        xy = ["epochs", "val_loss"]
    elif len(xy) != 2:
        raise DeephyperRuntimeError(
            "--xy must take two arguments such as '--xy epochs val_loss'")

    xlabel, ylabel = xy

    with open(path, "r") as f:
        history = json.load(f)

    x = list(range(len(
        history[ylabel]))) if xlabel == "epochs" else history[xlabel]
    y = history[ylabel]

    plt.figure()

    plt.plot(x, y)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid()
    plt.tight_layout()
    plt.show()
Example #26
    def train(self,
              num_epochs: int = None,
              with_pred: bool = False,
              last_only: bool = False):
        """Train the model.

        Args:
            num_epochs (int, optional): override the number of epochs passed when initializing the Trainer. Defaults to None, in which case the Trainer's own ``num_epochs`` is used.
            with_pred (bool, optional): will compute a prediction after the training and will add ('y_true', 'y_pred') to the output history. Defaults to False, will skip it (use it to save compute time).
            last_only (bool, optional): will compute metrics after the last epoch only. Defaults to False, will compute metrics after each training epoch (use it to save compute time).

        Raises:
            DeephyperRuntimeError: raised when the ``num_epochs < 0``.

        Returns:
            dict: a dictionary containing the training history.
        """
        num_epochs = self.num_epochs if num_epochs is None else num_epochs

        self.init_history()

        if num_epochs > 0:

            time_start_training = time.time()  # TIMING

            if not last_only:
                logger.info(
                    "Trainer is computing metrics on validation after each training epoch."
                )
                history = self.model.fit(
                    self.dataset_train,
                    verbose=self.verbose,
                    epochs=num_epochs,
                    steps_per_epoch=self.train_steps_per_epoch,
                    callbacks=self.callbacks,
                    validation_data=self.dataset_valid,
                    validation_steps=self.valid_steps_per_epoch,
                )
            else:
                logger.info(
                    "Trainer is computing metrics on validation after the last training epoch."
                )
                if num_epochs > 1:
                    self.model.fit(
                        self.dataset_train,
                        verbose=self.verbose,
                        epochs=num_epochs - 1,
                        steps_per_epoch=self.train_steps_per_epoch,
                        callbacks=self.callbacks,
                    )
                history = self.model.fit(
                    self.dataset_train,
                    epochs=1,
                    verbose=self.verbose,
                    steps_per_epoch=self.train_steps_per_epoch,
                    callbacks=self.callbacks,
                    validation_data=self.dataset_valid,
                    validation_steps=self.valid_steps_per_epoch,
                )

            time_end_training = time.time()  # TIMING
            self.train_history["training_time"] = (time_end_training -
                                                   time_start_training)

            self.train_history.update(history.history)

        elif num_epochs < 0:

            raise DeephyperRuntimeError(
                f"Trainer: number of epochs should be >= 0: {num_epochs}")

        if with_pred:
            time_start_predict = time.time()
            y_true, y_pred = self.predict(dataset="valid")
            time_end_predict = time.time()
            self.train_history["val_predict_time"] = (time_end_predict -
                                                      time_start_predict)

            self.train_history["y_true"] = y_true
            self.train_history["y_pred"] = y_pred

        return self.train_history
Example #27
    def load_data_ndarray(self):
        def f(x):
            return type(x) is np.ndarray

        # check data type

        # Output data
        if (type(self.config[a.data][a.train_Y]) is np.ndarray
                and type(self.config[a.data][a.valid_Y]) is np.ndarray):
            self.train_Y = self.config[a.data][a.train_Y]
            self.valid_Y = self.config[a.data][a.valid_Y]
        elif (type(self.config[a.data][a.train_Y]) is list
              and type(self.config[a.data][a.valid_Y]) is list):

            if not all(map(f, self.config[a.data][a.train_Y])) or not all(
                    map(f, self.config[a.data][a.valid_Y])):
                raise DeephyperRuntimeError(
                    f"all outputs data should be of type np.ndarray !")

            if (len(self.config[a.data][a.train_Y]) > 1
                    and len(self.config[a.data][a.valid_Y]) > 1):
                self.train_Y = self.config[a.data][a.train_Y]
                self.valid_Y = self.config[a.data][a.valid_Y]
            else:
                self.train_Y = self.config[a.data][a.train_Y][0]
                self.valid_Y = self.config[a.data][a.valid_Y][0]
        else:
            raise DeephyperRuntimeError(
                f"Data are of an unsupported type and should be of same type: type(self.config['data']['train_Y'])=={type(self.config[a.data][a.train_Y])} and type(self.config['data']['valid_Y'])=={type(self.config[a.valid_Y][a.valid_X])} !"
            )

        # Input data
        if (type(self.config[a.data][a.train_X]) is np.ndarray
                and type(self.config[a.data][a.valid_X]) is np.ndarray):
            self.train_X = [self.config[a.data][a.train_X]]
            self.valid_X = [self.config[a.data][a.valid_X]]
        elif (type(self.config[a.data][a.train_X]) is list
              and type(self.config[a.data][a.valid_X]) is list):

            if not all(map(f, self.config[a.data][a.train_X])) or not all(
                    map(f, self.config[a.data][a.valid_X])):
                raise DeephyperRuntimeError(
                    f"all inputs data should be of type np.ndarray !")
            if (len(self.config[a.data][a.train_X]) > 1
                    and len(self.config[a.data][a.valid_X]) > 1):
                self.train_X = self.config[a.data][a.train_X]
                self.valid_X = self.config[a.data][a.valid_X]
            else:
                self.train_X = self.config[a.data][a.train_X][0]
                self.valid_X = self.config[a.data][a.valid_X][0]
        else:
            raise DeephyperRuntimeError(
                f"Data are of an unsupported type and should be of same type: type(self.config['data']['train_X'])=={type(self.config[a.data][a.train_X])} and type(self.config['data']['valid_X'])=={type(self.config[a.data][a.valid_X])} !"
            )

        # check data length
        self.train_size = np.shape(self.train_X[0])[0]
        if not all(
                map(lambda x: np.shape(x)[0] == self.train_size,
                    self.train_X)):
            raise DeephyperRuntimeError(
                f"All training inputs data should have same length!")

        self.valid_size = np.shape(self.valid_X[0])[0]
        if not all(
                map(lambda x: np.shape(x)[0] == self.valid_size,
                    self.valid_X)):
            raise DeephyperRuntimeError(
                f"All validation inputs data should have same length!")
Example #28
    def create(
        run_function,
        cache_key=None,
        method="subprocess",
        ray_address=None,
        ray_password=None,
        num_workers=None,
        **kwargs,
    ):
        available_methods = [
            "balsam",
            "subprocess",
            "processPool",
            "threadPool",
            "__mpiPool",
            "ray",
            "rayhorovod"
        ]

        if method not in available_methods:
            raise DeephyperRuntimeError(
                f'The method "{method}" is not a valid method for an Evaluator!'
            )

        if method == "balsam":
            from deephyper.evaluator._balsam import BalsamEvaluator

            Eval = BalsamEvaluator(run_function, cache_key=cache_key, **kwargs)
        elif method == "subprocess":
            from deephyper.evaluator._subprocess import SubprocessEvaluator

            Eval = SubprocessEvaluator(run_function, cache_key=cache_key, **kwargs)
        elif method == "processPool":
            from deephyper.evaluator._processPool import ProcessPoolEvaluator

            Eval = ProcessPoolEvaluator(run_function, cache_key=cache_key, **kwargs)
        elif method == "threadPool":
            from deephyper.evaluator._threadPool import ThreadPoolEvaluator

            Eval = ThreadPoolEvaluator(run_function, cache_key=cache_key, **kwargs)
        elif method == "__mpiPool":
            from deephyper.evaluator._mpiWorkerPool import MPIWorkerPool

            Eval = MPIWorkerPool(run_function, cache_key=cache_key, **kwargs)
        elif method == "ray":
            from deephyper.evaluator._ray_evaluator import RayEvaluator

            Eval = RayEvaluator(
                run_function,
                cache_key=cache_key,
                ray_address=ray_address,
                ray_password=ray_password,
                num_workers=num_workers,
                **kwargs,
            )
        elif method == "rayhorovod":
            from deephyper.evaluator._ray_horovod_evaluator import RayHorovodEvaluator

            Eval = RayHorovodEvaluator(
                run_function,
                cache_key=cache_key,
                ray_address=ray_address,
                ray_password=ray_password,
                num_workers=num_workers,
                **kwargs,
            )

        # Override the number of workers if passed as an argument
        if num_workers is not None and type(num_workers) is int:
            Eval.num_workers = num_workers

        return Eval
Example #29
 def set_op(self):
     if self.node._index is None:
         raise DeephyperRuntimeError(
             f"{str(self)} cannot be initialized because its source {str(self.node)} is not initialized!"
         )
     self._ops[self.node._index].init(self)
Example #30
def train(config):
    seed = config["seed"]

    if not "post_train" in config:
        raise DeephyperRuntimeError("The post training was not define in the Problem!")

    repeat = config["post_train"]["repeat"]

    if seed is not None:
        np.random.seed(seed)
        # must be between (0, 2**32-1)
        seeds = [np.random.randint(0, 2 ** 32 - 1) for _ in range(repeat)]

    for rep in range(repeat):
        tf.keras.backend.clear_session()

        default_callbacks_config = copy.deepcopy(CB_CONFIG)
        if seed is not None:
            np.random.seed(seeds[rep])
            tf.random.set_seed(seeds[rep])

        logger.info(f"Training replica {rep+1}")

        # override hyperparameters with post_train hyperparameters
        keys = filter(
            lambda k: k in config["hyperparameters"], config["post_train"].keys()
        )
        for k in keys:
            config["hyperparameters"][k] = config["post_train"][k]

        load_config(config)

        input_shape, output_shape = setup_data(config)

        search_space = setup_search_space(config, input_shape, output_shape, seed=seed)
        search_space.draw_graphviz(f'model_{config["id"]}.dot')
        logger.info("Model operations set.")

        model_created = False
        try:
            model = search_space.create_model()
            model_created = True
        except Exception:
            model_created = False
            logger.info("Error: Model creation failed...")
            logger.info(traceback.format_exc())

        if model_created:
            # model.load_weights(default_cfg['model_checkpoint']['filepath'])

            # Setup callbacks
            callbacks = []
            callbacks_config = config["post_train"].get("callbacks")
            if callbacks_config is not None:
                for cb_name, cb_conf in callbacks_config.items():
                    if cb_name in default_callbacks_config:
                        default_callbacks_config[cb_name].update(cb_conf)

                        if cb_name == "ModelCheckpoint":
                            default_callbacks_config[cb_name][
                                "filepath"
                            ] = f'best_model_id{config["id"]}_r{rep}.h5'

                        Callback = getattr(keras.callbacks, cb_name)
                        callbacks.append(Callback(**default_callbacks_config[cb_name]))

                        logger.info(
                            f"Adding new callback {type(Callback).__name__} with config: {default_callbacks_config[cb_name]}!"
                        )

                    else:
                        logger.error(f"'{cb_name}' is not an accepted callback!")

            trainer = TrainerTrainValid(config=config, model=model)
            trainer.callbacks.extend(callbacks)

            json_fname = f'post_training_hist_{config["id"]}.json'
            # to log the number of trainable parameters before running training
            trainer.init_history()
            try:
                with open(json_fname, "r") as f:
                    fhist = json.load(f)
            except FileNotFoundError:
                fhist = trainer.train_history
                for k, v in fhist.items():
                    fhist[k] = [v]
                with open(json_fname, "w") as f:
                    json.dump(fhist, f, cls=Encoder)

            hist = trainer.train(with_pred=False, last_only=False)

            # Timing of prediction for validation dataset
            t = time()  # ! TIMING - START
            trainer.predict(dataset="valid")
            hist["val_predict_time"] = time() - t  # ! TIMING - END

            for k, v in hist.items():
                fhist[k] = fhist.get(k, [])
                fhist[k].append(v)

            with open(json_fname, "w") as f:
                json.dump(fhist, f, cls=Encoder)

        return model
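Note: a small sketch of the history-merge step near the end of ``train``: per-replica metrics from ``hist`` are appended to the lists accumulated in ``fhist`` before being written back to the JSON file; the values below are made up.

fhist = {"val_loss": [[0.9, 0.5]]}                        # metrics accumulated so far (one list per replica)
hist = {"val_loss": [0.8, 0.4], "val_predict_time": 1.2}  # metrics of the current replica

for k, v in hist.items():
    fhist[k] = fhist.get(k, [])
    fhist[k].append(v)

print(fhist)  # {'val_loss': [[0.9, 0.5], [0.8, 0.4]], 'val_predict_time': [1.2]}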