Example #1
    def _setup_tfgraph(*args):
        import tensorflow as tf
        # the TfGraph backend works in graph mode, so eager execution is
        # disabled and the default graph reset before building the model
        tf.disable_eager_execution()
        tf.reset_default_graph()
        from delira.models import AbstractTfGraphNetwork
        from delira.training.backends.tf_graph.utils import \
            initialize_uninitialized

        class Model(AbstractTfGraphNetwork):
            def __init__(self):
                super().__init__()
                self.dense = tf.keras.layers.Dense(1, activation="relu")

                # graph inputs: ``labels`` defaults to zeros, so the graph
                # can be run for inference without feeding labels
                data = tf.placeholder(shape=[None, 1], dtype=tf.float32)

                labels = tf.placeholder_with_default(
                    tf.zeros([tf.shape(data)[0], 1]), shape=[None, 1])

                # train and eval predictions come from the same layer and
                # therefore share their weights
                preds_train = self.dense(data)
                preds_eval = self.dense(data)

                self.inputs["data"] = data
                self.inputs["labels"] = labels
                self.outputs_train["pred"] = preds_train
                self.outputs_eval["pred"] = preds_eval

        model = Model()
        initialize_uninitialized(model._sess)
        return model
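A hypothetical way to exercise this factory (a sketch, not from the source: the batch values are illustrative, while the session handle ``_sess`` and the ``inputs``/``outputs_eval`` dicts mirror the example above):

    import numpy as np

    model = _setup_tfgraph()
    batch = np.random.rand(4, 1).astype("float32")

    # run the eval head through the session managed by the base class;
    # ``labels`` need not be fed thanks to its placeholder_with_default
    preds = model._sess.run(model.outputs_eval["pred"],
                            feed_dict={model.inputs["data"]: batch})
    print(preds.shape)  # -> (4, 1)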
Example #2
    def test(self, network, test_data: BaseDataManager,
             metrics: dict, metric_keys=None,
             verbose=False, prepare_batch=None,
             convert_fn=None, **kwargs):
        """
        Set up and run testing on a given network

        Parameters
        ----------
        network : :class:`AbstractNetwork`
            the (trained) network to test
        test_data : :class:`BaseDataManager`
            the data to use for testing
        metrics : dict
            the metrics to calculate
        metric_keys : dict of tuples
            the batch_dict keys to use for each metric to calculate.
            Should contain a value for each key in ``metrics``.
            If no values are given for a key, ``pred`` and ``label``
            will be used for metric calculation per default
        verbose : bool
            verbosity of the test process
        prepare_batch : function
            function to convert a batch-dict to a format accepted by the
            model. This conversion typically includes dtype-conversion,
            reshaping, wrapping to backend-specific tensors and
            pushing to correct devices. If not further specified uses the
            ``network``'s ``prepare_batch`` with CPU devices
        convert_fn : function
            function to convert a batch of tensors to numpy;
            if not specified, defaults to
            :func:`convert_torch_tensor_to_npy`
        **kwargs :
            additional keyword arguments

        Returns
        -------
        dict
            all predictions obtained by feeding the ``test_data`` through
            the ``network``
        dict
            all metrics calculated upon the ``test_data`` and the obtained
            predictions

        """

        # make sure all graph variables are initialized before running
        # the test graph
        initialize_uninitialized(network._sess)

        # fall back to the network's own prepare_batch on CPU devices
        if prepare_batch is None:
            prepare_batch = partial(network.prepare_batch,
                                    input_device=None,
                                    output_device=None)

        return super().test(network=network, test_data=test_data,
                            metrics=metrics, metric_keys=metric_keys,
                            verbose=verbose, prepare_batch=prepare_batch,
                            convert_fn=convert_fn, **kwargs)
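A hedged call sketch: ``experiment``, ``model`` and ``dmgr`` are illustrative placeholders rather than names from the source, and the key order inside ``metric_keys`` is an assumption:

    from sklearn.metrics import mean_absolute_error

    # ``experiment`` exposes the ``test`` method above, ``model`` is a
    # trained network and ``dmgr`` a BaseDataManager over hold-out data
    preds, metric_vals = experiment.test(
        network=model,
        test_data=dmgr,
        metrics={"MAE": mean_absolute_error},
        metric_keys={"MAE": ("label", "pred")},  # assumed key order
        verbose=True)

Leaving ``prepare_batch`` unset makes the method fall back to ``network.prepare_batch`` on CPU devices, as documented above.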
Example #3
    def _setup(self, network, optim_fn, optimizer_cls, optimizer_params,
               lr_scheduler_cls, lr_scheduler_params, key_mapping,
               convert_batch_to_npy_fn, gpu_ids, callbacks):
        """
        Defines the Trainer's Setup

        Parameters
        ----------
        network : instance of :class:`AbstractTfNetwork`
            the network to train
        optim_fn : function
            creates a dictionary containing all necessary optimizers
        optimizer_cls : subclass of tf.train.Optimizer
            optimizer class implementing the optimization algorithm of choice
        optimizer_params : dict
            keyword arguments passed to optimizer during construction
        lr_scheduler_cls : Any
            learning rate schedule class; must implement a ``step()`` method
        lr_scheduler_params : dict
            keyword arguments passed to lr scheduler during construction
        key_mapping : dict
            dictionary mapping the ``batch_dict`` items to the network's
            forward inputs
        convert_batch_to_npy_fn : function, optional
            function converting a batch-tensor to numpy, per default this is
            the identity function
        gpu_ids : list
            list containing ids of GPUs to use; if empty: use cpu instead
        callbacks : list
            initial callbacks to register

        Raises
        ------
        RuntimeError
            if multiple GPU ids passed
        """

        # TODO: implement multi-GPU and single GPU training with help of
        #  keras multi-gpu model
        #  note: might be bugged in combination with sess.run
        #  https://github.com/tensorflow/tensorflow/issues/21788

        # if gpu_ids and tf.test.is_gpu_available():
        #     assert len(gpu_ids) <= len(get_available_gpus()), "more GPUs
        #     specified than available"
        #     self.use_gpu = True
        #     if len(gpu_ids) > 1:
        #         logger.warning(
        #             "multi-GPU training not yet tested!")
        #
        #         network.model = tf.keras.utils.multi_gpu_model(
        #                                 network.model,
        #                                 len(gpu_ids),
        #                                 cpu_merge=True,
        #                                 cpu_relocation=False)
        #     else:
        #         network.models = tf.keras.models.clone_model(network.model)
        # else:
        #     self.use_gpu = False
        #
        if len(gpu_ids) > 1:
            raise RuntimeError("Multiple GPUs not yet supported")

        self.optimizers = optim_fn(optimizer_cls, **optimizer_params)

        super()._setup(network, lr_scheduler_cls, lr_scheduler_params, gpu_ids,
                       key_mapping, convert_batch_to_npy_fn, lambda x: x,
                       callbacks)

        self.use_gpu = True

        self.module._add_losses(self.losses)
        self.module._add_optims(self.optimizers)
        # check for uninitialized variables
        initialize_uninitialized(self.module._sess)

        # Load latest epoch file if available
        if os.path.isdir(self.save_path):
            latest_state_path, latest_epoch = self._search_for_prev_state(
                self.save_path)

            if latest_state_path is not None:

                # if the ``.pth`` file does not exist, fall back to the
                # ``.pt`` file by stripping the trailing character
                if not os.path.isfile(latest_state_path):
                    latest_state_path = latest_state_path[:-1]

                logger.info("Attempting to load state from previous \
                            training from %s" % latest_state_path)

                self.update_state(latest_state_path)
                self.start_epoch = latest_epoch
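Two helpers used above are only referenced, never shown. First, ``optim_fn`` is expected to turn an optimizer class plus keyword arguments into a dictionary of optimizers; a minimal sketch of such a factory (an assumption, not necessarily delira's implementation):

    def create_optims_default(optim_cls, **optim_params):
        # a single optimizer registered under a fixed key; setups with
        # several losses could return multiple entries here
        return {"default": optim_cls(**optim_params)}

Second, ``initialize_uninitialized`` is commonly implemented with the classic TF1 pattern below (a sketch; the actual delira helper may differ):

    import tensorflow as tf

    def initialize_uninitialized(sess):
        # initialize only those variables the session has not seen yet,
        # so previously restored weights stay untouched
        global_vars = tf.global_variables()
        init_flags = sess.run(
            [tf.is_variable_initialized(v) for v in global_vars])
        not_initialized = [v for v, flag in zip(global_vars, init_flags)
                           if not flag]
        if not_initialized:
            sess.run(tf.variables_initializer(not_initialized))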