Example No. 1
def normalize_configuration(cfg: Namespace, train_mode: bool) -> None:
    """Given a configuration namespace, normalize the values it contains.

    Arguments:
        cfg: The namespace object returned by `Configuration.make_namespace`
        train_mode: Boolean flag controlling normalization of parameters only
            used during training.
    """
    if train_mode:
        _normalize_train_cfg(cfg)

    if cfg.tf_manager is None:
        cfg.tf_manager = get_default_tf_manager()

    # Expand 2-tuples (series, evaluator) to (series, series, evaluator),
    # reusing the output series name as the reference series name.
    cfg.evaluation = [(e[0], e[0], e[1]) if len(e) == 2 else e
                      for e in cfg.evaluation]

    if cfg.evaluation:
        cfg.main_metric = "{}/{}".format(cfg.evaluation[-1][0],
                                         cfg.evaluation[-1][-1].name)
    else:
        cfg.main_metric = "{}/{}".format(cfg.runners[-1].decoder_data_id,
                                         cfg.runners[-1].loss_names[0])

        if not cfg.tf_manager.minimize_metric:
            raise ValueError("minimize_metric must be set to True in "
                             "TensorFlowManager when using loss as "
                             "the main metric")
Example No. 2
def normalize_configuration(cfg: Namespace, train_mode: bool) -> None:
    """Given a configuration namespace, normalize the values it contains.

    Arguments:
        cfg: The namespace object returned by `Configuration.make_namespace`
        train_mode: Boolean flag controlling normalization of parameters only
            used during training.
    """
    if train_mode:
        _normalize_train_cfg(cfg)

    if cfg.tf_manager is None:
        cfg.tf_manager = get_default_tf_manager()

    # The equality holds when both are None or both are set, i.e. when the
    # user did not specify exactly one of the two options.
    if (cfg.batch_size is None) == (cfg.batching_scheme is None):
        raise ValueError("You must specify either batch_size or "
                         "batching_scheme (not both).")

    if cfg.batch_size is not None:
        assert cfg.batching_scheme is None
        cfg.batching_scheme = BatchingScheme(batch_size=cfg.batch_size)
    else:
        assert cfg.batching_scheme is not None
        cfg.batch_size = cfg.batching_scheme.batch_size

    if cfg.runners_batch_size is None:
        cfg.runners_batch_size = cfg.batching_scheme.batch_size

    cfg.runners_batching_scheme = BatchingScheme(
        batch_size=cfg.runners_batch_size,
        token_level_batching=cfg.batching_scheme.token_level_batching,
        use_leftover_buckets=True)

    # Expand 2-tuples (series, evaluator) to (series, series, evaluator),
    # reusing the output series name as the reference series name.
    cfg.evaluation = [(e[0], e[0], e[1]) if len(e) == 2 else e
                      for e in cfg.evaluation]

    if cfg.evaluation:
        cfg.main_metric = "{}/{}".format(cfg.evaluation[-1][0],
                                         cfg.evaluation[-1][-1].name)
    else:
        cfg.main_metric = "{}/{}".format(cfg.runners[-1].decoder_data_id,
                                         cfg.runners[-1].loss_names[0])

        if not cfg.tf_manager.minimize_metric:
            raise ValueError("minimize_metric must be set to True in "
                             "TensorFlowManager when using loss as "
                             "the main metric")
Example No. 3
    def build_model(self) -> None:
        if self._model_built:
            raise RuntimeError("build_model() called twice")

        random.seed(self.config.args.random_seed)
        np.random.seed(self.config.args.random_seed)

        with self.graph.as_default():
            tf.set_random_seed(self.config.args.random_seed)

            # Enable the created model parts to find this experiment.
            type(self)._current_experiment = self  # type: ignore
            self.config.build_model(warn_unused=self.train_mode)
            type(self)._current_experiment = None

            self._model = self.config.model
            self._model_built = True

            # Fall back to the general batch size when no separate runners
            # batch size was configured.
            if self.model.runners_batch_size is None:
                self.model.runners_batch_size = self.model.batch_size

            if self.model.tf_manager is None:
                self.model.tf_manager = get_default_tf_manager()

            if self.train_mode:
                check_dataset_and_coders(self.model.train_dataset,
                                         self.model.runners)
                if isinstance(self.model.val_dataset, Dataset):
                    check_dataset_and_coders(self.model.val_dataset,
                                             self.model.runners)
                else:
                    for val_dataset in self.model.val_dataset:
                        check_dataset_and_coders(val_dataset,
                                                 self.model.runners)

            if self.train_mode and self.model.visualize_embeddings:
                visualize_embeddings(self.model.visualize_embeddings,
                                     self.model.output)

        self._check_unused_initializers()
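
The distinguishing parts of Example No. 3 are the build-once guard and the temporary class-level _current_experiment reference that model parts can look up while the configuration is being built. The toy sketch below illustrates only that pattern; ToyExperiment and its members are placeholders, not the framework's actual API, and the try/finally is a defensive addition rather than something the original code uses.

class ToyExperiment:
    """Placeholder illustrating the build-once / current-experiment pattern."""

    _current_experiment = None  # class-level reference visible during the build

    def __init__(self) -> None:
        self._model_built = False

    def build_model(self) -> None:
        if self._model_built:
            raise RuntimeError("build_model() called twice")

        # Expose this experiment while the model parts are constructed,
        # then always clear the reference again.
        type(self)._current_experiment = self
        try:
            pass  # ... build model parts here ...
        finally:
            type(self)._current_experiment = None

        self._model_built = True


exp = ToyExperiment()
exp.build_model()
# exp.build_model()  # a second call would raise RuntimeError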