Example #1
 def __init__(self, args, **kwargs):
     """ Initializes a util class for training neural models. """
     super(Trainer, self).__init__(**kwargs)
     self._tb_log_dir = args["tb_log_dir"]
     self._train_steps = args["train_steps"]
     self._summary_steps = args["summary_steps"]
     self._save_checkpoint_steps = args["save_checkpoint_steps"]
     self._checkpoints_max_to_keep = args["checkpoints_max_to_keep"]
     self._initial_global_step = args["initial_global_step"]
     self._pretrain_variable_pattern = args["pretrain_variable_pattern"]
     if args["pretrain_model"] and isinstance(args["pretrain_model"][0],
                                              dict):
         self._pretrain_v2 = True
         self._pretrain_model = args["pretrain_model"]
         if self._pretrain_variable_pattern:
             logging.info(
                 "Using pretrain model v2 and ignoring pretrain_variable_pattern: "
                 f"{self._pretrain_variable_pattern}")
     else:
         self._pretrain_v2 = False
         self._pretrain_model = flatten_string_list(args["pretrain_model"])
         if self._pretrain_model is not None and self._pretrain_variable_pattern is None:
             self._pretrain_variable_pattern = [None] * len(
                 self._pretrain_model)
         assert (
             (self._pretrain_model is None
              and self._pretrain_variable_pattern is None)
             or len(self._pretrain_model) == len(
                 self._pretrain_variable_pattern)
             or len(self._pretrain_model) == 1
         ), "`pretrain_variable_pattern` must match `pretrain_model`."
     self._update_cycle = args["update_cycle"]
     self._clip_value = args["clip_value"]
     self._clip_norm = args["clip_norm"]
     self._hvd_backend = self.strategy if self.strategy in [
         "byteps", "horovod"
     ] else None
     with training_utils.get_strategy_scope(self.strategy):
         self._criterion = build_criterion(args)
         self._criterion.set_model(self.model)
         self._lr_schedule_args = args
         if compat.IS_PREV_TF_2_4_0:
             self._optimizer = build_optimizer(args)
         else:
             self._optimizer = build_optimizer(args,
                                               clipnorm=self._clip_norm,
                                               clipvalue=self._clip_value)
         assert self._optimizer is not None, "optimizer parameters must be provided for training."
     self._validator = build_validator(args)
     self._experimental_count_batch_num = args[
         "experimental_count_batch_num"]
     self._freeze_variables = args["freeze_variables"]
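
The `flatten_string_list` helper used above is not shown in this excerpt. A minimal sketch of the behavior the trainer seems to rely on (a string or nested list of strings normalized into a flat list, with empty input mapped to None) could look like the following; the real neurst implementation may differ:

    def flatten_string_list(value):
        """ Hypothetical sketch: flatten a string or nested list of strings
        into a flat list, returning None for empty input. """
        if value is None:
            return None
        if isinstance(value, str):
            return [value]
        flat = []
        for item in value:
            sub = flatten_string_list(item)
            if sub:
                flat.extend(sub)
        return flat or None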
Example #2
 def build(self, strategy, task, model):
     """ Initializes. """
     self._strategy = strategy
     self._criterion: Criterion = build_criterion(
         self.args["eval_criterion.class"],
         **self.args["eval_criterion.params"])
     if self._criterion is None:
         logging.info(
             "WARNING: no criterion is provided in CriterionValidator "
             "for validation process.")
         self._validate_criterion = False
         return self
     self._criterion.set_model(model)
     self._custom_dataset = build_dataset(
         self.args["eval_dataset.class"],
         **self.args["eval_dataset.params"])
     if self._custom_dataset is None:
         logging.info("WARNING: no validation dataset is provided "
                      "in CriterionValidator for validation process.")
         self._validate_criterion = False
         return self
     from neurst.exps.evaluator import Evaluator
     with training_utils.get_strategy_scope(strategy):
         self._criterion_model = Evaluator.build_evaluation_model(
             task, model, self._criterion)
         self._eval_tfds = training_utils.build_datasets(
             compat.ModeKeys.EVAL, strategy, self._custom_dataset, task,
             True, self._eval_task_args)
     self._criterion_metric = self._criterion.as_metric()
     if isinstance(self._custom_dataset, MultipleDataset):
         self._criterion_recorder = {
             name: training_utils.TrainingStatusRecorder(
                 model=model, task=task, metric=self._criterion_metric)
             for name in self._custom_dataset.datasets
         }
         self._avg_criterion_recorder = training_utils.TrainingStatusRecorder(
             model=model, task=task, metric=self._criterion_metric)
         self._mixed_criterion_recorder = training_utils.TrainingStatusRecorder(
             model=model, task=task, metric=self._criterion_metric)
     else:
         self._criterion_recorder = training_utils.TrainingStatusRecorder(
             model=model, task=task, metric=self._criterion_metric)
     self._criterion_start_time = time.time()
     return self
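
For context, a hedged sketch of how this `build` method might be invoked; the `CriterionValidator` class name follows from the warning messages above, but the constructor signature and surrounding setup are assumptions:

    # Hypothetical usage; build() returns self, so calls can be chained.
    validator = CriterionValidator(args)
    validator.build(strategy, task, model)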
Example #3
    def __init__(self, args):
        """ Initializes the cross entropy with label smoothing.

        Args:
            args: A dict of full parameters.
        """
        super(JointCriterion, self).__init__()
        assert isinstance(args["criterions"], list), (
            "`criterions` should be a list of multiple criterion settings for JointCriterion."
        )
        self._criterions = []
        self._alphas = []
        logging.info(
            "Creating JointCriterion for training, which is composed by:")
        for crit in args["criterions"]:
            self._alphas.append(crit.pop("alpha", 1.0))
            self._criterions.append(build_criterion(crit))
            logging.info("  - {} with alpha={}".format(
                self._criterions[-1].__class__.__name__, self._alphas[-1]))
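
Based on how `args["criterions"]` is consumed above, where each entry is a dict from which an optional `alpha` weight is popped before the rest is handed to `build_criterion`, a plausible configuration could look like this (the `class`/`params` keys mirror example #2 and the criterion names are assumptions here):

    args = {
        "criterions": [
            # `alpha` weights each criterion; it defaults to 1.0 when absent.
            {"class": "LabelSmoothedCrossEntropy", "params": {}, "alpha": 1.0},
            {"class": "CTCCriterion", "params": {}, "alpha": 0.3},
        ]
    }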
Example #4
 def __init__(self, args, **kwargs):
     """ Initializes a util class for training neural models. """
     super(Trainer, self).__init__(**kwargs)
     self._tb_log_dir = args["tb_log_dir"]
     self._train_steps = args["train_steps"]
     self._summary_steps = args["summary_steps"]
     self._save_checkpoint_steps = args["save_checkpoint_steps"]
     self._checkpoints_max_to_keep = args["checkpoints_max_to_keep"]
     self._initial_global_step = args["initial_global_step"]
     self._pretrain_model = flatten_string_list(args["pretrain_model"])
     self._pretrain_variable_pattern = args["pretrain_variable_pattern"]
     if self._pretrain_model is not None and self._pretrain_variable_pattern is None:
         self._pretrain_variable_pattern = [None] * len(
             self._pretrain_model)
     assert (
         (self._pretrain_model is None
          and self._pretrain_variable_pattern is None) or len(
              self._pretrain_model) == len(self._pretrain_variable_pattern)
         or len(self._pretrain_model) == 1
     ), "`pretrain_variable_pattern` must match `pretrain_model`."
     self._update_cycle = args["update_cycle"]
     self._clip_value = args["clip_value"]
     self._clip_norm = args["clip_norm"]
     self._hvd_backend = self.strategy if self.strategy in [
         "byteps", "horovod"
     ] else None
     with training_utils.get_strategy_scope(self.strategy):
         self._criterion = build_criterion(args)
         self._criterion.set_model(self.model)
         self._lr_schedule = build_lr_schedule(args)
         optimizer = build_optimizer(args)
         assert optimizer is not None, "optimizer parameters must be provided for training."
         self._optimizer = _handle_fp16_and_distributed_optimizer(
             optimizer, self._lr_schedule, self._hvd_backend)
     self._validator = build_validator(args)
     self._experimental_count_batch_num = args[
         "experimental_count_batch_num"]
Example #5
 def __init__(self, args, **kwargs):
     """ Initializes a util class for training neural models. """
     super(Trainer, self).__init__(**kwargs)
     self._tb_log_dir = args["tb_log_dir"]
     self._train_steps = args["train_steps"]
     self._summary_steps = args["summary_steps"]
     self._save_checkpoint_steps = args["save_checkpoint_steps"]
     self._checkpoints_max_to_keep = args["checkpoints_max_to_keep"]
     self._initial_global_step = args["initial_global_step"]
     self._pretrain_variable_pattern = args["pretrain_variable_pattern"]
     if args["pretrain_model"] and isinstance(args["pretrain_model"][0],
                                              dict):
         self._pretrain_v2 = True
         self._pretrain_model = args["pretrain_model"]
         if self._pretrain_variable_pattern:
             logging.info(
                 "Using pretrain model v2 and ignoring pretrain_variable_pattern: "
                 f"{self._pretrain_variable_pattern}")
     else:
         self._pretrain_v2 = False
         self._pretrain_model = flatten_string_list(args["pretrain_model"])
         if args["mask_dir"]:
             self.mask_dir = args["mask_dir"][0]
             # print(self.mask_dir)
             # self.load_mask = np.load(self.mask_dir, allow_pickle=True)
             with open(self.mask_dir, 'rb') as f:
                 self.load_mask = pickle.load(f)
             # i = 0
             # for weight in self.load_mask:
             #     if  i <= 1000:
             #         tf.print(weight.name, output_stream='file://./mask.txt')
             #         if weight.shape.ndims > 0:
             #             tf.print(weight[:1], output_stream='file://./mask.txt', summarize=-1, name=weight.name)
             #         else:
             #             tf.print(weight, output_stream='file://./mask.txt', summarize=-1, name=weight.name)
             #     else:
             #         i += 1
         else:
             self.mask_dir = os.path.join(self.model_dir, "mask.pkl")
             self.load_mask = None
         if self._pretrain_model is not None:
             if self._pretrain_variable_pattern is None:
                 self._pretrain_variable_pattern = [None] * len(
                     self._pretrain_model)
             elif isinstance(self._pretrain_variable_pattern, str):
                 self._pretrain_variable_pattern = [
                     self._pretrain_variable_pattern
                 ]
         assert (
             (self._pretrain_model is None
              and self._pretrain_variable_pattern is None)
             or len(self._pretrain_model) == len(
                 self._pretrain_variable_pattern)
             or len(self._pretrain_model) == 1
         ), "`pretrain_variable_pattern` must match `pretrain_model`."
     self._update_cycle = args["update_cycle"]
     self._clip_value = args["clip_value"]
     self._clip_norm = args["clip_norm"]
     self._hvd_backend = self.strategy if self.strategy in [
         "byteps", "horovod"
     ] else None
     with training_utils.get_strategy_scope(self.strategy):
         self._criterion = build_criterion(args)
         self._criterion.set_model(self.model)
         self._lr_schedule_args = args
         if compat.IS_PREV_TF_2_4_0:
             self._optimizer = build_optimizer(args)
         else:
             self._optimizer = build_optimizer(args,
                                               clipnorm=self._clip_norm,
                                               clipvalue=self._clip_value)
         assert self._optimizer is not None, "optimizer parameters must be provided for training."
     self._validator = build_validator(args)
     self._experimental_count_batch_num = args[
         "experimental_count_batch_num"]
     self._freeze_variables = args["freeze_variables"]
     self._pruning_schedule = build_pruning_schedule(args)
     self._partial_tuning = args["partial_tuning"]
     self._pruning_variable_pattern = args["pruning_variable_pattern"]
     self._nopruning_variable_pattern = args["nopruning_variable_pattern"]
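
The mask consumed above is read with `pickle` from `mask.pkl`. A minimal sketch of writing a compatible file follows; the mask's actual structure (here, one 0/1 array per prunable variable) and the `model_dir` path are assumptions:

    import os
    import pickle

    import numpy as np

    model_dir = "./model"  # hypothetical output directory
    # Hypothetical layout: one binary mask array per prunable weight.
    mask = [np.ones((512, 512), dtype=np.float32)]
    with open(os.path.join(model_dir, "mask.pkl"), "wb") as f:
        pickle.dump(mask, f)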
Example #6
 def __init__(self, args, **kwargs):
     """ Initializes a util class for evaluating neural models. """
     super(Evaluator, self).__init__(**kwargs)
     self._criterion = build_criterion(args)