Example #1
 def custom_on_train_batch_end(self, step, logs=None):
     if not hasattr(self.model.optimizer, 'lr'):
         raise ValueError('Optimizer must have a "lr" attribute.')
     lr = self._lr_schedule(step)
     if not (compat.is_tf_tensor(lr)
             or isinstance(lr, (float, numpy.float32, numpy.float64))):
         raise ValueError('The output of the "schedule" function '
                          'should be a float.')
     if compat.is_tf_tensor(lr) and not lr.dtype.is_floating:
         raise ValueError('The dtype of the Tensor should be float.')
     K.set_value(self.model.optimizer.lr, K.get_value(lr))
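For reference, here is a minimal, self-contained sketch of the same pattern exercised outside a full training loop. The warmup schedule, its parameters, and the toy loop below are illustrative assumptions and are not part of the original snippet.

import tensorflow as tf

def warmup_schedule(step, warmup_steps=100, peak_lr=1e-3):
    """Toy linear-warmup schedule, used only for illustration."""
    return peak_lr * min(1.0, step / warmup_steps)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0)
for step in (1, 50, 100, 200):
    lr = warmup_schedule(step)
    # Same pattern as above: push the scheduled value into optimizer.lr.
    tf.keras.backend.set_value(optimizer.lr, lr)
    print(step, float(tf.keras.backend.get_value(optimizer.lr)))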
Example #2
    def __call__(self, model_inp, model_out):
        """ Calculates.

        Args:
            model_inp: A dict containing the model inputs.
            model_out: The logits tensor or a dict containing the logits tensor.
                The logits tensor with shape [batch, max_len, vocab_size].

        Returns:
            A tuple (nll_sum, num_of_samples, num_of_tokens) with shapes:
                nll_sum: [batch_size, ]
                num_of_samples: [1, ]
                num_of_tokens: [batch_size, ]
        """
        logits = model_out
        if isinstance(model_out, dict):
            logits = model_out["logits"]
        elif not is_tf_tensor(model_out):
            raise ValueError("Not supported type of model_out: {}".format(
                type(model_out)))

        logits = tf.cast(logits, tf.float32)
        labels = model_inp["trg"]

        with tf.name_scope("loss"):
            vocab_size = logits.get_shape()[-1]
            confidence = 1.0 - self._label_smoothing
            low_confidence = self._label_smoothing / tf.cast(
                vocab_size - 1, tf.float32)
            soft_target = tf.one_hot(tf.cast(labels, tf.int32),
                                     depth=vocab_size,
                                     on_value=confidence,
                                     off_value=low_confidence)
            # Note: this may produce NaN on degenerate (bad) samples.
            xentropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=soft_target)
            # xentropy = - tf.reduce_sum(soft_target * tf.nn.log_softmax(logits), axis=-1)
            # Calculate the best (lowest) possible value of cross entropy, and
            # subtract from the cross entropy loss.
            if self._label_smoothing:
                normalizing_constant = -(
                    confidence * tf.math.log(confidence) +
                    tf.cast(vocab_size - 1, tf.float32) * low_confidence *
                    tf.math.log(low_confidence + 1e-20))
                xentropy -= normalizing_constant
            # else:
            # TODO(ZhaoChengqi) https://github.com/tensorflow/tensorflow/issues/32578
            # xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            #     logits=logits, labels=labels)
            if "trg_padding" in model_inp:
                weights = tf.cast(1 - model_inp["trg_padding"], tf.float32)
            else:
                weights = input_length_to_nonpadding(model_inp["trg_length"],
                                                     tf.shape(labels)[1],
                                                     tf.float32)
            nll_sum = tf.reduce_sum(xentropy * weights, axis=1)
            n_samples = tf.cast(tf.expand_dims(tf.shape(labels)[0], axis=0),
                                dtype=tf.float32)
            n_tokens = tf.reduce_sum(weights, axis=1)
            return nll_sum, n_samples, n_tokens
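The `normalizing_constant` subtracted above is simply the entropy of the smoothed target distribution, i.e. the lowest cross-entropy the model can reach. A small NumPy check (with an assumed vocabulary size and smoothing value, for illustration only) confirms the loss becomes zero when the predicted distribution exactly matches the soft target:

import numpy as np

vocab_size, eps = 8, 0.1                  # assumed values for the check
confidence = 1.0 - eps
low_confidence = eps / (vocab_size - 1)

# Soft target for a single token whose true id is 3.
soft_target = np.full(vocab_size, low_confidence)
soft_target[3] = confidence

# Cross entropy when the model predicts exactly the soft target ...
xent = -np.sum(soft_target * np.log(soft_target))
# ... equals the normalizing constant used in the loss above.
normalizing_constant = -(confidence * np.log(confidence)
                         + (vocab_size - 1) * low_confidence
                         * np.log(low_confidence + 1e-20))
print(xent - normalizing_constant)        # ~0.0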
Example #3
 def _process_and_truncate_text(text):
     if data_status["transcript"] == compat.DataStatus.RAW:
         if compat.is_tf_tensor(text):
             text = text.numpy()
         text = self._trg_data_pipeline.process(text,
                                                is_processed=False)
     else:
         assert data_status["transcript"] == compat.DataStatus.PROJECTED
     if mode == compat.ModeKeys.TRAIN and trunc_trg and max_trg_len:
         if compat.is_tf_tensor(text):
             text = tf.cond(
                 tf.less_equal(tf.size(text), max_trg_len),
                 lambda: text, lambda: tf.concat(
                     [text[:(max_trg_len - 1)], text[-1:]], axis=0))
         else:
             if len(text) > max_trg_len:
                 text = text[:(max_trg_len - 1)] + text[-1:]
     return text
Example #4
def dynamic_tensorshape_except_last_dim(tensor):
    """ Returns a tf.TensorShape with only last dim having the static shape. """
    shape_list = static_shape_list(tensor)
    # Mark every dimension except the last one as dynamic (None).
    for i in range(len(shape_list) - 1):
        shape_list[i] = None

    # If the last dim is itself dynamic (a Tensor), it has no static value either.
    if compat.is_tf_tensor(shape_list[-1]):
        shape_list[-1] = None
    return tf.TensorShape(shape_list)
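A hedged usage sketch: assuming `static_shape_list` (not shown here) returns a list of per-dimension static sizes, the helper above would turn a fully static [8, 20, 512] tensor into TensorShape([None, None, 512]). The standalone equivalent below is written directly against `x.shape` purely for illustration:

import tensorflow as tf

def _dynamic_except_last_dim(x):
    """Illustrative standalone equivalent: keep only the last dim static."""
    dims = x.shape.as_list()              # e.g. [8, 20, 512]; dynamic dims are None
    dims = [None] * (len(dims) - 1) + dims[-1:]
    return tf.TensorShape(dims)

x = tf.zeros([8, 20, 512])
print(_dynamic_except_last_dim(x))        # (None, None, 512)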
Example #5
 def _process_and_truncate(text, dp, trunc, max_len):
     if data_status != compat.DataStatus.PROJECTED:
         text = dp.process(text, is_processed=(data_status == compat.DataStatus.PROCESSED))
     if mode == compat.ModeKeys.TRAIN and trunc and max_len:
         if compat.is_tf_tensor(text):
             text = tf.cond(
                 tf.less_equal(tf.size(text), max_len), lambda: text,
                 lambda: tf.concat([text[:(max_len - 1)], text[-1:]], axis=0))
         elif len(text) > max_len:
             text = text[:(max_len - 1)] + text[-1:]
     return text
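Note that the truncation branch keeps the final element (typically the EOS id) and drops tokens before it, so a sequence longer than `max_len` still ends with its original last token. With the illustrative values below (the token ids and the assumed EOS id 2 are made up for the sketch):

# Illustrative values only: a token list ending in an assumed EOS id (2).
text, max_len = [13, 7, 9, 5, 2], 4
if len(text) > max_len:
    text = text[:(max_len - 1)] + text[-1:]
print(text)   # [13, 7, 9, 2]: truncated to max_len, last token preserved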
Example #6
 def data_proc(data, with_label):
     feature = _process_audio(data["audio"])
     # Frame count: total feature length divided by feature dim and channel count.
     frame_len = (tf.shape(feature)[0] if compat.is_tf_tensor(feature)
                  else feature.shape[0])
     ret = {
         "audio": feature,
         "audio_length": tf.cast(
             frame_len // self._audio_feature_dim // self._audio_feature_channels,
             dtype=tf.int64),
     }
     if with_label:
         ret["transcript"] = tf.convert_to_tensor(
             _process_and_truncate_text(data["transcript"]), tf.int64)
     return ret
Example #7
    def __call__(self, spectrogram, true_length=None):
        """ Applies specaug.

        Args:
            spectrogram: A numpy.ndarray of shape [nframes, nfeatures]
                or a tf.Tensor of shape [nframes, nfeatures] or [batch, nframes, nfeatures].
            true_length: A tf.Tensor of shape [batch, ] if `spectrogram` is a tensor of shape
                [batch, nframes, nfeatures], else None.

        Returns:
            A tuple (augmented spectrogram, new true_length) if ndims of `spectrogram` is 3,
            else the augmented spectrogram.
        """
        if is_tf_tensor(spectrogram):
            return self._call_tf(spectrogram)
        return self._call_numpy(spectrogram)
Example #8
def stack_beam_size(x, beam_size):
    """ Tiles a given tensor by beam_size.

    Args:
        x: A tensor with shape [batch_size, ...].
        beam_size: An int scalar.

    Returns:
        The tiled tensor with shape [batch_size * beam_size, ...].

    Raises:
        AssertionError: if `x` is not a tf.Tensor.
    """
    assert compat.is_tf_tensor(x)
    original_shape = tf.shape(x)
    x = tf.expand_dims(x, axis=1)
    tile_dims = [1] * x.shape.ndims
    tile_dims[1] = beam_size
    tiled_x = tf.tile(x, tile_dims)
    tiled_shape = tf.concat([[-1], original_shape[1:]], axis=0)
    return tf.reshape(tiled_x, tiled_shape)
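A small usage sketch of the function above (shapes and values are illustrative; `compat` is assumed to come from the example's own imports). Tiling by beam_size repeats each batch entry beam_size times along the leading axis:

import tensorflow as tf

x = tf.reshape(tf.range(2 * 3), [2, 3])   # batch_size=2, rows [0,1,2] and [3,4,5]
tiled = stack_beam_size(x, beam_size=2)
print(tiled.shape)                         # (4, 3)
print(tiled.numpy())
# [[0 1 2]
#  [0 1 2]
#  [3 4 5]
#  [3 4 5]]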
Example #9
File: trainer.py  Project: lileicc/neurst
    def run(self):
        """ Training a neural model.

        Step 1: Create the training model.
        Step 2: Restore checkpoint/pretrained model/global_step if they exist.
        Step 3: Fetch the training data.
        Step 4: Build the training callbacks.
        Step 5: TRAIN!!!
        """
        if self._hvd_backend == "horovod":
            import horovod.tensorflow.keras as hvd
        elif self._hvd_backend == "byteps":
            import byteps.tensorflow.keras as hvd

        tfds = training_utils.build_datasets(compat.ModeKeys.TRAIN,
                                             self.strategy,
                                             self.custom_dataset, self.task)
        if isinstance(self.custom_dataset, MultipleDataset):
            _tfds = None
            for _, ds in tfds.items():
                if _tfds is None:
                    _tfds = ds
                else:
                    _tfds = _tfds.concatenate(ds)
            tfds = _tfds
        tfds = tfds.prefetch(tf.data.experimental.AUTOTUNE)
        # Step 1: create a model
        with training_utils.get_strategy_scope(self.strategy):
            inps = self.task.create_inputs(compat.ModeKeys.TRAIN)
            formatted_inps = self.task.example_to_input(
                inps, compat.ModeKeys.TRAIN)
            model_out = self.model(formatted_inps, is_training=True)
            for metric_layer in self.task.build_metric_layer():
                model_out = metric_layer([formatted_inps, model_out])
            if (LooseVersion(tf.__version__) < LooseVersion("2.3")
                    or LooseVersion(tf.__version__) >= LooseVersion("2.5")):
                logging.info(
                    f"Warning: Need further check on AccumgradKerasModel when TF version={tf.__version__}. "
                    f"Here we ignore update_cycle={self._update_cycle}, "
                    f"clip_value={self._clip_value}, clip_norm={self._clip_norm}."
                )
                keras_model = tf.keras.Model(inps, model_out)
            elif compat.IS_PREV_TF_2_4_0:
                from neurst.training.gradaccum_keras_model import TF23GradAccumKerasModel
                keras_model = TF23GradAccumKerasModel(
                    inps,
                    model_out,
                    update_cycle=self._update_cycle,
                    clip_value=self._clip_value,
                    clip_norm=self._clip_norm,
                    freeze_variables=self._freeze_variables)
            else:
                keras_model = GradAccumKerasModel(
                    inps,
                    model_out,
                    update_cycle=self._update_cycle,
                    clip_value=self._clip_value,
                    clip_norm=self._clip_norm,
                    freeze_variables=self._freeze_variables)

            loss = self._criterion.reduce_loss(formatted_inps, model_out)
            if compat.is_tf_tensor(loss) or isinstance(loss, (list, tuple)):
                keras_model.add_loss(loss)
            elif isinstance(loss, dict):
                for _name, _loss in loss.items():
                    keras_model.add_loss(_loss)
                    keras_model.add_metric(_loss,
                                           name=_name + "_mean",
                                           aggregation="mean")
            else:
                raise ValueError("criterion.reduce_loss returns "
                                 "unsupported value of type: {}".format(
                                     type(loss)))
            self._restore_ckpt_or_pretrain()
            self._lr_schedule = build_lr_schedule(self._lr_schedule_args)
            if self._pruning_schedule is not None:
                self._optimizer = create_pruning_optimizer(
                    self._optimizer,
                    self.model,
                    self._pruning_schedule,
                    pruning_variable_pattern=self._pruning_variable_pattern,
                    nopruning_variable_pattern=self._nopruning_variable_pattern,
                    keep_prune_property=True)
            self._optimizer = training_utils.handle_fp16_and_distributed_optimizer(
                self._optimizer, self._lr_schedule, self._hvd_backend)
            if self._hvd_backend is None:
                keras_model.compile(self._optimizer)
            else:
                # NOTE: we already add Horovod DistributedOptimizer in `_handle_fp16_and_distributed_optimizer`.
                # Horovod: Specify `experimental_run_tf_function=False` to ensure TensorFlow
                # uses hvd.DistributedOptimizer() to compute gradients.
                keras_model.compile(self._optimizer,
                                    experimental_run_tf_function=False)
            keras_model.summary()
            summary_model_variables(self.model, self._freeze_variables)
        # initialize the checkpoint manager
        _ = compat.get_saver_or_default(
            self.model,
            self.model_dir,
            max_to_keep=self._checkpoints_max_to_keep)
        # build the training callbacks
        if not self._tb_log_dir:
            self._tb_log_dir = os.path.join(self.model_dir, "train")

        training_callbacks = [
            MetricReductionCallback(self.strategy,
                                    self._summary_steps,
                                    self._tb_log_dir,
                                    device="GPU:0",
                                    lr_schedule=self._lr_schedule)
        ]
        if self._hvd_backend is None or hvd.rank() == 0:
            training_callbacks.append(
                CustomCheckpointCallback(
                    self.task.model_configs(self.model),
                    save_checkpoint_steps=self._save_checkpoint_steps))
            if self._validator is not None:
                training_callbacks.append(
                    self._validator.build(self.strategy, self.task,
                                          self.model))
        if self._hvd_backend is not None:
            # Horovod: average metrics among workers at the end of every epoch.
            #
            # Note: This callback must be in the list before the ReduceLROnPlateau,
            # TensorBoard or other metrics-based callbacks.
            # NOTE!!! HERE we already integrate the metric averaging behaviour into the MetricReductionCallback.
            # training_callbacks.insert(0, hvd.callbacks.MetricAverageCallback(device="GPU:0"))

            # Horovod: broadcast initial variable states from rank 0 to all other processes.
            # This is necessary to ensure consistent initialization of all workers when
            # training is started with random weights or restored from a checkpoint.
            training_callbacks.insert(
                0,
                hvd.callbacks.BroadcastGlobalVariablesCallback(0,
                                                               device="GPU:0"))
            if self._lr_schedule is not None:
                training_callbacks.append(
                    LearningRateScheduler(self._lr_schedule))

        if self._experimental_count_batch_num:
            logging.info("Scanning the dataset......")
            iterator = iter(
                training_utils.maybe_distribution_dataset(self.strategy, tfds))
            cnt = 0
            for _ in iterator:
                cnt += 1
            logging.info(f"Total {cnt} batches per EPOCH.")

        history = keras_model.fit(
            map_data_for_keras(tfds.repeat()),
            initial_epoch=0,
            epochs=1,
            steps_per_epoch=self._train_steps,  # * args["update_cycle"],
            verbose=2,
            callbacks=training_callbacks)
        logging.info(history.history)
Example #10
def _convert_to_tensor(gradient):
    if is_tf_tensor(gradient):
        return gradient
    return tf.convert_to_tensor(gradient)
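A brief illustration of the pass-through behaviour of the helper above (variable names are only for the sketch; `is_tf_tensor` is assumed to come from the example's own imports): tensors are returned unchanged, while NumPy or Python values are wrapped via tf.convert_to_tensor.

import numpy as np
import tensorflow as tf

grad_np = np.ones([2, 2], dtype=np.float32)
grad_tf = tf.ones([2, 2])

print(type(_convert_to_tensor(grad_np)))        # converted into a tf.Tensor
print(_convert_to_tensor(grad_tf) is grad_tf)   # True: already a tensor, returned as-is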