Example #1
 def testPaddingCrossEntropyFactored(self):
   vocab_size = 19
   rows = 5
   cols = 4
   depth = 11
   label_smoothing = 0.1
   features = np.random.rand(rows, cols, depth)
   weights = np.random.rand(vocab_size, depth)
   labels = np.random.randint(0, vocab_size - 1, size=(rows, cols))
   with self.test_session() as session:
     features = tf.to_float(features)
     weights = tf.to_float(weights)
     labels = tf.to_int32(labels)
     logits = tf.matmul(
         tf.reshape(features, [rows * cols, depth]), weights, transpose_b=True)
     logits = tf.reshape(logits, [rows, cols, vocab_size])
     loss_num, loss_den = common_layers.padded_cross_entropy(
         logits, labels, label_smoothing=label_smoothing, reduce_sum=False)
     factored_logits = common_layers.FactoredTensor(features, weights)
     loss_num_f, loss_den_f = common_layers.padded_cross_entropy_factored(
         factored_logits,
         labels=labels,
         label_smoothing=label_smoothing,
         reduce_sum=False)
     num, den, num_f, den_f = session.run(
         [loss_num, loss_den, loss_num_f, loss_den_f])
   self.assertEqual(num.shape, (rows, cols))
   self.assertEqual(den.shape, (rows, cols))
   self.assertEqual(num_f.shape, (rows, cols))
   self.assertEqual(den_f.shape, (rows, cols))
   self.assertAllClose(num, num_f)
   self.assertAllClose(den, den_f)
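A minimal NumPy sketch (illustrative only, reusing the constants from the test above) of the relationship the test checks: FactoredTensor(features, weights) stands in for the dense logits features · weightsᵀ, which is what lets padded_cross_entropy_factored work from the two small factors instead of a pre-built logits tensor.

import numpy as np

# Illustrative sketch only; mirrors the shapes used in the test above.
rows, cols, depth, vocab_size = 5, 4, 11, 19
features = np.random.rand(rows, cols, depth)
weights = np.random.rand(vocab_size, depth)

# Dense logits built by the non-factored path:
# [rows * cols, vocab_size] reshaped to [rows, cols, vocab_size].
dense_logits = features.reshape(rows * cols, depth).dot(weights.T)
dense_logits = dense_logits.reshape(rows, cols, vocab_size)

# A FactoredTensor keeps (features, weights) instead of dense_logits, which is
# what padded_cross_entropy_factored consumes in the test above.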
Example #2

  def top(self, body_output, _):
      """Generate logits.

      Args:
        body_output: A Tensor with shape [batch, p0, p1, body_input_depth]

      Returns:
        logits: A Tensor with shape  [batch, p0, p1, ?, vocab_size].
      """
     scope_name = "shared"
     reuse = True
     if self._model_hparams.symbol_modality_skip_top:
         return tf.expand_dims(body_output, 3)
     with tf.variable_scope(scope_name, reuse=reuse):
         var = self._get_weights()
         if (self._model_hparams.factored_logits and
                 self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN):
             # insert channels dimension
             body_output = tf.expand_dims(body_output, 3)
             logits = common_layers.FactoredTensor(body_output, var)
         else:
             shape = tf.shape(body_output)[:-1]
             body_output = tf.reshape(body_output,
                                      [-1, self._body_input_depth])
             logits = tf.matmul(body_output, var, transpose_b=True)
             logits = tf.reshape(
                 logits, tf.concat([shape, [1, self._vocab_size]], 0))
         return logits
Example #3
  def top(self, body_output, _):
    """Generate logits.

    Args:
      body_output: A Tensor with shape [batch, p0, p1, body_input_depth]
    Returns:
      logits: A Tensor with shape  [batch, p0, p1, ?, vocab_size].
    """
    if self._model_hparams.symbol_modality_skip_top:
      return tf.expand_dims(body_output, 3)

    if self._model_hparams.shared_embedding_and_softmax_weights:
      scope_name = "shared"
      reuse = tf.AUTO_REUSE
    else:
      scope_name = "softmax"
      reuse = False
    with tf.variable_scope(scope_name, reuse=reuse):
      body_output_shape = common_layers.shape_list(body_output)
      var = self._get_weights(body_output_shape[-1])
      if (self._model_hparams.factored_logits and
          self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN):
        # insert channels dimension
        body_output = tf.expand_dims(body_output, 3)
        return common_layers.FactoredTensor(body_output, var)
      else:
        body_output = tf.reshape(body_output, [-1, body_output_shape[-1]])
        logits = tf.matmul(body_output, var, transpose_b=True)
        return tf.reshape(logits,
                          body_output_shape[:-1] + [1, self._vocab_size])
Example #4
  def top(self, body_output, _):
    """Generate logits.

    Args:
      body_output: A Tensor with shape [batch, p0, p1, body_input_depth]
    Returns:
      logits: A Tensor with shape  [batch, p0, p1, ?, vocab_size].
    """
    if self._model_hparams.symbol_modality_skip_top:
      return tf.expand_dims(body_output, 3)

    if self._model_hparams.shared_embedding_and_softmax_weights:
      scope_name = "shared"
      reuse = True
    else:
      scope_name = "softmax"
      reuse = False

    with tf.variable_scope(scope_name, reuse=reuse):
      body_output_shape = common_layers.shape_list(body_output)
      var = self._get_weights(body_output_shape[-1])
      if (self._model_hparams.factored_logits and
          self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN):
        # insert channels dimension
        body_output = tf.expand_dims(body_output, 3)
        return common_layers.FactoredTensor(body_output, var)
      else:
        body_output = tf.reshape(body_output, [-1, body_output_shape[-1]])
        logits = tf.matmul(body_output, var, transpose_b=True)
        # Reshape like upstream t2t so that the logits end up with the
        # expected trailing dims of [1, vocab_size]
        # https://github.com/tensorflow/tensor2tensor/blob/d600c8bb196193596fdb38c2b6e5393c4e240564/tensor2tensor/layers/modalities.py#L1135
        return tf.reshape(logits,
                          body_output_shape[:-1] + [1, self._vocab_size])
Example #5
 def testFactoredTensorImplicitConversion(self):
     a = np.random.rand(3, 4, 5)
     b = np.random.rand(6, 5)
     c = np.random.rand(3, 4, 6)
     # a factored representation of a Tensor of shape (3, 4, 6)
     factored = common_layers.FactoredTensor(tf.to_float(a), tf.to_float(b))
     # implicitly converts factored to a Tensor (performing the matmul)
     d = factored + tf.to_float(c)
     out = self.evaluate(d)
     self.assertEqual(out.shape, (3, 4, 6))
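The implicit conversion exercised above relies on TensorFlow's tensor-conversion registry. A rough sketch of that mechanism, assuming TF 1.x graph mode; SimpleFactored below is a hypothetical stand-in, not the tensor2tensor implementation:

import tensorflow as tf

class SimpleFactored(object):
  """Holds (a, b) and performs the matmul only when a real Tensor is needed."""

  def __init__(self, a, b):
    self.a = a  # [..., depth]
    self.b = b  # [vocab_size, depth]

  def to_tensor(self):
    a_shape = tf.shape(self.a)
    flat = tf.reshape(self.a, [-1, a_shape[-1]])
    product = tf.matmul(flat, self.b, transpose_b=True)
    out_shape = tf.concat([a_shape[:-1], tf.shape(self.b)[:1]], axis=0)
    return tf.reshape(product, out_shape)

# Registering a conversion function lets ops that receive a SimpleFactored
# (such as the addition in the test above) turn it into a dense Tensor on demand.
tf.register_tensor_conversion_function(
    SimpleFactored, lambda value, *args, **kwargs: value.to_tensor())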
Example #6
    def testPaddingCrossEntropyFactoredGrad(self):
        if tf.executing_eagerly():
            return  # don't run test in Eager mode

        vocab_size = 19
        rows = 5
        cols = 4
        depth = 11
        label_smoothing = 0.1
        features = np.random.rand(rows, cols, depth)
        weights = np.random.rand(vocab_size, depth)
        labels = np.random.randint(0, vocab_size - 1, size=(rows, cols))
        with self.session() as session:
            features = tf.to_float(features)
            weights = tf.to_float(weights)
            labels = tf.to_int32(labels)
            logits = tf.matmul(tf.reshape(features, [rows * cols, depth]),
                               weights,
                               transpose_b=True)
            logits = tf.reshape(logits, [rows, cols, vocab_size])
            loss_num, loss_den = common_layers.padded_cross_entropy(
                logits,
                labels,
                label_smoothing=label_smoothing,
                reduce_sum=False)
            factored_logits = common_layers.FactoredTensor(features, weights)
            loss_num_factored, loss_den_factored = (
                common_layers.padded_cross_entropy_factored(
                    factored_logits,
                    labels=labels,
                    label_smoothing=label_smoothing,
                    reduce_sum=False))
            df, dw = tf.gradients(ys=[loss_num, loss_den],
                                  xs=[features, weights])
            df_factored, dw_factored = tf.gradients(
                ys=[loss_num_factored, loss_den_factored],
                xs=[features, weights])
            actual_df, actual_dw, actual_df_factored, actual_dw_factored = (
                session.run([df, dw, df_factored, dw_factored]))
        self.assertEqual(actual_df.shape, (rows, cols, depth))
        self.assertEqual(actual_dw.shape, (vocab_size, depth))
        self.assertEqual(actual_df_factored.shape, (rows, cols, depth))
        self.assertEqual(actual_dw_factored.shape, (vocab_size, depth))
        self.assertAllClose(actual_df, actual_df_factored)
        self.assertAllClose(actual_dw, actual_dw_factored)
Example #7

    def top(self, body_output, _):
        """Generate logits.

        Args:
          body_output: A Tensor with shape [batch, p0, p1, body_input_depth]

        Returns:
          logits: A Tensor with shape  [batch, p0, p1, ?, vocab_size].
        """
        if self._model_hparams.symbol_modality_skip_top:
            return tf.expand_dims(body_output, 3)

        scope_name = "softmax"
        reuse = tf.AUTO_REUSE

        with tf.variable_scope(scope_name, reuse=reuse):
            rank = len(body_output.get_shape().as_list())
            body_output_shape = [
                common_layers.shape_dim(body_output, i) for i in range(rank)
            ]
            var = self._get_weights(body_output_shape[-1])
            bias = tf.get_variable('bias', [self._vocab_size],
                                   initializer=tf.zeros_initializer())

            if (self._model_hparams.factored_logits and
                    self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN):
                # insert channels dimension
                body_output = tf.expand_dims(body_output, 3)
                logits = common_layers.FactoredTensor(body_output, var)
            else:
                body_output = tf.reshape(body_output,
                                         [-1, body_output_shape[-1]])
                logits = tf.matmul(body_output, var, transpose_b=True)
                logits += bias
                if self._model_hparams.normalize_before_softmax:
                    mean, variance = tf.nn.moments(logits, -1, keep_dims=True)
                    logits = (logits - mean) * tf.rsqrt(variance + 1e-6)
                temp = self._model_hparams.softmax_temperature
                logits /= temp

                out_shape = body_output_shape[:-1] + [1, self._vocab_size]
                logits = tf.reshape(logits, out_shape)
            return logits
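A tiny NumPy illustration (made-up values; the temperature is a hypothetical hparam setting) of the two extra steps this variant applies to the logits, normalization over the vocab axis and temperature scaling:

import numpy as np

logits = np.random.rand(8, 19)             # [batch, vocab_size]

# Standardize each row over the vocab axis, as the tf.nn.moments branch does.
mean = logits.mean(axis=-1, keepdims=True)
variance = logits.var(axis=-1, keepdims=True)
logits = (logits - mean) / np.sqrt(variance + 1e-6)

# Divide by the softmax temperature: values below 1.0 sharpen the downstream
# softmax distribution, values above 1.0 flatten it.
temperature = 0.7
logits /= temperature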
Example #8
    def top(self, body_output, _):
        """Generate logits.

        Args:
          body_output: A Tensor with shape [batch, p0, p1, body_input_depth]

        Returns:
          logits: A Tensor with shape  [batch, p0, p1, ?, vocab_size].
        """
        if self._model_hparams.symbol_modality_skip_top:
            return tf.expand_dims(body_output, 3)

        if self._model_hparams.shared_embedding_and_softmax_weights:
            scope_name = "shared"
            reuse = True
        else:
            scope_name = "softmax"
            reuse = False

        with tf.variable_scope(scope_name, reuse=reuse):
            body_output_shape = common_layers.shape_list(body_output)
            var = self._get_weights(body_output_shape[-1])
            if (self._model_hparams.factored_logits and
                    self._model_hparams.mode == tf.estimator.ModeKeys.TRAIN):
                # insert channels dimension
                body_output = tf.expand_dims(body_output, 3)
                return common_layers.FactoredTensor(body_output, var)
            else:
                body_output = tf.reshape(body_output,
                                         [-1, body_output_shape[-1]])
                logits = tf.matmul(body_output, var, transpose_b=True)
                if (common_layers.is_xla_compiled()
                        and self._model_hparams.mode
                        == tf.estimator.ModeKeys.TRAIN):
                    # TPU does not react kindly to extra dimensions.
                    # TODO(noam): remove this once TPU is more forgiving of extra dims.
                    return logits
                else:
                    return tf.reshape(
                        logits, body_output_shape[:-1] + [1, self._vocab_size])
Example #9
def top(body_output, targets, model_hparams, vocab_size):
    """Generate logits.

    Args:
      body_output: A Tensor with shape [batch, p0, p1, body_input_depth]
      targets: Unused.
      model_hparams: tf.HParams, model hyperparameters.
      vocab_size: int, vocabulary size.

    Returns:
      logits: A Tensor with shape  [batch, p0, p1, ?, vocab_size].
    """
    del targets  # unused arg
    # Sparsity techniques only support shared weight matrices for now
    sparsity_technique = model_hparams.get("sparsity_technique")
    assert (not sparsity_technique
            or model_hparams.shared_embedding_and_softmax_weights)
    if model_hparams.shared_embedding_and_softmax_weights:
        scope_name = "shared"
        reuse = tf.AUTO_REUSE
    else:
        scope_name = "softmax"
        reuse = False

    with tf.variable_scope(scope_name, reuse=reuse):
        body_output_shape = common_layers.shape_list(body_output)
        var = _get_weights(model_hparams, vocab_size, body_output_shape[-1])
        if (model_hparams.factored_logits
                and model_hparams.mode == tf.estimator.ModeKeys.TRAIN):
            # Sparsity techniques only support non-factored logits for now
            assert not sparsity_technique

            # insert channels dimension
            body_output = tf.expand_dims(body_output, 3)
            return common_layers.FactoredTensor(body_output, var)
        else:
            body_output = tf.reshape(body_output, [-1, body_output_shape[-1]])

            training = model_hparams.get("mode") == tf.estimator.ModeKeys.TRAIN
            if sparsity_technique == "variational_dropout":
                if training:
                    logits = vd.nn.matmul_train(
                        body_output,
                        var,
                        transpose_b=True,
                        clip_alpha=model_hparams.get("clip_log_alpha"))
                else:
                    threshold = model_hparams.get("log_alpha_threshold")
                    logits = vd.nn.matmul_eval(body_output,
                                               var,
                                               transpose_b=True,
                                               threshold=threshold)
            elif sparsity_technique == "l0_regularization":
                if training:
                    logits = l0.nn.matmul_train(body_output,
                                                var,
                                                transpose_b=True)
                else:
                    logits = l0.nn.matmul_eval(body_output,
                                               var,
                                               transpose_b=True)
            elif (sparsity_technique == "magnitude_pruning"
                  or sparsity_technique == "random_pruning"):
                logits = tf.matmul(body_output,
                                   pruning.apply_mask(var),
                                   transpose_b=True)
            else:
                logits = tf.matmul(body_output, var, transpose_b=True)

            return tf.reshape(logits, body_output_shape[:-1] + [1, vocab_size])
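As a conceptual aside on the magnitude_pruning branch: pruning.apply_mask multiplies the weights by a binary mask maintained separately by the pruning schedule. A NumPy-only sketch of the effect (the threshold here is computed inline purely for illustration; the real mask is a trained/updated variable):

import numpy as np

var = np.random.randn(19, 11)               # [vocab_size, depth] softmax weights
body_output = np.random.randn(20, 11)       # [batch, depth]

# Keep only the largest 20% of weights by magnitude (illustrative threshold).
threshold = np.percentile(np.abs(var), 80)
mask = (np.abs(var) >= threshold).astype(var.dtype)
masked_var = var * mask

# Logits are then computed against the pruned weight matrix.
logits = body_output.dot(masked_var.T)      # [batch, vocab_size]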