Example #1
    def testVirtualAdvRegularizer(self):
        """Tests virtual_adv_regularizer returning expected loss."""
        np_input = np.array([[1.0, -1.0]])
        tf_input = tf.constant(np_input)
        np_weights = np.array([[1.0, 5.0], [2.0, 2.0]])
        tf_weights = tf.constant(np_weights)
        # Linear transformation and L2 loss makes the Hessian matrix constant.
        embedding_fn = lambda x: tf.matmul(x, tf_weights)
        step_size = 0.1
        vadv_config = configs.VirtualAdvConfig(
            adv_neighbor_config=configs.AdvNeighborConfig(
                feature_mask=None,
                adv_step_size=step_size,
                adv_grad_norm=configs.NormType.L2),
            distance_config=configs.DistanceConfig(
                distance_type=configs.DistanceType.L2, sum_over_axis=-1),
            num_approx_steps=1,
            approx_difference=1e-3)  # enlarged for numerical stability
        np_seed = np.array([[0.6, 0.8]])
        tf_seed = tf.constant(np_seed)
        vadv_loss = regularizer._virtual_adv_regularizer(
            tf_input, embedding_fn, vadv_config, embedding_fn(tf_input),
            tf_seed)

        actual_loss = self.evaluate(vadv_loss)

        # For a detailed derivation of the Hessian matrix, see go/vadv-tests-hessian.
        hessian = 2 * np.dot(np_weights, np_weights.T)
        approx = np.matmul(np_seed, hessian)
        approx *= step_size / np.linalg.norm(approx, axis=-1, keepdims=True)
        expected_loss = np.linalg.norm(np.matmul(approx, np_weights))**2
        self.assertNear(actual_loss, expected_loss, err=1e-5)
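
The constant-Hessian claim in the comment above is easy to check numerically.
The following is a minimal, self-contained NumPy sketch (not part of the test
suite; all names are illustrative) that compares 2 * W @ W.T against a central
finite-difference estimate:

import numpy as np

def loss(x, w):
  # L(x) = ||x W||^2 for a row vector x; L is quadratic in x, so its
  # Hessian 2 * W @ W.T is constant.
  return np.linalg.norm(np.matmul(x, w))**2

def numeric_hessian(f, x, eps=1e-4):
  # Central finite-difference estimate of the Hessian of f at x.
  dim = x.shape[-1]
  hess = np.zeros((dim, dim))
  for i in range(dim):
    for j in range(dim):
      ei = np.zeros_like(x)
      ej = np.zeros_like(x)
      ei[0, i] = eps
      ej[0, j] = eps
      hess[i, j] = (f(x + ei + ej) - f(x + ei - ej)
                    - f(x - ei + ej) + f(x - ei - ej)) / (4.0 * eps**2)
  return hess

w = np.array([[1.0, 5.0], [2.0, 2.0]])
x = np.array([[1.0, -1.0]])
np.testing.assert_allclose(
    numeric_hessian(lambda v: loss(v, w), x), 2.0 * np.dot(w, w.T), rtol=1e-4)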
Example #2
    def testVirtualAdvRegularizerMultiStepApproximation(self):
        """Tests virtual_adv_regularizer with multi-step approximation."""
        np_input = np.array([[0.28, -0.96]])
        tf_input = tf.constant(np_input)
        embedding_fn = lambda x: x
        vadv_config = configs.VirtualAdvConfig(
            adv_neighbor_config=configs.AdvNeighborConfig(
                feature_mask=None,
                adv_step_size=1,
                adv_grad_norm=configs.NormType.L2),
            distance_config=configs.DistanceConfig(
                distance_type=configs.DistanceType.COSINE, sum_over_axis=-1),
            num_approx_steps=20,
            approx_difference=1)
        np_seed = np.array([[0.6, 0.8]])
        tf_seed = tf.constant(np_seed)
        vadv_loss = regularizer._virtual_adv_regularizer(
            tf_input, embedding_fn, vadv_config, embedding_fn(tf_input),
            tf_seed)

        actual_loss = self.evaluate(vadv_loss)

        # For a detailed derivation of the Hessian matrix, see go/vadv-tests-hessian.
        x = np_input
        hessian = np.dot(x, x.T) * np.identity(2) - np.dot(x.T, x)
        hessian /= np.linalg.norm(x)**4
        approx = np.matmul(np_seed, hessian)
        approx /= np.linalg.norm(approx, axis=-1, keepdims=True)
        expected_loss = np.matmul(np.matmul(approx, hessian),
                                  np.transpose(approx))
        self.assertNear(actual_loss, expected_loss, err=1e-5)
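
Note that the expected value above applies the Hessian only once even though
`num_approx_steps=20`. That works because this particular Hessian is a
projection onto the complement of `x` (it is idempotent), so a
power-iteration-style approximation converges after a single step. The sketch
below illustrates this; that the multi-step approximation behaves like power
iteration is an assumption about the internals, but the test's expected-value
computation mirrors exactly that scheme:

import numpy as np

x = np.array([[0.28, -0.96]])  # unit-norm input from the test
hessian = np.dot(x, x.T) * np.identity(2) - np.dot(x.T, x)
hessian /= np.linalg.norm(x)**4

v = np.array([[0.6, 0.8]])  # same seed direction as the test
for _ in range(20):  # power iteration: Hessian-vector product, then renormalize
  v = np.matmul(v, hessian)
  v /= np.linalg.norm(v)

# The dominant eigendirection of this Hessian is the unit vector orthogonal
# to x, and the iteration recovers it (up to sign) after one step.
np.testing.assert_allclose(np.abs(v), [[0.96, 0.28]], atol=1e-9)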
Example #3
  def testWeightedDistance(self):
    source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                                dtype='float32')
    target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                                dtype='float32')
    weights = tf.constant([[1], [0], [0.5], [0.5]], dtype='float32')

    l1_distance_config = configs.DistanceConfig('l1', sum_over_axis=-1)
    l1_distance_tensor = distances.pairwise_distance_wrapper(
        source_tensor, target_tensor, weights, l1_distance_config)
    l2_distance_config = configs.DistanceConfig('l2', sum_over_axis=-1)
    l2_distance_tensor = distances.pairwise_distance_wrapper(
        source_tensor, target_tensor, weights, l2_distance_config)
    with self.cached_session() as sess:
      l1_distance_value = sess.run(l1_distance_tensor)
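      # Weighted sums are 5.5 (L1) and 18.5 (L2); the default reduction
      # divides by the number of examples with nonzero weight (3 here).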
      self.assertAllClose(l1_distance_value, 5.5 / 3)
      l2_distance_value = sess.run(l2_distance_tensor)
      self.assertAllClose(l2_distance_value, 18.5 / 3)
Example #4
 def _make_model(sources_shape, targets_shape):
   """Makes a model where `sources` and `targets` have the same rank."""
   sources = tf.keras.Input(sources_shape, name='sources')
   targets = tf.keras.Input(targets_shape, name='targets')
   outputs = pairwise_distance_lib.PairwiseDistance(
       configs.DistanceConfig(
           distance_type=configs.DistanceType.KL_DIVERGENCE,
           reduction=tf.compat.v1.losses.Reduction.NONE,
           sum_over_axis=-1))(sources, targets)
   return tf.keras.Model(inputs=[sources, targets], outputs=outputs)
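
A hypothetical way to exercise `_make_model` (feature size 3 is an arbitrary
choice here): with `Reduction.NONE` the model emits one KL divergence per
(source, target) pair rather than a reduced scalar.

import numpy as np

model = _make_model(sources_shape=(3,), targets_shape=(3,))
per_pair_kl = model.predict({
    'sources': np.array([[0.3, 0.3, 0.4]], dtype='float32'),
    'targets': np.array([[0.9, 0.05, 0.05]], dtype='float32'),
})
print(per_pair_kl)  # one non-negative KL value per input pair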
Example #5
  def testDistanceInvalidAxis(self):
    source_tensor = tf.constant(1.0, dtype='float32', shape=[4, 2])
    target_tensor = tf.constant(1.0, dtype='float32', shape=[4, 2])
    weights = tf.constant(1.0, dtype='float32', shape=[4, 2])

    distance_config = configs.DistanceConfig(sum_over_axis=2)
    with self.assertRaises(ValueError):
      distance_tensor = distances.pairwise_distance_wrapper(
          source_tensor, target_tensor, weights, distance_config)
      distance_tensor.eval()
Example #6
 def testL2Distance(self):
   source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                               dtype='float32')
   target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                               dtype='float32')
   distance_config = configs.DistanceConfig('l2', sum_over_axis=-1)
   distance_tensor = distances.pairwise_distance_wrapper(
       source_tensor, target_tensor, distance_config=distance_config)
   with self.cached_session() as sess:
     distance_value = sess.run(distance_tensor)
     self.assertAllClose(distance_value, 10.25)
Example #7
 def testAssertions(self):
   """Tests that assertions still work with Keras."""
   distance_config = configs.DistanceConfig(
       distance_type=configs.DistanceType.JENSEN_SHANNON_DIVERGENCE,
       sum_over_axis=-1)
   regularizer = pairwise_distance_lib.PairwiseDistance(distance_config)
   # Try Jensen-Shannon divergence on an improper probability distribution.
   with self.assertRaisesRegex(
       tf.errors.InvalidArgumentError,
       'x and/or y is not a proper probability distribution'):
     self.evaluate(regularizer(np.array([0.6, 0.5]), np.array([[0.25, 0.75]])))
Example #8
 def testCosineDistance(self):
   source_tensor = tf.constant([[1, 1], [1, 1], [3, 4], [-1, -1]],
                               dtype='float32')
   target_tensor = tf.constant([[1, 1], [5, 5], [4, 3], [1, 1]],
                               dtype='float32')
   distance_config = configs.DistanceConfig('cosine', sum_over_axis=-1)
   distance_tensor = distances.pairwise_distance_wrapper(
       source_tensor, target_tensor, distance_config=distance_config)
   with self.cached_session() as sess:
     distance_value = sess.run(distance_tensor)
     self.assertAllClose(distance_value,
                         0.51)  # sum([0.0, 0.0, 0.04, 2.0]) / 4
Example #9
 def testCall(self):
   """Makes a function from config and runs it."""
   regularizer = pairwise_distance_lib.PairwiseDistance(
       configs.DistanceConfig(
           distance_type=configs.DistanceType.KL_DIVERGENCE, sum_over_axis=-1),
       name='kl_loss')
   # Run a computation.
   example = np.array([0.3, 0.3, 0.4])
   neighbors = np.array([[0.9, 0.05, 0.05]])
   kl_loss = self.evaluate(regularizer(example, neighbors))
   # Assert correctness of KL divergence calculation.
   self.assertNear(kl_loss, np.sum(special.kl_div(example, neighbors)),
                   _ERR_TOL)
Example #10
  def testDistanceWithoutSumOverAxis(self):
    source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                                dtype='float32')
    target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                                dtype='float32')
    weights = tf.constant([[1], [0], [0.5], [0.5]], dtype='float32')

    distance_config = configs.DistanceConfig('l1')
    distance_tensor = distances.pairwise_distance_wrapper(
        source_tensor, target_tensor, weights, distance_config)
    with self.cached_session() as sess:
      distance_value = sess.run(distance_tensor)
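      # The weighted sum of |source - target| is 5.5; without sum_over_axis
      # the division is by the 6 elements with nonzero weight after
      # broadcasting.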
      self.assertAllClose(distance_value, 5.5 / 6)
Example #11
  def testDistanceReductionMean(self):
    source_tensor = tf.constant([[1, 1], [2, 2], [0, 2], [5, 5]],
                                dtype='float32')
    target_tensor = tf.constant([[1, 1], [0, 2], [4, 4], [1, 4]],
                                dtype='float32')
    weights = tf.constant([[1], [0], [0.5], [0.5]], dtype='float32')

    distance_mean_config = configs.DistanceConfig(
        'l1', tf.compat.v1.losses.Reduction.MEAN, sum_over_axis=-1)
    distance_mean_tensor = distances.pairwise_distance_wrapper(
        source_tensor, target_tensor, weights, distance_mean_config)
    with self.cached_session() as sess:
      distance_mean_value = sess.run(distance_mean_tensor)
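      # The weighted sum is 5.5; MEAN reduction divides by the total weight
      # 1 + 0 + 0.5 + 0.5 = 2.0.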
      self.assertAllClose(distance_mean_value, 5.5 / 2.0)
Example #12
 def testJensenShannonDistance(self):
   source_tensor = np.array([[1, 0, 0], [0.1, 0.2, 0.7]], dtype='float32')
   target_tensor = np.array([[1, 0, 0], [0.1, 0.9, 0]], dtype='float32')
   expected_tensor = np.sum(self._jsd_func(source_tensor, target_tensor), -1)
   expected_value = np.mean(expected_tensor)
   distance_config = configs.DistanceConfig(
       'jensen_shannon_divergence', sum_over_axis=-1)
   distance_tensor = distances.pairwise_distance_wrapper(
       tf.constant(source_tensor),
       tf.constant(target_tensor),
       distance_config=distance_config)
   with self.cached_session() as sess:
     distance_value = sess.run(distance_tensor)
     self.assertAllClose(distance_value, expected_value)
Example #13
 def testWeights(self):
   """Tests that weights are propagated to the distance function."""
   regularizer = pairwise_distance_lib.PairwiseDistance(
       configs.DistanceConfig(
           distance_type=configs.DistanceType.KL_DIVERGENCE, sum_over_axis=-1),
       name='weighted_kl_loss')
   example = np.array([0.1, 0.4, 0.5])
   neighbors = np.array([[0.6, 0.2, 0.2], [0.9, 0.01, 0.09]])
   neighbor_weight = 0.5
   loss = self.evaluate(regularizer(example, neighbors, neighbor_weight))
   self.assertAllClose(
       loss,
       neighbor_weight *
       np.mean(np.sum(special.kl_div(example, neighbors), -1)), _ERR_TOL)
Example #14
  def testKLDistanceFromLogit(self):
    source = np.array([[1, 2, 3], [1, -1, 2]], dtype='float32')
    target = np.array([[1, 2, 3], [1, 0, -1]], dtype='float32')

    expected_value = np.mean(
        np.sum(
            self._kl_func(
                self._softmax_func(source), self._softmax_func(target)), -1))

    distance_config = configs.DistanceConfig(
        'kl_divergence', transform_fn='softmax', sum_over_axis=-1)
    distance_tensor = distances.pairwise_distance_wrapper(
        tf.constant(source),
        tf.constant(target),
        distance_config=distance_config)
    with self.cached_session() as sess:
      distance_value = sess.run(distance_tensor)
      self.assertAllClose(distance_value, expected_value)
Example #15
  def testDistanceWithTransformButNoSumOverAxis(self):
    source = np.array([[1, 1], [2, 2], [0, 2], [10, -10]], dtype='float32')
    target = np.array([[0, 0], [0, 2], [1, 3], [3, 3]], dtype='float32')

    distance_config = configs.DistanceConfig(
        distance_type='l1',
        reduction=tf.compat.v1.losses.Reduction.NONE,
        transform_fn='softmax')
    distance_tensor = distances.pairwise_distance_wrapper(
        tf.constant(source),
        tf.constant(target),
        distance_config=distance_config)

    expected_distance = np.abs(
        self._softmax_func(source) - self._softmax_func(target))
    with self.cached_session() as sess:
      distance = sess.run(distance_tensor)
      self.assertAllClose(distance, expected_distance)
Example #16
    def testVirtualAdvRegularizerRandomPerturbation(self):
        """Tests virtual_adv_regularizer with num_approx_steps=0."""
        input_layer = tf.constant([[1.0, -1.0]])
        embedding_fn = lambda x: x
        step_size = 0.1
        vadv_config = configs.VirtualAdvConfig(
            adv_neighbor_config=configs.AdvNeighborConfig(
                feature_mask=None,
                adv_step_size=step_size,
                adv_grad_norm=configs.NormType.L2),
            distance_config=configs.DistanceConfig(
                distance_type=configs.DistanceType.L2, sum_over_axis=-1),
            num_approx_steps=0)
        vadv_loss = regularizer.virtual_adv_regularizer(
            input_layer, embedding_fn, vadv_config)
        actual_loss = self.evaluate(vadv_loss)

        # The identity embedding_fn makes the virtual adversarial loss immune to
        # the direction of the perturbation; only its size matters.
        expected_loss = step_size**2  # square loss
        self.assertNear(actual_loss, expected_loss, err=1e-5)
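
A quick NumPy illustration of the comment above (assuming the random
perturbation is rescaled to L2 norm `adv_step_size`, which is what makes the
direction irrelevant): with an identity embedding, the squared-L2 distance
between the input and its perturbed copy is always `step_size**2`.

import numpy as np

rng = np.random.default_rng(0)
for _ in range(3):
  p = rng.normal(size=2)        # random direction
  p *= 0.1 / np.linalg.norm(p)  # rescale to step_size = 0.1
  print(np.sum(p**2))           # always 0.01, independent of direction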
Example #17
 def testModelFitAndEvaluate(self, model_fn, distance_type):
     """Fit and evaluate models with various distance configurations."""
     # Set up graph-regularized model.
     distance_config = configs.DistanceConfig(
         distance_type=distance_type,
         transform_fn=configs.TransformType.SOFTMAX,
         sum_over_axis=-1)
     model = model_fn(distance_config)
     model.compile(optimizer=tf.keras.optimizers.SGD(),
                   loss=tf.keras.losses.SparseCategoricalCrossentropy(
                       from_logits=True),
                   metrics=[
                       tf.keras.metrics.SparseCategoricalAccuracy(),
                       tf.keras.metrics.SparseCategoricalCrossentropy(
                           from_logits=True),
                   ])
     # Fit and evaluate the model on dummy data that has 8 examples.
     features = {
         'features': np.random.normal(size=(8, 4)),
         'neighbors': np.random.normal(size=(8, 2, 4)),
         'neighbor_weights': np.random.uniform(size=(8, 2, 1)),
     }
     labels = np.random.randint(0, 3, size=8)
     train_history = model.fit(features, labels, batch_size=2,
                               epochs=16).history
     evaluation_results = dict(
         zip(model.metrics_names,
             model.evaluate(features, labels, batch_size=4)))
     # Assert that losses and metrics were evaluated.
     self.assertAllGreater(train_history['graph_loss'], 0.)
     self.assertGreater(evaluation_results['graph_loss'], 0.)
     self.assertAllClose(
         train_history['loss'],
         np.add(train_history['graph_loss'],
                train_history['sparse_categorical_crossentropy']), _ERR_TOL)
     self.assertNear(
         evaluation_results['loss'], evaluation_results['graph_loss'] +
         evaluation_results['sparse_categorical_crossentropy'], _ERR_TOL)
Example #18
 def __init__(self, distance_config=None, **kwargs):
     super(PairwiseDistance, self).__init__(**kwargs)
     self._distance_config = (configs.DistanceConfig()
                              if distance_config is None else
                              attr.evolve(distance_config))
Example #19
 @classmethod
 def from_config(cls, config):
     return cls(configs.DistanceConfig(**config["distance_config"]),
                name=config.get("name"))
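
A hypothetical round-trip through `from_config` (assuming `PairwiseDistance`
and `configs` are in scope as in the snippets above; whether `get_config`
produces exactly this dict layout is an assumption, but this is the layout
the method reads):

config = {
    'distance_config': {'distance_type': configs.DistanceType.L2,
                        'sum_over_axis': -1},
    'name': 'pairwise_l2',
}
layer = PairwiseDistance.from_config(config)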
Example #20
def pairwise_distance_wrapper(sources,
                              targets,
                              weights=1.0,
                              distance_config=None):
    """A wrapper to compute pairwise distance between sources and targets.

  distances = weights * distance_type(sources, targets)

  This wrapper calculates the weighted distance between `(sources, targets)`
  pairs, and provides an option to sum the element-wise differences along a
  given axis when a vector-based distance is needed.

  For the usage of `weights` and `reduction`, please refer to tf.losses. For the
  usage of `sum_over_axis`, see the following examples:

  Given target tensors with shape `[batch_size, features]`, `reduction` set to
  MEAN, and `sum_over_axis` set to the last dimension, the weighted average
  distance between `sample pairs` is returned. For example:
  With a distance_config('L2', sum_over_axis=-1), the distance between
  [[1, 1], [2, 2], [0, 2], [5, 5]] and [[1, 1], [0, 2], [4, 4], [1, 4]] will be
  {(0+0) + (4+0) + (16+4) + (16+1)}/4 = 10.25

  If `sum_over_axis` is None, the weighted average distance of `feature pairs`
  (instead of sample pairs) will be returned. For example:
  With a distance_config('L2'), the distance between
  [[1, 1], [2, 2], [0, 2], [5, 5]] and [[1, 1], [0, 2], [4, 4], [1, 4]] will be
  {(0+0) + (4+0) + (16+4) + (16+1)}/8 = 5.125

  If `transform_fn` is not None, the transform function is applied to both
  sources and targets before computing the distance. For example:
  distance_config('KL_DIVERGENCE', sum_over_axis=-1, transform_fn='SOFTMAX')
  treats `sources` and `targets` as logits, and computes the KL-divergence
  between the probability distributions.

  Args:
    sources: `Tensor` of type float32 or float64.
    targets: `Tensor` of the same type and shape as sources.
    weights: (optional) `Tensor` whose rank is either 0, or the same rank as
      `targets`, and must be broadcastable to `targets` (i.e., all dimensions
      must be either `1`, or the same as the corresponding `distance`
      dimension).
    distance_config: DistanceConfig contains the following configs (or
      hyper-parameters) for computing distances:
      (a) 'distance_type': Type of distance function to apply.
      (b) 'reduction': Type of distance reduction. Refer to tf.losses.Reduction.
      (c) 'sum_over_axis': (optional) The distance is summed over the
        difference along this axis. Note, if `sum_over_axis` is not None and
        the rank of `weights` is nonzero, the size of `weights` along
        `sum_over_axis` must be 1.
      (d) 'transform_fn': (optional) If set, both sources and targets will be
        transformed before computing the distance. For 'SOFTMAX', the
        transform is applied along the axis specified by 'sum_over_axis', or
        -1 if that is not specified.
      If `distance_config` is None, the default distance config will be used.

  Returns:
    Weighted distance scalar `Tensor`. If `reduction` is `NONE`, this has the
      same shape as `targets`.
  Raises:
    ValueError: If the shape of targets doesn't match that of sources, or if the
      shape of weights is invalid.
    TypeError: If the distance function gets an unexpected keyword argument.
  """
    if distance_config is None:
        distance_config = configs.DistanceConfig()  # Default configs.

    tf.compat.v1.losses.Reduction.validate(distance_config.reduction)

    if distance_config.transform_fn is not configs.TransformType.NONE:
        sources = _apply_transform(sources, distance_config.transform_fn,
                                   distance_config.sum_over_axis)
        targets = _apply_transform(targets, distance_config.transform_fn,
                                   distance_config.sum_over_axis)

    sum_over_axis = distance_config.sum_over_axis
    # Validate `sum_over_axis`.
    _assert_valid_axis(sources.get_shape().ndims, sum_over_axis)
    distance_fn = _select_distance_fn(distance_config.distance_type)
    if distance_config.distance_type == configs.DistanceType.COSINE:
        # Cosine distance assumes its input tensors are unit-normalized.
        sources = tf.nn.l2_normalize(sources, axis=sum_over_axis)
        targets = tf.nn.l2_normalize(targets, axis=sum_over_axis)
    if _is_axis_required_in_distance_fn(distance_config.distance_type):
        distances = distance_fn(labels=sources,
                                predictions=targets,
                                weights=weights,
                                axis=sum_over_axis,
                                reduction=distance_config.reduction,
                                loss_collection=None)
    else:
        distances = distance_fn(labels=sources,
                                predictions=targets,
                                weights=weights,
                                reduction=distance_config.reduction,
                                loss_collection=None)
        if sum_over_axis is not None and _is_reduced_by_average(
                distance_config.reduction):
            # The distance is divided by the size of the `targets` tensor, so we
            # rescale it by multiplying by the size of the `sum_over_axis`
            # dimension. Note: distance functions that take `axis` as a required
            # argument (e.g., cosine distance) do not need this rescaling.
            weights = tf.convert_to_tensor(value=weights)
            weights_shape = weights.get_shape().as_list()
            if weights_shape and weights_shape[sum_over_axis] != 1:
                raise ValueError(
                    'Shape of weights along the axis %d must be 1.' %
                    sum_over_axis)
            distances *= sources.shape.dims[sum_over_axis].value
    return distances
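
A short usage sketch reproducing the two worked examples from the docstring
(assuming `configs` is imported as in the tests above, where the wrapper is
accessed as `distances.pairwise_distance_wrapper`):

import tensorflow as tf

sources = tf.constant([[1., 1.], [2., 2.], [0., 2.], [5., 5.]])
targets = tf.constant([[1., 1.], [0., 2.], [4., 4.], [1., 4.]])

# Summing over the feature axis: average distance per sample pair = 10.25.
per_sample = pairwise_distance_wrapper(
    sources, targets,
    distance_config=configs.DistanceConfig('l2', sum_over_axis=-1))

# Without sum_over_axis: average distance per feature pair = 5.125.
per_feature = pairwise_distance_wrapper(
    sources, targets, distance_config=configs.DistanceConfig('l2'))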