Example #1
 def test_batch_jacobian_fixed_shape(self):
   x = random_ops.random_uniform([2, 3, 5])
   y = x * x
   batch_jacobian_pfor = gradients.batch_jacobian(y, x, use_pfor=True)
   batch_jacobian_while = gradients.batch_jacobian(y, x, use_pfor=False)
   two_x = 2 * x
   answer = array_ops.stack(
       [array_ops.diag(two_x[0]),
        array_ops.diag(two_x[1])])
   self.run_and_assert_equal(answer, batch_jacobian_pfor)
   self.run_and_assert_equal(answer, batch_jacobian_while)
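For reference, the stacked answer built in this test can be reproduced outside TensorFlow: for y = x * x the per-example Jacobian is diag(2 * x[b]). A minimal NumPy sketch of that expectation (illustration only, not part of the original test):

import numpy as np

x = np.random.rand(2, 3)                    # [batch, features]
# batch_jacobian(y, x)[b] should equal d(y[b]) / d(x[b]) = diag(2 * x[b])
expected = np.stack([np.diag(2 * x[b]) for b in range(x.shape[0])])
print(expected.shape)                       # (2, 3, 3)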
Example #2
 def _jacobian(self, experimental_use_pfor):
   persistent = context.executing_eagerly() and not experimental_use_pfor
   with backprop.GradientTape(persistent=persistent) as g:
     x = constant_op.constant([1., 2.])
     y = constant_op.constant([3., 4.])
     g.watch(x)
     g.watch(y)
     z = x * x * y
   jacobian = g.jacobian(z, [x, y],
                         experimental_use_pfor=experimental_use_pfor)
   answer = [array_ops.diag(2 * x * y), array_ops.diag(x * x)]
   return jacobian, answer
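The same Jacobian can also be computed against the public TF 2.x API. A hedged sketch (assuming TensorFlow 2.x imported as tf; the values mirror the test above):

import tensorflow as tf

x = tf.constant([1., 2.])
y = tf.constant([3., 4.])
with tf.GradientTape(persistent=True) as g:
    g.watch(x)
    g.watch(y)
    z = x * x * y
jac_x, jac_y = g.jacobian(z, [x, y])
# jac_x should equal diag(2 * x * y) and jac_y should equal diag(x * x)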
Example #3
 def _batch_jacobian(self, experimental_use_pfor):
   persistent = context.executing_eagerly() and not experimental_use_pfor
   with backprop.GradientTape(persistent=persistent) as g:
     x = constant_op.constant([[1., 2.], [3., 4.]])
     y = constant_op.constant([[3., 4.], [5., 6.]])
     g.watch(x)
     z = x * x * y
   batch_jacobian = g.batch_jacobian(
       z, x, experimental_use_pfor=experimental_use_pfor)
   answer = array_ops.stack([array_ops.diag(2 * x[0] * y[0]),
                             array_ops.diag(2 * x[1] * y[1])])
   return batch_jacobian, answer
Example #4
 def test_batch_jacobian_unknown_shape(self):
   with self.test_session() as sess:
     x = array_ops.placeholder(dtypes.float32)
     y = x * x
     batch_jacobian_pfor = gradients.batch_jacobian(y, x, use_pfor=True)
     batch_jacobian_while = gradients.batch_jacobian(y, x, use_pfor=False)
     two_x = 2 * x
     answer = array_ops.stack(
         [array_ops.diag(two_x[0]),
          array_ops.diag(two_x[1])])
     ans, pfor_value, while_value = sess.run(
         [answer, batch_jacobian_pfor, batch_jacobian_while],
         feed_dict={x: [[1, 2], [3, 4]]})
     self.assertAllClose(ans, pfor_value)
     self.assertAllClose(ans, while_value)
Example #5
def _symmetric_matrix_square_root(mat, eps=1e-10):
    """Compute square root of a symmetric matrix.

  Note that this is different from an elementwise square root. We want to
  compute M' where M' = sqrt(mat) such that M' * M' = mat.

  Also note that this method **only** works for symmetric matrices.

  Args:
    mat: Matrix to take the square root of.
    eps: Small epsilon such that any element less than eps will not be square
      rooted to guard against numerical instability.

  Returns:
    Matrix square root of mat.
  """
    # Unlike numpy, tensorflow's return order is (s, u, v)
    s, u, v = linalg_ops.svd(mat)
    # sqrt is unstable around 0, just use 0 in such case
    si = array_ops.where(math_ops.less(s, eps), s, math_ops.sqrt(s))
    # Note that the v returned by Tensorflow is v = V
    # (when referencing the equation A = U S V^T)
    # This is unlike Numpy which returns v = V^T
    return math_ops.matmul(math_ops.matmul(u, array_ops.diag(si)),
                           v,
                           transpose_b=True)
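The property in the docstring (M' such that M' * M' = mat) is easy to verify numerically. A small NumPy sketch of the same SVD-based construction, for illustration only:

import numpy as np

a = np.random.rand(4, 4)
mat = a @ a.T                                  # symmetric positive semi-definite
s, u, vt = np.linalg.svd(mat)                  # NumPy returns V^T, unlike tf's svd
sqrt_mat = u @ np.diag(np.sqrt(s)) @ vt
print(np.allclose(sqrt_mat @ sqrt_mat, mat))   # True up to float tolerance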
Example #6
 def test_noise_decreasing(self):
   for dtype in [dtypes.float32, dtypes.float64]:
     with variable_scope.variable_scope(dtype.name):
       random_model = RandomStateSpaceModel(
           state_dimension=5, state_noise_dimension=4,
           configuration=state_space_model.StateSpaceModelConfiguration(
               dtype=dtype, num_features=1))
       random_model.initialize_graph()
       original_covariance = array_ops.diag(
           array_ops.ones(shape=[5], dtype=dtype))
       _, new_covariance, _ = random_model._exogenous_noise_decreasing(
           current_times=[[1]],
           exogenous_values=constant_op.constant([[-2.]], dtype=dtype),
           state=[
               -array_ops.ones(shape=[1, 5], dtype=dtype),
               original_covariance[None], [0]
           ])
       with self.cached_session() as session:
         variables.global_variables_initializer().run()
         evaled_new_covariance, evaled_original_covariance = session.run(
             [new_covariance[0], original_covariance])
         new_variances = numpy.diag(evaled_new_covariance)
         original_variances = numpy.diag(evaled_original_covariance)
         for i in range(5):
           self.assertLess(new_variances[i], original_variances[i])
Example #7
 def test_noise_decreasing(self):
     for dtype in [dtypes.float32, dtypes.float64]:
         with variable_scope.variable_scope(dtype.name):
             random_model = RandomStateSpaceModel(
                 state_dimension=5,
                 state_noise_dimension=4,
                 configuration=state_space_model.
                 StateSpaceModelConfiguration(dtype=dtype, num_features=1))
             random_model.initialize_graph()
             original_covariance = array_ops.diag(
                 array_ops.ones(shape=[5], dtype=dtype))
             _, new_covariance, _ = random_model._exogenous_noise_decreasing(
                 current_times=[[1]],
                 exogenous_values=constant_op.constant([[-2.]],
                                                       dtype=dtype),
                 state=[
                     -array_ops.ones(shape=[1, 5], dtype=dtype),
                     original_covariance[None], [0]
                 ])
             with self.test_session() as session:
                 variables.global_variables_initializer().run()
                 evaled_new_covariance, evaled_original_covariance = session.run(
                     [new_covariance[0], original_covariance])
                 new_variances = numpy.diag(evaled_new_covariance)
                 original_variances = numpy.diag(evaled_original_covariance)
                 for i in range(5):
                     self.assertLess(new_variances[i],
                                     original_variances[i])
Example #8
def _symmetric_matrix_square_root(mat, eps=1e-10):
  """Compute square root of a symmetric matrix.

  Note that this is different from an elementwise square root. We want to
  compute M' where M' = sqrt(mat) such that M' * M' = mat.

  Also note that this method **only** works for symmetric matrices.

  Args:
    mat: Matrix to take the square root of.
    eps: Small epsilon such that any element less than eps will not be square
      rooted to guard against numerical instability.

  Returns:
    Matrix square root of mat.
  """
  # Unlike numpy, tensorflow's return order is (s, u, v)
  s, u, v = linalg_ops.svd(mat)
  # sqrt is unstable around 0, just use 0 in such case
  si = array_ops.where(math_ops.less(s, eps), s, math_ops.sqrt(s))
  # Note that the v returned by Tensorflow is v = V
  # (when referencing the equation A = U S V^T)
  # This is unlike Numpy which returns v = V^T
  return math_ops.matmul(
      math_ops.matmul(u, array_ops.diag(si)), v, transpose_b=True)
Example #9
 def test_batch_jacobian_unknown_shape(self):
     with self.cached_session() as sess:
         x = array_ops.placeholder(dtypes.float32)
         y = x * x
         batch_jacobian_pfor = gradients.batch_jacobian(y, x, use_pfor=True)
         batch_jacobian_while = gradients.batch_jacobian(y,
                                                         x,
                                                         use_pfor=False)
         two_x = 2 * x
         answer = array_ops.stack(
             [array_ops.diag(two_x[0]),
              array_ops.diag(two_x[1])])
         ans, pfor_value, while_value = sess.run(
             [answer, batch_jacobian_pfor, batch_jacobian_while],
             feed_dict={x: [[1, 2], [3, 4]]})
         self.assertAllClose(ans, pfor_value)
         self.assertAllClose(ans, while_value)
Example #10
def pairwise_distance(feature, squared=False):
    """Computes the pairwise distance matrix with numerical stability.

    output[i, j] = || feature[i, :] - feature[j, :] ||_2

    Args:
      feature: 2-D Tensor of size [number of data, feature dimension].
      squared: Boolean, whether or not to square the pairwise distances.

    Returns:
      pairwise_distances: 2-D Tensor of size [number of data, number of data].
    """
    # Get the dot product between all embeddings
    # shape (batch_size, batch_size)
    # dot_product = math_ops.matmul(embeddings, array_ops.transpose(embeddings))

    # Get squared L2 norm for each embedding. We can just take the diagonal of `dot_product`.
    # This also provides more numerical stability (the diagonal of the result will be exactly 0).
    # shape (batch_size,)
    # square_norm = array_ops.diag_part(dot_product)

    # Compute the pairwise distance matrix as we have:
    # ||a - b||^2 = ||a||^2  - 2 <a, b> + ||b||^2
    # shape (batch_size, batch_size)

    # pairwise_distances_squared = array_ops.expand_dims(square_norm, 0) \
    #     - 2.0 * dot_product \
    #     + array_ops.expand_dims(square_norm, 1)

    pairwise_distances_squared = math_ops.add(
        math_ops.reduce_sum(math_ops.square(feature), axis=[1], keepdims=True),
        math_ops.reduce_sum(math_ops.square(array_ops.transpose(feature)),
                            axis=[0], keepdims=True)
    ) - 2.0 * math_ops.matmul(feature, array_ops.transpose(feature))

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared, 0.0)
    # Get the mask where the zero distances are at.
    error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        # Because the gradient of sqrt is infinite when distances == 0.0 (ex: on the diagonal)
        # we need to add a small epsilon where distances == 0.0
        pairwise_distances = math_ops.sqrt(
            pairwise_distances_squared + tf.cast(math_ops.to_float(error_mask) * 1e-16, dtype=tf.float32))

    # Undo conditionally adding 1e-16.
    pairwise_distances = math_ops.multiply(
        pairwise_distances, tf.cast(math_ops.to_float(math_ops.logical_not(error_mask)), dtype=tf.float32))

    num_data = array_ops.shape(feature)[0]
    # Explicitly set diagonals to zero.
    mask_offdiagonals = array_ops.ones_like(pairwise_distances) - \
        tf.cast(array_ops.diag(array_ops.ones([num_data])), tf.float32)
    pairwise_distances = math_ops.multiply(pairwise_distances, mask_offdiagonals)
    return pairwise_distances
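The identity this relies on, ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2, can be sanity-checked directly. A short NumPy sketch (illustrative only):

import numpy as np

feature = np.random.rand(5, 3)
sq_norms = np.sum(feature ** 2, axis=1)
dists_sq = sq_norms[:, None] - 2.0 * feature @ feature.T + sq_norms[None, :]
dists_sq = np.maximum(dists_sq, 0.0)           # clip tiny negatives from round-off
brute = np.array([[np.sum((p - q) ** 2) for q in feature] for p in feature])
print(np.allclose(dists_sq, brute))            # True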
Example #11
def loss(y_true, y_pred):
    del y_true
    margin = 1.
    labels = y_pred[:, :1]

    labels = tf.cast(labels, dtype='int32')

    embeddings = y_pred[:, 1:]

    pdist_matrix = pairwise_distance(embeddings, squared=True)
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    num_positives = math_ops.reduce_sum(mask_positives)

    semi_hard_triplet_loss_distance = math_ops.truediv(
        math_ops.reduce_sum(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0)),
        num_positives,
        name='triplet_semihard_loss')

    ### Code from Tensorflow function semi-hard triplet loss ENDS here.
    return semi_hard_triplet_loss_distance
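This variant expects the integer label to be packed into the first column of y_pred, with the embedding in the remaining columns. A hedged usage sketch, assuming pairwise_distance, masked_minimum and masked_maximum are defined as in the other examples on this page:

import tensorflow as tf

labels = tf.constant([[0.], [0.], [1.], [1.]])       # [batch, 1], float so it can be concatenated
embeddings = tf.random.normal([4, 8])                 # [batch, embedding_dim]
y_pred = tf.concat([labels, embeddings], axis=1)      # exactly what loss() slices apart again
value = loss(y_true=None, y_pred=y_pred)              # y_true is ignored by this loss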
Example #12
def _symmetric_matrix_square_root(mat, eps=1e-10):

    s, u, v = linalg_ops.svd(mat)

    si = array_ops.where(math_ops.less(s, eps), s, math_ops.sqrt(s))

    return math_ops.matmul(math_ops.matmul(u, array_ops.diag(si)),
                           v,
                           transpose_b=True)
Example #13
 def diagOp(self, diag, dtype, expected_ans, use_gpu=False):
   with self.test_session(use_gpu=use_gpu):
     tf_ans = array_ops.diag(ops.convert_to_tensor(diag.astype(dtype)))
     out = tf_ans.eval()
     tf_ans_inv = array_ops.diag_part(expected_ans)
     inv_out = tf_ans_inv.eval()
   self.assertAllClose(out, expected_ans)
   self.assertAllClose(inv_out, diag)
   self.assertShapeEqual(expected_ans, tf_ans)
   self.assertShapeEqual(diag, tf_ans_inv)
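The test is checking the round trip between a vector and the square matrix holding it on the diagonal. The NumPy equivalents make that relationship explicit (illustration, not part of the test):

import numpy as np

vec = np.array([1.0, 2.0, 3.0])
mat = np.diag(vec)                  # like array_ops.diag: builds the matrix with vec on its diagonal
back = np.diag(mat)                 # like array_ops.diag_part: extracts the diagonal again
print(np.array_equal(back, vec))    # True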
Example #14
 def _diagOp(self, diag, dtype, expected_ans, use_gpu):
   with self.cached_session(use_gpu=use_gpu):
     tf_ans = array_ops.diag(ops.convert_to_tensor(diag.astype(dtype)))
     out = self.evaluate(tf_ans)
     tf_ans_inv = array_ops.diag_part(expected_ans)
     inv_out = self.evaluate(tf_ans_inv)
   self.assertAllClose(out, expected_ans)
   self.assertAllClose(inv_out, diag)
   self.assertShapeEqual(expected_ans, tf_ans)
   self.assertShapeEqual(diag, tf_ans_inv)
Example #15
 def diagOp(self, diag, dtype, expected_ans, use_gpu=False):
   with self.test_session(use_gpu=use_gpu):
     tf_ans = array_ops.diag(ops.convert_to_tensor(diag.astype(dtype)))
     out = tf_ans.eval()
     tf_ans_inv = array_ops.diag_part(expected_ans)
     inv_out = tf_ans_inv.eval()
   self.assertAllClose(out, expected_ans)
   self.assertAllClose(inv_out, diag)
   self.assertShapeEqual(expected_ans, tf_ans)
   self.assertShapeEqual(diag, tf_ans_inv)
Example #16
 def _diagOp(self, diag, dtype, expected_ans, use_gpu):
   with self.cached_session(use_gpu=use_gpu):
     tf_ans = array_ops.diag(ops.convert_to_tensor(diag.astype(dtype)))
     out = self.evaluate(tf_ans)
     tf_ans_inv = array_ops.diag_part(expected_ans)
     inv_out = self.evaluate(tf_ans_inv)
   self.assertAllClose(out, expected_ans)
   self.assertAllClose(inv_out, diag)
   self.assertShapeEqual(expected_ans, tf_ans)
   self.assertShapeEqual(diag, tf_ans_inv)
Example #17
  def __init__(self,
               data,
               num_classes,
               initial_means=None,
               params='wmc',
               covariance_type=FULL_COVARIANCE,
               random_seed=0):
    """Constructor.

    Args:
      data: a list of Tensors with data, each row is a new example.
      num_classes: number of clusters.
      initial_means: a Tensor with a matrix of means. If None, means are
        computed by sampling randomly.
      params: Controls which parameters are updated in the training
        process. Can contain any combination of "w" for weights, "m" for
        means, and "c" for covariances.
      covariance_type: one of "full", "diag".
      random_seed: Seed for PRNG used to initialize seeds.

    Raises:
      Exception if covariance type is unknown.
    """
    self._params = params
    self._random_seed = random_seed
    self._covariance_type = covariance_type
    if self._covariance_type not in [DIAG_COVARIANCE, FULL_COVARIANCE]:
      raise Exception(  # pylint: disable=g-doc-exception
          'programmer error: Invalid covariance type: %s' %
          self._covariance_type)
    # Create sharded variables for multiple shards. The following
    # lists are indexed by shard.
    # Probability per example in a class.
    num_shards = len(data)
    self._probs = [None] * num_shards
    # Prior probability.
    self._prior_probs = [None] * num_shards
    # Membership weights w_{ik} where "i" is the i-th example and "k"
    # is the k-th mixture.
    self._w = [None] * num_shards
    # Number of examples in a class.
    self._points_in_k = [None] * num_shards
    first_shard = data[0]
    self._dimensions = array_ops.shape(first_shard)[1]
    self._num_classes = num_classes
    # Small value to guarantee that covariances are invertible.
    self._min_var = array_ops.diag(
        array_ops.ones(array_ops.stack([self._dimensions]))) * 1e-3
    self._create_variables()
    self._initialize_variables(data, initial_means)
    # Operations of partial statistics for the computation of the means.
    self._w_mul_x = []
    # Operations of partial statistics for the computation of the covariances.
    self._w_mul_x2 = []
    self._define_graph(data)
Example #18
    def __init__(self,
                 data,
                 num_classes,
                 initial_means=None,
                 params='wmc',
                 covariance_type=FULL_COVARIANCE,
                 random_seed=0):
        """Constructor.

    Args:
      data: a list of Tensors with data, each row is a new example.
      num_classes: number of clusters.
      initial_means: a Tensor with a matrix of means. If None, means are
        computed by sampling randomly.
      params: Controls which parameters are updated in the training
        process. Can contain any combination of "w" for weights, "m" for
        means, and "c" for covariances.
      covariance_type: one of "full", "diag".
      random_seed: Seed for PRNG used to initialize seeds.

    Raises:
      Exception if covariance type is unknown.
    """
        self._params = params
        self._random_seed = random_seed
        self._covariance_type = covariance_type
        if self._covariance_type not in [DIAG_COVARIANCE, FULL_COVARIANCE]:
            raise Exception(  # pylint: disable=g-doc-exception
                'programmer error: Invalid covariance type: %s' %
                self._covariance_type)
        # Create sharded variables for multiple shards. The following
        # lists are indexed by shard.
        # Probability per example in a class.
        num_shards = len(data)
        self._probs = [None] * num_shards
        # Prior probability.
        self._prior_probs = [None] * num_shards
        # Membership weights w_{ik} where "i" is the i-th example and "k"
        # is the k-th mixture.
        self._w = [None] * num_shards
        # Number of examples in a class.
        self._points_in_k = [None] * num_shards
        first_shard = data[0]
        self._dimensions = array_ops.shape(first_shard)[1]
        self._num_classes = num_classes
        # Small value to guarantee that covariances are invertible.
        self._min_var = array_ops.diag(
            array_ops.ones(array_ops.stack([self._dimensions]))) * 1e-3
        self._create_variables()
        self._initialize_variables(data, initial_means)
        # Operations of partial statistics for the computation of the means.
        self._w_mul_x = []
        # Operations of partial statistics for the computation of the covariances.
        self._w_mul_x2 = []
        self._define_graph(data)
Example #19
 def testShuffle2d(self):
     with self.cached_session() as sess:
         with self.test_scope():
             x = array_ops.diag(math_ops.range(20))
             shuffle = random_ops.random_shuffle(x)
         result = sess.run(shuffle)
         expected = np.diag(range(20)).flatten()
         # Compare sets to avoid randomness behavior changes but make sure still
         # have all the values.
         self.assertAllEqual(len(result.flatten()), len(expected))
         self.assertAllEqual(set(result.flatten()), set(expected))
Example #20
  def testComputePiTracenorm(self):
    with ops.Graph().as_default(), self.test_session() as sess:
      random_seed.set_random_seed(200)
      left_factor = array_ops.diag([1., 2., 0., 1.])
      right_factor = array_ops.ones([2., 2.])

      # pi is the sqrt of the left trace norm divided by the right trace norm
      pi = fb.compute_pi_tracenorm(left_factor, right_factor)

      pi_val = sess.run(pi)
      self.assertEqual(1., pi_val)
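The asserted value of 1.0 can be worked out by hand if the pi correction is taken to be the square root of the ratio of the factors' normalized traces (an assumption about compute_pi_tracenorm based on the comment above, not on this test):

import numpy as np

left = np.diag([1., 2., 0., 1.])      # trace 4, dimension 4 -> normalized trace 1
right = np.ones((2, 2))               # trace 2, dimension 2 -> normalized trace 1
pi = np.sqrt((np.trace(left) / left.shape[0]) / (np.trace(right) / right.shape[0]))
print(pi)                             # 1.0, matching the assertion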
Example #21
 def testShuffle2d(self):
   with self.cached_session() as sess:
     with self.test_scope():
       x = array_ops.diag(math_ops.range(20))
       shuffle = random_ops.random_shuffle(x)
     result = sess.run(shuffle)
     expected = np.diag(range(20)).flatten()
     # Compare sets to avoid randomness behavior changes but make sure still
     # have all the values.
     self.assertAllEqual(len(result.flatten()), len(expected))
     self.assertAllEqual(set(result.flatten()), set(expected))
Example #22
def pairwise_distance(feature, squared=False):
    """Computes the pairwise distance matrix with numerical stability.

    output[i, j] = || feature[i, :] - feature[j, :] ||_2

    Args:
      feature: 2-D Tensor of size [number of data, feature dimension].
      squared: Boolean, whether or not to square the pairwise distances.

    Returns:
      pairwise_distances: 2-D Tensor of size [number of data, number of data].
    """
    pairwise_distances_squared = (math_ops.add(
        math_ops.reduce_sum(math_ops.square(feature), axis=[1], keepdims=True),
        math_ops.reduce_sum(math_ops.square(array_ops.transpose(feature)),
                            axis=[0],
                            keepdims=True),
    ) - 2.0 * math_ops.matmul(feature, array_ops.transpose(feature)))

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared,
                                                  0.0)
    # Get the mask where the zero distances are at.
    error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        pairwise_distances = math_ops.sqrt(pairwise_distances_squared +
                                           math_ops.to_float(error_mask) *
                                           1e-16)

    # Undo conditionally adding 1e-16.
    pairwise_distances = math_ops.multiply(
        pairwise_distances,
        math_ops.to_float(math_ops.logical_not(error_mask)))

    # num_data = array_ops.shape(feature)[0]

    # Explicitly set diagonals to zero.

    # import utool as ut
    # ut.embed()
    # mask_offdiagonals = array_ops.ones_like(pairwise_distances) - array_ops.diag(
    #     array_ops.ones([num_data])
    # )
    mask_offdiagonals = array_ops.ones_like(
        pairwise_distances) - array_ops.diag(
            array_ops.ones(array_ops.shape(feature))[:, 0])

    pairwise_distances = math_ops.multiply(pairwise_distances,
                                           mask_offdiagonals)
    return pairwise_distances
Example #23
    def testComputePiTracenorm(self):
        with ops.Graph().as_default(), self.test_session() as sess:
            random_seed.set_random_seed(200)
            left_factor = array_ops.diag([1., 2., 0., 1.])
            right_factor = array_ops.ones([2., 2.])

            # pi is the sqrt of the left trace norm divided by the right trace norm
            pi = fb._compute_pi_tracenorm(left_factor, right_factor)

            pi_val = sess.run(pi)
            self.assertEqual(1., pi_val)
Example #24
def _test1(op, grad_e, grad_v):
    """Gradient for SelfAdjointEigV2 derived with Joan with no adjustment for subspace"""
    e = op.outputs[0]
    v = op.outputs[1]
    #dim = v.get_shape()
    with ops.control_dependencies([grad_e.op, grad_v.op]):
        if grad_v is not None:  
            E = array_ops.diag(e)
            v_proj = array_ops.slice(v, [0,0], [20,2])
            grad_grassman = grad_v - math_ops.batch_matmul(math_ops.batch_matmul(v_proj, array_ops.transpose(v_proj)), grad_v)
            grad_a = math_ops.batch_matmul(grad_grassman, math_ops.batch_matmul(E, array_ops.transpose(grad_v)))+math_ops.batch_matmul(grad_v, math_ops.batch_matmul(E, array_ops.transpose(grad_grassman)))
    return grad_a
Example #25
def pairwise_distance_euclidean(a, b=None, squared=False):
    """Computes the pairwise distance matrix with numerical stability.

  output[i, j] = || a[i, :] - b[j, :] ||_2

  Args:
    a: 2-D Tensor of size [number of a, feature dimension].
    b: 2-D Tensor of size [number of b, feature dimension].
    squared: Boolean, whether or not to square the pairwise distances.

  Returns:
    pairwise_distances: 2-D Tensor of size [number of a, number of b].
  """
    b_was_none = False
    if b is None:
        b_was_none = True
        b = tf.identity(a)

    transpose_b = tf.transpose(b)

    pairwise_distances_squared = math_ops.add(
        math_ops.reduce_sum(math_ops.square(a), axis=[1], keepdims=True), # [len_a, 1]
        math_ops.reduce_sum(math_ops.square(transpose_b), axis=[0], keepdims=True)) -\
        2.0 * math_ops.matmul(a, transpose_b) # [len_a, len_b]

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared,
                                                  0.0)
    # Get the mask where the zero distances are at.
    error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        pairwise_distances = math_ops.sqrt(pairwise_distances_squared +
                                           math_ops.to_float(error_mask) *
                                           1e-16)

    # Undo conditionally adding 1e-16.
    pairwise_distances = math_ops.multiply(
        pairwise_distances,
        math_ops.to_float(math_ops.logical_not(error_mask)))

    # If b was None, Explicitly set diagonals to zero.
    if b_was_none:
        num_data = array_ops.shape(a)[0]
        mask_offdiagonals = array_ops.ones_like(
            pairwise_distances) - array_ops.diag(array_ops.ones([num_data]))
        pairwise_distances = math_ops.multiply(pairwise_distances,
                                               mask_offdiagonals)

    return pairwise_distances
Example #26
 def testBatchGradientUnknownSize(self):
   with self.test_session():
     batch_size = constant_op.constant(3)
     matrix_size = constant_op.constant(4)
     batch_identity = array_ops.tile(
         array_ops.expand_dims(
             array_ops.diag(array_ops.ones([matrix_size])), 0),
         [batch_size, 1, 1])
     determinants = linalg_ops.matrix_determinant(batch_identity)
     reduced = math_ops.reduce_sum(determinants)
     sum_grad = gradients_impl.gradients(reduced, batch_identity)[0]
     self.assertAllClose(batch_identity.eval(), sum_grad.eval())
Example #27
 def testBatchGradientUnknownSize(self):
   with self.cached_session():
     batch_size = constant_op.constant(3)
     matrix_size = constant_op.constant(4)
     batch_identity = array_ops.tile(
         array_ops.expand_dims(
             array_ops.diag(array_ops.ones([matrix_size])), 0),
         [batch_size, 1, 1])
     determinants = linalg_ops.matrix_determinant(batch_identity)
     reduced = math_ops.reduce_sum(determinants)
     sum_grad = gradients_impl.gradients(reduced, batch_identity)[0]
     self.assertAllClose(batch_identity.eval(), self.evaluate(sum_grad))
Example #28
 def testDiagGrad(self):
   np.random.seed(0)
   shapes = ((3,), (3, 3), (3, 3, 3))
   dtypes = (dtypes_lib.float32, dtypes_lib.float64)
   with self.test_session(use_gpu=False):
     errors = []
     for shape in shapes:
       for dtype in dtypes:
         x1 = constant_op.constant(np.random.rand(*shape), dtype=dtype)
         y = array_ops.diag(x1)
         error = gradient_checker.compute_gradient_error(
             x1, x1.get_shape().as_list(), y, y.get_shape().as_list())
         tf_logging.info("error = %f", error)
         self.assertLess(error, 1e-4)
Example #29
 def testDiagGrad(self):
   np.random.seed(0)
   shapes = ((3,), (3, 3), (3, 3, 3))
   dtypes = (dtypes_lib.float32, dtypes_lib.float64)
   with self.test_session(use_gpu=False):
     errors = []
     for shape in shapes:
       for dtype in dtypes:
         x1 = constant_op.constant(np.random.rand(*shape), dtype=dtype)
         y = array_ops.diag(x1)
         error = gradient_checker.compute_gradient_error(
             x1, x1.get_shape().as_list(), y, y.get_shape().as_list())
         tf_logging.info("error = %f", error)
         self.assertLess(error, 1e-4)
Example #30
def pairwise_distance(feature, squared=False):
  """Computes the pairwise distance matrix with numerical stability.

  output[i, j] = || feature[i, :] - feature[j, :] ||_2

  Args:
    feature: 2-D Tensor of size [number of data, feature dimension].
    squared: Boolean, whether or not to square the pairwise distances.

  Returns:
    pairwise_distances: 2-D Tensor of size [number of data, number of data].
  """
  pairwise_distances_squared = math_ops.add(
      math_ops.reduce_sum(
          math_ops.square(feature),
          axis=[1],
          keepdims=True),
      math_ops.reduce_sum(
          math_ops.square(
              array_ops.transpose(feature)),
          axis=[0],
          keepdims=True)) - 2.0 * math_ops.matmul(
              feature, array_ops.transpose(feature))

  # Deal with numerical inaccuracies. Set small negatives to zero.
  pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared, 0.0)
  # Get the mask where the zero distances are at.
  error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)

  # Optionally take the sqrt.
  if squared:
    pairwise_distances = pairwise_distances_squared
  else:
    pairwise_distances = math_ops.sqrt(
        pairwise_distances_squared + math_ops.to_float(error_mask) * 1e-16)

  # Undo conditionally adding 1e-16.
  pairwise_distances = math_ops.multiply(
      pairwise_distances, math_ops.to_float(math_ops.logical_not(error_mask)))

  num_data = array_ops.shape(feature)[0]
  # Explicitly set diagonals to zero.
  mask_offdiagonals = array_ops.ones_like(pairwise_distances) - array_ops.diag(
      array_ops.ones([num_data]))
  pairwise_distances = math_ops.multiply(pairwise_distances, mask_offdiagonals)
  return pairwise_distances
Example #31
def pairwise_distance(feature, squared=False, normalized=True):
    """from the source code of `tf.contrib.losses.metric_learning.triplet_semihard_loss`
    Computes the pairwise distance matrix with numerical stability.
    output[i, j] = || feature[i, :] - feature[j, :] ||_2
    Args:
      feature: 2-D Tensor of size [number of data, feature dimension].
      squared: Boolean, whether or not to square the pairwise distances.
      normalized: Boolean, whether or not input feature has be l2 normalized.
    Returns:
      pairwise_distances: 2-D Tensor of size [number of data, number of data].
    """
    if normalized:
        pairwise_distances_squared = 2.0 * (
            1.0 - math_ops.matmul(feature, array_ops.transpose(feature)))
    else:
        pairwise_distances_squared = math_ops.add(
            math_ops.reduce_sum(math_ops.square(feature), axis=[1], keepdims=True),
            math_ops.reduce_sum(math_ops.square(array_ops.transpose(feature)), axis=[0], keepdims=True))\
            - 2.0 * math_ops.matmul(feature, array_ops.transpose(feature))

    # Deal with numerical inaccuracies. Set small negatives to zero.
    pairwise_distances_squared = math_ops.maximum(pairwise_distances_squared,
                                                  0.0)

    # Optionally take the sqrt.
    if squared:
        pairwise_distances = pairwise_distances_squared
    else:
        # Get the mask where the zero distances are at.
        error_mask = math_ops.less_equal(pairwise_distances_squared, 0.0)
        pairwise_distances = math_ops.sqrt(
            pairwise_distances_squared +
            math_ops.cast(error_mask, dtypes.float32) * 1e-16)
        # Undo conditionally adding 1e-16.
        pairwise_distances = math_ops.multiply(
            pairwise_distances,
            math_ops.cast(math_ops.logical_not(error_mask), dtypes.float32))

    num_data = array_ops.shape(feature)[0]
    # Explicitly set diagonals to zero.
    mask_offdiagonals = array_ops.ones_like(
        pairwise_distances) - array_ops.diag(array_ops.ones([num_data]))
    pairwise_distances = math_ops.multiply(pairwise_distances,
                                           mask_offdiagonals)
    return pairwise_distances
Example #32
def _matrix_square_root(mat, eps=1e-10):
  """Compute symmetric square root of matrix.

  Equivalent to matrix square root when matrix is invertible; note that this is
  different from an elementwise square root. We want to compute M' where M' =
  sqrt(mat) such that M' * M' = mat.

  Args:
    mat: Matrix to take the square root of.
    eps: Small epsilon such that any element less than eps will not be square
      rooted to guard against numerical instability.

  Returns:
    Matrix square root of mat.
  """
  s, u, v = linalg_ops.svd(mat)
  # sqrt is unstable around 0, just use 0 in such case
  si = array_ops.where(math_ops.less(s, eps), s, math_ops.sqrt(s))
  return math_ops.matmul(
      math_ops.matmul(u, array_ops.diag(si)), v, transpose_b=True)
Example #33
def _matrix_square_root(mat, eps=1e-10):
    """Compute symmetric square root of matrix.

  Equivalent to matrix square root when matrix is invertible; note that this is
  different from an elementwise square root. We want to compute M' where M' =
  sqrt(mat) such that M' * M' = mat.

  Args:
    mat: Matrix to take the square root of.
    eps: Small epsilon such that any element less than eps will not be square
      rooted to guard against numerical instability.

  Returns:
    Matrix square root of mat.
  """
    s, u, v = linalg_ops.svd(mat)
    # sqrt is unstable around 0, just use 0 in such case
    si = array_ops.where(math_ops.less(s, eps), s, math_ops.sqrt(s))
    return math_ops.matmul(math_ops.matmul(u, array_ops.diag(si)),
                           v,
                           transpose_b=True)
Example #34
def log_noninformative_covariance_prior(covariance):
    """Compute a relatively uninformative prior for noise parameters.

  Helpful for avoiding noise over-estimation, where noise otherwise decreases
  very slowly during optimization.

  See:
    Villegas, C. On the A Priori Distribution of the Covariance Matrix.
    Ann. Math. Statist. 40 (1969), no. 3, 1098--1099.

  Args:
    covariance: A covariance matrix.
  Returns:
    For a [p x p] matrix:
      log(det(covariance)^(-(p + 1) / 2))
  """
    # Avoid zero/negative determinants due to numerical errors
    covariance += array_ops.diag(1e-8 * array_ops.ones(
        shape=[array_ops.shape(covariance)[0]], dtype=covariance.dtype))
    power = -(math_ops.cast(
        array_ops.shape(covariance)[0] + 1, covariance.dtype) / 2.)
    return power * math_ops.log(linalg_ops.matrix_determinant(covariance))
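For a p x p identity covariance det(covariance) is 1, so log(det(covariance)^(-(p + 1) / 2)) is essentially 0 apart from the 1e-8 jitter. A quick NumPy check of the formula, for illustration:

import numpy as np

p = 3
covariance = np.eye(p) + 1e-8 * np.eye(p)          # same jitter as the code above
power = -(p + 1) / 2.0
print(power * np.log(np.linalg.det(covariance)))   # ~0 (a tiny negative number from the jitter)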
Example #35
    def _define_distance_to_clusters(self, data):
        """Defines the Mahalanobis distance to the assigned Gaussian."""
        # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
        # mean) from log probability function.
        self._all_scores = []
        for shard in data:
            all_scores = []
            shard = array_ops.expand_dims(shard, 0)
            for c in xrange(self._num_classes):
                if self._covariance_type == FULL_COVARIANCE:
                    cov = self._covs[c, :, :]
                elif self._covariance_type == DIAG_COVARIANCE:
                    cov = array_ops.diag(self._covs[c, :])
                inverse = linalg_ops.matrix_inverse(cov + self._min_var)
                inv_cov = array_ops.tile(
                    array_ops.expand_dims(inverse, 0),
                    array_ops.stack([self._num_examples, 1, 1]))
                diff = array_ops.transpose(shard - self._means[c, :, :],
                                           perm=[1, 0, 2])
                m_left = math_ops.matmul(diff, inv_cov)
                all_scores.append(
                    math_ops.sqrt(
                        math_ops.matmul(
                            m_left, array_ops.transpose(diff, perm=[0, 2,
                                                                    1]))))
            self._all_scores.append(
                array_ops.reshape(
                    array_ops.concat(all_scores, 1),
                    array_ops.stack([self._num_examples, self._num_classes])))

        # Distance to the associated class.
        self._all_scores = array_ops.concat(self._all_scores, 0)
        assignments = array_ops.concat(self.assignments(), 0)
        rows = math_ops.to_int64(math_ops.range(0, self._num_examples))
        indices = array_ops.concat([
            array_ops.expand_dims(rows, 1),
            array_ops.expand_dims(assignments, 1)
        ], 1)
        self._scores = array_ops.gather_nd(self._all_scores, indices)
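The per-cluster score assembled above is the Mahalanobis distance sqrt((x - mu)^T Sigma^{-1} (x - mu)). A standalone NumPy version for a single example and a single cluster (illustration only):

import numpy as np

x = np.array([1.0, 2.0])               # one example
mu = np.array([0.0, 0.0])              # cluster mean
sigma = np.array([[2.0, 0.3],
                  [0.3, 1.0]])         # cluster covariance
diff = x - mu
print(np.sqrt(diff @ np.linalg.inv(sigma) @ diff))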
Example #36
def log_noninformative_covariance_prior(covariance):
  """Compute a relatively uninformative prior for noise parameters.

  Helpful for avoiding noise over-estimation, where noise otherwise decreases
  very slowly during optimization.

  See:
    Villegas, C. On the A Priori Distribution of the Covariance Matrix.
    Ann. Math. Statist. 40 (1969), no. 3, 1098--1099.

  Args:
    covariance: A covariance matrix.
  Returns:
    For a [p x p] matrix:
      log(det(covariance)^(-(p + 1) / 2))
  """
  # Avoid zero/negative determinants due to numerical errors
  covariance += array_ops.diag(1e-8 * array_ops.ones(
      shape=[array_ops.shape(covariance)[0]], dtype=covariance.dtype))
  power = -(math_ops.cast(array_ops.shape(covariance)[0] + 1,
                          covariance.dtype) / 2.)
  return power * math_ops.log(linalg_ops.matrix_determinant(covariance))
Example #37
  def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian."""
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
      all_scores = []
      shard = array_ops.expand_dims(shard, 0)
      for c in xrange(self._num_classes):
        if self._covariance_type == FULL_COVARIANCE:
          cov = self._covs[c, :, :]
        elif self._covariance_type == DIAG_COVARIANCE:
          cov = array_ops.diag(self._covs[c, :])
        inverse = linalg_ops.matrix_inverse(cov + self._min_var)
        inv_cov = array_ops.tile(
            array_ops.expand_dims(inverse, 0),
            array_ops.stack([self._num_examples, 1, 1]))
        diff = array_ops.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
        m_left = math_ops.matmul(diff, inv_cov)
        all_scores.append(
            math_ops.sqrt(
                math_ops.matmul(
                    m_left, array_ops.transpose(
                        diff, perm=[0, 2, 1]))))
      self._all_scores.append(
          array_ops.reshape(
              array_ops.concat(all_scores, 1),
              array_ops.stack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = array_ops.concat(self._all_scores, 0)
    assignments = array_ops.concat(self.assignments(), 0)
    rows = math_ops.to_int64(math_ops.range(0, self._num_examples))
    indices = array_ops.concat(
        [array_ops.expand_dims(rows, 1), array_ops.expand_dims(assignments, 1)],
        1)
    self._scores = array_ops.gather_nd(self._all_scores, indices)
Example #38
 def testInvalidRank(self):
   with self.assertRaisesRegexp(ValueError, "must be at least rank 1"):
     array_ops.diag(0.0)
Example #39
  def __init__(self,
               input_rows,
               input_cols,
               n_components,
               unobserved_weight=0.1,
               regularization=None,
               row_init="random",
               col_init="random",
               num_row_shards=1,
               num_col_shards=1,
               row_weights=1,
               col_weights=1,
               use_factors_weights_cache=True):
    """Creates model for WALS matrix factorization.

    Args:
      input_rows: total number of rows for input matrix.
      input_cols: total number of cols for input matrix.
      n_components: number of dimensions to use for the factors.
      unobserved_weight: weight given to unobserved entries of matrix.
      regularization: weight of L2 regularization term. If None, no
        regularization is done.
      row_init: initializer for row factor. Can be a tensor or numpy constant.
        If set to "random", the value is initialized randomly.
      col_init: initializer for column factor. See row_init for details.
      num_row_shards: number of shards to use for row factors.
      num_col_shards: number of shards to use for column factors.
      row_weights: Must be in one of the following three formats: None, a list
        of lists of non-negative real numbers (or equivalent iterables) or a
        single non-negative real number.
        - When set to None, w_ij = unobserved_weight, which simplifies to ALS.
        Note that col_weights must also be set to "None" in this case.
        - If it is a list of lists of non-negative real numbers, it needs to be
        in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of
        inner lists matching the number of row factor shards and the elements in
        each inner list are the weights for the rows of the corresponding row
        factor shard. In this case,  w_ij = unobserved_weight +
                                            row_weights[i] * col_weights[j].
        - If this is a single non-negative real number, this value is used for
        all row weights and w_ij = unobserved_weight + row_weights *
                                   col_weights[j].
        Note that it is allowed to have row_weights as a list while col_weights
        a single number or vice versa.
      col_weights: See row_weights.
      use_factors_weights_cache: When True, the factors and weights will be
        cached on the workers before the updates start. Defaults to True.
    """
    self._input_rows = input_rows
    self._input_cols = input_cols
    self._num_row_shards = num_row_shards
    self._num_col_shards = num_col_shards
    self._n_components = n_components
    self._unobserved_weight = unobserved_weight
    self._regularization = (array_ops.diag(
        constant_op.constant(
            regularization, shape=[self._n_components], dtype=dtypes.float32))
                            if regularization is not None else None)
    assert (row_weights is None) == (col_weights is None)
    self._row_weights = WALSModel._create_weights(row_weights, self._input_rows,
                                                  self._num_row_shards,
                                                  "row_weights")
    self._col_weights = WALSModel._create_weights(col_weights, self._input_cols,
                                                  self._num_col_shards,
                                                  "col_weights")
    self._use_factors_weights_cache = use_factors_weights_cache
    self._row_factors = self._create_factors(self._input_rows,
                                             self._n_components,
                                             self._num_row_shards, row_init,
                                             "row_factors")
    self._col_factors = self._create_factors(self._input_cols,
                                             self._n_components,
                                             self._num_col_shards, col_init,
                                             "col_factors")
    self._row_gramian = self._create_gramian(self._n_components, "row_gramian")
    self._col_gramian = self._create_gramian(self._n_components, "col_gramian")
    self._row_update_prep_gramian = self._prepare_gramian(self._col_factors,
                                                          self._col_gramian)
    self._col_update_prep_gramian = self._prepare_gramian(self._row_factors,
                                                          self._row_gramian)
    self._create_transient_vars()
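The weighting rule in the docstring, w_ij = unobserved_weight + row_weights[i] * col_weights[j], is easy to misread; a tiny worked example with made-up numbers:

unobserved_weight = 0.1
row_weights = [1.0, 2.0]
col_weights = [0.5, 0.5, 3.0]

# Weight of the observed entry at row 1, column 2:
w_12 = unobserved_weight + row_weights[1] * col_weights[2]
print(w_12)                       # 0.1 + 2.0 * 3.0 = 6.1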
Example #40
def batch_matrix_pow(matrices, powers):
  """Compute powers of matrices, e.g. A^3 = matmul(matmul(A, A), A).

  Uses exponentiation by squaring, with O(log(p)) matrix multiplications to
  compute A^p.

  Args:
    matrices: [batch size x N x N]
    powers: Which integer power to raise each matrix to [batch size]
  Returns:
    The matrices raised to their respective powers, same dimensions as the
    "matrices" argument.
  """

  def terminate_when_all_zero(current_argument, residual_powers, accumulator):
    del current_argument, accumulator  # not used for condition
    do_exit = math_ops.reduce_any(
        math_ops.greater(residual_powers, array_ops.ones_like(residual_powers)))
    return do_exit

  def do_iteration(current_argument, residual_powers, accumulator):
    """Compute one step of iterative exponentiation by squaring.

    The recursive form is:
      power(A, p) = { power(matmul(A, A), p / 2) for even p
                    { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p
      power(A, 0) = I

    The power(A, 0) = I case is handled by starting with accumulator set to the
    identity matrix; matrices with zero residual powers are passed through
    unchanged.

    Args:
      current_argument: On this step, what is the first argument (A^2..^2) to
          the (unrolled) recursive function? [batch size x N x N]
      residual_powers: On this step, what is the second argument (residual p)?
          [batch_size]
      accumulator: Accumulates the exterior multiplications from the odd
          powers (initially the identity matrix). [batch_size x N x N]
    Returns:
      Updated versions of each argument for one step of the unrolled
      computation. Does not change parts of the batch which have a residual
      power of zero.
    """
    is_even = math_ops.equal(residual_powers % 2,
                             array_ops.zeros(
                                 array_ops.shape(residual_powers),
                                 dtype=dtypes.int32))
    new_accumulator = array_ops.where(is_even, accumulator,
                                      math_ops.matmul(accumulator,
                                                      current_argument))
    new_argument = math_ops.matmul(current_argument, current_argument)
    do_update = math_ops.greater(residual_powers, 1)
    new_residual_powers = residual_powers - residual_powers % 2
    new_residual_powers //= 2
    # Stop updating if we've reached our base case; some batch elements may
    # finish sooner than others
    accumulator = array_ops.where(do_update, new_accumulator, accumulator)
    current_argument = array_ops.where(do_update, new_argument,
                                       current_argument)
    residual_powers = array_ops.where(do_update, new_residual_powers,
                                      residual_powers)
    return (current_argument, residual_powers, accumulator)

  matrices = ops.convert_to_tensor(matrices)
  powers = math_ops.cast(powers, dtype=dtypes.int32)
  ident = array_ops.expand_dims(
      array_ops.diag(
          array_ops.ones([array_ops.shape(matrices)[1]], dtype=matrices.dtype)),
      0)
  ident_tiled = array_ops.tile(ident, [array_ops.shape(matrices)[0], 1, 1])
  (final_argument,
   final_residual_power, final_accumulator) = control_flow_ops.while_loop(
       terminate_when_all_zero, do_iteration, [matrices, powers, ident_tiled])
  return array_ops.where(
      math_ops.equal(final_residual_power,
                     array_ops.zeros_like(
                         final_residual_power, dtype=dtypes.int32)),
      ident_tiled, math_ops.matmul(final_argument, final_accumulator))
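The while_loop above is a batched form of exponentiation by squaring. A plain single-matrix NumPy reference for the same recursion (illustrative; not the TensorFlow implementation):

import numpy as np

def matrix_pow(a, p):
    """Raise square matrix a to integer power p >= 0 with O(log p) matmuls."""
    result = np.eye(a.shape[0])           # power(A, 0) = I
    base = a
    while p > 0:
        if p % 2 == 1:                    # odd residual power: fold one factor into the result
            result = result @ base
        base = base @ base                # square the argument
        p //= 2
    return result

a = np.array([[1.0, 1.0], [0.0, 1.0]])
print(np.allclose(matrix_pow(a, 5), np.linalg.matrix_power(a, 5)))   # True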
Example #41
def covariance_initializer(shape, dtype, partition_info=None):  # pylint: disable=unused-argument
  if INIT_COVARIANCES_AT_ZERO:
    return array_ops.diag(array_ops.zeros(shape[0], dtype))
  return array_ops.diag(array_ops.ones(shape[0], dtype))
Example #42
def lifted_struct_loss(labels, embeddings, margin=1.0):
  """Computes the lifted structured loss.

  The loss encourages the positive distances (between a pair of embeddings
  with the same labels) to be smaller than any negative distances (between a
  pair of embeddings with different labels) in the mini-batch in a way
  that is differentiable with respect to the embedding vectors.
  See: https://arxiv.org/abs/1511.06452.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    lifted_loss: tf.float32 scalar.
  """
  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  # Build pairwise squared distance matrix.
  pairwise_distances = pairwise_distance(embeddings)

  # Build pairwise binary adjacency matrix.
  adjacency = math_ops.equal(labels, array_ops.transpose(labels))
  # Invert so we can select negatives only.
  adjacency_not = math_ops.logical_not(adjacency)

  batch_size = array_ops.size(labels)

  diff = margin - pairwise_distances
  mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
  # Safe maximum: Temporarily shift negative distances
  #   above zero before taking max.
  #     this is to take the max only among negatives.
  row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
  row_negative_maximums = math_ops.reduce_max(
      math_ops.multiply(diff - row_minimums, mask), 1,
      keepdims=True) + row_minimums

  # Compute the loss.
  # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
  #   where m_i is the max of alpha - negative D_i's.
  # This matches the Caffe loss layer implementation at:
  #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

  max_elements = math_ops.maximum(
      row_negative_maximums, array_ops.transpose(row_negative_maximums))
  diff_tiled = array_ops.tile(diff, [batch_size, 1])
  mask_tiled = array_ops.tile(mask, [batch_size, 1])
  max_elements_vect = array_ops.reshape(
      array_ops.transpose(max_elements), [-1, 1])

  loss_exp_left = array_ops.reshape(
      math_ops.reduce_sum(
          math_ops.multiply(
              math_ops.exp(diff_tiled - max_elements_vect), mask_tiled),
          1,
          keepdims=True), [batch_size, batch_size])

  loss_mat = max_elements + math_ops.log(
      loss_exp_left + array_ops.transpose(loss_exp_left))
  # Add the positive distance.
  loss_mat += pairwise_distances

  mask_positives = math_ops.cast(
      adjacency, dtype=dtypes.float32) - array_ops.diag(
          array_ops.ones([batch_size]))

  # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
  num_positives = math_ops.reduce_sum(mask_positives) / 2.0

  lifted_loss = math_ops.truediv(
      0.25 * math_ops.reduce_sum(
          math_ops.square(
              math_ops.maximum(
                  math_ops.multiply(loss_mat, mask_positives), 0.0))),
      num_positives,
      name='liftedstruct_loss')
  return lifted_loss
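A hedged usage sketch for the function above (int32 labels, unnormalized embeddings as the docstring asks; assumes pairwise_distance and the module-level imports from this example are in scope):

import tensorflow as tf

labels = tf.constant([0, 0, 1, 1], dtype=tf.int32)            # [batch_size] class ids
embeddings = tf.random.normal([4, 16])                        # not l2-normalized, per the docstring
value = lifted_struct_loss(labels, embeddings, margin=1.0)    # scalar tf.float32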
Example #43
 def test_rotate_static_shape(self):
   image = array_ops.diag([1., 2., 3.])
   result = image_ops.rotate(
       image, random_ops.random_uniform((), -1, 1), interpolation="BILINEAR")
   self.assertEqual(image.get_shape(), result.get_shape())
Example #44
 def full_fisher_block(self):
   return array_ops.diag(array_ops.reshape(self._factor.get_cov(), (-1,)))
Example #45
def _DiagPartGrad(_, grad):
  return array_ops.diag(grad)
Example #46
 def test_rotate_static_shape(self):
     image = array_ops.diag([1., 2., 3.])
     result = image_ops.rotate(image,
                               random_ops.random_uniform((), -1, 1),
                               interpolation="BILINEAR")
     self.assertEqual(image.get_shape(), result.get_shape())
Example #47
    def _finish(self, state):
        var_dtype = self._variables[0].dtype.base_dtype
        # Update global step.
        global_step = self._get_global_step(state)
        update_global_step = state_ops.assign_add(global_step, 1.)

        # Update the first moment estimate.
        beta1 = state.get_hyper("beta1", dtype=var_dtype)
        moment1 = self._get_moment1(state)
        flat_grad = self._get_flat_grad(state)
        # moment1_t := beta1 * moment1_{t-1} + (1 - beta1) * flat_grad_t
        update_moment1 = moment1.assign(beta1 * moment1 +
                                        (1. - beta1) * flat_grad)

        # Update the gradient buffer.
        window = state.get_hyper("window")
        grad_buffer = self._get_grad_buffer(state)
        next_grad_index = math_ops.floormod(
            math_ops.to_int32(update_global_step - 1.), window)
        # grad_buffer[(t-1) % window] := moment1_t
        update_grad_buffer = state_ops.scatter_update(grad_buffer,
                                                      next_grad_index,
                                                      update_moment1)

        # Compute the update step.
        eps = state.get_hyper("eps", dtype=var_dtype)
        svd_eps = state.get_hyper("svd_eps", dtype=var_dtype)
        sigma_eps = state.get_hyper("sigma_eps", dtype=var_dtype)
        lr = state.get_hyper("lr", dtype=var_dtype)
        denom = math_ops.sqrt(
            math_ops.minimum(
                ops.convert_to_tensor(update_global_step),
                ops.convert_to_tensor(math_ops.cast(window, dtype=var_dtype))))
        moment1_2d = array_ops.expand_dims(update_moment1, -1)

        # m = grad_buffer^T / sqrt(min(t, window))
        # m has shape [model dimension, window], where model dimension is the sum
        # of the dimensions of the flattened variables.
        m = array_ops.transpose(math_ops.divide(update_grad_buffer, denom))

        # sigma, u, _ = SVD(m^Tm + I * svd_eps)
        mm = math_ops.matmul(m, m, transpose_a=True)
        damping = math_ops.cast(linalg_ops.eye(window),
                                dtype=var_dtype) * svd_eps
        sigma, u, _ = linalg_ops.svd(mm + damping)
        sigma_sqrt = math_ops.sqrt(sigma)
        sigma_sqrt_min = math_ops.reduce_min(sigma_sqrt)

        # sigma_sqrt_inv = 1 / (\sqrt{sigma} + sigma_eps) ^ 3
        # We add sigma_eps to alleviate numerical instability.
        # Note that (m^Tm)^(-3/2) = u diag(sigma_sqrt_inv) u^T.
        sigma_sqrt_inv = math_ops.divide(
            math_ops.cast(1.0, dtype=var_dtype),
            math_ops.pow(sigma_sqrt + sigma_eps, 3))

        # In full matrix AdaGrad, the update step computes (mm^T)^(-1/2)g, where the
        # inversion of a model dimension by model dimension matrix is needed. To
        # speed up this computation we calculate the following instead:
        # m(m^Tm)^(-3/2)m^T moment1 = m u diag(sigma_sqrt_inv) u^T m^T moment1.
        new_step = array_ops.expand_dims(
            array_ops.zeros(flat_grad.get_shape(), dtype=var_dtype), -1)
        head = math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(sigma_sqrt_inv),
                    math_ops.matmul(u,
                                    math_ops.matmul(m,
                                                    moment1_2d,
                                                    transpose_a=True),
                                    transpose_a=True))))

        # When inverting (mm^t)^(1/2), we also add epsilon * I regularization for
        # degenerate cases. We expand ((mm^t)^(1/2) + epsilon * I)^(-1) using
        # Woodbury's identity.
        # For full derivation please see paper at
        # https://arxiv.org/pdf/1806.02958.pdf
        tail = moment1_2d - math_ops.matmul(
            m,
            math_ops.matmul(
                u,
                math_ops.matmul(
                    array_ops.diag(
                        math_ops.divide(math_ops.cast(1.0, dtype=var_dtype),
                                        sigma)),
                    math_ops.matmul(u,
                                    math_ops.matmul(
                                        m, moment1_2d, transpose_a=True),
                                    transpose_a=True))))
        scaled_tail = math_ops.divide(tail, sigma_sqrt_min)

        update_new_step = control_flow_ops.cond(
            sigma_sqrt_min > eps, lambda: math_ops.add(head, scaled_tail),
            lambda: math_ops.add(new_step, head))

        # Update each variable.
        update_step = []
        for var in self._variables:
            dim = self.shape_dict[var.name]
            start_index = self.index_dict[var.name]
            end_index = start_index + dim
            var_update_correct_shape = array_ops.reshape(
                update_new_step[start_index:end_index], var.get_shape())
            var_updated = state_ops.assign_sub(var,
                                               lr * var_update_correct_shape)
            update_step.append(var_updated)

        return control_flow_ops.group(update_step)
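The head/tail computation above relies on the identity m (m^T m)^(-3/2) m^T = ((m m^T)^+)^(1/2) on the column space of m, which is what lets the optimizer avoid forming a [model dimension] x [model dimension] matrix. A minimal NumPy sketch of that identity, not the optimizer's code, with arbitrary random data:

import numpy as np

d, w = 50, 4                        # model dimension >> window size
rng = np.random.RandomState(0)
m = rng.randn(d, w)                 # tall matrix of buffered gradients
g = rng.randn(d, 1)                 # stand-in for the first-moment vector

# Low-rank route (as in the snippet): m u diag(sigma^(-3/2)) u^T m^T g,
# computed from the w x w eigensystem of m^T m.
sigma, u = np.linalg.eigh(m.T @ m)
low_rank = m @ (u @ np.diag(sigma ** -1.5) @ u.T) @ (m.T @ g)

# Direct route: pseudo-inverse square root of the d x d matrix m m^T.
vals, vecs = np.linalg.eigh(m @ m.T)
inv_sqrt = np.where(vals > 1e-8, 1.0 / np.sqrt(np.maximum(vals, 1e-12)), 0.0)
direct = vecs @ np.diag(inv_sqrt) @ vecs.T @ g

print(np.allclose(low_rank, direct))  # True up to numerical error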
Example #48
0
def triplet_semihard_loss(labels, embeddings, margin=1.0):
  """Computes the triplet loss with semi-hard negative mining.

  The loss encourages the positive distances (between a pair of embeddings with
  the same labels) to be smaller than the minimum negative distance among the
  negatives that are at least greater than the positive distance plus the
  margin constant (the semi-hard negatives) in the mini-batch. If no such
  negative exists, the largest negative distance is used instead.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """
  # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
  lshape = array_ops.shape(labels)
  assert lshape.shape == 1
  labels = array_ops.reshape(labels, [lshape[0], 1])

  # Build pairwise squared distance matrix.
  pdist_matrix = pairwise_distance(embeddings, squared=True)
  # Build pairwise binary adjacency matrix.
  adjacency = math_ops.equal(labels, array_ops.transpose(labels))
  # Invert so we can select negatives only.
  adjacency_not = math_ops.logical_not(adjacency)

  batch_size = array_ops.size(labels)

  # Compute the mask.
  pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
  mask = math_ops.logical_and(
      array_ops.tile(adjacency_not, [batch_size, 1]),
      math_ops.greater(
          pdist_matrix_tile, array_ops.reshape(
              array_ops.transpose(pdist_matrix), [-1, 1])))
  mask_final = array_ops.reshape(
      math_ops.greater(
          math_ops.reduce_sum(
              math_ops.cast(mask, dtype=dtypes.float32), 1, keepdims=True),
          0.0), [batch_size, batch_size])
  mask_final = array_ops.transpose(mask_final)

  adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
  mask = math_ops.cast(mask, dtype=dtypes.float32)

  # negatives_outside: smallest D_an where D_an > D_ap.
  negatives_outside = array_ops.reshape(
      masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
  negatives_outside = array_ops.transpose(negatives_outside)

  # negatives_inside: largest D_an.
  negatives_inside = array_ops.tile(
      masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
  semi_hard_negatives = array_ops.where(
      mask_final, negatives_outside, negatives_inside)

  loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

  mask_positives = math_ops.cast(
      adjacency, dtype=dtypes.float32) - array_ops.diag(
          array_ops.ones([batch_size]))

  # In lifted-struct, the authors multiply by 0.5 for the upper triangular;
  #   in semihard, all positive pairs except the diagonal are used.
  num_positives = math_ops.reduce_sum(mask_positives)

  triplet_loss = math_ops.truediv(
      math_ops.reduce_sum(
          math_ops.maximum(
              math_ops.multiply(loss_mat, mask_positives), 0.0)),
      num_positives,
      name='triplet_semihard_loss')

  return triplet_loss
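Both metric-learning losses in these snippets call a pairwise_distance helper that is not included. A minimal sketch of such a helper, assuming it returns the matrix of (optionally squared) Euclidean distances between rows of `feature`; the name and signature come from the call sites above, the body is illustrative:

def pairwise_distance(feature, squared=False):
  """Pairwise (squared) Euclidean distances between rows of `feature`."""
  # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, computed for all pairs at once.
  sq_norms = math_ops.reduce_sum(
      math_ops.square(feature), axis=1, keepdims=True)
  distances_squared = (
      sq_norms - 2.0 * math_ops.matmul(feature, feature, transpose_b=True) +
      array_ops.transpose(sq_norms))
  # Clamp tiny negatives introduced by floating-point error.
  distances_squared = math_ops.maximum(distances_squared, 0.0)
  if squared:
    return distances_squared
  return math_ops.sqrt(distances_squared)

The library version additionally guards the square root against exact zeros (which otherwise produce NaN gradients) and zeroes out the diagonal; the sketch omits those refinements.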
Example #49
0
 def testInvalidRank(self):
   with self.assertRaisesRegexp(ValueError, "must be at least rank 1"):
     array_ops.diag(0.0)
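The test above checks that a scalar input is rejected: array_ops.diag requires a tensor of rank at least 1 and returns a tensor of twice the input rank. A small illustrative usage (values arbitrary):

# Rank-1 input -> rank-2 output: a length-3 vector becomes a 3x3 matrix.
d = array_ops.diag(constant_op.constant([1., 2., 3.]))
# d == [[1., 0., 0.],
#       [0., 2., 0.],
#       [0., 0., 3.]]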
Example #50
0
def batch_matrix_pow(matrices, powers):
    """Compute powers of matrices, e.g. A^3 = matmul(matmul(A, A), A).

  Uses exponentiation by squaring, with O(log(p)) matrix multiplications to
  compute A^p.

  Args:
    matrices: [batch size x N x N]
    powers: Which integer power to raise each matrix to [batch size]
  Returns:
    The matrices raised to their respective powers, same dimensions as the
    "matrices" argument.
  """
    def terminate_when_all_zero(current_argument, residual_powers,
                                accumulator):
        del current_argument, accumulator  # not used for condition
        do_exit = math_ops.reduce_any(
            math_ops.greater(residual_powers,
                             array_ops.ones_like(residual_powers)))
        return do_exit

    def do_iteration(current_argument, residual_powers, accumulator):
        """Compute one step of iterative exponentiation by squaring.

    The recursive form is:
      power(A, p) = { power(matmul(A, A), p / 2) for even p
                    { matmul(A, power(matmul(A, A), (p - 1) / 2)) for odd p
      power(A, 0) = I

    The power(A, 0) = I case is handled by starting with accumulator set to the
    identity matrix; matrices with zero residual powers are passed through
    unchanged.

    Args:
      current_argument: On this step, what is the first argument (A^2..^2) to
          the (unrolled) recursive function? [batch size x N x N]
      residual_powers: On this step, what is the second argument (residual p)?
          [batch_size]
      accumulator: Accumulates the exterior multiplications from the odd
          powers (initially the identity matrix). [batch_size x N x N]
    Returns:
      Updated versions of each argument for one step of the unrolled
      computation. Does not change parts of the batch which have a residual
      power of zero.
    """
        is_even = math_ops.equal(
            residual_powers % 2,
            array_ops.zeros(array_ops.shape(residual_powers),
                            dtype=dtypes.int32))
        new_accumulator = array_ops.where(
            is_even, accumulator, math_ops.matmul(accumulator,
                                                  current_argument))
        new_argument = math_ops.matmul(current_argument, current_argument)
        do_update = math_ops.greater(residual_powers, 1)
        new_residual_powers = residual_powers - residual_powers % 2
        new_residual_powers //= 2
        # Stop updating if we've reached our base case; some batch elements may
        # finish sooner than others
        accumulator = array_ops.where(do_update, new_accumulator, accumulator)
        current_argument = array_ops.where(do_update, new_argument,
                                           current_argument)
        residual_powers = array_ops.where(do_update, new_residual_powers,
                                          residual_powers)
        return (current_argument, residual_powers, accumulator)

    matrices = ops.convert_to_tensor(matrices)
    powers = math_ops.cast(powers, dtype=dtypes.int32)
    ident = array_ops.expand_dims(
        array_ops.diag(
            array_ops.ones([array_ops.shape(matrices)[1]],
                           dtype=matrices.dtype)), 0)
    ident_tiled = array_ops.tile(ident, [array_ops.shape(matrices)[0], 1, 1])
    (final_argument,
     final_residual_power, final_accumulator) = control_flow_ops.while_loop(
         terminate_when_all_zero, do_iteration,
         [matrices, powers, ident_tiled])
    return array_ops.where(
        math_ops.equal(
            final_residual_power,
            array_ops.zeros_like(final_residual_power, dtype=dtypes.int32)),
        ident_tiled, math_ops.matmul(final_argument, final_accumulator))
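A small graph-mode usage sketch for batch_matrix_pow (the numbers are arbitrary): raising a batch of two 2x2 matrices to the powers 3 and 0 should yield A^3 for the first batch element and the identity for the second.

from tensorflow.python.client import session

matrices = constant_op.constant([[[2., 0.], [0., 2.]],
                                 [[1., 1.], [0., 1.]]])
powers = constant_op.constant([3, 0], dtype=dtypes.int32)
result = batch_matrix_pow(matrices, powers)
with session.Session() as sess:
  print(sess.run(result))
  # Expected:
  # [[[8., 0.], [0., 8.]],   # (2 * I)^3
  #  [[1., 0.], [0., 1.]]]   # A^0 = I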
Example #51
0
def lifted_struct_loss(labels, embeddings, margin=1.0):
    """Computes the lifted structured loss.

  The loss encourages the positive distances (between a pair of embeddings
  with the same labels) to be smaller than any negative distances (between a
  pair of embeddings with different labels) in the mini-batch in a way
  that is differentiable with respect to the embedding vectors.
  See: https://arxiv.org/abs/1511.06452.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should not
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    lifted_loss: tf.float32 scalar.
  """
    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pairwise_distances = pairwise_distance(embeddings)

    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    diff = margin - pairwise_distances
    mask = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    # Safe maximum: Temporarily shift negative distances above zero before
    #   taking the max; this is to take the max only among negatives.
    row_minimums = math_ops.reduce_min(diff, 1, keepdims=True)
    row_negative_maximums = math_ops.reduce_max(
        math_ops.multiply(diff - row_minimums,
                          mask), 1, keepdims=True) + row_minimums

    # Compute the loss.
    # Keep track of matrix of maximums where M_ij = max(m_i, m_j)
    #   where m_i is the max of alpha - negative D_i's.
    # This matches the Caffe loss layer implementation at:
    #   https://github.com/rksltnl/Caffe-Deep-Metric-Learning-CVPR16/blob/0efd7544a9846f58df923c8b992198ba5c355454/src/caffe/layers/lifted_struct_similarity_softmax_layer.cpp  # pylint: disable=line-too-long

    max_elements = math_ops.maximum(row_negative_maximums,
                                    array_ops.transpose(row_negative_maximums))
    diff_tiled = array_ops.tile(diff, [batch_size, 1])
    mask_tiled = array_ops.tile(mask, [batch_size, 1])
    max_elements_vect = array_ops.reshape(array_ops.transpose(max_elements),
                                          [-1, 1])

    loss_exp_left = array_ops.reshape(
        math_ops.reduce_sum(math_ops.multiply(
            math_ops.exp(diff_tiled - max_elements_vect), mask_tiled),
                            1,
                            keepdims=True), [batch_size, batch_size])

    loss_mat = max_elements + math_ops.log(loss_exp_left +
                                           array_ops.transpose(loss_exp_left))
    # Add the positive distance.
    loss_mat += pairwise_distances

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # *0.5 for upper triangular, and another *0.5 for 1/2 factor for loss^2.
    num_positives = math_ops.reduce_sum(mask_positives) / 2.0

    lifted_loss = math_ops.truediv(0.25 * math_ops.reduce_sum(
        math_ops.square(
            math_ops.maximum(math_ops.multiply(loss_mat, mask_positives),
                             0.0))),
                                   num_positives,
                                   name='liftedstruct_loss')
    return lifted_loss
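The "safe maximum" used for row_negative_maximums above computes a per-row maximum restricted to negative pairs without any indexing: shifting each row by its minimum makes every entry non-negative, so multiplying by the 0/1 mask cannot accidentally promote a masked-out entry, and the shift is added back afterwards. A tiny NumPy check of the trick (arbitrary numbers; assumes every row has at least one negative pair):

import numpy as np

diff = np.array([[-3.0, 1.0, 2.0],
                 [0.5, -4.0, -1.0]])
mask = np.array([[0.0, 1.0, 1.0],
                 [1.0, 0.0, 1.0]])          # 1 where the pair is a negative
row_min = diff.min(axis=1, keepdims=True)
shifted = ((diff - row_min) * mask).max(axis=1, keepdims=True) + row_min
# Direct masked maximum for comparison.
direct = np.where(mask > 0, diff, -np.inf).max(axis=1, keepdims=True)
print(np.allclose(shifted, direct))         # True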
Example #52
0
 def loop_fn(i):
     inp = array_ops.gather(x, i)
     return array_ops.diag(inp)
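Loop bodies like this are usually handed to the parallel-for machinery, which vectorizes the per-iteration graph instead of emitting a while_loop. A rough usage sketch; the module path and the pfor call are an assumption based on the contrib-era parallel_for package, and x is made up:

from tensorflow.python.ops.parallel_for import control_flow_ops as pfor_ops

x = random_ops.random_uniform([4, 3])

def loop_fn(i):
  inp = array_ops.gather(x, i)
  return array_ops.diag(inp)

# Runs loop_fn for i = 0..3 in a vectorized fashion; the result stacks the
# per-iteration outputs, giving a [4, 3, 3] tensor of diagonal matrices.
stacked_diags = pfor_ops.pfor(loop_fn, 4)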
Example #53
0
  def _finish(self, state):
    var_dtype = self._variables[0].dtype.base_dtype
    # Update global step.
    global_step = self._get_global_step(state)
    update_global_step = state_ops.assign_add(global_step, 1.)

    # Update the first moment estimate.
    beta1 = state.get_hyper("beta1", dtype=var_dtype)
    moment1 = self._get_moment1(state)
    flat_grad = self._get_flat_grad(state)
    # moment1_t := beta1 * moment1_{t-1} + (1 - beta1) * flat_grad_t
    update_moment1 = moment1.assign(beta1 * moment1 + (1. - beta1) * flat_grad)

    # Update the gradient buffer.
    window = state.get_hyper("window")
    grad_buffer = self._get_grad_buffer(state)
    next_grad_index = math_ops.floormod(
        math_ops.to_int32(update_global_step - 1.), window)
    # grad_buffer[(t-1) % window] := moment1_t
    update_grad_buffer = state_ops.scatter_update(grad_buffer, next_grad_index,
                                                  update_moment1)

    # Compute the update step.
    eps = state.get_hyper("eps", dtype=var_dtype)
    svd_eps = state.get_hyper("svd_eps", dtype=var_dtype)
    sigma_eps = state.get_hyper("sigma_eps", dtype=var_dtype)
    lr = state.get_hyper("lr", dtype=var_dtype)
    denom = math_ops.sqrt(
        math_ops.minimum(
            ops.convert_to_tensor(update_global_step),
            ops.convert_to_tensor(math_ops.cast(window, dtype=var_dtype))))
    moment1_2d = array_ops.expand_dims(update_moment1, -1)

    # m = grad_buffer^T / sqrt(min(t, window))
    # m has shape [model dimension, window], where model dimension is the sum
    # of the dimensions of the flattened variables.
    m = array_ops.transpose(math_ops.divide(update_grad_buffer, denom))

    # sigma, u, _ = SVD(m^Tm + I * svd_eps)
    mm = math_ops.matmul(m, m, transpose_a=True)
    damping = math_ops.cast(linalg_ops.eye(window), dtype=var_dtype) * svd_eps
    sigma, u, _ = linalg_ops.svd(mm + damping)
    sigma_sqrt = math_ops.sqrt(sigma)
    sigma_sqrt_min = math_ops.reduce_min(sigma_sqrt)

    # sigma_sqrt_inv = 1 / (\sqrt{sigma} + sigma_eps) ^ 3
    # We add sigma_eps to alleviate numerical instability.
    # Note that (m^Tm)^(-3/2) = u diag(sigma_sqrt_inv) u^T.
    sigma_sqrt_inv = math_ops.divide(
        math_ops.cast(1.0, dtype=var_dtype),
        math_ops.pow(sigma_sqrt + sigma_eps, 3))

    # In full matrix AdaGrad, the update step computes (mm^T)^(-1/2)g, where the
    # inversion of a model dimension by model dimension matrix is needed. To
    # speed up this computation we calculate the following instead:
    # m(m^Tm)^(-3/2)m^T moment1 = m u diag(sigma_sqrt_inv) u^T m^T moment1.
    new_step = array_ops.expand_dims(
        array_ops.zeros(flat_grad.get_shape(), dtype=var_dtype), -1)
    head = math_ops.matmul(
        m,
        math_ops.matmul(
            u,
            math_ops.matmul(
                array_ops.diag(sigma_sqrt_inv),
                math_ops.matmul(
                    u,
                    math_ops.matmul(m, moment1_2d, transpose_a=True),
                    transpose_a=True))))

    # When inverting (mm^T)^(1/2), we also add epsilon * I regularization for
    # degenerate cases. We expand ((mm^T)^(1/2) + epsilon * I)^(-1) using
    # Woodbury's identity.
    # For full derivation please see paper at
    # https://arxiv.org/pdf/1806.02958.pdf
    tail = moment1_2d - math_ops.matmul(
        m,
        math_ops.matmul(
            u,
            math_ops.matmul(
                array_ops.diag(
                    math_ops.divide(math_ops.cast(1.0, dtype=var_dtype),
                                    sigma)),
                math_ops.matmul(
                    u,
                    math_ops.matmul(m, moment1_2d, transpose_a=True),
                    transpose_a=True))))
    scaled_tail = math_ops.divide(tail, sigma_sqrt_min)

    update_new_step = control_flow_ops.cond(
        sigma_sqrt_min > eps, lambda: math_ops.add(head, scaled_tail),
        lambda: math_ops.add(new_step, head))

    # Update each variable.
    update_step = []
    for var in self._variables:
      dim = self.shape_dict[var.name]
      start_index = self.index_dict[var.name]
      end_index = start_index + dim
      var_update_correct_shape = array_ops.reshape(
          update_new_step[start_index:end_index], var.get_shape())
      var_updated = state_ops.assign_sub(var, lr * var_update_correct_shape)
      update_step.append(var_updated)

    return control_flow_ops.group(update_step)
Example #54
0
def inverse_initializer(shape, dtype, partition_info=None):  # pylint: disable=unused-argument
  return array_ops.diag(array_ops.ones(shape[0], dtype))
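An initializer with this (shape, dtype, partition_info) signature can be passed anywhere TF1-style variable initializers are accepted; because it returns diag(ones(shape[0])), it only makes sense for square variables. A hedged usage sketch (the variable name is made up):

# Creates a 4x4 variable whose initial value is the identity matrix.
inv_cov = variable_scope.get_variable(
    "inverse_covariance",
    shape=[4, 4],
    dtype=dtypes.float32,
    initializer=inverse_initializer)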
Example #55
0
def triplet_semihard_loss(labels, embeddings, margin=1.0):
    """Computes the triplet loss with semi-hard negative mining.

  The loss encourages the positive distances (between a pair of embeddings with
  the same labels) to be smaller than the minimum negative distance among the
  negatives that are at least greater than the positive distance plus the
  margin constant (the semi-hard negatives) in the mini-batch. If no such
  negative exists, the largest negative distance is used instead.
  See: https://arxiv.org/abs/1503.03832.

  Args:
    labels: 1-D tf.int32 `Tensor` with shape [batch_size] of
      multiclass integer labels.
    embeddings: 2-D float `Tensor` of embedding vectors. Embeddings should
      be l2 normalized.
    margin: Float, margin term in the loss definition.

  Returns:
    triplet_loss: tf.float32 scalar.
  """

    # Reshape [batch_size] label tensor to a [batch_size, 1] label tensor.
    lshape = array_ops.shape(labels)
    assert lshape.shape == 1
    labels = array_ops.reshape(labels, [lshape[0], 1])

    # Build pairwise squared distance matrix.
    pdist_matrix = pairwise_distance(embeddings, squared=True)
    # Build pairwise binary adjacency matrix.
    adjacency = math_ops.equal(labels, array_ops.transpose(labels))
    # Invert so we can select negatives only.
    adjacency_not = math_ops.logical_not(adjacency)

    batch_size = array_ops.size(labels)

    # Compute the mask.
    pdist_matrix_tile = array_ops.tile(pdist_matrix, [batch_size, 1])
    mask = math_ops.logical_and(
        array_ops.tile(adjacency_not, [batch_size, 1]),
        math_ops.greater(
            pdist_matrix_tile,
            array_ops.reshape(array_ops.transpose(pdist_matrix), [-1, 1])))
    mask_final = array_ops.reshape(
        math_ops.greater(
            math_ops.reduce_sum(math_ops.cast(mask, dtype=dtypes.float32),
                                1,
                                keepdims=True), 0.0), [batch_size, batch_size])
    mask_final = array_ops.transpose(mask_final)

    adjacency_not = math_ops.cast(adjacency_not, dtype=dtypes.float32)
    mask = math_ops.cast(mask, dtype=dtypes.float32)

    # negatives_outside: smallest D_an where D_an > D_ap.
    negatives_outside = array_ops.reshape(
        masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
    negatives_outside = array_ops.transpose(negatives_outside)

    # negatives_inside: largest D_an.
    negatives_inside = array_ops.tile(
        masked_maximum(pdist_matrix, adjacency_not), [1, batch_size])
    semi_hard_negatives = array_ops.where(mask_final, negatives_outside,
                                          negatives_inside)

    loss_mat = math_ops.add(margin, pdist_matrix - semi_hard_negatives)

    mask_positives = math_ops.cast(adjacency,
                                   dtype=dtypes.float32) - array_ops.diag(
                                       array_ops.ones([batch_size]))

    # In lifted-struct, the authors multiply by 0.5 for the upper triangular;
    #   in semihard, all positive pairs except the diagonal are used.
    num_positives = math_ops.reduce_sum(mask_positives)

    triplet_loss = math_ops.truediv(math_ops.reduce_sum(
        math_ops.maximum(math_ops.multiply(loss_mat, mask_positives), 0.0)),
                                    num_positives,
                                    name='triplet_semihard_loss')

    return triplet_loss
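Both triplet-loss variants also call masked_minimum and masked_maximum, which are not shown in these snippets. A plausible sketch of those helpers, consistent with how they are used above (per-row min/max of data restricted to entries where mask is 1, using the same shift trick as the lifted-structured loss); treat the exact bodies as an assumption:

def masked_maximum(data, mask, dim=1):
  """Row-wise max of `data`, considering only entries where `mask` is 1."""
  axis_minimums = math_ops.reduce_min(data, dim, keepdims=True)
  masked_maximums = math_ops.reduce_max(
      math_ops.multiply(data - axis_minimums, mask), dim,
      keepdims=True) + axis_minimums
  return masked_maximums


def masked_minimum(data, mask, dim=1):
  """Row-wise min of `data`, considering only entries where `mask` is 1."""
  axis_maximums = math_ops.reduce_max(data, dim, keepdims=True)
  masked_minimums = math_ops.reduce_min(
      math_ops.multiply(data - axis_maximums, mask), dim,
      keepdims=True) + axis_maximums
  return masked_minimums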
Example #56
0
def _DiagPartGrad(_, grad):
    return array_ops.diag(grad)
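_DiagPartGrad reflects the fact that diag and diag_part are adjoint linear maps: the vector-Jacobian product of diag_part applied to an incoming gradient grad is simply diag(grad). A small graph-mode sketch of the effect (gradients_impl import assumed, in the same internal-module style as the rest of these snippets):

from tensorflow.python.ops import gradients_impl

x = array_ops.placeholder(dtypes.float32, shape=[3, 3])
y = math_ops.reduce_sum(array_ops.diag_part(x))
# d(sum of the diagonal)/dx is the 3x3 identity matrix: gradients flow back
# only into the diagonal entries, exactly what _DiagPartGrad produces.
grad_x = gradients_impl.gradients(y, x)[0]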
Example #57
0
 def full_fisher_block(self):
     return array_ops.diag(array_ops.reshape(self._factor.get_cov(),
                                             (-1, )))