Example #1
 def testInvalidShapeAtEval(self):
   with self.session(use_gpu=True):
     v = array_ops.placeholder(dtype=dtypes_lib.float32)
     with self.assertRaisesOpError("input must be at least 2-dim"):
       array_ops.matrix_set_diag(v, [v]).eval(feed_dict={v: 0.0})
     with self.assertRaisesOpError(
         r"but received input shape: \[1,1\] and diagonal shape: \[\]"):
       array_ops.matrix_set_diag([[v]], v).eval(feed_dict={v: 0.0})
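For orientation, a minimal NumPy sketch (not the TensorFlow kernel) of what matrix_set_diag computes in the default k=0 case: the main diagonal of each innermost matrix is overwritten and every other entry is kept, which is why the op checks for a >= 2-dim input and a matching diagonal shape. The helper name set_diag_reference is illustrative only.

import numpy as np

def set_diag_reference(matrix, diag):
  out = np.array(matrix, dtype=float)   # copy; off-diagonal entries are kept
  idx = np.arange(min(out.shape[-2:]))
  out[..., idx, idx] = diag             # overwrite the main diagonal
  return out

print(set_diag_reference(np.zeros((2, 3)), [7.0, 8.0]))
# [[7. 0. 0.]
#  [0. 8. 0.]]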
Example #2
  def testRectangular(self):
    with self.session(use_gpu=True):
      v = np.array([3.0, 4.0])
      mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0]])
      expected = np.array([[3.0, 1.0, 0.0], [1.0, 4.0, 1.0]])
      output = array_ops.matrix_set_diag(mat, v)
      self.assertEqual((2, 3), output.get_shape())
      self.assertAllEqual(expected, self.evaluate(output))

      v = np.array([3.0, 4.0])
      mat = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
      expected = np.array([[3.0, 1.0], [1.0, 4.0], [1.0, 1.0]])
      output = array_ops.matrix_set_diag(mat, v)
      self.assertEqual((3, 2), output.get_shape())
      self.assertAllEqual(expected, self.evaluate(output))
Example #3
 def _covariance(self):
   p = self.probs * array_ops.ones_like(
       self.total_count)[..., array_ops.newaxis]
   return array_ops.matrix_set_diag(
       -math_ops.matmul(self._mean_val[..., array_ops.newaxis],
                        p[..., array_ops.newaxis, :]),  # outer product
       self._variance())
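The pattern above, an outer product for the off-diagonal covariance terms followed by matrix_set_diag for the variances, can be sketched in plain NumPy; the values below are made up for illustration.

import numpy as np

mean = np.array([2.0, 3.0, 5.0])
p = np.array([0.2, 0.3, 0.5])
cov = -np.outer(mean, p)                          # off-diagonal terms
cov[np.arange(3), np.arange(3)] = mean * (1 - p)  # the matrix_set_diag step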
Example #4
 def _to_dense(self):
   normalized_axis = self.reflection_axis / linalg.norm(
       self.reflection_axis, axis=-1, keepdims=True)
   mat = normalized_axis[..., array_ops.newaxis]
   matrix = -2 * math_ops.matmul(mat, mat, adjoint_b=True)
   return array_ops.matrix_set_diag(
       matrix, 1. + array_ops.matrix_diag_part(matrix))
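_to_dense builds a Householder reflection I - 2 v v^T without materializing an identity matrix: matrix_set_diag adds 1 to every diagonal entry of -2 v v^T. A small NumPy check of the same identity, with an arbitrary vector:

import numpy as np

v = np.array([1.0, 2.0, 2.0])
v /= np.linalg.norm(v)
m = -2.0 * np.outer(v, v)
m[np.arange(3), np.arange(3)] += 1.0    # the matrix_set_diag step
print(np.allclose(m @ m.T, np.eye(3)))  # reflections are orthogonal: True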
Example #5
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape_tensor()
    event_shape = self.event_shape_tensor()
    batch_ndims = array_ops.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = array_ops.concat([[n], batch_shape, event_shape], 0)

    # Complexity: O(nbk^2)
    x = random_ops.random_normal(shape=shape,
                                 mean=0.,
                                 stddev=1.,
                                 dtype=self.dtype,
                                 seed=seed)

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    expanded_df = self.df * array_ops.ones(
        self.scale_operator.batch_shape_tensor(),
        dtype=self.df.dtype.base_dtype)
    g = random_ops.random_gamma(shape=[n],
                                alpha=self._multi_gamma_sequence(
                                    0.5 * expanded_df, self.dimension),
                                beta=0.5,
                                dtype=self.dtype,
                                seed=distribution_util.gen_new_seed(
                                    seed, "wishart"))

    # Complexity: O(nbk^2)
    x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk^2)
    perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0)
    x = array_ops.transpose(x, perm)
    shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0)
    x = array_ops.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system. E.g., for LinearOperatorDiag, each matmul is O(k^2), so
    # this complexity is O(nbk^2). For LinearOperatorLowerTriangular,
    # each matmul is O(k^3) so this step has complexity O(nbk^3).
    x = self.scale_operator.matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk^2)
    shape = array_ops.concat([batch_shape, event_shape, [n]], 0)
    x = array_ops.reshape(x, shape)
    perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0)
    x = array_ops.transpose(x, perm)

    if not self.cholesky_input_output_matrices:
      # Complexity: O(nbk^3)
      x = math_ops.matmul(x, x, adjoint_b=True)

    return x
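The sampler above is the Bartlett decomposition: strictly lower-triangular standard normals whose diagonal matrix_set_diag replaces with square roots of gamma (chi-square) variates, yielding a Cholesky factor of a Wishart draw. A hedged NumPy sketch for a single unscaled k x k sample; k and df are arbitrary here.

import numpy as np

rng = np.random.default_rng(0)
k, df = 3, 7.0
a = np.tril(rng.standard_normal((k, k)), k=-1)      # strictly lower triangle
chi2 = rng.gamma(shape=(df - np.arange(k)) / 2.0, scale=2.0)
a[np.arange(k), np.arange(k)] = np.sqrt(chi2)       # the matrix_set_diag step
w = a @ a.T                                         # ~ Wishart(df, identity)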
Example #6
def random_tril_matrix(shape,
                       dtype,
                       force_well_conditioned=False,
                       remove_upper=True):
  """[batch] lower triangular matrix.

  Args:
    shape:  `TensorShape` or Python `list`.  Shape of the returned matrix.
    dtype:  `TensorFlow` `dtype` or Python dtype
    force_well_conditioned:  Python `bool`. If `True`, returned matrix will have
      eigenvalues with modulus in `(1, 2)`.  Otherwise, eigenvalues are unit
      normal random variables.
    remove_upper:  Python `bool`.
      If `True`, zero out the strictly upper triangle.
      If `False`, the lower triangle of returned matrix will have desired
      properties, but will not have the strictly upper triangle zero'd out.

  Returns:
    `Tensor` with desired shape and dtype.
  """
  with ops.name_scope("random_tril_matrix"):
    # Totally random matrix.  Has no nice properties.
    tril = random_normal(shape, dtype=dtype)
    if remove_upper:
      tril = array_ops.matrix_band_part(tril, -1, 0)

    # Create a diagonal with entries having modulus in [1, 2].
    if force_well_conditioned:
      maxval = ops.convert_to_tensor(np.sqrt(2.), dtype=dtype.real_dtype)
      diag = random_sign_uniform(
          shape[:-1], dtype=dtype, minval=1., maxval=maxval)
      tril = array_ops.matrix_set_diag(tril, diag)

    return tril
Example #7
 def _variance(self):
   p = self.p * array_ops.expand_dims(array_ops.ones_like(self.n), -1)
   outer_prod = math_ops.batch_matmul(
       array_ops.expand_dims(self._mean_val, -1),
       array_ops.expand_dims(p, -2))
   return array_ops.matrix_set_diag(-outer_prod,
                                    self._mean_val - self._mean_val * p)
Example #8
def matrix_diag_transform(matrix, transform=None, name=None):
  """Transform diagonal of [batch-]matrix, leave rest of matrix unchanged.

  Create a trainable covariance defined by a Cholesky factor:

  ```python
  # Transform network layer into 2 x 2 array.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))

  # Make the diagonal positive.  If the upper triangle was zero, this would be a
  # valid Cholesky factor.
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # OperatorPDCholesky ignores the upper triangle.
  operator = OperatorPDCholesky(chol)
  ```

  Example of heteroskedastic 2-D linear regression.

  ```python
  # Get a trainable Cholesky factor.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # Get a trainable mean.
  mu = tf.contrib.layers.fully_connected(activations, 2)

  # This is a fully trainable multivariate normal!
  dist = tf.contrib.distributions.MVNCholesky(mu, chol)

  # Standard log loss.  Minimizing this will "train" mu and chol, and then dist
  # will be a distribution predicting labels as multivariate Gaussians.
  loss = -1 * tf.reduce_mean(dist.log_prob(labels))
  ```

  Args:
    matrix:  Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
      equal.
    transform:  Element-wise function mapping `Tensors` to `Tensors`.  To
      be applied to the diagonal of `matrix`.  If `None`, `matrix` is returned
      unchanged.  Defaults to `None`.
    name:  A name to give created ops.
      Defaults to "matrix_diag_transform".

  Returns:
    A `Tensor` with same shape and `dtype` as `matrix`.
  """
  with ops.name_scope(name, "matrix_diag_transform", [matrix]):
    matrix = ops.convert_to_tensor(matrix, name="matrix")
    if transform is None:
      return matrix
    # Replace the diag with transformed diag.
    diag = array_ops.matrix_diag_part(matrix)
    transformed_diag = transform(diag)
    transformed_mat = array_ops.matrix_set_diag(matrix, transformed_diag)

  return transformed_mat
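The same transform can be written against the public TF 2.x API; a minimal sketch, assuming tf.linalg.diag_part and tf.linalg.set_diag (the modern names for the ops used above) are available:

import tensorflow as tf

matrix = tf.random.normal((4, 2, 2))
diag = tf.nn.softplus(tf.linalg.diag_part(matrix))  # make the diagonal positive
chol = tf.linalg.set_diag(matrix, diag)
chol = tf.linalg.band_part(chol, -1, 0)             # keep only the lower triangle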
Example #9
 def _variance(self):
   scale = self.alpha_sum * math_ops.sqrt(1. + self.alpha_sum)
   alpha = self.alpha / scale
   outer_prod = -math_ops.batch_matmul(
       array_ops.expand_dims(alpha, dim=-1),  # column
       array_ops.expand_dims(alpha, dim=-2))  # row
   return array_ops.matrix_set_diag(outer_prod,
                                    alpha * (self.alpha_sum / scale - alpha))
Example #10
  def _verifyLu(self, x, output_idx_type=dtypes.int64):
    # Verify that Px = LU.
    lu, perm = linalg_ops.lu(x, output_idx_type=output_idx_type)

    # Prepare the lower factor of shape num_rows x num_rows
    lu_shape = np.array(lu.shape.as_list())
    batch_shape = lu_shape[:-2]
    num_rows = lu_shape[-2]
    num_cols = lu_shape[-1]

    lower = array_ops.matrix_band_part(lu, -1, 0)

    if num_rows > num_cols:
      eye = linalg_ops.eye(
          num_rows, batch_shape=batch_shape, dtype=lower.dtype)
      lower = array_ops.concat([lower, eye[..., num_cols:]], axis=-1)
    elif num_rows < num_cols:
      lower = lower[..., :num_rows]

    # Fill the diagonal with ones.
    ones_diag = array_ops.ones(
        np.append(batch_shape, num_rows), dtype=lower.dtype)
    lower = array_ops.matrix_set_diag(lower, ones_diag)

    # Prepare the upper factor.
    upper = array_ops.matrix_band_part(lu, 0, -1)

    verification = math_ops.matmul(lower, upper)

    # Permute the rows of the product of the triangular factors.
    if num_rows > 0:
      # Reshape the product of the triangular factors and permutation indices
      # to a single batch dimension. This makes it easy to apply
      # invert_permutation and gather_nd ops.
      perm_reshaped = array_ops.reshape(perm, [-1, num_rows])
      verification_reshaped = array_ops.reshape(verification,
                                                [-1, num_rows, num_cols])
      # Invert the permutation in each batch.
      inv_perm_reshaped = map_fn.map_fn(array_ops.invert_permutation,
                                        perm_reshaped)
      batch_size = perm_reshaped.shape.as_list()[0]
      # Prepare the batch indices with the same shape as the permutation.
      # The corresponding batch index is paired with each of the `num_rows`
      # permutation indices.
      batch_indices = math_ops.cast(
          array_ops.broadcast_to(
              math_ops.range(batch_size)[:, None], perm_reshaped.shape),
          dtype=output_idx_type)
      permuted_verification_reshaped = array_ops.gather_nd(
          verification_reshaped,
          array_ops.stack([batch_indices, inv_perm_reshaped], axis=-1))

      # Reshape the verification matrix back to the original shape.
      verification = array_ops.reshape(permuted_verification_reshaped,
                                       lu_shape)

    self._verifyLuBase(x, lower, upper, perm, verification,
                       output_idx_type)
Example #11
def sign_magnitude_positive_definite(
    raw, off_diagonal_scale=0., overall_scale=0.):
  """Constructs a positive definite matrix from an unconstrained input matrix.

  We want to keep the whole matrix on a log scale, but also allow off-diagonal
  elements to be negative, so the sign of off-diagonal elements is modeled
  separately from their magnitude (using the lower and upper triangles
  respectively). Specifically:

  for i < j, we have:
    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)

  output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)

  output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are
  un-constrained real-valued variables. The resulting values are stable
  around zero due to the exponential (and the softsign keeps the function
  smooth).

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the relative
        scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall scale
        of the output matrix.
  Returns:
    The `output` matrix described above, a [..., M, M] positive definite matrix.

  """
  raw = ops.convert_to_tensor(raw)
  diagonal = array_ops.matrix_diag_part(raw)
  def _right_pad_with_ones(tensor, target_rank):
    # Allow broadcasting even if overall_scale and off_diagonal_scale have batch
    # dimensions
    tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
    return array_ops.reshape(tensor,
                             array_ops.concat(
                                 [
                                     array_ops.shape(tensor), array_ops.ones(
                                         [target_rank - array_ops.rank(tensor)],
                                         dtype=target_rank.dtype)
                                 ],
                                 axis=0))
  # We divide the log values by 2 to compensate for the squaring that happens
  # when transforming Cholesky factors into positive definite matrices.
  sign_magnitude = (gen_math_ops.exp(
      (raw + _right_pad_with_ones(off_diagonal_scale, array_ops.rank(raw)) +
       _right_pad_with_ones(overall_scale, array_ops.rank(raw))) / 2.) *
                    nn.softsign(array_ops.matrix_transpose(raw)))
  sign_magnitude.set_shape(raw.get_shape())
  cholesky_factor = array_ops.matrix_set_diag(
      input=array_ops.matrix_band_part(sign_magnitude, 0, -1),
      diagonal=gen_math_ops.exp((diagonal + _right_pad_with_ones(
          overall_scale, array_ops.rank(diagonal))) / 2.))
  return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
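A quick NumPy check of the construction with both scales at zero: softsign-signed magnitudes fill the strict upper triangle, the diagonal is exp(raw[i, i] / 2) > 0, so the Cholesky factor is invertible and C^T C is positive definite.

import numpy as np

raw = np.random.default_rng(1).standard_normal((4, 4))
mag = np.exp(raw / 2.0) * (raw.T / (np.abs(raw.T) + 1.0))  # exp(raw/2) * softsign(raw^T)
chol = np.triu(mag, k=1)
np.fill_diagonal(chol, np.exp(np.diag(raw) / 2.0))
print(np.all(np.linalg.eigvalsh(chol.T @ chol) > 0))       # True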
Example #12
 def testSquare(self):
   with self.session(use_gpu=True):
     v = np.array([1.0, 2.0, 3.0])
     mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0]])
     mat_set_diag = np.array([[1.0, 1.0, 0.0], [1.0, 2.0, 1.0],
                              [1.0, 1.0, 3.0]])
     output = array_ops.matrix_set_diag(mat, v)
     self.assertEqual((3, 3), output.get_shape())
     self.assertAllEqual(mat_set_diag, self.evaluate(output))
Example #13
  def _sample_n(self, n, seed):
    batch_shape = self.batch_shape()
    event_shape = self.event_shape()
    batch_ndims = array_ops.shape(batch_shape)[0]

    ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
    shape = array_ops.concat(((n,), batch_shape, event_shape), 0)

    # Complexity: O(nbk^2)
    x = random_ops.random_normal(shape=shape,
                                 mean=0.,
                                 stddev=1.,
                                 dtype=self.dtype,
                                 seed=seed)

    # Complexity: O(nbk)
    # This parametrization is equivalent to Chi2, i.e.,
    # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
    g = random_ops.random_gamma(shape=(n,),
                                alpha=self._multi_gamma_sequence(
                                    0.5 * self.df, self.dimension),
                                beta=0.5,
                                dtype=self.dtype,
                                seed=distribution_util.gen_new_seed(
                                    seed, "wishart"))

    # Complexity: O(nbk^2)
    x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

    # Complexity: O(nbk)
    x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

    # Make batch-op ready.
    # Complexity: O(nbk^2)
    perm = array_ops.concat((math_ops.range(1, ndims), (0,)), 0)
    x = array_ops.transpose(x, perm)
    shape = array_ops.concat((batch_shape, (event_shape[0], -1)), 0)
    x = array_ops.reshape(x, shape)

    # Complexity: O(nbM) where M is the complexity of the operator solving a
    # vector system.  E.g., for OperatorPDDiag, each matmul is O(k^2), so
    # this complexity is O(nbk^2). For OperatorPDCholesky, each matmul is
    # O(k^3) so this step has complexity O(nbk^3).
    x = self.scale_operator_pd.sqrt_matmul(x)

    # Undo make batch-op ready.
    # Complexity: O(nbk^2)
    shape = array_ops.concat((batch_shape, event_shape, (n,)), 0)
    x = array_ops.reshape(x, shape)
    perm = array_ops.concat(((ndims - 1,), math_ops.range(0, ndims - 1)), 0)
    x = array_ops.transpose(x, perm)

    if not self.cholesky_input_output_matrices:
      # Complexity: O(nbk^3)
      x = math_ops.matmul(x, x, adjoint_b=True)

    return x
Example #14
def _GradWithInverseL(l, l_inverse, grad):
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)
  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
Example #15
def eye(
    num_rows,
    num_columns=None,
    batch_shape=None,
    dtype=dtypes.float32,
    name=None):
  """Construct an identity matrix, or a batch of matrices.

  ```python
  # Construct one identity matrix.
  tf.eye(2)
  ==> [[1., 0.],
       [0., 1.]]

  # Construct a batch of 3 identity matrices, each 2 x 2.
  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
  batch_identity = tf.eye(2, batch_shape=[3])

  # Construct one 2 x 3 "identity" matrix
  tf.eye(2, num_columns=3)
  ==> [[ 1.,  0.,  0.],
       [ 0.,  1.,  0.]]
  ```

  Args:
    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
      in each batch matrix.
    num_columns: Optional non-negative `int32` scalar `Tensor` giving the number
      of columns in each batch matrix.  Defaults to `num_rows`.
    batch_shape:  `int32` `Tensor`.  If provided, returned `Tensor` will have
      leading batch dimensions of this shape.
    dtype:  The type of an element in the resulting `Tensor`
    name:  A name for this `Op`.  Defaults to "eye".

  Returns:
    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`
  """
  with ops.name_scope(
      name, default_name="eye", values=[num_rows, num_columns, batch_shape]):

    batch_shape = [] if batch_shape is None else batch_shape
    batch_shape = ops.convert_to_tensor(
        batch_shape, name="shape", dtype=dtypes.int32)

    if num_columns is None:
      diag_size = num_rows
    else:
      diag_size = math_ops.minimum(num_rows, num_columns)
    diag_shape = array_ops.concat_v2((batch_shape, [diag_size]), 0)
    diag_ones = array_ops.ones(diag_shape, dtype=dtype)

    if num_columns is None:
      return array_ops.matrix_diag(diag_ones)
    else:
      shape = array_ops.concat_v2((batch_shape, [num_rows, num_columns]), 0)
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
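The rectangular branch is just a zero matrix with ones written onto its main diagonal; the equivalent NumPy steps:

import numpy as np

num_rows, num_columns = 2, 3
z = np.zeros((num_rows, num_columns))
d = np.arange(min(num_rows, num_columns))
z[d, d] = 1.0                           # matrix_set_diag(zero_matrix, diag_ones)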
Example #16
  def testRectangularBatch(self):
    with self.session(use_gpu=True):
      v_batch = np.array([[-1.0, -2.0], [-4.0, -5.0]])
      mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0]],
                            [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0]]])

      mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, -2.0, 0.0]],
                                     [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0]]])
      output = array_ops.matrix_set_diag(mat_batch, v_batch)
      self.assertEqual((2, 2, 3), output.get_shape())
      self.assertAllEqual(mat_set_diag_batch, self.evaluate(output))
Example #17
 def _variance(self):
   alpha_sum = array_ops.expand_dims(self.alpha_sum, -1)
   normalized_alpha = self.alpha / alpha_sum
   variance = -math_ops.matmul(
       array_ops.expand_dims(normalized_alpha, -1),
       array_ops.expand_dims(normalized_alpha, -2))
   variance = array_ops.matrix_set_diag(variance, normalized_alpha *
                                        (1. - normalized_alpha))
   shared_factor = (self.n * (alpha_sum + self.n) /
                    (alpha_sum + 1) * array_ops.ones_like(self.alpha))
   variance *= array_ops.expand_dims(shared_factor, -1)
   return variance
Example #18
 def _preprocess_tril(self, identity_multiplier, diag, tril, event_ndims):
   """Helper to preprocess a lower triangular matrix."""
   tril = array_ops.matrix_band_part(tril, -1, 0)  # Zero out TriU.
   if identity_multiplier is None and diag is None:
     return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
   new_diag = array_ops.matrix_diag_part(tril)
   if identity_multiplier is not None:
     new_diag += identity_multiplier
   if diag is not None:
     new_diag += diag
   tril = array_ops.matrix_set_diag(tril, new_diag)
   return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
Example #19
        def loop_fn(i):
            matrix_i = array_ops.gather(matrices, i)
            diag_i = array_ops.gather(diags, i)
            results = [
                array_ops.matrix_set_diag(matrix_i, diag_i),
                array_ops.matrix_set_diag(matrices[0, ...], diag_i),
                array_ops.matrix_set_diag(matrix_i, diags[0, ...]),
            ]

            k = (-1, 1)
            band_i = array_ops.gather(bands, i)
            for align in ["RIGHT_LEFT", "LEFT_RIGHT"]:
                results.extend([
                    array_ops.matrix_set_diag(matrix_i,
                                              band_i,
                                              k=k,
                                              align=align),
                    array_ops.matrix_set_diag(matrices[0, ...],
                                              band_i,
                                              k=k,
                                              align=align),
                    array_ops.matrix_set_diag(matrix_i,
                                              bands[0, ...],
                                              k=k,
                                              align=align)
                ])
            return results
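In the banded calls above, the diagonal argument stacks the bands from k[1] down to k[0], and align says which end of each shorter band carries the (ignored) padding. A small sketch, assuming the TF 2.x tf.linalg.set_diag signature:

import tensorflow as tf

bands = tf.constant([[0., 9., 9.],   # k=1 superdiagonal; right-aligned under RIGHT_LEFT
                     [1., 2., 3.],   # k=0 main diagonal
                     [7., 7., 0.]])  # k=-1 subdiagonal; left-aligned under RIGHT_LEFT
out = tf.linalg.set_diag(tf.zeros((3, 3)), bands, k=(-1, 1), align="RIGHT_LEFT")
# [[1. 9. 0.]
#  [7. 2. 9.]
#  [0. 7. 3.]]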
Example #20
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
  """Gradient for SelfAdjointEigV2."""
  e = op.outputs[0]
  compute_v = op.get_attr("compute_v")
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
  with ops.control_dependencies([grad_e, grad_v]):
    if compute_v:
      v = op.outputs[1]
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only defined
      # up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          math_ops.reciprocal(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e) +
              f * math_ops.matmul(v, grad_v, adjoint_a=True),
              v,
              adjoint_b=True))
    else:
      _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
      grad_a = math_ops.matmul(v,
                               math_ops.matmul(
                                   array_ops.matrix_diag(grad_e),
                                   v,
                                   adjoint_b=True))
    # The forward op only depends on the lower triangular part of a, so here we
    # symmetrize and take the lower triangle
    grad_a = array_ops.matrix_band_part(
        grad_a + math_ops.conj(array_ops.matrix_transpose(grad_a)), -1, 0)
    grad_a = array_ops.matrix_set_diag(grad_a,
                                       0.5 * array_ops.matrix_diag_part(grad_a))
    return grad_a
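The f matrix above holds the pairwise reciprocal eigenvalue gaps with a zeroed diagonal; the same two steps in NumPy for a concrete e (the diagonal of the difference is 0, so its reciprocal must be masked exactly as matrix_set_diag does):

import numpy as np

e = np.array([1.0, 2.0, 4.0])
diff = e[None, :] - e[:, None]        # expand_dims(e, -2) - expand_dims(e, -1)
with np.errstate(divide="ignore"):
  f = 1.0 / diff                      # inf on the diagonal
np.fill_diagonal(f, 0.0)              # the matrix_set_diag(..., zeros) step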
Example #21
def _EigGrad(op, grad_e, grad_v):
    """Gradient for Eig.

  Based on eq. 4.77 from the paper by Christoph Boeddeker et al.,
  https://arxiv.org/abs/1701.00392
  See also "Computation of eigenvalue and eigenvector derivatives
  for a general complex-valued eigensystem" by Nico van der Aa.
  For now, only the case of distinct eigenvalues is considered.
  """
    e = op.outputs[0]
    compute_v = op.get_attr("compute_v")
    # a = op.inputs[0], which satisfies
    # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
    with ops.control_dependencies([grad_e, grad_v]):
        if compute_v:
            v = op.outputs[1]
            vt = _linalg.adjoint(v)
            # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
            # Notice that because of the term involving f, the gradient becomes
            # infinite (or NaN in practice) when eigenvalues are not unique.
            # Mathematically this should not be surprising, since for (k-fold)
            # degenerate eigenvalues, the corresponding eigenvectors are only defined
            # up to arbitrary rotation in a (k-dimensional) subspace.
            f = array_ops.matrix_set_diag(
                _SafeReciprocal(
                    array_ops.expand_dims(e, -2) -
                    array_ops.expand_dims(e, -1)), array_ops.zeros_like(e))
            f = math_ops.conj(f)
            vgv = math_ops.matmul(vt, grad_v)
            mid = array_ops.matrix_diag(grad_e)
            diag_grad_part = array_ops.matrix_diag(
                array_ops.matrix_diag_part(
                    math_ops.cast(math_ops.real(vgv), vgv.dtype)))
            mid += f * (
                vgv - math_ops.matmul(math_ops.matmul(vt, v), diag_grad_part))
            # vt is formally invertible as long as the original matrix is
            # diagonalizable. However, in practice, vt may be
            # ill-conditioned when the original matrix is close to a
            # non-diagonalizable one.
            grad_a = linalg_ops.matrix_solve(vt, math_ops.matmul(mid, vt))
        else:
            _, v = linalg_ops.eig(op.inputs[0])
            vt = _linalg.adjoint(v)
            # vt is formally invertible as long as the original matrix is
            # diagonalizable. However, in practice, vt may be
            # ill-conditioned when the original matrix is close to a
            # non-diagonalizable one.
            grad_a = linalg_ops.matrix_solve(
                vt, math_ops.matmul(array_ops.matrix_diag(grad_e), vt))
        return math_ops.cast(grad_a, op.inputs[0].dtype)
Example #22
 def add_to_tensor(self, mat, name="add_to_tensor"):
     """Add matrix represented by this operator to `mat`.  Equiv to `I + mat`.
     Args:
       mat:  `Tensor` with same `dtype` and shape broadcastable to `self`.
       name:  A name to give this `Op`.
     Returns:
       A `Tensor` with broadcast shape and same `dtype` as `self`.
     """
     with self._name_scope(name):
         mat = ops.convert_to_tensor(mat, name="mat")
         mat_diag = array_ops.matrix_diag_part(mat)
         new_diag = 1 + mat_diag
         return array_ops.matrix_set_diag(mat, new_diag)
Example #23
    def testRectangularBatch(self):
        with self.test_session(use_gpu=True):
            v_batch = np.array([[-1.0, -2.0], [-4.0, -5.0]])
            mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0]],
                                  [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0]]])

            mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, -2.0,
                                                               0.0]],
                                           [[-4.0, 0.0, 4.0], [0.0, -5.0,
                                                               0.0]]])
            output = array_ops.matrix_set_diag(mat_batch, v_batch)
            self.assertEqual((2, 2, 3), output.get_shape())
            self.assertAllEqual(mat_set_diag_batch, output.eval())
Example #24
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
    """Gradient for SelfAdjointEigV2."""
    e = op.outputs[0]
    compute_v = op.get_attr("compute_v")
    # a = op.inputs[0], which satisfies
    # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
    with ops.control_dependencies([grad_e, grad_v]):
        if compute_v:
            v = op.outputs[1]
            # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
            # Notice that because of the term involving f, the gradient becomes
            # infinite (or NaN in practice) when eigenvalues are not unique.
            # Mathematically this should not be surprising, since for (k-fold)
            # degenerate eigenvalues, the corresponding eigenvectors are only defined
            # up to arbitrary rotation in a (k-dimensional) subspace.
            f = array_ops.matrix_set_diag(
                math_ops.reciprocal(
                    array_ops.expand_dims(e, -2) -
                    array_ops.expand_dims(e, -1)), array_ops.zeros_like(e))
            grad_a = math_ops.matmul(
                v,
                math_ops.matmul(array_ops.matrix_diag(grad_e) +
                                f * math_ops.matmul(v, grad_v, adjoint_a=True),
                                v,
                                adjoint_b=True))
        else:
            _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
            grad_a = math_ops.matmul(
                v,
                math_ops.matmul(array_ops.matrix_diag(grad_e),
                                v,
                                adjoint_b=True))
        # The forward op only depends on the lower triangular part of a, so here we
        # symmetrize and take the lower triangle
        grad_a = array_ops.matrix_band_part(grad_a + _linalg.adjoint(grad_a),
                                            -1, 0)
        grad_a = array_ops.matrix_set_diag(
            grad_a, 0.5 * array_ops.matrix_diag_part(grad_a))
        return grad_a
Example #25
def _MatrixDiagPartV2Grad(op, grad):
  """Gradient for MatrixDiagPartV2."""
  matrix_shape = op.inputs[0].get_shape()[-2:]
  if matrix_shape.is_fully_defined():
    return array_ops.matrix_diag(
        grad,
        k=op.inputs[1],
        num_rows=matrix_shape[0],
        num_cols=matrix_shape[1]), None, None
  else:
    return array_ops.matrix_set_diag(
        array_ops.zeros_like(op.inputs[0]), grad,
        k=op.inputs[1]), None, None
Example #26
def _MatrixSetDiagGrad(op, grad):
  diag_shape = op.inputs[1].get_shape()
  diag_shape = diag_shape.merge_with(op.inputs[0].get_shape()[:-1])
  diag_shape = diag_shape.merge_with(grad.get_shape()[:-1])
  if diag_shape.is_fully_defined():
    diag_shape = diag_shape.as_list()
  else:
    diag_shape = array_ops.shape(grad)
    diag_shape = array_ops.slice(diag_shape, [0], [array_ops.rank(grad) - 1])
  grad_input = array_ops.matrix_set_diag(
      grad, array_ops.zeros(
          diag_shape, dtype=grad.dtype))
  grad_diag = array_ops.matrix_diag_part(grad)
  return (grad_input, grad_diag)
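Numerically, the split above sends the upstream gradient with a zeroed diagonal to input (whose diagonal the forward op discarded) and the diagonal itself to diagonal. A toy illustration:

import numpy as np

g = np.arange(9.0).reshape(3, 3)   # upstream gradient
grad_diag = np.diag(g).copy()      # gradient w.r.t. the diagonal argument
grad_input = g.copy()
np.fill_diagonal(grad_input, 0.0)  # gradient w.r.t. the matrix argument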
Example #27
  def _testGrad(self, input_shape, diag_shape, diags):
    with self.session(use_gpu=True):
      x = constant_op.constant(
          np.random.rand(*input_shape), dtype=dtypes_lib.float32)
      x_diag = constant_op.constant(
          np.random.rand(*diag_shape), dtype=dtypes_lib.float32)

      # LINT.IfChange
      if compat.forward_compatible(2019, 11, 30):
      # LINT.ThenChange(//tensorflow/python/ops/array_ops.py)
        y = array_ops.matrix_set_diag(x, x_diag, k=diags)
      else:
        y = array_ops.matrix_set_diag(x, x_diag)
      error_x = gradient_checker.compute_gradient_error(x,
                                                        x.get_shape().as_list(),
                                                        y,
                                                        y.get_shape().as_list())
      self.assertLess(error_x, 1e-4)
      error_x_diag = gradient_checker.compute_gradient_error(
          x_diag,
          x_diag.get_shape().as_list(), y,
          y.get_shape().as_list())
      self.assertLess(error_x_diag, 1e-4)
Example #28
def _MatrixSetDiagGrad(op, grad):
    diag_shape = op.inputs[1].get_shape()
    diag_shape = diag_shape.merge_with(op.inputs[0].get_shape()[:-1])
    diag_shape = diag_shape.merge_with(grad.get_shape()[:-1])
    if diag_shape.is_fully_defined():
        diag_shape = diag_shape.as_list()
    else:
        diag_shape = array_ops.shape(grad)
        diag_shape = array_ops.slice(diag_shape, [0],
                                     [array_ops.rank(grad) - 1])
    grad_input = array_ops.matrix_set_diag(
        grad, array_ops.zeros(diag_shape, dtype=grad.dtype))
    grad_diag = array_ops.matrix_diag_part(grad)
    return (grad_input, grad_diag)
Example #29
 def test_assert_non_singular_raises_if_cond_too_big_but_finite(self):
   with self.cached_session():
     tril = linear_operator_test_util.random_tril_matrix(
         shape=(50, 50), dtype=np.float32)
     diag = np.logspace(-2, 2, 50).astype(np.float32)
     tril = array_ops.matrix_set_diag(tril, diag)
     matrix = math_ops.matmul(tril, tril, transpose_b=True).eval()
     operator = linalg.LinearOperatorFullMatrix(matrix)
     with self.assertRaisesOpError("Singular matrix"):
       # Ensure that we have finite condition number...just HUGE.
       cond = np.linalg.cond(matrix)
       self.assertTrue(np.isfinite(cond))
       self.assertGreater(cond, 1e12)
       operator.assert_non_singular().run()
Example #30
  def testRectangularBatch(self):
    with self.session(use_gpu=True):
      v_batch = np.array([[-1.0, -2.0], [-4.0, -5.0]])
      mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0]],
                            [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0]]])

      mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, -2.0, 0.0]],
                                     [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0]]])
      output = array_ops.matrix_set_diag(mat_batch, v_batch)
      self.assertEqual((2, 2, 3), output.get_shape())
      self.assertAllEqual(mat_set_diag_batch, self.evaluate(output))

      if compat.forward_compatible(2019, 7, 4):
        # Diagonal bands.
        for _, tests in [tall_cases(), fat_cases()]:
          for diags, pair in tests.items():
            vecs, banded_mat = pair
            mask = banded_mat == 0
            input_mat = np.random.randint(10, size=mask.shape)
            solution = input_mat * mask + banded_mat
            output = array_ops.matrix_set_diag(input_mat, vecs, k=diags)
            self.assertEqual(output.get_shape(), solution.shape)
            self.assertAllEqual(output.eval(), solution)
Example #31
 def _preprocess_tril(self, identity_multiplier, diag, tril, event_ndims):
     """Helper to preprocess a lower triangular matrix."""
     tril = array_ops.matrix_band_part(tril, -1, 0)  # Zero out TriU.
     if identity_multiplier is None and diag is None:
         return self._process_matrix(tril,
                                     min_rank=2,
                                     event_ndims=event_ndims)
     new_diag = array_ops.matrix_diag_part(tril)
     if identity_multiplier is not None:
         new_diag += identity_multiplier
     if diag is not None:
         new_diag += diag
     tril = array_ops.matrix_set_diag(tril, new_diag)
     return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
Example #32
 def test_assert_non_singular_raises_if_cond_too_big_but_finite(self):
     with self.test_session():
         tril = linear_operator_test_util.random_tril_matrix(
             shape=(50, 50), dtype=np.float32)
         diag = np.logspace(-2, 2, 50).astype(np.float32)
         tril = array_ops.matrix_set_diag(tril, diag)
         matrix = math_ops.matmul(tril, tril, transpose_b=True).eval()
         operator = linalg.LinearOperatorFullMatrix(matrix)
         with self.assertRaisesOpError("Singular matrix"):
             # Ensure that we have finite condition number...just HUGE.
             cond = np.linalg.cond(matrix)
             self.assertTrue(np.isfinite(cond))
             self.assertGreater(cond, 1e12)
             operator.assert_non_singular().run()
Example #33
  def _testSquareBatch(self, dtype):
    with self.cached_session(use_gpu=True):
      v_batch = np.array([[-1.0, 0.0, -3.0], [-4.0, -5.0, -6.0]]).astype(dtype)
      mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0], [1.0, 0.0, 3.0]],
                            [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0],
                             [2.0, 0.0, 6.0]]]).astype(dtype)

      mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, 0.0, 0.0],
                                      [1.0, 0.0, -3.0]],
                                     [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0],
                                      [2.0, 0.0, -6.0]]]).astype(dtype)

      output = array_ops.matrix_set_diag(mat_batch, v_batch)
      self.assertEqual((2, 3, 3), output.get_shape())
      self.assertAllEqual(mat_set_diag_batch, self.evaluate(output))
Example #34
  def testSquare(self):
    with self.session(use_gpu=True):
      v = np.array([1.0, 2.0, 3.0])
      mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0]])
      mat_set_diag = np.array([[1.0, 1.0, 0.0], [1.0, 2.0, 1.0],
                               [1.0, 1.0, 3.0]])
      output = array_ops.matrix_set_diag(mat, v)
      self.assertEqual((3, 3), output.get_shape())
      self.assertAllEqual(mat_set_diag, self.evaluate(output))

      # LINT.IfChange
      if compat.forward_compatible(2019, 11, 30):
      # LINT.ThenChange(//tensorflow/python/ops/array_ops.py)

        # Diagonal bands.
        _, tests = square_cases()
        for diags, pair in tests.items():
          vecs, banded_mat = pair
          mask = banded_mat[0] == 0
          input_mat = np.random.randint(10, size=mask.shape)
          solution = input_mat * mask + banded_mat[0]
          output = array_ops.matrix_set_diag(input_mat, vecs[0], k=diags)
          self.assertEqual(output.get_shape(), solution.shape)
          self.assertAllEqual(output.eval(), solution)
Example #35
  def add_to_tensor(self, mat, name="add_to_tensor"):
    """Add matrix represented by this operator to `mat`.  Equiv to `I + mat`.

    Args:
      mat:  `Tensor` with same `dtype` and shape broadcastable to `self`.
      name:  A name to give this `Op`.

    Returns:
      A `Tensor` with broadcast shape and same `dtype` as `self`.
    """
    with self._name_scope(name, values=[mat]):
      mat = ops.convert_to_tensor(mat, name="mat")
      mat_diag = array_ops.matrix_diag_part(mat)
      new_diag = 1 + mat_diag
      return array_ops.matrix_set_diag(mat, new_diag)
Example #36
  def _testSquareBatch(self, dtype):
    with self.cached_session(use_gpu=True):
      v_batch = np.array([[-1.0, 0.0, -3.0], [-4.0, -5.0, -6.0]]).astype(dtype)
      mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0], [1.0, 0.0, 3.0]],
                            [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0],
                             [2.0, 0.0, 6.0]]]).astype(dtype)

      mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, 0.0, 0.0],
                                      [1.0, 0.0, -3.0]],
                                     [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0],
                                      [2.0, 0.0, -6.0]]]).astype(dtype)

      output = array_ops.matrix_set_diag(mat_batch, v_batch)
      self.assertEqual((2, 3, 3), output.get_shape())
      self.assertAllEqual(mat_set_diag_batch, output.eval())
Example #37
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
  """Construct an identity matrix, or a batch of matrices.

  See `linalg_ops.eye`.
  """
  with ops.name_scope(
      name, default_name='eye', values=[num_rows, num_columns, batch_shape]):
    is_square = num_columns is None
    batch_shape = [] if batch_shape is None else batch_shape
    num_columns = num_rows if num_columns is None else num_columns

    # We cannot statically infer what the diagonal size should be:
    if (isinstance(num_rows, ops.Tensor) or
        isinstance(num_columns, ops.Tensor)):
      diag_size = math_ops.minimum(num_rows, num_columns)
    else:
      # We can statically infer the diagonal size, and whether it is square.
      if not isinstance(num_rows, compat.integral_types) or not isinstance(
          num_columns, compat.integral_types):
        raise TypeError(
            'num_rows and num_columns must be positive integer values.')
      is_square = num_rows == num_columns
      diag_size = np.minimum(num_rows, num_columns)

    # We can not statically infer the shape of the tensor.
    if isinstance(batch_shape, ops.Tensor) or isinstance(diag_size, ops.Tensor):
      batch_shape = ops.convert_to_tensor(
          batch_shape, name='shape', dtype=dtypes.int32)
      diag_shape = array_ops.concat((batch_shape, [diag_size]), axis=0)
      if not is_square:
        shape = array_ops.concat((batch_shape, [num_rows, num_columns]), axis=0)
    # We can statically infer everything.
    else:
      batch_shape = list(batch_shape)
      diag_shape = batch_shape + [diag_size]
      if not is_square:
        shape = batch_shape + [num_rows, num_columns]

    diag_ones = array_ops.ones(diag_shape, dtype=dtype)
    if is_square:
      return array_ops.matrix_diag(diag_ones)
    else:
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
Example #38
 def testGrad(self):
   shapes = ((3, 4, 4), (3, 3, 4), (3, 4, 3), (7, 4, 8, 8))
   with self.test_session(use_gpu=True):
     for shape in shapes:
       x = constant_op.constant(
           np.random.rand(*shape), dtype=dtypes_lib.float32)
       diag_shape = shape[:-2] + (min(shape[-2:]),)
       x_diag = constant_op.constant(
           np.random.rand(*diag_shape), dtype=dtypes_lib.float32)
       y = array_ops.matrix_set_diag(x, x_diag)
       error_x = gradient_checker.compute_gradient_error(
           x, x.get_shape().as_list(), y, y.get_shape().as_list())
       self.assertLess(error_x, 1e-4)
       error_x_diag = gradient_checker.compute_gradient_error(
           x_diag, x_diag.get_shape().as_list(), y, y.get_shape().as_list())
       self.assertLess(error_x_diag, 1e-4)
Example #39
 def testGrad(self):
   shapes = ((3, 4, 4), (3, 3, 4), (3, 4, 3), (7, 4, 8, 8))
   with self.test_session(use_gpu=self._use_gpu):
     for shape in shapes:
       x = constant_op.constant(
           np.random.rand(*shape), dtype=dtypes_lib.float32)
       diag_shape = shape[:-2] + (min(shape[-2:]),)
       x_diag = constant_op.constant(
           np.random.rand(*diag_shape), dtype=dtypes_lib.float32)
       y = array_ops.matrix_set_diag(x, x_diag)
       error_x = gradient_checker.compute_gradient_error(
           x, x.get_shape().as_list(), y, y.get_shape().as_list())
       self.assertLess(error_x, 1e-4)
       error_x_diag = gradient_checker.compute_gradient_error(
           x_diag, x_diag.get_shape().as_list(), y, y.get_shape().as_list())
       self.assertLess(error_x_diag, 1e-4)
Example #40
 def testGradWithNoShapeInformation(self):
   with self.test_session(use_gpu=True) as sess:
     v = array_ops.placeholder(dtype=dtypes_lib.float32)
     mat = array_ops.placeholder(dtype=dtypes_lib.float32)
     grad_input = array_ops.placeholder(dtype=dtypes_lib.float32)
     output = array_ops.matrix_set_diag(mat, v)
     grads = gradients_impl.gradients(output, [mat, v], grad_ys=grad_input)
     grad_input_val = np.random.rand(3, 3).astype(np.float32)
     grad_vals = sess.run(grads,
                          feed_dict={
                              v: 2 * np.ones(3),
                              mat: np.ones((3, 3)),
                              grad_input: grad_input_val
                          })
     self.assertAllEqual(np.diag(grad_input_val), grad_vals[1])
     self.assertAllEqual(grad_input_val - np.diag(np.diag(grad_input_val)),
                         grad_vals[0])
Example #41
 def testGradWithNoShapeInformation(self):
   with self.test_session(use_gpu=self._use_gpu) as sess:
     v = array_ops.placeholder(dtype=dtypes_lib.float32)
     mat = array_ops.placeholder(dtype=dtypes_lib.float32)
     grad_input = array_ops.placeholder(dtype=dtypes_lib.float32)
     output = array_ops.matrix_set_diag(mat, v)
     grads = gradients_impl.gradients(output, [mat, v], grad_ys=grad_input)
     grad_input_val = np.random.rand(3, 3).astype(np.float32)
     grad_vals = sess.run(grads,
                          feed_dict={
                              v: 2 * np.ones(3),
                              mat: np.ones((3, 3)),
                              grad_input: grad_input_val
                          })
     self.assertAllEqual(np.diag(grad_input_val), grad_vals[1])
     self.assertAllEqual(grad_input_val - np.diag(np.diag(grad_input_val)),
                         grad_vals[0])
Example #42
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
    """Construct an identity matrix, or a batch of matrices.

  See `linalg_ops.eye`.
  """
    with ops.name_scope(name,
                        default_name='eye',
                        values=[num_rows, num_columns, batch_shape]):
        is_square = num_columns is None
        batch_shape = [] if batch_shape is None else batch_shape
        num_columns = num_rows if num_columns is None else num_columns
        if isinstance(num_rows, ops.Tensor) or isinstance(
                num_columns, ops.Tensor) or isinstance(batch_shape,
                                                       ops.Tensor):
            batch_shape = ops.convert_to_tensor(batch_shape,
                                                name='shape',
                                                dtype=dtypes.int32)
            diag_size = math_ops.minimum(num_rows, num_columns)
            diag_shape = array_ops.concat((batch_shape, [diag_size]), 0)
            if not is_square:
                shape = array_ops.concat(
                    (batch_shape, [num_rows, num_columns]), 0)
        else:
            if not isinstance(num_rows,
                              compat.integral_types) or not isinstance(
                                  num_columns, compat.integral_types):
                raise TypeError(
                    'num_rows and num_columns must be positive integer values.'
                )
            batch_shape = [dim for dim in batch_shape]
            is_square = num_rows == num_columns
            diag_shape = batch_shape + [np.minimum(num_rows, num_columns)]
            if not is_square:
                shape = batch_shape + [num_rows, num_columns]

        diag_ones = array_ops.ones(diag_shape, dtype=dtype)
        if is_square:
            return array_ops.matrix_diag(diag_ones)
        else:
            zero_matrix = array_ops.zeros(shape, dtype=dtype)
            return array_ops.matrix_set_diag(zero_matrix, diag_ones)
Example #43
    def _assertOpOutputMatchesExpected(self,
                                       params,
                                       solution,
                                       high_level=True,
                                       rtol=1e-3,
                                       atol=1e-5):
        """Verifies that matrix_set_diag produces `solution` when fed `params`.

    Args:
      params: dictionary containing input parameters to matrix_set_diag.
      solution: numpy array representing the expected output of matrix_set_diag.
      high_level: call high_level matrix_set_diag
      rtol: relative tolerance for equality test.
      atol: absolute tolerance for equality test.
    """
        input = params["input"]  # pylint: disable=redefined-builtin
        diagonal = params["diagonal"]
        with self.session() as session:
            for dtype in self.numeric_types - {np.int8, np.uint8}:
                expected = solution.astype(dtype)
                with self.test_scope():
                    params["input"] = array_ops.placeholder(dtype,
                                                            input.shape,
                                                            name="input")
                    params["diagonal"] = array_ops.placeholder(dtype,
                                                               diagonal.shape,
                                                               name="diagonal")
                    if high_level:
                        # wraps gen_array_ops.matrix_set_diag_v3
                        output = array_ops.matrix_set_diag(**params)
                    else:
                        # TODO(b/201086188): Remove this case once MatrixDiag V1 is removed.
                        output = gen_array_ops.matrix_set_diag(**params)
                result = session.run(
                    output, {
                        params["input"]: input.astype(dtype),
                        params["diagonal"]: diagonal.astype(dtype)
                    })
                self.assertEqual(output.dtype, expected.dtype)
                self.assertAllCloseAccordingToType(expected,
                                                   result,
                                                   rtol=rtol,
                                                   atol=atol,
                                                   bfloat16_rtol=0.03)
Example #44
def _MatrixSetDiagGrad(op, grad):
    input_shape = op.inputs[0].get_shape().merge_with(grad.get_shape())
    diag_shape = op.inputs[1].get_shape()
    batch_shape = input_shape[:-2].merge_with(diag_shape[:-1])
    matrix_shape = input_shape[-2:]
    if batch_shape.is_fully_defined() and matrix_shape.is_fully_defined():
        diag_shape = batch_shape.as_list() + [min(matrix_shape.as_list())]
    else:
        with ops.colocate_with(grad):
            grad_shape = array_ops.shape(grad)
            grad_rank = array_ops.rank(grad)
            batch_shape = array_ops.slice(grad_shape, [0], [grad_rank - 2])
            matrix_shape = array_ops.slice(grad_shape, [grad_rank - 2], [2])
            min_dim = math_ops.reduce_min(matrix_shape)
            diag_shape = array_ops.concat([batch_shape, [min_dim]], 0)
    grad_input = array_ops.matrix_set_diag(
        grad, array_ops.zeros(diag_shape, dtype=grad.dtype))
    grad_diag = array_ops.matrix_diag_part(grad)
    return (grad_input, grad_diag)
Example #45
def TriAngSolveCompositeGrad(l, grad):
  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}

  # Compute ((l^{H} @ grad) * (tril(ones)-1/2*eye)) = middle
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  # Compute l^{-H} @ middle = z
  l_inverse_middle = linalg_ops.matrix_triangular_solve(l, middle, adjoint=True)

  # We need to compute z @ l^{-1}. With matrix_triangular_solve we
  # actually compute l^{-H} @ z^{H} = grad. Since we later add grad^{H}
  # we can omit the conjugate transpose here.
  z_h = math_ops.conj(array_ops.matrix_transpose(l_inverse_middle))
  grad_a = linalg_ops.matrix_triangular_solve(l, z_h, adjoint=True)
  grad_a += linalg.adjoint(grad_a)
  return grad_a * 0.5
Example #46
def _CholeskyGrad(op, grad):
  """Gradient for Cholesky."""

  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}
  l = op.outputs[0]
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  l_inverse = linalg_ops.matrix_triangular_solve(
      l, linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=l.dtype))

  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(middle,
                                     0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)

  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
Example #47
def _CholeskyGrad(op, grad):
    """Gradient for Cholesky."""

    # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}
    l = op.outputs[0]
    num_rows = array_ops.shape(l)[-1]
    batch_shape = array_ops.shape(l)[:-2]
    l_inverse = linalg_ops.matrix_triangular_solve(
        l, linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=l.dtype))

    middle = math_ops.matmul(l, grad, adjoint_a=True)
    middle = array_ops.matrix_set_diag(
        middle, 0.5 * array_ops.matrix_diag_part(middle))
    middle = array_ops.matrix_band_part(middle, -1, 0)

    grad_a = math_ops.matmul(
        math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)

    grad_a += _linalg.adjoint(grad_a)
    return grad_a * 0.5
Example #48
def _MatrixSetDiagGrad(op, grad):
  input_shape = op.inputs[0].get_shape().merge_with(grad.get_shape())
  diag_shape = op.inputs[1].get_shape()
  batch_shape = input_shape[:-2].merge_with(diag_shape[:-1])
  matrix_shape = input_shape[-2:]
  if batch_shape.is_fully_defined() and matrix_shape.is_fully_defined():
    diag_shape = batch_shape.as_list() + [min(matrix_shape.as_list())]
  else:
    with ops.colocate_with(grad):
      grad_shape = array_ops.shape(grad)
      grad_rank = array_ops.rank(grad)
      batch_shape = array_ops.slice(grad_shape, [0], [grad_rank - 2])
      matrix_shape = array_ops.slice(grad_shape, [grad_rank - 2], [2])
      min_dim = math_ops.reduce_min(matrix_shape)
      diag_shape = array_ops.concat([batch_shape, [min_dim]], 0)
  grad_input = array_ops.matrix_set_diag(
      grad, array_ops.zeros(
          diag_shape, dtype=grad.dtype))
  grad_diag = array_ops.matrix_diag_part(grad)
  return (grad_input, grad_diag)
Example #49
 def testInvalidShapeAtEval(self):
   with self.session(use_gpu=True):
     v = array_ops.placeholder(dtype=dtypes_lib.float32)
     with self.assertRaisesOpError("input must be at least 2-dim"):
       array_ops.matrix_set_diag(v, [v]).eval(feed_dict={v: 0.0})
     with self.assertRaisesOpError("diagonal must be at least 1-dim"):
       array_ops.matrix_set_diag([[v]], v).eval(feed_dict={v: 0.0})
     if compat.forward_compatible(2019, 7, 4):
       d = array_ops.placeholder(dtype=dtypes_lib.float32)
       with self.assertRaisesOpError(
           "first dimensions of diagonal don't match"):
         array_ops.matrix_set_diag(v, d).eval(feed_dict={
             v: np.zeros((2, 3, 3)),
             d: np.ones((2, 4))
         })
Example #50
def clip_covariance(covariance_matrix, maximum_variance_ratio,
                    minimum_variance):
    """Enforce constraints on a covariance matrix to improve numerical stability.

  Args:
    covariance_matrix: A [..., N, N] batch of covariance matrices.
    maximum_variance_ratio: The maximum allowed ratio of two diagonal
      entries. Any entries lower than the maximum entry divided by this ratio
      will be set to that value.
    minimum_variance: A floor for diagonal entries in the returned matrix.
  Returns:
    A new covariance matrix with the requested constraints enforced. If the
    input was positive definite, the output will be too.
  """
    # TODO(allenl): Smarter scaling here so that correlations are preserved when
    # fiddling with diagonal elements.
    diagonal = array_ops.matrix_diag_part(covariance_matrix)
    maximum = math_ops.reduce_max(diagonal, axis=-1, keep_dims=True)
    new_diagonal = gen_math_ops.maximum(diagonal,
                                        maximum / maximum_variance_ratio)
    return array_ops.matrix_set_diag(
        covariance_matrix, math_ops.maximum(new_diagonal, minimum_variance))
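A NumPy analogue of the two clips, with illustrative ratio and floor values:

import numpy as np

cov = np.array([[4.0, 0.1],
                [0.1, 1e-9]])
diag = np.diag(cov).copy()
diag = np.maximum(diag, diag.max() / 1e3)  # maximum_variance_ratio = 1e3
diag = np.maximum(diag, 1e-6)              # minimum_variance = 1e-6
cov[np.arange(2), np.arange(2)] = diag     # the matrix_set_diag step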
Example #51
def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
    """Returns a uniformly random `Tensor` of "correlation-like" matrices.

  A "correlation-like" matrix is a symmetric square matrix with all entries
  between -1 and 1 (inclusive) and 1s on the main diagonal.  Of these,
  the ones that are positive semi-definite are exactly the correlation
  matrices.

  Args:
    num_rows: Python `int` dimension of the correlation-like matrices.
    batch_shape: `Tensor` or Python `tuple` of `int` shape of the
      batch to return.
    dtype: `dtype` of the `Tensor` to return.
    seed: Random seed.

  Returns:
    matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]`
      and dtype `dtype`.  Each entry is in [-1, 1], and each matrix
      along the bottom two dimensions is symmetric and has 1s on the
      main diagonal.
  """
    num_entries = num_rows * (num_rows + 1) // 2
    ones = array_ops.ones(shape=[num_entries], dtype=dtype)
    # It seems wasteful to generate random values for the diagonal since
    # I am going to throw them away, but `fill_triangular` fills the
    # diagonal, so I probably need them.
    # It's not impossible that it would be more efficient to just fill
    # the whole matrix with random values instead of messing with
    # `fill_triangular`.  Then would need to filter almost half out with
    # `matrix_band_part`.
    unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
    tril = util.fill_triangular(unifs)
    symmetric = tril + array_ops.matrix_transpose(tril)
    diagonal_ones = array_ops.ones(shape=util.pad(batch_shape,
                                                  axis=0,
                                                  back=True,
                                                  value=num_rows),
                                   dtype=dtype)
    return array_ops.matrix_set_diag(symmetric, diagonal_ones)
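A NumPy sketch (assumed equivalent, not the library code) of why the diagonal is overwritten: for num_rows = 3 the lower triangle holds 3 * 4 // 2 = 6 entries, and symmetrizing tril + trilᵀ doubles the diagonal, so it must be reset to ones.

import numpy as np

n = 3
tril = np.tril(np.random.uniform(-1.0, 1.0, size=(n, n)))  # 6 free entries
symmetric = tril + tril.T          # symmetric, but the diagonal is doubled
np.fill_diagonal(symmetric, 1.0)   # mirrors matrix_set_diag(symmetric, ones)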
Example #52
 def _assertions(self, x):
   if not self.validate_args:
     return []
   shape = array_ops.shape(x)
   is_matrix = check_ops.assert_rank_at_least(
       x, 2, message="Input must have rank at least 2.")
   is_square = check_ops.assert_equal(
       shape[-2], shape[-1], message="Input must be a square matrix.")
   above_diagonal = array_ops.matrix_band_part(
       array_ops.matrix_set_diag(
           x, array_ops.zeros(shape[:-1], dtype=dtypes.float32)),
       0, -1)
   is_lower_triangular = check_ops.assert_equal(
       above_diagonal, array_ops.zeros_like(above_diagonal),
       message="Input must be lower triangular.")
   # A lower triangular matrix is nonsingular iff all its diagonal entries are
   # nonzero.
   diag_part = array_ops.matrix_diag_part(x)
   is_nonsingular = check_ops.assert_none_equal(
       diag_part, array_ops.zeros_like(diag_part),
       message="Input must have all diagonal entries nonzero.")
   return [is_matrix, is_square, is_lower_triangular, is_nonsingular]
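The lower-triangularity check reads as: zero the diagonal, keep the upper band, and assert nothing remains. A hedged NumPy sketch of the same test:

import numpy as np

x = np.tril(np.random.randn(4, 4))                 # valid lower-triangular input
above_diagonal = np.triu(x - np.diag(np.diag(x)))  # strictly upper part
assert not above_diagonal.any()                    # empty for lower-triangular x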
Example #53
def clip_covariance(
    covariance_matrix, maximum_variance_ratio, minimum_variance):
  """Enforce constraints on a covariance matrix to improve numerical stability.

  Args:
    covariance_matrix: A [..., N, N] batch of covariance matrices.
    maximum_variance_ratio: The maximum allowed ratio of two diagonal
      entries. Any entries lower than the maximum entry divided by this ratio
      will be set to that value.
    minimum_variance: A floor for diagonal entries in the returned matrix.
  Returns:
    A new covariance matrix with the requested constraints enforced. If the
    input was positive definite, the output will be too.
  """
  # TODO(allenl): Smarter scaling here so that correlations are preserved when
  # fiddling with diagonal elements.
  diagonal = array_ops.matrix_diag_part(covariance_matrix)
  maximum = math_ops.reduce_max(diagonal, axis=-1, keep_dims=True)
  new_diagonal = gen_math_ops.maximum(
      diagonal, maximum / maximum_variance_ratio)
  return array_ops.matrix_set_diag(
      covariance_matrix, math_ops.maximum(new_diagonal, minimum_variance))
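A usage sketch with worked numbers (the inputs are made up, and under TF1 the result is a tensor that still needs a session to evaluate): with a maximum variance of 4.0 and a ratio of 100, any diagonal entry below 0.04 is raised to 0.04.

import numpy as np

cov = np.diag([1.0, 1e-9, 4.0]).astype(np.float32)
clipped = clip_covariance(cov, maximum_variance_ratio=100.0,
                          minimum_variance=1e-6)
# New diagonal: [1.0, max(4.0 / 100.0, 1e-6), 4.0] == [1.0, 0.04, 4.0].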
Example #54
 def loop_fn(i):
   matrix_i = array_ops.gather(matrices, i)
   diag_i = array_ops.gather(diags, i)
   results = [
       array_ops.matrix_set_diag(matrix_i, diag_i),
       array_ops.matrix_set_diag(matrices[0, ...], diag_i),
       array_ops.matrix_set_diag(matrix_i, diags[0, ...])
   ]
   if compat.forward_compatible(2019, 10, 31):
     k = (-1, 1)
     band_i = array_ops.gather(bands, i)
     results.extend([
         array_ops.matrix_set_diag(matrix_i, band_i, k=k),
         array_ops.matrix_set_diag(matrices[0, ...], band_i, k=k),
         array_ops.matrix_set_diag(matrix_i, bands[0, ...], k=k)
     ])
   return results
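For the banded variants, k = (-1, 1) writes three diagonals at once from a [3, max_diag_len] band. A sketch of the expected layout for a 3 x 3 matrix, assuming TF's default "RIGHT_LEFT" alignment (superdiagonals right-aligned, subdiagonals left-aligned):

import numpy as np

band = np.array([[0., 1., 2.],   # k =  1 (superdiagonal), right-aligned
                 [3., 4., 5.],   # k =  0 (main diagonal)
                 [6., 7., 0.]])  # k = -1 (subdiagonal), left-aligned
expected = np.array([[3., 1., 0.],
                     [6., 4., 2.],
                     [0., 7., 5.]])  # matrix_set_diag(zeros, band, k=(-1, 1))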
Example #55
def _MatrixSetDiagGradV3(op, grad):
    """Gradient for MatrixSetDiagV3."""
    diag_shape = op.inputs[1].get_shape()
    align = op.get_attr("align")
    if not diag_shape.is_fully_defined():
        # Need to know the values of `d_lower` and `d_upper` to infer diag_shape.
        grad_shape = array_ops.shape(grad)
        batch_shape = grad_shape[:-2]
        matrix_shape = grad_shape[-2:]
        diag_index = array_ops.reshape(op.inputs[2],
                                       [-1])  # Converts to vector.
        d_lower = diag_index[0]
        d_upper = diag_index[-1]  # Works both when len(diag_index) is 1 and 2.
        y_offset = control_flow_ops.cond(math_ops.less(d_upper, 0),
                                         lambda: d_upper, lambda: 0)
        x_offset = control_flow_ops.cond(math_ops.greater(d_lower, 0),
                                         lambda: -d_lower, lambda: 0)

        max_diag_len = math_ops.minimum(matrix_shape[0] + y_offset,
                                        matrix_shape[1] + x_offset)
        # pylint: disable=g-long-lambda
        # pyformat: disable
        postfix = control_flow_ops.cond(
            math_ops.equal(d_lower, d_upper),
            lambda: ops.convert_to_tensor([max_diag_len]), lambda: ops.
            convert_to_tensor([d_upper - d_lower + 1, max_diag_len]))
        # pyformat: enable
        # pylint: enable=g-long-lambda
        diag_shape = array_ops.concat([batch_shape, postfix], 0)

    grad_input = array_ops.matrix_set_diag(grad,
                                           array_ops.zeros(diag_shape,
                                                           dtype=grad.dtype),
                                           k=op.inputs[2],
                                           align=align)
    grad_diag = array_ops.matrix_diag_part(grad, k=op.inputs[2], align=align)
    return (grad_input, grad_diag, None)
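A worked instance of the shape inference above (plain Python, values made up): for a 4 x 5 matrix and k = (-1, 2), both offsets are zero, so the band gradient has shape [4, 4].

num_rows, num_cols = 4, 5
d_lower, d_upper = -1, 2
y_offset = d_upper if d_upper < 0 else 0   # 0: the band reaches row 0
x_offset = -d_lower if d_lower > 0 else 0  # 0: the band reaches column 0
max_diag_len = min(num_rows + y_offset, num_cols + x_offset)  # 4
diag_shape = [d_upper - d_lower + 1, max_diag_len]            # [4, 4]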
Example #56
  def add_to_tensor(self, mat, name="add_to_tensor"):
    """Add matrix represented by this operator to `mat`.  Equiv to `multiplier * I + mat`.

    Args:
      mat:  `Tensor` with same `dtype` and shape broadcastable to `self`.
      name:  A name to give this `Op`.

    Returns:
      A `Tensor` with broadcast shape and same `dtype` as `self`.
    """
    with self._name_scope(name):  # pylint: disable=not-callable
      # Shape [B1,...,Bb, 1]
      multiplier_vector = array_ops.expand_dims(self.multiplier, -1)

      # Shape [C1,...,Cc, M, M]
      mat = ops.convert_to_tensor_v2_with_dispatch(mat, name="mat")

      # Shape [C1,...,Cc, M]
      mat_diag = array_ops.matrix_diag_part(mat)

      # multiplier_vector broadcasts here.
      new_diag = multiplier_vector + mat_diag

      return array_ops.matrix_set_diag(mat, new_diag)
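In plain NumPy the method reduces to adding a scaled identity (a sketch; `multiplier` here stands in for `self.multiplier`):

import numpy as np

multiplier = 2.0
mat = np.arange(9.0).reshape(3, 3)
result = mat + multiplier * np.eye(3)  # only the diagonal changes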
Example #57
  def operator_and_matrix(
      self, build_info, dtype, use_placeholder,
      ensure_self_adjoint_and_pd=False):
    shape = list(build_info.shape)
    reflection_axis = linear_operator_test_util.random_sign_uniform(
        shape[:-1], minval=1., maxval=2., dtype=dtype)
    # Make sure unit norm.
    reflection_axis = reflection_axis / linalg_ops.norm(
        reflection_axis, axis=-1, keepdims=True)

    lin_op_reflection_axis = reflection_axis

    if use_placeholder:
      lin_op_reflection_axis = array_ops.placeholder_with_default(
          reflection_axis, shape=None)

    operator = householder.LinearOperatorHouseholder(lin_op_reflection_axis)

    mat = reflection_axis[..., array_ops.newaxis]
    matrix = -2 * math_ops.matmul(mat, mat, adjoint_b=True)
    matrix = array_ops.matrix_set_diag(
        matrix, 1. + array_ops.matrix_diag_part(matrix))

    return operator, matrix
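A quick NumPy check (illustrative only) that a matrix built this way, H = I - 2 v vᵀ for unit v, is a Householder reflection: it is symmetric and its own inverse.

import numpy as np

v = np.random.randn(3)
v /= np.linalg.norm(v)
h = -2.0 * np.outer(v, v)
h[np.diag_indices(3)] += 1.0   # mirrors matrix_set_diag(matrix, 1 + diag)
assert np.allclose(h, h.T) and np.allclose(h @ h, np.eye(3))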
Example #58
def random_tril_matrix(shape,
                       dtype,
                       force_well_conditioned=False,
                       remove_upper=True):
  """[batch] lower triangular matrix.

  Args:
    shape:  `TensorShape` or Python `list`.  Shape of the returned matrix.
    dtype:  `TensorFlow` `dtype` or Python dtype.
    force_well_conditioned:  Python `bool`. If `True`, the returned matrix
      will have eigenvalues with modulus in `(1, 2)`.  Otherwise, eigenvalues
      are unit normal random variables.
    remove_upper:  Python `bool`.
      If `True`, zero out the strictly upper triangle.
      If `False`, the lower triangle of the returned matrix will have the
      desired properties, but the strictly upper triangle will not be zeroed
      out.

  Returns:
    `Tensor` with desired shape and dtype.
  """
  with ops.name_scope("random_tril_matrix"):
    # Totally random matrix.  Has no nice properties.
    tril = random_normal(shape, dtype=dtype)
    if remove_upper:
      tril = array_ops.matrix_band_part(tril, -1, 0)

    # Create a diagonal with entries having modulus in [1, 2].
    if force_well_conditioned:
      maxval = ops.convert_to_tensor(np.sqrt(2.), dtype=dtype.real_dtype)
      diag = random_sign_uniform(
          shape[:-1], dtype=dtype, minval=1., maxval=maxval)
      tril = array_ops.matrix_set_diag(tril, diag)

    return tril
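Why overwriting the diagonal controls conditioning (a NumPy sketch, not the test utility itself): the eigenvalues of a triangular matrix are exactly its diagonal entries, so forcing their modulus into [1, sqrt(2)] bounds them away from zero.

import numpy as np

tril = np.tril(np.random.randn(4, 4))
diag = np.random.choice([-1.0, 1.0], 4) * np.random.uniform(1.0, np.sqrt(2.0), 4)
np.fill_diagonal(tril, diag)
assert np.allclose(np.sort(np.linalg.eigvals(tril)), np.sort(diag))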
Example #59
 def _covariance(self):
   p = self.probs
   ret = -math_ops.matmul(p[..., None], p[..., None, :])
   return array_ops.matrix_set_diag(ret, self._variance())
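The construction above matches the textbook covariance of a one-hot categorical, Cov = diag(p) - p pᵀ. A small NumPy check (assumed single-batch shapes):

import numpy as np

p = np.array([0.2, 0.3, 0.5])
cov = -np.outer(p, p)                  # ret = -p p^T, as above
np.fill_diagonal(cov, p * (1.0 - p))   # variance of each Bernoulli marginal
assert np.allclose(cov, np.diag(p) - np.outer(p, p))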