def testInvalidShapeAtEval(self):
  with self.session(use_gpu=True):
    v = array_ops.placeholder(dtype=dtypes_lib.float32)
    with self.assertRaisesOpError("input must be at least 2-dim"):
      array_ops.matrix_set_diag(v, [v]).eval(feed_dict={v: 0.0})
    with self.assertRaisesOpError(
        r"but received input shape: \[1,1\] and diagonal shape: \[\]"):
      array_ops.matrix_set_diag([[v]], v).eval(feed_dict={v: 0.0})
def testRectangular(self):
  with self.session(use_gpu=True):
    v = np.array([3.0, 4.0])
    mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0]])
    expected = np.array([[3.0, 1.0, 0.0], [1.0, 4.0, 1.0]])
    output = array_ops.matrix_set_diag(mat, v)
    self.assertEqual((2, 3), output.get_shape())
    self.assertAllEqual(expected, self.evaluate(output))

    v = np.array([3.0, 4.0])
    mat = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
    expected = np.array([[3.0, 1.0], [1.0, 4.0], [1.0, 1.0]])
    output = array_ops.matrix_set_diag(mat, v)
    self.assertEqual((3, 2), output.get_shape())
    self.assertAllEqual(expected, self.evaluate(output))
def _covariance(self):
  p = self.probs * array_ops.ones_like(
      self.total_count)[..., array_ops.newaxis]
  return array_ops.matrix_set_diag(
      -math_ops.matmul(self._mean_val[..., array_ops.newaxis],
                       p[..., array_ops.newaxis, :]),  # outer product
      self._variance())
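# A hedged NumPy check (not part of the source) of the covariance
# construction above, for a single Multinomial with total_count `n` and
# probabilities `p`: the off-diagonal entries are the negated outer product
# of the mean with p, and matrix_set_diag then overwrites the diagonal with
# the variance n * p * (1 - p).
import numpy as np

n = 7.0
p = np.array([0.2, 0.3, 0.5])
mean = n * p
cov = -np.outer(mean, p)                  # off-diagonal part: -n * p_i * p_j
np.fill_diagonal(cov, n * p * (1.0 - p))  # the matrix_set_diag step
assert np.allclose(cov, n * (np.diag(p) - np.outer(p, p)))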
def _to_dense(self):
  normalized_axis = self.reflection_axis / linalg.norm(
      self.reflection_axis, axis=-1, keepdims=True)
  mat = normalized_axis[..., array_ops.newaxis]
  matrix = -2 * math_ops.matmul(mat, mat, adjoint_b=True)
  return array_ops.matrix_set_diag(
      matrix, 1. + array_ops.matrix_diag_part(matrix))
def _sample_n(self, n, seed):
  batch_shape = self.batch_shape_tensor()
  event_shape = self.event_shape_tensor()
  batch_ndims = array_ops.shape(batch_shape)[0]

  ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
  shape = array_ops.concat([[n], batch_shape, event_shape], 0)

  # Complexity: O(nbk**2)
  x = random_ops.random_normal(shape=shape,
                               mean=0.,
                               stddev=1.,
                               dtype=self.dtype,
                               seed=seed)

  # Complexity: O(nbk)
  # This parametrization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  expanded_df = self.df * array_ops.ones(
      self.scale_operator.batch_shape_tensor(),
      dtype=self.df.dtype.base_dtype)
  g = random_ops.random_gamma(shape=[n],
                              alpha=self._multi_gamma_sequence(
                                  0.5 * expanded_df, self.dimension),
                              beta=0.5,
                              dtype=self.dtype,
                              seed=distribution_util.gen_new_seed(
                                  seed, "wishart"))

  # Complexity: O(nbk**2)
  x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

  # Make batch-op ready.
  # Complexity: O(nbk**2)
  perm = array_ops.concat([math_ops.range(1, ndims), [0]], 0)
  x = array_ops.transpose(x, perm)
  shape = array_ops.concat([batch_shape, [event_shape[0]], [-1]], 0)
  x = array_ops.reshape(x, shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system. E.g., for LinearOperatorDiag, each matmul is O(k**2), so
  # this complexity is O(nbk**2). For LinearOperatorLowerTriangular,
  # each matmul is O(k^3) so this step has complexity O(nbk^3).
  x = self.scale_operator.matmul(x)

  # Undo make batch-op ready.
  # Complexity: O(nbk**2)
  shape = array_ops.concat([batch_shape, event_shape, [n]], 0)
  x = array_ops.reshape(x, shape)
  perm = array_ops.concat([[ndims - 1], math_ops.range(0, ndims - 1)], 0)
  x = array_ops.transpose(x, perm)

  if not self.cholesky_input_output_matrices:
    # Complexity: O(nbk^3)
    x = math_ops.matmul(x, x, adjoint_b=True)

  return x
def random_tril_matrix(shape,
                       dtype,
                       force_well_conditioned=False,
                       remove_upper=True):
  """[batch] lower triangular matrix.

  Args:
    shape: `TensorShape` or Python `list`.  Shape of the returned matrix.
    dtype: `TensorFlow` `dtype` or Python dtype.
    force_well_conditioned: Python `bool`.  If `True`, the returned matrix
      will have eigenvalues with modulus in `(1, 2)`.  Otherwise, eigenvalues
      are unit normal random variables.
    remove_upper: Python `bool`.  If `True`, zero out the strictly upper
      triangle.  If `False`, the lower triangle of the returned matrix will
      have the desired properties, but the strictly upper triangle will not
      be zeroed out.

  Returns:
    `Tensor` with desired shape and dtype.
  """
  with ops.name_scope("random_tril_matrix"):
    # Totally random matrix.  Has no nice properties.
    tril = random_normal(shape, dtype=dtype)
    if remove_upper:
      tril = array_ops.matrix_band_part(tril, -1, 0)

    # Create a diagonal with entries having modulus in [1, 2].
    if force_well_conditioned:
      maxval = ops.convert_to_tensor(np.sqrt(2.), dtype=dtype.real_dtype)
      diag = random_sign_uniform(
          shape[:-1], dtype=dtype, minval=1., maxval=maxval)
      tril = array_ops.matrix_set_diag(tril, diag)

    return tril
def _variance(self):
  p = self.p * array_ops.expand_dims(array_ops.ones_like(self.n), -1)
  outer_prod = math_ops.batch_matmul(
      array_ops.expand_dims(self._mean_val, -1),
      array_ops.expand_dims(p, -2))
  return array_ops.matrix_set_diag(
      -outer_prod, self._mean_val - self._mean_val * p)
def matrix_diag_transform(matrix, transform=None, name=None):
  """Transform diagonal of [batch-]matrix, leave rest of matrix unchanged.

  Create a trainable covariance defined by a Cholesky factor:

  ```python
  # Transform network layer into 2 x 2 array.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))

  # Make the diagonal positive.  If the upper triangle was zero, this would
  # be a valid Cholesky factor.
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # OperatorPDCholesky ignores the upper triangle.
  operator = OperatorPDCholesky(chol)
  ```

  Example of heteroskedastic 2-D linear regression.

  ```python
  # Get a trainable Cholesky factor.
  matrix_values = tf.contrib.layers.fully_connected(activations, 4)
  matrix = tf.reshape(matrix_values, (batch_size, 2, 2))
  chol = matrix_diag_transform(matrix, transform=tf.nn.softplus)

  # Get a trainable mean.
  mu = tf.contrib.layers.fully_connected(activations, 2)

  # This is a fully trainable multivariate normal!
  dist = tf.contrib.distributions.MVNCholesky(mu, chol)

  # Standard log loss.  Minimizing this will "train" mu and chol, and then
  # dist will be a distribution predicting labels as multivariate Gaussians.
  loss = -1 * tf.reduce_mean(dist.log_prob(labels))
  ```

  Args:
    matrix:  Rank `R` `Tensor`, `R >= 2`, where the last two dimensions are
      equal.
    transform:  Element-wise function mapping `Tensors` to `Tensors`.  To be
      applied to the diagonal of `matrix`.  If `None`, `matrix` is returned
      unchanged.  Defaults to `None`.
    name:  A name to give created ops.  Defaults to "matrix_diag_transform".

  Returns:
    A `Tensor` with same shape and `dtype` as `matrix`.
  """
  with ops.name_scope(name, "matrix_diag_transform", [matrix]):
    matrix = ops.convert_to_tensor(matrix, name="matrix")
    if transform is None:
      return matrix
    # Replace the diag with transformed diag.
    diag = array_ops.matrix_diag_part(matrix)
    transformed_diag = transform(diag)
    transformed_mat = array_ops.matrix_set_diag(matrix, transformed_diag)

  return transformed_mat
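# A minimal hedged sketch (not from the source) of what matrix_diag_transform
# computes, in plain NumPy with an illustrative softplus helper: off-diagonal
# entries pass through untouched while the diagonal is mapped elementwise.
import numpy as np

def softplus(x):
  return np.log1p(np.exp(x))

matrix = np.array([[-1.0, 2.0],
                   [3.0, -4.0]])
transformed = matrix.copy()
np.fill_diagonal(transformed, softplus(np.diag(matrix)))
# The diagonal is now strictly positive, so the lower triangle of
# `transformed` could serve as a valid Cholesky factor.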
def _variance(self):
  scale = self.alpha_sum * math_ops.sqrt(1. + self.alpha_sum)
  alpha = self.alpha / scale
  outer_prod = -math_ops.batch_matmul(
      array_ops.expand_dims(alpha, dim=-1),  # column
      array_ops.expand_dims(alpha, dim=-2))  # row
  return array_ops.matrix_set_diag(
      outer_prod, alpha * (self.alpha_sum / scale - alpha))
def _verifyLu(self, x, output_idx_type=dtypes.int64):
  # Verify that Px = LU.
  lu, perm = linalg_ops.lu(x, output_idx_type=output_idx_type)

  # Prepare the lower factor of shape num_rows x num_rows.
  lu_shape = np.array(lu.shape.as_list())
  batch_shape = lu_shape[:-2]
  num_rows = lu_shape[-2]
  num_cols = lu_shape[-1]

  lower = array_ops.matrix_band_part(lu, -1, 0)

  if num_rows > num_cols:
    eye = linalg_ops.eye(
        num_rows, batch_shape=batch_shape, dtype=lower.dtype)
    lower = array_ops.concat([lower, eye[..., num_cols:]], axis=-1)
  elif num_rows < num_cols:
    lower = lower[..., :num_rows]

  # Fill the diagonal with ones.
  ones_diag = array_ops.ones(
      np.append(batch_shape, num_rows), dtype=lower.dtype)
  lower = array_ops.matrix_set_diag(lower, ones_diag)

  # Prepare the upper factor.
  upper = array_ops.matrix_band_part(lu, 0, -1)

  verification = math_ops.matmul(lower, upper)

  # Permute the rows of the product of the triangular factors.
  if num_rows > 0:
    # Reshape the product of the triangular factors and permutation indices
    # to a single batch dimension.  This makes it easy to apply
    # invert_permutation and gather_nd ops.
    perm_reshaped = array_ops.reshape(perm, [-1, num_rows])
    verification_reshaped = array_ops.reshape(verification,
                                              [-1, num_rows, num_cols])
    # Invert the permutation in each batch.
    inv_perm_reshaped = map_fn.map_fn(array_ops.invert_permutation,
                                      perm_reshaped)
    batch_size = perm_reshaped.shape.as_list()[0]
    # Prepare the batch indices with the same shape as the permutation.
    # The corresponding batch index is paired with each of the `num_rows`
    # permutation indices.
    batch_indices = math_ops.cast(
        array_ops.broadcast_to(
            math_ops.range(batch_size)[:, None], perm_reshaped.shape),
        dtype=output_idx_type)
    permuted_verification_reshaped = array_ops.gather_nd(
        verification_reshaped,
        array_ops.stack([batch_indices, inv_perm_reshaped], axis=-1))

    # Reshape the verification matrix back to the original shape.
    verification = array_ops.reshape(permuted_verification_reshaped,
                                     lu_shape)

  self._verifyLuBase(x, lower, upper, perm, verification, output_idx_type)
def sign_magnitude_positive_definite(
    raw, off_diagonal_scale=0., overall_scale=0.):
  """Constructs a positive definite matrix from an unconstrained input matrix.

  We want to keep the whole matrix on a log scale, but also allow
  off-diagonal elements to be negative, so the sign of off-diagonal elements
  is modeled separately from their magnitude (using the lower and upper
  triangles respectively).  Specifically:

  for i < j, we have:
    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)

  output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)

  output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are un-constrained
  real-valued variables.  The resulting values are stable around zero due to
  the exponential (and the softsign keeps the function smooth).

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the
      relative scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall
      scale of the output matrix.
  Returns:
    The `output` matrix described above, a [..., M, M] positive definite
    matrix.
  """
  raw = ops.convert_to_tensor(raw)
  diagonal = array_ops.matrix_diag_part(raw)

  def _right_pad_with_ones(tensor, target_rank):
    # Allow broadcasting even if overall_scale and off_diagonal_scale have
    # batch dimensions.
    tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
    return array_ops.reshape(
        tensor,
        array_ops.concat(
            [
                array_ops.shape(tensor),
                array_ops.ones(
                    [target_rank - array_ops.rank(tensor)],
                    dtype=target_rank.dtype)
            ],
            axis=0))

  # We divide the log values by 2 to compensate for the squaring that happens
  # when transforming Cholesky factors into positive definite matrices.
  sign_magnitude = (gen_math_ops.exp(
      (raw + _right_pad_with_ones(off_diagonal_scale, array_ops.rank(raw)) +
       _right_pad_with_ones(overall_scale, array_ops.rank(raw))) / 2.) *
                    nn.softsign(array_ops.matrix_transpose(raw)))
  sign_magnitude.set_shape(raw.get_shape())
  cholesky_factor = array_ops.matrix_set_diag(
      input=array_ops.matrix_band_part(sign_magnitude, 0, -1),
      diagonal=gen_math_ops.exp((diagonal + _right_pad_with_ones(
          overall_scale, array_ops.rank(diagonal))) / 2.))
  return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
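# A hedged NumPy sketch (not part of the source) of the construction in the
# docstring above, for M = 3 with both scale arguments left at zero.  The
# strictly positive exponential diagonal makes the Cholesky factor
# nonsingular, so the product is positive definite.
import numpy as np

rng = np.random.default_rng(0)
raw = rng.standard_normal((3, 3))
# Upper triangle: magnitude exp(raw / 2), sign softsign(raw^T).
softsign = raw.T / (np.abs(raw.T) + 1.)
chol = (np.triu(np.exp(raw / 2.) * softsign, k=1) +
        np.diag(np.exp(np.diag(raw) / 2.)))
output = chol.T @ chol
assert np.all(np.linalg.eigvalsh(output) > 0)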
def testSquare(self):
  with self.session(use_gpu=True):
    v = np.array([1.0, 2.0, 3.0])
    mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0]])
    mat_set_diag = np.array([[1.0, 1.0, 0.0], [1.0, 2.0, 1.0],
                             [1.0, 1.0, 3.0]])
    output = array_ops.matrix_set_diag(mat, v)
    self.assertEqual((3, 3), output.get_shape())
    self.assertAllEqual(mat_set_diag, self.evaluate(output))
def _sample_n(self, n, seed):
  batch_shape = self.batch_shape()
  event_shape = self.event_shape()
  batch_ndims = array_ops.shape(batch_shape)[0]

  ndims = batch_ndims + 3  # sample_ndims=1, event_ndims=2
  shape = array_ops.concat(((n,), batch_shape, event_shape), 0)

  # Complexity: O(nbk^2)
  x = random_ops.random_normal(shape=shape,
                               mean=0.,
                               stddev=1.,
                               dtype=self.dtype,
                               seed=seed)

  # Complexity: O(nbk)
  # This parametrization is equivalent to Chi2, i.e.,
  # ChiSquared(k) == Gamma(alpha=k/2, beta=1/2)
  g = random_ops.random_gamma(shape=(n,),
                              alpha=self._multi_gamma_sequence(
                                  0.5 * self.df, self.dimension),
                              beta=0.5,
                              dtype=self.dtype,
                              seed=distribution_util.gen_new_seed(
                                  seed, "wishart"))

  # Complexity: O(nbk^2)
  x = array_ops.matrix_band_part(x, -1, 0)  # Tri-lower.

  # Complexity: O(nbk)
  x = array_ops.matrix_set_diag(x, math_ops.sqrt(g))

  # Make batch-op ready.
  # Complexity: O(nbk^2)
  perm = array_ops.concat((math_ops.range(1, ndims), (0,)), 0)
  x = array_ops.transpose(x, perm)
  shape = array_ops.concat((batch_shape, (event_shape[0], -1)), 0)
  x = array_ops.reshape(x, shape)

  # Complexity: O(nbM) where M is the complexity of the operator solving a
  # vector system.  E.g., for OperatorPDDiag, each matmul is O(k^2), so
  # this complexity is O(nbk^2).  For OperatorPDCholesky, each matmul is
  # O(k^3) so this step has complexity O(nbk^3).
  x = self.scale_operator_pd.sqrt_matmul(x)

  # Undo make batch-op ready.
  # Complexity: O(nbk^2)
  shape = array_ops.concat((batch_shape, event_shape, (n,)), 0)
  x = array_ops.reshape(x, shape)
  perm = array_ops.concat(((ndims - 1,), math_ops.range(0, ndims - 1)), 0)
  x = array_ops.transpose(x, perm)

  if not self.cholesky_input_output_matrices:
    # Complexity: O(nbk^3)
    x = math_ops.matmul(x, x, adjoint_b=True)

  return x
def _GradWithInverseL(l, l_inverse, grad):
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(
      middle, 0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)
  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)
  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
def eye(
    num_rows,
    num_columns=None,
    batch_shape=None,
    dtype=dtypes.float32,
    name=None):
  """Construct an identity matrix, or a batch of matrices.

  ```python
  # Construct one identity matrix.
  tf.eye(2)
  ==> [[1., 0.],
       [0., 1.]]

  # Construct a batch of 3 identity matrices, each 2 x 2.
  # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2.
  batch_identity = tf.eye(2, batch_shape=[3])

  # Construct one 2 x 3 "identity" matrix
  tf.eye(2, num_columns=3)
  ==> [[ 1.,  0.,  0.],
       [ 0.,  1.,  0.]]
  ```

  Args:
    num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows
      in each batch matrix.
    num_columns: Optional non-negative `int32` scalar `Tensor` giving the
      number of columns in each batch matrix.  Defaults to `num_rows`.
    batch_shape: `int32` `Tensor`.  If provided, returned `Tensor` will have
      leading batch dimensions of this shape.
    dtype: The type of an element in the resulting `Tensor`.
    name: A name for this `Op`.  Defaults to "eye".

  Returns:
    A `Tensor` of shape `batch_shape + [num_rows, num_columns]`.
  """
  with ops.name_scope(
      name, default_name="eye", values=[num_rows, num_columns, batch_shape]):
    batch_shape = [] if batch_shape is None else batch_shape
    batch_shape = ops.convert_to_tensor(
        batch_shape, name="shape", dtype=dtypes.int32)

    if num_columns is None:
      diag_size = num_rows
    else:
      diag_size = math_ops.minimum(num_rows, num_columns)
    diag_shape = array_ops.concat_v2((batch_shape, [diag_size]), 0)
    diag_ones = array_ops.ones(diag_shape, dtype=dtype)

    if num_columns is None:
      return array_ops.matrix_diag(diag_ones)
    else:
      shape = array_ops.concat_v2((batch_shape, [num_rows, num_columns]), 0)
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
def testRectangularBatch(self):
  with self.session(use_gpu=True):
    v_batch = np.array([[-1.0, -2.0], [-4.0, -5.0]])
    mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0]],
                          [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0]]])
    mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, -2.0, 0.0]],
                                   [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0]]])
    output = array_ops.matrix_set_diag(mat_batch, v_batch)
    self.assertEqual((2, 2, 3), output.get_shape())
    self.assertAllEqual(mat_set_diag_batch, self.evaluate(output))
def _variance(self):
  alpha_sum = array_ops.expand_dims(self.alpha_sum, -1)
  normalized_alpha = self.alpha / alpha_sum
  variance = -math_ops.matmul(
      array_ops.expand_dims(normalized_alpha, -1),
      array_ops.expand_dims(normalized_alpha, -2))
  variance = array_ops.matrix_set_diag(
      variance, normalized_alpha * (1. - normalized_alpha))
  shared_factor = (self.n * (alpha_sum + self.n) / (alpha_sum + 1) *
                   array_ops.ones_like(self.alpha))
  variance *= array_ops.expand_dims(shared_factor, -1)
  return variance
def _preprocess_tril(self, identity_multiplier, diag, tril, event_ndims):
  """Helper to preprocess a lower triangular matrix."""
  tril = array_ops.matrix_band_part(tril, -1, 0)  # Zero out TriU.
  if identity_multiplier is None and diag is None:
    return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
  new_diag = array_ops.matrix_diag_part(tril)
  if identity_multiplier is not None:
    new_diag += identity_multiplier
  if diag is not None:
    new_diag += diag
  tril = array_ops.matrix_set_diag(tril, new_diag)
  return self._process_matrix(tril, min_rank=2, event_ndims=event_ndims)
def loop_fn(i):
  matrix_i = array_ops.gather(matrices, i)
  diag_i = array_ops.gather(diags, i)
  results = [
      array_ops.matrix_set_diag(matrix_i, diag_i),
      array_ops.matrix_set_diag(matrices[0, ...], diag_i),
      array_ops.matrix_set_diag(matrix_i, diags[0, ...]),
  ]

  k = (-1, 1)
  band_i = array_ops.gather(bands, i)
  for align in ["RIGHT_LEFT", "LEFT_RIGHT"]:
    results.extend([
        array_ops.matrix_set_diag(matrix_i, band_i, k=k, align=align),
        array_ops.matrix_set_diag(
            matrices[0, ...], band_i, k=k, align=align),
        array_ops.matrix_set_diag(matrix_i, bands[0, ...], k=k, align=align)
    ])
  return results
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
  """Gradient for SelfAdjointEigV2."""
  e = op.outputs[0]
  compute_v = op.get_attr("compute_v")
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
  with ops.control_dependencies([grad_e, grad_v]):
    if compute_v:
      v = op.outputs[1]
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only
      # defined up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          math_ops.reciprocal(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e) +
              f * math_ops.matmul(v, grad_v, adjoint_a=True),
              v,
              adjoint_b=True))
    else:
      _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e), v, adjoint_b=True))
    # The forward op only depends on the lower triangular part of a, so here
    # we symmetrize and take the lower triangle.
    grad_a = array_ops.matrix_band_part(
        grad_a + math_ops.conj(array_ops.matrix_transpose(grad_a)), -1, 0)
    grad_a = array_ops.matrix_set_diag(
        grad_a, 0.5 * array_ops.matrix_diag_part(grad_a))
    return grad_a
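# A hedged NumPy illustration (not part of the source) of the matrix f built
# above: a repeated eigenvalue produces an infinite off-diagonal entry,
# which is exactly the degeneracy problem the comment describes.
import numpy as np

e = np.array([1.0, 2.0, 2.0])
with np.errstate(divide="ignore"):
  f = 1.0 / (e[None, :] - e[:, None])  # entry (i, j) is 1 / (e_j - e_i)
np.fill_diagonal(f, 0.0)  # the matrix_set_diag(..., zeros_like(e)) step
assert np.isinf(f[1, 2])  # e_1 == e_2, so the reciprocal blows up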
def _EigGrad(op, grad_e, grad_v):
  """Gradient for Eig.

  Based on eq. 4.77 from the paper by Christoph Boeddeker et al.
  https://arxiv.org/abs/1701.00392
  See also "Computation of eigenvalue and eigenvector derivatives for a
  general complex-valued eigensystem" by Nico van der Aa.
  For now, only the distinct-eigenvalue case is considered.
  """
  e = op.outputs[0]
  compute_v = op.get_attr("compute_v")
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
  with ops.control_dependencies([grad_e, grad_v]):
    if compute_v:
      v = op.outputs[1]
      vt = _linalg.adjoint(v)
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only
      # defined up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          _SafeReciprocal(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      f = math_ops.conj(f)
      vgv = math_ops.matmul(vt, grad_v)
      mid = array_ops.matrix_diag(grad_e)
      diag_grad_part = array_ops.matrix_diag(
          array_ops.matrix_diag_part(
              math_ops.cast(math_ops.real(vgv), vgv.dtype)))
      mid += f * (
          vgv - math_ops.matmul(math_ops.matmul(vt, v), diag_grad_part))
      # vt is formally invertible as long as the original matrix is
      # diagonalizable.  However, in practice, vt may be ill-conditioned
      # when the original matrix is close to a non-diagonalizable one.
      grad_a = linalg_ops.matrix_solve(vt, math_ops.matmul(mid, vt))
    else:
      _, v = linalg_ops.eig(op.inputs[0])
      vt = _linalg.adjoint(v)
      # vt is formally invertible as long as the original matrix is
      # diagonalizable.  However, in practice, vt may be ill-conditioned
      # when the original matrix is close to a non-diagonalizable one.
      grad_a = linalg_ops.matrix_solve(
          vt, math_ops.matmul(array_ops.matrix_diag(grad_e), vt))
    return math_ops.cast(grad_a, op.inputs[0].dtype)
def add_to_tensor(self, mat, name="add_to_tensor"):
  """Add matrix represented by this operator to `mat`.  Equiv to `I + mat`.

  Args:
    mat: `Tensor` with same `dtype` and shape broadcastable to `self`.
    name: A name to give this `Op`.

  Returns:
    A `Tensor` with broadcast shape and same `dtype` as `self`.
  """
  with self._name_scope(name):
    mat = ops.convert_to_tensor(mat, name="mat")
    mat_diag = array_ops.matrix_diag_part(mat)
    new_diag = 1 + mat_diag
    return array_ops.matrix_set_diag(mat, new_diag)
def testRectangularBatch(self):
  with self.test_session(use_gpu=True):
    v_batch = np.array([[-1.0, -2.0], [-4.0, -5.0]])
    mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0]],
                          [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0]]])
    mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, -2.0, 0.0]],
                                   [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0]]])
    output = array_ops.matrix_set_diag(mat_batch, v_batch)
    self.assertEqual((2, 2, 3), output.get_shape())
    self.assertAllEqual(mat_set_diag_batch, output.eval())
def _SelfAdjointEigV2Grad(op, grad_e, grad_v):
  """Gradient for SelfAdjointEigV2."""
  e = op.outputs[0]
  compute_v = op.get_attr("compute_v")
  # a = op.inputs[0], which satisfies
  # a[...,:,:] * v[...,:,i] = e[...,i] * v[...,i]
  with ops.control_dependencies([grad_e, grad_v]):
    if compute_v:
      v = op.outputs[1]
      # Construct the matrix f(i,j) = (i != j ? 1 / (e_i - e_j) : 0).
      # Notice that because of the term involving f, the gradient becomes
      # infinite (or NaN in practice) when eigenvalues are not unique.
      # Mathematically this should not be surprising, since for (k-fold)
      # degenerate eigenvalues, the corresponding eigenvectors are only
      # defined up to arbitrary rotation in a (k-dimensional) subspace.
      f = array_ops.matrix_set_diag(
          math_ops.reciprocal(
              array_ops.expand_dims(e, -2) - array_ops.expand_dims(e, -1)),
          array_ops.zeros_like(e))
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e) +
              f * math_ops.matmul(v, grad_v, adjoint_a=True),
              v,
              adjoint_b=True))
    else:
      _, v = linalg_ops.self_adjoint_eig(op.inputs[0])
      grad_a = math_ops.matmul(
          v,
          math_ops.matmul(
              array_ops.matrix_diag(grad_e), v, adjoint_b=True))
    # The forward op only depends on the lower triangular part of a, so here
    # we symmetrize and take the lower triangle.
    grad_a = array_ops.matrix_band_part(
        grad_a + _linalg.adjoint(grad_a), -1, 0)
    grad_a = array_ops.matrix_set_diag(
        grad_a, 0.5 * array_ops.matrix_diag_part(grad_a))
    return grad_a
def _MatrixDiagPartV2Grad(op, grad):
  """Gradient for MatrixDiagPartV2."""
  matrix_shape = op.inputs[0].get_shape()[-2:]
  if matrix_shape.is_fully_defined():
    return array_ops.matrix_diag(
        grad,
        k=op.inputs[1],
        num_rows=matrix_shape[0],
        num_cols=matrix_shape[1]), None, None
  else:
    return array_ops.matrix_set_diag(
        array_ops.zeros_like(op.inputs[0]), grad,
        k=op.inputs[1]), None, None
def _MatrixSetDiagGrad(op, grad):
  diag_shape = op.inputs[1].get_shape()
  diag_shape = diag_shape.merge_with(op.inputs[0].get_shape()[:-1])
  diag_shape = diag_shape.merge_with(grad.get_shape()[:-1])
  if diag_shape.is_fully_defined():
    diag_shape = diag_shape.as_list()
  else:
    diag_shape = array_ops.shape(grad)
    diag_shape = array_ops.slice(diag_shape, [0], [array_ops.rank(grad) - 1])
  grad_input = array_ops.matrix_set_diag(
      grad, array_ops.zeros(diag_shape, dtype=grad.dtype))
  grad_diag = array_ops.matrix_diag_part(grad)
  return (grad_input, grad_diag)
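# A hedged NumPy illustration (not part of the source) of the gradient split
# computed above: the upstream gradient flows to the diagonal argument along
# the diagonal, and to the matrix argument everywhere else (diagonal entries
# of the input were overwritten in the forward pass, so they receive zero
# gradient).
import numpy as np

grad = np.arange(9.0).reshape(3, 3)
grad_diag = np.diag(grad).copy()   # gradient w.r.t. the diagonal argument
grad_input = grad.copy()
np.fill_diagonal(grad_input, 0.0)  # gradient w.r.t. the matrix argument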
def _testGrad(self, input_shape, diag_shape, diags):
  with self.session(use_gpu=True):
    x = constant_op.constant(
        np.random.rand(*input_shape), dtype=dtypes_lib.float32)
    x_diag = constant_op.constant(
        np.random.rand(*diag_shape), dtype=dtypes_lib.float32)

    # LINT.IfChange
    if compat.forward_compatible(2019, 11, 30):
      # LINT.ThenChange(//tensorflow/python/ops/array_ops.py)
      y = array_ops.matrix_set_diag(x, x_diag, k=diags)
    else:
      y = array_ops.matrix_set_diag(x, x_diag)
    error_x = gradient_checker.compute_gradient_error(
        x, x.get_shape().as_list(), y, y.get_shape().as_list())
    self.assertLess(error_x, 1e-4)
    error_x_diag = gradient_checker.compute_gradient_error(
        x_diag, x_diag.get_shape().as_list(), y, y.get_shape().as_list())
    self.assertLess(error_x_diag, 1e-4)
def test_assert_non_singular_raises_if_cond_too_big_but_finite(self):
  with self.cached_session():
    tril = linear_operator_test_util.random_tril_matrix(
        shape=(50, 50), dtype=np.float32)
    diag = np.logspace(-2, 2, 50).astype(np.float32)
    tril = array_ops.matrix_set_diag(tril, diag)
    matrix = math_ops.matmul(tril, tril, transpose_b=True).eval()
    operator = linalg.LinearOperatorFullMatrix(matrix)
    with self.assertRaisesOpError("Singular matrix"):
      # Ensure that we have finite condition number...just HUGE.
      cond = np.linalg.cond(matrix)
      self.assertTrue(np.isfinite(cond))
      self.assertGreater(cond, 1e12)
      operator.assert_non_singular().run()
def testRectangularBatch(self):
  with self.session(use_gpu=True):
    v_batch = np.array([[-1.0, -2.0], [-4.0, -5.0]])
    mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0]],
                          [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0]]])
    mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, -2.0, 0.0]],
                                   [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0]]])
    output = array_ops.matrix_set_diag(mat_batch, v_batch)
    self.assertEqual((2, 2, 3), output.get_shape())
    self.assertAllEqual(mat_set_diag_batch, self.evaluate(output))

    if compat.forward_compatible(2019, 7, 4):
      # Diagonal bands.
      for _, tests in [tall_cases(), fat_cases()]:
        for diags, pair in tests.items():
          vecs, banded_mat = pair
          mask = banded_mat == 0
          input_mat = np.random.randint(10, size=mask.shape)
          solution = input_mat * mask + banded_mat
          output = array_ops.matrix_set_diag(input_mat, vecs, k=diags)
          self.assertEqual(output.get_shape(), solution.shape)
          self.assertAllEqual(output.eval(), solution)
def test_assert_non_singular_raises_if_cond_too_big_but_finite(self):
  with self.test_session():
    tril = linear_operator_test_util.random_tril_matrix(
        shape=(50, 50), dtype=np.float32)
    diag = np.logspace(-2, 2, 50).astype(np.float32)
    tril = array_ops.matrix_set_diag(tril, diag)
    matrix = math_ops.matmul(tril, tril, transpose_b=True).eval()
    operator = linalg.LinearOperatorFullMatrix(matrix)
    with self.assertRaisesOpError("Singular matrix"):
      # Ensure that we have finite condition number...just HUGE.
      cond = np.linalg.cond(matrix)
      self.assertTrue(np.isfinite(cond))
      self.assertGreater(cond, 1e12)
      operator.assert_non_singular().run()
def _testSquareBatch(self, dtype):
  with self.cached_session(use_gpu=True):
    v_batch = np.array([[-1.0, 0.0, -3.0],
                        [-4.0, -5.0, -6.0]]).astype(dtype)
    mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0],
                           [1.0, 0.0, 3.0]],
                          [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0],
                           [2.0, 0.0, 6.0]]]).astype(dtype)
    mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, 0.0, 0.0],
                                    [1.0, 0.0, -3.0]],
                                   [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0],
                                    [2.0, 0.0, -6.0]]]).astype(dtype)
    output = array_ops.matrix_set_diag(mat_batch, v_batch)
    self.assertEqual((2, 3, 3), output.get_shape())
    self.assertAllEqual(mat_set_diag_batch, self.evaluate(output))
def testSquare(self):
  with self.session(use_gpu=True):
    v = np.array([1.0, 2.0, 3.0])
    mat = np.array([[0.0, 1.0, 0.0], [1.0, 0.0, 1.0], [1.0, 1.0, 1.0]])
    mat_set_diag = np.array([[1.0, 1.0, 0.0], [1.0, 2.0, 1.0],
                             [1.0, 1.0, 3.0]])
    output = array_ops.matrix_set_diag(mat, v)
    self.assertEqual((3, 3), output.get_shape())
    self.assertAllEqual(mat_set_diag, self.evaluate(output))

    # LINT.IfChange
    if compat.forward_compatible(2019, 11, 30):
      # LINT.ThenChange(//tensorflow/python/ops/array_ops.py)

      # Diagonal bands.
      _, tests = square_cases()
      for diags, pair in tests.items():
        vecs, banded_mat = pair
        mask = banded_mat[0] == 0
        input_mat = np.random.randint(10, size=mask.shape)
        solution = input_mat * mask + banded_mat[0]
        output = array_ops.matrix_set_diag(input_mat, vecs[0], k=diags)
        self.assertEqual(output.get_shape(), solution.shape)
        self.assertAllEqual(output.eval(), solution)
def add_to_tensor(self, mat, name="add_to_tensor"):
  """Add matrix represented by this operator to `mat`.  Equiv to `I + mat`.

  Args:
    mat: `Tensor` with same `dtype` and shape broadcastable to `self`.
    name: A name to give this `Op`.

  Returns:
    A `Tensor` with broadcast shape and same `dtype` as `self`.
  """
  with self._name_scope(name, values=[mat]):
    mat = ops.convert_to_tensor(mat, name="mat")
    mat_diag = array_ops.matrix_diag_part(mat)
    new_diag = 1 + mat_diag
    return array_ops.matrix_set_diag(mat, new_diag)
def _testSquareBatch(self, dtype):
  with self.cached_session(use_gpu=True):
    v_batch = np.array([[-1.0, 0.0, -3.0],
                        [-4.0, -5.0, -6.0]]).astype(dtype)
    mat_batch = np.array([[[1.0, 0.0, 3.0], [0.0, 2.0, 0.0],
                           [1.0, 0.0, 3.0]],
                          [[4.0, 0.0, 4.0], [0.0, 5.0, 0.0],
                           [2.0, 0.0, 6.0]]]).astype(dtype)
    mat_set_diag_batch = np.array([[[-1.0, 0.0, 3.0], [0.0, 0.0, 0.0],
                                    [1.0, 0.0, -3.0]],
                                   [[-4.0, 0.0, 4.0], [0.0, -5.0, 0.0],
                                    [2.0, 0.0, -6.0]]]).astype(dtype)
    output = array_ops.matrix_set_diag(mat_batch, v_batch)
    self.assertEqual((2, 3, 3), output.get_shape())
    self.assertAllEqual(mat_set_diag_batch, output.eval())
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
  """Construct an identity matrix, or a batch of matrices.

  See `linalg_ops.eye`.
  """
  with ops.name_scope(
      name, default_name='eye', values=[num_rows, num_columns, batch_shape]):
    is_square = num_columns is None
    batch_shape = [] if batch_shape is None else batch_shape
    num_columns = num_rows if num_columns is None else num_columns

    # We cannot statically infer what the diagonal size should be:
    if (isinstance(num_rows, ops.Tensor) or
        isinstance(num_columns, ops.Tensor)):
      diag_size = math_ops.minimum(num_rows, num_columns)
    else:
      # We can statically infer the diagonal size, and whether it is square.
      if not isinstance(num_rows, compat.integral_types) or not isinstance(
          num_columns, compat.integral_types):
        raise TypeError(
            'num_rows and num_columns must be positive integer values.')
      is_square = num_rows == num_columns
      diag_size = np.minimum(num_rows, num_columns)

    # We cannot statically infer the shape of the tensor.
    if isinstance(batch_shape, ops.Tensor) or isinstance(
        diag_size, ops.Tensor):
      batch_shape = ops.convert_to_tensor(
          batch_shape, name='shape', dtype=dtypes.int32)
      diag_shape = array_ops.concat((batch_shape, [diag_size]), axis=0)
      if not is_square:
        shape = array_ops.concat((batch_shape, [num_rows, num_columns]),
                                 axis=0)
    # We can statically infer everything.
    else:
      batch_shape = list(batch_shape)
      diag_shape = batch_shape + [diag_size]
      if not is_square:
        shape = batch_shape + [num_rows, num_columns]

    diag_ones = array_ops.ones(diag_shape, dtype=dtype)
    if is_square:
      return array_ops.matrix_diag(diag_ones)
    else:
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
def testGrad(self):
  shapes = ((3, 4, 4), (3, 3, 4), (3, 4, 3), (7, 4, 8, 8))
  with self.test_session(use_gpu=True):
    for shape in shapes:
      x = constant_op.constant(
          np.random.rand(*shape), dtype=dtypes_lib.float32)
      diag_shape = shape[:-2] + (min(shape[-2:]),)
      x_diag = constant_op.constant(
          np.random.rand(*diag_shape), dtype=dtypes_lib.float32)
      y = array_ops.matrix_set_diag(x, x_diag)
      error_x = gradient_checker.compute_gradient_error(
          x, x.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error_x, 1e-4)
      error_x_diag = gradient_checker.compute_gradient_error(
          x_diag, x_diag.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error_x_diag, 1e-4)
def testGrad(self):
  shapes = ((3, 4, 4), (3, 3, 4), (3, 4, 3), (7, 4, 8, 8))
  with self.test_session(use_gpu=self._use_gpu):
    for shape in shapes:
      x = constant_op.constant(
          np.random.rand(*shape), dtype=dtypes_lib.float32)
      diag_shape = shape[:-2] + (min(shape[-2:]),)
      x_diag = constant_op.constant(
          np.random.rand(*diag_shape), dtype=dtypes_lib.float32)
      y = array_ops.matrix_set_diag(x, x_diag)
      error_x = gradient_checker.compute_gradient_error(
          x, x.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error_x, 1e-4)
      error_x_diag = gradient_checker.compute_gradient_error(
          x_diag, x_diag.get_shape().as_list(), y, y.get_shape().as_list())
      self.assertLess(error_x_diag, 1e-4)
def testGradWithNoShapeInformation(self):
  with self.test_session(use_gpu=True) as sess:
    v = array_ops.placeholder(dtype=dtypes_lib.float32)
    mat = array_ops.placeholder(dtype=dtypes_lib.float32)
    grad_input = array_ops.placeholder(dtype=dtypes_lib.float32)
    output = array_ops.matrix_set_diag(mat, v)
    grads = gradients_impl.gradients(output, [mat, v], grad_ys=grad_input)
    grad_input_val = np.random.rand(3, 3).astype(np.float32)
    grad_vals = sess.run(
        grads,
        feed_dict={
            v: 2 * np.ones(3),
            mat: np.ones((3, 3)),
            grad_input: grad_input_val
        })
    self.assertAllEqual(np.diag(grad_input_val), grad_vals[1])
    self.assertAllEqual(grad_input_val - np.diag(np.diag(grad_input_val)),
                        grad_vals[0])
def testGradWithNoShapeInformation(self):
  with self.test_session(use_gpu=self._use_gpu) as sess:
    v = array_ops.placeholder(dtype=dtypes_lib.float32)
    mat = array_ops.placeholder(dtype=dtypes_lib.float32)
    grad_input = array_ops.placeholder(dtype=dtypes_lib.float32)
    output = array_ops.matrix_set_diag(mat, v)
    grads = gradients_impl.gradients(output, [mat, v], grad_ys=grad_input)
    grad_input_val = np.random.rand(3, 3).astype(np.float32)
    grad_vals = sess.run(
        grads,
        feed_dict={
            v: 2 * np.ones(3),
            mat: np.ones((3, 3)),
            grad_input: grad_input_val
        })
    self.assertAllEqual(np.diag(grad_input_val), grad_vals[1])
    self.assertAllEqual(grad_input_val - np.diag(np.diag(grad_input_val)),
                        grad_vals[0])
def eye(num_rows,
        num_columns=None,
        batch_shape=None,
        dtype=dtypes.float32,
        name=None):
  """Construct an identity matrix, or a batch of matrices.

  See `linalg_ops.eye`.
  """
  with ops.name_scope(
      name, default_name='eye', values=[num_rows, num_columns, batch_shape]):
    is_square = num_columns is None
    batch_shape = [] if batch_shape is None else batch_shape
    num_columns = num_rows if num_columns is None else num_columns
    if isinstance(num_rows, ops.Tensor) or isinstance(
        num_columns, ops.Tensor) or isinstance(batch_shape, ops.Tensor):
      batch_shape = ops.convert_to_tensor(
          batch_shape, name='shape', dtype=dtypes.int32)
      diag_size = math_ops.minimum(num_rows, num_columns)
      diag_shape = array_ops.concat((batch_shape, [diag_size]), 0)
      if not is_square:
        shape = array_ops.concat((batch_shape, [num_rows, num_columns]), 0)
    else:
      if not isinstance(num_rows, compat.integral_types) or not isinstance(
          num_columns, compat.integral_types):
        raise TypeError(
            'num_rows and num_columns must be positive integer values.')
      batch_shape = [dim for dim in batch_shape]
      is_square = num_rows == num_columns
      diag_shape = batch_shape + [np.minimum(num_rows, num_columns)]
      if not is_square:
        shape = batch_shape + [num_rows, num_columns]

    diag_ones = array_ops.ones(diag_shape, dtype=dtype)
    if is_square:
      return array_ops.matrix_diag(diag_ones)
    else:
      zero_matrix = array_ops.zeros(shape, dtype=dtype)
      return array_ops.matrix_set_diag(zero_matrix, diag_ones)
def _assertOpOutputMatchesExpected(self,
                                   params,
                                   solution,
                                   high_level=True,
                                   rtol=1e-3,
                                   atol=1e-5):
  """Verifies that matrix_set_diag produces `solution` when fed `params`.

  Args:
    params: dictionary containing input parameters to matrix_set_diag.
    solution: numpy array representing the expected output of
      matrix_set_diag.
    high_level: if True, call the high-level `array_ops.matrix_set_diag`
      wrapper; otherwise call `gen_array_ops.matrix_set_diag` directly.
    rtol: relative tolerance for equality test.
    atol: absolute tolerance for equality test.
  """
  input = params["input"]  # pylint: disable=redefined-builtin
  diagonal = params["diagonal"]
  with self.session() as session:
    for dtype in self.numeric_types - {np.int8, np.uint8}:
      expected = solution.astype(dtype)
      with self.test_scope():
        params["input"] = array_ops.placeholder(
            dtype, input.shape, name="input")
        params["diagonal"] = array_ops.placeholder(
            dtype, diagonal.shape, name="diagonal")
        if high_level:
          # wraps gen_array_ops.matrix_set_diag_v3
          output = array_ops.matrix_set_diag(**params)
        else:
          # TODO(b/201086188): Remove this case once MatrixDiag V1 is
          # removed.
          output = gen_array_ops.matrix_set_diag(**params)
      result = session.run(
          output, {
              params["input"]: input.astype(dtype),
              params["diagonal"]: diagonal.astype(dtype)
          })
      self.assertEqual(output.dtype, expected.dtype)
      self.assertAllCloseAccordingToType(
          expected, result, rtol=rtol, atol=atol, bfloat16_rtol=0.03)
def _MatrixSetDiagGrad(op, grad):
  input_shape = op.inputs[0].get_shape().merge_with(grad.get_shape())
  diag_shape = op.inputs[1].get_shape()
  batch_shape = input_shape[:-2].merge_with(diag_shape[:-1])
  matrix_shape = input_shape[-2:]
  if batch_shape.is_fully_defined() and matrix_shape.is_fully_defined():
    diag_shape = batch_shape.as_list() + [min(matrix_shape.as_list())]
  else:
    with ops.colocate_with(grad):
      grad_shape = array_ops.shape(grad)
      grad_rank = array_ops.rank(grad)
      batch_shape = array_ops.slice(grad_shape, [0], [grad_rank - 2])
      matrix_shape = array_ops.slice(grad_shape, [grad_rank - 2], [2])
      min_dim = math_ops.reduce_min(matrix_shape)
      diag_shape = array_ops.concat([batch_shape, [min_dim]], 0)
  grad_input = array_ops.matrix_set_diag(
      grad, array_ops.zeros(diag_shape, dtype=grad.dtype))
  grad_diag = array_ops.matrix_diag_part(grad)
  return (grad_input, grad_diag)
def TriAngSolveCompositeGrad(l, grad):
  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}

  # Compute ((l^{H} @ grad) * (tril(ones)-1/2*eye)) = middle
  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(
      middle, 0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  # Compute l^{-H} @ middle = z
  l_inverse_middle = linalg_ops.matrix_triangular_solve(
      l, middle, adjoint=True)

  # We need to compute z @ l^{-1}.  With matrix_triangular_solve we
  # actually compute l^{-H} @ z^{H} = grad.  Since we later add grad^{H}
  # we can omit the conjugate transpose here.
  z_h = math_ops.conj(array_ops.matrix_transpose(l_inverse_middle))
  grad_a = linalg_ops.matrix_triangular_solve(l, z_h, adjoint=True)
  grad_a += linalg.adjoint(grad_a)
  return grad_a * 0.5
def _CholeskyGrad(op, grad):
  """Gradient for Cholesky."""
  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}
  l = op.outputs[0]
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  l_inverse = linalg_ops.matrix_triangular_solve(
      l, linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=l.dtype))

  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(
      middle, 0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)

  grad_a += math_ops.conj(array_ops.matrix_transpose(grad_a))
  return grad_a * 0.5
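# A hedged NumPy check (not part of the source) of the masking step shared
# by the Cholesky gradients above: halving the diagonal of l^H @ grad and
# then keeping the lower band is the same as multiplying elementwise by
# tril(ones) - 1/2 * eye.
import numpy as np

n = 4
rng = np.random.default_rng(0)
middle = rng.standard_normal((n, n))
mask = np.tril(np.ones((n, n))) - 0.5 * np.eye(n)

masked = middle * mask
check = np.tril(middle)
np.fill_diagonal(check, 0.5 * np.diag(middle))
assert np.allclose(masked, check)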
def _CholeskyGrad(op, grad):
  """Gradient for Cholesky."""
  # Gradient is l^{-H} @ ((l^{H} @ grad) * (tril(ones)-1/2*eye)) @ l^{-1}
  l = op.outputs[0]
  num_rows = array_ops.shape(l)[-1]
  batch_shape = array_ops.shape(l)[:-2]
  l_inverse = linalg_ops.matrix_triangular_solve(
      l, linalg_ops.eye(num_rows, batch_shape=batch_shape, dtype=l.dtype))

  middle = math_ops.matmul(l, grad, adjoint_a=True)
  middle = array_ops.matrix_set_diag(
      middle, 0.5 * array_ops.matrix_diag_part(middle))
  middle = array_ops.matrix_band_part(middle, -1, 0)

  grad_a = math_ops.matmul(
      math_ops.matmul(l_inverse, middle, adjoint_a=True), l_inverse)

  grad_a += _linalg.adjoint(grad_a)
  return grad_a * 0.5
def testInvalidShapeAtEval(self):
  with self.session(use_gpu=True):
    v = array_ops.placeholder(dtype=dtypes_lib.float32)
    with self.assertRaisesOpError("input must be at least 2-dim"):
      array_ops.matrix_set_diag(v, [v]).eval(feed_dict={v: 0.0})
    with self.assertRaisesOpError("diagonal must be at least 1-dim"):
      array_ops.matrix_set_diag([[v]], v).eval(feed_dict={v: 0.0})

    if compat.forward_compatible(2019, 7, 4):
      d = array_ops.placeholder(dtype=dtypes_lib.float32)
      with self.assertRaisesOpError(
          "first dimensions of diagonal don't match"):
        array_ops.matrix_set_diag(v, d).eval(feed_dict={
            v: np.zeros((2, 3, 3)),
            d: np.ones((2, 4))
        })
def clip_covariance(covariance_matrix, maximum_variance_ratio,
                    minimum_variance):
  """Enforce constraints on a covariance matrix to improve numerical stability.

  Args:
    covariance_matrix: A [..., N, N] batch of covariance matrices.
    maximum_variance_ratio: The maximum allowed ratio of two diagonal
      entries.  Any entries lower than the maximum entry divided by this
      ratio will be set to that value.
    minimum_variance: A floor for diagonal entries in the returned matrix.
  Returns:
    A new covariance matrix with the requested constraints enforced.  If the
    input was positive definite, the output will be too.
  """
  # TODO(allenl): Smarter scaling here so that correlations are preserved
  # when fiddling with diagonal elements.
  diagonal = array_ops.matrix_diag_part(covariance_matrix)
  maximum = math_ops.reduce_max(diagonal, axis=-1, keep_dims=True)
  new_diagonal = gen_math_ops.maximum(diagonal,
                                      maximum / maximum_variance_ratio)
  return array_ops.matrix_set_diag(
      covariance_matrix, math_ops.maximum(new_diagonal, minimum_variance))
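# A hedged worked example (made-up numbers, not from the source) of the
# diagonal clipping above: variances spanning four orders of magnitude,
# clipped to a 100:1 ratio and then floored at 1e-2.
import numpy as np

diag = np.array([1e-4, 1e-2, 1.0])
maximum = diag.max()
new_diag = np.maximum(diag, maximum / 100.0)  # ratio constraint
new_diag = np.maximum(new_diag, 1e-2)         # variance floor
assert np.allclose(new_diag, [0.01, 0.01, 1.0])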
def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
  """Returns a uniformly random `Tensor` of "correlation-like" matrices.

  A "correlation-like" matrix is a symmetric square matrix with all entries
  between -1 and 1 (inclusive) and 1s on the main diagonal.  Of these,
  the ones that are positive semi-definite are exactly the correlation
  matrices.

  Args:
    num_rows: Python `int` dimension of the correlation-like matrices.
    batch_shape: `Tensor` or Python `tuple` of `int` shape of the
      batch to return.
    dtype: `dtype` of the `Tensor` to return.
    seed: Random seed.

  Returns:
    matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]` and
      dtype `dtype`.  Each entry is in [-1, 1], and each matrix along the
      bottom two dimensions is symmetric and has 1s on the main diagonal.
  """
  # Integer division keeps the shape entry an int (the product of two
  # consecutive integers is always even, so nothing is lost).
  num_entries = num_rows * (num_rows + 1) // 2
  ones = array_ops.ones(shape=[num_entries], dtype=dtype)
  # It seems wasteful to generate random values for the diagonal since
  # I am going to throw them away, but `fill_triangular` fills the
  # diagonal, so I probably need them.
  # It's not impossible that it would be more efficient to just fill
  # the whole matrix with random values instead of messing with
  # `fill_triangular`.  Then would need to filter almost half out with
  # `matrix_band_part`.
  unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
  tril = util.fill_triangular(unifs)
  symmetric = tril + array_ops.matrix_transpose(tril)
  diagonal_ones = array_ops.ones(
      shape=util.pad(batch_shape, axis=0, back=True, value=num_rows),
      dtype=dtype)
  return array_ops.matrix_set_diag(symmetric, diagonal_ones)
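# A hedged NumPy sketch (not part of the source) of the construction above
# for a single 3 x 3 matrix: fill the lower triangle (including the
# diagonal) with uniforms, symmetrize by adding the transpose, then
# overwrite the diagonal with ones.  `fill_triangular` packs entries in a
# different order, but with i.i.d. uniforms the distribution is the same.
import numpy as np

num_rows = 3
num_entries = num_rows * (num_rows + 1) // 2
rng = np.random.default_rng(0)
tril = np.zeros((num_rows, num_rows))
tril[np.tril_indices(num_rows)] = rng.uniform(-1.0, 1.0, size=num_entries)
symmetric = tril + tril.T
np.fill_diagonal(symmetric, 1.0)
# Off-diagonal entries stay in [-1, 1] because for i != j only one of
# tril[i, j] and tril[j, i] is nonzero.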
def _assertions(self, x):
  if not self.validate_args:
    return []
  shape = array_ops.shape(x)
  is_matrix = check_ops.assert_rank_at_least(
      x, 2, message="Input must have rank at least 2.")
  is_square = check_ops.assert_equal(
      shape[-2], shape[-1], message="Input must be a square matrix.")
  above_diagonal = array_ops.matrix_band_part(
      array_ops.matrix_set_diag(
          x, array_ops.zeros(shape[:-1], dtype=dtypes.float32)),
      0, -1)
  is_lower_triangular = check_ops.assert_equal(
      above_diagonal, array_ops.zeros_like(above_diagonal),
      message="Input must be lower triangular.")
  # A lower triangular matrix is nonsingular iff all its diagonal entries
  # are nonzero.
  diag_part = array_ops.matrix_diag_part(x)
  is_nonsingular = check_ops.assert_none_equal(
      diag_part, array_ops.zeros_like(diag_part),
      message="Input must have all diagonal entries nonzero.")
  return [is_matrix, is_square, is_lower_triangular, is_nonsingular]
def loop_fn(i):
  matrix_i = array_ops.gather(matrices, i)
  diag_i = array_ops.gather(diags, i)
  results = [
      array_ops.matrix_set_diag(matrix_i, diag_i),
      array_ops.matrix_set_diag(matrices[0, ...], diag_i),
      array_ops.matrix_set_diag(matrix_i, diags[0, ...])
  ]
  if compat.forward_compatible(2019, 10, 31):
    k = (-1, 1)
    band_i = array_ops.gather(bands, i)
    results.extend([
        array_ops.matrix_set_diag(matrix_i, band_i, k=k),
        array_ops.matrix_set_diag(matrices[0, ...], band_i, k=k),
        array_ops.matrix_set_diag(matrix_i, bands[0, ...], k=k)
    ])
  return results
def _MatrixSetDiagGradV3(op, grad):
  """Gradient for MatrixSetDiagV3."""
  diag_shape = op.inputs[1].get_shape()
  align = op.get_attr("align")
  if not diag_shape.is_fully_defined():
    # Need to know the values of `d_lower` and `d_upper` to infer diag_shape.
    grad_shape = array_ops.shape(grad)
    batch_shape = grad_shape[:-2]
    matrix_shape = grad_shape[-2:]
    diag_index = array_ops.reshape(op.inputs[2], [-1])  # Converts to vector.
    d_lower = diag_index[0]
    d_upper = diag_index[-1]  # Works both when len(diag_index) is 1 and 2.
    y_offset = control_flow_ops.cond(
        math_ops.less(d_upper, 0), lambda: d_upper, lambda: 0)
    x_offset = control_flow_ops.cond(
        math_ops.greater(d_lower, 0), lambda: -d_lower, lambda: 0)

    max_diag_len = math_ops.minimum(matrix_shape[0] + y_offset,
                                    matrix_shape[1] + x_offset)
    # pylint: disable=g-long-lambda
    # pyformat: disable
    postfix = control_flow_ops.cond(
        math_ops.equal(d_lower, d_upper),
        lambda: ops.convert_to_tensor([max_diag_len]),
        lambda: ops.convert_to_tensor([d_upper - d_lower + 1,
                                       max_diag_len]))
    # pyformat: enable
    # pylint: enable=g-long-lambda
    diag_shape = array_ops.concat([batch_shape, postfix], 0)

  grad_input = array_ops.matrix_set_diag(
      grad,
      array_ops.zeros(diag_shape, dtype=grad.dtype),
      k=op.inputs[2],
      align=align)
  grad_diag = array_ops.matrix_diag_part(grad, k=op.inputs[2], align=align)
  return (grad_input, grad_diag, None)
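# A hedged worked example (not part of the source) of the diag_shape
# inference above, for a 4 x 5 matrix and k = (-1, 1): one subdiagonal, the
# main diagonal, and one superdiagonal, each padded to the length of the
# longest diagonal in the band.
d_lower, d_upper = -1, 1
num_rows, num_cols = 4, 5
y_offset = d_upper if d_upper < 0 else 0
x_offset = -d_lower if d_lower > 0 else 0
max_diag_len = min(num_rows + y_offset, num_cols + x_offset)
postfix = ([max_diag_len] if d_lower == d_upper
           else [d_upper - d_lower + 1, max_diag_len])
assert postfix == [3, 4]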
def add_to_tensor(self, mat, name="add_to_tensor"):
  """Add matrix represented by this operator to `mat`.  Equiv to `I + mat`.

  Args:
    mat: `Tensor` with same `dtype` and shape broadcastable to `self`.
    name: A name to give this `Op`.

  Returns:
    A `Tensor` with broadcast shape and same `dtype` as `self`.
  """
  with self._name_scope(name):  # pylint: disable=not-callable
    # Shape [B1,...,Bb, 1]
    multiplier_vector = array_ops.expand_dims(self.multiplier, -1)

    # Shape [C1,...,Cc, M, M]
    mat = ops.convert_to_tensor_v2_with_dispatch(mat, name="mat")

    # Shape [C1,...,Cc, M]
    mat_diag = array_ops.matrix_diag_part(mat)

    # multiplier_vector broadcasts here.
    new_diag = multiplier_vector + mat_diag

    return array_ops.matrix_set_diag(mat, new_diag)
def operator_and_matrix(self,
                        build_info,
                        dtype,
                        use_placeholder,
                        ensure_self_adjoint_and_pd=False):
  shape = list(build_info.shape)
  reflection_axis = linear_operator_test_util.random_sign_uniform(
      shape[:-1], minval=1., maxval=2., dtype=dtype)
  # Make sure unit norm.
  reflection_axis = reflection_axis / linalg_ops.norm(
      reflection_axis, axis=-1, keepdims=True)

  lin_op_reflection_axis = reflection_axis

  if use_placeholder:
    lin_op_reflection_axis = array_ops.placeholder_with_default(
        reflection_axis, shape=None)

  operator = householder.LinearOperatorHouseholder(lin_op_reflection_axis)

  mat = reflection_axis[..., array_ops.newaxis]
  matrix = -2 * math_ops.matmul(mat, mat, adjoint_b=True)
  matrix = array_ops.matrix_set_diag(
      matrix, 1. + array_ops.matrix_diag_part(matrix))

  return operator, matrix
def _covariance(self):
  p = self.probs
  ret = -math_ops.matmul(p[..., None], p[..., None, :])
  return array_ops.matrix_set_diag(ret, self._variance())