def _expand_independent_outputs(fvar, full_cov, full_output_cov):
    """
    Reshapes fvar to the correct shape, specified by `full_cov` and `full_output_cov`.

    :param fvar: has shape N x P (full_cov = False) or P x N x N (full_cov = True).
    :return:
    1. full_cov: True and full_output_cov: True
       fvar N x P x N x P
    2. full_cov: True and full_output_cov: False
       fvar P x N x N
    3. full_cov: False and full_output_cov: True
       fvar N x P x P
    4. full_cov: False and full_output_cov: False
       fvar N x P
    """
    if full_cov and full_output_cov:
        fvar = tf.matrix_diag(tf.transpose(fvar))  # N x N x P x P
        fvar = tf.transpose(fvar, [0, 2, 1, 3])  # N x P x N x P
    if not full_cov and full_output_cov:
        fvar = tf.matrix_diag(fvar)  # N x P x P
    if full_cov and not full_output_cov:
        pass  # P x N x N
    if not full_cov and not full_output_cov:
        pass  # N x P
    return fvar
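# --- Hedged example (not from the original source) ---
# A minimal shape check for the four cases documented above. This is only a
# sketch: it assumes a TF 1.x-style API where `tf.matrix_diag` is available,
# and the sizes N and P are arbitrary.
import numpy as np
import tensorflow as tf

N, P = 3, 2
fvar_marginal = tf.constant(np.random.rand(N, P))   # full_cov=False input, N x P
fvar_full = tf.constant(np.random.rand(P, N, N))    # full_cov=True input, P x N x N

# Case 3: marginal variances -> per-point P x P output covariances
print(tf.matrix_diag(fvar_marginal).shape)           # (N, P, P)

# Case 1: per-output N x N covariances -> joint N x P x N x P covariance
tmp = tf.matrix_diag(tf.transpose(fvar_full))        # (N, N, P, P)
print(tf.transpose(tmp, [0, 2, 1, 3]).shape)         # (N, P, N, P)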
def _expectation(p, rbf_kern, feat1, lin_kern, feat2, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n)
        - K_lin_{.,.} :: Linear kernel
        - K_rbf_{.,.} :: RBF kernel
    Different Z1 and Z2 are handled if p is diagonal and K_lin and K_rbf have disjoint
    active_dims, in which case the joint expectations simplify into a product of expectations

    :return: NxM1xM2
    """
    if rbf_kern.on_separate_dims(lin_kern) and isinstance(p, DiagonalGaussian):  # no joint expectations required
        eKxz1 = expectation(p, (rbf_kern, feat1))
        eKxz2 = expectation(p, (lin_kern, feat2))
        return eKxz1[:, :, None] * eKxz2[:, None, :]

    if feat1 != feat2:
        raise NotImplementedError("Features have to be the same for both kernels.")

    if rbf_kern.active_dims != lin_kern.active_dims:
        raise NotImplementedError("active_dims have to be the same for both kernels.")

    with params_as_tensors_for(rbf_kern), params_as_tensors_for(lin_kern), \
         params_as_tensors_for(feat1), params_as_tensors_for(feat2):
        # use only active dimensions
        Xcov = rbf_kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov)
        Z, Xmu = rbf_kern._slice(feat1.Z, p.mu)

        N = tf.shape(Xmu)[0]
        D = tf.shape(Xmu)[1]

        lin_kern_variances = lin_kern.variance if lin_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + lin_kern.variance

        rbf_kern_lengthscales = rbf_kern.lengthscales if rbf_kern.ARD \
            else tf.zeros((D,), dtype=settings.tf_float) + rbf_kern.lengthscales

        ## Begin RBF eKxz code:
        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(rbf_kern_lengthscales ** 2) + Xcov)  # NxDxD

        Z_transpose = tf.transpose(Z)
        all_diffs = Z_transpose - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(rbf_kern_lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        eKxz_rbf = rbf_kern.variance * (determinants[:, None] * exponent_mahalanobis)  ## NxM <- End RBF eKxz code

        tiled_Z = tf.tile(tf.expand_dims(Z_transpose, 0), (N, 1, 1))  # NxDxM
        z_L_inv_Xcov = tf.matmul(tiled_Z, Xcov / rbf_kern_lengthscales[:, None] ** 2., transpose_a=True)  # NxMxD

        cross_eKzxKxz = tf.cholesky_solve(
            chol_L_plus_Xcov, (lin_kern_variances * rbf_kern_lengthscales ** 2.)[..., None] * tiled_Z)  # NxDxM

        cross_eKzxKxz = tf.matmul((z_L_inv_Xcov + Xmu[:, None, :]) * eKxz_rbf[..., None], cross_eKzxKxz)  # NxMxM
        return cross_eKzxKxz
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None): """ Compute the expectation: expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n) - Ka_{.,.}, Kb_{.,.} :: RBF kernels Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims in which case the joint expectations simplify into a product of expectations :return: NxMxM """ if kern1.on_separate_dims(kern2) and isinstance(p, DiagonalGaussian): # no joint expectations required eKxz1 = expectation(p, (kern1, feat1)) eKxz2 = expectation(p, (kern2, feat2)) return eKxz1[:, :, None] * eKxz2[:, None, :] if feat1 != feat2 or kern1 != kern2: raise NotImplementedError("The expectation over two kernels has only an " "analytical implementation if both kernels are equal.") kern = kern1 feat = feat1 with params_as_tensors_for(kern), params_as_tensors_for(feat): # use only active dimensions Xcov = kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov) Z, Xmu = kern._slice(feat.Z, p.mu) N = tf.shape(Xmu)[0] D = tf.shape(Xmu)[1] squared_lengthscales = kern.lengthscales ** 2. if kern.ARD \ else tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales ** 2. sqrt_det_L = tf.reduce_prod(0.5 * squared_lengthscales) ** 0.5 C = tf.cholesky(0.5 * tf.matrix_diag(squared_lengthscales) + Xcov) # NxDxD dets = sqrt_det_L / tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(C)), axis=1)) # N C_inv_mu = tf.matrix_triangular_solve(C, tf.expand_dims(Xmu, 2), lower=True) # NxDx1 C_inv_z = tf.matrix_triangular_solve(C, tf.tile(tf.expand_dims(tf.transpose(Z) / 2., 0), [N, 1, 1]), lower=True) # NxDxM mu_CC_inv_mu = tf.expand_dims(tf.reduce_sum(tf.square(C_inv_mu), 1), 2) # Nx1x1 z_CC_inv_z = tf.reduce_sum(tf.square(C_inv_z), 1) # NxM zm_CC_inv_zn = tf.matmul(C_inv_z, C_inv_z, transpose_a=True) # NxMxM two_z_CC_inv_mu = 2 * tf.matmul(C_inv_z, C_inv_mu, transpose_a=True)[:, :, 0] # NxM exponent_mahalanobis = mu_CC_inv_mu + tf.expand_dims(z_CC_inv_z, 1) + \ tf.expand_dims(z_CC_inv_z, 2) + 2 * zm_CC_inv_zn - \ tf.expand_dims(two_z_CC_inv_mu, 2) - tf.expand_dims(two_z_CC_inv_mu, 1) # NxMxM exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis) # NxMxM # Compute sqrt(self.K(Z)) explicitly to prevent automatic gradient from # being NaN sometimes, see pull request #615 kernel_sqrt = tf.exp(-0.25 * kern.square_dist(Z, None)) return kern.variance ** 2 * kernel_sqrt * \ tf.reshape(dets, [N, 1, 1]) * exponent_mahalanobis
def _quadrature_expectation(p, obj1, feature1, obj2, feature2, num_gauss_hermite_points):
    """
    General handling of quadrature expectations for Gaussians and DiagonalGaussians
    Fallback method for missing analytic expectations
    """
    num_gauss_hermite_points = 100 if num_gauss_hermite_points is None else num_gauss_hermite_points

    warnings.warn("Quadrature is used to calculate the expectation. This means that "
                  "an analytical implementation is not available for the given combination.")

    if obj2 is None:
        eval_func = lambda x: get_eval_func(obj1, feature1)(x)
    elif obj1 is None:
        raise NotImplementedError("First object cannot be None.")
    else:
        eval_func = lambda x: (get_eval_func(obj1, feature1, np.s_[:, :, None])(x) *
                               get_eval_func(obj2, feature2, np.s_[:, None, :])(x))

    if isinstance(p, DiagonalGaussian):
        if isinstance(obj1, kernels.Kernel) and isinstance(obj2, kernels.Kernel) \
                and obj1.on_separate_dims(obj2):  # no joint expectations required
            eKxz1 = quadrature_expectation(p, (obj1, feature1),
                                           num_gauss_hermite_points=num_gauss_hermite_points)
            eKxz2 = quadrature_expectation(p, (obj2, feature2),
                                           num_gauss_hermite_points=num_gauss_hermite_points)
            return eKxz1[:, :, None] * eKxz2[:, None, :]
        else:
            cov = tf.matrix_diag(p.cov)
    else:
        cov = p.cov
    return mvnquad(eval_func, p.mu, cov, num_gauss_hermite_points)
def _build_predict(self, Xnew, full_cov=False): """ The posterior variance of F is given by q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1) Here we project this to F*, the values of the GP at Xnew which is given by q(F*) = N ( F* | K_{*F} alpha + mean, K_{**} - K_{*f}[K_{ff} + diag(lambda**-2)]^-1 K_{f*} ) """ # compute kernel things Kx = self.kern.K(self.X, Xnew) K = self.kern.K(self.X) # predictive mean f_mean = tf.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew) # predictive var A = K + tf.matrix_diag(tf.transpose(1. / tf.square(self.q_lambda))) L = tf.cholesky(A) Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1]) LiKx = tf.matrix_triangular_solve(L, Kx_tiled) if full_cov: f_var = self.kern.K(Xnew) - tf.matmul(LiKx, LiKx, transpose_a=True) else: f_var = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(LiKx), 1) return f_mean, tf.transpose(f_var)
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    :return: NxM
    """
    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        # use only active dimensions
        Xcov = kern._slice_cov(p.cov)
        Z, Xmu = kern._slice(feat.Z, p.mu)
        D = tf.shape(Xmu)[1]
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD

        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)  # NxDxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # NxDxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)
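# --- Hedged example (not from the original source) ---
# NumPy Monte-Carlo cross-check of the closed form computed above:
#   <k(x, z_m)>_{N(x | mu, S)} = sigma^2 * sqrt(det(L) / det(L + S))
#                                * exp(-0.5 (z_m - mu)^T (L + S)^{-1} (z_m - mu)),
# with L = diag(lengthscales**2). All sizes and hyperparameter values below are
# made-up test values, not taken from the snippet.
import numpy as np

rng = np.random.RandomState(0)
D, M = 2, 4
variance, lengthscales = 1.7, np.array([0.8, 1.3])
mu = rng.randn(D)
S = np.diag(rng.rand(D) + 0.5)                      # one input Gaussian p(x) = N(mu, S)
Z = rng.randn(M, D)

def rbf(x, z):
    return variance * np.exp(-0.5 * np.sum(((x - z) / lengthscales) ** 2, -1))

# Monte-Carlo estimate of <K(x, Z)>_p(x)
x = mu + rng.randn(200000, D) @ np.linalg.cholesky(S).T
mc = rbf(x[:, None, :], Z[None, :, :]).mean(0)

# Closed form
L = np.diag(lengthscales ** 2)
A = np.linalg.inv(L + S)
quad = np.einsum('md,de,me->m', Z - mu, A, Z - mu)
closed = variance * np.sqrt(np.linalg.det(L) / np.linalg.det(L + S)) * np.exp(-0.5 * quad)

print(np.max(np.abs(mc - closed)))                  # should be small (roughly 1e-2)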
def _expectation(p, mean, none, kern, feat, nghp=None): """ Compute the expectation: expectation[n] = <x_n K_{x_n, Z}>_p(x_n) - K_{.,.} :: RBF kernel :return: NxDxM """ Xmu, Xcov = p.mu, p.cov with tf.control_dependencies([tf.assert_equal( tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int), message="Currently cannot handle slicing in exKxz.")]): Xmu = tf.identity(Xmu) with params_as_tensors_for(kern), params_as_tensors_for(feat): D = tf.shape(Xmu)[1] lengthscales = kern.lengthscales if kern.ARD \ else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov) # NxDxD all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2) # NxDxM sqrt_det_L = tf.reduce_prod(lengthscales) sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1)) determinants = sqrt_det_L / sqrt_det_L_plus_Xcov # N exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs) # NxDxM non_exponent_term = tf.matmul(Xcov, exponent_mahalanobis, transpose_a=True) non_exponent_term = tf.expand_dims(Xmu, 2) + non_exponent_term # NxDxM exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1) # NxM exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis) # NxM return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
def Test(self):
    np.random.seed(1)
    n = shape_[-1]
    batch_shape = shape_[:-2]
    a = np.random.uniform(
        low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(dtype_)
    a += a.T
    a = np.tile(a, batch_shape + (1, 1))
    if dtype_ == np.float32:
        atol = 1e-4
    else:
        atol = 1e-12
    for compute_v in False, True:
        np_e, np_v = np.linalg.eig(a)
        with self.test_session():
            if compute_v:
                tf_e, tf_v = tf.self_adjoint_eig(tf.constant(a))

                # Check that V*diag(E)*V^T is close to A.
                a_ev = tf.batch_matmul(
                    tf.batch_matmul(tf_v, tf.matrix_diag(tf_e)), tf_v, adj_y=True)
                self.assertAllClose(a_ev.eval(), a, atol=atol)

                # Compare to numpy.linalg.eig.
                CompareEigenDecompositions(self, np_e, np_v, tf_e.eval(), tf_v.eval(), atol)
            else:
                tf_e = tf.self_adjoint_eigvals(tf.constant(a))
                self.assertAllClose(
                    np.sort(np_e, -1), np.sort(tf_e.eval(), -1), atol=atol)
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None): """ Compute the expectation: expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n) - Ka_{.,.}, Kb_{.,.} :: Linear kernels Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims in which case the joint expectations simplify into a product of expectations :return: NxMxM """ if kern1.on_separate_dims(kern2) and isinstance(p, DiagonalGaussian): # no joint expectations required eKxz1 = expectation(p, (kern1, feat1)) eKxz2 = expectation(p, (kern2, feat2)) return eKxz1[:, :, None] * eKxz2[:, None, :] if kern1 != kern2 or feat1 != feat2: raise NotImplementedError("The expectation over two kernels has only an " "analytical implementation if both kernels are equal.") kern = kern1 feat = feat1 with params_as_tensors_for(kern), params_as_tensors_for(feat): # use only active dimensions Xcov = kern._slice_cov(tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov) Z, Xmu = kern._slice(feat.Z, p.mu) N = tf.shape(Xmu)[0] var_Z = kern.variance * Z tiled_Z = tf.tile(tf.expand_dims(var_Z, 0), (N, 1, 1)) # NxMxD XX = Xcov + tf.expand_dims(Xmu, 1) * tf.expand_dims(Xmu, 2) # NxDxD return tf.matmul(tf.matmul(tiled_Z, XX), tiled_Z, transpose_b=True)
def testSampleWithBroadcastScale(self): # mu corresponds to a 2-batch of 3-variate normals mu = np.zeros([2, 3]) # diag corresponds to no batches of 3-variate normals diag = np.ones([3]) with self.test_session(): dist = tfd.VectorExponentialDiag(mu, diag, validate_args=True) mean = dist.mean() self.assertAllEqual([2, 3], mean.get_shape()) self.assertAllClose(mu + diag, mean.eval()) n = int(1e4) samps = dist.sample(n, seed=0).eval() samps_centered = samps - samps.mean(axis=0) cov_mat = tf.matrix_diag(diag).eval()**2 sample_cov = np.matmul(samps_centered.transpose([1, 2, 0]), samps_centered.transpose([1, 0, 2])) / n self.assertAllClose(mu + diag, samps.mean(axis=0), atol=0.10, rtol=0.05) self.assertAllClose([cov_mat, cov_mat], sample_cov, atol=0.10, rtol=0.05)
def K(self, X, X2=None, presliced=False):
    if X2 is None:
        d = tf.fill(tf.stack([tf.shape(X)[0]]), tf.squeeze(self.variance))
        return tf.matrix_diag(d)
    else:
        shape = tf.stack([tf.shape(X)[0], tf.shape(X2)[0]])
        return tf.zeros(shape, settings.float_type)
def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder): shape = list(shape) diag_shape = shape[:-1] diag = tf.random_normal(diag_shape, dtype=dtype.real_dtype) if dtype.is_complex: diag = tf.complex( diag, tf.random_normal(diag_shape, dtype=dtype.real_dtype)) diag_ph = tf.placeholder(dtype=dtype) if use_placeholder: # Evaluate the diag here because (i) you cannot feed a tensor, and (ii) # diag is random and we want the same value used for both mat and # feed_dict. diag = diag.eval() operator = linalg.LinearOperatorDiag(diag_ph) feed_dict = {diag_ph: diag} else: operator = linalg.LinearOperatorDiag(diag) feed_dict = None mat = tf.matrix_diag(diag) return operator, mat, feed_dict
def testVector(self):
    with self.test_session(use_gpu=self._use_gpu):
        v = np.array([1.0, 2.0, 3.0])
        mat = np.diag(v)
        v_diag = tf.matrix_diag(v)
        self.assertEqual((3, 3), v_diag.get_shape())
        self.assertAllEqual(v_diag.eval(), mat)
def K(self, X, X2=None, full_output_cov=True):
    K = self.kern.K(X, X2)  # N x N2
    if full_output_cov:
        Ks = tf.tile(K[..., None], [1, 1, self.P])  # N x N2 x P
        return tf.transpose(tf.matrix_diag(Ks), [0, 2, 1, 3])  # N x P x N2 x P
    else:
        return tf.tile(K[None, ...], [self.P, 1, 1])  # P x N x N2
def test_broadcast_apply_and_solve(self): # These cannot be done in the automated (base test class) tests since they # test shapes that tf.matmul cannot handle. # In particular, tf.matmul does not broadcast. with self.test_session() as sess: x = tf.random_normal(shape=(2, 2, 3, 4)) # This LinearOperatorDiag will be brodacast to (2, 2, 3, 3) during solve # and apply with 'x' as the argument. diag = tf.random_uniform(shape=(2, 1, 3)) operator = linalg.LinearOperatorDiag(diag) self.assertAllEqual((2, 1, 3, 3), operator.shape) # Create a batch matrix with the broadcast shape of operator. diag_broadcast = tf.concat(1, (diag, diag)) mat = tf.matrix_diag(diag_broadcast) self.assertAllEqual((2, 2, 3, 3), mat.get_shape()) # being pedantic. operator_apply = operator.apply(x) mat_apply = tf.matmul(mat, x) self.assertAllEqual(operator_apply.get_shape(), mat_apply.get_shape()) self.assertAllClose(*sess.run([operator_apply, mat_apply])) operator_solve = operator.solve(x) mat_solve = tf.matrix_solve(mat, x) self.assertAllEqual(operator_solve.get_shape(), mat_solve.get_shape()) self.assertAllClose(*sess.run([operator_solve, mat_solve]))
def K(self, X, X2=None, presliced=False):
    if X2 is None:
        d = tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))
        return tf.matrix_diag(d)
    else:
        shape = tf.concat([tf.shape(X)[:-2],
                           tf.reshape(tf.shape(X)[-2], [1]),
                           tf.reshape(tf.shape(X2)[-2], [1])], 0)
        return tf.zeros(shape, settings.float_type)
def testSample(self):
    mu = [-1., 1]
    diag = [1., -2]
    dist = tfd.VectorLaplaceDiag(mu, diag, validate_args=True)
    samps = self.evaluate(dist.sample(int(1e4), seed=0))
    cov_mat = 2. * self.evaluate(tf.matrix_diag(diag))**2
    self.assertAllClose(mu, samps.mean(axis=0), atol=0., rtol=0.05)
    self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.05, rtol=0.05)
def K(self, X, X2=None): X, X2 = self._slice(X, X2) X = tf.cast(X[:, 0], tf.int32) if X2 is None: X2 = X else: X2 = tf.cast(X2[:, 0], tf.int32) B = tf.matmul(self.W, self.W, transpose_b=True) + tf.matrix_diag(self.kappa) return tf.gather(tf.transpose(tf.gather(B, X2)), X)
def testGrad(self):
    shapes = ((3,), (7, 4))
    with self.test_session(use_gpu=self._use_gpu):
        for shape in shapes:
            x = tf.constant(np.random.rand(*shape), np.float32)
            y = tf.matrix_diag(x)
            error = tf.test.compute_gradient_error(x, x.get_shape().as_list(),
                                                   y, y.get_shape().as_list())
            self.assertLess(error, 1e-4)
def _build_operator_and_mat(self, batch_shape, k, dtype=np.float64): # Build an identity matrix with right shape and dtype. # Build an operator that should act the same way. batch_shape = list(batch_shape) diag_shape = batch_shape + [k] matrix_shape = batch_shape + [k, k] diag = tf.ones(diag_shape, dtype=dtype) identity_matrix = tf.matrix_diag(diag) operator = operator_pd_identity.OperatorPDIdentity(matrix_shape, dtype) return operator, identity_matrix.eval()
def testSample(self):
    mu = [-1.0, 1.0]
    diag = [1.0, 2.0]
    with self.test_session():
        dist = distributions.MultivariateNormalDiag(mu, diag)
        samps = dist.sample_n(1000, seed=0).eval()
        cov_mat = tf.matrix_diag(diag).eval()**2
        self.assertAllClose(mu, samps.mean(axis=0), atol=0.1)
        self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
def testMultivariateNormalDiagWithSoftplusStDev(self): mu = [-1.0, 1.0] diag = [-1.0, -2.0] with self.test_session(): dist = distributions.MultivariateNormalDiagWithSoftplusStDev(mu, diag) samps = dist.sample(1000, seed=0).eval() cov_mat = tf.matrix_diag(tf.nn.softplus(diag)).eval()**2 self.assertAllClose(mu, samps.mean(axis=0), atol=0.1) self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
def testMultivariateNormalDiagWithSoftplusScale(self): mu = [-1.0, 1.0] diag = [-1.0, -2.0] dist = tfd.MultivariateNormalDiagWithSoftplusScale( mu, diag, validate_args=True) samps = self.evaluate(dist.sample(1000, seed=0)) cov_mat = self.evaluate(tf.matrix_diag(tf.nn.softplus(diag))**2) self.assertAllClose(mu, samps.mean(axis=0), atol=0.1) self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
def _covariance(self):
    # Let
    #   W = (w1,...,wk), with wj ~ iid Exponential(0, 1).
    # Then this distribution is
    #   X = loc + LW,
    # and then since Cov(wi, wj) = 1 if i=j, and 0 otherwise,
    #   Cov(X) = L Cov(W W^T) L^T = L L^T.
    if distribution_util.is_diagonal_scale(self.scale):
        return tf.matrix_diag(tf.square(self.scale.diag_part()))
    else:
        return self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
def testSample(self): mu = [-1., 1] diag = [1., -2] with self.test_session(): dist = tfd.MultivariateNormalDiag(mu, diag, validate_args=True) samps = dist.sample(int(1e3), seed=0).eval() cov_mat = tf.matrix_diag(diag).eval()**2 self.assertAllClose(mu, samps.mean(axis=0), atol=0., rtol=0.05) self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.05, rtol=0.05)
def testSample(self): mu = [-2., 1] diag = [1., -2] with self.test_session(): dist = tfd.VectorExponentialDiag(mu, diag, validate_args=True) samps = dist.sample(int(1e4), seed=0).eval() cov_mat = tf.matrix_diag(diag).eval()**2 self.assertAllClose([-2 + 1, 1. - 2], samps.mean(axis=0), atol=0., rtol=0.05) self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.05, rtol=0.05)
def _covariance(self):
    # Let
    #   W = (w1,...,wk), with wj ~ iid Laplace(0, 1).
    # Then this distribution is
    #   X = loc + LW,
    # and since E[X] = loc,
    #   Cov(X) = E[LW W^T L^T] = L E[W W^T] L^T.
    # Since E[wi wj] = 0 if i != j, and 2 if i == j, we have
    #   Cov(X) = 2 LL^T
    if distribution_util.is_diagonal_scale(self.scale):
        return 2. * tf.matrix_diag(tf.square(self.scale.diag_part()))
    else:
        return 2. * self.scale.matmul(self.scale.to_dense(), adjoint_arg=True)
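# --- Hedged example (not from the original source) ---
# Quick NumPy sanity check of the Cov(X) = 2 L L^T identity derived in the
# comments above, using an arbitrary diagonal scale.
import numpy as np

rng = np.random.RandomState(0)
scale = np.array([1.0, 2.0])               # diagonal L
w = rng.laplace(size=(200000, 2))          # iid Laplace(0, 1), each with variance 2
x = w * scale                              # X = loc + L W, with loc = 0

print(np.cov(x.T))                         # approximately 2 * diag(scale)**2
print(2. * np.diag(scale ** 2))            # [[2, 0], [0, 8]]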
def testBatchVector(self): with self.test_session(use_gpu=self._use_gpu): v_batch = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) mat_batch = np.array( [[[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 3.0]], [[4.0, 0.0, 0.0], [0.0, 5.0, 0.0], [0.0, 0.0, 6.0]]]) v_batch_diag = tf.matrix_diag(v_batch) self.assertEqual((2, 3, 3), v_batch_diag.get_shape()) self.assertAllEqual(v_batch_diag.eval(), mat_batch)
def _updated_mat(self, mat, v, diag): # Get dense matrix defined by its square root, which is an update of `mat`: # A = (mat + v D v^T) (mat + v D v^T)^T # D is the diagonal matrix with `diag` on the diagonal. # If diag is None, then it defaults to the identity matrix, so DV^T = V^T if diag is None: diag_vt = tf.matrix_transpose(v) else: diag_mat = tf.matrix_diag(diag) diag_vt = tf.matmul(diag_mat, v, adjoint_b=True) v_diag_vt = tf.matmul(v, diag_vt) sqrt = mat + v_diag_vt a = tf.matmul(sqrt, sqrt, adjoint_b=True) return a.eval()
def _operator_and_mat_and_feed_dict(self, shape, dtype, use_placeholder): diag = linear_operator_test_util.random_sign_uniform( shape[:-1], minval=1., maxval=2., dtype=dtype) if use_placeholder: diag_ph = tf.placeholder(dtype=dtype) # Evaluate the diag here because (i) you cannot feed a tensor, and (ii) # diag is random and we want the same value used for both mat and # feed_dict. diag = diag.eval() operator = linalg.LinearOperatorDiag(diag_ph) feed_dict = {diag_ph: diag} else: operator = linalg.LinearOperatorDiag(diag) feed_dict = None mat = tf.matrix_diag(diag) return operator, mat, feed_dict
def batch_matrix_log(x, epsilon):
    """
    Matrix log with epsilon to ensure stability. Input must be a symmetric matrix.

    Parameters
    ----------
    x : tf.Tensor with [..., dim1, dim2]
    epsilon

    Returns
    -------
    log of eigen-values.
    """
    s, u, v = tf.svd(x)
    # print(s.eval())
    inner = s + epsilon
    inner = tf.log(inner)
    inner = tf.matrix_diag(inner)
    return tf.matmul(u, tf.matmul(inner, tf.transpose(u, [0, 2, 1])))
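# --- Hedged example (not from the original source) ---
# The same epsilon-shifted matrix log can be checked in NumPy: for a symmetric
# positive-definite input, exponentiating the result should approximately
# recover the input once epsilon is small. This sketch uses eigh instead of
# SVD (the two coincide for symmetric positive semi-definite matrices) and
# assumes SciPy is available for expm.
import numpy as np
from scipy.linalg import expm

x = np.array([[2.0, 0.5], [0.5, 1.0]])      # symmetric positive definite
eps = 1e-6

s, u = np.linalg.eigh(x)                    # x = u diag(s) u^T
log_x = u @ np.diag(np.log(s + eps)) @ u.T  # same construction as above

print(np.max(np.abs(expm(log_x) - x)))      # on the order of eps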
def _quadrature_expectation(p, obj1, feature1, obj2, feature2, num_gauss_hermite_points):
    """
    General handling of quadrature expectations for Gaussians and DiagonalGaussians
    Fallback method for missing analytic expectations
    """
    num_gauss_hermite_points = 100 if num_gauss_hermite_points is None else num_gauss_hermite_points

    logger.warn(
        "Quadrature is used to calculate the expectation. This means that "
        "an analytical implementation is not available for the given combination."
    )

    if obj2 is None:
        eval_func = lambda x: get_eval_func(obj1, feature1)(x)
    elif obj1 is None:
        raise NotImplementedError("First object cannot be None.")
    else:
        eval_func = lambda x: (get_eval_func(obj1, feature1, np.s_[:, :, None])(x) *
                               get_eval_func(obj2, feature2, np.s_[:, None, :])(x))

    if isinstance(p, DiagonalGaussian):
        if isinstance(obj1, kernels.Kernel) and isinstance(obj2, kernels.Kernel) \
                and obj1.on_separate_dims(obj2):  # no joint expectations required
            eKxz1 = quadrature_expectation(
                p, (obj1, feature1),
                num_gauss_hermite_points=num_gauss_hermite_points)
            eKxz2 = quadrature_expectation(
                p, (obj2, feature2),
                num_gauss_hermite_points=num_gauss_hermite_points)
            return eKxz1[:, :, None] * eKxz2[:, None, :]
        else:
            cov = tf.matrix_diag(p.cov)
    else:
        cov = p.cov
    return mvnquad(eval_func, p.mu, cov, num_gauss_hermite_points)
def constraints(self): # don't how to do this, to keep in consistent, need to return this twice # decisions_at_negative = tf.less(self._decision_vars, 0.0 - self.epsilon) decisions_at_bound = 1 - tf.abs( tf.cast(tf.equal(self._decision_vars, 0.0), dtype=tf.float32) + tf.cast(tf.equal(self._decision_vars, 1.0), dtype=tf.float32)) # decisions_at_bound *= 100 # positive_margin = 0.5 - tf.abs(self._decision_vars - 0.5) - self._epsilon # negative_margin = -0.5 + tf.abs(self._decision_vars - 0.5) - self._epsilon # positive_margin = tf.reshape(positive_margin, [-1]) # negative_margin = tf.reshape(negative_margin, [-1]) # assume epsilon won't be too large s.t. two interval overlap # decisions_at_bound = tf.abs( # tf.cast(tf.less(tf.abs(self._decision_vars - 0.0), self._epsilon), dtype=tf.float32) + # tf.cast(tf.less(tf.abs(self._decision_vars - 1.0), self._epsilon), dtype=tf.float32)) # decisions_at_negative = tf.less(self._decision_vars, 0.0, 'decisions_at_negative_side') decisions_at_bound = tf.reshape(decisions_at_bound, [-1]) decision_vars = tf.rint(self._decision_vars) lose_diagonal = decision_vars - tf.matrix_diag( tf.matrix_diag_part(decision_vars)) predecessor = tf.reduce_sum(lose_diagonal, 0) - 1 successor = tf.reduce_sum(lose_diagonal, 1) - 1 n = self._size mtz = [] # mtz, Miller-Tucker-Zemlin formulation # enforce single subtour for i in range(n): for j in range(n): if i != j: mtz.append(dummy_vars[i] - dummy_vars[j] + n * lose_diagonal[i][j] - n + 1) mtz = tf.stack(mtz) # n x (n-1) return tf.concat([ decisions_at_bound, decisions_at_bound, predecessor, successor, mtz ], 0)
def classification_loss(self): F_h_h = tf.matmul(self.h_temp, tf.transpose(self.h_temp)) F_hn_hn = tf.diag_part(F_h_h) F_h_h = tf.subtract(F_h_h, tf.matrix_diag(F_hn_hn)) classes = tf.reduce_max(self.gt) - tf.reduce_min(self.gt) + 1 label_onehot = tf.one_hot(self.gt - 1, classes) # gt begin from 1 label_num = tf.reduce_sum( label_onehot, 0, keep_dims=True) # should sub 1.Avoid numerical errors F_h_h_sum = tf.matmul(F_h_h, label_onehot) label_num_broadcast = tf.tile(label_num, [self.trainLen, 1]) - label_onehot F_h_h_mean = tf.divide(F_h_h_sum, label_num_broadcast) gt_ = tf.cast(tf.argmax(F_h_h_mean, axis=1), tf.int32) + 1 # gt begin from 1 F_h_h_mean_max = tf.reduce_max(F_h_h_mean, axis=1, keep_dims=False) theta = tf.cast(tf.not_equal(self.gt, gt_), tf.float32) F_h_hn_mean_ = tf.multiply(F_h_h_mean, label_onehot) F_h_hn_mean = tf.reduce_sum(F_h_hn_mean_, axis=1, name='F_h_hn_mean') return tf.reduce_sum( tf.nn.relu(tf.add(theta, tf.subtract(F_h_h_mean_max, F_h_hn_mean))))
def svd(A, full_matrices=False, compute_uv=True, name=None): # since dA = dUSVt + UdSVt + USdVt # we can simply recompute each matrix using A = USVt # while blocking gradients to the original op. _, M, N = A.get_shape().as_list() P = min(M, N) S0, U0, V0 = map(tf.stop_gradient, tf.svd(A, full_matrices=True, name=name)) Ui, Vti = map(tf.matrix_inverse, [U0, tf.transpose(V0, (0, 2, 1))]) # A = USVt # S = UiAVti S = tf.matmul(Ui, tf.matmul(A, Vti)) S = tf.matrix_diag_part(S) if not compute_uv: return S Si = tf.pad(tf.matrix_diag(1/S0), [[0,0], [0,N-P], [0,M-P]]) # U = AVtiSi U = tf.matmul(A, tf.matmul(Vti, Si)) U = U if full_matrices else U[:, :M, :P] # Vt = SiUiA V = tf.transpose(tf.matmul(Si, tf.matmul(Ui, A)), (0, 2, 1)) V = V if full_matrices else V[:, :N, :P] return S, U, V
def log_prob_fn(params): rho, alpha, sigma = tf.split(params, [num_features, 1, 1], -1) one = tf.ones(num_features) def indep(d): return tfd.Independent(d, 1) p_rho = indep(tfd.InverseGamma(5. * one, 5. * one)) p_alpha = indep(tfd.HalfNormal([1.])) p_sigma = indep(tfd.HalfNormal([1.])) rho_shape = tf.shape(rho) alpha_shape = tf.shape(alpha) x1 = tf.expand_dims(x, -2) x2 = tf.expand_dims(x, -3) exp = -0.5 * tf.squared_difference(x1, x2) exp /= tf.reshape( tf.square(rho), tf.concat([rho_shape[:1], [1, 1], rho_shape[1:]], 0)) exp = tf.reduce_sum(exp, -1, keep_dims=True) exp += 2. * tf.reshape( tf.log(alpha), tf.concat([alpha_shape[:1], [1, 1], alpha_shape[1:]], 0)) exp = tf.exp(exp[Ellipsis, 0]) exp += tf.matrix_diag( tf.tile(tf.square(sigma), [1, int(x.shape[0])]) + 1e-6) exp = tf.check_numerics(exp, "exp 2 has NaNs") with tf.control_dependencies([tf.print(exp[0], summarize=99999)]): exp = tf.identity(exp) p_y = tfd.MultivariateNormalFullCovariance(covariance_matrix=exp) log_prob = (p_rho.log_prob(rho) + p_alpha.log_prob(alpha) + p_sigma.log_prob(sigma) + p_y.log_prob(y)) return log_prob
def gradient_svd(op, ds, dU, dV): s, U, V = op.outputs u_sz = tf.squeeze(tf.slice(tf.shape(dU),[1],[1])) v_sz = tf.squeeze(tf.slice(tf.shape(dV),[1],[1])) s_sz = tf.squeeze(tf.slice(tf.shape(ds),[1],[1])) S = tf.matrix_diag(s) s_2 = tf.square(s) eye = tf.expand_dims(tf.eye(s_sz),0) k = (1 - eye)/(tf.expand_dims(s_2,2)-tf.expand_dims(s_2,1) + eye) KT = tf.matrix_transpose(k) KT = removenan(KT) def msym(X): return (X+tf.matrix_transpose(X)) def left_grad(U,S,V,dU,dV): U, V = (V, U); dU, dV = (dV, dU) D = tf.matmul(dU,tf.matrix_diag(1/(s+1e-8))) US = tf.matmul(U,S) grad = tf.matmul(D, V, transpose_b=True)\ +tf.matmul(tf.matmul(U,tf.matrix_diag(tf.matrix_diag_part(-tf.matmul(U,D,transpose_a=True)))), V, transpose_b=True)\ +tf.matmul(2*tf.matmul(US, msym(KT*(tf.matmul(V,-tf.matmul(V,tf.matmul(D,US,transpose_a=True)),transpose_a=True)))),V,transpose_b=True) grad = tf.matrix_transpose(grad) return grad def right_grad(U,S,V,dU,dV): US = tf.matmul(U,S) grad = tf.matmul(2*tf.matmul(US, msym(KT*(tf.matmul(V,dV,transpose_a=True))) ),V,transpose_b=True) return grad grad = tf.cond(tf.greater(v_sz, u_sz), lambda : left_grad(U,S,V,dU,dV), lambda : right_grad(U,S,V,dU,dV)) return [grad]
def __init__(self, beta, gamma, mean, variance, conv_weights, strides=(1, 1), padding='same', epsilon=1e-3, dilation_rate=(1, 1), **kwargs): super(FuseConvBN, self).__init__(**kwargs) # conv layer config self.strides = strides self.padding = padding self.dilation_rate = dilation_rate # origin weights self.beta = tf.constant(beta, dtype='float32') self.gamma = tf.constant(gamma, dtype='float32') self.mean = tf.constant(mean, dtype='float32') self.variance = tf.constant(variance, dtype='float32') self.conv_weights = tf.constant(conv_weights, dtype='float32') # compute W_{bn} & b_{bn} k, k, filters_in, filters_out = K.int_shape( self.conv_weights) # build shape in keras Conv self.filters_out = filters_out weights_conv = tf.reshape( tf.transpose(self.conv_weights, (3, 0, 1, 2)), (filters_out, -1)) weights_bn = tf.matrix_diag(gamma / tf.sqrt(variance + epsilon)) bias_bn = beta - gamma * mean / tf.sqrt(variance + epsilon) # compute fused W & b fused_weights = tf.matmul(weights_bn, weights_conv) self.fused_weights = tf.transpose( tf.reshape(fused_weights, (filters_out, k, k, filters_in)), (1, 2, 3, 0)) self.fused_bias = bias_bn
def _expectation(p, kern, feat, none1, none2, nghp=None):
    """
    Compute the expectation:
    <K_{X, Z}>_p(X)
        - K_{.,.} :: RBF kernel

    Note that in this case p(X) is factorized per latent dimension, not independently
    over data points.

    :return: NxM
    """
    print('TGaussian Psi1')
    with params_as_tensors_for(kern, feat):
        Xcov = kern._slice_cov(p.cov)  # QxNxN - because the variational distribution is factorized only per latent dimension
        Z, Xmu = kern._slice(feat.Z, p.mu)  # MxQ and NxQ
        D = tf.shape(Xmu)[1]  # Q
        if kern.ARD:
            lengthscales = kern.lengthscales
        else:
            lengthscales = tf.zeros((D,), dtype=settings.tf_float) + kern.lengthscales

        # The entry of this covariance matrix is the variance
        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # QxNxN
        print(chol_L_plus_Xcov)

        all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2)
        all_diffs = tf.transpose(all_diffs, [1, 0, 2])  # QxNxM
        exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True)  # QxNxN \ QxNxM
        exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1)  # Q x M
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # Q x M

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)
def decode(self, vec_rep): """De-vectorizes the bosonic matrices. Reverse of the encode method. Arguments: vec_rep (tensor of shape (batch_size, K)): the vectorized gauge representatives Returns: rep (tensor of shape (batch_size, 3, N, N)): the bosonic matrix gauge representatives """ batch_size = int(vec_rep.shape[0]) if self.bijector is None: vec_rep = tf.reshape(vec_rep, [batch_size, -1, self.algebra.dim]) return self.algebra.vector_to_matrix(vec_rep) else: N = self.algebra.N dim = N - 1 diff = self.bijector.forward(vec_rep[:, :dim]) # by our gauge fixing, each row of e must be nondecreasing e = tf.concat( [tf.zeros([batch_size, 1]), tf.cumsum(diff, axis=-1)], axis=-1) # the sum of each row of e should also be zero e = e - tf.reduce_sum(e, axis=-1, keepdims=True) / N mat_e = tf.cast(tf.expand_dims(tf.matrix_diag(e), axis=-3), tf.complex64) # by our gauge fixing, first (N - 1) elements in each row of vec_rest must vanish # and the following (N - 1) elements must be positive # note this depends on our ordering of the SU(N) basis in algebra.py vec_rest = tf.concat([ tf.zeros([batch_size, dim]), self.bijector.forward(vec_rep[:, dim:2 * dim]), vec_rep[:, 2 * dim:] ], axis=-1) vec_rest = tf.reshape(vec_rest, [batch_size, 2, self.algebra.dim]) mat_rest = self.algebra.vector_to_matrix(vec_rest) rep = tf.concat([mat_e, mat_rest], axis=-3) return rep
def transition_model(self, states, covariances, odometry): """ Implements a stochastic transition model for localization. :param states: tf op (batch, K, 3), particle states before the update. :param covariances: tf op (batch, K, 3, 3) :param odometry: tf op (batch, 3), odometry reading, relative motion in the robot coordinate frame :return: particle_states updated with the odometry and optionally transition noise """ translation_std = self.params.transition_std[ 0] / self.params.map_pixel_in_meters # In pixels rotation_std = self.params.transition_std[1] # In radians with tf.name_scope('transition'): part_x, part_y, part_th = tf.unstack(states, axis=-1, num=3) odometry = tf.expand_dims(odometry, axis=1) odom_x, odom_y, odom_th = tf.unstack(odometry, axis=-1, num=3) cos_th = tf.cos(part_th) sin_th = tf.sin(part_th) delta_x = cos_th * odom_x - sin_th * odom_y delta_y = sin_th * odom_x + cos_th * odom_y delta_th = odom_th new_th = tf.mod(part_th + delta_th + np.pi, 2 * np.pi) - np.pi states = tf.stack([part_x + delta_x, part_y + delta_y, new_th], axis=-1) pose_cov = tf.square( tf.constant([translation_std, translation_std, rotation_std], tf.float32)) noise = tf.abs( tf.random_normal(states.get_shape(), mean=0.0, stddev=1.0)) * pose_cov noise = tf.matrix_diag(noise) covariances = covariances + noise return states, covariances
def get_params(self, x, c, b, m, id): B = tf.shape(x)[0] d = self.hps.dimension mask = np.arange(d, dtype=np.float32) mask = tf.mod(mask + id, 2) mask = tf.tile(tf.expand_dims(mask, axis=0), [B, 1]) inp = tf.concat([x * mask, mask, c, b, m], axis=1) params = self.nets[id](inp) scale, shift = tf.split(params, 2, axis=1) # reorder query = m * (1 - b) order = tf.contrib.framework.argsort(query, direction='DESCENDING', stable=True) t = tf.batch_gather(tf.matrix_diag(query), order) t = tf.transpose(t, perm=[0, 2, 1]) scale = tf.einsum('nd,ndi->ni', scale, t) shift = tf.einsum('nd,ndi->ni', shift, t) # mask scale = scale * (1. - mask) shift = shift * (1. - mask) return scale, shift
def _getMatrixTree(r, A, mask1, mask2, mask_multiply, mask_add): if mask_multiply is None: A_masked = A else: A_masked = A * mask_multiply L_reduce = tf.reduce_sum(A_masked, 1) L_diag = tf.matrix_diag(L_reduce) L_minus = L_diag - A_masked LL_diag = L_minus[:, 1:, :] LL = tf.concat([tf.expand_dims(r, [1]), LL_diag], 1) if mask_multiply is None: LL_inv = tf.matrix_inverse(LL) else: LL_masked = mask_multiply * LL LL_masked = LL_masked + mask_add LL_inv = tf.matrix_inverse(LL_masked) # batch_l, doc_l, doc_l d0 = tf.multiply(r, LL_inv[:, :, 0]) # root LL_inv_diag = tf.expand_dims(tf.matrix_diag_part(LL_inv), 2) tmp1 = tf.matrix_transpose(tf.multiply(tf.matrix_transpose(A_masked), LL_inv_diag)) tmp2 = tf.multiply(A_masked, tf.matrix_transpose(LL_inv)) d_no_root = mask1 * tmp1 - mask2 * tmp2 d = tf.concat([tf.expand_dims(d0,[1]), d_no_root], 1) # add column at beginning for root return d, d_no_root, LL
def dignoal(x, kernel_size, scatter_rate):
    # Compute the mean using diagonal sampling.
    b = tf.shape(x)[0]
    w = tf.shape(x)[1]
    h = tf.shape(x)[2]
    dig = tf.matrix_diag(
        (([0] * scatter_rate + [1]) * kernel_size)[:kernel_size])
    dig = tf.tile(dig, [
        tf.cast(tf.math.ceil(w / kernel_size), dtype=tf.int32),
        tf.cast(tf.math.ceil(w / kernel_size), dtype=tf.int32)
    ])[:w, :h]
    dig = tf.tile(tf.expand_dims(tf.expand_dims(dig, axis=0), axis=-1),
                  [b, 1, 1, tf.shape(x)[-1]])
    x_ = x * tf.cast(dig, dtype=tf.float32)
    num = (tf.cast(b, dtype=tf.float64) * tf.math.floor(w / kernel_size) * tf.math.floor(h / kernel_size)
           * tf.cast(tf.math.floor(kernel_size / (scatter_rate + 1)), dtype=tf.float64)
           + tf.cast(b, dtype=tf.float64) * tf.math.floor(tf.floormod(w, kernel_size) / (scatter_rate + 1))
           * tf.math.floor(h / kernel_size)
           + tf.cast(b, dtype=tf.float64) * tf.math.floor(tf.floormod(h, kernel_size) / (scatter_rate + 1))
           * tf.math.floor(w / kernel_size)
           + tf.cast(b, dtype=tf.float64) * tf.math.floor(
               tf.reduce_min([tf.floormod(w, kernel_size), tf.floormod(h, kernel_size)]) / (scatter_rate + 1)))
    ave = tf.reduce_sum(x_, axis=[0, 1, 2]) / tf.expand_dims(
        tf.cast(num, dtype=tf.float32), axis=-1)
    return ave
def dx_dtheta_log_px(self, dmu_log_px_, w_px_i_px_norm, exponent_, xi_): # Returns a n * d * # of components matrix dx_dw_log_px_, dx_dmu_log_px_, dx_dsigma2_log_px_ = [], [], [] zeta = tf.reduce_sum(dmu_log_px_, [0]) for i in range(self.weights.shape[0]): zeta_m_exponent = tf.expand_dims(zeta - exponent_[i], 1) w_px_i_px_norm_i = tf.expand_dims(w_px_i_px_norm[i], -1) dx_dw_log_px_.append(zeta_m_exponent * w_px_i_px_norm_i) diag_precision = tf.diag(1. / self.distributions[i].variance()) exponent_i_tensor = tf.expand_dims(exponent_[i], -1) dx_dmu_log_px_.append( (tf.matmul(exponent_i_tensor, zeta_m_exponent) + diag_precision) * w_px_i_px_norm_i) xi_i_tensor = tf.expand_dims(xi_[i], -1) exponent_i_diag = tf.matrix_diag(exponent_[i]) diag_precision = tf.expand_dims(diag_precision, 0) dx_dsigma2_log_px_.append( (tf.matmul(xi_i_tensor, zeta_m_exponent) + exponent_i_diag * diag_precision) * w_px_i_px_norm_i) dx_dw_log_px, dx_dmu_log_px, dx_dsigma2_log_px = tf.stack( dx_dw_log_px_), tf.stack(dx_dmu_log_px_), tf.stack( dx_dsigma2_log_px_) return [dx_dw_log_px, dx_dmu_log_px, dx_dsigma2_log_px]
def _no_cho(Kf=Kf, y=y):
    Kf = (Kf + tf.transpose(Kf, perm=[0, 2, 1])) / 2.
    e, v = tf.self_adjoint_eig(Kf)
    e = tf.where(e > 1e-14, e, 1e-14 * tf.ones_like(e))
    # reconstruct with eigenvectors in the columns of v: Kf = v diag(e) v^T
    Kf = tf.matmul(tf.matmul(v, tf.matrix_diag(e)), v, transpose_b=True)
    logdet = tf.reduce_sum(tf.where(e > 1e-14, tf.log(e), tf.zeros_like(e)),
                           axis=-1, name='logdet')
    # batch_size, n, 1
    alpha = tf.squeeze(tf.matrix_solve(Kf, tf.expand_dims(y, -1), name='solve_alpha'), axis=2)
    fstar = tf.matmul(Knm, tf.expand_dims(alpha, -1), transpose_a=True)
    cov = Kmm
    cov -= tf.matmul(Knm, tf.matrix_solve(Kf, Knm), transpose_a=True)
    log_mar_like = (-tf.reduce_sum(y * alpha, axis=1) - logdet - n * np.log(2. * np.pi)) / 2.
    return fstar, cov, log_mar_like
def testSampleWithBroadcastScale(self): # mu corresponds to a 2-batch of 3-variate normals mu = np.zeros([2, 3]) # diag corresponds to no batches of 3-variate normals diag = np.ones([3]) dist = tfd.VectorExponentialDiag(mu, diag, validate_args=True) mean = dist.mean() self.assertAllEqual([2, 3], mean.shape) self.assertAllClose(mu + diag, self.evaluate(mean)) n = int(1e4) samps = self.evaluate(dist.sample(n, seed=0)) samps_centered = samps - samps.mean(axis=0) cov_mat = self.evaluate(tf.matrix_diag(diag))**2 sample_cov = np.matmul( samps_centered.transpose([1, 2, 0]), samps_centered.transpose([1, 0, 2 ])) / n self.assertAllClose(mu + diag, samps.mean(axis=0), atol=0.10, rtol=0.05) self.assertAllClose([cov_mat, cov_mat], sample_cov, atol=0.10, rtol=0.05)
def ds(x):
    # kx    ky    qx    qy    om
    # x[0]  x[1]  x[2]  x[3]  x[4]
    topkq = -complex(0, 1) * V0 * ((x[0] + x[2]) - complex(0, 1) * (x[1] + x[3]))
    botkq = complex(0, 1) * V0 * ((x[0] + x[2]) + complex(0, 1) * (x[1] + x[3]))
    innkq = x[4] + complex(0, 1) * Gamm - A * ((x[0] + x[2])**2 + (x[1] + x[3])**2) - V2

    topk = -complex(0, 1) * V0 * (x[0] - complex(0, 1) * x[1])
    botk = complex(0, 1) * V0 * (x[0] + complex(0, 1) * x[1])
    innk = x[4] + complex(0, 1) * Gamm - A * (x[0]**2 + x[1]**2) - V2

    # cent = tf.arange(-(N - 1) / 2, (N - 1) / 2 + 1, 1)
    d = hOmg * tf.matrix_diag(cent)

    Ginkq = tf.matrix_diag(topkq, k=1) + tf.matrix_diag(
        botkq, k=-1) + tf.matrix_diag(innkq, k=0) - d
    Gink = tf.matrix_diag(topk, k=1) + tf.matrix_diag(
        botk, k=-1) + tf.matrix_diag(innk, k=0) - d

    Grkq = tf.linalg.inv(Ginkq)
    Gakq = tf.transpose(tf.conj(Grkq))

    Grk = tf.linalg.inv(Gink)
    Gak = tf.transpose(tf.conj(Grk))

    fer = tf.heaviside(-(d + tf.eye(N) * (x[4] - mu)), 0)

    in1 = tf.matmul(Grkq, tf.matmul(Grk, tf.matmul(fer, Gak)))
    in2 = tf.matmul(Grkq, tf.matmul(fer, tf.matmul(Gakq, Gak)))

    tr = tf.trace(in1 + in2)

    # HERE i will divide by DOS, multiply by 2 for spin, and divide by (2pi)^3
    dchi = -(4) * Gamm * tr / math.pi**2
    return dchi
def loss(inputs, labels): norm_inputs = tf.nn.l2_normalize(inputs, axis=1) dot_matrix = tf.expand_dims(norm_inputs, 1) * tf.expand_dims( norm_inputs, 0) sim_matrix = tf.reduce_sum(dot_matrix, axis=2) mask = tf.equal(tf.expand_dims(labels, axis=1), tf.expand_dims(labels, axis=0)) mask = tf.cast(mask, dtype=tf.float32) pos_mask = mask - tf.matrix_diag(tf.ones_like(labels, dtype=tf.float32)) neg_mask = 1.0 - mask easy_a_p = tf.reduce_max(sim_matrix * pos_mask - 1e10 * (1.0 - pos_mask), axis=1, keepdims=True) easy_a_n = tf.reduce_min(sim_matrix * neg_mask + 1e10 * (1.0 - neg_mask), axis=1, keepdims=True) easy_a_n_mask = tf.logical_and(tf.equal(sim_matrix, easy_a_n), tf.cast(neg_mask, dtype=tf.bool)) sh_a_n_mask = tf.logical_and(tf.less(sim_matrix, easy_a_p), tf.cast(neg_mask, dtype=tf.bool)) sh_a_n_mask = tf.logical_or(sh_a_n_mask, easy_a_n_mask) sh_a_n_mask = tf.cast(sh_a_n_mask, dtype=tf.float32) sh_a_n = tf.reduce_max(sim_matrix * sh_a_n_mask - 1e10 * (1 - sh_a_n_mask), axis=1, keepdims=True) easy_a_p_exp = tf.exp(easy_a_p) sh_a_n_exp = tf.exp(sh_a_n) ep_loss = -tf.log(easy_a_p_exp / (easy_a_p_exp + sh_a_n_exp)) ep_loss_mean = tf.reduce_mean(ep_loss) return ep_loss_mean
def kb_module(self, H, ent_emb, ent_W):
    h_h, w = int(H.get_shape()[1]), int(H.get_shape()[2])  # 30, 64
    print(H.get_shape(), ent_emb.get_shape())
    h_e, w_e = int(ent_emb.get_shape()[2]), int(ent_emb.get_shape()[3])  # 5

    out1 = tf.reduce_mean(H, axis=1)  # (?,64)

    reshape_h1 = tf.expand_dims(out1, 1)  # (?,1,64)
    reshape_h1 = tf.expand_dims(reshape_h1, 1)  # (?,1,1,64)
    reshape_h1 = tf.tile(reshape_h1, [1, h_h, h_e, 1])  # (?,30,5,64)
    reshape_h1 = tf.reshape(reshape_h1, [-1, w])  # (? * 30 * 5,64)
    reshape_h2 = tf.reshape(ent_emb, [-1, w_e])  # (? * 30 * 5,64)
    print(reshape_h1.get_shape(), reshape_h2.get_shape())

    M = tf.tanh(
        tf.add(tf.matmul(reshape_h1, ent_W['Wqm']),
               tf.matmul(reshape_h2, ent_W['Wam'])))  # (?,att)
    M = tf.matmul(M, ent_W['Wms'])  # (?,1)

    S = tf.reshape(M, [-1, h_e])  # (?,5)
    S = tf.nn.softmax(S)

    S_diag = tf.matrix_diag(S)  # (?,5,5)
    reshape_ent = tf.reshape(ent_emb, [-1, h_e, w_e])  # (?*30,5,64)
    attention_a = tf.matmul(S_diag, reshape_ent)  # (?*30,5,64)
    attention_a = tf.reshape(attention_a, [-1, h_h, h_e, w_e])  # (?,30,5,64)
    # attention_a = tf.reshape(attention_a, [-1, h_h*h_e, w])  # (?,30,5,64)

    out2 = tf.reduce_mean(attention_a, axis=2)
    # output_a = self.avg_pooling(attention_a)
    # output_a = self.max_pooling(attention_a)
    # out = tf.reduce_mean(tf.concat([H, out2],2), axis=1)
    # return tf.tanh(out)
    return tf.concat([H, out2], 2), out2
    # return tf.concat([H, out2], 2)
    # return out1, out2
def _expectation(p, kern1, feat1, kern2, feat2, nghp=None): """ Compute the expectation: expectation[n] = <Ka_{Z1, x_n} Kb_{x_n, Z2}>_p(x_n) - Ka_{.,.}, Kb_{.,.} :: Linear kernels Ka and Kb as well as Z1 and Z2 can differ from each other, but this is supported only if the Gaussian p is Diagonal (p.cov NxD) and Ka, Kb have disjoint active_dims in which case the joint expectations simplify into a product of expectations :return: NxMxM """ if kern1.on_separate_dims(kern2) and isinstance( p, DiagonalGaussian): # no joint expectations required eKxz1 = expectation(p, (kern1, feat1)) eKxz2 = expectation(p, (kern2, feat2)) return eKxz1[:, :, None] * eKxz2[:, None, :] if kern1 != kern2 or feat1 != feat2: raise NotImplementedError( "The expectation over two kernels has only an " "analytical implementation if both kernels are equal.") kern = kern1 feat = feat1 with params_as_tensors_for(kern), params_as_tensors_for(feat): # use only active dimensions Xcov = kern._slice_cov( tf.matrix_diag(p.cov) if isinstance(p, DiagonalGaussian) else p.cov ) Z, Xmu = kern._slice(feat.Z, p.mu) N = tf.shape(Xmu)[0] var_Z = kern.variance * Z tiled_Z = tf.tile(tf.expand_dims(var_Z, 0), (N, 1, 1)) # NxMxD XX = Xcov + tf.expand_dims(Xmu, 1) * tf.expand_dims(Xmu, 2) # NxDxD return tf.matmul(tf.matmul(tiled_Z, XX), tiled_Z, transpose_b=True)
def get_kl_terms(self,q_mu,q_sqrt): if self.white: alpha = tf.transpose(q_mu)[:,:,None] # DxMx1 else: alpha = tf.matrix_triangular_solve(self.Lu_tiled,tf.transpose(q_mu)[:,:,None], lower=True) # DxMxM * DxMx1 --> DxMx1 if self.q_diag: Lq = Lq_diag = q_sqrt # MxD Lq_full = tf.matrix_diag(tf.transpose(q_sqrt)) # DxMxM else: Lq = Lq_full = tf.matrix_band_part(q_sqrt, -1, 0) # force lower triangle # DxMxM Lq_diag = tf.transpose(tf.matrix_diag_part(Lq)) # MxD # Mahalanobis term: μqᵀ Σp⁻¹ μq mahalanobis = tf.reduce_sum(tf.square(alpha),axis=[1,2])[:,None] # Dx1 # Log-determinant of the covariance of q(x): logdet_qcov = tf.reduce_sum(tf.log(tf.square(Lq_diag)),axis=0)[:,None] # Dx1 # Trace term: tr(Σp⁻¹ Σq) if self.white: if self.q_diag: trace = tf.reduce_sum(tf.square(Lq),axis=0)[:,None] # MxD --> Dx1 else: trace = tf.reduce_sum(tf.square(Lq),axis=[1,2])[:,None] # DxMxM --> Dx1 else: LpiLq = tf.matrix_triangular_solve(self.Lu_tiled, Lq_full, lower=True) # DxMxM trace = tf.reduce_sum(tf.square(LpiLq),axis=[1,2])[:,None] # Dx1 # Log-determinant of the covariance of p(x): if not self.white: log_sqdiag_Lp = tf.stack([tf.log(tf.square(tf.matrix_diag_part(self.Lu_tiled[d]))) for d in range(self.num_outputs)],axis=0) #DxM logdet_pcov = tf.reduce_sum(log_sqdiag_Lp,axis=1)[:,None] #Dx1 else: logdet_pcov = 0 return logdet_pcov, logdet_qcov, mahalanobis, trace
def _expectation(p, kern, feat, none1, none2, nghp=None): """ Compute the expectation: <K_{X, Z}>_p(X) - K_{.,.} :: RBF kernel :return: NxM """ with params_as_tensors_for(kern), params_as_tensors_for(feat): # use only active dimensions Xcov = kern._slice_cov(p.cov) Z, Xmu = kern._slice(feat.Z, p.mu) D = tf.shape(Xmu)[1] if kern.ARD: lengthscales = kern.lengthscales else: lengthscales = tf.zeros( (D, ), dtype=settings.tf_float) + kern.lengthscales chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales**2) + Xcov) # NxDxD all_diffs = tf.transpose(Z) - tf.expand_dims(Xmu, 2) # NxDxM exponent_mahalanobis = tf.matrix_triangular_solve(chol_L_plus_Xcov, all_diffs, lower=True) # NxDxM exponent_mahalanobis = tf.reduce_sum(tf.square(exponent_mahalanobis), 1) # NxM exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis) # NxM sqrt_det_L = tf.reduce_prod(lengthscales) sqrt_det_L_plus_Xcov = tf.exp( tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1)) determinants = sqrt_det_L / sqrt_det_L_plus_Xcov # N return kern.variance * (determinants[:, None] * exponent_mahalanobis)
def call(self, x, mask=None): if not self.built: raise Exception("Secondary stat layer not built") logging.debug('Secondary_stat parameter', type(x)) # Confirm the type of x is indeed tensor4D cov_mat, x_mean = self.calculate_pre_cov(x) # print('call during second {}'.format(self.eps)) # cov_mat += self.eps * self.b if self.robust: """ Implement the robust estimate, by apply an elementwise function to it. """ if K.backend() != 'tensorflow': raise RuntimeError("Not support for theano now") import tensorflow as tf # with tf.device('/cpu:0'): s, u = tf.self_adjoint_eig(cov_mat) comp = tf.zeros_like(s) s = tf.where(tf.less(s, comp), comp, s) # s = tf.Print(s, [s], message='s:', summarize=self.out_dim) inner = robust_estimate_eigenvalues(s, alpha=self.cov_alpha) inner = tf.identity(inner, 'RobustEigen') # inner = tf.Print(inner, [inner], message='inner:', summarize=self.out_dim) cov_mat = tf.matmul(u, tf.matmul(tf.matrix_diag(inner), tf.transpose(u, [0, 2, 1]))) if self.cov_mode == 'mean' or self.cov_mode == 'pmean': # Encode mean into Cov mat. addition_array = K.mean(x_mean, axis=1, keepdims=True) addition_array /= addition_array # Make it 1 if self.cov_mode == 'pmean': x_mean = self.mean_p * x_mean new_cov = K.concatenate( [cov_mat + K.batch_dot(x_mean, K.permute_dimensions(x_mean, (0, 2, 1))), x_mean]) else: new_cov = K.concatenate([cov_mat, x_mean]) tmp = K.concatenate([K.permute_dimensions(x_mean, (0, 2, 1)), addition_array]) new_cov = K.concatenate([new_cov, tmp], axis=1) cov_mat = K.identity(new_cov, 'final_cov_mat') return cov_mat
def _expectation(p, mean, none, kern, feat, nghp=None): """ Compute the expectation: expectation[n] = <x_{n+1} K_{x_n, Z}>_p(x_{n:n+1}) - K_{.,.} :: RBF kernel - p :: MarkovGaussian distribution (p.cov 2x(N+1)xDxD) :return: NxDxM """ Xmu, Xcov = p.mu, p.cov with tf.control_dependencies([tf.assert_equal( tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int), message="Currently cannot handle slicing in exKxz.")]): Xmu = tf.identity(Xmu) with params_as_tensors_for(kern, feat): D = tf.shape(Xmu)[1] lengthscales = kern.lengthscales if kern.ARD \ else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov[0, :-1]) # NxDxD all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu[:-1], 2) # NxDxM sqrt_det_L = tf.reduce_prod(lengthscales) sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1)) determinants = sqrt_det_L / sqrt_det_L_plus_Xcov # N exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs) # NxDxM non_exponent_term = tf.matmul(Xcov[1, :-1], exponent_mahalanobis, transpose_a=True) non_exponent_term = tf.expand_dims(Xmu[1:], 2) + non_exponent_term # NxDxM exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1) # NxM exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis) # NxM return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
def get_feature(input_q, input_a, att_W,index): h_q, w = int(input_q.get_shape()[1]), int(input_q.get_shape()[2]) h_a = int(input_a.get_shape()[1]) output_q = max_pooling(input_q) reshape_q = tf.expand_dims(output_q, 1) reshape_q = tf.tile(reshape_q, [1, h_a, 1]) reshape_q = tf.reshape(reshape_q, [-1, w]) reshape_a = tf.reshape(input_a, [-1, w]) M = tf.tanh(tf.add(tf.matmul(reshape_q, tf.squeeze(att_W['Wqm'][:,:,index])), tf.matmul(reshape_a, tf.squeeze(att_W['Wam'][:,:,index])))) M = tf.matmul(M, tf.expand_dims(att_W['Wms'][:,index],-1)) S = tf.reshape(M, [-1, h_a]) S = tf.nn.softmax(S) S_diag = tf.matrix_diag(S) attention_a = tf.matmul(S_diag, input_a) attention_a = tf.reshape(attention_a, [-1, h_a, w]) output_a = max_pooling(attention_a) return tf.tanh(output_q), tf.tanh(output_a)
def _attention_module(self, query, key, value, unit, in_kp): with tf.variable_scope('attention', reuse=True): query = tf.layers.dense( query, unit, name='qk_map', activation=tf.nn.relu, use_bias=False, kernel_initializer=self.u_init, reuse=tf.AUTO_REUSE, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003)) query = tf.nn.dropout(query, in_kp) key = tf.layers.dense( key, self.emb_size, name='qk_map', activation=tf.nn.relu, use_bias=False, kernel_initializer=self.u_init, reuse=tf.AUTO_REUSE, kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003)) key = tf.nn.dropout(key, in_kp) score = tf.matmul(query, tf.transpose(key, [0, 2, 1])) / math.sqrt( self.emb_size) # [B,T,T] #masks the diagonal of the affinity matrix a_mask = tf.ones([tf.shape(score)[1], tf.shape(score)[2]]) a_mask = a_mask - tf.matrix_diag(tf.ones([tf.shape(score)[1]])) a_mask = tf.expand_dims(a_mask, [0]) a_mask = tf.tile(a_mask, [tf.shape(score)[0], 1, 1]) score *= a_mask score = tf.nn.softmax(score, axis=2) output = tf.matmul(score, value) return output
def get_g(self, h_stack, x_t): """ calculate y_t ~ g(*|h_t-1, x_t) h_stack.shape = (n_particles, batch_size, Dh) x_t.shape = (n_particles, batch_size, Dx) """ x_t_ft = self.get_x_ft(x_t) # x_t_ft.shape = (n_particles, batch_size, Dx_1) with tf.variable_scope(self.variable_scope + '/get_g'): h_x_concat = tf.concat((h_stack, x_t_ft), axis = 2, name = 'h_x_concat') mu = fully_connected(h_x_concat, self.Dy, weights_initializer=xavier_initializer(uniform=False), activation_fn = None, reuse = tf.AUTO_REUSE, scope = "mu") # mu.shape = (n_paticles, batch_size, Dx) sigma = fully_connected(h_x_concat, self.Dy, weights_initializer=xavier_initializer(uniform=False), biases_initializer=tf.constant_initializer(0.6), activation_fn = tf.nn.softplus, reuse = tf.AUTO_REUSE, scope = "sigma") + self.sigma_cons # sigma.shape = (n_paticles, batch_size, Dx) g = tfd.MultivariateNormalFullCovariance(loc = mu, covariance_matrix = tf.matrix_diag(sigma), name = "g") return g
def build_likelihood(self, Z, kern, kern_t, give_KL=True): """ :param Z: inducing points :param kern: kernel for the q(X) :param kern_t: kernel for the p(X) :param give_KL: :return: """ # "The Dynamical Variational GP-LVM for Sequence Data" part in sec 3.3 of Andreas Damianou's Phd thesis. ######################################### Kxx = kern_t.K(self.t) + tf.eye(self.num_data, dtype=float_type) * 1e-6 # N x N, prior covariance for p(X) Lx = tf.cholesky(Kxx) Lambda = tf.matrix_diag(tf.transpose(self.X_variational_var)) # Q x N x N, prior covariance for q(X) tmp = tf.eye(self.num_data, dtype=float_type) + tf.einsum('ijk,kl->ijl', tf.einsum('ij,kil->kjl', Lx, Lambda), Lx) # I + Lx^T x Lambda x Lx in batch mode Ltmp = tf.cholesky(tmp) # Q x N x N tmp2 = tf.matrix_triangular_solve(Ltmp, tf.tile(tf.expand_dims(tf.transpose(Lx), 0), tf.stack([self.num_latent, 1, 1]))) S_full = tf.einsum('ijk,ijl->ikl', tmp2, tmp2) # Q x N x N S = tf.transpose(tf.matrix_diag_part(S_full)) # N x Q, marginal distribution of multivariate normal, from column-wise to row-wise. mu = tf.matmul(Kxx, self.X_variational_mean) # N x Q ########################################## psi0 = tf.reduce_sum(kern.eKdiag(mu, S), 0) # N psi1 = kern.eKxz(Z, mu, S) # N x M psi2 = tf.reduce_sum(kern.eKzxKxz(Z, mu, S), 0) # N x M x M # compute the KL[q(X) || p(X)] NQ = tf.cast(tf.size(mu), float_type) if give_KL: KL = -0.5 * NQ KL += tf.reduce_sum(tf.log(tf.matrix_diag_part(Ltmp))) # trace tricks KL += 0.5 * tf.reduce_sum(tf.trace(tf.cholesky_solve(tf.tile(tf.expand_dims(Lx, 0), tf.stack([self.num_latent, 1, 1])) , S_full + tf.einsum('ji,ki->ijk', mu, mu)))) return KL, psi0, psi1, psi2 else: return psi0, psi1, psi2
def build_predict_fs(self, Xnew, full_cov=False): """ The posterior variance of F is given by q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1) Here we project this to F*, the values of the GP at Xnew which is given by q(F*) = N ( F* | K_{*F} alpha + mean, K_{**} - K_{*f}[K_{ff} + diag(lambda**-2)]^-1 K_{f*} ) """ f_means, f_vars = [], [] for d in range(self.D): # compute kernel things Kx = self.kerns[d].K(self.X, Xnew) K = self.kerns[d].K(self.X) # predictive mean f_mean = tf.matmul(Kx, self.q_alpha[d, :, :], transpose_a=True) + self.mean_functions[d](Xnew) # predictive var A = K + tf.matrix_diag( tf.transpose(1. / tf.square(self.q_lambda[d, :, :]))) L = tf.cholesky(A) Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent.value, 1, 1]) LiKx = tf.matrix_triangular_solve(L, Kx_tiled) if full_cov: f_var = self.kerns[d].K(Xnew) - tf.matmul( LiKx, LiKx, transpose_a=True) else: f_var = self.kerns[d].Kdiag(Xnew) - tf.reduce_sum( tf.square(LiKx), 1) f_means.append(f_mean) f_vars.append(tf.transpose(f_var)) return tf.stack(f_means), tf.stack(f_vars)