def testInvalidShapeAtEval(self):
  with self.test_session(use_gpu=self._use_gpu):
    v = tf.placeholder(dtype=tf.float32)
    with self.assertRaisesOpError("input must be at least 2-dim"):
      tf.batch_matrix_diag_part(v).eval(feed_dict={v: 0.0})
    with self.assertRaisesOpError("last two dimensions must be equal"):
      tf.batch_matrix_diag_part(v).eval(
          feed_dict={v: [[0, 1], [1, 0], [0, 0]]})
def testSample(self):
  with self.test_session():
    scale = make_pd(1., 2)
    df = 4

    chol_w = distributions.WishartCholesky(
        df, chol(scale), cholesky_input_output_matrices=False)

    x = chol_w.sample_n(1, seed=42).eval()
    chol_x = [chol(x[0])]

    full_w = distributions.WishartFull(
        df, scale, cholesky_input_output_matrices=False)
    self.assertAllClose(x, full_w.sample_n(1, seed=42).eval())

    chol_w_chol = distributions.WishartCholesky(
        df, chol(scale), cholesky_input_output_matrices=True)
    self.assertAllClose(chol_x, chol_w_chol.sample_n(1, seed=42).eval())
    eigen_values = tf.batch_matrix_diag_part(
        chol_w_chol.sample_n(1000, seed=42))
    np.testing.assert_array_less(0., eigen_values.eval())

    full_w_chol = distributions.WishartFull(
        df, scale, cholesky_input_output_matrices=True)
    self.assertAllClose(chol_x, full_w_chol.sample_n(1, seed=42).eval())
    eigen_values = tf.batch_matrix_diag_part(
        full_w_chol.sample_n(1000, seed=42))
    np.testing.assert_array_less(0., eigen_values.eval())

    # Check first and second moments.
    df = 4.
    chol_w = distributions.WishartCholesky(
        df=df,
        scale=chol(make_pd(1., 3)),
        cholesky_input_output_matrices=False)
    x = chol_w.sample_n(10000, seed=42)
    self.assertAllEqual((10000, 3, 3), x.get_shape())

    moment1_estimate = tf.reduce_mean(x, reduction_indices=[0]).eval()
    self.assertAllClose(chol_w.mean().eval(), moment1_estimate, rtol=0.05)

    # The variance estimate uses the squares rather than outer products
    # because Wishart.variance is the diagonal of the Wishart covariance
    # matrix.
    variance_estimate = (
        tf.reduce_mean(tf.square(x), reduction_indices=[0]) -
        tf.square(moment1_estimate)).eval()
    self.assertAllClose(
        chol_w.variance().eval(), variance_estimate, rtol=0.05)
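# For reference, the moments checked above are, for W ~ Wishart(df, scale):
#   E[W]      = df * scale
#   Var(W_ij) = df * (scale_ij^2 + scale_ii * scale_jj),
# so the diagonal variance is 2 * df * scale_ii^2. A minimal sketch of the
# first-moment check, assuming SciPy is available (values hypothetical):
import numpy as np
from scipy.stats import wishart

scale = np.array([[2.0, 0.5], [0.5, 1.0]])
df = 4
samples = wishart.rvs(df=df, scale=scale, size=100000, random_state=42)
assert np.allclose(samples.mean(axis=0), df * scale, rtol=0.05)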
def gauss_kl_white(q_mu, q_sqrt):
    """
    Compute the KL divergence from

        q(x) = N(q_mu, q_sqrt * q_sqrt^T)

    to

        p(x) = N(0, I).

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix: each column contains a mean.
    q_sqrt is a 3D tensor: each matrix within is a lower-triangular
    square root of the covariance.
    """
    KL = 0.5 * tf.reduce_sum(tf.square(q_mu))  # Mahalanobis term
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]),
                         tf.float64)  # constant term
    L = tf.batch_matrix_band_part(
        tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # force lower triangle
    KL -= 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.batch_matrix_diag_part(L))))  # log-determinant term
    KL += 0.5 * tf.reduce_sum(tf.square(L))  # trace term
    return KL
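# A minimal NumPy cross-check of the closed form implemented above (names
# hypothetical, not part of the original code): for a single column with
# mean m and covariance S = L L^T,
#
#   KL[N(m, S) || N(0, I)] = 0.5 * (m^T m - d - log det S + tr S),
#
# and the factored evaluation in gauss_kl_white agrees with it because
# log det S = sum_i log(L_ii^2) and tr S = sum_ij L_ij^2.
import numpy as np

d = 3
rng = np.random.RandomState(0)
m = rng.randn(d)
L = np.tril(rng.randn(d, d))
np.fill_diagonal(L, np.abs(np.diag(L)) + 0.1)  # keep S positive definite
S = L.dot(L.T)

kl_dense = 0.5 * (m.dot(m) - d - np.linalg.slogdet(S)[1] + np.trace(S))
kl_factored = (0.5 * m.dot(m)
               - 0.5 * d
               - 0.5 * np.sum(np.log(np.square(np.diag(L))))
               + 0.5 * np.sum(np.square(L)))
assert np.allclose(kl_dense, kl_factored)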
def gauss_kl(q_mu, q_sqrt, K):
    """
    Compute the KL divergence from

        q(x) = N(q_mu, q_sqrt * q_sqrt^T)

    to

        p(x) = N(0, K).

    We assume multiple independent distributions, given by the columns of
    q_mu and the last dimension of q_sqrt.

    q_mu is a matrix: each column contains a mean.
    q_sqrt is a 3D tensor: each matrix within is a lower-triangular
    square root of the covariance of q.
    K is a positive definite matrix: the covariance of p.
    """
    L = tf.cholesky(K)
    alpha = tf.matrix_triangular_solve(L, q_mu, lower=True)
    KL = 0.5 * tf.reduce_sum(tf.square(alpha))  # Mahalanobis term
    num_latent = tf.cast(tf.shape(q_sqrt)[2], tf.float64)
    KL += num_latent * 0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.diag_part(L))))  # prior log-determinant term
    KL += -0.5 * tf.cast(tf.reduce_prod(tf.shape(q_sqrt)[1:]),
                         tf.float64)  # constant term
    Lq = tf.batch_matrix_band_part(
        tf.transpose(q_sqrt, (2, 0, 1)), -1, 0)  # force lower triangle
    KL += -0.5 * tf.reduce_sum(tf.log(tf.square(
        tf.batch_matrix_diag_part(Lq))))  # posterior log-determinant term
    L_tiled = tf.tile(tf.expand_dims(L, 0), tf.pack([tf.shape(Lq)[0], 1, 1]))
    LiLq = tf.batch_matrix_triangular_solve(L_tiled, Lq, lower=True)
    KL += 0.5 * tf.reduce_sum(tf.square(LiLq))  # trace term
    return KL
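# The general form implemented above is, per independent column,
#
#   KL[N(m, S) || N(0, K)]
#       = 0.5 * (m^T K^{-1} m - d + log det K - log det S + tr(K^{-1} S)),
#
# evaluated through the Cholesky factor L of K: alpha = L^{-1} m gives the
# Mahalanobis term, and tr(K^{-1} S) = ||L^{-1} Lq||_F^2 for S = Lq Lq^T,
# which is why each Lq is triangular-solved against a tiled copy of L.
# Setting K = I recovers gauss_kl_white above.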
def testMatrix(self):
  with self.test_session(use_gpu=self._use_gpu):
    v = np.array([1.0, 2.0, 3.0])
    mat = np.diag(v)
    mat_diag = tf.batch_matrix_diag_part(mat)
    self.assertEqual((3,), mat_diag.get_shape())
    self.assertAllEqual(mat_diag.eval(), v)
def testMatrix(self):
  with self.test_session(use_gpu=self._use_gpu):
    v = np.array([1.0, 2.0, 3.0])
    mat = np.diag(v)
    mat_diag = tf.batch_matrix_diag_part(mat)
    self.assertEqual((3,), mat_diag.get_shape())
    self.assertAllEqual(mat_diag.eval(), v)
def testGrad(self):
  shapes = ((3, 3), (5, 3, 3))
  with self.test_session(use_gpu=self._use_gpu):
    for shape in shapes:
      x = tf.constant(np.random.rand(*shape), dtype=np.float32)
      y = tf.batch_matrix_diag_part(x)
      error = tf.test.compute_gradient_error(x, x.get_shape().as_list(),
                                             y, y.get_shape().as_list())
      self.assertLess(error, 1e-4)
def vec2lower_triangle(vec, dim):
    """
    Convert a vector M of length dim**2 into a lower-triangular matrix of
    shape (dim, dim) whose diagonal is exponentiated:

        [[e^M[0],         0,           0,            ..., 0            ]
         [M[dim],         e^M[dim+1],  0,            ..., 0            ]
         [M[2*dim],       M[2*dim+1],  e^M[2*dim+2], ..., 0            ]
         ...
         [M[dim*(dim-1)], ...,         M[dim**2-2],       e^M[dim**2-1]]]

    A leading batch dimension in `vec` is preserved.
    """
    L = tf.reshape(vec, [-1, dim, dim])
    if int(tf.__version__.split('.')[1]) >= 10:
        L = tf.matrix_band_part(L, -1, 0) - tf.matrix_diag(
            tf.matrix_diag_part(L)) + tf.matrix_diag(
                tf.exp(tf.matrix_diag_part(L)))
    else:
        L = tf.batch_matrix_band_part(L, -1, 0) - tf.batch_matrix_diag(
            tf.batch_matrix_diag_part(L)) + tf.batch_matrix_diag(
                tf.exp(tf.batch_matrix_diag_part(L)))
    return L
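# A minimal usage sketch of vec2lower_triangle (hypothetical values): a flat
# vector of dim * dim = 4 entries yields one 2x2 lower-triangular matrix.
vec = tf.constant([[0.0, 1.0, 2.0, 3.0]])
L = vec2lower_triangle(vec, 2)
# L evaluates to [[[exp(0), 0     ],
#                  [2,      exp(3)]]]:
# the strict lower triangle is copied, the diagonal is exponentiated (so it
# is always positive, as a Cholesky-style parameterisation requires), and
# the upper triangle is zeroed.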
def testBatchMatrix(self):
  with self.test_session(use_gpu=self._use_gpu):
    v_batch = np.array([[1.0, 2.0, 3.0],
                        [4.0, 5.0, 6.0]])
    mat_batch = np.array([[[1.0, 0.0, 0.0],
                           [0.0, 2.0, 0.0],
                           [0.0, 0.0, 3.0]],
                          [[4.0, 0.0, 0.0],
                           [0.0, 5.0, 0.0],
                           [0.0, 0.0, 6.0]]])
    self.assertEqual(mat_batch.shape, (2, 3, 3))
    mat_batch_diag = tf.batch_matrix_diag_part(mat_batch)
    self.assertEqual((2, 3), mat_batch_diag.get_shape())
    self.assertAllEqual(mat_batch_diag.eval(), v_batch)
def testBatchMatrix(self):
  with self.test_session(use_gpu=self._use_gpu):
    v_batch = np.array([[1.0, 2.0, 3.0],
                        [4.0, 5.0, 6.0]])
    mat_batch = np.array(
        [[[1.0, 0.0, 0.0],
          [0.0, 2.0, 0.0],
          [0.0, 0.0, 3.0]],
         [[4.0, 0.0, 0.0],
          [0.0, 5.0, 0.0],
          [0.0, 0.0, 6.0]]])
    self.assertEqual(mat_batch.shape, (2, 3, 3))
    mat_batch_diag = tf.batch_matrix_diag_part(mat_batch)
    self.assertEqual((2, 3), mat_batch_diag.get_shape())
    self.assertAllEqual(mat_batch_diag.eval(), v_batch)
def _sample(self, N):
    """
    :param integer N: number of samples
    :Returns: samples drawn from the variational posterior.

    The Kullback-Leibler divergence is stored as self._KL.
    """
    n = self.num_data
    R = self.num_latent
    # Match dimension of the posterior variance to the data.
    if self.q_diag:
        sqrt = tf.batch_matrix_diag(tf.transpose(self.q_sqrt))  # [R,n,n]
    else:
        sqrt = tf.batch_matrix_band_part(
            tf.transpose(self.q_sqrt, [2, 0, 1]), -1, 0)  # [R,n,n]
    # Log-determinant of the matrix S = q_sqrt * q_sqrt^T
    logdet_S = tf.cast(N, float_type) * tf.reduce_sum(
        tf.log(tf.square(tf.batch_matrix_diag_part(sqrt))))
    sqrt = tf.tile(tf.expand_dims(sqrt, 1), [1, N, 1, 1])  # [R,N,n,n]
    # Normal random samples, [R,N,n,1]
    v_samples = tf.random_normal([R, N, n, 1], dtype=float_type)
    # Match dimension of the posterior mean, [R,N,n,1]
    mu = tf.tile(tf.expand_dims(tf.expand_dims(
        tf.transpose(self.q_mu), 1), -1), [1, N, 1, 1])
    u_samples = mu + tf.batch_matmul(sqrt, v_samples)
    # Stochastic approximation of the Kullback-Leibler divergence
    # KL[q(f)||p(f)]
    self._KL = - 0.5 * logdet_S \
               - 0.5 * tf.reduce_sum(tf.square(v_samples)) \
               + 0.5 * tf.reduce_sum(tf.square(u_samples))
    # Cholesky factor of the kernel, [R,N,n,n]
    L = tf.tile(tf.expand_dims(
        tf.transpose(self.kern.Cholesky(self.X), [2, 0, 1]), 1),
        [1, N, 1, 1])
    # Mean, sized [N,n,R]
    mean = tf.tile(tf.expand_dims(
        self.mean_function(self.X), 0), [N, 1, 1])
    # Samples from the posterior, [N,n,R]
    f_samples = tf.transpose(
        tf.squeeze(tf.batch_matmul(L, u_samples), [-1]),  # [R,N,n]
        [1, 2, 0]) + mean
    return f_samples
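# A sketch of the estimator implemented above, assuming the whitened prior
# p(u) = N(0, I) implied by the final multiplication with the kernel's
# Cholesky factor: with u = mu + sqrt(S) v and v ~ N(0, I), a single-sample
# estimate of the divergence is
#
#   KL[q||p] ~= log q(u) - log p(u)
#             = -0.5 * log|S| - 0.5 * v^T v + 0.5 * u^T u  (constants cancel),
#
# which the code accumulates over all N samples; logdet_S carries the factor
# of N so that every term is a sum over the same N draws.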
def _define_full_covariance_probs(self, shard_id, shard):
  """Defines the full covariance probabilities per example in a class.

  Updates a matrix with dimension num_examples X num_classes.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.
  """
  diff = shard - self._means
  cholesky = tf.batch_cholesky(self._covs + self._min_var)
  log_det_covs = 2.0 * tf.reduce_sum(tf.log(
      tf.batch_matrix_diag_part(cholesky)), 1)
  x_mu_cov = tf.square(tf.batch_matrix_triangular_solve(
      cholesky, tf.transpose(diff, perm=[0, 2, 1]), lower=True))
  diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
  self._probs[shard_id] = -0.5 * (
      diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
      log_det_covs)
def _define_full_covariance_probs(self, shard_id, shard):
  """Defines the full covariance probabilities per example in a class.

  Updates a matrix with dimension num_examples X num_classes.

  Args:
    shard_id: id of the current shard.
    shard: current data shard, 1 X num_examples X dimensions.
  """
  diff = shard - self._means
  cholesky = tf.cholesky(self._covs + self._min_var)
  log_det_covs = 2.0 * tf.reduce_sum(
      tf.log(tf.batch_matrix_diag_part(cholesky)), 1)
  x_mu_cov = tf.square(
      tf.matrix_triangular_solve(cholesky,
                                 tf.transpose(diff, perm=[0, 2, 1]),
                                 lower=True))
  diag_m = tf.transpose(tf.reduce_sum(x_mu_cov, 1))
  self._probs[shard_id] = -0.5 * (
      diag_m + tf.to_float(self._dimensions) * tf.log(2 * np.pi) +
      log_det_covs)
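# Both variants above evaluate the Gaussian log-density through a Cholesky
# factor L of each class covariance (Sigma = L L^T):
#
#   log N(x; mu, Sigma)
#       = -0.5 * ((x - mu)^T Sigma^{-1} (x - mu) + d*log(2*pi) + log det Sigma),
#
# where (x - mu)^T Sigma^{-1} (x - mu) = ||L^{-1}(x - mu)||^2 is obtained by
# one triangular solve, and log det Sigma = 2 * sum_i log L_ii comes from the
# diagonal of L -- hence the call to tf.batch_matrix_diag_part.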
def build_likelihood(self):
    """
    q_alpha, q_lambda are variational parameters, size N x R.

    This method computes the variational lower bound on the likelihood,
    which is:

        E_{q(F)} [ \log p(Y|F) ] - KL[ q(F) || p(F) ]

    with

        q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1).
    """
    K = self.kern.K(self.X)
    K_alpha = tf.matmul(K, self.q_alpha)
    f_mean = K_alpha + self.mean_function(self.X)

    # Compute the variance for each of the outputs.
    I = tf.tile(tf.expand_dims(eye(self.num_data), 0),
                [self.num_latent, 1, 1])
    A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
        tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
    L = tf.batch_cholesky(A)
    Li = tf.batch_matrix_triangular_solve(L, I)
    tmp = Li / tf.transpose(self.q_lambda)
    f_var = 1. / tf.square(self.q_lambda) - \
        tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

    # Some statistics about A are used in the KL.
    A_logdet = 2.0 * tf.reduce_sum(tf.log(tf.batch_matrix_diag_part(L)))
    trAi = tf.reduce_sum(tf.square(Li))

    KL = 0.5 * (A_logdet + trAi - self.num_data * self.num_latent +
                tf.reduce_sum(K_alpha * self.q_alpha))

    v_exp = self.likelihood.variational_expectations(f_mean, f_var, self.Y)
    return tf.reduce_sum(v_exp) - KL
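# The KL term above is the closed form specialised to this q(f). Writing
# Lambda = diag(lambda) and A = I + Lambda K Lambda (as built per latent
# function), one can show
#
#   KL[q(f) || p(f)] = 0.5 * (log det A + tr(A^{-1}) - N + alpha^T K alpha)
#
# per output column, since log det K - log det S = log det A and
# tr(K^{-1} S) = tr(A^{-1}) for S = [K^{-1} + Lambda^2]^{-1}; A_logdet and
# trAi are exactly those two statistics, summed over the R latents.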
def testInvalidShape(self):
  with self.assertRaisesRegexp(ValueError, "must have rank at least 2"):
    tf.batch_matrix_diag_part(0)
  with self.assertRaisesRegexp(ValueError, r"Dimensions .* not compatible"):
    tf.batch_matrix_diag_part([[0, 1], [1, 0], [0, 0]])
# Declare k-value and batch size
k = 4
batch_size = len(x_vals_test)

# Placeholders
x_data_train = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
x_data_test = tf.placeholder(shape=[None, num_features], dtype=tf.float32)
y_target_train = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target_test = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Declare weighted distance metric
# Weighted L2 = sqrt((x-y)^T * A * (x-y))
subtraction_term = tf.subtract(x_data_train, tf.expand_dims(x_data_test, 1))
first_product = tf.matmul(subtraction_term,
                          tf.tile(tf.expand_dims(weight_matrix, 0),
                                  [batch_size, 1, 1]))
second_product = tf.matmul(first_product,
                           tf.transpose(subtraction_term, perm=[0, 2, 1]))
distance = tf.sqrt(tf.batch_matrix_diag_part(second_product))

# Predict: Get min distance index (Nearest neighbor)
top_k_xvals, top_k_indices = tf.nn.top_k(tf.negative(distance), k=k)
x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1), 1)
x_sums_repeated = tf.matmul(x_sums, tf.ones([1, k], tf.float32))
x_val_weights = tf.expand_dims(tf.div(top_k_xvals, x_sums_repeated), 1)
top_k_yvals = tf.gather(y_target_train, top_k_indices)
prediction = tf.squeeze(tf.matmul(x_val_weights, top_k_yvals), axis=[1])

# Calculate MSE
mse = tf.div(tf.reduce_sum(tf.square(tf.subtract(prediction, y_target_test))),
             batch_size)

# Calculate how many loops over the training data
num_loops = int(np.ceil(len(x_vals_test) / batch_size))
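# A minimal NumPy sketch (names hypothetical) of the diagonal trick used
# above: diag((X - y) A (X - y)^T) holds the squared weighted distance from
# each training row to the test point, so only the diagonal of the full
# batch product is needed. With A = I it reduces to plain Euclidean distance.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(5, 3)   # 5 training points, 3 features
y = rng.randn(3)      # one test point
A = np.eye(3)         # weight matrix; identity -> plain L2
diff = X - y
dist = np.sqrt(np.einsum('ij,jk,ik->i', diff, A, diff))
assert np.allclose(dist, np.linalg.norm(diff, axis=1))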
def testInvalidShape(self):
  with self.assertRaisesRegexp(ValueError, "must be at least rank 2"):
    tf.batch_matrix_diag_part(0)
  with self.assertRaisesRegexp(ValueError, r"Dimensions must be equal"):
    tf.batch_matrix_diag_part([[0, 1], [1, 0], [0, 0]])
def vec2trimat(vec, dim):
    """Reshape `vec` to [-1, dim, dim] lower-triangular matrices with an
    exponentiated diagonal (the same transform as vec2lower_triangle above,
    using the pre-1.0 batch_* op names)."""
    L = tf.reshape(vec, [-1, dim, dim])
    L = tf.batch_matrix_band_part(L, -1, 0) - \
        tf.batch_matrix_diag(tf.batch_matrix_diag_part(L)) + \
        tf.batch_matrix_diag(tf.exp(tf.batch_matrix_diag_part(L)))
    return L
Out[6]:
array([1, 2, 3, 1, 2, 3], dtype=int32)

sess.run(tp1)
Out[7]:
array([[1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6],
       [1, 2, 3, 1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6, 4, 5, 6]], dtype=int32)
"""

# Declare weighted distance metric
# Weighted L2 = sqrt((x-y)^T * A * (x-y))
subtraction_term = tf.sub(x_data_train, tf.expand_dims(x_data_test, 1))
first_product = tf.batch_matmul(subtraction_term,
                                tf.tile(tf.expand_dims(weight_matrix, 0),
                                        [batch_size, 1, 1]))
second_product = tf.batch_matmul(first_product,
                                 tf.transpose(subtraction_term,
                                              perm=[0, 2, 1]))
distance = tf.sqrt(tf.batch_matrix_diag_part(second_product))

# Predict: Get min distance index (Nearest neighbor)
top_k_xvals, top_k_indices = tf.nn.top_k(tf.neg(distance), k=k)
x_sums = tf.expand_dims(tf.reduce_sum(top_k_xvals, 1), 1)
x_sums_repeated = tf.matmul(x_sums, tf.ones([1, k], tf.float32))
x_val_weights = tf.expand_dims(tf.div(top_k_xvals, x_sums_repeated), 1)
top_k_yvals = tf.gather(y_target_train, top_k_indices)
prediction = tf.squeeze(tf.batch_matmul(x_val_weights, top_k_yvals),
                        squeeze_dims=[1])

# Calculate MSE
mse = tf.div(tf.reduce_sum(tf.square(tf.sub(prediction, y_target_test))),
             batch_size)

# Calculate how many loops over the training data
num_loops = int(np.ceil(len(x_vals_test) / batch_size))