def testTTMatTimesTTMatBroadcasting(self):
    # Checks ops.matmul on batches of TT-matrices whose batch sizes differ, so
    # one of the operands has to be broadcast against the other.
    rows, inner, cols = (2, 3), (4, 3), (4, 4)
    with self.test_session() as sess:
        lhs_batch = initializers.random_matrix_batch(
            (rows, inner), tt_rank=3, batch_size=3, dtype=self.dtype)
        # No batch_size is passed here; the test treats the result as a
        # one-element batch.
        rhs_batch = initializers.random_matrix_batch(
            (inner, cols), dtype=self.dtype)
        rhs_single = rhs_batch[0]

        # Case 1: TT-batch times one-element TT-batch.
        batch_by_batch = ops.full(ops.matmul(lhs_batch, rhs_batch))

        # Case 2: single TT-matrix times TT-batch. The product is formed in
        # transposed order (B^T A^T) and transposed back before densifying.
        flipped = ops.matmul(ops.transpose(rhs_single),
                             ops.transpose(lhs_batch))
        single_by_batch = ops.full(ops.transpose(flipped))

        # Dense reference: every batch element times the same dense matrix.
        expected = tf.einsum('oij,jk->oik',
                             ops.full(lhs_batch), ops.full(rhs_single))

        got_batch, got_single, want = sess.run(
            [batch_by_batch, single_by_batch, expected])
        for got in (got_batch, got_single):
            self.assertAllClose(got, want, atol=1e-5, rtol=1e-5)
def testHessianVectorProduct(self):
    # Checks autodiff.hessian_vector_product against hand-derived answers for
    # two functions, and checks that a function which depends on the particular
    # TT-cores (not only on the tensor they represent) is rejected.
    w = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    A = initializers.random_matrix(([5] * 3, [5] * 3), dtype=self.dtype)
    x = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    z = initializers.random_matrix(([5] * 3, None), dtype=self.dtype)
    # Both expected values below are written in terms of z projected at x.
    projected_vector = riemannian.project(z, x)

    def func1(x):
        return 0.5 * ops.flat_inner(x, w) ** 2
    # Grad: <x, w> w
    # Hessian: w w.T
    # Hessian by vector: w <w, P_x z>
    desired1 = riemannian.project(w * ops.flat_inner(projected_vector, w), x)
    desired1 = ops.full(desired1)
    self._TestSingleHessianByVector(func1, x, z, desired1)

    def func2(x):
        return ops.bilinear_form(A, x, x)
    # Hessian of <x, Ax> is A + A.T
    hessian_by_vector = ops.matmul(ops.transpose(A) + A, projected_vector)
    desired2 = ops.full(riemannian.project(hessian_by_vector, x))
    # Check the bilinear form against its own expected value. Previously this
    # line re-ran the func1 / desired1 check, leaving func2 untested.
    self._TestSingleHessianByVector(func2, x, z, desired2)

    def func3(x):
        # A function which is not invariant to different representations of
        # the same tensor, i.e. it does not even have a Riemannian gradient or
        # hessian.
        return tf.add_n([tf.reduce_sum(c) for c in x.tt_cores]) ** 2
    with self.assertRaises(tf.errors.InvalidArgumentError):
        actual3 = ops.full(autodiff.hessian_vector_product(func3, x, z))
        self.evaluate(actual3)
def testProjectMatmul(self):
    # riemannian.project_matmul(what, where, mat) must agree with the two-step
    # computation: multiply the TT-matrix by the TT-vectors first, then project
    # the product with riemannian.project.
    mode_sizes = (2, 3, 4)
    operator = initializers.random_matrix((mode_sizes, mode_sizes))
    vectors = initializers.random_matrix_batch((mode_sizes, None),
                                               batch_size=3)
    base_point = initializers.random_matrix((mode_sizes, None))

    fused = riemannian.project_matmul(vectors, base_point, operator)
    two_step = riemannian.project(ops.matmul(operator, vectors), base_point)

    with self.test_session() as sess:
        dense_pair = (ops.full(fused), ops.full(two_step))
        actual_val, desired_val = sess.run(dense_pair)
        self.assertAllClose(desired_val, actual_val, atol=1e-5, rtol=1e-5)
def testUnknownRanksTTMatmul(self):
    # TT-matrix by TT-matrix product when the inner TT-rank is not known
    # statically: the shared dimension of the two core placeholders is None.
    first_core = tf.placeholder(self.dtype, (1, 2, 2, None))
    last_core = tf.placeholder(self.dtype, (None, 3, 3, 1))
    tt_mat = TensorTrain([first_core, last_core])

    product_tt = ops.matmul(tt_mat, tt_mat)
    res_actual = ops.full(product_tt)
    res_desired = tf.matmul(ops.full(tt_mat), ops.full(tt_mat))

    # Concrete core values; the inner rank is 2 at run time.
    np.random.seed(1)
    first_core_val = np.random.rand(1, 2, 2, 2)
    last_core_val = np.random.rand(2, 3, 3, 1)
    feed = {first_core: first_core_val, last_core: last_core_val}

    with self.test_session() as sess:
        res_actual_val = sess.run(res_actual, feed)
        res_desired_val = sess.run(res_desired, feed)
        self.assertAllClose(res_desired_val, res_actual_val)
def testTTMatTimesDenseVec(self):
    # TT-matrix times a dense float32 column vector, compared with the same
    # product computed on the densified matrix.
    inp_shape = (2, 3, 4)
    out_shape = (3, 4, 3)
    np.random.seed(1)
    column = np.random.rand(np.prod(inp_shape), 1)
    column = column.astype(np.float32)
    with self.test_session() as sess:
        tf_vec = tf.constant(column)
        tf.set_random_seed(1)
        tt_mat = initializers.random_matrix((out_shape, inp_shape))
        via_tt = ops.matmul(tt_mat, tf_vec)
        via_dense = tf.matmul(ops.full(tt_mat), tf_vec)
        res_actual_val, res_desired_val = sess.run([via_tt, via_dense])
        self.assertAllClose(res_actual_val, res_desired_val)
def testDenseMatTimesTTVec(self):
    # Dense matrix times a TT-vector, compared with the same product computed
    # on the densified vector.
    inp_shape = (3, 3, 3, 3)
    out_shape = (3, 3, 3, 3)
    np.random.seed(1)
    n_rows, n_cols = np.prod(out_shape), np.prod(inp_shape)
    dense = np.random.rand(n_rows, n_cols)
    dense = dense.astype(self.dtype.as_numpy_dtype)
    with self.test_session() as sess:
        tf_mat = tf.constant(dense)
        tf.set_random_seed(1)
        tt_vec = initializers.random_matrix((inp_shape, None),
                                            dtype=self.dtype)
        via_tt = ops.matmul(tf_mat, tt_vec)
        via_dense = tf.matmul(tf_mat, ops.full(tt_vec))
        res_actual_val, res_desired_val = sess.run([via_tt, via_dense])
        self.assertAllClose(res_actual_val, res_desired_val,
                            atol=1e-4, rtol=1e-4)
def testTTMatTimesTTMat(self):
    # Product of two TT-matrices, compared with tf.matmul applied to their
    # dense versions.
    left_shape, sum_shape, right_shape = (2, 3, 4), (4, 3, 5), (4, 4, 4)
    with self.test_session() as sess:
        lhs = initializers.random_matrix(
            (left_shape, sum_shape), tt_rank=3, dtype=self.dtype)
        rhs = initializers.random_matrix(
            (sum_shape, right_shape), dtype=self.dtype)
        via_tt = ops.full(ops.matmul(lhs, rhs))
        via_dense = tf.matmul(ops.full(lhs), ops.full(rhs))
        res_actual_val, res_desired_val = sess.run([via_tt, via_dense])
        # TODO: why so bad accuracy?
        tolerances = dict(atol=1e-4, rtol=1e-4)
        self.assertAllClose(res_actual_val, res_desired_val, **tolerances)
def testTTMatTimesDenseVec(self):
    # TT-matrix times a dense column vector of dtype self.dtype, compared with
    # the same product computed on the densified matrix.
    inp_shape = (2, 3, 4)
    out_shape = (3, 4, 3)
    np.random.seed(1)
    column = np.random.rand(np.prod(inp_shape), 1)
    column = column.astype(self.dtype.as_numpy_dtype)
    tf_vec = tf.constant(column)
    tf.compat.v1.set_random_seed(1)
    tt_mat = initializers.random_matrix((out_shape, inp_shape),
                                        dtype=self.dtype)
    via_tt = ops.matmul(tt_mat, tf_vec)
    via_dense = tf.matmul(ops.full(tt_mat), tf_vec)
    res_actual_val, res_desired_val = self.evaluate([via_tt, via_dense])
    self.assertAllClose(res_actual_val, res_desired_val)
def testHalfKnownRanksTTMatmul(self):
    # TT-matrix by TT-matrix product where one operand is built with explicit
    # tt_ranks and the other one is built from the same cores without them.
    np.random.seed(1)
    first_core = tf.placeholder(self.dtype, (1, 2, 2, None))
    last_core = tf.placeholder(self.dtype, (None, 3, 3, 1))
    cores = [first_core, last_core]
    with_ranks = TensorTrain(cores, tt_ranks=[1, 3, 1])
    without_ranks = TensorTrain(cores)

    res_actual = ops.full(ops.matmul(with_ranks, without_ranks))
    res_desired = tf.matmul(ops.full(with_ranks), ops.full(without_ranks))

    # Concrete core values; the inner rank 3 matches the declared tt_ranks.
    np.random.seed(1)
    first_core_val = np.random.rand(1, 2, 2, 3)
    last_core_val = np.random.rand(3, 3, 3, 1)
    feed = {first_core: first_core_val, last_core: last_core_val}

    with self.test_session() as sess:
        res_actual_val = sess.run(res_actual, feed)
        res_desired_val = sess.run(res_desired, feed)
        self.assertAllClose(res_desired_val, res_actual_val)
def testTTMatTimesTTMatSameBatchSize(self):
    # Product of two batches of TT-matrices with equal batch sizes (3 and 3),
    # compared with tf.matmul applied to their dense versions.
    left_shape, sum_shape, right_shape = (2, 3), (4, 3), (4, 4)
    with self.test_session() as sess:
        lhs_batch = initializers.random_matrix_batch(
            (left_shape, sum_shape), tt_rank=3, batch_size=3,
            dtype=self.dtype)
        rhs_batch = initializers.random_matrix_batch(
            (sum_shape, right_shape), batch_size=3, dtype=self.dtype)
        via_tt = ops.full(ops.matmul(lhs_batch, rhs_batch))
        via_dense = tf.matmul(ops.full(lhs_batch), ops.full(rhs_batch))
        res_actual_val, res_desired_val = sess.run([via_tt, via_dense])
        # TODO: why so bad accuracy?
        tolerances = dict(atol=1e-5, rtol=1e-5)
        self.assertAllClose(res_actual_val, res_desired_val, **tolerances)
def testGradients(self):
    # Checks autodiff.gradients against hand-derived answers for two
    # functions, and checks that a function which depends on the particular
    # TT-cores (not only on the tensor they represent) is rejected.
    vec_shape = ([5] * 3, None)
    mat_shape = ([5] * 3, [5] * 3)
    w = initializers.random_matrix(vec_shape, dtype=self.dtype)
    A = initializers.random_matrix(mat_shape, dtype=self.dtype)
    x = initializers.random_matrix(vec_shape, dtype=self.dtype)

    # Case 1: f(x) = 0.5 <x, w>^2; expected value is P_x(w) scaled by <x, w>.
    def half_squared_inner(x):
        return 0.5 * ops.flat_inner(x, w) ** 2
    projected_w = riemannian.project(w, x)
    expected_1 = ops.full(projected_w * ops.flat_inner(x, w))
    self._TestSingleGradient(half_squared_inner, x, expected_1)

    # Case 2: f(x) = bilinear_form(A, x, x); expected value is the projection
    # of (A.T + A) x.
    def quadratic_form(x):
        return ops.bilinear_form(A, x, x)
    symmetrized_times_x = ops.matmul(ops.transpose(A) + A, x)
    expected_2 = ops.full(riemannian.project(symmetrized_times_x, x))
    self._TestSingleGradient(quadratic_form, x, expected_2)

    # Case 3: a function which is not invariant to different representations
    # of the same tensor, i.e. it does not even have a Riemannian gradient.
    def squared_core_sum(x):
        return tf.add_n([tf.reduce_sum(core) for core in x.tt_cores]) ** 2
    with self.assertRaises(tf.errors.InvalidArgumentError):
        bad_gradient = ops.full(autodiff.gradients(squared_core_sum, x))
        self.evaluate(bad_gradient)