def test_add_mul_mix_2(backendopt):
    """Verify ``sum(x1 + x2 * x3 * x4)`` and its four gradients per backend.

    Args:
        backendopt: Iterable of backend identifiers; each one is handed to
            ``T.set_backend`` before the graph is built and executed.
    """
    for backend in backendopt:
        T.set_backend(backend)

        # Symbolic graph: four length-3 inputs reduced with ad.sum.
        x1 = ad.Variable(name="x1", shape=[3])
        x2 = ad.Variable(name="x2", shape=[3])
        x3 = ad.Variable(name="x3", shape=[3])
        x4 = ad.Variable(name="x4", shape=[3])
        y = ad.sum(x1 + x2 * x3 * x4)
        d_x1, d_x2, d_x3, d_x4 = ad.gradients(y, [x1, x2, x3, x4])
        executor = ad.Executor([y, d_x1, d_x2, d_x3, d_x4])

        # Concrete inputs: constant vectors filled with 1, 2, 3 and 4.
        x1_val = 1 * T.ones(3)
        x2_val = 2 * T.ones(3)
        x3_val = 3 * T.ones(3)
        x4_val = 4 * T.ones(3)
        feed = {x1: x1_val, x2: x2_val, x3: x3_val, x4: x4_val}

        results = executor.run(feed_dict=feed)
        y_val, d_x1_val, d_x2_val, d_x3_val, d_x4_val = results

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(x1_val + x2_val * x3_val * x4_val))
        # The additive term contributes ones; each factor of the product
        # receives the product of the other two factors.
        assert T.array_equal(d_x1_val, T.ones_like(x1_val))
        assert T.array_equal(d_x2_val, x3_val * x4_val)
        assert T.array_equal(d_x3_val, x2_val * x4_val)
        assert T.array_equal(d_x4_val, x2_val * x3_val)
def test_einsum():
    """Verify a matmul written as ``einsum('ik,kj->ij')`` and its gradients.

    The expected values are computed directly with ``T.dot`` and
    ``T.transpose`` on the concrete inputs.
    """
    # NOTE(review): sibling tests receive a ``backendopt`` argument, whereas
    # this one iterates the module-level ``backends`` -- confirm intended.
    for backend in backends:
        T.set_backend(backend)

        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        product = ad.einsum('ik,kj->ij', x2, x3)
        y = ad.sum(product)
        d_x2, d_x3 = ad.gradients(y, [x2, x3])
        executor = ad.Executor([y, d_x2, d_x3])

        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3
        feed = {x2: x2_val, x3: x3_val}
        y_val, d_x2_val, d_x3_val = executor.run(feed_dict=feed)

        # Reference results: the upstream gradient of the sum is all ones
        # with the shape of the matrix product.
        product_val = T.dot(x2_val, x3_val)
        upstream = T.ones_like(product_val)
        expected_y_val = T.sum(product_val)
        expected_d_x2_val = T.dot(upstream, T.transpose(x3_val))
        expected_d_x3_val = T.dot(T.transpose(x2_val), upstream)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_y_val)
        assert T.array_equal(d_x2_val, expected_d_x2_val)
        assert T.array_equal(d_x3_val, expected_d_x3_val)
def test_sub_two_vars(backendopt):
    """Verify ``sum(x2 - x3)`` and its gradients on each backend.

    Args:
        backendopt: Iterable of backend identifiers; each one is handed to
            ``T.set_backend`` before the graph is built and executed.
    """
    for backend in backendopt:
        T.set_backend(backend)

        minuend = ad.Variable(name="x2", shape=[3])
        subtrahend = ad.Variable(name="x3", shape=[3])
        y = ad.sum(minuend - subtrahend)
        d_minuend, d_subtrahend = ad.gradients(y, [minuend, subtrahend])
        executor = ad.Executor([y, d_minuend, d_subtrahend])

        minuend_val = 2 * T.ones(3)
        subtrahend_val = 3 * T.ones(3)
        feed = {minuend: minuend_val, subtrahend: subtrahend_val}
        y_val, d_minuend_val, d_subtrahend_val = executor.run(feed_dict=feed)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(minuend_val - subtrahend_val))
        # Gradient is +1 for the minuend and -1 for the subtrahend.
        assert T.array_equal(d_minuend_val, T.ones_like(minuend_val))
        assert T.array_equal(d_subtrahend_val, -T.ones_like(subtrahend_val))
def test_negative(backendopt):
    """Verify ``sum(-x2)`` and its gradient on each backend.

    Args:
        backendopt: Iterable of backend identifiers; each one is handed to
            ``T.set_backend`` before the graph is built and executed.
    """
    for backend in backendopt:
        T.set_backend(backend)

        operand = ad.Variable(name="x2", shape=[3])
        y = ad.sum(-operand)
        # ad.gradients is expected to yield exactly one entry here.
        (d_operand,) = ad.gradients(y, [operand])
        executor = ad.Executor([y, d_operand])

        operand_val = 2 * T.ones(3)
        feed = {operand: operand_val}
        y_val, d_operand_val = executor.run(feed_dict=feed)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(-operand_val))
        assert T.array_equal(d_operand_val, -T.ones_like(operand_val))
def test_jacobian_summation_einsum(backendopt):
    """Verify ``einsum('ij->', x)`` and the result of ``ad.jacobians`` on it.

    Args:
        backendopt: Iterable of backend identifiers; each one is handed to
            ``T.set_backend`` before the graph is built and executed.
    """
    for backend in backendopt:
        T.set_backend(backend)

        x = ad.Variable(name="x", shape=[2, 2])
        total = ad.einsum('ij->', x)
        (jac_x,) = ad.jacobians(total, [x])
        executor = ad.Executor([total, jac_x])

        x_val = T.tensor([[1., 2.], [3., 4.]])
        feed = {x: x_val}
        total_val, jac_x_val = executor.run(feed_dict=feed)

        # The full contraction equals the plain sum of the entries, and the
        # test expects an all-ones result with the shape of x.
        expected_total_val = T.sum(x_val)
        expected_jac_x_val = T.ones_like(x_val)

        assert T.array_equal(total_val, expected_total_val)
        assert T.array_equal(jac_x_val, expected_jac_x_val)
def test_summation_einsum_2(backendopt):
    """Verify ``sum(einsum('ij,ab->ab', x, y))`` and its gradient w.r.t. x.

    Args:
        backendopt: Iterable of backend identifiers; each one is handed to
            ``T.set_backend`` before the graph is built and executed.
    """
    for backend in backendopt:
        T.set_backend(backend)

        x = ad.Variable(name="x", shape=[2, 2])
        y = ad.Variable(name="y", shape=[2, 2])
        contracted = ad.einsum('ij,ab->ab', x, y)
        out = ad.sum(contracted)
        (d_x,) = ad.gradients(out, [x])
        executor = ad.Executor([out, d_x])

        x_val = T.tensor([[1., 2.], [3., 4.]])
        y_val = T.tensor([[5., 6.], [7., 8.]])
        feed = {x: x_val, y: y_val}
        out_val, d_x_val = executor.run(feed_dict=feed)

        expected_out_val = T.sum(T.einsum('ij,ab->ab', x_val, y_val))
        # Every entry of x is expected to receive the same gradient: sum(y).
        expected_d_x_val = T.sum(y_val) * T.ones_like(x_val)

        assert T.array_equal(out_val, expected_out_val)
        assert T.array_equal(d_x_val, expected_d_x_val)
def test_transpose_einsum(backendopt):
    """Verify ``sum(einsum("ij->ji", x))`` and its gradient on each backend.

    Args:
        backendopt: Iterable of backend identifiers; each one is handed to
            ``T.set_backend`` before the graph is built and executed.
    """
    for backend in backendopt:
        T.set_backend(backend)

        x = ad.Variable(name="x", shape=[3, 2])
        transposed = ad.einsum("ij->ji", x)
        y = ad.sum(transposed)
        (d_x,) = ad.gradients(y, [x])
        executor = ad.Executor([y, d_x])

        x_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        feed = {x: x_val}
        y_val, d_x_val = executor.run(feed_dict=feed)

        # Reference value uses the backend transpose; the expected gradient
        # is all ones with the shape of x.
        expected_y_val = T.sum(T.transpose(x_val))
        expected_d_x_val = T.ones_like(x_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_y_val)
        assert T.array_equal(d_x_val, expected_d_x_val)
def test_einsum_3op(backendopt):
    """Verify a three-operand ``einsum('ik,kj,jl->il')`` and its gradients.

    Args:
        backendopt: Iterable of backend identifiers; each one is handed to
            ``T.set_backend`` before the graph is built and executed.
    """
    for backend in backendopt:
        T.set_backend(backend)

        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        x4 = ad.Variable(name="x4", shape=[3, 2])
        chained = ad.einsum('ik,kj,jl->il', x2, x3, x4)
        y = ad.sum(chained)
        d_x2, d_x3, d_x4 = ad.gradients(y, [x2, x3, x4])
        executor = ad.Executor([y, d_x2, d_x3, d_x4])

        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3
        x4_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        feed = {x2: x2_val, x3: x3_val, x4: x4_val}
        y_val, d_x2_val, d_x3_val, d_x4_val = executor.run(feed_dict=feed)

        # Reference forward value via two chained matrix products; the
        # upstream gradient of the sum is all ones with that shape.
        chained_val = T.dot(T.dot(x2_val, x3_val), x4_val)
        upstream = T.ones_like(chained_val)
        expected_y_val = T.sum(chained_val)

        # Each expected gradient contracts the upstream ones with the two
        # remaining operands.
        expected_d_x2_val = T.einsum("il, kj, jl->ik", upstream, x3_val,
                                     x4_val)
        expected_d_x3_val = T.einsum("ik, il, jl->kj", x2_val, upstream,
                                     x4_val)
        expected_d_x4_val = T.einsum("ik, kj, il->jl", x2_val, x3_val,
                                     upstream)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_y_val)
        assert T.array_equal(d_x2_val, expected_d_x2_val)
        assert T.array_equal(d_x3_val, expected_d_x3_val)
        assert T.array_equal(d_x4_val, expected_d_x4_val)