def test_add_mul_mix_3(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x2 = ad.Variable(name="x2", shape=[3])
        x3 = ad.Variable(name="x3", shape=[3])
        z = x2 * x2 + x2 + x3 + 3
        y = ad.sum(z * z + x3)

        grad_x2, grad_x3 = ad.gradients(y, [x2, x3])
        executor = ad.Executor([y, grad_x2, grad_x3])

        x2_val = 2 * T.ones(3)
        x3_val = 3 * T.ones(3)
        y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
            x2: x2_val,
            x3: x3_val
        })

        z_val = x2_val * x2_val + x2_val + x3_val + 3
        expected_yval = z_val * z_val + x3_val
        expected_grad_x2_val = 2 * \
            (x2_val * x2_val + x2_val + x3_val + 3) * (2 * x2_val + 1)
        expected_grad_x3_val = 2 * (x2_val * x2_val + x2_val + x3_val + 3) + 1

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(expected_yval))
        assert T.array_equal(grad_x2_val, expected_grad_x2_val)
        assert T.array_equal(grad_x3_val, expected_grad_x3_val)
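# A minimal NumPy sketch (plain NumPy, independent of the ad/T backend
# wrappers above; not part of the test suite) checking the closed-form
# gradients asserted in test_add_mul_mix_3 by finite differences: with
# z = x2*x2 + x2 + x3 + 3 and y = sum(z*z + x3), the chain rule gives
# dy/dx2 = 2*z*(2*x2 + 1) and dy/dx3 = 2*z + 1.
import numpy as np

def _fd_check_add_mul_mix_3():
    x2 = 2 * np.ones(3)
    x3 = 3 * np.ones(3)
    f = lambda u, w: np.sum((u * u + u + w + 3) ** 2 + w)
    z = x2 * x2 + x2 + x3 + 3
    eps = 1e-6
    for k in range(3):
        d = np.zeros(3)
        d[k] = eps
        # Forward differences against the analytic gradients.
        assert abs((f(x2 + d, x3) - f(x2, x3)) / eps
                   - (2 * z * (2 * x2 + 1))[k]) < 1e-3
        assert abs((f(x2, x3 + d) - f(x2, x3)) / eps - (2 * z + 1)[k]) < 1e-3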
def test_einsum_multiuse(backendopt):
    """
        Test manual fuse.
            A    B    inputs
            |\   |
            | \  |
            |  \ |
            |   C
            |  /
            | /
            output
        Note that here we assume A is split into 2 vars by some other operations.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a1", shape=[3, 2])
        a_copy = ad.Variable(name="a2", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a_copy, c)

        # New graph
        out_new = fuse_einsums(output, [a, a_copy, b])

        assert tree_eq(output, out_new, [a, a_copy, b])
def test_einsum_multitier(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        input_nodes1, zs1 = get_tree("set1")
        input_nodes2, zs2 = get_tree("set2")
        out1 = zs1 + zs2

        input_nodes3, zs3 = get_tree("set3")
        input_nodes4, zs4 = get_tree("set4")
        out2 = zs3 + zs4

        out = ad.einsum("ij, jk->ik", out1, out2)
        input_nodes = input_nodes1 + input_nodes2 + input_nodes3 + input_nodes4

        generated_feed_dict = gen_dict(input_nodes)

        executor = ad.Executor([out])
        z_val, = executor.run(feed_dict=generated_feed_dict)

        with OutputInjectedModeP(find_topo_sort_p([PseudoNode(out)])):
            trees = find_sub_einsumtree(PseudoNode(out))
            for tree in trees:
                out_node, in_nodes = tree
                new_z = fuse_einsums(out_node.node, in_nodes)
                replace_node(out_node, new_z)

        executor = ad.Executor([out])
        z_new_val, = executor.run(feed_dict=generated_feed_dict)

        assert float_eq(z_val, z_new_val)
def test_tree_distribution_two_layers(dist_op, backendopt):
    """
        [Distributive] ((A + B) * G) * C will produce AGC + BGC

        Note that (A + B) * G is contracted first.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        interm = ad.einsum('ik, kk->ik', dist_op(a, b), g)
        output = ad.einsum('ik,kj->ij', interm, c)
        new_output = distribute_tree(output)
        assert isinstance(new_output, dist_op)

        assert tree_eq(output, new_output, [a, b, c, g])
def test_prune_identity(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        a1 = ad.Variable(name="a1", shape=[3, 3])
        a2 = ad.Variable(name="a2", shape=[3, 3])
        i1 = ad.identity(3)
        i2 = ad.identity(3)
        i3 = ad.identity(3)

        out = ad.einsum("ab,cd,ac,be,ef->abdf", a1, a2, i1, i2, i3)
        prune_identity_nodes(out)
        """
        Explanation of the einsum above:
        The identity node i1 forces a and c to be the same index,
        so we can drop i1 and rewrite the expression as
        ad.einsum("ab,ad,be,ef->abdf", a1, a2, i2, i3).
        We can also merge i2 and i3 into a single identity because e
        is only an intermediate index. Therefore, we can rewrite the
        expression as ad.einsum("ab,ad,bf->abdf", a1, a2, i2).
        """
        out_expect = ad.einsum("ab,ad,bf->abdf", a1, a2, i2)
        assert len(out.inputs) == 3
        assert tree_eq(out, out_expect, [a1, a2])
def test_mul_jacobian_one_scalar(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[2, 2])

        # Test both cases: multiplying by a scalar from the left and the right.
        for y in [x1 * x2, x2 * x1]:
            jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])

            executor = ad.Executor([y, jacobian_x1, jacobian_x2])

            x1_val = T.tensor(2.)
            x2_val = T.tensor([[5., 6.], [7., 8.]])
            y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
                x1: x1_val,
                x2: x2_val
            })

            I = T.identity(2)
            expected_jacobian_x1_val = T.einsum("ai,bj,ij->ab", I, I, x2_val)
            expected_jacobian_x2_val = x1_val * T.einsum("ai,bj->abij", I, I)

            assert isinstance(y, ad.Node)
            assert T.array_equal(y_val, x1_val * x2_val)
            assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
            assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
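# A minimal NumPy sketch (plain NumPy; not part of the test suite) of the
# delta structure asserted above: for y = s * X (a scalar times a matrix),
# dy[a,b]/dX[i,j] = s * delta(a,i) * delta(b,j), i.e.
# s * einsum("ai,bj->abij", I, I), verified by finite differences.
import numpy as np

def _fd_check_scalar_mul_jacobian():
    s = 2.0
    X = np.array([[5., 6.], [7., 8.]])
    I = np.eye(2)
    jac = s * np.einsum("ai,bj->abij", I, I)
    eps = 1e-6
    for i in range(2):
        for j in range(2):
            Xp = X.copy()
            Xp[i, j] += eps
            # Each perturbed entry only moves the matching output entry.
            assert np.allclose(jac[:, :, i, j], (s * Xp - s * X) / eps)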
def test_add_jacobian_scalar(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])

        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(1.)
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        expected_jacobian_x2_val = T.tensor(1.)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_cpd_hessian_optimize_offdiag(backendopt):
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)
        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        hessian = ad.hessian(loss, [A, B, C])
        hessian_offdiag = [hessian[0][1], hessian[1][0]]
        for node in hessian_offdiag:
            optimize(node)
            assert isinstance(node, ad.AddNode)
            num_operations = len(
                list(
                    filter(lambda x: isinstance(x, ad.OpNode),
                           find_topo_sort([node]))))
            # This is currently non-deterministic.
            # assert num_operations == 14

        executor = ad.Executor(hessian_offdiag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })
def test_cpd_shared_exec(backendopt):
    dim = 3

    for datatype in backendopt:
        T.set_backend(datatype)

        input_val = init_rand_cp(dim, size, rank)
        A_list, input_tensor_val = input_val
        A_val, B_val, C_val = A_list

        outputs = cpd_als_shared_exec(dim, size, rank, 1, input_val)

        # expected values
        A_val = T.einsum(
            "abc,bk,ck->ak", input_tensor_val, B_val, C_val) @ T.inv(
                (T.transpose(B_val) @ B_val) * (T.transpose(C_val) @ C_val))
        B_val = T.einsum(
            "abc,ak,ck->bk", input_tensor_val, A_val, C_val) @ T.inv(
                (T.transpose(A_val) @ A_val) * (T.transpose(C_val) @ C_val))
        C_val = T.einsum(
            "abc,ak,bk->ck", input_tensor_val, A_val, B_val) @ T.inv(
                (T.transpose(A_val) @ A_val) * (T.transpose(B_val) @ B_val))

        assert T.norm(outputs[0] - A_val) < 1e-8
        assert T.norm(outputs[1] - B_val) < 1e-8
        assert T.norm(outputs[2] - C_val) < 1e-8
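# A minimal NumPy sketch (plain NumPy; not part of the test suite) of the
# identity behind the expected ALS updates above: the MTTKRP
# einsum("abc,bk,ck->ak", X, B, C) equals the mode-0 unfolding of X times
# a column-wise Khatri-Rao product of B and C (b-major ordering here),
# which is why each update solves its least-squares subproblem with the
# Gram matrix (B^T B) * (C^T C).
import numpy as np

def _check_mttkrp_identity(size=4, rank=3):
    rng = np.random.default_rng(0)
    X = rng.standard_normal((size, size, size))
    B = rng.standard_normal((size, rank))
    C = rng.standard_normal((size, rank))
    mttkrp = np.einsum("abc,bk,ck->ak", X, B, C)
    # Column-wise Khatri-Rao product: kr[b*size + c, k] = B[b, k] * C[c, k].
    kr = np.einsum("bk,ck->bck", B, C).reshape(size * size, rank)
    assert np.allclose(mttkrp, X.reshape(size, size * size) @ kr)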
def test_hvp2(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3, 1])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3, 1])
        y = ad.sum(
            ad.einsum("ab,bc->ac", ad.einsum("ab,bc->ac", ad.transpose(x), H),
                      x))

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        executor = ad.Executor([y, grad_x, Hv])
        x_val = T.tensor([[1.], [2.], [3.]])  # 3x1
        v_val = T.tensor([[1.], [2.], [3.]])  # 3x1
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3

        y_val, grad_x_val, Hv_val = executor.run(feed_dict={
            x: x_val,
            H: H_val,
            v: v_val
        })

        expected_yval = T.sum(T.transpose(x_val) @ H_val @ x_val)
        expected_grad_x_val = 2 * H_val @ x_val
        expected_hv_val = T.tensor([[4.], [8.], [12.]])

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
        assert T.array_equal(Hv_val, expected_hv_val)
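# A minimal NumPy sketch (plain NumPy; not part of the test suite) of the
# expected values above: for y = x^T H x with symmetric H, the gradient is
# 2 H x and the Hessian is 2 H, so the Hessian-vector product is 2 H v.
# The HVP is recovered as a finite difference of the gradient along v.
import numpy as np

def _fd_check_hvp():
    H = np.diag([2., 2., 2.])
    x = np.array([[1.], [2.], [3.]])
    v = np.array([[1.], [2.], [3.]])
    grad = lambda x: 2 * H @ x  # analytic gradient of x^T H x
    eps = 1e-6
    hv_fd = (grad(x + eps * v) - grad(x)) / eps
    assert np.allclose(hv_fd, 2 * H @ v)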
def cpd_gradient_descent(size, rank, learning_rate):
    dim = 3

    for datatype in BACKEND_TYPES:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        grad_A, grad_B, grad_C = ad.gradients(loss, [A, B, C])
        executor = ad.Executor([loss, grad_A, grad_B, grad_C])

        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        for i in range(100):
            loss_val, grad_A_val, grad_B_val, grad_C_val = executor.run(
                feed_dict={
                    input_tensor: input_tensor_val,
                    A: A_val,
                    B: B_val,
                    C: C_val
                })

            A_val -= learning_rate * grad_A_val
            B_val -= learning_rate * grad_B_val
            C_val -= learning_rate * grad_C_val
            print(f'At iteration {i} the loss is: {loss_val}')
def test_jtjvps(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum('ab, b->a', A, x)

        jtjvp_x, = ad.jtjvps(y, [x], [v])

        executor = ad.Executor([y, jtjvp_x])
        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5., 6.]])
        v_val = T.tensor([3., 4.])

        y_val, jtjvp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })

        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        expected_jtjvp_x_val = T.einsum('ba, ac->bc', T.transpose(A_val),
                                        A_val)
        expected_jtjvp_x_val = T.einsum('ab, b->a', expected_jtjvp_x_val,
                                        v_val)

        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(jtjvp_x_val, expected_jtjvp_x_val)
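# A minimal NumPy sketch (plain NumPy; not part of the test suite) of the
# expectation above: the Jacobian of the linear map y = A x is A itself
# (recovered column by column with finite differences below), so the
# Gauss-Newton-style product J^T J v reduces to A^T (A v).
import numpy as np

def _fd_check_jtjvp():
    A = np.array([[1., 2.], [3., 4.], [5., 6.]])
    x = np.array([1., 2.])
    v = np.array([3., 4.])
    f = lambda x: A @ x
    eps = 1e-6
    J = np.stack([(f(x + eps * e) - f(x)) / eps for e in np.eye(2)], axis=1)
    assert np.allclose(J.T @ (J @ v), A.T @ (A @ v))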
def test_vjps(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[3])
        y = ad.einsum('ab, b->a', A, x)

        transposed_vjp_x, = ad.transposed_vjps(y, [x], v)

        executor = ad.Executor([y, transposed_vjp_x])
        x_val = T.tensor([1., 2.])  # 2
        A_val = T.tensor([[1., 2.], [3., 4.], [5., 6.]])  # 3x2
        v_val = T.tensor([1., 2., 3.])  # 3

        y_val, transposed_vjp_x_val = executor.run(feed_dict={
            x: x_val,
            A: A_val,
            v: v_val
        })

        expected_yval = T.einsum('ab, b->a', A_val, x_val)
        expected_transposed_vjp_x_val = T.einsum('b, ba->a', v_val, A_val)

        assert isinstance(transposed_vjp_x, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(transposed_vjp_x_val,
                             expected_transposed_vjp_x_val)
def test_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x2 = ad.Variable(name="x2", shape=[3, 2])
        x3 = ad.Variable(name="x3", shape=[2, 3])
        matmul = ad.einsum('ik,kj->ij', x2, x3)
        y = ad.sum(matmul)

        grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

        executor = ad.Executor([y, grad_x2, grad_x3])
        x2_val = T.tensor([[1, 2], [3, 4], [5, 6]])  # 3x2
        x3_val = T.tensor([[7, 8, 9], [10, 11, 12]])  # 2x3

        y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
            x2: x2_val,
            x3: x3_val
        })

        expected_grad_sum = T.ones_like(T.dot(x2_val, x3_val))
        expected_yval = T.sum(T.dot(x2_val, x3_val))
        expected_grad_x2_val = T.dot(expected_grad_sum, T.transpose(x3_val))
        expected_grad_x3_val = T.dot(T.transpose(x2_val), expected_grad_sum)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x2_val, expected_grad_x2_val)
        assert T.array_equal(grad_x3_val, expected_grad_x3_val)
def test_three_mul_jacobian(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])

        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor([[1., 2.], [3., 4.]])
        x2_val = T.tensor([[5., 6.], [7., 8.]])
        x3_val = T.tensor([[9., 10.], [11., 12.]])
        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x1_val = T.einsum("ai,bj,ij,ij->abij", I, I, x2_val,
                                            x3_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_s2s_hvp(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3])
        H = ad.Variable(name="H", shape=[3, 3])
        v = ad.Variable(name="v", shape=[3])
        y = ad.einsum("a,ab,b->", x, H, x)

        grad_x, = ad.gradients(y, [x])
        Hv, = ad.hvp(output_node=y, node_list=[x], vector_list=[v])

        x_val = T.tensor([1., 2., 3.])  # 3
        v_val = T.tensor([1., 2., 3.])  # 3
        H_val = T.tensor([[2., 0., 0.], [0., 2., 0.], [0., 0., 2.]])  # 3x3
        expected_yval = T.einsum("a,ab,b->", x_val, H_val, x_val)
        expected_grad_x_val = 2 * T.einsum("ab,b->a", H_val, x_val)
        expected_hv_val = T.tensor([4., 8., 12.])

        StS = SourceToSource()
        forward_str = StS.forward([y], backend=datatype)
        m = import_code(forward_str)
        y_val_s2s, = m.forward([x_val, H_val])

        grad_str = StS.gradients(y, [x], backend=datatype)
        m = import_code(grad_str)
        grad_x_val_s2s, = m.gradients([x_val, H_val])

        hvp_str = StS.hvp(y, [x], [v], backend=datatype)
        m = import_code(hvp_str)
        Hv_val_s2s, = m.hvp([x_val, H_val, v_val])

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val_s2s, expected_yval)
        assert T.array_equal(grad_x_val_s2s, expected_grad_x_val)
        assert T.array_equal(Hv_val_s2s, expected_hv_val)
def test_three_mul_jacobian_scalars(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[])
        x2 = ad.Variable(name="x2", shape=[])
        x3 = ad.Variable(name="x3", shape=[])
        y = x1 * x2 * x3

        jacobian_x1, = ad.jacobians(y, [x1])

        executor = ad.Executor([y, jacobian_x1])

        x1_val = T.tensor(1.)
        x2_val = T.tensor(2.)
        x3_val = T.tensor(3.)
        y_val, jacobian_x1_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        expected_jacobian_x1_val = x2_val * x3_val

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, x1_val * x2_val * x3_val)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
def test_s2s_jtjvp(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2])
        A = ad.Variable(name="A", shape=[3, 2])
        v = ad.Variable(name="v", shape=[2])
        y = ad.einsum("ab,b->a", A, x)

        jtjvp_x, = ad.jtjvps(y, [x], [v])

        x_val = T.tensor([1., 2.])
        A_val = T.tensor([[1., 2.], [3., 4.], [5., 6.]])
        v_val = T.tensor([3., 4.])
        expected_jtjvp_x_val = T.einsum("ba,bc,c->a", A_val, A_val, v_val)

        StS = SourceToSource()
        forward_str = StS.forward([jtjvp_x],
                                  function_name='jtjvp',
                                  backend=datatype)
        m = import_code(forward_str)
        jtjvp_x_val_s2s, = m.jtjvp([A_val, v_val])

        assert isinstance(jtjvp_x, ad.Node)
        assert T.array_equal(jtjvp_x_val_s2s, expected_jtjvp_x_val)
def test_jacobian_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[3, 3, 3])
        x2 = ad.Variable(name="x2", shape=[3, 3, 3])
        y = ad.einsum("ikl,jkl->ijk", x1, x2)

        jacobian_x1, jacobian_x2 = ad.jacobians(y, [x1, x2])
        executor = ad.Executor([y, jacobian_x1, jacobian_x2])

        x1_val = T.random((3, 3, 3))
        x2_val = T.random((3, 3, 3))
        y_val, jacobian_x1_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
        })

        I = T.identity(3)
        expected_jacobian_x1_val = T.einsum("im,kn,jno->ijkmno", I, I, x2_val)
        expected_jacobian_x2_val = T.einsum("jm,kn,ino->ijkmno", I, I, x1_val)

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.einsum("ikl,jkl->ijk", x1_val, x2_val))
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
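# A minimal NumPy sketch (plain NumPy; not part of the test suite) of the
# delta structure asserted above: with y[i,j,k] = sum_l x1[i,k,l]*x2[j,k,l],
# the partial dy[i,j,k]/dx1[m,n,o] = delta(i,m)*delta(k,n)*x2[j,n,o], i.e.
# einsum("im,kn,jno->ijkmno", I, I, x2), verified by finite differences.
import numpy as np

def _fd_check_einsum_jacobian(dim=3):
    rng = np.random.default_rng(0)
    x1 = rng.standard_normal((dim, dim, dim))
    x2 = rng.standard_normal((dim, dim, dim))
    I = np.eye(dim)
    jac = np.einsum("im,kn,jno->ijkmno", I, I, x2)
    f = lambda x1: np.einsum("ikl,jkl->ijk", x1, x2)
    eps = 1e-6
    for m in range(dim):
        for n in range(dim):
            for o in range(dim):
                d = np.zeros((dim, dim, dim))
                d[m, n, o] = eps
                assert np.allclose(jac[..., m, n, o],
                                   (f(x1 + d) - f(x1)) / eps, atol=1e-4)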
def test_dmrg_shared_exec_iterative_solve_one_sweep():
    max_mps_rank = 5
    num = 7
    mpo_rank = 5
    size = 5
    num_iter = 10
    T.set_backend("numpy")

    h = qtn.MPO_rand_herm(num, mpo_rank, size)
    dmrg_quimb = qtn.DMRG2(h, bond_dims=[max_mps_rank])

    h_tensors = load_quimb_tensors(h)
    mps_tensors = load_quimb_tensors(dmrg_quimb.state)

    # dmrg based on ad
    mps_tensors, energy = dmrg_shared_exec_iterative_solve(
        h_tensors, mps_tensors, num_iter=num_iter, max_mps_rank=max_mps_rank)

    # dmrg based on quimb
    opts = {'max_bond': max_mps_rank}
    for _ in range(num_iter):
        quimb_energy = dmrg_quimb.sweep_right(canonize=True,
                                              verbosity=0,
                                              **opts)

    # We only test the energy (the lowest eigenvalue of h) rather than the
    # output mps (the eigenvector), because the eigenvectors can vary a lot
    # while the eigenvalue stays unchanged.
    assert (abs(energy - quimb_energy) < 1e-5)
def test_tree_distribution_w_add_output(dist_op, backendopt):
    """
        Test C * (A + B) + D * (E + F)
            = (C * A + C * B) + (D * E + D * F)
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 3])
        b = ad.Variable(name="b", shape=[3, 3])
        c = ad.Variable(name="c", shape=[3, 3])
        d = ad.Variable(name="d", shape=[3, 3])
        e = ad.Variable(name="e", shape=[3, 3])
        f = ad.Variable(name="f", shape=[3, 3])

        out1 = ad.einsum('ik,kj->ij', c, dist_op(a, b))
        out2 = ad.einsum('ik,kj->ij', d, dist_op(e, f))
        output = dist_op(out1, out2)
        new_output = distribute_tree(output)
        assert isinstance(new_output, dist_op)
        for input_node in new_output.inputs:
            assert isinstance(input_node, dist_op)

        assert tree_eq(output, new_output, [a, b, c, d, e, f])
def test_executor_dependent(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        A = ad.Variable(name="A", shape=[3, 3])
        B = ad.Variable(name="B", shape=[3, 3])

        AA = ad.einsum('ab,ab->', A, A)
        BB = ad.einsum('ab,ab->', B, B)
        AB = ad.einsum('ab,ab->', A, B)

        out_A = AA + AB
        out_B = AB + AA

        executor = ad.Executor({out_A, out_B})

        data = gen_dict([A, B])
        A_val, = executor.run(feed_dict=data,
                              reset_graph=False,
                              out_nodes=[out_A])
        data2 = gen_dict([A])
        data2.update({B: data[B]})

        B_val, = executor.run(feed_dict=data2, out_nodes=[out_B])
        # This checks that A's value is not reused in the computation of B_val.
        assert A_val != B_val
def test_tree_distribution_ppE(dist_op, backendopt):
    """
        [Distributive] ((A + B) + C) * G will produce AG + BG + CG

        Note that (A + B) has parent (A + B) + C.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 2])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[3, 2])
        g = ad.Variable(name="g", shape=[2, 2])

        output = ad.einsum('ik,kk->ik', dist_op(dist_op(a, b), c), g)
        new_output = distribute_tree(output)
        assert isinstance(new_output, dist_op)

        assert tree_eq(output, new_output, [a, b, c, g])
def test_tucker_als_shared_exec(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        input_val = init_rand_tucker(dim, size, rank)
        A_val_list, _, X_val = input_val

        A_val_list_ad, core_val_ad, _ = tucker_als_shared_exec(
            dim, size, rank, 1, input_val)

        A1_val, A2_val, A3_val = A_val_list

        # expected values
        # ttmc: tensor times matrix chain
        ttmc = T.einsum("abc,bk,cl->akl", X_val, A2_val, A3_val)
        ttmc_inner = T.einsum("akl,bkl->ab", ttmc, ttmc)
        mat, _, _ = T.svd(ttmc_inner)
        A1_val = mat[:, :rank]

        ttmc = T.einsum("abc,ak,cl->kbl", X_val, A1_val, A3_val)
        ttmc_inner = T.einsum("kbl,kcl->bc", ttmc, ttmc)
        mat, _, _ = T.svd(ttmc_inner)
        A2_val = mat[:, :rank]

        ttmc = T.einsum("abc,ak,bl->klc", X_val, A1_val, A2_val)
        ttmc_inner = T.einsum("klc,kld->cd", ttmc, ttmc)
        mat, _, _ = T.svd(ttmc_inner)
        A3_val = mat[:, :rank]

        core_val = T.einsum("abc,ak,bl,cm->klm", X_val, A1_val, A2_val,
                            A3_val)

        assert T.norm(A_val_list_ad[0] - A1_val) < 1e-8
        assert T.norm(A_val_list_ad[1] - A2_val) < 1e-8
        assert T.norm(A_val_list_ad[2] - A3_val) < 1e-8
        assert T.norm(core_val_ad - core_val) < 1e-8
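# A minimal NumPy sketch (plain NumPy; not part of the test suite) of the
# "tensor times matrix chain" used above: einsum("abc,bk,cl->akl", X, A2, A3),
# flattened over its last two modes, equals the mode-0 unfolding of X times
# the Kronecker product of A2 and A3 -- the matrix whose leading left
# singular vectors give the HOOI factor update.
import numpy as np

def _check_ttmc_identity(size=4, rank=2):
    rng = np.random.default_rng(0)
    X = rng.standard_normal((size, size, size))
    A2 = rng.standard_normal((size, rank))
    A3 = rng.standard_normal((size, rank))
    ttmc = np.einsum("abc,bk,cl->akl", X, A2, A3)
    assert np.allclose(ttmc.reshape(size, rank * rank),
                       X.reshape(size, size * size) @ np.kron(A2, A3))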
def test_einsum_fuse_w_identity(backendopt):
    """
        [Fuse einsum with multiple identities]

        We want to fuse
            A   identity  identity
            |      \      /
            |       \    /
            |        \  /
            |         es
            |        /
            |       /
            |      /
            |     /
              es
        Here es is einsum.
    """
    for datatype in backendopt:
        T.set_backend(datatype)
        a = ad.Variable(name="a", shape=[3, 3])
        es_identity = ad.einsum('ik,kj->ij', ad.identity(3), ad.identity(3))
        out = ad.einsum('ai,ij->aj', a, es_identity)

        tree, = find_sub_einsumtree(PseudoNode(out))
        out, ins = tree
        new_out = fuse_einsums(out.node, ins)

        assert tree_eq(out.node, new_out, [a])
def test_add_jacobian(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        y = x1 + x2

        jacobian_x2, = ad.jacobians(y, [x2])

        executor = ad.Executor([y, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        y_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(y, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(y_val, x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_einsum_multiuse_auto_copy(backendopt):
    """
        Test autolinearization and auto fuse.
            A    B    inputs
            |\   |
            | \  |
            |  \ |
            |   C
            |  /
            | /
            output
        Next: we would need to autoprune.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a1", shape=[3, 2])
        b = ad.Variable(name="b", shape=[2, 3])

        c = ad.einsum('ik,kj->ij', a, b)
        output = ad.einsum('ik,ij->kj', a, c)

        linearize(output)
        all_nodes = find_topo_sort([output])
        cloned_nodes = [
            tmp for tmp in all_nodes if isinstance(tmp, ad.CloneNode)
        ]

        out_new = fuse_einsums(output, [*cloned_nodes, b])
        # Test that every input is now fused.
        assert all([not isinstance(x, ad.EinsumNode) for x in out_new.inputs])

        assert tree_eq(output, out_new, [*cloned_nodes, b])
def test_sub_jacobian_w_chain(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        x2 = ad.Variable(name="x2", shape=[2, 2])
        x3 = ad.Variable(name="x3", shape=[2, 2])
        y = x1 - x2
        z = x3 - y

        jacobian_x2, = ad.jacobians(z, [x2])

        executor = ad.Executor([z, jacobian_x2])

        x1_val = T.tensor([[1, 1], [1, 1]])
        x2_val = T.tensor([[1, 1], [1, 1]])
        x3_val = T.tensor([[1, 1], [1, 1]])
        z_val, jacobian_x2_val = executor.run(feed_dict={
            x1: x1_val,
            x2: x2_val,
            x3: x3_val
        })

        I = T.identity(2)
        expected_jacobian_x2_val = T.einsum("ac,bd->abcd", I, I)

        assert isinstance(z, ad.Node)
        assert isinstance(jacobian_x2, ad.Node)
        assert T.array_equal(z_val, x3_val - x1_val + x2_val)
        assert T.array_equal(jacobian_x2_val, expected_jacobian_x2_val)
def test_einsum_fuse_graph(backendopt):
    """
        [Fuse einsum used twice]
        This case is rather subtle. We want to auto fuse
            A   B   C
            |    \ /
            |    es
            |   / |
            |  /  |
            es    |
              \   |
               es
        Here es is einsum.
    """
    for datatype in backendopt:
        T.set_backend(datatype)
        a = ad.Variable(name="a", shape=[3, 3])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[2, 3])

        BC = ad.einsum('ik, kj->ij', b, c)  # 3x3
        ABC = ad.einsum('ik, kj->ij', a, BC)  # 3x3

        out = ad.einsum('jk, ki->ji', ABC, BC)  # 3x3

        linearize(out)
        tree, = find_sub_einsumtree(PseudoNode(out))
        out, ins = tree
        new_z = fuse_einsums(out.node, ins)

        assert tree_eq(out.node, new_z, [a, b, c])
def test_add_mul_mix_2(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x1 = ad.Variable(name="x1", shape=[3])
        x2 = ad.Variable(name="x2", shape=[3])
        x3 = ad.Variable(name="x3", shape=[3])
        x4 = ad.Variable(name="x4", shape=[3])
        y = ad.sum(x1 + x2 * x3 * x4)

        grad_x1, grad_x2, grad_x3, grad_x4 = ad.gradients(y, [x1, x2, x3, x4])

        executor = ad.Executor([y, grad_x1, grad_x2, grad_x3, grad_x4])

        x1_val = 1 * T.ones(3)
        x2_val = 2 * T.ones(3)
        x3_val = 3 * T.ones(3)
        x4_val = 4 * T.ones(3)
        y_val, grad_x1_val, grad_x2_val, grad_x3_val, grad_x4_val = executor.run(
            feed_dict={
                x1: x1_val,
                x2: x2_val,
                x3: x3_val,
                x4: x4_val
            })

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(x1_val + x2_val * x3_val * x4_val))
        assert T.array_equal(grad_x1_val, T.ones_like(x1_val))
        assert T.array_equal(grad_x2_val, x3_val * x4_val)
        assert T.array_equal(grad_x3_val, x2_val * x4_val)
        assert T.array_equal(grad_x4_val, x2_val * x3_val)