Example #1
File: cpd.py Project: ByzanTine/AutoHOOT
def cpd_als_shared_exec(dim, size, rank, num_iter, input_val=[]):

    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]
    new_A_list = generate_sequential_optimal_tree(new_A_list, A_list)

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        t0 = time.time()
        # ALS iterations
        for i in range(len(A_list)):

            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})

            if i == 0:
                A_val_list[0], = executor.run(feed_dict=feed_dict,
                                              out_nodes=[new_A_list[0]])
            else:
                A_val_list[i], = executor.run(feed_dict=feed_dict,
                                              reset_graph=False,
                                              evicted_inputs=[A_list[i - 1]],
                                              out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)

        print(f'At iteration {iter} the loss is: {loss_val}')
        t1 = time.time()
        print(f"[ {iter} ] Sweep took {t1 - t0} seconds")

    return A_val_list
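A hypothetical driver for the function above (the sizes are illustrative, not taken from the source): initializing the factors explicitly with init_rand_cp makes the run reproducible, and its return value matches the (A_val_list, input_tensor_val) layout that cpd_als_shared_exec unpacks from input_val.

# Hypothetical sizes, for illustration only.
dim, size, rank = 3, 20, 5

input_val = init_rand_cp(dim, size, rank)
factors = cpd_als_shared_exec(dim, size, rank, num_iter=10,
                              input_val=input_val)

Note that the returned list is the same A_val_list object passed in via input_val, updated in place on each sweep.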
Example #2
File: mps.py Project: ByzanTine/AutoHOOT
def dmrg_shared_exec(mpo_tensors,
                     init_mps_tensors,
                     max_mps_rank,
                     num_iter=1,
                     sequence='R'):
    """
    Perform DMRG iterations with shared executions.
    """
    if sequence != "R":
        raise NotImplementedError

    num = len(mpo_tensors)
    size = mpo_tensors[0].shape[1]
    mpo_ranks = [mpo_tensors[i].shape[0] for i in range(1, len(mpo_tensors))]

    mps_tensors = copy.deepcopy(init_mps_tensors)
    mps_ranks = [mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))]

    dg = DmrgGraph.create(num, mpo_ranks, mps_ranks, size)
    for i, hes in enumerate(dg.hessians):
        dg.hessians[i] = simplify(hes)
        assert isinstance(dg.hessians[i], ad.EinsumNode)
    dg.hessians = generate_sequential_optimal_tree(dg.hessians, dg.mps_inputs)
    executor = ad.Executor(dg.hessians)

    # sequence is R
    for iter in range(num_iter):

        mps_tensors = gauge_transform_mps(mps_tensors, right=True)
        mps_ranks = [
            mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))
        ]

        for i in range(num - 1):

            dg.update_graph(num, mpo_ranks, mps_ranks, size)

            feed_dict = dict(zip(dg.mpo_inputs, mpo_tensors))
            feed_dict.update(dict(zip(dg.mps_inputs, mps_tensors)))

            hes_val, = executor.run(feed_dict=feed_dict,
                                    out_nodes=[dg.hessians[i]])

            # get the smallest eigenvalue and the corresponding eigenvector
            # of hes_val
            eigvec_shape = dg.intermediates[i].shape
            eig_val, eigvec = get_smallest_eigenpair(hes_val, eigvec_shape)

            # Update the two sites of mps
            mps_tensors[i], mps_tensors[i + 1] = dmrg_local_update(
                dg.intermediates[i], eigvec, max_mps_rank)

            # update the rank
            mps_ranks[i] = mps_tensors[i + 1].shape[0]

        print(f'At iteration {iter} the smallest eigenvalue is: {eig_val}')

    return mps_tensors, eig_val
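get_smallest_eigenpair is defined elsewhere in the project; a plausible dense NumPy sketch of its contract (an assumption — the real helper may use an iterative method) is to flatten the local Hessian into a symmetric matrix, take the lowest eigenpair, and reshape the eigenvector back:

import numpy as np

def get_smallest_eigenpair_dense(hes_val, eigvec_shape):
    # Flatten the order-2k Hessian into an (n, n) symmetric matrix, where n
    # is the number of entries in the two MPS sites being updated.
    n = int(np.prod(eigvec_shape))
    H = np.asarray(hes_val).reshape(n, n)
    eig_vals, eig_vecs = np.linalg.eigh(H)  # eigenvalues in ascending order
    # Return the smallest eigenvalue and its eigenvector, reshaped so that
    # dmrg_local_update can split it back into two sites.
    return eig_vals[0], eig_vecs[:, 0].reshape(eigvec_shape)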
Example #3
def tucker_als_graph_shared_exec(dim, size, rank):
    """
    Build the graph used for Tucker ALS with shared execution.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of each dimension of the input tensor
    rank: the rank of the decomposition

    Returns
    -------
    tg: a TuckerGraph object
    executor_updates: a shared executor for all the update steps
    executor_loss: an executor for the Tucker loss
    loss: the optimized graph for the Tucker loss
    updates: a list containing the update graph for each dimension
    intermediates: list of einsum nodes. Each node is the objective
        that each Tucker ALS step optimizes
    """
    tg = TuckerGraph(dim, size, rank)

    updates = []
    for i in range(dim):

        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[j + dim for j in range(dim)], [j for j in range(dim)]])

        updates.append(simplify(new_core_A))

    loss = simplify(tg.losses[0])
    for i in range(1, len(tg.losses)):
        assert loss.name == simplify(tg.losses[i]).name

    updates = generate_sequential_optimal_tree(updates, tg.A_list)
    executor_updates = ad.Executor(updates)
    executor_loss = ad.Executor([loss])

    return tg, executor_updates, executor_loss, loss, updates, tg.intermediates
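The axes argument [[j + dim ...], [j ...]] encodes a Newton step in tensor form: ad.tensorinv(hes) has order 2 * dim (dim output indices followed by dim input indices), so contracting its trailing dim axes against all axes of grad applies the inverse Hessian to the gradient. Because the per-mode residual is linear in core_A, the loss is quadratic in it and one such step lands on the subproblem optimum. A self-contained NumPy analogue for dim = 2 (illustrative sizes):

import numpy as np

rng = np.random.default_rng(0)
r = 3
H = rng.standard_normal((r, r, r, r))  # stand-in order-4 Hessian
g = rng.standard_normal((r, r))        # stand-in gradient

# tensorinv with ind=2 inverts H viewed as an (r*r, r*r) matrix.
H_inv = np.linalg.tensorinv(H, ind=2)
step = np.tensordot(H_inv, g, axes=[[2, 3], [0, 1]])

# Equivalent flattened form: solve the linear system directly.
step_flat = np.linalg.solve(H.reshape(r * r, r * r), g.ravel())
assert np.allclose(step, step_flat.reshape(r, r))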
Example #4
def tucker_als_graph(dim, size, rank):
    """
    Build the graph used for Tucker ALS.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of each dimension of the input tensor
    rank: the rank of the decomposition

    Returns
    -------
    tg: a TuckerGraph object
    executors_update: list of executors. Each executor performs
        one step of Tucker ALS
    executor_loss: an executor for the Tucker loss
    intermediates: list of einsum nodes. Each node is the objective
        that each Tucker ALS step optimizes
    """
    tg = TuckerGraph(dim, size, rank)

    executors_update = []

    for i in range(dim):

        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[j + dim for j in range(dim)], [j for j in range(dim)]])

        executor = ad.Executor([simplify(new_core_A)])
        executors_update.append(executor)

    executor_loss = ad.Executor([simplify(tg.losses[0])])

    return tg, executors_update, executor_loss, tg.intermediates
Example #5
def test_simplify_symmetric_einsum_expr(backendopt):

    H = ad.Variable(name="H", shape=[2, 2], symmetry=[[0, 1]])
    x1 = ad.Variable(name="x1", shape=[2])
    x2 = ad.Variable(name="x2", shape=[2])

    inner1 = ad.einsum("ab,a,b->", H, x1, x2)
    inner2 = ad.einsum("ab,b,a->", H, x1, x2)
    out = 0.5 * inner1 + 0.5 * inner2
    newout_simplify = simplify(out)

    # ad.einsum("ab,a,b->", H, x1, x2) or ad.einsum("ab,b,a->", H, x1, x2)
    assert isinstance(newout_simplify, ad.EinsumNode)
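Numeric intuition for the test above, as a standalone NumPy check: when H is symmetric, the two contractions coincide, so averaging them is redundant and simplify can emit a single einsum node.

import numpy as np

rng = np.random.default_rng(0)
H = rng.standard_normal((2, 2))
H = (H + H.T) / 2  # enforce the symmetry declared via symmetry=[[0, 1]]
x1, x2 = rng.standard_normal(2), rng.standard_normal(2)

# H[a, b] == H[b, a] makes both index orders contract to the same scalar.
assert np.isclose(np.einsum("ab,a,b->", H, x1, x2),
                  np.einsum("ab,b,a->", H, x1, x2))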
Example #6
File: cpd.py Project: ByzanTine/AutoHOOT
def cpd_als(dim, size, rank, num_iter, input_val=[]):

    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        # ALS iterations
        for i in range(len(A_list)):

            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})
            A_val_list[i], = executor.run(feed_dict=feed_dict,
                                          out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)
        print(f'At iteration {iter} the loss is: {loss_val}')

    return A_val_list
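A hedged comparison sketch (assuming both functions above are importable from this cpd.py): from the same starting point, cpd_als and cpd_als_shared_exec perform the same per-factor updates, while the shared variant additionally reuses contractions across the per-factor subgraphs. Both mutate the A_val_list inside input_val in place, so each call gets its own deep copy here.

import copy

dim, size, rank = 3, 20, 5  # illustrative sizes
input_val = init_rand_cp(dim, size, rank)

factors_plain = cpd_als(dim, size, rank, num_iter=5,
                        input_val=copy.deepcopy(input_val))
factors_shared = cpd_als_shared_exec(dim, size, rank, num_iter=5,
                                     input_val=copy.deepcopy(input_val))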
Example #7
def test_simplify_optimize_w_tail_einsum(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        A = ad.Variable(name="A", shape=[2, 2])

        out = ad.einsum("ab,bc->ac", A,
                        ad.einsum("ab,bc->ac", ad.identity(2), ad.identity(2)))
        newout_optimize = optimize(out)
        newout_simplify = simplify(out)

        assert newout_optimize == A
        assert newout_simplify == A
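The same collapse can be checked numerically: the tail einsum of two identity matrices is itself the identity, so the outer einsum reduces to A.

import numpy as np

A = np.random.default_rng(0).standard_normal((2, 2))
tail = np.einsum("ab,bc->ac", np.eye(2), np.eye(2))  # equals np.eye(2)
assert np.allclose(np.einsum("ab,bc->ac", A, tail), A)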
Example #8
def test_simplify_inv_w_identity(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        A = ad.Variable(name="A", shape=[2, 2])

        out = ad.einsum("ab,cd->acbd", A, ad.tensorinv(ad.identity(3)))
        newout = simplify(out)

        assert isinstance(newout, ad.EinsumNode)
        assert isinstance(newout.inputs[1], ad.IdentityNode)

        assert tree_eq(out, newout, [A], tol=1e-6)
Example #9
def test_simplify_inv_w_redundent_einsum(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        A = ad.Variable(name="A", shape=[2, 2])

        out = ad.einsum("ab,cd->abcd", A, ad.tensorinv(ad.einsum("ab->ab", A)))
        newout = simplify(out)

        inv_node = newout.inputs[1]

        assert isinstance(inv_node.inputs[0], ad.VariableNode)

        assert tree_eq(out, newout, [A], tol=1e-6)
Example #10
def test_dmrg_shared_exec_graph():

    from graph_ops.graph_transformer import simplify
    from graph_ops.graph_als_optimizer import generate_sequential_optimal_tree
    from utils import find_topo_sort

    num, rank, size = 4, 3, 2
    mpo_ranks = [rank for i in range(1, num)]
    mps_ranks = [rank for i in range(1, num)]

    dg = DmrgGraph.create(num, mpo_ranks, mps_ranks, size)
    for i, hes in enumerate(dg.hessians):
        dg.hessians[i] = simplify(hes)
        assert isinstance(dg.hessians[i], ad.EinsumNode)
    dg.hessians = generate_sequential_optimal_tree(dg.hessians, dg.mps_inputs)

    # 8 input variables (4 H terms in the MPO, 4 A terms in the MPS), 7 einsum nodes
    assert len(find_topo_sort(dg.hessians)) == 15
Example #11
def test_cpd_hessian_simplify(backendopt):
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_val_list

        hessian = ad.hessian(loss, [A, B, C])
        # TODO (issue #101): test the off-diagonal elements
        hessian_diag = [hessian[0][0], hessian[1][1], hessian[2][2]]
        for node in hessian_diag:
            node = simplify(node)
            input_node = node.inputs[0]
            assert len(input_node.inputs) == 5

        executor = ad.Executor(hessian_diag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })

        expected_hes_diag_val = [
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', B_val, B_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, B_val, B_val,
                         T.identity(size))
        ]
        assert T.norm(hes_diag_vals[0] - expected_hes_diag_val[0]) < 1e-8
        assert T.norm(hes_diag_vals[1] - expected_hes_diag_val[1]) < 1e-8
        assert T.norm(hes_diag_vals[2] - expected_hes_diag_val[2]) < 1e-8
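The expected_hes_diag_val formulas can be derived without the autodiff machinery: the CP residual is linear in each factor, so the Hessian block with respect to A is 2 * J^T J, where J is the residual's Jacobian. A standalone NumPy check of the same identity (illustrative sizes, not the test module's size and rank constants):

import numpy as np

rng = np.random.default_rng(0)
size_, rank_ = 4, 3
A = rng.standard_normal((size_, rank_))
B = rng.standard_normal((size_, rank_))
C = rng.standard_normal((size_, rank_))

# Jacobian of R[i,j,k] = X[i,j,k] - sum_b A[i,b] B[j,b] C[k,b] w.r.t. A;
# the minus sign cancels inside J^T J.
J = np.einsum('ia,jb,kb->ijkab', np.eye(size_), B, C)
hessian = 2 * np.einsum('ijkab,ijkcd->abcd', J, J)

expected = 2 * np.einsum('eb,ed,fb,fd,ac->abcd', B, B, C, C, np.eye(size_))
assert np.allclose(hessian, expected)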
Example #12
def dmrg_shared_exec_iterative_solve(mpo_tensors,
                                     init_mps_tensors,
                                     max_mps_rank,
                                     num_iter=1,
                                     sequence='R'):
    """
    Perform DMRG iterations with shared execution and iterative solve.
    """
    if sequence != "R":
        raise NotImplementedError

    num = len(mpo_tensors)
    size = mpo_tensors[0].shape[1]
    mpo_ranks = [mpo_tensors[i].shape[0] for i in range(1, len(mpo_tensors))]

    mps_tensors = copy.deepcopy(init_mps_tensors)
    mps_ranks = [mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))]

    dg = DmrgImplicitUpdateGraph.create(num, mpo_ranks, mps_ranks, size)
    for i, hvp in enumerate(dg.hvps):
        dg.hvps[i] = simplify(hvp)
        assert isinstance(dg.hvps[i], ad.EinsumNode)
    dg.hvps = generate_sequential_optimal_tree(dg.hvps, dg.mps_inputs)

    executor_hvps = ad.Executor(dg.hvps)
    executor_intermediates = ad.Executor(dg.intermediates)

    # sequence is R
    for iter in range(num_iter):

        mps_tensors = gauge_transform_mps(mps_tensors, right=True)
        mps_ranks = [
            mps_tensors[i].shape[0] for i in range(1, len(mps_tensors))
        ]

        for i in range(num - 1):

            dg.update_graph(num, mpo_ranks, mps_ranks, size)

            feed_dict = dict(zip(dg.mpo_inputs, mpo_tensors))
            feed_dict.update(dict(zip(dg.mps_inputs, mps_tensors)))

            intermediate, = executor_intermediates.run(
                feed_dict=feed_dict, out_nodes=[dg.intermediates[i]])

            # Calculate the eigenvector using the implicit solver.
            # Note: This only supports NumPy datatype.
            # TODO: Add a general Lanczos solver that adapts to all the backends.
            operator = DMRGLinearOperator(dg, executor_hvps, i, feed_dict)
            # Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.eigsh.html
            eig_vals, eigvecs = spla.eigsh(operator,
                                           k=1,
                                           ncv=4,
                                           tol=1e-3,
                                           which='SA',
                                           v0=intermediate.ravel())
            eig_val, eigvec = eig_vals[0], eigvecs[:, 0]
            eigvec = T.reshape(eigvec, dg.intermediates[i].shape)

            # Update the two sites of mps
            mps_tensors[i], mps_tensors[i + 1] = dmrg_local_update(
                dg.intermediates[i], eigvec, max_mps_rank)

            # update the rank
            mps_ranks[i] = mps_tensors[i + 1].shape[0]
            print(f'At site {i}, the smallest eigenvalue is: {eig_val}')

        print(f'At iteration {iter} the smallest eigenvalue is: {eig_val}')
    return mps_tensors, eig_val
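DMRGLinearOperator is defined elsewhere in the project; the pattern it follows is SciPy's matrix-free LinearOperator, which lets eigsh find the smallest eigenpair from Hessian-vector products alone. A minimal standalone sketch of that pattern (the dense matrix below stands in for the local Hessian; the real operator runs executor_hvps inside its matvec):

import numpy as np
import scipy.sparse.linalg as spla

rng = np.random.default_rng(0)
n = 64
M = rng.standard_normal((n, n))
M = (M + M.T) / 2  # symmetric stand-in for the local DMRG Hessian

# Matrix-free operator: eigsh only ever calls matvec, so the real Hessian
# never needs to be materialized.
operator = spla.LinearOperator((n, n), matvec=lambda v: M @ v, dtype=M.dtype)
eig_vals, eig_vecs = spla.eigsh(operator, k=1, which='SA')

assert np.isclose(eig_vals[0], np.linalg.eigvalsh(M)[0])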