Exemplo n.º 1
0
def test_cpd_hessian_optimize_offdiag(backendopt):
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        hessian = ad.hessian(loss, [A, B, C])
        hessian_offdiag = [hessian[0][1], hessian[1][0]]
        for node in hessian_offdiag:
            optimize(node)
            assert isinstance(node, ad.AddNode)
            num_operations = len(
                list(
                    filter(lambda x: isinstance(x, ad.OpNode),
                           find_topo_sort([node]))))
            # This is currently non-deterministic.
            # assert num_operations == 14

        executor = ad.Executor(hessian_offdiag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })
Exemplo n.º 2
0
def cpd_als_shared_exec(dim, size, rank, num_iter, input_val=[]):

    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]
    new_A_list = generate_sequential_optimal_tree(new_A_list, A_list)

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        t0 = time.time()
        # als iterations
        for i in range(len(A_list)):

            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})

            if i == 0:
                A_val_list[0], = executor.run(feed_dict=feed_dict,
                                              out_nodes=[new_A_list[0]])
            else:
                A_val_list[i], = executor.run(feed_dict=feed_dict,
                                              reset_graph=False,
                                              evicted_inputs=[A_list[i - 1]],
                                              out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)

        print(f'At iteration {iter} the loss is: {loss_val}')
        t1 = time.time()
        print(f"[ {iter} ] Sweep took {t1 - t0} seconds")

    return A_val_list
Exemplo n.º 3
0
def test_cpd_hessian_optimize_diag(backendopt):
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        hessian = ad.hessian(loss, [A, B, C])
        hessian_diag = [hessian[0][0], hessian[1][1], hessian[2][2]]
        for node in hessian_diag:
            node = optimize(node)
            assert isinstance(node, ad.AddNode)
            num_operations = len(
                list(
                    filter(lambda x: isinstance(x, ad.OpNode),
                           find_topo_sort([node]))))
            """
            Use this assertion to test the optimize function.
            5 operations:
            1. T.einsum('ca,cb->ab',A,A),
            2. T.einsum('ca,cb->ab',B,B),
            3. T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),
            4. T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10)),
            5. (T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10))+
            T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10)))
            """
            assert num_operations == 5

        executor = ad.Executor(hessian_diag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })

        expected_hes_diag_val = [
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', B_val, B_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, B_val, B_val,
                         T.identity(size))
        ]
        assert T.norm(hes_diag_vals[0] - expected_hes_diag_val[0]) < 1e-8
        assert T.norm(hes_diag_vals[1] - expected_hes_diag_val[1]) < 1e-8
        assert T.norm(hes_diag_vals[2] - expected_hes_diag_val[2]) < 1e-8
Exemplo n.º 4
0
def test_hessian_quadratic(backendopt):

    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[3])
        H = ad.Variable(name="H", shape=[3, 3])
        y = ad.einsum("i,ij,j->", x, H, x)

        hessian = ad.hessian(y, [x])
        executor = ad.Executor([hessian[0][0]])

        x_val = T.random([3])
        H_val = T.random((3, 3))
        hessian_val, = executor.run(feed_dict={x: x_val, H: H_val})

        assert T.array_equal(hessian_val, H_val + T.transpose(H_val))
Exemplo n.º 5
0
def tucker_als_graph_shared_exec(dim, size, rank):
    """
    Build the graph used for Tucker ALS with shared execution.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of input tensor's each dim
    rank: the rank of the decomposition

    Returns
    -------
    tg: an TuckerGraph object
    executor: An shared executor
    loss: the optimized graph for tucker loss
    updates: an list containing updates graphs for each dimension
    intermediates: list of einsum nodes. Each node is the objective
        each Tucker ALS step optimized for
    """
    tg = TuckerGraph(dim, size, rank)

    updates = []
    for i in range(dim):

        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[i + dim for i in range(dim)], [i for i in range(dim)]])

        updates.append(simplify(new_core_A))

    loss = simplify(tg.losses[0])
    for i in range(1, len(tg.losses)):
        assert loss.name == simplify(tg.losses[i]).name

    updates = generate_sequential_optimal_tree(updates, tg.A_list)
    executor_updates = ad.Executor(updates)
    executor_loss = ad.Executor([loss])

    return tg, executor_updates, executor_loss, loss, updates, tg.intermediates
Exemplo n.º 6
0
def tucker_als_graph(dim, size, rank):
    """
    Build the graph used for Tucker ALS.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of input tensor's each dim
    rank: the rank of the decomposition

    Returns
    -------
    tg: an TuckerGraph object
    executors: list of executors. Each executor is used for
        one step of Tucker ALS
    intermediates: list of einsum nodes. Each node is the objective
        each Tucker ALS step optimized for
    """
    tg = TuckerGraph(dim, size, rank)

    executors_update = []

    for i in range(dim):

        core_A = tg.intermediates[i]
        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]
        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[i + dim for i in range(dim)], [i for i in range(dim)]])

        executor = ad.Executor([simplify(new_core_A)])
        executors_update.append(executor)

    executor_loss = ad.Executor([simplify(tg.losses[0])])

    return tg, executors_update, executor_loss, tg.intermediates
Exemplo n.º 7
0
    def _get_sub_hessian(cls, index, mpo_graph, mps_graph):

        # rebuild mps graph
        intermediate_set = {
            mps_graph.inputs[index], mps_graph.inputs[index + 1]
        }
        split_input_nodes = list(set(mps_graph.inputs) - intermediate_set)
        mps = split_einsum(mps_graph.output, split_input_nodes)

        # get the intermediate node
        intermediate, = [
            node for node in mps.inputs if isinstance(node, ad.EinsumNode)
        ]
        mps_outer_product = ad.tensordot(mps, mps, axes=[[], []])
        mpo_axes = list(range(len(mpo_graph.output.shape)))

        # The 0.5 factor makes sure that the Hessian can be written as an einsum
        objective = 0.5 * ad.tensordot(
            mps_outer_product, mpo_graph.output, axes=[mpo_axes, mpo_axes])
        hes = ad.hessian(objective, [intermediate])

        return intermediate, hes[0][0]
Exemplo n.º 8
0
def cpd_als(dim, size, rank, num_iter, input_val=[]):

    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        # als iterations
        for i in range(len(A_list)):

            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})
            A_val_list[i], = executor.run(feed_dict=feed_dict,
                                          out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)
        print(f'At iteration {iter} the loss is: {loss_val}')

    return A_val_list
Exemplo n.º 9
0
def test_cpd_hessian_simplify(backendopt):
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        A_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_list

        hessian = ad.hessian(loss, [A, B, C])
        # TODO (issue #101): test the off-diagonal elements
        hessian_diag = [hessian[0][0], hessian[1][1], hessian[2][2]]
        for node in hessian_diag:
            node = simplify(node)
            input_node = node.inputs[0]
            assert len(input_node.inputs) == 5

        executor = ad.Executor(hessian_diag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })

        expected_hes_diag_val = [
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', B_val, B_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, B_val, B_val,
                         T.identity(size))
        ]
        assert T.norm(hes_diag_vals[0] - expected_hes_diag_val[0]) < 1e-8
        assert T.norm(hes_diag_vals[1] - expected_hes_diag_val[1]) < 1e-8
        assert T.norm(hes_diag_vals[2] - expected_hes_diag_val[2]) < 1e-8