Example No. 1
def test_proj_head():
    print('-' * 100)
    edges, weights = simu_graph(25)  # get grid graph
    sub_graph = [6, 7, 8, 9]
    x = np.random.normal(0.0, 0.1, 25)
    x[sub_graph] = 5.
    n, m = len(x), len(weights)  # n: number of nodes, m: number of edges
    re = head_proj(edges=edges, weights=weights, x=x, g=1, s=4, budget=3.,
                   delta=1. / 169., err_tol=1e-6, max_iter=30, root=-1,
                   pruning='strong', epsilon=1e-6, verbose=0)
    re_nodes, re_edges, p_x = re
    print('test1 result head nodes: ', re_nodes)
    print('test1 result head edges: ', re_edges)
    print(p_x)
    re = head_proj(edges=edges, weights=weights, x=np.zeros(n), g=1, s=4,
                   budget=3., delta=1. / 169., err_tol=1e-6, max_iter=30,
                   root=-1, pruning='strong', epsilon=1e-6, verbose=0)
    re_nodes, re_edges, p_x = re
    print('test2 result head nodes: ', re_nodes)
    print('test2 result head edges: ', re_edges)
    print(p_x)
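
For context, here is a minimal sketch of a grid-graph builder that produces the (edges, weights) pair these tests expect from simu_graph; the name simu_grid_graph, the (m, 2) edge layout, and the unit weights are assumptions for illustration, not the library's actual implementation.

import numpy as np

def simu_grid_graph(width, height):
    # hypothetical stand-in for simu_graph: build a width x height grid graph
    # and return (edges, weights), where edges is an (m, 2) int array of node
    # pairs and weights holds a unit cost per edge
    edges = []
    for row in range(height):
        for col in range(width):
            node = row * width + col
            if col + 1 < width:
                edges.append((node, node + 1))      # horizontal neighbor
            if row + 1 < height:
                edges.append((node, node + width))  # vertical neighbor
    edges = np.asarray(edges, dtype=np.int64)
    weights = np.ones(len(edges))
    return edges, weights

With this sketch, simu_grid_graph(5, 5) plays the role of simu_graph(25) on the 5 x 5 grid used above.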
Example No. 2
def test_proj_algo():
    print('-' * 100)
    edges, weights = simu_graph(25)  # get grid graph
    sub_graph = [6, 7, 8, 9]
    x = np.random.normal(0.0, 0.1, 25)
    x[sub_graph] = 5.
    n, m = len(x), len(weights)  # n: number of nodes, m: number of edges
    re = head_proj(edges=edges, weights=weights, x=x, g=1, s=4, budget=3.,
                   delta=1. / 169., err_tol=1e-6, max_iter=30, root=-1,
                   pruning='strong', epsilon=1e-6, verbose=0)
    re_nodes, re_edges, p_x = re
    print('test1 result head nodes: ', re_nodes)
    print('test1 result head edges: ', re_edges)
    re = head_proj(edges=edges, weights=weights, x=np.zeros(n), g=1, s=4,
                   budget=3., delta=1. / 169., err_tol=1e-6, max_iter=30,
                   root=-1, pruning='strong', epsilon=1e-6, verbose=0)
    re_nodes, re_edges, p_x = re
    print('test2 result head nodes: ', re_nodes)
    print('test2 result head edges: ', re_edges)
    re = tail_proj(edges=edges, weights=weights, x=x, g=1, s=4, root=-1,
                   max_iter=20, budget=3., nu=2.5)
    re_nodes, re_edges, p_x = re
    print('test3 result tail nodes: ', re_nodes)
    print('test3 result tail edges: ', re_edges)
    re = tail_proj(edges=edges, weights=weights, x=np.zeros(n), g=1, s=4,
                   root=-1, max_iter=20, budget=3., nu=2.5)
    re_nodes, re_edges, p_x = re
    print('test4 result tail nodes: ', re_nodes)
    print('test4 result tail edges: ', re_edges)
    wrapper = HeadTailWrapper(edges=edges, weights=weights)
    re = wrapper.run_head(x=x, g=1, s=4, budget=3., delta=1. / 169.)
    re_nodes, re_edges, p_x = re
    print('test5 result head nodes: ', re_nodes)
    print('test5 result head edges: ', re_edges)
    re = wrapper.run_tail(x=x, g=1, s=4, budget=3, nu=2.5)
    re_nodes, re_edges, p_x = re
    print('test6 result tail nodes: ', re_nodes)
    print('test6 result tail edges: ', re_edges)
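
The wrapper used in tests 5 and 6 most likely just caches the graph and forwards to the two projection routines; below is a minimal sketch under that assumption (HeadTailWrapperSketch is a hypothetical name, and the fixed keyword arguments mirror the values used in the calls above).

class HeadTailWrapperSketch(object):
    # hypothetical reimplementation of HeadTailWrapper, for illustration only
    def __init__(self, edges, weights):
        self.edges = edges      # cache the graph once
        self.weights = weights  # so each call only passes the vector x

    def run_head(self, x, g, s, budget, delta):
        return head_proj(edges=self.edges, weights=self.weights, x=x, g=g,
                         s=s, budget=budget, delta=delta, err_tol=1e-6,
                         max_iter=30, root=-1, pruning='strong',
                         epsilon=1e-6, verbose=0)

    def run_tail(self, x, g, s, budget, nu):
        return tail_proj(edges=self.edges, weights=self.weights, x=x, g=g,
                         s=s, budget=budget, nu=nu, root=-1, max_iter=20)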
Example No. 3
def optimize(instance, sparsity, trade_off, max_iter, epsilon=1e-3):

    graph = instance['graph']
    true_subgraph = instance['subgraph']
    features = instance['features']
    block_node_sets = instance['block_node_sets']  # block id -> global node ids
    block_boundary_edges_dict = instance['block_boundary_edges_dict']  # block id -> boundary edge set

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    num_blocks = len(block_boundary_edges_dict)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: serial graph block-structured matching pursuit')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of blocks: {:d}'.format(num_blocks))

    node_id_dict = relabel_nodes(block_node_sets)  # relabel the nodes of each block with block-local indices: global node id -> block node id
    relabeled_edge_sets = relabel_edges(graph, block_node_sets, node_id_dict)  # relabel the edges of each block with block node ids

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.

    func = SerialSumEMS(features=features, block_node_sets=block_node_sets, node_id_dict=node_id_dict, block_boundary_edges_dict=block_boundary_edges_dict, trade_off=trade_off)
    true_x = np.zeros(num_nodes)
    true_x[true_subgraph] = 1.
    true_x = np.array(true_x)
    true_obj_val, true_sum_ems_val, true_penalty = func.get_obj_val(true_x, block_boundary_edges_dict)
    logger.debug('ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.format(true_obj_val, true_sum_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for b in range(num_blocks):
            block_x = current_x[sorted(block_node_sets[b])]
            block_boundary_edge_x_dict = get_boundary_edge_x_dict(current_x, block_boundary_edges_dict[b], node_id_dict)  # (block node id of endpoint 1, global node id of endpoint 2) -> value of node 2
            block_features = features[sorted(block_node_sets[b])]
            block_grad = func.get_gradient(block_x, block_features, block_boundary_edge_x_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x, dtype=np.float64)

            normalized_block_grad = normalize_gradient(block_x, block_grad)
            edges = np.array(relabeled_edge_sets[b])
            edge_weights = np.ones(len(edges))

            start_proj_time = time.time()
            re_head = head_proj(edges=edges, weights=edge_weights, x=normalized_block_grad, g=1, s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8, root=-1, pruning='strong', epsilon=1e-10, verbose=0)
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time
            block_gamma_x = set(re_nodes)
            block_supp_x = set(np.nonzero(block_x)[0])  # support of the current block_x
            block_omega_x = block_gamma_x | block_supp_x
            omega_x_list.append(block_omega_x)

        bx = func.argmax_obj_with_proj_serial(current_x, omega_x_list)

        for b in range(num_blocks):
            edges = np.array(relabeled_edge_sets[b])
            edge_weights = np.ones(len(edges))
            block_bx = bx[sorted(block_node_sets[b])]
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges, weights=edge_weights, x=block_bx, g=1, s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)
            block_x = np.zeros_like(current_x[block_node_sets[b]])
            block_x[list(psi_x)] = block_bx[list(psi_x)]
            current_x[sorted(block_node_sets[b])] = block_x

        acc_proj_time += iter_proj_time

        # post process

        obj_val, sum_ems_val, penalty = func.get_obj_val(current_x, block_boundary_edges_dict)
        logger.debug('objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, sum_ems_val, penalty))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))  # accumulative projection time
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if diff_norm_x < epsilon:  # TODO: refine the stopping criterion, e.g., also stop when the objective value decreases
            break

    run_time = time.time() - start_time
    obj_val, global_ems_val, penalty = func.get_obj_val(current_x, block_boundary_edges_dict)
    logger.debug('objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, global_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
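
Each block's gradient is normalized before the head projection. Below is a plausible sketch of normalize_gradient under a box constraint 0 <= x <= 1 (which later examples enforce with normalize); the masking rule is an assumption about the library's behavior, not a confirmed implementation.

import numpy as np

def normalize_gradient_sketch(x, grad):
    # hypothetical normalize_gradient: zero out ascent directions that the
    # box constraint 0 <= x <= 1 makes infeasible, keep all others
    normalized = np.copy(grad)
    normalized[(x <= 0.) & (grad < 0.)] = 0.  # cannot move below 0
    normalized[(x >= 1.) & (grad > 0.)] = 0.  # cannot move above 1
    return normalized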
Example No. 4
def optimize(instance, sparsity, threshold, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None):

    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    # true_subgraph = instance['true_subgraph']
    true_subgraph = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]  # note: hard-coded ground truth overriding instance['true_subgraph']
    features = instance['weight']

    first_graph_edges = np.array(first_graph.edges)
    # second_graph_edges = np.array(second_graph.edges)
    first_graph_edge_weights = np.ones(first_graph.number_of_edges())
    # second_graph_edge_weights = np.ones(second_graph.number_of_edges())

    print(first_graph.number_of_nodes())
    print(second_graph.number_of_nodes())

    if first_graph.number_of_nodes() != second_graph.number_of_nodes():
        raise ValueError('wrong dual network input: node counts differ')

    num_nodes = first_graph.number_of_nodes()
    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(nx.density(second_graph)))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    # calculate shortest path lengths for all node pairs, which will be used in density projection
    start_time = time.time()
    shortest_path_lengths = {}
    for node in second_graph.nodes():
        x = nx.shortest_path_length(second_graph, source=node)
        shortest_path_lengths[node] = x
    print('shortest_path time: {:.5f}'.format(time.time() - start_time))

    start_time = time.time()
    acc_proj_time = 0.

    func = DualEMS(features, trade_off)

    if logger:
        print(sorted(true_subgraph))

        true_x = np.zeros(num_nodes)
        # print(type(true_subgraph))
        true_x[list(true_subgraph)] = 1.
        true_x = np.array(true_x)
        true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(true_x, true_x)
        print('ground truth values: {}, {}, {}, {}'.format(true_obj_val, x_ems_val, y_ems_val, penalty))

    # initialize node coefficients
    current_x, current_y = func.get_init_x_zeros()
    current_x += 1e-6  # start close to zero (not exactly zero) to avoid division-by-zero errors
    current_y += 1e-6

    print('iteration start funval', func.get_obj_val(current_x, current_y))

    # start optimization
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x, prev_y = np.copy(current_x), np.copy(current_y)

        # handle first graph
        grad_x = func.get_gradient(current_x, current_y)
        iter_proj_time = 0.
        # iter_time = time.time()
        if iter == 0:
            norm_grad_x = normalize_gradient(np.zeros_like(current_x), grad_x)
        else:
            norm_grad_x = normalize_gradient(current_x, grad_x)


        # head projection
        start_proj_time = time.time()
        re_head = head_proj(edges=first_graph_edges, weights=first_graph_edge_weights, x=norm_grad_x, g=1, s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8, root=-1, pruning='strong', epsilon=1e-10, verbose=0)  # head projection
        re_nodes, _, _ = re_head
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for x: {:.5f}'.format(time.time() - start_proj_time))
        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        if iter == 0:
            tmp_x = np.zeros_like(current_x) + learning_rate * grad_x * indicator_x  # note: do not update the current variables here; only use the intermediate result
        else:
            tmp_x = current_x + learning_rate * grad_x * indicator_x

        omega_x = set(np.nonzero(tmp_x)[0])

        # handle second graph
        grad_y = func.get_gradient(current_y, current_x)
        # note, test not normalize
        if iter == 0:
            norm_grad_y = normalize_gradient(np.zeros_like(current_y), grad_y)
        else:
            norm_grad_y = normalize_gradient(current_y, grad_y)
            # norm_grad_y = grad_y # note, test !!!

        # note: values must be positive; the eventallpairs algorithm requires positive node weights
        norm_grad_y = np.absolute(norm_grad_y)

        start = 5
        ratio = 5
        steps = 8
        progression = [start * ratio ** i for i in range(steps)]  # geometric sequence, enumerated as the lambda parameter of the eventallpairs algorithm
        print('lmbd progression', progression)
        start_proj_time = time.time()
        print(norm_grad_y)
        gamma_y = dense_projection(second_graph, norm_grad_y, threshold, progression, shortest_path_lengths, normalize=True, sort=False) # normalize True
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for y: {:.5f}'.format(time.time() - start_proj_time))

        indicator_y = np.zeros(num_nodes)
        indicator_y[list(gamma_y)] = 1.
        if iter == 0:
            tmp_y = np.zeros_like(current_y) + learning_rate * grad_y * indicator_y  # note: do not update the current variables here; only use the intermediate result
        else:
            tmp_y = current_y + learning_rate * grad_y * indicator_y

        omega_y = set(np.nonzero(tmp_y)[0])

        print('solve argmax')
        # solve argmax
        start_max_time = time.time()
        bx, by = func.argmax_obj_with_proj(current_x, current_y, omega_x, omega_y)
        print('solve argmax time: {:.5f}'.format(time.time() - start_max_time))

        # tail projection for the first graph
        start_proj_time = time.time()
        re_tail = tail_proj(edges=first_graph_edges, weights=first_graph_edge_weights, x=bx, g=1, s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)  # tail projection
        re_nodes, _, _ = re_tail
        iter_proj_time += time.time() - start_proj_time
        print('tail projection time for x: {:.5f}'.format(time.time() - start_proj_time))
        psi_x = set(re_nodes)

        current_x = np.zeros_like(current_x)
        current_x[list(psi_x)] = bx[list(psi_x)]
        current_x = normalize(current_x)  # note: constrain current_x to [0, 1]; is this step necessary?

        # print(by) # note, by in [0, 1], so we should change lmbd range

        # start = 5
        # ratio = 10
        progression = [start * ratio ** i for i in range(steps)]
        # tail projection for the second graph
        start_proj_time = time.time()
        psi_y = dense_projection(second_graph, by, threshold, progression, shortest_path_lengths, normalize=False, sort=False) # note, normalize ???
        iter_proj_time += (time.time() - start_proj_time)

        print('tail projection time for y: {:.5f}'.format(time.time() - start_proj_time))

        current_y = np.zeros_like(current_y)
        # current_y[list(psi_y)] = bx[list(psi_y)]  # note: wrong (uses bx instead of by), but the results were good
        current_y[list(psi_y)] = by[list(psi_y)]
        current_y = normalize(current_y)  # constrain current_y in [0, 1]

        print('{} iteration funval'.format(iter), func.get_obj_val(current_x, current_y))

        acc_proj_time += iter_proj_time

        if logger:
            print('iter proj time: {:.5f}'.format(iter_proj_time))

        diff_norm = np.sqrt(np.linalg.norm(current_x - prev_x) ** 2 + np.linalg.norm(current_y - prev_y) ** 2)
        if logger:
            logger.debug('difference norm: {}'.format(diff_norm))

            # raw_pred_subgraph_x = np.nonzero(current_x)[0]
            #
            # prec, rec, fm, iou = evaluate_block(instance['true_subgraph'], raw_pred_subgraph_x)
            #
            # logger.debug('-' * 5 + ' performance of x prediction ' + '-' * 5)
            # logger.debug('precision: {:.5f}'.format(prec))
            # logger.debug('recall   : {:.5f}'.format(rec))
            # logger.debug('f-measure: {:.5f}'.format(fm))
            # logger.debug('iou      : {:.5f}'.format(iou))
            #
            # raw_pred_subgraph_y = np.nonzero(current_y)[0]
            #
            # prec, rec, fm, iou = evaluate_block(instance['true_subgraph'], raw_pred_subgraph_y)
            #
            # logger.debug('-' * 5 + ' performance of y prediction ' + '-' * 5)
            # logger.debug('precision: {:.5f}'.format(prec))
            # logger.debug('recall   : {:.5f}'.format(rec))
            # logger.debug('f-measure: {:.5f}'.format(fm))
            # logger.debug('iou      : {:.5f}'.format(iou))


        if diff_norm < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        pass

    return current_x, current_y, run_time
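
normalize above pulls the updated coefficients back into [0, 1] after each tail step. A minimal sketch, assuming it is a plain box projection (clipping); the original helper may instead rescale by the maximum.

import numpy as np

def normalize_sketch(x):
    # hypothetical normalize: project each coefficient onto [0, 1]
    return np.clip(x, 0., 1.)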
Example No. 5
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    graph = instance['graph']
    true_subgraphs = instance['true_subgraphs']
    edges = np.array(graph.edges)
    tag = False
    start = end = 0
    for t, subgraph in enumerate(true_subgraphs):
        if subgraph and not tag:
            start = t
            tag = True
        if not subgraph and tag:
            end = t - 1
            tag = False

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
        logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = GlobalEMS(features=instance['features'], trade_off=trade_off)
    true_x_array = []
    for true_subgraph in true_subgraphs:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x_array.append(true_x)
    true_x_array = np.array(true_x_array)
    true_obj_val, true_global_ems_val, true_penalty = func.get_obj_val(
        true_x_array)
    if logger:
        logger.debug(
            'ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_global_ems_val, true_penalty))

    current_x_array = func.get_init_x_zeros() + 1e-6
    # current_x_array = true_x_array
    # print('start from ground truth')
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x_array = np.copy(current_x_array)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for t in range(num_time_stamps):
            grad_x = func.get_gradient(current_x_array, t)
            print(grad_x)
            current_x = current_x_array[t] if iter > 0 else np.zeros_like(
                current_x_array[t], dtype=np.float64)
            normalized_grad = normalize_gradient(current_x, grad_x)
            start_proj_time = time.time()
            re_head = head_proj(edges=edges,
                                weights=edge_weights,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time
            omega_x = set(re_nodes)
            omega_x_list.append(omega_x)
            print(sorted(omega_x))

        # update style 1:
        # update all blocks simultaneously at each iteration;
        # this style is analogous to batch gradient descent
        # (see Andrew Ng's Machine Learning course)
        bx_array = np.zeros_like(current_x_array)  # update
        for t in range(num_time_stamps):
            indicator_x = np.zeros(num_nodes)
            indicator_x[list(omega_x_list[t])] = 1.
            bx_array[t] = current_x_array[t] + learning_rate * func.get_gradient(
                current_x_array, t
            ) * indicator_x  # since bx is only an intermediate variable, all blocks can be updated from the same iterate

        for t in range(num_time_stamps):
            bx = bx_array[t]
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=edge_weights,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)

            current_x = np.zeros_like(current_x_array[t])
            current_x[list(psi_x)] = bx[list(psi_x)]
            current_x = normalize(
                current_x)  # note, restrict current_x in [0, 1]

            current_x_array[t] = current_x

            # print(t, sorted(np.nonzero(current_x)))

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, global_ems_val, penalty = func.get_obj_val(
                current_x_array)
            logger.debug(
                'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, global_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
        logger.debug(
            'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(obj_val, global_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array, run_time
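
The "update style 1" loop above performs one gradient-ascent step per time stamp, masked by the head-projected support, before the tail projections. The same step as a standalone helper (the name masked_ascent_step is illustrative):

import numpy as np

def masked_ascent_step(x, grad, support, learning_rate):
    # one IHT-style ascent step restricted to a support set:
    # entries outside the support receive no gradient update
    indicator = np.zeros_like(x)
    indicator[sorted(support)] = 1.
    return x + learning_rate * grad * indicator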
Example No. 6
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None,
             tao=None):

    graph = instance['graph']  # get graph structure
    true_subgraph = instance['true_subgraph']  # get ground truth
    features = instance['features']
    # edges = np.array(graph.edges())

    num_blocks = len(instance['nodes_set'])
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = ParallelSumEMS(features=instance['features'],
                          trade_off=trade_off,
                          nodes_set=nodes_set,
                          boundary_edges_dict=boundary_edges_dict,
                          nodes_id_dict=nodes_id_dict,
                          tao=tao)
    if logger:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x = np.array(true_x)
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for b in range(num_blocks):
            block_x = current_x[sorted(nodes_set[b])]
            boundary_xs_dict = get_boundary_xs(
                current_x, boundary_edges_dict[b], nodes_id_dict
            )  # key is boundary edge, value is adjacent x in other blocks
            feat = features[sorted(nodes_set[b])]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            # g: number of connected components
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            iter_proj_time += (time.time() - start_proj_time)
            gamma_x = set(re_nodes)
            indicator_x = np.zeros(len(block_x))
            indicator_x[list(gamma_x)] = 1.
            tmp_x = block_x + learning_rate * grad_x * indicator_x  # note: do not update the current variables here; only use the intermediate result
            omega_x = set(np.nonzero(tmp_x)[0])

            omega_x_list.append(omega_x)

        bx_array = func.argmax_obj_with_proj_parallel(
            current_x,
            omega_x_list)  # solve argmax problem with block coordinate ascent

        for b in range(num_blocks):
            bx = bx_array[nodes_set[b]]
            # get edges and edge weights of current block
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)  # tail projection
            re_nodes, re_edges, p_x = re_tail
            iter_proj_time += (time.time() - start_proj_time)
            psi_x = set(re_nodes)

            block_x = np.zeros_like(current_x[nodes_set[b]])
            block_x[list(psi_x)] = bx[list(psi_x)]
            current_x[nodes_set[b]] = block_x

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time
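
relabel_nodes and relabel_edges translate global node ids into block-local ids so that each block can be projected as a small standalone graph. Below is a sketch of plausible implementations, inferred from how nodes_id_dict and relabeled_edges_set are used above; these are assumptions, not the original helpers.

def relabel_nodes_sketch(nodes_set):
    # map each global node id to its index within its sorted block
    nodes_id_dict = {}
    for block_nodes in nodes_set:
        for local_id, global_id in enumerate(sorted(block_nodes)):
            nodes_id_dict[global_id] = local_id
    return nodes_id_dict

def relabel_edges_sketch(graph, nodes_set, nodes_id_dict):
    # keep only intra-block edges and rewrite their endpoints with local ids
    relabeled_edges_set = []
    for block_nodes in nodes_set:
        block = set(block_nodes)
        block_edges = [(nodes_id_dict[u], nodes_id_dict[v])
                       for u, v in graph.edges()
                       if u in block and v in block]
        relabeled_edges_set.append(block_edges)
    return relabeled_edges_set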
Example No. 7
def optimize(instance,
             sparsity,
             threshold,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    true_subgraph = instance['true_subgraph']
    # true_subgraph = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] # note
    features = instance['weight']

    first_graph_edges = np.array(first_graph.edges)
    # second_graph_edges = np.array(second_graph.edges)
    first_graph_edge_weights = np.ones(first_graph.number_of_edges())
    # second_graph_edge_weights = np.ones(second_graph.number_of_edges())

    print(first_graph.number_of_nodes())
    print(second_graph.number_of_nodes())

    if first_graph.number_of_nodes() != second_graph.number_of_nodes():
        raise ValueError('wrong dual network input: node counts differ')

    num_nodes = first_graph.number_of_nodes()
    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(
            num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(
            num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(
            nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(
            nx.density(second_graph)))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    weight = instance['weight']
    lcc = max(nx.connected_component_subgraphs(second_graph), key=len)
    tmp = 0
    start_node = 0
    for node in lcc:
        if weight[node] > tmp:
            tmp = weight[node]
            start_node = node

    lcc_diameter = 9  # note: hard-coded diameter of the largest connected component

    start_time = time.time()
    acc_proj_time = 0.

    func = DualEMS(features, trade_off)

    if logger:
        print(sorted(true_subgraph))

        true_x = np.zeros(num_nodes)
        # print(type(true_subgraph))
        true_x[list(true_subgraph)] = 1.
        true_x = np.array(true_x)
        true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(
            true_x, true_x)
        print('ground truth values: {}, {}, {}, {}'.format(
            true_obj_val, x_ems_val, y_ems_val, penalty))

    current_x, current_y = func.get_init_x_zeros()
    current_x += 1e-6  # start close to zero (not exactly zero) to avoid division by zero
    current_y += 1e-6

    print('iteration start funval', func.get_obj_val(current_x, current_y))

    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x, prev_y = np.copy(current_x), np.copy(current_y)

        # handle first graph
        grad_x = func.get_gradient(current_x, current_y)
        iter_proj_time = 0.
        # iter_time = time.time()
        if iter == 0:
            norm_grad_x = normalize_gradient(
                np.zeros_like(current_x),
                grad_x)  # default, start from all zero
        else:
            norm_grad_x = normalize_gradient(current_x, grad_x)

        start_proj_time = time.time()
        re_head = head_proj(edges=first_graph_edges,
                            weights=first_graph_edge_weights,
                            x=norm_grad_x,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)  # head projection
        re_nodes, _, _ = re_head
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for x: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        if iter == 0:
            tmp_x = np.zeros_like(
                current_x
            ) + learning_rate * grad_x * indicator_x  # note: do not update the current variables here; only use the intermediate result
        else:
            tmp_x = current_x + learning_rate * grad_x * indicator_x

        omega_x = set(np.nonzero(tmp_x)[0])

        # handle second graph
        grad_y = func.get_gradient(current_y, current_x)  # note, order
        # note, test not normalize
        if iter == 0:
            norm_grad_y = normalize_gradient(np.zeros_like(current_y), grad_y)
        else:
            # norm_grad_y = normalize_gradient(current_y, grad_y)
            norm_grad_y = grad_y  # note: testing without normalization

        # note: values must be positive (input to the density projection)
        norm_grad_y = np.absolute(norm_grad_y)

        lmbd_min = 100  # renamed from min/maxx/step to avoid shadowing builtins
        lmbd_max = 5002
        lmbd_step = 500
        start_proj_time = time.time()

        # print(norm_grad_y)
        # gamma_y = dense_projection(second_graph, norm_grad_y, threshold, lmbd_min, lmbd_max, lmbd_step, start_node, lcc_diameter=2*lcc_diameter, normalize=False, sort=False)
        gamma_y = dense_projection(second_graph,
                                   norm_grad_y,
                                   threshold,
                                   lmbd_min,
                                   lmbd_max,
                                   lmbd_step,
                                   start_node,
                                   lcc_diameter=2 * lcc_diameter,
                                   normalize=True,
                                   sort=False)
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for y: {:.5f}'.format(time.time() -
                                                          start_proj_time))

        indicator_y = np.zeros(num_nodes)
        indicator_y[list(gamma_y)] = 1.
        if iter == 0:
            tmp_y = np.zeros_like(
                current_y
            ) + learning_rate * grad_y * indicator_y  # note: do not update the current variables here; only use the intermediate result
        else:
            tmp_y = current_y + learning_rate * grad_y * indicator_y

        omega_y = set(np.nonzero(tmp_y)[0])

        print('solve argmax')
        # solve argmax
        start_max_time = time.time()
        bx, by = func.argmax_obj_with_proj(current_x, current_y, omega_x,
                                           omega_y)
        print('solve argmax time: {:.5f}'.format(time.time() - start_max_time))

        # tail projection for the first graph
        start_proj_time = time.time()
        re_tail = tail_proj(edges=first_graph_edges,
                            weights=first_graph_edge_weights,
                            x=bx,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            nu=2.5,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            verbose=0)  # tail projection
        re_nodes, _, _ = re_tail
        iter_proj_time += time.time() - start_proj_time
        print('tail projection time for x: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        psi_x = set(re_nodes)

        current_x = np.zeros_like(current_x)
        current_x[list(psi_x)] = bx[list(psi_x)]
        current_x = normalize(
            current_x
        )  # note: constrain current_x to [0, 1]; is this step necessary?

        # print(by) # note, by in [0, 1], so we should change lmbd range

        lmbd_min = 10000
        lmbd_max = 20001
        lmbd_step = 1000
        # tail projection for the second graph
        start_proj_time = time.time()
        psi_y = dense_projection(second_graph,
                                 by,
                                 threshold,
                                 lmbd_min,
                                 lmbd_max,
                                 lmbd_step,
                                 start_node,
                                 lcc_diameter=2 * lcc_diameter,
                                 normalize=False,
                                 sort=False)
        iter_proj_time += (time.time() - start_proj_time)

        print('tail projection time for y: {:.5f}'.format(time.time() -
                                                          start_proj_time))

        current_y = np.zeros_like(current_y)
        # current_y[list(psi_y)] = bx[list(psi_y)]

        # keep psi_y inside the support of current_y
        # psi_y = psi_y & set(np.nonzero(current_y)[0])  # note: possible improvement, keeps the result from growing arbitrarily

        current_y[list(psi_y)] = by[list(psi_y)]
        current_y = normalize(current_y)  # constrain current_y in [0, 1]

        print('{} iteration funval'.format(iter),
              func.get_obj_val(current_x, current_y))

        acc_proj_time += iter_proj_time

        if logger:
            print('iter proj time: {:.5f}'.format(iter_proj_time))

        diff_norm = np.sqrt(
            np.linalg.norm(current_x - prev_x)**2 +
            np.linalg.norm(current_y - prev_y)**2)
        if logger:
            logger.debug('difference norm: {}'.format(diff_norm))

            # raw_pred_subgraph_x = np.nonzero(current_x)[0]
            #
            # prec, rec, fm, iou = evaluate_block(instance['true_subgraph'], raw_pred_subgraph_x)
            #
            # logger.debug('-' * 5 + ' performance of x prediction ' + '-' * 5)
            # logger.debug('precision: {:.5f}'.format(prec))
            # logger.debug('recall   : {:.5f}'.format(rec))
            # logger.debug('f-measure: {:.5f}'.format(fm))
            # logger.debug('iou      : {:.5f}'.format(iou))
            #
            # raw_pred_subgraph_y = np.nonzero(current_y)[0]
            #
            # prec, rec, fm, iou = evaluate_block(instance['true_subgraph'], raw_pred_subgraph_y)
            #
            # logger.debug('-' * 5 + ' performance of y prediction ' + '-' * 5)
            # logger.debug('precision: {:.5f}'.format(prec))
            # logger.debug('recall   : {:.5f}'.format(rec))
            # logger.debug('f-measure: {:.5f}'.format(fm))
            # logger.debug('iou      : {:.5f}'.format(iou))

        if diff_norm < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        pass

    return current_x, current_y, run_time
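
The start node for the density projection is the heaviest node inside the largest connected component of the second graph. The same choice written compactly; nx.connected_components is used here because nx.connected_component_subgraphs, called above, was removed in newer networkx releases.

import networkx as nx

def max_weight_start_node(graph, weight):
    # heaviest node inside the largest connected component
    lcc = max(nx.connected_components(graph), key=len)
    return max(lcc, key=lambda node: weight[node])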
Example No. 8
def optimize(instance,
             sparsity,
             threshold,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    true_subgraph = instance['true_subgraph']
    features = instance['weight']
    A = adj_matrix(
        second_graph
    )  # get adjacent matrix of second graph, used for density projection

    first_graph_edges = np.array(first_graph.edges)
    first_graph_edge_weights = np.ones(
        first_graph.number_of_edges())  # edge weight, default 1

    print('number of nodes in first graph', first_graph.number_of_nodes())
    print('number of nodes in second graph', second_graph.number_of_nodes())

    if first_graph.number_of_nodes() != second_graph.number_of_nodes():
        raise ValueError('wrong dual network input: node counts differ')

    num_nodes = first_graph.number_of_nodes()
    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()

    if logger:
        # print some basic information
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(
            num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(
            num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(
            nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(
            nx.density(second_graph)))
        logger.debug('density of true subgraph in second graph: {:.5f}'.format(
            nx.density(nx.subgraph(second_graph, true_subgraph))))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.

    func = DualEMS(features, trade_off)
    if logger:
        print(sorted(true_subgraph))

        true_x = np.zeros(num_nodes)
        # print(type(true_subgraph))
        true_x[list(true_subgraph)] = 1.
        true_x = np.array(true_x)
        true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(
            true_x, true_x)
        print('ground truth values: {}, {}, {}, {}'.format(
            true_obj_val, x_ems_val, y_ems_val, penalty))

    current_x, current_y = func.get_init_x_zeros(
    )  # are there some other better initialization methods?
    current_x += 1e-6  # start from zero plus 1e-6 to avoid division-by-zero errors
    current_y += 1e-6

    print('iteration start funval', func.get_obj_val(current_x, current_y))

    for iter in range(max_iter):  # external iteration
        if logger:
            logger.debug('iteration: {:d}'.format(iter))

        prev_x, prev_y = np.copy(current_x), np.copy(
            current_y)  # store previous vectors for early termination

        # handle first graph
        grad_x = func.get_gradient(current_x, current_y)
        iter_proj_time = 0.
        if iter == 0:  # from all zero vector
            norm_grad_x = normalize_gradient(np.zeros_like(current_x), grad_x)
        else:
            norm_grad_x = normalize_gradient(current_x, grad_x)

        start_proj_time = time.time()
        # head projection for the connected constraint, so projection should be on first graph
        re_head = head_proj(edges=first_graph_edges,
                            weights=first_graph_edge_weights,
                            x=norm_grad_x,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for x: {:.5f}'.format(time.time() -
                                                          start_proj_time))
        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        # there is no difference between using grad_x and norm_grad_x, because indicator_x is derived from norm_grad_x
        if iter == 0:
            tmp_x = np.zeros_like(
                current_x
            ) + learning_rate * norm_grad_x * indicator_x  # start from all zeros
        else:
            tmp_x = current_x + learning_rate * norm_grad_x * indicator_x

        omega_x = set([ind for ind, _ in enumerate(tmp_x) if not 0. == _])

        # head projection for y
        grad_y = func.get_gradient(current_y,
                                   current_x)  # note, reverse order x & y
        # note, test not normalize
        if iter == 0:
            norm_grad_y = normalize_gradient(
                np.zeros_like(current_y),
                grad_y)  # note: is this necessary for the density projection?
        else:
            norm_grad_y = normalize_gradient(current_y, grad_y)
            # norm_grad_y = grad_y # note !!!

        # note: the gradient passed to the density projection must be positive
        # note: why does Baojian's head-projection code not enforce positive values?
        abs_norm_grad_y = np.absolute(
            norm_grad_y
        )  # take the absolute value of the gradient: a larger magnitude indicates a larger effect on the objective function

        np.set_printoptions(linewidth=3000)

        # print(norm_grad_y)

        # lmbd_list = [0.01, 0.05, 0.07, 0.08, 0.09, 0.1, 0.12, 0.15, 0.17, 0.2, 0.2, 0.2, 0.2, 0.21, 0.22, 0.23, 0.18, 0.18, 0.18, 0.17] # normalize
        lmbd_list = [0.23]  # normalize
        # lmbd_list = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005, 0.0006] # normalize
        # sparsity_list = [250, 250, 260, 270, 270, 275, 275, 280, 280, 280, 265, 270, 275, 275, 280, 285, 260, 255, 250, 245] # normalize
        sparsity_list = [275]  # normalize
        # sparsity_list = [50, 50, 50, 50, 55] # normalize
        lmbd_sparsity_list = zip(lmbd_list, sparsity_list)
        # sparsity_list = [50]
        print('start head projection for y')
        start_proj_time = time.time()
        # gamma_y = density_projection(second_graph, norm_grad_y, A, threshold, min_sparsity, max_sparsity, step_sparsity, normalize=False)
        gamma_y = density_projection(
            second_graph,
            abs_norm_grad_y,
            A,
            threshold,
            lmbd_sparsity_list,
            normalize=True,
            true_subgraph=true_subgraph
        )  # test not normalize, need new lambda sparsity list
        # gamma_y = density_projection(second_graph, abs_norm_grad_y, A, threshold, lmbd_sparsity_list, normalize=False, true_subgraph=true_subgraph) # test not normalize, need new lambda sparsity list
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for y: {:.5f}'.format(time.time() -
                                                          start_proj_time))

    #
    #
    #     indicator_y = np.zeros(num_nodes)
    #     indicator_y[list(gamma_y)] = 1.
    #     if iter == 0:
    #         # tmp_y = np.zeros_like(current_y) + learning_rate * grad_y * indicator_y
    #         tmp_y = np.zeros_like(current_y) + learning_rate * norm_grad_y * indicator_y # todo, pls note that update gradient should be normalized gradient
    #     else:
    #         # tmp_y = current_y + learning_rate * grad_y * indicator_y
    #         tmp_y = current_y + learning_rate * norm_grad_y * indicator_y
    #
    #     omega_y = set([ind for ind, _ in enumerate(tmp_y) if not 0. == _])
    #
    #     print('omega_x', len(omega_x))
    #     print(sorted(list(omega_x)))
    #
    #     print('omega_y', len(omega_y))
    #     print(sorted(list(omega_y)))
    #
    #     print('intersect', len(omega_y & omega_x))
    #     print(sorted(list(omega_y & omega_x)))
    #
    #     # break
    #
    #     print('solve argmax')
    #     start_max_time = time.time()
    #     bx, by = func.argmax_obj_with_proj(current_x, current_y, omega_x, omega_y)
    #     print('solve argmax time {:.5f}'.format(time.time() - start_max_time))
    #
    #     # break
    #
    #     start_proj_time = time.time()
    #     # tail projection on first graph
    #     re_tail = tail_proj(edges=first_graph_edges, weights=first_graph_edge_weights, x=bx, g=1, s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)  # tail projection
    #     re_nodes, _, _ = re_tail
    #     iter_proj_time += time.time() - start_proj_time
    #     print('tail projection time for x: {:.5f}'.format(time.time() - start_proj_time))
    #     psi_x = set(re_nodes)
    #
    #     current_x = np.zeros_like(current_x)
    #     current_x[list(psi_x)] = bx[list(psi_x)]
    #     current_x = normalize(current_x)
    #
    #     lmbd_list = [0.01, 0.05, 0.07, 0.08, 0.09, 0.1]
    #     # lmbd_list = [0.006, 0.08]
    #     sparsity_list = [250, 250, 270, 270, 275, 275]
    #     lmbd_sparsity_list = zip(lmbd_list, sparsity_list)
    #
    #     start_proj_time = time.time()
    #     # psi_y = density_projection(second_graph, by, threshold, min_sparsity, max_sparsity, step_sparsity, normalize=False)
    #     psi_y = density_projection(second_graph, by, A, threshold, lmbd_sparsity_list, normalize=False, true_subgraph=true_subgraph) # not normalize, not absolute value, since by is in [0, 1]
    #     iter_proj_time += (time.time() - start_proj_time)
    #
    #     print('tail projection time for y: {:.5f}'.format(time.time() - start_proj_time))
    #
    #     current_y = np.zeros_like(current_y)
    #     print('1', len(np.nonzero(by)[0]))
    #     print('by nonzero', sorted(list(np.nonzero(by)[0])))
    #     print('1v', len(np.nonzero(bx)[0]))
    #     print('2', len(psi_y))
    #     print('psi_y', sorted(list(psi_y)))
    #     print('2v', len(psi_x))
    #     current_y[list(psi_y)] = by[list(psi_y)]
    #     print('3', len(np.nonzero(current_y)[0]))
    #     print('3v', len(np.nonzero(current_x)[0]))
    #     current_y = normalize(current_y)
    #     print('4', len(np.nonzero(current_y)[0]))
    #     print('4v', len(np.nonzero(current_x)[0]))
    #
    #     print('{} iteration funval'.format(iter), func.get_obj_val(current_x, current_y))
    #
    #     acc_proj_time += iter_proj_time
    #
    #     if logger:
    #         print('iter proj time: {:.5f}'.format(iter_proj_time))
    #
    #     diff_norm = np.sqrt(np.linalg.norm(current_x - prev_x) ** 2 + np.linalg.norm(current_y - prev_y) ** 2)
    #     if logger:
    #         logger.debug('difference norm: {}'.format(diff_norm))
    #
    #     if diff_norm < epsilon:
    #         break
    #
    run_time = time.time() - start_time
    if logger:
        pass

    return current_x, current_y, run_time
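
adj_matrix supplies the adjacency matrix that density_projection consumes. A minimal sketch, assuming a dense 0/1 matrix with rows and columns in sorted node order suffices; the original helper might return a sparse matrix instead.

import networkx as nx

def adj_matrix_sketch(graph):
    # dense 0/1 adjacency matrix in sorted node order
    return nx.to_numpy_array(graph, nodelist=sorted(graph.nodes()))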
Example No. 9
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    graph = instance['graph']
    true_subgraph = instance['true_subgraph']
    features = instance['features']

    num_blocks = len(instance['nodes_set'])
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = BlockSumEMS(features=instance['features'],
                       trade_off=trade_off,
                       nodes_set=nodes_set,
                       boundary_edges_dict=boundary_edges_dict,
                       nodes_id_dict=nodes_id_dict)
    if logger:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x = np.array(true_x)
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for b in range(num_blocks):
            block_x = current_x[sorted(nodes_set[b])]
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            feat = features[sorted(nodes_set[b])]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            # g: number of connected components
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            iter_proj_time += (time.time() - start_proj_time)
            gamma_x = set(re_nodes)  # note: elements of gamma_x are local (block-level) node ids
            omega_x_list.append(gamma_x)

        # update
        bx_array = np.zeros_like(current_x)
        for b in range(num_blocks):
            sorted_nodes = sorted(nodes_set[b])  # keep the ordering consistent with the local ids used above
            block_x = current_x[sorted_nodes]
            feat = features[sorted_nodes]
            indicator_x = np.zeros(len(block_x))  # note: zeros, not ones
            indicator_x[list(omega_x_list[b])] = 1.
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)
            bx_array[sorted_nodes] = block_x + learning_rate * grad_x * indicator_x

        for b in range(num_blocks):
            sorted_nodes = sorted(nodes_set[b])
            bx = bx_array[sorted_nodes]
            # get edges and edge weights of current block
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, re_edges, p_x = re_tail
            iter_proj_time += (time.time() - start_proj_time)
            psi_x = set(re_nodes)

            block_x = np.zeros_like(bx)
            block_x[list(psi_x)] = bx[list(psi_x)]
            normalized_block_x = normalize(block_x)
            current_x[sorted_nodes] = normalized_block_x  # constrain the current block of x to [0, 1]

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time
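

# Note: the optimizers in these examples lean on a couple of small helpers
# that are not shown in this listing (`normalize`, `normalize_gradient`). The
# sketch below is illustrative only -- names and semantics are inferred from
# the call sites above, not taken from the original source: `normalize`
# projects a block of variables back into the box [0, 1], and
# `normalize_gradient` zeroes ascent directions that would immediately leave
# that box.
import numpy as np


def normalize(x):
    """Entry-wise projection of x onto [0, 1] (hypothetical sketch)."""
    return np.clip(x, 0., 1.)


def normalize_gradient(x, grad_x):
    """Suppress gradient entries whose ascent step would violate the box
    constraint: positive gradients at x == 1 and negative gradients at x == 0
    (hypothetical sketch)."""
    normalized = np.copy(grad_x)
    normalized[(x >= 1.) & (grad_x > 0.)] = 0.
    normalized[(x <= 0.) & (grad_x < 0.)] = 0.
    return normalized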


# def run_instance(instance, sparsity, trade_off, learning_rate, max_iter=10000, epsilon=1e-3):
#
#     opt_x = optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon)
#
#     pred_subgraph = np.nonzero(opt_x)[0]
#
#     prec, rec, fm, iou = evaluate_block(instance['subgraph'], pred_subgraph)
#
#     logger.debug('-' * 5 + ' raw performance ' + '-' * 5)
#     logger.debug('precision: {:.5f}'.format(prec))
#     logger.debug('recall   : {:.5f}'.format(rec))
#     logger.debug('f-measure: {:.5f}'.format(fm))
#     logger.debug('iou      : {:.5f}'.format(iou))
#
#     refined_pred_subgraph = post_process_block(instance['graph'], pred_subgraph)
#     prec, rec, fm, iou = evaluate_block(instance['subgraph'], refined_pred_subgraph)
#     logger.debug('-' * 5 + ' refined performance ' + '-' * 5)
#     logger.debug('precision: {:.5f}'.format(prec))
#     logger.debug('recall   : {:.5f}'.format(rec))
#     logger.debug('f-measure: {:.5f}'.format(fm))
#     logger.debug('iou      : {:.5f}'.format(iou))
#
# if __name__ == '__main__':
#
#     path = '/network/rit/lab/ceashpc/share_data/GraphOpt/ijcai/app2/CondMat'
#     fn = 'test_9.pkl'
#
#     rfn = os.path.join(path, fn)
#     with open(rfn, 'rb') as rfile:
#         dataset = pickle.load(rfile)
#
#     instance = dataset[0]
#     sparsity = 534
#     trade_off = 0.0001
#     learning_rate = 1.
#     run_instance(instance, sparsity, trade_off, learning_rate)
Exemplo n.º 10
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon):

    graph = instance['graph']
    true_subgraph = instance['subgraph']
    features = instance['features']
    block_node_sets = instance['block_node_sets']
    block_boundary_edges_dict = instance['block_boundary_edges_dict']

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    num_blocks = len(block_boundary_edges_dict)

    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: graph block-structured IHT')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of blocks: {:d}'.format(num_blocks))

    node_id_dict = relabel_nodes(block_node_sets)
    relabeled_edge_sets = relabel_edges(graph, block_node_sets, node_id_dict)

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.
    func = BlockSumEMS(features=features,
                       nodes_set=block_node_sets,
                       boundary_edges_dict=block_boundary_edges_dict,
                       nodes_id_dict=node_id_dict,
                       trade_off=trade_off)

    true_x = np.zeros(num_nodes)
    true_x[true_subgraph] = 1.
    true_x = np.array(true_x)
    true_obj_val, true_sum_ems_val, true_penalty = func.get_obj_val(true_x)
    logger.debug(
        'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
        .format(true_obj_val, true_sum_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):  # cyclic block coordinate version
        for b in range(num_blocks):
            logger.debug('iteration: {:d}, block: {:d}'.format(iter, b))
            prev_x = np.copy(current_x)
            iter_time = time.time()
            iter_proj_time = 0.

            sorted_node_set = sorted(block_node_sets[b])
            block_x = current_x[sorted_node_set]
            block_boundary_edge_x_dict = get_boundary_xs(
                current_x, block_boundary_edges_dict[b], node_id_dict)
            block_features = features[sorted_node_set]
            block_grad = func.get_gradient(block_x, block_features,
                                           block_boundary_edge_x_dict)
            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_block_grad = normalize_gradient(block_x, block_grad)
            edges = np.array(relabeled_edge_sets[b])
            edge_weights = np.ones(len(edges))

            start_proj_time = time.time()
            re_head = head_proj(edges=edges,
                                weights=edge_weights,
                                x=normalized_block_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time
            block_omega_x = set(re_nodes)  # note: elements of block_omega_x are local (block-level) node ids

            # update one block
            block_x = current_x[sorted_node_set]
            block_features = features[sorted_node_set]
            block_indicator_x = np.zeros_like(current_x[sorted_node_set])
            block_indicator_x[list(block_omega_x)] = 1.
            block_boundary_edge_x_dict = get_boundary_xs(
                current_x, block_boundary_edges_dict[b], node_id_dict)

            current_x[sorted_node_set] = current_x[
                sorted_node_set] + learning_rate * func.get_gradient(
                    block_x, block_features,
                    block_boundary_edge_x_dict) * block_indicator_x

            block_bx = current_x[sorted_node_set]
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=edge_weights,
                                x=block_bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)
            block_x = np.zeros_like(current_x[sorted_node_set])
            block_x[list(psi_x)] = block_bx[list(psi_x)]
            normalized_block_x = normalize(block_x)
            current_x[
                sorted_node_set] = normalized_block_x  # constrain current block x in [0, 1]

            acc_proj_time += iter_proj_time

            obj_val, sum_ems_val, penalty = func.get_obj_val(current_x)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            pred_subgraph = np.nonzero(current_x)[0]
            prec, rec, fm, iou = evaluate_block(instance['subgraph'],
                                                pred_subgraph)
            logger.debug(
                'precision: {:.5f}, recall: {:.5f}, f-measure: {:.5f}'.format(
                    prec, rec, fm))
            logger.debug('-' * 10)

            diff_norm_x = np.linalg.norm(current_x - prev_x)
            logger.debug('difference {:.5f}'.format(diff_norm_x))
            if diff_norm_x < epsilon:
                break

    run_time = time.time() - start_time
    obj_val, sum_ems_val, penalty = func.get_obj_val(current_x)
    logger.debug(
        'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
        format(obj_val, sum_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
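

# Note: the block decomposition in the two examples above also relies on
# `relabel_nodes` and `get_boundary_xs`, which are not part of this listing.
# The sketch below is hypothetical and only reflects what the call sites
# imply: `relabel_nodes` maps each global node id to its local index inside
# its sorted block, and `get_boundary_xs` gathers, for every boundary edge of
# a block, the value of the endpoint living in another block (compare the
# comment "key is boundary edge (u local index, v global index)" further
# down in this listing).
def relabel_nodes(nodes_set):
    """Map global node id -> local id within its sorted block (sketch)."""
    nodes_id_dict = {}
    for block in nodes_set:
        for local_id, global_id in enumerate(sorted(block)):
            nodes_id_dict[global_id] = local_id
    return nodes_id_dict


def get_boundary_xs(x, boundary_edges, nodes_id_dict):
    """Return {(local id of u, global id of v): x[v]} for each boundary edge
    (u, v) whose endpoint u lies in the current block (sketch)."""
    return {(nodes_id_dict[u], v): x[v] for u, v in boundary_edges}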
Exemplo n.º 11
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    graph = instance['graph']  # get graph structure
    true_subgraphs = instance['true_subgraphs']  # get ground truth
    edges = np.array(graph.edges)
    # decide interval
    tag = False
    start = end = 0
    for t, subgraph in enumerate(true_subgraphs):
        if subgraph and not tag:
            start = t
            tag = True
        if subgraph and tag:
            end = t

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)
    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
        logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = GlobalEMS(features=instance['features'],
                     trade_off=trade_off,
                     learning_rate=1.,
                     max_iter=1)
    if logger:
        true_x_array = []
        for true_subgraph in true_subgraphs:
            true_x = np.zeros(num_nodes)
            true_x[true_subgraph] = 1.
            true_x_array.append(true_x)
        true_x_array = np.array(true_x_array)
        true_obj_val, true_global_ems_val, true_penalty = func.get_obj_val(
            true_x_array)
        logger.debug(
            'ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_global_ems_val, true_penalty))

    # initialization
    current_x_array = func.get_init_x_zeros() + 1e-6
    # print('start from ground truth')
    # current_x_array = true_x_array
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x_array = np.copy(current_x_array)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []  # get head projection set on each block
        for t in range(num_time_stamps):
            grad_x = func.get_gradient(
                current_x_array, t)  # partial gradient of the current block
            current_x = current_x_array[t] if iter > 0 else np.zeros_like(
                current_x_array[t],
                dtype=np.float64)  # start from all zeros when iter == 0
            normalized_grad = normalize_gradient(
                current_x, grad_x
            )  # zero out gradient entries whose update would leave [0, 1]
            start_proj_time = time.time()
            re_head = head_proj(edges=edges,
                                weights=edge_weights,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)  # head projection
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time
            gamma_x = set(re_nodes)

            indicator_x = np.zeros(num_nodes)
            indicator_x[list(gamma_x)] = 1.
            # or normalized_grad
            tmp_x = current_x + learning_rate * grad_x * indicator_x  # note: do not update the current variables here; only the support of this intermediate result is used
            omega_x = set(np.nonzero(tmp_x)[0])
            omega_x_list.append(omega_x)

        bx_array = func.argmax_obj_with_proj(
            current_x_array,
            omega_x_list)  # solve argmax problem with block coordinate ascent

        for t in range(num_time_stamps):
            bx = bx_array[t]
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=edge_weights,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)  # tail projection
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)

            current_x = np.zeros_like(current_x_array[t])
            current_x[list(psi_x)] = bx[list(psi_x)]
            current_x = normalize(current_x)  # constrain current_x in [0, 1]

            current_x_array[t] = current_x

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, global_ems_val, penalty = func.get_obj_val(
                current_x_array)
            logger.debug(
                'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, global_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
        if logger:
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
        logger.debug(
            'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(obj_val, global_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array, run_time
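

# Note: `argmax_obj_with_proj` (used above) is expected to approximately solve
#     max_x f(x)   subject to   supp(x[t]) being a subset of omega_x_list[t]
# for every time stamp t, i.e. maximize the objective with each block's
# support restricted to its head-selected set, as the comment "solve argmax
# problem with block coordinate ascent" suggests. A naive projected-gradient
# sketch under that assumption (hypothetical; the real implementation is not
# part of this listing):
def argmax_obj_with_proj_sketch(func, x_array, omega_x_list,
                                learning_rate=0.01, max_iter=2000):
    bx_array = np.copy(x_array)
    for _ in range(max_iter):
        for t, omega_x in enumerate(omega_x_list):
            indicator = np.zeros_like(bx_array[t])
            indicator[list(omega_x)] = 1.
            grad = func.get_gradient(bx_array, t)
            # ascent step restricted to the allowed support, clipped to [0, 1]
            bx_array[t] = normalize(bx_array[t] + learning_rate * grad * indicator)
    return bx_array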
Exemplo n.º 12
def optimize(instance, sparsity, trade_off, learning_rate, max_iter, epsilon=1e-3):

    graph = instance['graph']
    true_subgraphs = instance['subgraphs']
    edges = np.array(graph.edges)
    tag = False
    start = end = 0
    for t, subgraph in enumerate(true_subgraphs):
        if subgraph and not tag:
            start = t
            tag = True
        if subgraph and tag:
            end = t

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)
    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: graph block-structured IHT')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
    logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.
    func = GlobalEMS(features=instance['features'], trade_off=trade_off)
    true_x_array = []
    for true_subgraph in true_subgraphs:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x_array.append(true_x)
    true_x_array = np.array(true_x_array)
    true_obj_val, true_global_ems_val, true_penalty = func.get_obj_val(true_x_array)
    logger.debug('ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(true_obj_val, true_global_ems_val, true_penalty))

    current_x_array = func.get_init_x_zeros() + 1e-6  # initialize near zero; the small offset avoids division by zero when computing the gradient
    for iter in range(max_iter):

        tag = False  # convergence flag for the cyclic pass below
        # update style 2: update each block cyclically
        for t in range(num_time_stamps):

            logger.debug('iteration: {:d}, time stamps: {:d}'.format(iter, t))

            prev_x_array = np.copy(current_x_array)
            iter_time = time.time()
            iter_proj_time = 0.

            grad_x = func.get_gradient(current_x_array, t)
            current_x = current_x_array[t] if iter > 0 else np.zeros_like(current_x_array[t], dtype=np.float64)
            normalized_grad = normalize_gradient(current_x, grad_x)
            start_proj_time = time.time()
            re_head = head_proj(edges=edges, weights=edge_weights, x=normalized_grad, g=1, s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100, err_tol=1e-8, root=-1, pruning='strong', epsilon=1e-10, verbose=0)
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time
            omega_x = set(re_nodes)

            indicator_x = np.zeros(num_nodes)
            indicator_x[list(omega_x)] = 1.
            # bx is an intermediate variable holding the updated block variables
            bx = current_x_array[t] + learning_rate * grad_x * indicator_x  # reuse the gradient computed above

            # tail projection
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges, weights=edge_weights, x=bx, g=1, s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0)
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)

            current_x = np.zeros_like(current_x_array[t])
            current_x[list(psi_x)] = bx[list(psi_x)]
            current_x = normalize(current_x)  # note: restrict current_x to [0, 1]

            current_x_array[t] = current_x  # update the current variables; this affects the gradients of the other blocks

            acc_proj_time += iter_proj_time

            obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
            logger.debug('objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, global_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

            diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
            logger.debug('difference norm x: {:.5f}'.format(diff_norm_x))
            if diff_norm_x < epsilon:
                tag = True  # converged; stop the outer loop as well
                break

        if tag:
            break

    run_time = time.time() - start_time
    obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
    logger.debug('objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.format(obj_val, global_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array
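

# The cyclic update above repeats the same three-step pattern for each block:
# head projection of the (box-normalized) gradient to pick a support, a
# gradient-ascent step on that support only, then tail projection plus
# clipping into [0, 1]. Distilled into a standalone sketch (same parameter
# values as the calls above; `head_proj`, `tail_proj`, `normalize` and
# `normalize_gradient` as used throughout this listing):
def iht_block_step(x, grad, edges, edge_weights, sparsity, learning_rate):
    normalized_grad = normalize_gradient(x, grad)
    re_nodes, _, _ = head_proj(edges=edges, weights=edge_weights,
                               x=normalized_grad, g=1, s=sparsity,
                               budget=sparsity - 1., delta=1. / 169.,
                               max_iter=100, err_tol=1e-8, root=-1,
                               pruning='strong', epsilon=1e-10, verbose=0)
    indicator = np.zeros_like(x)
    indicator[list(set(re_nodes))] = 1.
    bx = x + learning_rate * grad * indicator  # ascent on the head support only
    re_nodes, _, _ = tail_proj(edges=edges, weights=edge_weights, x=bx, g=1,
                               s=sparsity, budget=sparsity - 1., nu=2.5,
                               max_iter=100, err_tol=1e-8, root=-1,
                               pruning='strong', verbose=0)
    next_x = np.zeros_like(x)
    psi_x = list(set(re_nodes))
    next_x[psi_x] = bx[psi_x]
    return normalize(next_x)  # keep the block inside [0, 1]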
Exemplo n.º 13
def optimize(instance, sparsity, trade_off, max_iter=10, epsilon=1e-3):

    graph = instance['graph']
    true_subgraphs = instance['subgraphs']
    edges = np.array(graph.edges)
    tag = False
    start = end = 0
    for t, subgraph in enumerate(true_subgraphs):
        if subgraph and not tag:
            start = t
            tag = True
        if not subgraph and tag:
            end = t - 1
            tag = False

    num_time_stamps = len(true_subgraphs)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)
    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('algorithm: graph block-structured matching pursuit')
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of time stamps: {:d}'.format(num_time_stamps))
    logger.debug('signal interval: [{:d}, {:d}]'.format(start, end))

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.
    func = GlobalEMS(features=instance['features'], trade_off=trade_off)
    true_x_array = []
    for true_subgraph in true_subgraphs:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x_array.append(true_x)
    true_x_array = np.array(true_x_array)
    true_obj_val, true_global_ems_val, true_penalty = func.get_obj_val(
        true_x_array)
    logger.debug(
        'ground truth, obj value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
        .format(true_obj_val, true_global_ems_val, true_penalty))

    # current_x_array = func.get_init_x_random()
    current_x_array = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        logger.debug('iteration: {:d}'.format(iter))
        prev_x_array = np.copy(current_x_array)
        iter_time = time.time()
        iter_proj_time = 0.

        omega_x_list = []
        for t in range(num_time_stamps):
            grad_x = func.get_gradient(current_x_array, t)
            current_x = current_x_array[t] if iter > 0 else np.zeros_like(
                current_x_array[t], dtype=np.float64)
            normalized_grad = normalize_gradient(current_x, grad_x)

            start_proj_time = time.time()
            re_head = head_proj(edges=edges,
                                weights=edge_weights,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, _, _ = re_head
            iter_proj_time += time.time() - start_proj_time

            gamma_x = set(re_nodes)
            supp_x = set(
                [ind for ind, _ in enumerate(current_x) if not 0. == _])
            omega_x = gamma_x | supp_x
            omega_x_list.append(omega_x)

        bx_array = func.argmax_obj_with_proj(current_x_array, omega_x_list)

        for t in range(num_time_stamps):
            bx = bx_array[t]
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=edge_weights,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, _, _ = re_tail
            iter_proj_time += time.time() - start_proj_time
            psi_x = set(re_nodes)

            current_x = np.zeros_like(current_x_array[t])
            current_x[list(psi_x)] = bx[list(psi_x)]

            current_x_array[t] = current_x

            logger.debug('{:d}: {}'.format(t, sorted(np.nonzero(current_x)[0])))

        acc_proj_time += iter_proj_time

        # post process

        obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
        logger.debug(
            'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'
            .format(obj_val, global_ems_val, penalty))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(
            acc_proj_time))  # accumulative projection time
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x_array - prev_x_array)
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    obj_val, global_ems_val, penalty = func.get_obj_val(current_x_array)
    logger.debug(
        'objective value: {:.5f}, global ems value: {:.5f}, penalty: {:.5f}'.
        format(obj_val, global_ems_val, penalty))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x_array
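

# The head/tail projection interface is the same in every example above: both
# return (nodes, edges, projected_x). A self-contained toy call on a 4-node
# path graph (the graph and the values are made up for illustration; the
# keyword values are the ones used throughout this file):
toy_edges = np.array([[0, 1], [1, 2], [2, 3]])
toy_weights = np.ones(len(toy_edges))
toy_x = np.array([0.1, 2.0, 2.5, 0.2])
re_nodes, re_edges, p_x = head_proj(edges=toy_edges, weights=toy_weights,
                                    x=toy_x, g=1, s=2, budget=1.,
                                    delta=1. / 169., max_iter=100,
                                    err_tol=1e-8, root=-1, pruning='strong',
                                    epsilon=1e-10, verbose=0)
# re_nodes should favor the two adjacent high-value nodes (1 and 2 here);
# p_x is the projection of toy_x onto the selected support.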
Exemplo n.º 14
def block_graph_mp(data, k, max_iter, trade_off, log_file, func_name="EMS"):
    """
    :param func_name: score function name
    :param k: sparsity
    :param max_iter: max number of iterations
    :param G: networkx graph
    :param true_subgraph: a list of nodes that represents the ground truth subgraph
    :return: prediction xt, which denotes by the predicted abnormal nodes
    """
    G = data["graph"]
    features = np.array(data["features"])
    nodes_set = data["nodes_set"]
    boundary_edges_dict = data["block_boundary_edges_dict"]
    node_block_dict = data["node_block_dict"]
    true_subgraph = sorted(data["true_subgraph"])
    num_blocks = len(boundary_edges_dict)
    nodes_id_dict = relabel_nodes(
        nodes_set)  # key is global node id, value is local node id
    if func_name == "PartitionEMS":
        func = PartitionEMS.PartitionEMS(
            features=features,
            num_blocks=num_blocks,
            nodes_set=nodes_set,
            boundary_edges_dict=boundary_edges_dict,
            nodes_id_dict=nodes_id_dict,
            trade_off=trade_off)
    else:
        raise ValueError('unsupported score function: {}'.format(func_name))

    num_nodes = G.number_of_nodes()
    num_edges = G.number_of_edges()
    relabeled_edges_set = relabel_edges(G, nodes_set, nodes_id_dict)

    print_log(log_file, "\n----------------initialization---------------\n")
    X = func.get_init_point_random()
    XT = np.copy(X)
    print_log(log_file, "\n------------------searching------------------\n")
    start_time = time.time()
    learning_rate = 1.
    for iter in range(max_iter):
        Omega_X = []
        X_prev = np.copy(XT)
        # print("iter: {}, time: {}".format(iter, time.asctime(time.localtime(time.time()))))
        for t in range(num_blocks):
            xt = XT[sorted(nodes_set[t])]
            boundary_xs_dict = get_boundary_xs(
                XT, boundary_edges_dict[t], nodes_id_dict
            )  # key is boundary edge (u local index, v global index), value is adjacent x in other blocks
            fea = features[sorted(nodes_set[t])]
            grad = func.get_loss_grad(xt, fea, boundary_xs_dict)

            if 0 == iter:  # x starts slightly above zero (to avoid division by zero in the gradient), so treat it as exactly zero here
                xt_zero = np.zeros_like(xt)
                normalized_grad = normalized_gradient(xt_zero, grad)
            else:
                normalized_grad = normalized_gradient(xt, grad)

            # g: number of connected components
            edges = np.array(relabeled_edges_set[t])
            costs = np.ones(len(edges))
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=k,
                                budget=k - 1,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            gamma_xt = set(re_nodes)
            indicator_x = np.zeros(len(xt))
            indicator_x[list(gamma_xt)] = 1.
            if iter == 0:
                # note: the current variables are not updated here; this
                # intermediate result is only used for its support
                tmp_x = np.zeros_like(xt) + learning_rate * grad * indicator_x
            else:
                tmp_x = xt + learning_rate * grad * indicator_x
            omega_x = set([ind for ind, _ in enumerate(tmp_x) if not 0. == _])
            # if 0 == iter:  # because we initialize the x as 0.000001 to avoid the divided by zero error when calculating the gradient
            #     omega_x = gamma_xt
            Omega_X.append(omega_x)

        # print("---Head Projection Finished: time: {}".format(time.asctime(time.localtime(time.time()))))
        # BX = func.get_argmax_fx_with_proj(XT, Omega_X)  # TODO: how to solve this argmax correctly
        # BX = func.get_argmax_fx_with_proj_accelerated(XT, Omega_X)  # TODO: how to solve this argmax correctly
        # BX = func.get_argmax_fx_with_proj_parallel(XT, Omega_X)  # use close form to solve
        BX = func.get_argmax_fx_with_proj_parallel_2(
            XT, Omega_X)  # use gradient descent to solve

        # print("---ArgMax Finished: time: {}".format(time.asctime(time.localtime(time.time()))))
        for t in range(num_blocks):
            edges = np.array(relabeled_edges_set[t])
            costs = np.ones(len(edges))
            bx = BX[sorted(nodes_set[t])]  # sort so the local ids line up with nodes_id_dict
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=k,
                                budget=k - 1,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, re_edges, p_x = re_tail
            psi_x = re_nodes
            xt = np.zeros_like(bx)
            xt[list(psi_x)] = bx[list(psi_x)]  # note: the non-zero entries of xt may not form a connected subgraph
            XT[sorted(nodes_set[t])] = xt

        # print("---Tail Projection Finished: time: {}".format(time.asctime(time.localtime(time.time()))))
        gap_x = np.linalg.norm(XT - X_prev)**2
        if gap_x < 1e-6:
            break

        print_log(log_file,
                  '\ncurrent performance iteration: {}\n'.format(iter))
        obj_val, ems_score, smooth_penalty, binarized_penalty = func.get_obj_value(
            XT, boundary_edges_dict)
        print_log(log_file, 'trade-off: {}\n'.format(trade_off))
        print_log(log_file,
                  'objective value of prediction: {:.5f}\n'.format(obj_val))
        print_log(log_file,
                  'global ems score of prediction: {:.5f}\n'.format(ems_score))
        print_log(log_file,
                  'penalty of prediction: {:.5f}\n'.format(obj_val - ems_score))

        pred_subgraph = sorted(np.nonzero(XT)[0])
        print_log(
            log_file,
            "----------------- current predicted subgraph vs true subgraph:\n")
        print_log(log_file, "{}, size: {}\n".format(pred_subgraph,
                                                    len(pred_subgraph)))
        print_log(log_file, "{}, size: {}\n".format(true_subgraph,
                                                    len(true_subgraph)))

        print_log(log_file,
                  "----------------- info of current predicted subgraph:\n")
        fea = np.round(features[pred_subgraph], 5)
        print_log(
            log_file, "{}\n".format(
                list(zip(pred_subgraph, np.round(XT[pred_subgraph], 5), fea))))

        print_log(log_file,
                  "----------------- info of current true subgraph:\n")
        fea = np.round(features[true_subgraph], 5)
        print_log(
            log_file, "{}\n".format(
                list(zip(true_subgraph, np.round(XT[true_subgraph], 5), fea))))

        global_prec, global_rec, global_fm, global_iou = evaluate(
            true_subgraph, pred_subgraph)
        print_log(
            log_file,
            'global_prec={:.4f},\nglobal_rec={:.4f},\nglobal_fm={:.4f},\nglobal_iou={:.4f}\n'
            .format(global_prec, global_rec, global_fm, global_iou))

    end_time = time.time()
    total_time = end_time - start_time
    print("time {}".format(total_time))
    return XT, total_time
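

# Note: `print_log` (used throughout block_graph_mp and dynamic_graph_mp) is
# not defined in this listing. Its call sites only show that it writes a
# preformatted message to a log file; a plausible sketch (hypothetical):
import sys


def print_log(log_file, message):
    """Append a message to the log file, or echo it if no file is given
    (hypothetical sketch; the real helper is not shown here)."""
    if log_file is not None:
        with open(log_file, 'a') as log:
            log.write(message)
    else:
        sys.stdout.write(message)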
Exemplo n.º 15
def optimize(instance,
             sparsity,
             trade_off,
             learning_rate,
             max_iter,
             epsilon=1e-3,
             logger=None):

    graph = instance['graph']
    true_subgraph = instance['true_subgraph']
    features = instance['features']

    num_blocks = len(instance['nodes_set'])
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    nodes_set = instance['nodes_set']
    boundary_edges_dict = instance['block_boundary_edges_dict']

    nodes_id_dict = relabel_nodes(nodes_set)
    relabeled_edges_set = relabel_edges(graph, nodes_set, nodes_id_dict)

    if logger:
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured IHT')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges: {:d}'.format(num_edges))
        logger.debug('number of blocks: {:d}'.format(num_blocks))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = BlockSumEMS(features=instance['features'],
                       trade_off=trade_off,
                       nodes_set=nodes_set,
                       boundary_edges_dict=boundary_edges_dict,
                       nodes_id_dict=nodes_id_dict)
    if logger:
        true_x = np.zeros(num_nodes)
        true_x[true_subgraph] = 1.
        true_x = np.array(true_x)
        true_obj_val, true_ems_val, true_penalty, _ = func.get_obj_val(
            true_x, boundary_edges_dict)
        logger.debug(
            'ground truth, obj value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
            .format(true_obj_val, true_ems_val, true_penalty))

    current_x = func.get_init_x_zeros() + 1e-6
    for iter in range(max_iter):
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x = np.copy(current_x)
        iter_time = time.time()
        iter_proj_time = 0.

        bx_array = np.zeros_like(current_x)
        omega_x_list = []
        for b in range(num_blocks):
            block_x = current_x[sorted(nodes_set[b])]
            boundary_xs_dict = get_boundary_xs(current_x,
                                               boundary_edges_dict[b],
                                               nodes_id_dict)
            feat = features[sorted(nodes_set[b])]
            grad_x = func.get_gradient(block_x, feat, boundary_xs_dict)

            block_x = block_x if iter > 0 else np.zeros_like(block_x,
                                                             dtype=np.float64)

            normalized_grad = normalize_gradient(block_x, grad_x)

            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            # g: number of connected components
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            iter_proj_time += (time.time() - start_proj_time)
            gamma_x = set(re_nodes)  # note: elements of gamma_x are local (block-level) node ids
            # omega_x_list.append(gamma_x)

            indicator_x = np.zeros(len(block_x))  # note: zeros, not ones
            indicator_x[list(gamma_x)] = 1.
            sorted_nodes = sorted(nodes_set[b])  # keep the ordering consistent with the local ids used above
            bx_array[sorted_nodes] = current_x[sorted_nodes] + learning_rate * grad_x * indicator_x

        for b in range(num_blocks):
            sorted_nodes = sorted(nodes_set[b])
            bx = bx_array[sorted_nodes]
            # get edges and edge weights of current block
            edges = np.array(relabeled_edges_set[b])
            costs = np.ones(len(edges))
            start_proj_time = time.time()
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=sparsity,
                                budget=sparsity - 1.,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, re_edges, p_x = re_tail
            iter_proj_time += (time.time() - start_proj_time)
            psi_x = set(re_nodes)

            block_x = np.zeros_like(bx)
            block_x[list(psi_x)] = bx[list(psi_x)]
            normalized_block_x = normalize(block_x)
            current_x[sorted_nodes] = normalized_block_x  # constrain the current block of x to [0, 1]

        acc_proj_time += iter_proj_time

        if logger:
            obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
                current_x, boundary_edges_dict)
            logger.debug(
                'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'
                .format(obj_val, sum_ems_val, penalty))
            logger.debug('iteration time: {:.5f}'.format(time.time() -
                                                         iter_time))
            logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
            logger.debug('acc projection time: {:.5f}'.format(
                acc_proj_time))  # accumulative projection time
            logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if logger:
            logger.debug('difference {:.5f}'.format(diff_norm_x))
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    if logger:
        obj_val, sum_ems_val, penalty, _ = func.get_obj_val(
            current_x, boundary_edges_dict)
        logger.debug(
            'objective value: {:.5f}, sum ems value: {:.5f}, penalty: {:.5f}'.
            format(obj_val, sum_ems_val, penalty))
        logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
        logger.debug(
            'accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x, run_time
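

# Note: `evaluate_block` (used in Exemplo n.º 10 and in the commented-out
# driver after Exemplo n.º 9) is not shown in this listing. Its call sites
# unpack precision, recall, F-measure and IoU of the predicted node set
# against the true one; a straightforward sketch under that assumption
# (hypothetical):
def evaluate_block(true_subgraph, pred_subgraph):
    true_set, pred_set = set(true_subgraph), set(pred_subgraph)
    intersection = true_set & pred_set
    union = true_set | pred_set
    prec = len(intersection) / float(len(pred_set)) if pred_set else 0.
    rec = len(intersection) / float(len(true_set)) if true_set else 0.
    fm = 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0.
    iou = len(intersection) / float(len(union)) if union else 0.
    return prec, rec, fm, iou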
Exemplo n.º 16
def optimize(instance,
             sparsity,
             learning_rate=0.01,
             max_iter=10,
             epsilon=1e-3):
    """
    graph-structured gradient hard thresholding pursuit algorithm
    :param instance:
    :param sparsity:
    :param learning_rate:
    :param max_iter:
    :param epsilon:
    :return:
    """

    graph = instance['graph']  # topology of network, networkx object
    true_subgraph = instance['subgraph']  # list or numpy one dim ndarray
    edges = np.array(graph.edges)

    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)
    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of nodes in true_subgraph: {:d}'.format(
        len(true_subgraph)))

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.
    func = EMS(features=instance['features'], graph=graph)
    # current_x = func.get_init_x_random() # note, not stable
    # current_x = func.get_init_x_maxcc()
    current_x = func.get_init_x_zeros() + 0.00001
    for iter in range(max_iter):
        logger.debug('iteration: {:d}'.format(iter))
        iter_time = time.time()
        iter_proj_time = 0.

        grad_x = func.get_gradient(current_x)
        normalized_grad = normalize_gradient(current_x, grad_x)

        start_proj_time = time.time()
        re_head = head_proj(edges=edges,
                            weights=edge_weights,
                            x=normalized_grad,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        iter_proj_time += time.time() - start_proj_time

        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        tmp_x = current_x + learning_rate * normalized_grad * indicator_x
        omega_x = set([ind for ind, _ in enumerate(tmp_x) if not 0. == _])

        bx = func.argmax_obj_with_proj(current_x, omega_x)

        start_proj_time = time.time()
        re_tail = tail_proj(edges=edges,
                            weights=edge_weights,
                            x=bx,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            nu=2.5,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            verbose=0)
        re_nodes, _, _ = re_tail
        iter_proj_time += time.time() - start_proj_time
        acc_proj_time += iter_proj_time
        psi_x = set(re_nodes)

        prev_x = current_x
        current_x = np.zeros_like(current_x)
        current_x[list(psi_x)] = bx[list(psi_x)]

        logger.debug('function value: {:.5f}'.format(
            func.get_obj_val(current_x)))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(
            acc_proj_time))  # accumulative projection time
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    logger.debug('final function value: {:.5f}'.format(
        func.get_obj_val(current_x)))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
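

# A hypothetical driver for the GHTP optimizer above, showing the expected
# shape of `instance` (networkx graph, one feature value per node, and the
# ground-truth node list). Everything here -- the grid graph, the planted
# subgraph, the feature values -- is made up for illustration, and it assumes
# a module-level `logger` is configured, as the examples above do:
import networkx as nx

demo_graph = nx.convert_node_labels_to_integers(nx.grid_2d_graph(5, 5))
demo_features = np.random.normal(0., 0.1, demo_graph.number_of_nodes())
demo_subgraph = [6, 7, 8, 11]  # a small connected set in the 5x5 grid
demo_features[demo_subgraph] = 5.
demo_instance = {'graph': demo_graph,
                 'features': demo_features,
                 'subgraph': demo_subgraph}
opt_x = optimize(demo_instance, sparsity=4)
print(sorted(np.nonzero(opt_x)[0]))  # ideally recovers the planted nodes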
Exemplo n.º 17
def dynamic_graph_mp(data, k, max_iter, trade_off, log_file, func_name="EMS"):
    """
    :param func_name: score function name
    :param k: sparsity
    :param max_iter: max number of iterations
    :param G: networkx graph
    :param true_subgraph: a list of nodes that represents the ground truth subgraph
    :return: prediction xt, which denotes by the predicted abnormal nodes
    """
    if func_name == "SumEMS":
        features = data["features"]
        func = SumEMS(feature_matrix=features, trade_off=trade_off)
    elif func_name == "GlobalEMS":
        features = data["features"]
        func = GlobalEMS(feature_matrix=features, trade_off=trade_off)
    # elif func_name == "LaplacianEMS":
    #     features = data["features"]
    #     func = LaplacianEMS.LaplacianEMS(feature_matrix=features, trade_off=trade_off)
    else:
        raise ValueError('unsupported score function: {}'.format(func_name))

    G = data["graph"]
    num_nodes = G.number_of_nodes()
    # costs = data["weights"]
    costs = np.ones(G.number_of_edges())
    T = len(data["subgraphs"])
    true_subgraphs = data["subgraphs"]
    edges = np.array(G.edges())

    print_log(log_file, "\n----------------initialization---------------\n")
    X = func.get_init_point_random()
    XT = np.copy(X)
    print_log(log_file, "\n------------------searching------------------\n")
    for iter in range(max_iter):
        Omega_X = []
        X_prev = np.copy(XT)
        print("iter: {}, time: {}".format(
            iter, time.asctime(time.localtime(time.time()))))
        for t in range(T):
            xt = XT[t]
            grad = func.get_loss_grad(XT, t)

            if 0 == iter:
                xt_zero = np.zeros_like(xt)
                normalized_grad = normalized_gradient(
                    xt_zero, grad)  # rescale gradient of x into [0, 1]
            else:
                normalized_grad = normalized_gradient(
                    xt, grad)  # rescale gradient of x into [0, 1]

            # g: number of connected components
            re_head = head_proj(edges=edges,
                                weights=costs,
                                x=normalized_grad,
                                g=1,
                                s=k,
                                budget=k - 1,
                                delta=1. / 169.,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                epsilon=1e-10,
                                verbose=0)
            re_nodes, re_edges, p_x = re_head
            gamma_xt = set(re_nodes)
            supp_xt = set([ind for ind, _ in enumerate(xt) if _ != 0.])

            omega_x = gamma_xt.union(supp_xt)
            if 0 == iter:
                omega_x = gamma_xt
            Omega_X.append(omega_x)

        BX = func.get_argmax_fx_with_proj(
            XT, Omega_X)  # TODO: how to solve this argmax correctly

        for t in range(T):
            bx = BX[t]
            re_tail = tail_proj(edges=edges,
                                weights=costs,
                                x=bx,
                                g=1,
                                s=k,
                                budget=k - 1,
                                nu=2.5,
                                max_iter=100,
                                err_tol=1e-8,
                                root=-1,
                                pruning='strong',
                                verbose=0)
            re_nodes, re_edges, p_x = re_tail
            psi_x = re_nodes
            xt = np.zeros_like(XT[t])
            xt[list(psi_x)] = bx[list(psi_x)]
            XT[t] = xt
        gap_x = np.linalg.norm(XT - X_prev)**2
        if gap_x < 1e-6:
            break

        print_log(log_file,
                  '\ncurrent performance iteration: {}\n'.format(iter))
        if func_name == "SumEMS":
            obj_val, nonoverlap_penalty = func.get_obj_value(XT)
            ems_score = func.get_sum_ems(XT)
        elif func_name == "GlobalEMS":
            obj_val, ems_score, smooth_penalty, binarized_penalty = func.get_obj_value(
                XT)
        print_log(log_file, 'trade-off: {}\n'.format(trade_off))
        print_log(log_file,
                  'objective value of prediction: {:.5f}\n'.format(obj_val))
        print_log(log_file,
                  'global ems score of prediction: {:.5f}\n'.format(ems_score))
        print_log(log_file,
                  'penalty of prediction: {:.5f}\n'.format(obj_val - ems_score))

        pred_subgraphs = [np.nonzero(x)[0] for x in XT]
        print_log(log_file, "----------------- current predicted subgraphs:\n")
        for t in range(T):
            pred_sub = sorted(pred_subgraphs[t])
            print_log(log_file, "{}, {}\n".format(t, pred_sub))

        print_log(log_file, "---------------------------------------------:\n")
        for t in range(T):
            pred_sub = sorted(pred_subgraphs[t])
            x = np.round(XT[t][pred_sub], 5)
            fea = np.round(features[t][pred_sub], 5)
            print_log(log_file, "{}, {}\n".format(t, zip(pred_sub, x, fea)))

        print_log(log_file, "----------------- current true subgraphs:\n")
        for t in range(T):
            true_sub = sorted(true_subgraphs[t])
            x = np.round(XT[t][true_sub], 5)
            fea = np.round(features[t][true_sub], 5)
            print_log(log_file, "{}, {}\n".format(t, zip(true_sub, x, fea)))

        global_prec, global_rec, global_fm, global_iou, valid_prec, valid_rec, valid_fm, valid_iou = evaluate(
            true_subgraphs, pred_subgraphs)
        print_log(
            log_file,
            'global_prec={:.4f},\nglobal_rec={:.4f},\nglobal_fm={:.4f},\nglobal_iou={:.4f}\n'
            .format(global_prec, global_rec, global_fm, global_iou))
        print_log(
            log_file,
            'valid_prec={:.4f},\nvalid_rec={:.4f},\nvalid_fm={:.4f},\nvalid_iou={:.4f}\n'
            .format(valid_prec, valid_rec, valid_fm, valid_iou))

    return XT
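

# Note: the dynamic-case `evaluate` above unpacks eight values -- global
# precision/recall/F-measure/IoU over all time stamps pooled, plus "valid"
# variants (presumably restricted to time stamps whose true subgraph is
# non-empty). A sketch under that assumption (hypothetical; the real helper
# is not shown in this listing):
def evaluate(true_subgraphs, pred_subgraphs):
    def scores(true_pairs, pred_pairs):
        inter = true_pairs & pred_pairs
        union = true_pairs | pred_pairs
        prec = len(inter) / float(len(pred_pairs)) if pred_pairs else 0.
        rec = len(inter) / float(len(true_pairs)) if true_pairs else 0.
        fm = 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0.
        iou = len(inter) / float(len(union)) if union else 0.
        return prec, rec, fm, iou

    true_pairs = {(t, v) for t, sub in enumerate(true_subgraphs) for v in sub}
    pred_pairs = {(t, v) for t, sub in enumerate(pred_subgraphs) for v in sub}
    valid_ts = {t for t, sub in enumerate(true_subgraphs) if len(sub) > 0}
    valid_pred = {(t, v) for t, v in pred_pairs if t in valid_ts}
    return scores(true_pairs, pred_pairs) + scores(true_pairs, valid_pred)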
Exemplo n.º 18
0
def proj_mp(graph, weight, A, sparsity, lmbd, max_iter=10, epsilon=1e-3):
    current_x = proj_init_point(graph.number_of_nodes()) + 1e-6

    edges = np.array(graph.edges)
    edge_weights = np.ones(graph.number_of_edges())
    start_time = time.time()
    for i in range(max_iter):
        print('iter {}'.format(i))
        iter_time = time.time()
        gradient = proj_get_gradient(current_x, weight, A, lmbd)
        normalized_gradient = normalize_gradient(current_x, gradient)

        re_head = head_proj(edges=edges,
                            weights=edge_weights,
                            x=normalized_gradient,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            err_tol=1e-8,
                            max_iter=100,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        gamma_x = set(re_nodes)
        print('gamma_x', len(gamma_x))
        # print(sorted(list(gamma_x)))
        if i == 0:
            supp_x = set()
        else:
            supp_x = set(
                [ind for ind, _ in enumerate(current_x) if not 0. == _])
        omega_x = gamma_x | supp_x

        print('omega_x', len(omega_x))
        # print(sorted(list(omega_x)))

        # print(gradient[sorted(list(gamma_x))])
        # print(gradient[sorted(list(supp_x))])

        bx = proj_argmax(current_x,
                         omega_x,
                         weight,
                         A,
                         lmbd,
                         max_iter=2000,
                         learning_rate=0.01)

        re_tail = tail_proj(edges=edges,
                            weights=edge_weights,
                            x=bx,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            nu=2.5,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            verbose=0)
        re_nodes, _, _ = re_tail

        psi_x = set(re_nodes)  # support returned by the tail projection
        prev_x = current_x
        current_x = np.zeros_like(current_x)
        current_x[list(psi_x)] = bx[list(psi_x)]  # restrict bx to the tail support

        print('psi_x', len(np.nonzero(current_x)[0]))

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A,
                                                      lmbd)

        print(
            'iter {}, func val: {:.5f}, iter_time: {:.5f}, diff_norm: {:.5f}'.
            format(i, func_val,
                   time.time() - iter_time, diff_norm_x))

        subgraph = set(np.nonzero(current_x)[0])
        print('subgraph density', nx.density(nx.subgraph(graph, subgraph)))

        if diff_norm_x <= epsilon:
            break

    run_time = time.time() - start_time
    func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A, lmbd)

    print('final function value: {:.5f}'.format(func_val))
    print('run time of whole algorithm: {:.5f}'.format(run_time))

    subgraph = set(np.nonzero(current_x)[0])

    return subgraph
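A minimal usage sketch for proj_mp follows. The grid graph, node weight vector, and dense adjacency matrix A are illustrative stand-ins; the exact input formats expected by proj_get_gradient and proj_argmax (defined elsewhere) are assumptions.

# Hypothetical driver for proj_mp on a 5x5 grid graph. The planted
# high-weight nodes mirror the tests earlier in this document; the
# adjacency matrix A for the density term is an assumed input format.
import networkx as nx
import numpy as np

graph = nx.convert_node_labels_to_integers(nx.grid_2d_graph(5, 5))
weight = np.random.normal(0., 0.1, graph.number_of_nodes())
weight[[6, 7, 8, 9]] = 5.        # planted high-weight nodes
A = nx.to_numpy_array(graph)     # dense adjacency matrix

subgraph = proj_mp(graph, weight, A, sparsity=5, lmbd=1., max_iter=10)
print('detected subgraph:', sorted(subgraph))

Note the head/tail pattern inside proj_mp: the head projection over-selects a candidate support from the normalized gradient, the intermediate argmax refits on that support, and the tail projection prunes the refit back to a connected, sparsity-bounded subgraph.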
Example No. 19
def optimize(instance,
             sparsity,
             learning_rate=0.01,
             max_iter=10,
             epsilon=1e-3):

    graph = instance['graph']

    true_subgraph = instance['subgraph']
    edges = np.array(graph.edges)
    num_nodes = graph.number_of_nodes()
    num_edges = graph.number_of_edges()
    edge_weights = np.ones(num_edges)
    logger.debug('-' * 5 + ' related info ' + '-' * 5)
    logger.debug('sparsity: {:d}'.format(sparsity))
    logger.debug('max iteration: {:d}'.format(max_iter))
    logger.debug('number of nodes: {:d}'.format(num_nodes))
    logger.debug('number of edges: {:d}'.format(num_edges))
    logger.debug('number of nodes in true_subgraph: {:d}'.format(
        len(true_subgraph)))

    logger.debug('-' * 5 + ' start iterating ' + '-' * 5)
    start_time = time.time()
    acc_proj_time = 0.
    func = EMS(features=instance['features'], graph=graph)
    # random initialization (func.get_init_x_random()) is not stable, so
    # start from zeros plus a small positive offset instead
    current_x = func.get_init_x_zeros() + 0.00001
    for iteration in range(max_iter):
        logger.debug('iteration: {:d}'.format(iteration))
        iter_time = time.time()
        iter_proj_time = 0.

        grad_x = func.get_gradient(current_x)
        normalized_grad = normalize_gradient(current_x, grad_x)  # fixme

        start_proj_time = time.time()
        re_head = head_proj(edges=edges,
                            weights=edge_weights,
                            x=normalized_grad,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            delta=1. / 169.,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            epsilon=1e-10,
                            verbose=0)
        re_nodes, _, _ = re_head
        iter_proj_time += time.time() - start_proj_time

        omega_x = set(re_nodes)  # support returned by the head projection
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(omega_x)] = 1.

        bx = current_x + learning_rate * normalized_grad * indicator_x  # gradient step on omega_x only

        start_proj_time = time.time()
        re_tail = tail_proj(edges=edges,
                            weights=edge_weights,
                            x=bx,
                            g=1,
                            s=sparsity,
                            budget=sparsity - 1.,
                            nu=2.5,
                            max_iter=100,
                            err_tol=1e-8,
                            root=-1,
                            pruning='strong',
                            verbose=0)
        re_nodes, _, _ = re_tail
        iter_proj_time += time.time() - start_proj_time
        acc_proj_time += iter_proj_time  # accumulate head + tail projection time
        psi_x = set(re_nodes)

        prev_x = current_x
        current_x = np.zeros_like(current_x)
        current_x[list(psi_x)] = bx[list(psi_x)]
        current_x = normalize(current_x)  # constrain current_x to [0, 1]

        logger.debug('function value: {:.5f}'.format(
            func.get_obj_val(current_x)))
        logger.debug('iteration time: {:.5f}'.format(time.time() - iter_time))
        logger.debug('iter projection time: {:.5f}'.format(iter_proj_time))
        logger.debug('acc projection time: {:.5f}'.format(
            acc_proj_time))  # accumulative projection time
        logger.debug('-' * 10)

        diff_norm_x = np.linalg.norm(current_x - prev_x)
        if diff_norm_x < epsilon:
            break

    run_time = time.time() - start_time
    logger.debug('final function value: {:.5f}'.format(
        func.get_obj_val(current_x)))
    logger.debug('run time of whole algorithm: {:.5f}'.format(run_time))
    logger.debug('accumulative projection time: {:.5f}'.format(acc_proj_time))

    return current_x
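A hedged usage sketch for this optimize variant: the instance layout mirrors the keys read above ('graph', 'subgraph', 'features'), but the feature shape expected by EMS is an assumption (here, one score per node).

# Hypothetical driver for optimize. EMS, normalize_gradient, normalize,
# head_proj, and tail_proj are assumed importable from the same module;
# the per-node feature vector is an assumed format.
import networkx as nx
import numpy as np

g = nx.convert_node_labels_to_integers(nx.grid_2d_graph(10, 10))
true_nodes = [11, 12, 13, 21, 22, 23]
feats = np.random.normal(0., 1., g.number_of_nodes())
feats[true_nodes] += 5.          # elevate the planted subgraph

instance = {'graph': g, 'subgraph': true_nodes, 'features': feats}
x = optimize(instance, sparsity=10, learning_rate=0.01, max_iter=10)
print('support of x:', sorted(np.nonzero(x)[0]))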