Exemplo n.º 1
0
def get_approx_boundary(G, query_nodes, target_nodes, n_edges, start_dist):
    """
    Used to calculate an approximation guarantee for greedy algorithm
    """
    
    H = G.copy() # GET A COPY OF THE GRAPH
    query_set_size = len(query_nodes) 
    target_set_size = len(target_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    
    candidates = list(product(query_nodes, target_nodes))
    # ALL minus exitsting in G
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    
    # CALCULATE MARGINAL GAIN TO EMPTY SET FOR ALL NODES IN STEEPNESS FUNCTION
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums_empty = start_dist.dot(F.sum(axis=1))[0,0] # F(\emptyset)
    # candidates = list(product(query_nodes, target_nodes))
    ac_marginal_empty   = []
    ac_marginal_full    = []
    source_idx_empty = []
    node_processed = -1
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if(node_processed == source_node):
            # skip updating matrix because this updates the F matrix in the same way
            continue
        node_processed = source_node           
        F_updated = update_fundamental_mat(F, H, map_query_to_org, source_node)
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        ac_marginal_empty.append(abs_cen)
        source_idx_empty.append(source_node)
        
    sorted_indexes_empty = [i[0] for i in sorted(enumerate(source_idx_empty), key=lambda x:x[1])]
    ac_marginal_empty = [ac_marginal_empty[i] for i in sorted_indexes_empty]   
    # CALCULATE MARGINAL GAIN FOR FULL SET

    H.add_edges_from(eligible)
    P_all = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs_all = P_all[list(query_nodes),:][:,list(query_nodes)]
    F_all = compute_fundamental(P_abs_all)
    
    row_sums_all = start_dist.dot(F_all.sum(axis=1))[0,0]
    node_prcessed   = -1
    source_idx = []
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if(node_prcessed == source_node):
            # skip updating matrix because this updates the F matrix in the same way
            continue
        node_prcessed = source_node
        F_all_updated = update_rev_fundamental_mat(F_all, H, map_query_to_org, source_node)
        abs_cen   = start_dist.dot(F_all_updated.sum(axis = 1))[0,0]
        ac_marginal_full.append(abs_cen)
        source_idx.append(source_node)   
    
    sorted_indexes = [i[0] for i in sorted(enumerate(source_idx), key=lambda x:x[1])]
    ac_marginal_full = [ac_marginal_full[i] for i in sorted_indexes]
    
    assert sorted_indexes == sorted_indexes_empty , "Something is wrong with the way scores are appended"
    
    all_steepness = (asarray(ac_marginal_full) - row_sums_all) / (row_sums_empty-asarray(ac_marginal_empty))
    s = min(all_steepness)
    node_max = argmin(all_steepness)
    return 1-s, sorted_indexes[node_max]
Exemplo n.º 2
0
def reverse_greedy(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a set of links with a reverse greedy descent algorithm that reduce the 
    absorbing RW centrality between a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. The query and target set 
    must be a 'viable' partition of the graph.
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    P : Scipy matrix
        The transition matrix of the graph G
    F : Scipy matrix
        The fundamental matrix for the graph G with the given set of absorbing
        random walk nodes
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    """
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    H.add_edges_from(eligible)
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    # candidates = list(product(query_nodes, target_nodes))
    worst_F = zeros(F.shape)
    worst_set = []
    optimal_set = []
    ac_scores = []
#     ac_scores.append(row_sums)
    
    while len(eligible) > 0:
        round_min       = -1
        worst_link      = (-1,-1)
        node_prcessed   = -1
        for out_edge in eligible:
            source_node = out_edge[0]
            if(node_prcessed == source_node):
                # skip updating matrix because this updates the F matrix in the same way
                continue
            node_prcessed = source_node
            F_updated = update_rev_fundamental_mat(F, H, map_query_to_org, source_node)
            abs_cen   = start_dist.dot(F_updated.sum(axis = 1))[0,0]
            if abs_cen < round_min or round_min == -1:
                worst_link  = out_edge
                round_min   = abs_cen
                worst_F     = F_updated
        F = worst_F
        H.remove_edge(*worst_link)
        worst_set.append(worst_link) 
        eligible.remove(worst_link)
        if (len(eligible) <= n_edges):           
            ac_scores.append(round_min)
            optimal_set.append(worst_link)
        
    return list(reversed(optimal_set)), list(reversed(ac_scores))