Example #1
def _local_gefura(G, groups, weight=None, normalized=True):
    gamma = dict.fromkeys(G, 0)
    # Make mapping node -> group.
    # This assumes that groups are disjoint.
    group_of = {n: group for group in groups for n in group}

    for s in G:
        if weight is None:
            S, P, sigma = _single_source_shortest_path_basic(G, s)
        else:
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)

        # Accumulation
        delta = dict.fromkeys(G, 0)
        while S:
            w = S.pop()
            different_groups = group_of[s] != group_of[w]
            deltaw, sigmaw = delta[w], sigma[w]
            coeff = (1 + deltaw) / sigmaw if different_groups \
                else deltaw / sigmaw
            for v in P[w]:
                delta[v] += sigma[v] * coeff
            if w != s and not different_groups:
                gamma[w] += deltaw

    gamma = rescale_local(gamma, G, group_of, normalized)
    return gamma
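
Every example on this page consumes the tuple returned by networkx's internal shortest-path helper. For orientation, here is a minimal sketch of what the BFS variant computes, written as an illustrative reimplementation rather than the library's exact code; recent networkx versions also return the distance dict D that Example #7 unpacks.

from collections import deque

def _sssp_basic_sketch(G, s):
    # S: nodes in order of non-decreasing distance from s
    # P: P[w] lists the predecessors of w on shortest s-w paths
    # sigma: sigma[w] counts the shortest s-w paths
    # D: D[w] is the hop distance from s to w
    S, P, sigma, D = [], {v: [] for v in G}, dict.fromkeys(G, 0.0), {s: 0}
    sigma[s] = 1.0
    Q = deque([s])
    while Q:
        v = Q.popleft()
        S.append(v)
        for w in G[v]:
            if w not in D:              # w discovered for the first time
                D[w] = D[v] + 1
                Q.append(w)
            if D[w] == D[v] + 1:        # edge (v, w) lies on a shortest path
                sigma[w] += sigma[v]
                P[w].append(v)
    return S, P, sigma, D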
Example #2
def global_gefura(G, groups, weight=None, normalized=True):
    """Determine global gefura measure of each node

    This function handles weighted and unweighted networks, directed and
    undirected, connected and disconnected.

    Arguments
    ---------
    G : a networkx.Graph
        the network

    groups : a list or iterable of sets
        Each set represents a group and contains 1 to N nodes

    weight : None or a string
        If None, the network is treated as unweighted. If a string, this is
        the edge data key corresponding to the edge weight

    normalized : True|False
        Whether or not to normalize the output to [0, 1].

    Examples
    --------
    >>> import networkx as nx
    >>> G = nx.path_graph(5)
    >>> groups = [{0, 2}, {1}, {3, 4}]
    >>> global_gefura(G, groups)
    {0: 0.0, 1: 0.5, 2: 0.8, 3: 0.6, 4: 0.0}

    """
    gamma = dict.fromkeys(G, 0)
    # Make mapping node -> group.
    # This assumes that groups are disjoint.
    group_of = {n: group for group in groups for n in group}

    for s in G:
        if weight is None:
            S, P, sigma = _single_source_shortest_path_basic(G, s)
        else:
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)

        # Accumulation
        delta = dict.fromkeys(G, 0)
        while S:
            w = S.pop()
            deltaw, sigmaw = delta[w], sigma[w]
            coeff = (1 + deltaw) / sigmaw if group_of[s] != group_of[w] \
                else deltaw / sigmaw
            for v in P[w]:
                delta[v] += sigma[v] * coeff
            if w != s:
                gamma[w] += deltaw

    gamma = rescale_global(gamma, G, groups, normalized)

    return gamma
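
As a quick sketch of the weighted branch, a hypothetical call with unit edge weights routes through the Dijkstra helper while describing the same shortest paths as the docstring example; the 'weight' attribute name below is an assumption.

import networkx as nx

G = nx.path_graph(5)
nx.set_edge_attributes(G, 1.0, "weight")   # unit weights on the assumed key
groups = [{0, 2}, {1}, {3, 4}]
gamma_w = global_gefura(G, groups, weight="weight")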
Example #3
def _compare_bfs_spc(G, Gnx, source):
    df = cugraph.bfs(G, source, return_sp_counter=True)
    # The returned dataframe should contain exactly 4 columns:
    # 'vertex', 'distance', 'predecessor', 'sp_counter'
    assert len(df.columns) == 4, (
        "The result of the BFS has an invalid number of columns"
    )
    _, _, nx_sp_counter = nxacb._single_source_shortest_path_basic(Gnx, source)
    sorted_nx = [nx_sp_counter[key] for key in sorted(nx_sp_counter.keys())]
    # We are not checking distances / predecessors here, as we assume these
    # have already been checked in the _compare_bfs tests.
    # We focus solely on shortest-path counting.

    # cugraph returns a dataframe that should contain each vertex exactly once.
    # We could use isin to filter only the vertices common to both results,
    # but that would slow down the comparison, and in this specific case
    # nxacb._single_source_shortest_path_basic returns a dictionary covering
    # all the vertices.
    # There is no guarantee that the vertices in `df` are sorted, so we
    # enforce the order to allow a faster comparison afterwards.
    sorted_df = df.sort_values("vertex").rename(
        columns={"sp_counter": "cu_spc"}, copy=False
    )

    # This allows us to detect vertex identifiers that might wrongly be
    # present multiple times.
    cu_vertices = set(sorted_df['vertex'].values_host)
    nx_vertices = nx_sp_counter.keys()
    assert len(cu_vertices.intersection(nx_vertices)) == len(
        nx_vertices
    ), "There are missing vertices"

    # We add the nx shortest-path counts to the cudf.DataFrame; both the
    # DataFrame and `sorted_nx` are sorted based on vertex identifiers.
    sorted_df["nx_spc"] = sorted_nx

    # We use cupy.isclose so we can also get the entries of the cudf.DataFrame
    # where there is a mismatch; numpy / cupy allclose would only return a
    # boolean, and we want the extra information about the discrepancies.
    shortest_path_counter_errors = sorted_df[
        ~cupy.isclose(
            sorted_df["cu_spc"], sorted_df["nx_spc"], rtol=DEFAULT_EPSILON
        )
    ]
    if len(shortest_path_counter_errors) > 0:
        print(shortest_path_counter_errors)
    assert len(shortest_path_counter_errors) == 0, (
        "Shortest path counters are too different"
    )
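
A hypothetical driver for the comparison helper above could look like the sketch below. The CSV path and column names are assumptions, and the helper additionally expects cupy, nxacb, and DEFAULT_EPSILON to be available at module scope; the real cugraph test suite builds its graphs through its own utilities.

import cudf
import cugraph
import networkx as nx

edges = cudf.read_csv("edges.csv")  # assumed columns: 'src', 'dst'
G = cugraph.Graph()
G.from_cudf_edgelist(edges, source="src", destination="dst")
Gnx = nx.from_pandas_edgelist(edges.to_pandas(), source="src", target="dst")
_compare_bfs_spc(G, Gnx, source=0)  # assumes vertex 0 exists in the dataset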
Example #4
def dataset_nxresults_allstartvertices_spc(small_dataset_nx_graph):

    dataset, directed, Gnx = small_dataset_nx_graph
    use_spc = True

    start_vertices = [start_vertex for start_vertex in Gnx]

    all_nx_values = []
    for start_vertex in start_vertices:
        _, _, nx_sp_counter = \
            nxacb._single_source_shortest_path_basic(Gnx, start_vertex)
        nx_values = nx_sp_counter
        all_nx_values.append(nx_values)

    return (dataset, directed, all_nx_values, start_vertices, use_spc)
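
A hypothetical test consuming this fixture could unpack the returned tuple as follows; the test name and assertions are placeholders rather than the actual cugraph tests.

def test_bfs_spc_all_start_vertices(dataset_nxresults_allstartvertices_spc):
    dataset, directed, all_nx_values, start_vertices, use_spc = (
        dataset_nxresults_allstartvertices_spc
    )
    assert use_spc
    # one networkx sp_counter dict per start vertex
    assert len(all_nx_values) == len(start_vertices)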
Example #5
def get_nx_results_and_params(seed, use_spc, dataset, directed, Gnx):
    """
    Helper for fixtures returning Nx results and params.
    """
    random.seed(seed)
    start_vertex = random.sample(list(Gnx.nodes()), 1)[0]  # random.sample needs a sequence

    if use_spc:
        _, _, nx_sp_counter = \
            nxacb._single_source_shortest_path_basic(Gnx, start_vertex)
        nx_values = nx_sp_counter
    else:
        nx_values = nx.single_source_shortest_path_length(Gnx, start_vertex)

    return (dataset, directed, nx_values, start_vertex, use_spc)
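
A hypothetical pytest fixture wrapping this helper might look like the following; the fixture name and seed are placeholders, and small_dataset_nx_graph is assumed to be the same fixture used in Example #4.

import pytest

@pytest.fixture
def dataset_nxresults_startvertex_spc(small_dataset_nx_graph):
    dataset, directed, Gnx = small_dataset_nx_graph
    return get_nx_results_and_params(
        seed=42, use_spc=True, dataset=dataset, directed=directed, Gnx=Gnx
    )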
Example #6
def stress_centrality(G,
                      k=None,
                      normalized=True,
                      weight=None,
                      endpoints=False,
                      seed=None):
    """ Compute stress centrality 

        We use the same BSF algorithm as for beteweeness centrality 
        used in networkx, but we change the accumulating phase
        in order to get only the number of shortests path

        see algorithm 12 in http://algo.uni-konstanz.de/publications/b-vspbc-08.pdf
    """
    stress = dict.fromkeys(G, 0.0)  # stress[v] = 0 for v in G
    if k is None:
        nodes = G
    else:
        random.seed(seed)
        nodes = random.sample(list(G.nodes()), k)  # random.sample needs a sequence
    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)
        # accumulation
        if endpoints:
            stress = _accumulate_stress_endpoints(stress, S, P, sigma, s)
        else:
            stress = _accumulate_stress_basic(stress, S, P, sigma, s)
    # rescaling
    stress = _rescale(stress,
                      len(G),
                      normalized=normalized,
                      directed=G.is_directed(),
                      k=k)
    return stress
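
The accumulation helpers are not shown above. A minimal sketch of the basic (non-endpoint) variant is given below, following the stress-centrality recursion delta(v) = sigma(v) * sum over w with v in P(w) of (1 + delta(w) / sigma(w)); the function name is a placeholder and the actual helper may differ in detail.

def _accumulate_stress_basic_sketch(stress, S, P, sigma, s):
    # delta[w] ends up holding the number of shortest paths from s that
    # pass through w, summed over all targets reachable "below" w.
    delta = dict.fromkeys(S, 0.0)
    while S:
        w = S.pop()
        for v in P[w]:
            delta[v] += sigma[v] * (1.0 + delta[w] / sigma[w])
        if w != s:
            stress[w] += delta[w]
    return stress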
Example #7
def _group_preprocessing(G, set_v, weight):
    sigma = {}
    delta = {}
    D = {}
    betweenness = dict.fromkeys(G, 0)
    for s in G:
        if weight is None:  # use BFS
            S, P, sigma[s], D[s] = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma[s], D[s] = _single_source_dijkstra_path_basic(G, s, weight)
        betweenness, delta[s] = _accumulate_endpoints(betweenness, S, P, sigma[s], s)
        for i in delta[s].keys():  # add the paths from s to i and rescale sigma
            if s != i:
                delta[s][i] += 1
            if weight is not None:
                sigma[s][i] = sigma[s][i] / 2
    # building the path betweenness matrix only for nodes that appear in the group
    PB = dict.fromkeys(G)
    for group_node1 in set_v:
        PB[group_node1] = dict.fromkeys(G, 0.0)
        for group_node2 in set_v:
            if group_node2 not in D[group_node1]:
                continue
            for node in G:
                # only proceed if node is connected to both group nodes
                if group_node2 in D[node] and group_node1 in D[node]:
                    if (
                        D[node][group_node2]
                        == D[node][group_node1] + D[group_node1][group_node2]
                    ):
                        PB[group_node1][group_node2] += (
                            delta[node][group_node2]
                            * sigma[node][group_node1]
                            * sigma[group_node1][group_node2]
                            / sigma[node][group_node2]
                        )
    return PB, sigma, D
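
A hypothetical call to this preprocessing step for a two-node group might look like the sketch below; the graph and group are made up for illustration, and the function is assumed to sit next to networkx's internal helpers (the four-value _single_source_shortest_path_basic and _accumulate_endpoints) that it calls.

import networkx as nx

G = nx.path_graph(6)
group = {2, 3}
PB, sigma, D = _group_preprocessing(G, group, weight=None)
# PB is keyed over all of G, but only PB[u] for u in the group is populated,
# and within it only the columns for other group members are incremented.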