def _local_gefura(G, groups, weight=None, normalized=True):
    """Compute the local gefura score of every node of ``G``.

    For each source node a single-source shortest-path pass is run (BFS
    when ``weight`` is None, Dijkstra otherwise) and dependencies are
    accumulated back along the predecessor lists. A node only collects
    the dependency of a source that belongs to its own group.

    Arguments
    ---------
    G : graph
        Iterating over ``G`` must yield its nodes.
    groups : iterable of collections of nodes
        The groups are assumed to be disjoint; if a node occurs in
        several groups, the last one wins.
    weight : None or edge data key
        Forwarded to ``_single_source_dijkstra_path_basic`` when not None.
    normalized : bool
        Forwarded unchanged to ``rescale_local``.

    Returns
    -------
    Whatever ``rescale_local`` returns for the accumulated node -> score
    mapping.
    """
    gamma = {node: 0 for node in G}

    # Node -> group lookup (relies on the groups being disjoint).
    group_of = {}
    for group in groups:
        for member in group:
            group_of[member] = group

    for source in G:
        if weight is not None:
            stack, preds, n_paths = _single_source_dijkstra_path_basic(
                G, source, weight
            )
        else:
            stack, preds, n_paths = _single_source_shortest_path_basic(
                G, source
            )

        # Accumulation: pop nodes off the stack returned by the
        # shortest-path pass and push dependencies to their predecessors.
        dependency = {node: 0 for node in G}
        while stack:
            node = stack.pop()
            crosses_groups = group_of[source] != group_of[node]
            node_dep = dependency[node]
            node_paths = n_paths[node]
            # The path ending at `node` itself only counts when source
            # and node sit in different groups.
            if crosses_groups:
                share = (1 + node_dep) / node_paths
            else:
                share = node_dep / node_paths
            for pred in preds[node]:
                dependency[pred] += n_paths[pred] * share
            if node != source and not crosses_groups:
                gamma[node] += node_dep

    return rescale_local(gamma, G, group_of, normalized)
def global_gefura(G, groups, weight=None, normalized=True):
    """Determine the global gefura measure of each node.

    Works for weighted and unweighted, directed and undirected, and
    connected and unconnected networks.

    Arguments
    ---------
    G : a networkx.Graph
        The network.

    groups : a list or iterable of sets
        Each set represents a group and contains 1 to N nodes. The groups
        are assumed to be disjoint.

    weight : None or a string
        If None, the network is treated as unweighted (BFS is used).
        If a string, this is the edge data key corresponding to the edge
        weight (Dijkstra is used).

    normalized : True|False
        Whether or not to normalize the output to [0, 1]; the flag is
        forwarded to ``rescale_global``.

    Examples
    --------
    >>> import networkx as nx
    >>> G = nx.path_graph(5)
    >>> groups = [{0, 2}, {1}, {3, 4}]
    >>> global_gefura(G, groups)
    {0: 0.0, 1: 0.5, 2: 0.8, 3: 0.6, 4: 0.0}

    """
    gamma = {node: 0 for node in G}

    # Node -> group lookup (relies on the groups being disjoint).
    group_of = {}
    for group in groups:
        for member in group:
            group_of[member] = group

    for source in G:
        if weight is not None:
            stack, preds, n_paths = _single_source_dijkstra_path_basic(
                G, source, weight
            )
        else:
            stack, preds, n_paths = _single_source_shortest_path_basic(
                G, source
            )

        # Accumulation: pop nodes off the stack returned by the
        # shortest-path pass and push dependencies to their predecessors.
        dependency = {node: 0 for node in G}
        while stack:
            node = stack.pop()
            node_dep = dependency[node]
            node_paths = n_paths[node]
            # Only paths that end in a different group than the source
            # contribute the extra "+1" for the path ending at `node`.
            if group_of[source] != group_of[node]:
                share = (1 + node_dep) / node_paths
            else:
                share = node_dep / node_paths
            for pred in preds[node]:
                dependency[pred] += n_paths[pred] * share
            if node != source:
                gamma[node] += node_dep

    return rescale_global(gamma, G, groups, normalized)
def _compare_bfs_spc(G, Gnx, source):
    """Check cugraph's BFS shortest-path counters against the reference.

    Runs ``cugraph.bfs`` on ``G`` from ``source`` with
    ``return_sp_counter=True`` and compares the ``sp_counter`` column,
    vertex by vertex, with the counters produced by
    ``nxacb._single_source_shortest_path_basic`` on ``Gnx``.

    Distances and predecessors are deliberately not checked here; only
    the shortest path counting is.

    Raises
    ------
    AssertionError
        If the result does not have 4 columns, if a reference vertex is
        absent from the cugraph result, or if any counter differs beyond
        ``rtol=DEFAULT_EPSILON``.
    """
    cu_result = cugraph.bfs(G, source, return_sp_counter=True)

    # With the counter requested, exactly 4 columns are expected:
    # 'vertex', 'distance', 'predecessor', 'sp_counter'
    n_columns = len(cu_result.columns)
    assert n_columns == 4, (
        "The result of the BFS has an invalid " "number of columns"
    )

    _, _, nx_sp_counter = nxacb._single_source_shortest_path_basic(Gnx, source)
    # Reference counters ordered by vertex identifier.
    sorted_nx = [nx_sp_counter[vertex] for vertex in sorted(nx_sp_counter)]

    # `cu_result` comes back in no guaranteed order, so sort it by vertex
    # to line it up with `sorted_nx`. This avoids a slower `isin`-style
    # filter; the reference dictionary is expected to hold every vertex.
    ordered = cu_result.sort_values("vertex")
    sorted_df = ordered.rename(columns={"sp_counter": "cu_spc"}, copy=False)

    # Every vertex known to the reference must show up in the cugraph
    # result before the two are compared positionally.
    cu_vertices = set(sorted_df['vertex'].values_host)
    nx_vertices = nx_sp_counter.keys()
    shared_vertices = cu_vertices.intersection(nx_vertices)
    assert len(shared_vertices) == len(nx_vertices), "There are missing vertices"

    # Both sides are sorted by vertex identifier, so the reference
    # counters can be attached as a plain column.
    sorted_df["nx_spc"] = sorted_nx

    # An element-wise isclose (rather than allclose, which only yields a
    # single boolean) lets us keep and print the offending rows.
    matches = cupy.isclose(
        sorted_df["cu_spc"], sorted_df["nx_spc"], rtol=DEFAULT_EPSILON
    )
    shortest_path_counter_errors = sorted_df[~matches]
    n_errors = len(shortest_path_counter_errors)
    if n_errors > 0:
        print(shortest_path_counter_errors)
    assert n_errors == 0, (
        "Shortest path counters " "are too different"
    )
def dataset_nxresults_allstartvertices_spc(small_dataset_nx_graph):
    """Collect reference shortest-path counters for every start vertex.

    ``small_dataset_nx_graph`` is unpacked as ``(dataset, directed, Gnx)``.
    Each node of ``Gnx`` is used once as a start vertex, and the third
    value returned by ``nxacb._single_source_shortest_path_basic`` (the
    shortest path counter) is recorded for it.

    Returns
    -------
    tuple
        ``(dataset, directed, all_nx_values, start_vertices, use_spc)``
        where ``all_nx_values[i]`` belongs to ``start_vertices[i]`` and
        ``use_spc`` is always True.
    """
    dataset, directed, Gnx = small_dataset_nx_graph

    def _sp_counter_from(vertex):
        # Only the shortest path counter of the reference result is kept.
        _, _, counter = nxacb._single_source_shortest_path_basic(Gnx, vertex)
        return counter

    start_vertices = list(Gnx)
    all_nx_values = [_sp_counter_from(vertex) for vertex in start_vertices]
    use_spc = True
    return (dataset, directed, all_nx_values, start_vertices, use_spc)
def get_nx_results_and_params(seed, use_spc, dataset, directed, Gnx):
    """
    Helper for fixtures returning Nx results and params.

    Seeds the module-level ``random`` generator with ``seed``, picks one
    start vertex of ``Gnx`` at random and computes the reference values
    for it.

    Parameters
    ----------
    seed : value accepted by ``random.seed``
        Seed used to pick the start vertex.
    use_spc : bool
        If True, the reference values are the shortest path counters (the
        third value returned by
        ``nxacb._single_source_shortest_path_basic``); otherwise they are
        the result of ``nx.single_source_shortest_path_length``.
    dataset, directed :
        Passed through unchanged into the returned tuple.
    Gnx : graph
        Must provide ``nodes()``.

    Returns
    -------
    tuple
        ``(dataset, directed, nx_values, start_vertex, use_spc)``
    """
    random.seed(seed)
    # random.sample() needs a sequence: passing a set-like population was
    # deprecated in Python 3.9 and raises TypeError since 3.11, so the
    # nodes are materialised into a list first. Iteration order is kept,
    # hence the vertex picked for a given seed does not change.
    start_vertex = random.sample(list(Gnx.nodes()), 1)[0]

    if use_spc:
        _, _, nx_sp_counter = \
            nxacb._single_source_shortest_path_basic(Gnx, start_vertex)
        nx_values = nx_sp_counter
    else:
        nx_values = nx.single_source_shortest_path_length(Gnx, start_vertex)

    return (dataset, directed, nx_values, start_vertex, use_spc)
def stress_centrality(G, k=None, normalized=True, weight=None,
                      endpoints=False, seed=None):
    """
    Compute stress centrality.

    We use the same BFS algorithm as for betweenness centrality used in
    networkx, but we change the accumulating phase in order to get only
    the number of shortest paths.

    See algorithm 12 in
    http://algo.uni-konstanz.de/publications/b-vspbc-08.pdf

    Parameters
    ----------
    G : graph
        Must support iteration over nodes, ``len``, ``nodes()`` and
        ``is_directed()``.
    k : None or int
        If None, every node of ``G`` is used as a source. Otherwise ``k``
        source nodes are sampled at random.
    normalized : bool
        Forwarded to ``_rescale``.
    weight : None or edge data key
        If None, BFS is used for the single-source pass; otherwise
        Dijkstra is run with this edge weight key.
    endpoints : bool
        Selects ``_accumulate_stress_endpoints`` instead of
        ``_accumulate_stress_basic`` for the accumulation phase.
    seed : value accepted by ``random.seed``
        Only used when ``k`` is not None. Note that the module-level
        ``random`` generator is (re)seeded with it.

    Returns
    -------
    The node -> stress mapping after ``_rescale`` has been applied.
    """
    stress = dict.fromkeys(G, 0.0)  # b[v]=0 for v in G
    if k is None:
        nodes = G
    else:
        random.seed(seed)
        # random.sample() needs a sequence: set-like populations were
        # deprecated in Python 3.9 and raise TypeError since 3.11.
        # list() keeps the iteration order, so the sample drawn for a
        # given seed is unchanged.
        nodes = random.sample(list(G.nodes()), k)

    for s in nodes:
        # single source shortest paths
        if weight is None:  # use BFS
            S, P, sigma = _single_source_shortest_path_basic(G, s)
        else:  # use Dijkstra's algorithm
            S, P, sigma = _single_source_dijkstra_path_basic(G, s, weight)

        # accumulation
        if endpoints:
            stress = _accumulate_stress_endpoints(stress, S, P, sigma, s)
        else:
            stress = _accumulate_stress_basic(stress, S, P, sigma, s)

    # rescaling
    stress = _rescale(stress, len(G), normalized=normalized,
                      directed=G.is_directed(), k=k)
    return stress
def _group_preprocessing(G, set_v, weight):
    """Precompute the tables used for group betweenness on ``set_v``.

    For every node of ``G`` a single-source shortest-path pass is run
    (BFS when ``weight`` is None, Dijkstra otherwise) and the resulting
    path counts and distances are stored. Dependencies are obtained from
    ``_accumulate_endpoints``. A path betweenness table is then filled in
    for the pairs of nodes taken from ``set_v``.

    Parameters
    ----------
    G : graph
        Iterating over ``G`` must yield its nodes.
    set_v : collection of nodes
        The group nodes; iterated in a nested fashion, so it must be
        re-iterable.
    weight : None or edge data key
        Forwarded to ``_single_source_dijkstra_path_basic`` when not None.

    Returns
    -------
    tuple
        ``(PB, sigma, D)``: ``PB[x][y]`` is the path betweenness entry for
        group nodes ``x`` and ``y`` (``PB[n]`` stays None for nodes not in
        ``set_v``), ``sigma[s][t]`` the stored path count and ``D[s][t]``
        the distance from ``s`` to ``t``.
    """
    sigma, delta, D = {}, {}, {}
    betweenness = {node: 0 for node in G}

    for source in G:
        if weight is not None:  # use Dijkstra's algorithm
            stack, preds, n_paths, dist = _single_source_dijkstra_path_basic(
                G, source, weight
            )
        else:  # use BFS
            stack, preds, n_paths, dist = _single_source_shortest_path_basic(
                G, source
            )
        sigma[source] = n_paths
        D[source] = dist

        betweenness, deps = _accumulate_endpoints(
            betweenness, stack, preds, n_paths, source
        )
        delta[source] = deps

        # add the paths from source to target and rescale sigma
        for target in deps:
            if source != target:
                deps[target] += 1
            if weight is not None:
                # Path counts are halved in the weighted case
                # (NOTE(review): presumably compensates for how the
                # Dijkstra helper counts paths -- confirm).
                n_paths[target] = n_paths[target] / 2

    # build the path betweenness matrix only for nodes that appear in
    # the group
    PB = {node: None for node in G}
    for first in set_v:
        row = {node: 0.0 for node in G}
        PB[first] = row
        for second in set_v:
            if second not in D[first]:
                continue
            for node in G:
                reach = D[node]
                # `node` must be connected to both group nodes, and
                # `first` must lie on a shortest path from `node` to
                # `second`.
                if (
                    second in reach
                    and first in reach
                    and reach[second] == reach[first] + D[first][second]
                ):
                    row[second] += (
                        delta[node][second]
                        * sigma[node][first]
                        * sigma[first][second]
                        / sigma[node][second]
                    )
    return PB, sigma, D