def _calc_bc_subset_fixed(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Check cugraph's k-sampled edge BC for self-coherence.

    Runs edge_betweenness_centrality twice — once letting cugraph sample k
    sources from a fixed seed, once passing the identically-sampled source
    list explicitly — and returns both score columns side by side.
    """
    assert isinstance(k, int), ("This test is meant for verifying coherence "
                                "when k is given as an int")
    # random.seed(None) seeds from the clock; pin the seed so the explicit
    # sampling below matches what cugraph samples internally.
    if seed is None:
        seed = 123
    random.seed(seed)  # cugraph re-seeds with the same value in its own call
    sources = random.sample(range(G.number_of_vertices()), k)

    def _run(k_arg, seed_arg, col_name):
        # One cugraph call, sorted by edge and with the score column renamed.
        out = cugraph.edge_betweenness_centrality(
            G,
            k=k_arg,
            normalized=normalized,
            weight=weight,
            seed=seed_arg,
            result_dtype=result_dtype,
        )
        out = out.sort_values(["src", "dst"])
        out = out.rename(
            columns={"betweenness_centrality": col_name}, copy=False
        )
        return out.reset_index(drop=True)

    # First call repeats the random sampling done above; the second consumes
    # the pre-sampled sources (seed=None because k-as-list plus a seed is not
    # the normal usage).
    cu_df = _run(k, seed, "cu_bc")
    ref_df = _run(sources, None, "ref_bc")
    return cudf.concat([cu_df, ref_df["ref_bc"]], axis=1, sort=False)
def _calc_bc_full(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compute full edge BC with cugraph and networkx, joined on (src, dst).

    Returns a dataframe with 'cu_bc' (cugraph) and 'ref_bc' (networkx)
    columns for every matched edge.
    """
    cu_df = cugraph.edge_betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        seed=seed,
        result_dtype=result_dtype,
    )
    # The caller picks the dtype; make sure cugraph honored it.
    assert cu_df["betweenness_centrality"].dtype == result_dtype, \
        "'betweenness_centrality' column has not the expected type"

    ref_scores = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, seed=seed, weight=weight
    )
    is_directed = type(Gnx) is nx.DiGraph
    ref_df = generate_nx_result(ref_scores, is_directed)
    ref_df = ref_df.rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    )

    # Merge on edge endpoints so comparison does not rely on row order.
    joined = cu_df.merge(ref_df, on=['src', 'dst'])
    joined = joined.rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    )
    return joined.reset_index(drop=True)
def _calc_bc_full(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compute full edge BC with cugraph and networkx side by side.

    cugraph's output is sorted by (src, dst) and the networkx reference
    column is appended positionally; returns the combined dataframe with
    'cu_bc' and 'ref_bc' columns.
    """
    cu_df = cugraph.edge_betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        seed=seed,
        result_dtype=result_dtype,
    )
    # The caller picks the dtype; make sure cugraph honored it.
    assert cu_df["betweenness_centrality"].dtype == result_dtype, \
        "'betweenness_centrality' column has not the expected type"

    ref_scores = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, seed=seed, weight=weight
    )
    is_directed = type(Gnx) is nx.DiGraph
    ref_df = generate_nx_result(ref_scores, is_directed)
    ref_df = ref_df.rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    )

    combined = cu_df.sort_values(["src", "dst"])
    combined = combined.rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    )
    combined = combined.reset_index(drop=True)
    # NOTE(review): positional concat assumes ref_df rows line up with the
    # sorted cugraph rows — presumably generate_nx_result emits edges in the
    # same (src, dst) order; verify against its definition.
    return cudf.concat([combined, ref_df["ref_bc"]], axis=1, sort=False)
def test_edge_betweenness_centrality_nx(
    graph_file, directed, edgevals
):
    """Compare edge BC from cugraph's networkx-compat path against networkx.

    Both results are dicts keyed by edge; mismatches beyond an absolute
    tolerance of 0.01 are counted and must stay under 1% of the edges.
    """
    prepare_test()
    Gnx = utils.generate_nx_graph_from_file(graph_file, directed, edgevals)
    nx_bc = nx.edge_betweenness_centrality(Gnx)
    cu_bc = cugraph.edge_betweenness_centrality(Gnx)

    # Sort both dicts by edge key so entries line up pairwise.
    networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0])
    cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0])
    err = 0
    assert len(cugraph_bc) == len(networkx_bc)
    for (cu_edge, cu_val), (nx_edge, nx_val) in zip(cugraph_bc, networkx_bc):
        # Count a mismatch only when the same edge differs in value.
        if abs(cu_val - nx_val) > 0.01 and cu_edge == nx_edge:
            err += 1
            # BUG FIX: the original printed the cugraph value twice
            # (cugraph_bc[i][1] on both sides), hiding the actual difference.
            print(f"{cu_val} and {nx_val}")
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_bc))
def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compare k-sampled edge BC between cugraph and networkx.

    Networkx does not accept an explicit source list, so we reproduce its
    seeded sampling here and hand the same sources to cugraph, then merge
    both results on (src, dst).
    """
    # Mimic networkx's internal sampling: seed, then sample k of its nodes.
    random.seed(seed)  # networkx will seed again the same way in its call
    sources = random.sample(Gnx.nodes(), k)

    # The sampled nodes are already external ids (they came from Gnx),
    # so no translation is needed before handing them to cugraph.
    cu_df = cugraph.edge_betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        result_dtype=result_dtype,
    )

    ref_scores = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, weight=weight, seed=seed
    )
    is_directed = type(Gnx) is nx.DiGraph
    ref_df = generate_nx_result(ref_scores, is_directed).rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    )

    joined = cu_df.merge(ref_df, on=['src', 'dst'])
    joined = joined.rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    )
    return joined.reset_index(drop=True)
def _calc_bc_subset_fixed(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Check cugraph's k-sampled edge BC for self-coherence (renumber-aware).

    Runs edge_betweenness_centrality twice — once letting cugraph sample k
    sources from a fixed seed, once passing the identically-sampled (and, if
    needed, un-renumbered) source list — and merges both on (src, dst).
    """
    assert isinstance(k, int), (
        "This test is meant for verifying coherence "
        "when k is given as an int"
    )
    # random.seed(None) seeds from the clock; pin the seed so the explicit
    # sampling below matches what cugraph samples internally.
    if seed is None:
        seed = 123
    random.seed(seed)  # cugraph re-seeds with the same value in its own call
    sources = random.sample(range(G.number_of_vertices()), k)

    # The samples above are internal ids; translate back to external ids
    # when the graph was renumbered so the second call gets valid vertices.
    if G.renumbered:
        sources_df = cudf.DataFrame({'src': sources})
        sources = G.unrenumber(sources_df, 'src')['src'].to_pandas().tolist()

    # First call repeats the random sampling done above.
    cu_df = cugraph.edge_betweenness_centrality(
        G,
        k=k,
        normalized=normalized,
        weight=weight,
        seed=seed,
        result_dtype=result_dtype,
    ).rename(columns={"betweenness_centrality": "cu_bc"}, copy=False)

    # Second call consumes the pre-sampled sources (seed=None because
    # k-as-list plus a seed is not the normal usage).
    ref_df = cugraph.edge_betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        seed=None,
        result_dtype=result_dtype,
    ).rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    ).reset_index(drop=True)

    return cu_df.merge(ref_df, on=['src', 'dst']).reset_index(drop=True)
def _calc_bc_subset(G, Gnx, normalized, weight, k, seed, result_dtype):
    """Compare k-sampled edge BC between cugraph and networkx.

    Networkx samples its own sources internally, so we reproduce that
    sampling here, hand the same sources to cugraph, sort cugraph's output
    by edge, and append the networkx reference column positionally.
    """
    # Mimic networkx's internal sampling: seed, then sample k of its nodes.
    random.seed(seed)  # networkx will seed again the same way in its call
    sources = random.sample(Gnx.nodes(), k)

    cu_df = cugraph.edge_betweenness_centrality(
        G,
        k=sources,
        normalized=normalized,
        weight=weight,
        result_dtype=result_dtype,
    )

    ref_scores = nx.edge_betweenness_centrality(
        Gnx, k=k, normalized=normalized, weight=weight, seed=seed
    )
    is_directed = type(Gnx) is nx.DiGraph
    ref_df = generate_nx_result(ref_scores, is_directed).rename(
        columns={"betweenness_centrality": "ref_bc"}, copy=False
    )

    combined = cu_df.sort_values(["src", "dst"])
    combined = combined.rename(
        columns={"betweenness_centrality": "cu_bc"}, copy=False
    )
    combined = combined.reset_index(drop=True)
    # NOTE(review): positional concat assumes ref_df rows line up with the
    # sorted cugraph rows — presumably generate_nx_result emits edges in the
    # same (src, dst) order; verify against its definition.
    return cudf.concat([combined, ref_df["ref_bc"]], axis=1, sort=False)