def test_katz_centrality_multi_column(graph_file):
    """Compare Katz centrality on a multi-column graph with a single-column one.

    The edge list read from ``graph_file`` is extended with a second pair of
    vertex columns (``src_1``/``dst_1``), each equal to the first pair plus
    1000.  One graph is built from both column pairs and another from the
    first pair only.  The single-column result seeds ``nstart`` for the
    multi-column run, and the test asserts that the top 10 vertices by
    ``katz_centrality`` (as returned by ``topKVertices``) are equal.
    """
    gc.collect()

    offset = 1000
    edges = utils.read_csv_file(graph_file)
    edges.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True)
    edges['src_1'] = edges['src_0'] + offset
    edges['dst_1'] = edges['dst_0'] + offset

    # Graph whose vertices are identified by two columns each.
    multi_col_graph = cugraph.DiGraph()
    multi_col_graph.from_cudf_edgelist(
        edges,
        source=["src_0", "src_1"],
        destination=["dst_0", "dst_1"],
    )

    # Reference graph built from the first column pair only.
    single_col_graph = cugraph.DiGraph()
    single_col_graph.from_cudf_edgelist(
        edges, source="src_0", destination="dst_0"
    )

    expected = cugraph.katz_centrality(
        single_col_graph, alpha=None, max_iter=1000
    )
    expected = expected.sort_values("vertex").reset_index(drop=True)

    # Starting values for the multi-column run, keyed by both vertex columns.
    nstart = cudf.DataFrame()
    nstart['vertex_0'] = expected['vertex']
    nstart['vertex_1'] = nstart['vertex_0'] + offset
    nstart['values'] = expected['katz_centrality']

    result = cugraph.katz_centrality(
        multi_col_graph, nstart=nstart, alpha=None, max_iter=1000
    )
    result = result.sort_values("0_vertex").reset_index(drop=True)
    # Expose the first vertex column under the name the reference frame uses.
    result.rename(columns={'0_vertex': 'vertex'}, inplace=True)

    top_result = topKVertices(result, "katz_centrality", 10)
    top_expected = topKVertices(expected, "katz_centrality", 10)

    assert top_result.equals(top_expected)
def test_katz_centrality_nx(graph_file):
    """Compare cuGraph Katz centrality on a NetworkX graph with NetworkX's own.

    NetworkX is run with ``alpha = 1 / (largest out-degree + 1)``, where the
    degree comes from the cuGraph conversion of the same graph; cuGraph is run
    with ``alpha=None`` and ``max_iter=1000``.  Both results are sorted by
    vertex, and the test passes when fewer than 10% of the entries with
    matching vertex ids differ by more than 0.1.
    """
    gc.collect()

    nx_edges = utils.read_csv_for_nx(graph_file)
    Gnx = nx.from_pandas_edgelist(
        nx_edges, create_using=nx.DiGraph(), source="0", target="1"
    )

    G = cugraph.utilities.convert_from_nx(Gnx)
    top_degree_row = G.degrees().nlargest(n=1, columns="out_degree")
    max_out_degree = top_degree_row["out_degree"].iloc[0]
    katz_alpha = 1 / (max_out_degree + 1)

    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)
    ck = cugraph.katz_centrality(Gnx, alpha=None, max_iter=1000)

    # Calculating mismatch: line both results up by vertex id first.
    nx_scores = sorted(nk.items(), key=lambda item: item[0])
    cu_scores = sorted(ck.items(), key=lambda item: item[0])

    assert len(cu_scores) == len(nx_scores)

    # Only pairs whose vertex ids agree are counted as mismatches.
    err = sum(
        1
        for (cu_vertex, cu_value), (nx_vertex, nx_value)
        in zip(cu_scores, nx_scores)
        if abs(cu_value - nx_value) > 0.1 and cu_vertex == nx_vertex
    )

    print("Mismatches:", err)
    assert err < (0.1 * len(cu_scores))
def katz_centrality(G, as_dict=False):
    """Compute Katz centrality with cuGraph and align it to ``G.nodes``.

    The centrality is computed on ``timestep_cache().W`` (not on ``G``
    directly); ``G`` only supplies the node order of the output.

    Parameters
    ----------
    G : graph-like
        Object exposing ``.nodes``; its node sequence defines the output order.
    as_dict : bool, optional
        When true, return a ``dict`` mapping each node of ``G`` to its value
        instead of the bare value sequence.

    Returns
    -------
    The values produced by ``ordered(...)``, or a ``dict`` keyed by the nodes
    of ``G`` when ``as_dict`` is true.
    """
    W = timestep_cache().W
    centrality_by_node = cugraph.katz_centrality(W)

    result_nodes = np.array(list(centrality_by_node))
    target_nodes = np.array(G.nodes)
    result_values = np.array(list(centrality_by_node.values()))

    # The values need re-sorting because the algorithm does not keep the
    # node order.  NOTE(review): presumably `ordered` reorders the values to
    # follow `target_nodes` -- confirm against its definition.
    sorted_values = ordered(target_nodes, result_nodes, result_values)

    return dict(zip(target_nodes, sorted_values)) if as_dict else sorted_values
def calc_katz(graph_file):
    """Compute Katz centrality with cuGraph and NetworkX for one graph file.

    Both libraries use ``alpha = 1 / (largest out-degree + 1)``, with the
    degree taken from the cuGraph graph.

    Parameters
    ----------
    graph_file : path-like
        Edge-list file understood by ``utils.read_csv_file`` and
        ``utils.read_csv_for_nx``.

    Returns
    -------
    The cuGraph result frame with the cuGraph scores in ``cu_katz`` and the
    NetworkX scores in ``nx_katz``.
    """
    M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.add_edge_list(M['0'], M['1'])

    largest_out_degree = G.degrees().nlargest(n=1, columns='out_degree')
    # NOTE(review): if nlargest keeps the original row labels, `[0]` is a
    # label lookup and should be `.iloc[0]` -- confirm against the cudf
    # version in use.
    largest_out_degree = largest_out_degree['out_degree'][0]
    katz_alpha = 1 / (largest_out_degree + 1)

    k = cugraph.katz_centrality(G, katz_alpha, max_iter=1000)

    NM = utils.read_csv_for_nx(graph_file)
    NM = NM.tocsr()
    Gnx = nx.DiGraph(NM)
    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)

    # One row per vertex, with the NetworkX score in column 0.
    pdf = pd.DataFrame(nk, index=[0]).T
    k['nx_katz'] = pdf[0]

    # Pass the mapping as `columns=`: a positional mapping is applied to the
    # index labels by pandas-compatible `rename`, which would leave the
    # 'katz_centrality' column name unchanged.
    k = k.rename(columns={'katz_centrality': 'cu_katz'})
    return k
def calc_katz(graph_file):
    """Compute Katz centrality with cuGraph and NetworkX for one graph file.

    Both libraries use ``alpha = 1 / (largest out-degree + 1)``, with the
    degree taken from the cuGraph graph.

    Parameters
    ----------
    graph_file : path-like
        Edge-list file understood by ``utils.read_csv_file`` and
        ``utils.read_csv_for_nx``; columns ``'0'`` and ``'1'`` hold the
        source and destination vertices.

    Returns
    -------
    The cuGraph result frame, sorted by ``vertex``, with the cuGraph scores
    in ``cu_katz`` and the NetworkX scores in ``nx_katz``.
    """
    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')

    largest_out_degree = G.degrees().nlargest(n=1, columns='out_degree')
    largest_out_degree = largest_out_degree['out_degree'].iloc[0]
    katz_alpha = 1 / (largest_out_degree + 1)

    k_df = cugraph.katz_centrality(G, katz_alpha, max_iter=1000)
    # The NetworkX scores below are laid out in sorted-vertex order, so the
    # cuGraph rows must be in that order too before the column is attached.
    k_df = k_df.sort_values('vertex').reset_index(drop=True)

    NM = utils.read_csv_for_nx(graph_file)
    Gnx = nx.from_pandas_edgelist(
        NM, create_using=nx.DiGraph(), source='0', target='1'
    )
    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)
    pdf = [nk[vertex] for vertex in sorted(nk)]
    k_df['nx_katz'] = pdf

    # Pass the mapping as `columns=`: a positional mapping is applied to the
    # index labels by pandas-compatible `rename`, which would leave the
    # 'katz_centrality' column name unchanged.
    k_df = k_df.rename(columns={'katz_centrality': 'cu_katz'})
    return k_df
def calc_katz(graph_file):
    """Build a frame holding cuGraph and NetworkX Katz scores side by side.

    cuGraph is run with ``alpha=None`` and ``max_iter=1000``; NetworkX is run
    with ``alpha = 1 / (largest out-degree + 1)``, where the degree is taken
    from the cuGraph graph.

    Parameters
    ----------
    graph_file : path-like
        Edge-list file understood by ``utils.read_csv_file`` and
        ``utils.read_csv_for_nx``; columns ``"0"`` and ``"1"`` hold the
        source and destination vertices.

    Returns
    -------
    The cuGraph result frame, sorted by ``vertex``, with the cuGraph scores
    in ``cu_katz`` and the NetworkX scores in ``nx_katz``.
    """
    edges = utils.read_csv_file(graph_file)
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(edges, source="0", destination="1")

    top_degree_row = G.degrees().nlargest(n=1, columns="out_degree")
    max_out_degree = top_degree_row["out_degree"].iloc[0]
    katz_alpha = 1 / (max_out_degree + 1)

    scores = cugraph.katz_centrality(G, alpha=None, max_iter=1000)
    scores = scores.sort_values("vertex")
    scores = scores.reset_index(drop=True)

    nx_edges = utils.read_csv_for_nx(graph_file)
    Gnx = nx.from_pandas_edgelist(
        nx_edges, create_using=nx.DiGraph(), source="0", target="1"
    )
    nx_scores = nx.katz_centrality(Gnx, alpha=katz_alpha)

    # NetworkX values in ascending vertex order, matching the sorted frame.
    scores["nx_katz"] = [nx_scores[vertex] for vertex in sorted(nx_scores)]

    return scores.rename(columns={"katz_centrality": "cu_katz"}, copy=False)
def katz(G, alpha=None):
    """Run ``cugraph.katz_centrality`` on ``G`` and return its result.

    Parameters
    ----------
    G : graph
        Graph forwarded unchanged to ``cugraph.katz_centrality``.
    alpha : optional
        Forwarded as the second positional argument; defaults to ``None``.

    Returns
    -------
    Whatever ``cugraph.katz_centrality(G, alpha)`` returns.
    """
    centrality = cugraph.katz_centrality(G, alpha)
    return centrality