def test_networkx_compatibility(graph_file):
    """Check that cuGraph and NetworkX build matching DiGraphs.

    The same edge list is loaded into a pandas and a cudf DataFrame and
    turned into a ``nx.DiGraph`` and a ``cugraph.DiGraph``.  The pair is
    compared with ``compare_graphs`` twice: once with the ``weight`` column
    attached as an edge attribute and once without it.  The duration of
    each comparison is printed.

    Args:
        graph_file: dataset path handed to ``utils.read_csv_for_nx``.
    """
    gc.collect()

    # test from_cudf_edgelist()
    M = utils.read_csv_for_nx(graph_file)

    df = pd.DataFrame()
    df["source"] = pd.Series(M["0"])
    df["target"] = pd.Series(M["1"])
    df["weight"] = pd.Series(M.weight)
    gdf = cudf.from_pandas(df)

    # Pass 1: graphs carrying the edge weights.
    Gnx = nx.from_pandas_edgelist(
        df,
        source="source",
        target="target",
        edge_attr="weight",
        create_using=nx.DiGraph,
    )
    G = cugraph.from_cudf_edgelist(
        gdf,
        source="source",
        destination="target",
        edge_attr="weight",
        create_using=cugraph.DiGraph,
    )

    print('g from gdf = \n', gdf)
    print('nx from df = \n', df)

    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by wall-clock adjustments the way time.time() can.
    t1 = time.perf_counter()
    assert compare_graphs(Gnx, G)
    t2 = time.perf_counter() - t1
    print('compare_graphs time: ', t2)

    Gnx.clear()
    G.clear()

    # Pass 2: same edges, no edge attribute.
    Gnx = nx.from_pandas_edgelist(
        df,
        source="source",
        target="target",
        create_using=nx.DiGraph,
    )
    G = cugraph.from_cudf_edgelist(
        gdf,
        source="source",
        destination="target",
        create_using=cugraph.DiGraph,
    )

    t1 = time.perf_counter()
    assert compare_graphs(Gnx, G)
    t2 = time.perf_counter() - t1
    print('compare_graphs time: ', t2)

    Gnx.clear()
    G.clear()
def test_networkx_compatibility(managed, pool, graph_file):
    """Compare cuGraph and NetworkX graphs under a given RMM configuration.

    RMM is finalized, reconfigured from ``managed`` / ``pool`` and
    re-initialized before the graphs are built.  The graphs are then
    compared with ``compare_graphs`` once with weights and once without.

    Args:
        managed: value assigned to ``rmm_config.use_managed_memory``.
        pool: value assigned to ``rmm_config.use_pool_allocator``.
        graph_file: dataset path handed to ``utils.read_csv_for_nx``.
    """
    gc.collect()

    # Restart RMM so the requested allocator settings take effect.
    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27  # == 2**28
    rmm.initialize()
    assert rmm.is_initialized()

    # test from_cudf_edgelist()
    coo = utils.read_csv_for_nx(graph_file)
    edges_pdf = pd.DataFrame()
    edges_pdf['source'] = pd.Series(coo.row)
    edges_pdf['target'] = pd.Series(coo.col)
    edges_pdf['weight'] = pd.Series(coo.data)
    edges_gdf = cudf.from_pandas(edges_pdf)

    # cugraph.Graph() is implicitly directed at this point, so the
    # NetworkX counterpart has to be an nx.DiGraph.
    nx_graph = nx.from_pandas_edgelist(
        edges_pdf,
        source='source',
        target='target',
        edge_attr=['weight'],
        create_using=nx.DiGraph,
    )
    cu_graph = cugraph.from_cudf_edgelist(
        edges_gdf,
        source='source',
        target='target',
        weight='weight',
    )
    assert compare_graphs(nx_graph, cu_graph)
    nx_graph.clear()
    cu_graph.clear()

    # Same comparison again, this time without weights (nx.DiGraph is
    # still required for the reason given above).
    nx_graph = nx.from_pandas_edgelist(
        edges_pdf,
        source='source',
        target='target',
        create_using=nx.DiGraph,
    )
    cu_graph = cugraph.from_cudf_edgelist(
        edges_gdf,
        source='source',
        target='target',
    )
    assert compare_graphs(nx_graph, cu_graph)
    nx_graph.clear()
    cu_graph.clear()
def test_consolidation(graph_file):
    """Build a cuGraph DiGraph from a dask_cudf edge list and compare it
    with the NetworkX DiGraph built from the same file.

    A ``LocalCUDACluster`` and a ``Client`` are created for the duration of
    the test.  Both are closed in ``finally`` blocks so that a failing
    assertion or a graph-construction error does not leave them running.

    Args:
        graph_file: dataset path; read through ``utils.read_csv_for_nx``
            for NetworkX and through ``dask_cudf.read_csv`` for cuGraph.
    """
    gc.collect()

    cluster = LocalCUDACluster()
    try:
        client = Client(cluster)
        try:
            chunksize = dcg.get_chunksize(graph_file)

            # NetworkX side: pandas edge list (source/target only).
            M = utils.read_csv_for_nx(graph_file)
            df = pd.DataFrame()
            df['source'] = pd.Series(M['0'])
            df['target'] = pd.Series(M['1'])

            # cuGraph side: the same file read as a dask_cudf DataFrame.
            ddf = dask_cudf.read_csv(
                graph_file,
                chunksize=chunksize,
                delimiter=' ',
                names=['source', 'target', 'weight'],
                dtype=['int32', 'int32', 'float32'],
                header=None,
            )

            Gnx = nx.from_pandas_edgelist(
                df,
                source='source',
                target='target',
                create_using=nx.DiGraph,
            )
            G = cugraph.from_cudf_edgelist(
                ddf,
                source='source',
                destination='target',
                create_using=cugraph.DiGraph,
            )

            assert compare_graphs(Gnx, G)
            Gnx.clear()
            G.clear()
        finally:
            client.close()
    finally:
        cluster.close()
def test_networkx_compatibility(graph_file):
    """Make sure cuGraph and NetworkX build similar graphs.

    One weighted edge list is loaded, a NetworkX DiGraph and a cuGraph
    DiGraph are created from it, and both are handed to
    ``_compare_graphs``.

    Args:
        graph_file: dataset path handed to ``utils.read_csv_for_nx``.
    """
    gc.collect()

    # Read in the graph.
    edges_pdf = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)

    # NetworkX DiGraph built directly from the pandas edge list.
    nx_digraph = nx.from_pandas_edgelist(
        edges_pdf,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.DiGraph(),
    )

    # cuGraph DiGraph: same data on the GPU, with the weight column
    # renamed to "weights" before the graph is created.
    edges_gdf = cudf.from_pandas(edges_pdf)
    edges_gdf = edges_gdf.rename(columns={"weight": "weights"})
    cu_digraph = cugraph.from_cudf_edgelist(
        edges_gdf,
        source="0",
        destination="1",
        edge_attr="weights",
        create_using=cugraph.DiGraph,
    )

    _compare_graphs(nx_digraph, cu_digraph)
def test_edges_for_Graph(graph_file):
    """Check that ``G.edges()`` of a cuGraph Graph matches NetworkX.

    Both libraries build an undirected graph from columns ``'0'`` and
    ``'1'`` of the file.  The edge counts must agree, and the cuGraph edge
    list must equal the NetworkX edges once each NetworkX edge is ordered
    as (smaller endpoint, larger endpoint) and both lists are sorted.

    Args:
        graph_file: dataset path handed to ``utils.read_csv_file``.
    """
    edges_gdf = utils.read_csv_file(graph_file)

    # Create nx Graph
    edges_pdf = edges_gdf.to_pandas()[['0', '1']]
    nx_graph = nx.from_pandas_edgelist(
        edges_pdf,
        source='0',
        target='1',
        create_using=nx.Graph,
    )
    nx_edges = nx_graph.edges()

    # Create Cugraph Graph from DataFrame
    # Force it to use renumber_from_cudf
    G = cugraph.from_cudf_edgelist(
        edges_gdf,
        source=['0'],
        destination=['1'],
        create_using=cugraph.Graph,
    )
    cu_edge_list = G.edges()

    # Check if number of Edges is same
    assert len(nx_edges) == len(cu_edge_list)
    assert nx_graph.number_of_edges() == G.number_of_edges()

    # Put every NetworkX edge into (low, high) order so it can be compared
    # with the cuGraph edge list.
    ordered_edges = [
        [v, u] if u > v else [u, v]
        for u, v in nx_edges
    ]
    nx_edge_list = cudf.DataFrame(ordered_edges, columns=['src', 'dst'])

    sort_keys = ['src', 'dst']
    expected = nx_edge_list.sort_values(by=sort_keys).reset_index(drop=True)
    actual = cu_edge_list.sort_values(by=sort_keys).reset_index(drop=True)
    assert_eq(expected, actual, check_dtype=False)
def to_cugraph(edge_index: Tensor, edge_weight: Optional[Tensor] = None,
               relabel_nodes: bool = True):
    r"""Converts a graph given by :obj:`edge_index` and optional
    :obj:`edge_weight` into a :obj:`cugraph` graph object.

    Args:
        edge_index (Tensor): The edge indices. The tensor is transposed and
            handed to :obj:`cudf` through DLPack; columns :obj:`0` and
            :obj:`1` of the resulting frame are used as source and
            destination.
        edge_weight (Tensor, optional): One-dimensional edge weights. If
            given, they are stored as column :obj:`2` and used as the edge
            attribute. (default: :obj:`None`)
        relabel_nodes (bool, optional): If set to :obj:`True`,
            :obj:`cugraph` will remove any isolated nodes, leading to a
            relabeling of nodes. (default: :obj:`True`)
    """
    import cudf
    import cugraph

    edge_df = cudf.from_dlpack(to_dlpack(edge_index.t()))

    # Column label of the weights inside ``edge_df``; stays ``None`` for
    # unweighted graphs.
    weight_column = None
    if edge_weight is not None:
        assert edge_weight.dim() == 1
        weight_column = 2
        edge_df[weight_column] = cudf.from_dlpack(to_dlpack(edge_weight))

    return cugraph.from_cudf_edgelist(
        edge_df,
        source=0,
        destination=1,
        edge_attr=weight_column,
        renumber=relabel_nodes,
    )
def test_networkx_compatibility(managed, pool, graph_file):
    """Compare cuGraph and NetworkX DiGraphs after reinitializing RMM.

    RMM is reinitialized with the requested allocator options.  The same
    edge list is then turned into a ``nx.DiGraph`` and a
    ``cugraph.DiGraph``, first with the ``weight`` edge attribute and then
    without, and each pair is checked with ``compare_graphs``.

    Args:
        managed: forwarded to ``rmm.reinitialize`` as ``managed_memory``.
        pool: forwarded to ``rmm.reinitialize`` as ``pool_allocator``.
        graph_file: dataset path handed to ``utils.read_csv_for_nx``.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,  # == 2**28
    )
    assert rmm.is_initialized()

    # test from_cudf_edgelist()
    csv_data = utils.read_csv_for_nx(graph_file)
    edges_pdf = pd.DataFrame()
    edges_pdf['source'] = pd.Series(csv_data['0'])
    edges_pdf['target'] = pd.Series(csv_data['1'])
    edges_pdf['weight'] = pd.Series(csv_data.weight)
    edges_gdf = cudf.from_pandas(edges_pdf)

    # Weighted graphs.
    nx_graph = nx.from_pandas_edgelist(
        edges_pdf,
        source='source',
        target='target',
        edge_attr='weight',
        create_using=nx.DiGraph,
    )
    cu_graph = cugraph.from_cudf_edgelist(
        edges_gdf,
        source='source',
        destination='target',
        edge_attr='weight',
        create_using=cugraph.DiGraph,
    )
    assert compare_graphs(nx_graph, cu_graph)
    nx_graph.clear()
    cu_graph.clear()

    # Unweighted graphs.
    nx_graph = nx.from_pandas_edgelist(
        edges_pdf,
        source='source',
        target='target',
        create_using=nx.DiGraph,
    )
    cu_graph = cugraph.from_cudf_edgelist(
        edges_gdf,
        source='source',
        destination='target',
        create_using=cugraph.DiGraph,
    )
    assert compare_graphs(nx_graph, cu_graph)
    nx_graph.clear()
    cu_graph.clear()
def nx2cu(G):
    """Build a cuGraph graph from the edges of ``G``.

    Both endpoints of every edge in ``G.edges`` are converted with
    ``int()``, collected into a cudf DataFrame with columns ``src`` and
    ``dst``, and passed to ``cugraph.from_cudf_edgelist`` with
    ``renumber=True``.

    Args:
        G: object exposing an ``edges`` iterable of two-element edges
            (presumably a NetworkX graph, going by the function name).

    Returns:
        The graph returned by ``cugraph.from_cudf_edgelist``.
    """
    import cugraph
    import cudf

    int_edges = []
    for u, v in G.edges:
        int_edges.append((int(u), int(v)))

    edge_frame = cudf.DataFrame(int_edges, columns=['src', 'dst'])
    return cugraph.from_cudf_edgelist(
        edge_frame,
        source='src',
        destination='dst',
        renumber=True,
    )
def test_networkx_compatibility(graph_file):
    """Compare cuGraph and NetworkX DiGraphs built from one edge list.

    Two rounds are run: the first attaches the ``weight`` column as an edge
    attribute on both graphs, the second builds them without any edge
    attribute.  Each round asserts ``compare_graphs`` and then clears both
    graphs.

    Args:
        graph_file: dataset path handed to ``utils.read_csv_for_nx``.
    """
    gc.collect()

    # test from_cudf_edgelist()
    csv_data = utils.read_csv_for_nx(graph_file)
    edges_pdf = pd.DataFrame()
    edges_pdf['source'] = pd.Series(csv_data['0'])
    edges_pdf['target'] = pd.Series(csv_data['1'])
    edges_pdf['weight'] = pd.Series(csv_data.weight)
    edges_gdf = cudf.from_pandas(edges_pdf)

    # Extra keyword arguments per round: weighted first, then unweighted.
    for attr_kwargs in ({'edge_attr': 'weight'}, {}):
        nx_graph = nx.from_pandas_edgelist(
            edges_pdf,
            source='source',
            target='target',
            create_using=nx.DiGraph,
            **attr_kwargs,
        )
        cu_graph = cugraph.from_cudf_edgelist(
            edges_gdf,
            source='source',
            destination='target',
            create_using=cugraph.DiGraph,
            **attr_kwargs,
        )

        assert compare_graphs(nx_graph, cu_graph)

        nx_graph.clear()
        cu_graph.clear()
def test_view_edge_list_for_Graph(managed, pool, graph_file):
    """Check ``view_edge_list()`` of a cuGraph Graph against NetworkX.

    RMM is reinitialized with the requested options, an undirected graph is
    built by both libraries, and the cuGraph edge list must equal the
    sorted list of NetworkX edges with each edge written as
    (smaller endpoint, larger endpoint).

    Args:
        managed: forwarded to ``rmm.reinitialize`` as ``managed_memory``.
        pool: forwarded to ``rmm.reinitialize`` as ``pool_allocator``.
        graph_file: dataset path handed to ``utils.read_csv_file``.
    """
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27,  # == 2**28
    )
    assert rmm.is_initialized()

    edges_gdf = utils.read_csv_file(graph_file)

    # Create nx Graph
    edges_pdf = edges_gdf.to_pandas()[['0', '1']]
    nx_graph = nx.from_pandas_edgelist(
        edges_pdf,
        source='0',
        target='1',
        create_using=nx.Graph,
    )
    nx_edges = nx_graph.edges()

    # Create Cugraph Graph from DataFrame
    G = cugraph.from_cudf_edgelist(
        edges_gdf,
        source='0',
        destination='1',
        create_using=cugraph.Graph,
    )
    cu_edge_list = G.view_edge_list()

    # Check if number of Edges is same
    assert len(nx_edges) == len(cu_edge_list)
    assert nx_graph.number_of_edges() == G.number_of_edges()

    # Get edges as upper triangle of matrix: smaller endpoint first,
    # then sort the whole list.
    upper_edges = sorted(
        [v, u] if u > v else [u, v]
        for u, v in nx_edges
    )
    nx_edge_list = cudf.DataFrame(upper_edges, columns=['src', 'dst'])

    # Compare nx and cugraph edges when viewing edgelist
    assert cu_edge_list.equals(nx_edge_list)
def createGraph(csvFileName, graphType=None):
    """
    Helper function to create a Graph or DiGraph based on csvFileName.

    When graphType is None the class is chosen from the file path: a path
    containing "/directed/" gives a DiGraph, anything else a Graph.  The
    edge list is read with utils.read_csv_file and the graph is created
    with renumber=True from columns "0" (source) and "1" (destination).
    """
    if graphType is None:
        # There's potential value in verifying that a DiGraph can be created
        # from a undirected dataset, and a Graph from a directed. (For now?)
        # do not include those combinations to keep benchmark runtime and
        # complexity lower, and assume tests have coverage to verify
        # correctness for those combinations.
        graph_classes = cugraph.structure.graph_classes
        graphType = (
            graph_classes.DiGraph
            if "/directed/" in csvFileName
            else graph_classes.Graph
        )

    edge_list = utils.read_csv_file(csvFileName)
    return cugraph.from_cudf_edgelist(
        edge_list,
        source="0",
        destination="1",
        create_using=graphType,
        renumber=True,
    )
def test_view_edge_list_for_Graph(graph_file):
    """Check ``view_edge_list()`` of a cuGraph Graph against NetworkX.

    An undirected graph is built by both libraries from columns ``"0"`` and
    ``"1"``.  The cuGraph edge list, sorted by ``src`` then ``dst``, must
    match column by column the sorted NetworkX edges with each edge written
    as (smaller endpoint, larger endpoint).

    Args:
        graph_file: dataset path handed to ``utils.read_csv_file``.
    """
    gc.collect()

    edges_gdf = utils.read_csv_file(graph_file)

    # Create nx Graph
    edges_pdf = edges_gdf.to_pandas()[["0", "1"]]
    nx_graph = nx.from_pandas_edgelist(
        edges_pdf,
        source="0",
        target="1",
        create_using=nx.Graph,
    )
    nx_edges = nx_graph.edges()

    # Create Cugraph Graph from DataFrame
    G = cugraph.from_cudf_edgelist(
        edges_gdf,
        source="0",
        destination="1",
        create_using=cugraph.Graph,
    )
    cu_edge_list = G.view_edge_list().sort_values(["src", "dst"])

    # Check if number of Edges is same
    assert len(nx_edges) == len(cu_edge_list)
    assert nx_graph.number_of_edges() == G.number_of_edges()

    # Get edges as upper triangle of matrix: smaller endpoint first,
    # then sort the whole list.
    upper_edges = sorted(
        [v, u] if u > v else [u, v]
        for u, v in nx_edges
    )
    nx_edge_list = cudf.DataFrame(upper_edges, columns=["src", "dst"])

    # Compare nx and cugraph edges when viewing edgelist.  The columns are
    # compared as arrays rather than through DataFrame.equals().
    src_matches = (
        cu_edge_list["src"].to_array() == nx_edge_list["src"].to_array()
    )
    assert src_matches.all()
    dst_matches = (
        cu_edge_list["dst"].to_array() == nx_edge_list["dst"].to_array()
    )
    assert dst_matches.all()
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# NOTE(review): `cd`, `cg`, `cp` and `datetime` are bound outside this
# fragment -- presumably cudf, cugraph and cupy; confirm against the
# file's import block.

# --- Load the position and edge tables, timing the read -----------------
read_file_starttime = datetime.datetime.now()
XYZ_C = cd.read_csv(
    'HW_AI/HW_Final/dataset/pos_50.csv',
    index_col=0,
).to_numpy()
XYZ_Edges = cd.read_csv(
    'HW_AI/HW_Final/dataset/edges_50.csv',
    index_col=0,
    dtype=['int32', 'int32', 'int32', 'float32', 'str'],
)
read_file_endtime = datetime.datetime.now()
print(read_file_endtime - read_file_starttime)

# --- Build the graph, timing the construction ---------------------------
graph_build_starttime = datetime.datetime.now()
# from_cudf_edgelist() returns a new graph, so no empty Graph needs to be
# created beforehand.
G = cg.from_cudf_edgelist(
    XYZ_Edges,
    source='Source',
    destination='Target',
    edge_attr='Weight',
)
graph_build_endtime = datetime.datetime.now()
print(graph_build_endtime - graph_build_starttime)

# --- Louvain community detection ----------------------------------------
louvain_starttime = datetime.datetime.now()
result, mod = cg.louvain(G)
vertex = result['vertex']
partition = result['partition']
# Largest partition id plus one, reported below as the community count.
size = result['partition'].max() + 1
print('community', size)
print('modularity', mod)

# Hand both columns over to `cp` through DLPack and reshape each to one
# entry per row of XYZ_C.
vertex = cp.fromDlpack(vertex.to_dlpack())
partition = cp.fromDlpack(partition.to_dlpack())
vertex = cp.reshape(vertex, XYZ_C.shape[0])
labelRE = cp.reshape(partition, XYZ_C.shape[0])