Example No. 1
def test_rmm_modes(dtype, nelem, alloc, managed, pool):
    assert rmm.is_initialized()
    array_tester(dtype, nelem, alloc)

    rmm.reinitialize(pool_allocator=pool, managed_memory=managed)

    assert rmm.is_initialized()

    array_tester(dtype, nelem, alloc)
Example No. 2
def test_sssp(managed, pool, graph_file, source):
    gc.collect()

    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()

    assert(rmm.is_initialized())
    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_paths, max_val = cugraph_call(cu_M, source)
    nx_paths, Gnx = networkx_call(M, source)

    # Calculating mismatch
    err = 0
    for vid in cu_paths:
        # Validate vertices that are reachable
        # NOTE: If the distance type is float64 then cu_paths[vid][0]
        # should be compared against np.finfo(np.float64).max
        if (cu_paths[vid][0] != max_val):
            if(cu_paths[vid][0] != nx_paths[vid]):
                err = err + 1
            # check pred dist + 1 = current dist (since unweighted)
            pred = cu_paths[vid][1]
            if(vid != source and cu_paths[pred][0] + 1 != cu_paths[vid][0]):
                err = err + 1
        else:
            if (vid in nx_paths.keys()):
                err = err + 1

    assert err == 0
Example No. 3
def test_strong_cc(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert(rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    netx_labels = networkx_strong_call(M)

    cu_M = utils.read_csv_file(graph_file)
    cugraph_labels = cugraph_strong_call(cu_M)

    # NetworkX returns a list of components, each component being a
    # set of vertex indices, while cugraph returns a component label
    # for each vertex.

    nx_n_components = len(netx_labels)
    cg_n_components = get_n_uniqs(cugraph_labels)

    assert nx_n_components == cg_n_components

    lst_nx_components_lens = [len(c) for c in sorted(netx_labels, key=len)]

    # get counts of uniques:
    #
    lst_cg_components_lens = sorted(get_uniq_counts(cugraph_labels))

    assert lst_nx_components_lens == lst_cg_components_lens
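The helpers get_n_uniqs and get_uniq_counts are not shown in this listing. A minimal sketch of what they presumably compute, assuming the cugraph labels can be brought to the host as a numpy array (that conversion, and the helper bodies, are assumptions):

import numpy as np

def get_n_uniqs(labels):
    # Number of distinct component labels == number of components.
    return len(np.unique(np.asarray(labels)))

def get_uniq_counts(labels):
    # Size of each component: occurrence count per distinct label.
    _, counts = np.unique(np.asarray(labels), return_counts=True)
    return list(counts)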
Example No. 4
def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert(rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    M = M.tocsr()
    Gnx = nx.DiGraph(M).to_undirected()
    G = cugraph.Graph()
    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)
    values = cudf.Series(M.data)
    G.from_cudf_adjlist(row_offsets, col_indices, values)
    pairs = G.get_two_hop_neighbors()
    nx_pairs = []
    for i in range(len(pairs)):
        nx_pairs.append((pairs['first'][i], pairs['second'][i]))
    preds = nx.jaccard_coefficient(Gnx, nx_pairs)
    nx_coeff = []
    for u, v, p in preds:
        nx_coeff.append(p)
    df = cugraph.jaccard(G, pairs['first'], pairs['second'])
    assert len(nx_coeff) == len(df)
    for i in range(len(df)):
        diff = abs(nx_coeff[i] - df['jaccard_coeff'][i])
        assert diff < 1.0e-6
Example No. 5
def test_overlap_edge_vals(managed, pool, graph_file):
    gc.collect()

    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    M = M.tocsr().sorted_indices()
    cu_M = utils.read_csv_file(graph_file)
    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)
    G = cugraph.Graph()
    G.add_adj_list(row_offsets, col_indices, None)
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(cu_M,
                            pairs['first'],
                            pairs['second'],
                            edgevals=True)
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])

    assert len(cu_coeff) == len(cpu_coeff)
    for i in range(len(cu_coeff)):
        if np.isnan(cpu_coeff[i]):
            assert np.isnan(cu_coeff[i])
        elif np.isnan(cu_coeff[i]):
            assert cpu_coeff[i] == cu_coeff[i]
        else:
            diff = abs(cpu_coeff[i] - cu_coeff[i])
            assert diff < 1.0e-6
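cpu_call is a reference helper that is not shown here. A rough sketch of an unweighted overlap coefficient, |N(u) ∩ N(v)| / min(|N(u)|, |N(v)|), computed from a scipy CSR matrix (the helper body, the .to_pandas() conversion of the pair columns, and the possibility that the real helper also uses the edge weights are assumptions):

import numpy as np

def cpu_call(M, first, second):
    # M: scipy.sparse CSR adjacency matrix; first/second: cudf Series of
    # vertex pairs.  Returns one coefficient per pair, NaN when the
    # smaller neighborhood is empty.
    coeffs = []
    for u, v in zip(first.to_pandas(), second.to_pandas()):
        nu = set(M.indices[M.indptr[u]:M.indptr[u + 1]])
        nv = set(M.indices[M.indptr[v]:M.indptr[v + 1]])
        denom = min(len(nu), len(nv))
        coeffs.append(len(nu & nv) / denom if denom else np.nan)
    return coeffs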
Example No. 6
def test_pagerank(managed, pool, graph_file, max_iter, tol, alpha,
                  personalization_perc, has_guess):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())
    M = utils.read_csv_for_nx(graph_file)
    networkx_pr, networkx_prsn = networkx_call(M, max_iter, tol, alpha,
                                               personalization_perc)

    cu_nstart = None
    if has_guess == 1:
        cu_nstart = cudify(networkx_pr)
        max_iter = 5
    cu_prsn = cudify(networkx_prsn)
    cu_M = utils.read_csv_file(graph_file)
    cugraph_pr = cugraph_call(cu_M, max_iter, tol, alpha, cu_prsn, cu_nstart)

    # Calculating mismatch

    networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0])
    err = 0
    assert len(cugraph_pr) == len(networkx_pr)
    for i in range(len(cugraph_pr)):
        if (abs(cugraph_pr[i][1] - networkx_pr[i][1]) > tol * 1.1
                and cugraph_pr[i][0] == networkx_pr[i][0]):
            err = err + 1
    print("Mismatches:", err)
    assert err < (0.01 * len(cugraph_pr))
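cudify is a small conversion helper that does not appear in the snippet. A plausible sketch, assuming the personalization and nstart arguments are passed to cugraph.pagerank as a cudf DataFrame with 'vertex' and 'values' columns (that column layout is an assumption):

import cudf

def cudify(d):
    # Convert a {vertex: score} dict into the assumed cudf layout;
    # pass None through unchanged so "no guess" still works.
    if d is None:
        return None
    df = cudf.DataFrame()
    df['vertex'] = list(d.keys())
    df['values'] = list(d.values())
    return df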
Example No. 7
def test_symmetrize_unweighted(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed, pool_allocator=pool)

    assert (rmm.is_initialized())

    cu_M = utils.read_csv_file(graph_file + '.csv')

    sym_sources, sym_destinations = cugraph.symmetrize(cu_M['0'], cu_M['1'])

    #
    #  Check that every pair in sources/destinations exists in
    #  both directions.
    #
    #  Try this with join logic.  If we create data frames we can join
    #  them with DataFrame.merge.  The symmetrized output should contain
    #  every edge that was in the input data, so joining the input data
    #  with the output should yield data frames of equal length.
    #
    sym_df = cudf.DataFrame()
    sym_df['src_s'] = sym_sources
    sym_df['dst_s'] = sym_destinations

    orig_df = cudf.DataFrame()
    orig_df['src'] = cu_M['0']
    orig_df['dst'] = cu_M['1']

    compare(orig_df['src'], orig_df['dst'], None, sym_df['src_s'],
            sym_df['dst_s'], None)
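The compare helper invoked above is not part of the listing. The join-based check described in the comment could look roughly like the sketch below, assuming cudf's pandas-style merge and an input edge list without duplicate edges (the helper body is an assumption; only its call signature comes from the test):

import cudf

def compare(src1, dst1, val1, src2, dst2, val2):
    # Hypothetical check: every (src1, dst1) edge must appear in the
    # symmetrized (src2, dst2) edge list in both directions, so the
    # inner joins keep the same number of rows as the input.
    df1 = cudf.DataFrame({'src': src1, 'dst': dst1})
    df2 = cudf.DataFrame({'src_s': src2, 'dst_s': dst2})
    fwd = df1.merge(df2, left_on=['src', 'dst'],
                    right_on=['src_s', 'dst_s'], how='inner')
    rev = df1.merge(df2, left_on=['dst', 'src'],
                    right_on=['src_s', 'dst_s'], how='inner')
    assert len(fwd) == len(df1)
    assert len(rev) == len(df1)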
Example No. 8
def test_degrees_functionality(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1', edge_attr='2')

    Gnx = nx.DiGraph(M)

    df = G.degrees()

    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    err_in_degree = 0
    err_out_degree = 0

    for i in range(len(df)):
        if (df['in_degree'][i] != nx_in_degree[i]):
            err_in_degree = err_in_degree + 1
        if (df['out_degree'][i] != nx_out_degree[i]):
            err_out_degree = err_out_degree + 1

    assert err_in_degree == 0
    assert err_out_degree == 0
Example No. 9
def test_to_undirected(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    cu_M = utils.read_csv_file(graph_file)
    cu_M = cu_M[cu_M['0'] <= cu_M['1']].reset_index(drop=True)
    M = utils.read_csv_for_nx(graph_file)
    M = M[M['0'] <= M['1']]
    assert len(cu_M) == len(M)

    # cugraph add_edge_list
    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(cu_M, source='0', destination='1')
    DiGnx = nx.from_pandas_edgelist(M,
                                    source='0',
                                    target='1',
                                    create_using=nx.DiGraph())

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert (G.number_of_nodes() == Gnx.number_of_nodes())
    assert (G.number_of_edges() == Gnx.number_of_edges())

    edgelist_df = G.edgelist.edgelist_df

    for i in range(len(edgelist_df)):
        assert Gnx.has_edge(edgelist_df.iloc[i]['src'],
                            edgelist_df.iloc[i]['dst'])
Example No. 10
def test_edge_cut_clustering(managed, pool, graph_file, partitions):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    # Read in the graph and get a cugraph object
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)
    '''row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)

    G_adj = cugraph.DiGraph()
    G_adj.from_cudf_adjlist(row_offsets, col_indices)'''
    G_edge = cugraph.DiGraph()
    G_edge.from_cudf_edgelist(cu_M, source='0', destination='1')

    # Get the edge_cut score for partitioning versus random assignment
    '''cu_vid, cu_score = cugraph_call(G_adj, partitions)
    rand_vid, rand_score = random_call(G_adj, partitions)
    '''
    # Assert that the partitioning has better edge_cut than the random
    # assignment
    '''assert cu_score < rand_score'''

    # Get the edge_cut score for partitioning versus random assignment
    cu_vid, cu_score = cugraph_call(G_edge, partitions)
    rand_vid, rand_score = random_call(G_edge, partitions)

    # Assert that the partitioning has better edge_cut than the random
    # assignment
    print(cu_score, rand_score)
    assert cu_score < rand_score
Example No. 11
def test_modularity_clustering(managed, pool, graph_file, partitions):
    gc.collect()

    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()

    assert (rmm.is_initialized())

    # Read in the graph and get a cugraph object
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)
    sources = cu_M['0']
    destinations = cu_M['1']
    values = cu_M['2']
    G = cugraph.Graph()
    G.add_edge_list(sources, destinations, values)

    # Get the modularity score for partitioning versus random assignment
    cu_score = cugraph_call(G, partitions)
    rand_score = random_call(G, partitions)

    # Assert that the partitioning has better modularity than the random
    # assignment
    assert cu_score > rand_score
Example No. 12
def test_louvain_with_edgevals(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(M)
    # Calculating modularity scores for comparison
    Gnx = nx.from_pandas_edgelist(M,
                                  source='0',
                                  target='1',
                                  edge_attr='weight',
                                  create_using=nx.Graph())
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())
    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
Example No. 13
def test_ecg_clustering(managed,
                        pool,
                        graph_file,
                        min_weight,
                        ensemble_size):
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert(rmm.is_initialized())

    # Read in the graph and get a cugraph object
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # Get the modularity score for partitioning versus random assignment
    cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size)
    golden_score = golden_call(graph_file)

    # Assert that the partitioning has better modularity than the random
    # assignment
    assert cu_score > (.95 * golden_score)
Example No. 14
def test_louvain_with_edgevals(managed, pool, graph_file):
    gc.collect()

    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True)
    nx_parts = networkx_call(M)

    # Calculating modularity scores for comparison
    Gnx = nx.Graph(M)
    cu_map = {0: 0}
    for i in range(len(cu_parts)):
        cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i]
    assert set(nx_parts.keys()) == set(cu_map.keys())
    cu_mod_nx = community.modularity(cu_map, Gnx)
    nx_mod = community.modularity(nx_parts, Gnx)
    assert len(cu_parts) == len(nx_parts)
    assert cu_mod > (.82 * nx_mod)
    print(cu_mod)
    print(cu_mod_nx)
    print(nx_mod)
    assert abs(cu_mod - cu_mod_nx) < .0001
Example No. 15
def test_bfs(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    base_vid, base_dist = base_call(M, 0)
    cugraph_vid, cugraph_dist = cugraph_call(cu_M, 0)

    # Calculating mismatch

    # assert len(base_dist) == len(cugraph_dist)
    i = 0
    j = 0
    while i < len(cugraph_dist):
        if base_vid[i] == cugraph_vid[i]:
            assert base_dist[i] == cugraph_dist[i]
        else:
            j = j + 1
        i = i + 1
Example No. 16
def test_add_adj_list_to_edge_list(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file).tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    if M.shape[0] != M.shape[1]:
        raise TypeError('Shape is not square')

    offsets = cudf.Series(M.indptr)
    indices = cudf.Series(M.indices)

    M = M.tocoo()
    sources_exp = cudf.Series(M.row)
    destinations_exp = cudf.Series(M.col)

    # cugraph add_adj_list to_edge_list call
    G = cugraph.DiGraph()
    G.from_cudf_adjlist(offsets, indices, None)
    edgelist = G.view_edge_list()
    sources_cu = np.array(edgelist['src'])
    destinations_cu = np.array(edgelist['dst'])
    assert compare_series(sources_cu, sources_exp)
    assert compare_series(destinations_cu, destinations_exp)
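compare_series (and the companion compare_offsets) are test utilities that are not included here. A hypothetical, order-sensitive minimal version is sketched below; the real helpers may do more (type coercion, tolerance handling):

def compare_series(actual, expected):
    # Elementwise, order-sensitive comparison of two indexable
    # sequences (numpy array, cudf Series, ...).
    if len(actual) != len(expected):
        return False
    return all(actual[i] == expected[i] for i in range(len(actual)))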
Example No. 17
def test_delete_edge_list_delete_adj_list(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    df = cudf.DataFrame()
    df['src'] = cudf.Series(M.row)
    df['dst'] = cudf.Series(M.col)

    M = M.tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    if M.shape[0] != M.shape[1]:
        raise TypeError('Shape is not square')

    offsets = cudf.Series(M.indptr)
    indices = cudf.Series(M.indices)

    # cugraph delete_adj_list delete_edge_list call
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(df, source='src', target='dst')
    G.delete_edge_list()
    with pytest.raises(Exception):
        G.view_adj_list()

    G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
    with pytest.raises(Exception):
        G.view_edge_list()
Example No. 18
def test_add_edge_list_to_adj_list(managed, pool, graph_file):
    gc.collect()

    rmm.finalize()
    rmm_config.use_managed_memory = managed
    rmm_config.use_pool_allocator = pool
    rmm_config.initial_pool_size = 2 << 27
    rmm.initialize()

    assert (rmm.is_initialized())

    cu_M = utils.read_csv_file(graph_file)
    sources = cu_M['0']
    destinations = cu_M['1']

    M = utils.read_csv_for_nx(graph_file).tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    if M.shape[0] != M.shape[1]:
        raise TypeError('Shape is not square')

    offsets_exp = M.indptr
    indices_exp = M.indices

    # cugraph add_edge_list to_adj_list call
    G = cugraph.Graph()
    G.add_edge_list(sources, destinations, None)
    offsets_cu, indices_cu, values_cu = G.view_adj_list()
    assert compare_offsets(offsets_cu, offsets_exp)
    assert compare_series(indices_cu, indices_exp)
    assert values_cu is None
Example No. 19
def test_woverlap(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert(rmm.is_initialized())

    Mnx = utils.read_csv_for_nx(graph_file)
    N = max(max(Mnx['0']), max(Mnx['1'])) + 1
    M = scipy.sparse.csr_matrix((Mnx.weight, (Mnx['0'], Mnx['1'])),
                                shape=(N, N))

    cu_M = utils.read_csv_file(graph_file)
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1')
    pairs = G.get_two_hop_neighbors()

    cu_coeff = cugraph_call(cu_M, pairs)
    cpu_coeff = cpu_call(M, pairs['first'], pairs['second'])
    assert len(cu_coeff) == len(cpu_coeff)
    for i in range(len(cu_coeff)):
        if np.isnan(cpu_coeff[i]):
            assert np.isnan(cu_coeff[i])
        elif np.isnan(cu_coeff[i]):
            assert cpu_coeff[i] == cu_coeff[i]
        else:
            diff = abs(cpu_coeff[i] - cu_coeff[i])
            assert diff < 1.0e-6
Example No. 20
def test_renumber_files(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert(rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(M['0'])
    destinations = cudf.Series(M['1'])

    translate = 1000

    source_translated = cudf.Series([x + translate for x in sources])
    dest_translated = cudf.Series([x + translate for x in destinations])

    src, dst, numbering = cugraph.renumber(source_translated, dest_translated)

    for i in range(len(sources)):
        assert sources[i] == (numbering[src[i]] - translate)
        assert destinations[i] == (numbering[dst[i]] - translate)
Example No. 21
def test_sssp_edgevals(managed, pool, graph_file, source):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_paths, max_val = cugraph_call(cu_M, source, edgevals=True)
    nx_paths, Gnx = networkx_call(M, source, edgevals=True)

    # Calculating mismatch
    err = 0
    for vid in cu_paths:
        # Validate vertices that are reachable
        # NOTE: If the distance type is float64 then cu_paths[vid][0]
        # should be compared against np.finfo(np.float64).max
        if (cu_paths[vid][0] != max_val):
            if (cu_paths[vid][0] != nx_paths[vid]):
                err = err + 1
            # check pred dist + edge_weight = current dist
            if (vid != source):
                pred = cu_paths[vid][1]
                edge_weight = Gnx[pred][vid]['weight']
                if (cu_paths[pred][0] + edge_weight != cu_paths[vid][0]):
                    err = err + 1
        else:
            if (vid in nx_paths.keys()):
                err = err + 1

    assert err == 0
Example No. 22
def test_multi_column_unrenumbering(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    translate = 100
    cu_M = utils.read_csv_file(graph_file)
    cu_M['00'] = cu_M['0'] + translate
    cu_M['11'] = cu_M['1'] + translate

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, ['0', '00'], ['1', '11'])
    result_multi = cugraph.pagerank(G).sort_values(by='0').\
        reset_index(drop=True)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, '0', '1')
    result_single = cugraph.pagerank(G)

    result_exp = cudf.DataFrame()
    result_exp['0'] = result_single['vertex']
    result_exp['1'] = result_single['vertex'] + translate
    result_exp['pagerank'] = result_single['pagerank']

    assert result_multi.equals(result_exp)
Example No. 23
def test_jaccard_two_hop_edge_vals(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    Gnx = nx.from_pandas_edgelist(M,
                                  source='0',
                                  target='1',
                                  edge_attr='weight',
                                  create_using=nx.Graph())
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')
    pairs = G.get_two_hop_neighbors()
    nx_pairs = []
    for i in range(len(pairs)):
        nx_pairs.append((pairs['first'][i], pairs['second'][i]))
    preds = nx.jaccard_coefficient(Gnx, nx_pairs)
    nx_coeff = []
    for u, v, p in preds:
        nx_coeff.append(p)
    df = cugraph.jaccard(G, pairs)
    df = df.sort_values(by=['source', 'destination'])
    assert len(nx_coeff) == len(df)
    for i in range(len(df)):
        diff = abs(nx_coeff[i] - df['jaccard_coeff'][i])
        assert diff < 1.0e-6
Example No. 24
def test_add_adj_list_to_edge_list(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    Mnx = utils.read_csv_for_nx(graph_file)
    N = max(max(Mnx['0']), max(Mnx['1'])) + 1
    Mcsr = scipy.sparse.csr_matrix((Mnx.weight, (Mnx['0'], Mnx['1'])),
                                   shape=(N, N))

    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    Mcoo = Mcsr.tocoo()
    sources_exp = cudf.Series(Mcoo.row)
    destinations_exp = cudf.Series(Mcoo.col)

    # cugraph add_adj_list to_edge_list call
    G = cugraph.DiGraph()
    G.from_cudf_adjlist(offsets, indices, None)
    edgelist = G.view_edge_list()
    sources_cu = np.array(edgelist['src'])
    destinations_cu = np.array(edgelist['dst'])
    assert compare_series(sources_cu, sources_exp)
    assert compare_series(destinations_cu, destinations_exp)
Example No. 25
def test_jaccard_edgevals(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(
        managed_memory=managed,
        pool_allocator=pool,
        initial_pool_size=2 << 27
    )

    assert(rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)
    cu_src, cu_dst, cu_coeff = cugraph_call(cu_M, edgevals=True)
    nx_src, nx_dst, nx_coeff = networkx_call(M)

    # Calculating mismatch
    err = 0
    tol = 1.0e-06

    assert len(cu_coeff) == len(nx_coeff)
    for i in range(len(cu_coeff)):
        if(abs(cu_coeff[i] - nx_coeff[i]) > tol*1.1 and
           cu_src[i] == nx_src[i] and cu_dst[i] == nx_dst[i]):
            err += 1

    print("Mismatches:  %d" % err)
    assert err == 0
Example No. 26
def test_delete_edge_list_delete_adj_list(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    Mnx = utils.read_csv_for_nx(graph_file)
    df = cudf.DataFrame()
    df['src'] = cudf.Series(Mnx['0'])
    df['dst'] = cudf.Series(Mnx['1'])

    N = max(max(Mnx['0']), max(Mnx['1'])) + 1
    Mcsr = scipy.sparse.csr_matrix((Mnx.weight, (Mnx['0'], Mnx['1'])),
                                   shape=(N, N))
    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    # cugraph delete_adj_list delete_edge_list call
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(df, source='src', destination='dst')
    G.delete_edge_list()
    with pytest.raises(Exception):
        G.view_adj_list()

    G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
    with pytest.raises(Exception):
        G.view_edge_list()
Example No. 27
def test_renumber_files_multi_col(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    sources = cudf.Series(M['0'])
    destinations = cudf.Series(M['1'])

    translate = 1000

    gdf = cudf.DataFrame()
    gdf['src_old'] = sources
    gdf['dst_old'] = destinations
    gdf['src'] = sources + translate
    gdf['dst'] = destinations + translate

    src, dst, numbering = cugraph.renumber_from_cudf(gdf, ['src', 'src_old'],
                                                     ['dst', 'dst_old'])

    for i in range(len(gdf)):
        assert sources[i] == (numbering['0'][src[i]] - translate)
        assert destinations[i] == (numbering['0'][dst[i]] - translate)
Example No. 28
def test_Graph_from_MultiGraph(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    cu_M = utils.read_csv_file(graph_file)

    # create dataframe for MultiGraph
    cu_M['3'] = cudf.Series([2.0] * len(cu_M), dtype=np.float32)
    cu_M['4'] = cudf.Series([3.0] * len(cu_M), dtype=np.float32)

    # initialize MultiGraph
    G_multi = cugraph.MultiGraph()
    G_multi.from_cudf_edgelist(cu_M,
                               source='0',
                               destination='1',
                               edge_attr=['2', '3', '4'])

    # initialize Graph
    G = cugraph.Graph()
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    # create Graph from MultiGraph
    G_from_multi = cugraph.Graph(G_multi, edge_attr='2')

    assert G.edgelist.edgelist_df == G_from_multi.edgelist.edgelist_df
Example No. 29
def test_add_edge_list_to_adj_list(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    cu_M = utils.read_csv_file(graph_file)

    M = utils.read_csv_for_nx(graph_file).tocsr()
    if M is None:
        raise TypeError('Could not read the input graph')
    if M.shape[0] != M.shape[1]:
        raise TypeError('Shape is not square')

    offsets_exp = M.indptr
    indices_exp = M.indices

    # cugraph add_edge_list to_adj_list call
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source='0', target='1')
    offsets_cu, indices_cu, values_cu = G.view_adj_list()
    assert compare_offsets(offsets_cu, offsets_exp)
    assert compare_series(indices_cu, indices_exp)
    assert values_cu is None
Example No. 30
def test_bfs(managed, pool, graph_file):
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert (rmm.is_initialized())

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    base_vid, base_dist = base_call(M, 0)
    cugraph_vid, cugraph_dist = cugraph_call(cu_M, 0)

    # Calculating mismatch
    # Currently, a vertex order mismatch is not considered an error
    cugraph_idx = 0
    base_idx = 0
    distance_error_counter = 0
    while cugraph_idx < len(cugraph_dist):
        if base_vid[base_idx] == cugraph_vid[cugraph_idx]:
            # An error is detected when, for the same vertex,
            # the distances differ
            if base_dist[base_idx] != cugraph_dist[cugraph_idx]:
                distance_error_counter += 1
            cugraph_idx += 1
        base_idx += 1
    assert distance_error_counter == 0