def generate_rand_graph(n, connect_more=False, complete=False, add_self_loop=False):
    """Build a random or complete ``dgl.DGLGraphStale`` carrying random features.

    Parameters
    ----------
    n : int
        Number of nodes; the adjacency matrix has shape ``(n, n)``.
    connect_more : bool
        Random graph only: set row 0 and column 0 of the adjacency matrix
        to 1 so that node 0 is linked with every node.
    complete : bool
        When true, the adjacency holds every ordered pair ``(i, j)`` with
        ``i != j`` instead of a random pattern.
    add_self_loop : bool
        When true, the graph is created mutable, one ``i -> i`` edge is
        added per node, and ``g.readonly()`` is called afterwards.

    Returns
    -------
    The graph, with node data ``'h1'`` (10 columns per node) and edge data
    ``'h2'`` (3 columns per edge), both drawn from ``F.randn``.
    """
    if not complete:
        # Nonzero pattern of a density-0.1 random sparse matrix, as int64.
        adj = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
        if connect_more:
            # Make node 0 adjacent to all the other nodes.
            adj[0] = 1
            adj[:, 0] = 1
    else:
        # Ordered pairs of distinct nodes, in the same row-major order that
        # filtering the full Cartesian product would give.
        pairs = list(itertools.permutations(range(n), 2))
        src_ids = [u for u, _ in pairs]
        dst_ids = [v for _, v in pairs]
        weights = np.ones((len(src_ids),))
        adj = sp.sparse.coo_matrix((weights, (src_ids, dst_ids)), shape=(n, n))

    # The graph has to start out mutable if self loops are still to be added.
    g = dgl.DGLGraphStale(adj, readonly=not add_self_loop)
    if add_self_loop:
        node_ids = np.arange(g.number_of_nodes())
        g.add_edges(node_ids, node_ids)
        g.readonly()

    g.ndata['h1'] = F.randn((g.number_of_nodes(), 10))
    g.edata['h2'] = F.randn((g.number_of_edges(), 3))
    return g
def test_random_walk_with_restart():
    """Exercise the random-walk-with-restart samplers on a small path graph.

    The graph is the path 0-1-2-3-4 with an edge in each direction, so two
    connected nodes always have IDs that differ by exactly 1.
    """
    path_edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 3), (3, 2), (2, 1), (1, 0)]
    seeds = [0, 1]
    max_nodes = 10
    g = dgl.DGLGraphStale(path_edges)

    sampling = dgl.contrib.sampling
    num_seeds = len(seeds)

    # Plain RWR: every step moves to an adjacent ID, and each seed gathers
    # at least ``max_nodes`` visited nodes over all of its traces.
    all_traces = sampling.random_walk_with_restart(g, seeds, 0.2, max_nodes)
    assert len(all_traces) == num_seeds
    for seed_traces in all_traces:
        visited = 0
        for trace in seed_traces:
            visited += len(trace)
            steps = np.diff(F.zerocopy_to_numpy(trace), axis=-1)
            assert (np.abs(steps) == 1).all()
        assert visited >= max_nodes

    # RWR with early stopping: the per-seed total must stay below 100.
    all_traces = sampling.random_walk_with_restart(
        g, seeds, 1, 100, max_nodes, 1)
    assert len(all_traces) == num_seeds
    for seed_traces in all_traces:
        visited = sum(len(trace) for trace in seed_traces)
        assert visited < 100

    # Bipartite RWR: consecutive IDs in a trace differ by an even amount.
    all_traces = sampling.bipartite_single_sided_random_walk_with_restart(
        g, seeds, 0.2, max_nodes)
    assert len(all_traces) == num_seeds
    for seed_traces in all_traces:
        for trace in seed_traces:
            steps = np.diff(F.zerocopy_to_numpy(trace), axis=-1)
            assert (steps % 2 == 0).all()
def test_nonuniform_neighbor_sampler():
    """Check that ``NeighborSampler`` honours edge weights via ``transition_prob``.

    The graph has
      (1) a path (0, 1, ..., 99) whose edges carry weight 1, and
      (2) a bunch of random extra edges that carry weight 0.
    With one neighbor sampled per node, the test expects the sampled
    NodeFlow to follow the path exactly, in both the 'in' and the 'out'
    direction.
    """
    num_nodes = 100
    last_node = num_nodes - 1

    # Path edges come first so that the weight vector below lines up with them.
    edges = [(i, i + 1) for i in range(last_node)]
    num_path_edges = len(edges)

    # A companion set gives O(1) duplicate checks; ``edges`` keeps insertion
    # order, so the resulting edge list is the same as with a list scan.
    seen = set(edges)
    for _ in range(1000):
        edge = (np.random.randint(num_nodes), np.random.randint(num_nodes))
        if edge not in seen:
            seen.add(edge)
            edges.append(edge)

    src, dst = zip(*edges)
    g = dgl.DGLGraphStale()
    g.add_nodes(num_nodes)
    g.add_edges(src, dst)
    g.readonly()
    g.edata['w'] = F.cat([
        F.ones((num_path_edges, ), F.float64, F.cpu()),
        F.zeros((len(edges) - num_path_edges, ), F.float64, F.cpu())
    ], 0)

    # Test 1-neighbor NodeFlow with the last node (99) as target node.
    # The generated NodeFlow should only contain node i on layer i.
    sampler = dgl.contrib.sampling.NeighborSampler(
        g, 1, 1, last_node, 'in', transition_prob='w', seed_nodes=[last_node])
    nf = next(iter(sampler))
    assert nf.num_layers == num_nodes
    for i in range(nf.num_layers):
        assert nf.layer_size(i) == 1
        assert F.asnumpy(nf.layer_parent_nid(i)[0]) == i

    # Test the reverse direction: starting from node 0, layer i should hold
    # node 99 - i.
    sampler = dgl.contrib.sampling.NeighborSampler(
        g, 1, 1, last_node, 'out', transition_prob='w', seed_nodes=[0])
    nf = next(iter(sampler))
    assert nf.num_layers == num_nodes
    for i in range(nf.num_layers):
        assert nf.layer_size(i) == 1
        assert F.asnumpy(nf.layer_parent_nid(i)[0]) == last_node - i
def test_random_walk():
    """Check shape, start nodes and step validity of ``random_walk`` traces.

    The graph is the path 0-1-2-3-4 with an edge in each direction, so a
    valid step always changes the node ID by exactly 1.
    """
    path_edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 3), (3, 2), (2, 1), (1, 0)]
    seeds = [0, 1]
    n_traces = 3
    n_hops = 4

    g = dgl.DGLGraphStale(path_edges, readonly=True)
    walks = F.zerocopy_to_numpy(
        dgl.contrib.sampling.random_walk(g, seeds, n_traces, n_hops))

    # One block per seed, ``n_traces`` walks each, start node plus one per hop.
    expected_shape = (len(seeds), n_traces, n_hops + 1)
    assert walks.shape == expected_shape

    # Every walk begins at its own seed.
    for idx, seed in enumerate(seeds):
        starts = walks[idx, :, 0]
        assert (starts == seed).all()

    # Only nodes with adjacent IDs are connected.
    steps = np.diff(walks, axis=-1)
    assert (np.abs(steps) == 1).all()
##cora dataset have 2708 nodes, 1208 of them is used as train set 1000 of them is used as test set data = citegrh.load_cora() adj = nx.adjacency_matrix(data.graph) # reorder n_nodes = 2708 ids_shuffle = np.arange(n_nodes) np.random.shuffle(ids_shuffle) adj = adj[ids_shuffle, :][:, ids_shuffle] data.features = data.features[ids_shuffle] data.labels = data.labels[ids_shuffle] ##train-test split train_nodes = np.arange(1208) test_nodes = np.arange(1708, 2708) train_adj = adj[train_nodes, :][:, train_nodes] test_adj = adj[test_nodes, :][:, test_nodes] trainG = dgl.DGLGraphStale(train_adj) allG = dgl.DGLGraphStale(adj) h = torch.tensor(data.features[train_nodes], dtype=torch.float32) test_h = torch.tensor(data.features[test_nodes], dtype=torch.float32) all_h = torch.tensor(data.features, dtype=torch.float32) train_nodes = torch.tensor(train_nodes) test_nodes = torch.tensor(test_nodes) y_train = torch.tensor(data.labels[train_nodes]) y_test = torch.tensor(data.labels[test_nodes]) input_size = h.shape[1] output_size = data.num_labels ##configuration config = { 'n_epoch': 300, 'lamb': 0.5,
def generate_rand_graph(n):
    """Return a read-only ``dgl.DGLGraphStale`` built from a random adjacency.

    The adjacency is the nonzero pattern of an ``n x n`` random sparse matrix
    with density 0.1, cast to int64.
    """
    random_values = sp.sparse.random(n, n, density=0.1, format='coo')
    adjacency = (random_values != 0).astype(np.int64)
    return dgl.DGLGraphStale(adjacency, readonly=True)