def sample_blocks(self, seeds): """Do neighbor sample Parameters ---------- seeds : Seed nodes Returns ------- tensor Seed nodes, also known as target nodes blocks Sampled subgraphs """ blocks = [] etypes = [] norms = [] ntypes = [] seeds = th.LongTensor(np.asarray(seeds)) cur = seeds for fanout in self.fanouts: frontier = self.sample_neighbors(self.g, cur, fanout, replace=True) etypes = self.g.edata[dgl.ETYPE][frontier.edata[dgl.EID]] norm = self.g.edata['norm'][frontier.edata[dgl.EID]] block = dgl.to_block(frontier, cur) block.srcdata[dgl.NTYPE] = self.g.ndata[dgl.NTYPE][block.srcdata[ dgl.NID]] block.edata['etype'] = etypes block.edata['norm'] = norm cur = block.srcdata[dgl.NID] blocks.insert(0, block) return seeds, blocks
def start_bipartite_etype_sample_client(rank, tmpdir, disable_shared_mem, fanout=3, nodes={}):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['user'].data
    assert 'feat' in dist_graph.nodes['game'].data
    if dist_graph.local_partition is not None:
        # Check whether etypes are sorted in dist_graph.
        local_g = dist_graph.local_partition
        local_nids = np.arange(local_g.num_nodes())
        for lnid in local_nids:
            leids = local_g.in_edges(lnid, form='eid')
            letids = F.asnumpy(local_g.edata[dgl.ETYPE][leids])
            _, indices = np.unique(letids, return_index=True)
            assert np.all(indices[:-1] <= indices[1:])
    if gpb is None:
        gpb = dist_graph.get_partition_book()
    sampled_graph = sample_etype_neighbors(dist_graph, nodes, dgl.ETYPE, fanout)
    block = dgl.to_block(sampled_graph, nodes)
    if sampled_graph.num_edges() > 0:
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    dgl.distributed.exit_client()
    return block, gpb
def inference(self, g, x, batch_size, device):
    """Inference with the GraphSAGE model on full neighbors
    (i.e. without neighbor sampling).

    g : the entire graph.
    x : the input features of the entire node set.

    The inference code is written in a fashion that it could handle
    any number of nodes and layers.
    """
    # During inference with sampling, multi-layer blocks are very inefficient
    # because lots of computations in the first few layers are repeated.
    # Therefore, we compute the representation of all nodes layer by layer.
    # The nodes on each layer are of course split into batches.
    # TODO: can we standardize this?
    nodes = th.arange(g.number_of_nodes())
    for l, layer in enumerate(self.layers):
        y = th.zeros(g.number_of_nodes(),
                     self.n_hidden if l != len(self.layers) - 1 else self.n_classes)
        for start in tqdm.trange(0, len(nodes), batch_size):
            end = start + batch_size
            batch_nodes = nodes[start:end]
            block = dgl.to_block(dgl.in_subgraph(g, batch_nodes), batch_nodes)
            input_nodes = block.srcdata[dgl.NID]
            h = x[input_nodes].to(device)
            h_dst = h[:block.number_of_dst_nodes()]
            h = layer(block, (h, h_dst))
            if l != len(self.layers) - 1:
                h = self.activation(h)
                h = self.dropout(h)
            y[start:end] = h.cpu()
        x = y
    return y
def compact_and_copy(frontier, seeds):
    block = dgl.to_block(frontier, seeds)
    for col, data in frontier.edata.items():
        if col == dgl.EID:
            continue
        block.edata[col] = data[block.edata[dgl.EID]]
    return block
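# A minimal, self-contained sketch (not from any snippet above; the toy graph,
# the 'weight' feature name, and the fanout are illustrative assumptions) of
# how compact_and_copy carries edge data from a frontier into a block:
# sample_neighbors copies the parent graph's edata onto the frontier, and
# compact_and_copy re-indexes it through block.edata[dgl.EID].
import dgl
import torch

g = dgl.graph((torch.tensor([0, 1, 2, 3]), torch.tensor([1, 2, 3, 0])))
g.edata['weight'] = torch.arange(4, dtype=torch.float32)
seeds = torch.tensor([1, 2])
frontier = dgl.sampling.sample_neighbors(g, seeds, 2)
block = compact_and_copy(frontier, seeds)
assert 'weight' in block.edata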
def sample_blocks(self, seeds):
    # Assuming several bipartite graphs chained together (i.e. blocks), the
    # idea is to sample from left to right. The result is a list of blocks
    # containing all of the nodes needed for several hops of aggregation
    # over the batch_size nodes (i.e. seeds).
    seeds = th.LongTensor(np.asarray(seeds))
    blocks = []
    for fanout in self.fanouts:
        if fanout is None:
            frontier = dgl.transform.in_subgraph(self.g, seeds)
        else:
            # sample_neighbors samples neighbors for each seed node and
            # returns the corresponding subgraph, i.e. the frontier.
            # replace=True means sampling with replacement
            # (TODO: double-check the exact semantics).
            frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=True)
        # to_block converts the sampled subgraph into a bipartite graph
        # suitable for computation. Note that the IDs in block.srcdata
        # also include the dst node IDs.
        block = dgl.to_block(frontier, seeds)
        # Take the src nodes of the new block as the seeds of the next layer,
        # because sampling is the reverse of aggregation.
        seeds = block.srcdata[dgl.NID]
        # Prepend this block: with two layers (K=2), the final result is
        # blocks = [block1, block0], where the index denotes the loop iteration.
        blocks.insert(0, block)
    return blocks
def sample_blocks(self, seeds):
    # Based on the idea of bipartite graphs, sampling is performed from the
    # LHS to the RHS, i.e. from the seeds to their neighbors.
    # Note: the outermost frontier, i.e. blocks[0].srcdata[dgl.NID], includes
    # all of the nodes needed for k-hop aggregation of the batch_size seeds.
    seeds = torch.LongTensor(np.asarray(seeds))
    blocks = []
    for fanout in self.fanouts:
        if fanout is None:
            # Used during inference: return the subgraph containing
            # ALL 1-hop neighbors.
            frontier = dgl.transform.in_subgraph(self.g, seeds)
        else:
            # sample_neighbors() samples 'fanout' neighbors of 'seeds' on 'g'.
            # TODO: the meaning of replace=True needs checking.
            frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=True)
        # to_block() converts 'frontier' to a bipartite graph with 'seeds' as
        # the DST nodes. 'include_dst_in_src=True' includes the DST nodes in
        # the SRC nodes, so we can fetch the DST node features from the SRC
        # node features (hence the note at the beginning of this method).
        block = dgl.to_block(frontier, seeds, include_dst_in_src=True)
        # Assign the SRC of the current block as the DST of the next block.
        seeds = block.srcdata[dgl.NID]
        # Store blocks in stack order: with two layers (K=2),
        # blocks = [block 1, block 0], where the block id denotes the loop.
        blocks.insert(0, block)
    return blocks
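# A small sketch (toy graph and fanout are assumptions) illustrating the note
# above: to_block guarantees that the DST nodes appear first among the SRC
# nodes, so DST features can be taken as a prefix slice of the SRC features.
import dgl
import torch

g = dgl.graph((torch.tensor([0, 1, 2, 3, 4]), torch.tensor([1, 2, 3, 4, 0])))
seeds = torch.LongTensor([0, 1])
frontier = dgl.sampling.sample_neighbors(g, seeds, 2, replace=True)
block = dgl.to_block(frontier, seeds)
src, dst = block.srcdata[dgl.NID], block.dstdata[dgl.NID]
assert torch.equal(src[:len(dst)], dst)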
def test_gin_conv():
    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    ctx = F.ctx()
    gin_conv = nn.GINConv(lambda x: x, 'mean', 0.1)
    gin_conv.initialize(ctx=ctx)
    print(gin_conv)

    # test #1: basic
    feat = F.randn((g.number_of_nodes(), 5))
    h = gin_conv(g, feat)
    assert h.shape == (20, 5)

    # test #2: bipartite
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    feat = (F.randn((100, 5)), F.randn((200, 5)))
    h = gin_conv(g, feat)
    assert h.shape == (200, 5)

    # test #3: block
    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = np.unique(g.edges()[1].asnumpy())
    block = dgl.to_block(g, seed_nodes)
    feat = F.randn((block.number_of_src_nodes(), 5))
    h = gin_conv(block, feat)
    assert h.shape == (block.number_of_dst_nodes(), 5)
def test_gat_conv():
    ctx = F.ctx()
    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    gat = nn.GATConv(10, 20, 5)  # n_heads = 5
    gat.initialize(ctx=ctx)
    print(gat)

    # test#1: basic
    feat = F.randn((20, 10))
    h = gat(g, feat)
    assert h.shape == (20, 5, 20)

    # test#2: bipartite
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    gat = nn.GATConv((5, 10), 2, 4)
    gat.initialize(ctx=ctx)
    feat = (F.randn((100, 5)), F.randn((200, 10)))
    h = gat(g, feat)
    assert h.shape == (200, 4, 2)

    # test#3: block
    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = np.unique(g.edges()[1].asnumpy())
    block = dgl.to_block(g, seed_nodes)
    gat = nn.GATConv(5, 2, 4)
    gat.initialize(ctx=ctx)
    feat = F.randn((block.number_of_src_nodes(), 5))
    h = gat(block, feat)
    assert h.shape == (block.number_of_dst_nodes(), 4, 2)
def test_pickling_heterograph():
    # copied from test_heterograph.create_test_heterograph()
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    plays_g = dgl.bipartite(plays_spmat, 'user', 'plays', 'game')
    wishes_g = dgl.bipartite(wishes_nx, 'user', 'wishes', 'game')
    develops_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])

    g.nodes['user'].data['u_h'] = F.randn((3, 4))
    g.nodes['game'].data['g_h'] = F.randn((2, 5))
    g.edges['plays'].data['p_h'] = F.randn((4, 6))

    new_g = _reconstruct_pickle(g)
    _assert_is_identical_hetero(g, new_g)

    block = dgl.to_block(g, {'user': [1, 2], 'game': [0, 1], 'developer': []})
    new_block = _reconstruct_pickle(block)
    _assert_is_identical_hetero(block, new_block)
    assert block.is_block
    assert new_block.is_block
def block_graph1():
    g = dgl.heterograph({
        ('user', 'plays', 'game'): ([0, 1, 2], [1, 1, 0]),
        ('user', 'likes', 'game'): ([1, 2, 3], [0, 0, 2]),
        ('store', 'sells', 'game'): ([0, 1, 1], [0, 1, 2]),
    }, device=F.cpu())
    return dgl.to_block(g)
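# A quick inspection sketch for block_graph1() above (assumes the same test
# backend alias F is importable): a heterogeneous block keeps the original
# node IDs per node type under dgl.NID, separately for the SRC and DST sides.
import dgl

b = block_graph1()
for ntype in b.srctypes:
    print('src', ntype, b.srcnodes[ntype].data[dgl.NID])
for ntype in b.dsttypes:
    print('dst', ntype, b.dstnodes[ntype].data[dgl.NID])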
def compact_and_copy(frontier, seeds):
    # DGL provides dgl.to_block() to convert any frontier to a block.
    block = dgl.to_block(frontier, seeds)
    for col, data in frontier.edata.items():
        if col == dgl.EID:
            continue
        block.edata[col] = data[block.edata[dgl.EID]]
    return block
def sample_blocks(self, seeds):
    seeds = th.LongTensor(seeds)
    blocks = []
    hist_blocks = []
    for fanout in self.fanouts:
        # For each seed node, sample ``fanout`` neighbors.
        frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout)
        hist_frontier = dgl.in_subgraph(self.g, seeds)
        # Then we compact the frontiers into bipartite graphs for message passing.
        block = dgl.to_block(frontier, seeds)
        hist_block = dgl.to_block(hist_frontier, seeds)
        # Obtain the seed nodes for the next layer.
        seeds = block.srcdata[dgl.NID]
        blocks.insert(0, block)
        hist_blocks.insert(0, hist_block)
    return blocks, hist_blocks
def compact_and_copy(frontier, seeds): """Turn graph into block and copy edge data.""" block = dgl.to_block(frontier, seeds) for col, data in frontier.edata.items(): if col == dgl.EID: continue block.edata[col] = data[block.edata[dgl.EID]] return block
def sample_blocks(self, seed_edges):
    n_edges = len(seed_edges)
    seed_edges = th.LongTensor(np.asarray(seed_edges))
    heads, tails = self.g.find_edges(seed_edges)
    if self.neg_share and n_edges % self.num_negs == 0:
        neg_tails = self.neg_sampler(n_edges)
        neg_tails = (neg_tails.view(-1, 1, self.num_negs).expand(
            n_edges // self.num_negs, self.num_negs, self.num_negs).flatten())
        neg_heads = (heads.view(-1, 1).expand(n_edges, self.num_negs).flatten())
    else:
        neg_tails = self.neg_sampler(self.num_negs * n_edges)
        neg_heads = (heads.view(-1, 1).expand(n_edges, self.num_negs).flatten())

    # Maintain the correspondence between heads, tails and negative tails as two
    # graphs.
    # pos_graph contains the correspondence between each head and its positive tail.
    # neg_graph contains the correspondence between each head and its negative tails.
    # Both pos_graph and neg_graph are first constructed with the same node space as
    # the original graph. Then they are compacted together with dgl.compact_graphs.
    pos_graph = dgl.graph((heads, tails), num_nodes=self.g.number_of_nodes())
    neg_graph = dgl.graph((neg_heads, neg_tails), num_nodes=self.g.number_of_nodes())
    pos_graph, neg_graph = dgl.compact_graphs([pos_graph, neg_graph])

    # Obtain the node IDs being used in either pos_graph or neg_graph. Since they
    # are compacted together, pos_graph and neg_graph share the same compacted node
    # space.
    seeds = pos_graph.ndata[dgl.NID]
    blocks = []
    for fanout in self.fanouts:
        # For each seed node, sample ``fanout`` neighbors.
        frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=True)
        # Remove all edges between heads and tails, as well as heads and neg_tails.
        _, _, edge_ids = frontier.edge_ids(
            th.cat([heads, tails, neg_heads, neg_tails]),
            th.cat([tails, heads, neg_tails, neg_heads]),
            return_uv=True,
        )
        frontier = dgl.remove_edges(frontier, edge_ids)
        # Then we compact the frontier into a bipartite graph for message passing.
        block = dgl.to_block(frontier, seeds)
        # Pre-generate the CSR format so that it can be used in training directly.
        block.in_degree(0)
        # Obtain the seed nodes for the next layer.
        seeds = block.srcdata[dgl.NID]
        blocks.insert(0, block)
    return pos_graph, neg_graph, blocks
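# A hedged sketch (toy edges assumed) of the dgl.compact_graphs step above:
# two graphs built over the same parent node space are compacted together,
# so they share one compacted node space, with the mapping back to parent
# IDs stored under dgl.NID.
import dgl
import torch

pos = dgl.graph((torch.tensor([0, 5]), torch.tensor([5, 9])), num_nodes=10)
neg = dgl.graph((torch.tensor([0, 5]), torch.tensor([3, 7])), num_nodes=10)
pos_c, neg_c = dgl.compact_graphs([pos, neg])
assert torch.equal(pos_c.ndata[dgl.NID], neg_c.ndata[dgl.NID])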
def obtain_Bs(self, ids):
    ids = torch.LongTensor(np.asarray(ids))
    B = []
    for s in self.fanout:
        # Returns the sampled graph: the nodes are unchanged, but only the
        # sampled edges are kept.
        nf = sample_neighbors(self.g, nodes=ids, fanout=s, replace=True)
        # Convert to a bipartite graph for easy access to the src and dst
        # nodes, with the later layer's nodes as dst.
        b = dgl.to_block(nf, ids)
        # The src nodes of the bipartite graph become the ids of the previous layer.
        ids = b.srcdata[dgl.NID]
        # Prepend to the list.
        B.insert(0, b)
    return B
def construct_blocks(self, seeds, user_item_pairs_to_remove):
    blocks = []
    users, items = user_item_pairs_to_remove
    # Sampling picks the neighbors needed for each convolution layer.
    # Both edge directions of the graph are handled.
    for i in range(self.num_layers):
        sampled_graph = dgl.in_subgraph(self.graph, seeds)
        sampled_eids = sampled_graph.edges[('user', 'watched', 'item')].data[dgl.EID]
        sampled_eids_rev = sampled_graph.edges[('item', 'watchedby', 'user')].data[dgl.EID]
        # During training, remove the user-item pairs being predicted.
        _, _, edges_to_remove = sampled_graph.edge_ids(
            users, items, etype=('user', 'watched', 'item'), return_uv=True)
        _, _, edges_to_remove_rev = sampled_graph.edge_ids(
            items, users, etype=('item', 'watchedby', 'user'), return_uv=True)
        sampled_with_edges_removed = dgl.remove_edges(
            sampled_graph, edges_to_remove, ('user', 'watched', 'item'))
        sampled_with_edges_removed = dgl.remove_edges(
            sampled_with_edges_removed, edges_to_remove_rev, ('item', 'watchedby', 'user'))
        sampled_eids = sampled_eids[sampled_with_edges_removed.edges[
            ('user', 'watched', 'item')].data[dgl.EID]]
        sampled_eids_rev = sampled_eids_rev[sampled_with_edges_removed.edges[
            ('item', 'watchedby', 'user')].data[dgl.EID]]
        # Create the block for this layer.
        block = dgl.to_block(sampled_with_edges_removed, seeds)
        blocks.insert(0, block)
        seeds = {
            'user': block.srcnodes['user'].data[dgl.NID],
            'item': block.srcnodes['item'].data[dgl.NID],
        }
        # Copy the ratings over to the block.
        block.edges[('user', 'watched', 'item')].data['rating'] = \
            self.graph.edges[('user', 'watched', 'item')].data['rating'][sampled_eids]
        block.edges[('item', 'watchedby', 'user')].data['rating'] = \
            self.graph.edges[('item', 'watchedby', 'user')].data['rating'][sampled_eids_rev]
    return blocks
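# A hedged sketch (toy heterograph assumed) of the edge-removal step above:
# the user-item pairs being predicted are looked up with edge_ids and dropped
# before the block is built, so the model cannot see the edges it must predict.
import dgl
import torch

hg = dgl.heterograph({
    ('user', 'watched', 'item'): (torch.tensor([0, 0, 1]), torch.tensor([0, 1, 1])),
    ('item', 'watchedby', 'user'): (torch.tensor([0, 1, 1]), torch.tensor([0, 0, 1])),
})
eids = hg.edge_ids(torch.tensor([0]), torch.tensor([1]), etype=('user', 'watched', 'item'))
hg2 = dgl.remove_edges(hg, eids, ('user', 'watched', 'item'))
assert hg2.num_edges(('user', 'watched', 'item')) == 2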
def sample(self, pairs):
    heads, tails, types = zip(*pairs)
    seeds, head_invmap = torch.unique(torch.LongTensor(heads), return_inverse=True)
    blocks = []
    for fanout in reversed(self.num_fanouts):
        sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout)
        sampled_block = dgl.to_block(sampled_graph, seeds)
        seeds = sampled_block.srcdata[dgl.NID]
        blocks.insert(0, sampled_block)
    return (blocks, torch.LongTensor(head_invmap),
            torch.LongTensor(tails), torch.LongTensor(types))
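# Sketch of the torch.unique(..., return_inverse=True) trick used above:
# seeds are the deduplicated heads, and head_invmap maps each original head
# back to its position in seeds (and hence to a row of the innermost block).
import torch

heads = torch.LongTensor([3, 1, 3, 2])
seeds, head_invmap = torch.unique(heads, return_inverse=True)
assert torch.equal(seeds[head_invmap], heads)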
def sample_blocks(self, seeds):
    block_list = []
    for sampler in self.sampler_list:
        frontier = sampler(seeds)
        # Add self loops.
        frontier = dgl.remove_self_loop(frontier)
        frontier.add_edges(torch.tensor(seeds), torch.tensor(seeds))
        block = dgl.to_block(frontier, seeds)
        block_list.append(block)
    return seeds, block_list
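# Sketch (toy graph assumed; in-place add_edges as in DGL >= 0.5) of the
# self-loop step above: existing self loops are removed first so that each
# seed ends up with exactly one self loop in the frontier.
import dgl
import torch

g = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([0, 1, 0])))
g = dgl.remove_self_loop(g)
g.add_edges(torch.tensor([0, 1]), torch.tensor([0, 1]))
assert g.num_edges() == 4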
def sample_blocks(self, g, seed_nodes, exclude_eids=None):
    output_nodes = seed_nodes
    blocks = []
    for block_id in reversed(range(self.num_layers)):
        frontier = self.sample_frontier(block_id, g, seed_nodes)
        eid = frontier.edata[dgl.EID]
        block = dgl.to_block(frontier, seed_nodes)
        block.edata[dgl.EID] = eid
        seed_nodes = block.srcdata[dgl.NID]
        blocks.insert(0, block)
    return seed_nodes, output_nodes, blocks
def sample_blocks(self, seeds, fan_outs):
    seeds = torch.LongTensor(np.asarray(seeds))
    blocks = []
    for fan_out in fan_outs:
        frontier = dgl.sampling.sample_neighbors(self.g, seeds, fan_out, replace=True)
        block = dgl.to_block(frontier, seeds)
        seeds = block.srcdata[dgl.NID]
        blocks.insert(0, block)
    return [block.to(self.device) for block in blocks]
def sample_blocks(self, seeds):
    seeds = th.LongTensor(np.asarray(seeds))
    blocks = []
    for fanout in self.fanouts:
        # For each seed node, sample ``fanout`` neighbors.
        frontier = self.sample_neighbors(self.g, seeds, fanout, replace=True)
        # Then we compact the frontier into a bipartite graph for message passing.
        block = dgl.to_block(frontier, seeds)
        # Obtain the seed nodes for the next layer.
        seeds = block.srcdata[dgl.NID]
        blocks.insert(0, block)
    return blocks
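# A hedged end-to-end sketch for the sampler above (toy graph, fanouts and
# feature tensor are assumptions, not part of the original code): features
# are gathered once for the outermost frontier, and each GNN layer then
# consumes one block.
import dgl
import torch as th

g = dgl.graph((th.tensor([0, 1, 2, 3]), th.tensor([1, 2, 3, 0])))
feats = th.randn(g.number_of_nodes(), 4)
seeds = th.LongTensor([0, 2])
blocks = []
for fanout in [2, 2]:
    frontier = dgl.sampling.sample_neighbors(g, seeds, fanout, replace=True)
    block = dgl.to_block(frontier, seeds)
    seeds = block.srcdata[dgl.NID]
    blocks.insert(0, block)
h = feats[blocks[0].srcdata[dgl.NID]]
# A model would now run: for layer, block in zip(layers, blocks): h = layer(block, h)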
def test_sage_conv(aggre_type):
    ctx = F.ctx()
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.1))
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    dst_dim = 5 if aggre_type != 'gcn' else 10
    sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
    feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 200

    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = th.unique(g.edges()[1])
    block = dgl.to_block(g, seed_nodes)
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((block.number_of_src_nodes(), 5))
    sage = sage.to(ctx)
    h = sage(block, feat)
    assert h.shape[0] == block.number_of_dst_nodes()
    assert h.shape[-1] == 10

    # Test the case for graphs without edges
    g = dgl.bipartite([], num_nodes=(5, 3))
    sage = nn.SAGEConv((3, 3), 2, 'gcn')
    feat = (F.randn((5, 3)), F.randn((3, 3)))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 3
    for aggre_type in ['mean', 'pool', 'lstm']:
        sage = nn.SAGEConv((3, 1), 2, aggre_type)
        feat = (F.randn((5, 3)), F.randn((3, 1)))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 2
        assert h.shape[0] == 3
def sample_blocks(self, seeds):
    blocks = []
    seeds = {self.category: th.tensor(seeds).long()}
    cur = seeds
    for fanout in self.fanouts:
        if fanout is None:
            frontier = dgl.in_subgraph(self.g, cur)
        else:
            frontier = dgl.sampling.sample_neighbors(self.g, cur, fanout)
        block = dgl.to_block(frontier, cur)
        cur = {}
        for ntype in block.srctypes:
            cur[ntype] = block.srcnodes[ntype].data[dgl.NID]
        blocks.insert(0, block)
    return seeds, blocks
def sample_blocks(self, seeds):
    blocks = []
    seeds = th.tensor(seeds).long()
    cur = self.target_idx[seeds]
    for fanout in self.fanouts:
        if fanout is None or fanout == -1:
            frontier = dgl.in_subgraph(self.g, cur)
        else:
            frontier = dgl.sampling.sample_neighbors(self.g, cur, fanout)
        block = dgl.to_block(frontier, cur)
        gen_norm(block)
        cur = block.srcdata[dgl.NID]
        blocks.insert(0, block)
    return seeds, blocks
def start_bipartite_sample_client(rank, tmpdir, disable_shared_mem, nodes):
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(tmpdir / 'test_sampling.json', rank)
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_sampling", gpb=gpb)
    assert 'feat' in dist_graph.nodes['user'].data
    assert 'feat' in dist_graph.nodes['game'].data
    if gpb is None:
        gpb = dist_graph.get_partition_book()
    sampled_graph = sample_neighbors(dist_graph, nodes, 3)
    block = dgl.to_block(sampled_graph, nodes)
    if sampled_graph.num_edges() > 0:
        block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    dgl.distributed.exit_client()
    return block, gpb
def sample_blocks(self, seeds):
    blocks, edges = [], []
    seeds = torch.LongTensor(np.asarray(seeds))
    for fanout in self.fanouts:
        # For each seed node, sample ``fanout`` neighbors.
        frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=False)
        # Then we compact the frontier into a bipartite graph for message passing.
        block = dgl.to_block(frontier, seeds)
        edge = frontier.edata[dgl.EID]
        # Obtain the seed nodes for the next layer.
        seeds = block.srcdata[dgl.NID]
        blocks.insert(0, block)
        edges.insert(0, edge)
    return blocks, edges
def sample_block(self, seeds):
    blocks = []
    for fanout in self.fanouts:
        # For each seed node, sample ``fanout`` neighbors.
        if fanout is None:
            frontier = dgl.in_subgraph(self.g, seeds)
        else:
            frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=False)
        # Then we compact the frontier into a bipartite graph for message passing.
        block = dgl.to_block(frontier, seeds)
        # Obtain the seed nodes for the next layer.
        seeds = block.srcdata[dgl.NID]
        blocks.insert(0, block)
    return blocks, blocks[0].srcdata[dgl.NID]
def test_nn_conv():
    ctx = F.ctx()
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv(5, 10, edge_func, 'mean')
    feat = F.randn((100, 5))
    efeat = F.randn((g.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(g, feat, efeat)
    # currently we only do shape check
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.1))
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv(5, 10, edge_func, 'mean')
    feat = F.randn((100, 5))
    efeat = F.randn((g.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(g, feat, efeat)
    # currently we only do shape check
    assert h.shape[-1] == 10

    g = dgl.bipartite(sp.sparse.random(50, 100, density=0.1))
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv((5, 2), 10, edge_func, 'mean')
    feat = F.randn((50, 5))
    feat_dst = F.randn((100, 2))
    efeat = F.randn((g.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(g, (feat, feat_dst), efeat)
    # currently we only do shape check
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = th.unique(g.edges()[1])
    block = dgl.to_block(g, seed_nodes)
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv(5, 10, edge_func, 'mean')
    feat = F.randn((block.number_of_src_nodes(), 5))
    efeat = F.randn((block.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(block, feat, efeat)
    assert h.shape[0] == block.number_of_dst_nodes()
    assert h.shape[-1] == 10
def sample_blocks(self, seeds):
    seeds = th.LongTensor(np.asarray(seeds))
    blocks = []
    for fanout in self.fanouts:
        # For each seed node, sample ``fanout`` neighbors.
        frontier = self.sample_neighbors(self.g, seeds, fanout, replace=True)
        # Then we compact the frontier into a bipartite graph for message passing.
        block = dgl.to_block(frontier, seeds)
        # Obtain the seed nodes for the next layer.
        seeds = block.srcdata[dgl.NID]
        blocks.insert(0, block)
    input_nodes = blocks[0].srcdata[dgl.NID]
    seeds = blocks[-1].dstdata[dgl.NID]
    batch_inputs, batch_labels = load_subtensor(self.g, seeds, input_nodes, "cpu")
    blocks[0].srcdata['features'] = batch_inputs
    blocks[-1].dstdata['labels'] = batch_labels
    return blocks
def sample_blocks(self, seeds): seeds = { "user": th.LongTensor(np.asarray(seeds)) } blocks = [] for fanout in self.fanouts: frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=self.replace) block = dgl.to_block(frontier, seeds) # seed是被传播的节点,所以src是输入节点 seeds = { "user": block["click"].srcdata[dgl.NID], "ad": block["click_by"].srcdata[dgl.NID] } blocks.insert(0, block) # 只在第一层插入属性 block_add_feat(blocks[0], self.g) # 获取最后一层dst的label batch_labels = get_labels(blocks[-1], self.g) return blocks, batch_labels