def test_out_subgraph(index_dtype):
    """Check ``dgl.out_subgraph`` on a heterograph assembled from four relation graphs.

    The seed nodes are users 0 and 1 and game 0.  For every relation the
    subgraph must contain exactly the edges whose source is a seed node, and
    the stored ``dgl.EID`` must equal the edge IDs of those edges in the
    parent graph.

    :param index_dtype: index dtype forwarded to every graph constructor and
        compared against ``subg._idtype_str``.
    """
    relation_graphs = [
        dgl.graph([(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1), (0, 2)],
                  'user', 'follow', index_dtype=index_dtype),
        dgl.bipartite([(0, 0), (0, 1), (1, 2), (3, 2)],
                      'user', 'play', 'game', index_dtype=index_dtype),
        dgl.bipartite([(2, 0), (2, 1), (2, 2), (1, 0), (1, 3), (0, 0)],
                      'game', 'liked-by', 'user', index_dtype=index_dtype),
        dgl.bipartite([(0, 0), (1, 0), (2, 0), (3, 0)],
                      'user', 'flips', 'coin', index_dtype=index_dtype),
    ]
    hg = dgl.hetero_from_relations(relation_graphs)

    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0})
    assert subg._idtype_str == index_dtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # (edge type, expected (src, dst) pairs), checked in the same order as
    # the relations were declared.
    expected_by_etype = [
        ('follow', {(1, 0), (0, 1), (0, 2)}),
        ('play', {(0, 0), (0, 1), (1, 2)}),
        ('liked-by', {(0, 0)}),
        ('flips', {(0, 0), (1, 0)}),
    ]
    for etype, expected in expected_by_etype:
        src, dst = subg[etype].edges()
        found = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert found == expected
        # The subgraph's stored edge IDs must point back at the parent edges.
        assert F.array_equal(hg[etype].edge_ids(src, dst),
                             subg[etype].edata[dgl.EID])
def test_out_subgraph(idtype):
    """Check ``dgl.out_subgraph`` on a four-relation heterograph.

    The seed nodes are users 0 and 1 and game 0.  For every relation the
    subgraph must contain exactly the edges whose source is a seed node, and
    the stored ``dgl.EID`` must equal the edge IDs of those edges in the
    parent graph.

    :param idtype: index dtype the graph is created with; the subgraph is
        expected to report the same ``idtype``.
    """
    graph_data = {
        ('user', 'follow', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]),
        ('game', 'liked-by', 'user'): ([2, 2, 2, 1, 1, 0], [0, 1, 2, 0, 3, 0]),
        ('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0]),
    }
    hg = dgl.heterograph(graph_data, idtype=idtype)

    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # (edge type, expected (src, dst) pairs), checked in declaration order.
    expected_by_etype = [
        ('follow', {(1, 0), (0, 1), (0, 2)}),
        ('play', {(0, 0), (0, 1), (1, 2)}),
        ('liked-by', {(0, 0)}),
        ('flips', {(0, 0), (1, 0)}),
    ]
    for etype, expected in expected_by_etype:
        src, dst = subg[etype].edges()
        found = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert found == expected
        # The subgraph's stored edge IDs must point back at the parent edges.
        assert F.array_equal(hg[etype].edge_ids(src, dst),
                             subg[etype].edata[dgl.EID])
def evaluate(g, field_ids, author_rank, true_relevance, field_paper):
    """Score authors per field and hand the predicted rankings to ``calc_metrics``.

    For each field, every paper listed in ``field_paper[f]`` is scored by the
    product of its ``'feat'`` vector with the field's ``'feat'`` vector.  The
    scores are then summed onto authors along the
    ``('paper', 'writes_rev', 'author')`` edges leaving those papers.

    :param g: heterograph with ``'field'`` and ``'paper'`` node features
        stored under ``'feat'`` and a ``('paper', 'writes_rev', 'author')``
        relation.
    :param field_ids: iterable of field IDs to evaluate.
    :param author_rank: passed through unchanged to ``calc_metrics``.
    :param true_relevance: passed through unchanged to ``calc_metrics``.
    :param field_paper: mapping from field ID to the paper IDs of that field.
    :return: whatever ``calc_metrics`` returns for the predicted rankings.
    """
    predict_rank = {}
    field_feat = g.nodes['field'].data['feat']
    # Loop-invariant: look the paper features up once instead of per field.
    paper_feat = g.nodes['paper'].data['feat']
    apg = g['paper', 'writes_rev', 'author']

    for f in field_ids:
        pid = field_paper[f]
        paper_score = torch.matmul(paper_feat[pid], field_feat[f])

        sg = dgl.out_subgraph(apg, {'paper': pid}, relabel_nodes=True)
        # NOTE(review): this assumes the relabelled 'paper' nodes of ``sg``
        # are exactly ``pid`` in the same order - confirm against the DGL
        # version in use.
        sg.nodes['paper'].data['score'] = paper_score
        # Each author receives the sum of the scores of its incident papers.
        sg.update_all(fn.copy_u('score', 's'), fn.sum('s', 's'))

        authors = sg.nodes['author'].data
        # (original author IDs, aggregated score per author)
        predict_rank[f] = (authors[dgl.NID], authors['s'])

    return calc_metrics(field_ids, author_rank, true_relevance, predict_rank)
def recall_paper(g, field_ids, num_recall):
    """Precompute the recalled papers for each field.

    A (num fields x num papers) similarity matrix is filled with the dot
    product of field and paper ``'feat'`` vectors along the
    ``'has_field_rev'`` edges leaving the given fields; all other entries
    stay zero.  The ``num_recall`` highest-scoring paper IDs are kept per row.

    :param g: DGLGraph heterogeneous graph
    :param field_ids: List[int] target field ids
    :param num_recall: number of papers recalled for each field
    :return: Dict[int, List[int]] {field_id: [paper_id]}
    """
    similarity = torch.zeros(len(field_ids), g.num_nodes('paper'))

    field_paper_graph = g['has_field_rev']
    sg = dgl.out_subgraph(field_paper_graph, {'field': field_ids}, relabel_nodes=True)
    sg.apply_edges(fn.u_dot_v('feat', 'feat', 's'))

    # NOTE(review): the relabelled field index is used directly as the row
    # index, which presumes it matches the position in ``field_ids`` - confirm.
    field_idx, paper_idx = sg.edges()
    # Map relabelled paper indices back to paper IDs in the parent graph.
    paper_ids = sg.nodes['paper'].data[dgl.NID][paper_idx]
    edge_scores = sg.edata['s'].squeeze(dim=1)
    similarity[field_idx, paper_ids] = edge_scores

    _, top_papers = similarity.topk(num_recall, dim=1)
    # Row i of ``top_papers`` belongs to the i-th entry of ``field_ids``.
    return dict(zip(field_ids, top_papers.tolist()))
def test_out_subgraph(idtype):
    """Check ``dgl.out_subgraph`` including ``store_ids`` and ``relabel_nodes``.

    Three scenarios are exercised on the same four-relation heterograph:

    1. Default call: each relation keeps exactly the edges leaving the seed
       nodes, ``dgl.EID`` maps back to the parent edges, and no ``dgl.NID``
       is stored on any node type.
    2. ``store_ids=False``: neither ``dgl.EID`` nor ``dgl.NID`` is stored.
    3. ``relabel_nodes=True``: edges are compared after mapping endpoints
       back through ``dgl.NID``, and the per-type node counts are checked.

    :param idtype: index dtype the graph is created with; every subgraph is
        expected to report the same ``idtype``.
    """
    graph_data = {
        ('user', 'follow', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]),
        ('game', 'liked-by', 'user'): ([2, 2, 2, 1, 1, 0], [0, 1, 2, 0, 3, 0]),
        ('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0]),
    }
    hg = dgl.heterograph(graph_data, idtype=idtype)

    # --- Scenario 1: default arguments -----------------------------------
    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # (edge type, expected (src, dst) pairs), checked in declaration order.
    default_cases = [
        ('follow', {(1, 0), (0, 1), (0, 2)}),
        ('play', {(0, 0), (0, 1), (1, 2)}),
        ('liked-by', {(0, 0)}),
        ('flips', {(0, 0), (1, 0)}),
    ]
    for etype, expected in default_cases:
        src, dst = subg[etype].edges()
        found = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert found == expected
        assert F.array_equal(hg[etype].edge_ids(src, dst),
                             subg[etype].edata[dgl.EID])
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # --- Scenario 2: store_ids=False -------------------------------------
    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0}, store_ids=False)
    for canonical_etype in subg.canonical_etypes:
        assert dgl.EID not in subg.edges[canonical_etype].data
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # --- Scenario 3: relabel_nodes=True ----------------------------------
    subg = dgl.out_subgraph(hg, {'user': [1], 'game': 0}, relabel_nodes=True)
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # (edge type, source node type, destination node type, expected pairs
    # expressed in parent-graph node IDs).
    relabel_cases = [
        ('follow', 'user', 'user', {(1, 0)}),
        ('play', 'user', 'game', {(1, 2)}),
        ('liked-by', 'game', 'user', {(0, 0)}),
        ('flips', 'user', 'coin', {(1, 0)}),
    ]
    for etype, src_type, dst_type, expected in relabel_cases:
        src, dst = subg[etype].edges()
        # Translate relabelled endpoints back to parent-graph node IDs.
        parent_src = F.gather_row(subg.nodes[src_type].data[dgl.NID], src)
        parent_dst = F.gather_row(subg.nodes[dst_type].data[dgl.NID], dst)
        found = set(zip(list(F.asnumpy(parent_src)),
                        list(F.asnumpy(parent_dst))))
        assert found == expected
        assert F.array_equal(hg[etype].edge_ids(parent_src, parent_dst),
                             subg[etype].edata[dgl.EID])

    # The expected counts equal the number of distinct endpoints, per node
    # type, of the edges asserted above.
    for ntype, expected_count in (('user', 2), ('game', 2), ('coin', 1)):
        assert subg.num_nodes(ntype) == expected_count
# Split point: index of the first edge after the first VALID_SPLIT fraction
# of all edges.
# NOTE(review): presumably edges are stored in chronological order, so that
# everything from ``trainval_div`` onwards is the test portion - confirm.
num_edges = data.num_edges()
trainval_div = int(VALID_SPLIT * num_edges)

# Select new nodes from the test set and remove them from the entire graph.
test_split_ts = data.edata['timestamp'][trainval_div]
test_nodes = torch.cat(
    [data.edges()[0][trainval_div:], data.edges()[1][trainval_div:]]).unique().numpy()
# Randomly pick 10% of the test-set nodes (without replacement) as "new" nodes.
test_new_nodes = np.random.choice(test_nodes, int(0.1 * len(test_nodes)), replace=False)

# All edges entering / leaving the selected new nodes.
in_subg = dgl.in_subgraph(data, test_new_nodes)
out_subg = dgl.out_subgraph(data, test_new_nodes)

# Remove the edges that happen before the test split so that no connection
# information about the new nodes can be learned.
new_node_in_eid_delete = in_subg.edata[dgl.EID][
    in_subg.edata['timestamp'] < test_split_ts]
new_node_out_eid_delete = out_subg.edata[dgl.EID][
    out_subg.edata['timestamp'] < test_split_ts]
new_node_eid_delete = torch.cat(
    [new_node_in_eid_delete, new_node_out_eid_delete]).unique()

graph_new_node = copy.deepcopy(data)
# Relative order preserved.
graph_new_node.remove_edges(new_node_eid_delete)

# For the graph without new nodes, every edge touching a new node has to be
# removed, regardless of its timestamp.
in_eid_delete = in_subg.edata[dgl.EID]
out_eid_delete = out_subg.edata[dgl.EID]