def reindex_to_parrent_nodes(self, nodes):
    """Translate node ids of this subgraph into node ids of the parent graph.

    Args:
        nodes: A list of node ids that belong to this subgraph.

    Return:
        A list holding the corresponding node ids in the parent graph.
    """
    # The lookup table is kept on the instance as `_to_reindex`.
    sub_to_parent = self._to_reindex
    return graph_kernel.map_nodes(nodes, sub_to_parent)
def reindex_from_parrent_nodes(self, nodes):
    """Translate node ids of the parent graph into node ids of this subgraph.

    Args:
        nodes: A list of node ids taken from the parent graph.

    Return:
        A list holding the corresponding subgraph node ids.
    """
    # The lookup table is kept on the instance as `_from_reindex`.
    parent_to_sub = self._from_reindex
    return graph_kernel.map_nodes(nodes, parent_to_sub)
def batch_fn(self, batch_nodes):
    """Sample the neighborhood of one batch and collect its labels.

    Args:
        batch_nodes: Node ids of the batch. They seed ``neighbor_sample``
            and index ``self.dataset.y``.

    Return:
        A tuple ``(graph_list, neigh_nodes, y, sub_label_y,
        sub_label_index, pos)`` where

        * ``graph_list`` is the first value returned by ``neighbor_sample``;
        * ``neigh_nodes`` is ``graph_list[0][1]`` as an ``int32`` array;
        * ``y`` is ``self.dataset.y[batch_nodes]``;
        * ``sub_label_y`` is ``self.dataset.y`` at the sampled nodes that
          are outside the batch and inside ``self.dataset.train_idx_label``;
        * ``sub_label_index`` is those same nodes mapped through the
          reindex table returned by ``neighbor_sample``;
        * ``pos`` is ``2021 - self.dataset.year[neigh_nodes]``.
    """
    dataset = self.dataset
    graph_list, from_reindex = neighbor_sample(dataset.graph, batch_nodes,
                                               self.samples)

    # Node ids carried by the first entry of the sampled graph list.
    neigh_nodes = np.array(graph_list[0][1], dtype='int32')
    y = dataset.y[batch_nodes]

    # Sampled nodes that are not part of the batch itself but do appear in
    # dataset.train_idx_label.
    outside_batch = set(neigh_nodes) - set(batch_nodes)
    label_idx = list(outside_batch & dataset.train_idx_label)

    sub_label_index = graph_kernel.map_nodes(label_idx, from_reindex)
    sub_label_y = dataset.y[label_idx]

    # presumably a year offset relative to 2021 -- confirm against the model
    pos = 2021 - dataset.year[neigh_nodes]

    return graph_list, neigh_nodes, y, sub_label_y, sub_label_index, pos
def neighbor_sample(graph, nodes, samples, samples_list=None):
    """Sample a layered neighborhood across a collection of graphs.

    Args:
        graph: An iterable, indexable collection of graph objects. Each one
            must provide ``sample_predecessor`` and ``edge_feat['edge_type']``;
            ``graph[0]`` is the graph handed to ``subgraph``.
        nodes: Seed node ids for the first layer.
        samples: One entry per layer. Only the number of entries and whether
            an entry equals ``-1`` are used here; the actual fan-out comes
            from ``samples_list``.
        samples_list: Optional fan-out table indexed as
            ``samples_list[layer][graph_position]``. Defaults to the
            historical table ``[[25, 10, 10, 5, 5], [15, 10, 10, 5, 5]]``.

    Return:
        A tuple ``(graph_list, from_reindex)``. ``graph_list`` holds one
        ``(subgraph, neigh_nodes, sub_node_index)`` triple per layer, in
        reverse sampling order. ``from_reindex`` maps node id to position in
        ``neigh_nodes`` for the last layer sampled; it is an empty dict when
        ``samples`` is empty.
    """
    if samples_list is None:
        samples_list = [[25, 10, 10, 5, 5], [15, 10, 10, 5, 5]]

    graph_list = []
    # Pre-bound so an empty `samples` returns ([], {}) instead of failing on
    # an unassigned name at the return statement.
    from_reindex = {}

    for idi, max_deg in enumerate(samples):
        start_nodes = copy.deepcopy(nodes)
        edges = []
        edge_ids = []
        edge_feats = []
        neigh_nodes = [start_nodes]

        if max_deg == -1:
            # NOTE(review): this branch calls `predecessor` on the `graph`
            # container itself and never uses the result, so a -1 layer adds
            # no edges and no extra nodes (or raises if the container has no
            # such method). Presumably the full predecessor set of every
            # graph was intended -- confirm before changing.
            pred_nodes, pred_eids = graph.predecessor(
                start_nodes, return_eids=True)
        else:
            for idj, g_t in enumerate(graph):
                pred_nodes, pred_eids = g_t.sample_predecessor(
                    start_nodes,
                    max_degree=samples_list[idi][idj],
                    return_eids=True)
                neigh_nodes.append(pred_nodes)
                for dst_node, src_nodes, src_eids in zip(
                        start_nodes, pred_nodes, pred_eids):
                    for src_node, src_eid in zip(src_nodes, src_eids):
                        edges.append((src_node, dst_node))
                        edge_ids.append(src_eid)
                        edge_feats.append(
                            g_t.edge_feat['edge_type'][src_eid])

        neigh_nodes = flat_node_and_edge(neigh_nodes)
        from_reindex = {x: i for i, x in enumerate(neigh_nodes)}
        sub_node_index = graph_kernel.map_nodes(nodes, from_reindex)

        sg = subgraph(
            graph[0],
            eid=edge_ids,
            nodes=neigh_nodes,
            edges=edges,
            with_node_feat=False,
            with_edge_feat=False)
        # Edge features are not copied by `subgraph` (with_edge_feat=False);
        # the edge types gathered above are attached by hand instead.
        sg._edge_feat['edge_type'] = np.array(edge_feats, dtype='int32')

        graph_list.append((sg, neigh_nodes, sub_node_index))
        # The next layer expands from everything reached so far.
        nodes = neigh_nodes

    return graph_list[::-1], from_reindex
def neighbor_sample(graph, nodes, samples):
    """Sample a layered neighborhood around ``nodes`` from a single graph.

    Args:
        graph: Graph object providing ``is_tensor``, ``predecessor`` and
            ``sample_predecessor``. ``is_tensor()`` must return a falsy value.
        nodes: Seed node ids for the first layer.
        samples: One maximum degree per layer; ``-1`` takes every predecessor
            through ``graph.predecessor`` instead of sampling.

    Return:
        A list with one ``(subgraph, neigh_nodes, sub_node_index)`` triple
        per layer, in reverse sampling order.
    """
    assert not graph.is_tensor(), "You must call Graph.numpy() first."

    layers = []
    frontier = nodes
    for fanout in samples:
        seeds = copy.deepcopy(frontier)

        if fanout != -1:
            pred_nodes, pred_eids = graph.sample_predecessor(
                seeds, max_degree=fanout, return_eids=True)
        else:
            pred_nodes, pred_eids = graph.predecessor(seeds, return_eids=True)

        # One (src, dst) edge and one edge id per sampled predecessor.
        edges, edge_ids = [], []
        for dst, srcs, eids in zip(seeds, pred_nodes, pred_eids):
            pairs = list(zip(srcs, eids))
            edges.extend((src, dst) for src, _ in pairs)
            edge_ids.extend(eid for _, eid in pairs)

        layer_nodes = flat_node_and_edge([seeds, pred_nodes])
        position_of = {node: pos for pos, node in enumerate(layer_nodes)}
        seed_index = graph_kernel.map_nodes(frontier, position_of)

        sg = subgraph(
            graph,
            eid=edge_ids,
            nodes=layer_nodes,
            edges=edges,
            with_node_feat=False,
            with_edge_feat=True)
        layers.append((sg, layer_nodes, seed_index))

        # The next layer expands from everything reached so far.
        frontier = layer_nodes

    layers.reverse()
    return layers
def graphsage_sample(graph, nodes, samples, ignore_edges=None):
    """Implement of graphsage sample.

    Reference paper: https://cs.stanford.edu/people/jure/pubs/graphsage-nips17.pdf.

    Args:
        graph: A pgl graph instance; ``graph.is_tensor()`` must be falsy.
        nodes: Sample starting from nodes.
        samples: A list, number of neighbors in each layer. Layers are
            sampled from the last entry to the first.
        ignore_edges: Optional list of edge(src, dst) that will be ignored.
            ``None`` (the default) ignores nothing.

    Return:
        A list of ``(subgraph, sample_index, node_index)`` tuples, one per
        entry in ``samples``. Every subgraph is built over the full set of
        sampled nodes; they differ only in which edges they contain.
        ``sample_index`` is that node set as an ``int64`` array and
        ``node_index`` is the input ``nodes`` mapped to positions in it.
        An empty ``samples`` yields an empty list.
    """
    assert not graph.is_tensor(), "You must call Graph.numpy() first."

    num_layers = len(samples)
    if num_layers == 0:
        # No layers requested: nothing to sample and no subgraph to build.
        return []
    if ignore_edges is None:
        ignore_edges = ()

    node_index = copy.deepcopy(nodes)
    start_nodes = nodes
    nodes = list(start_nodes)
    eids, edges = [], []
    nodes_set = set(nodes)
    layer_nodes, layer_eids, layer_edges = [], [], []
    ignore_edge_set = {edge_hash(src, dst) for src, dst in ignore_edges}

    for layer_idx in reversed(range(num_layers)):
        # `len(...) == 0` rather than truthiness: start_nodes may be an
        # array-like on the first pass.
        if len(start_nodes) == 0:
            # Frontier exhausted: this layer reuses the accumulated state.
            layer_nodes = [nodes] + layer_nodes
            layer_eids = [eids] + layer_eids
            layer_edges = [edges] + layer_edges
            continue

        batch_pred_nodes, batch_pred_eids = graph.sample_predecessor(
            start_nodes, samples[layer_idx], return_eids=True)

        last_nodes_set = nodes_set
        # Copy so the snapshots already stored for earlier layers are not
        # extended by this layer's additions.
        nodes, eids, edges = list(nodes), list(eids), list(edges)
        nodes_set, eids_set = set(nodes), set(eids)

        for srcs, dst, pred_eids in zip(batch_pred_nodes, start_nodes,
                                        batch_pred_eids):
            for src, eid in zip(srcs, pred_eids):
                if edge_hash(src, dst) in ignore_edge_set:
                    continue
                if eid not in eids_set:
                    eids.append(eid)
                    edges.append([src, dst])
                    eids_set.add(eid)
                if src not in nodes_set:
                    nodes.append(src)
                    nodes_set.add(src)

        layer_edges = [edges] + layer_edges
        # Only nodes discovered in this layer are expanded in the next one.
        start_nodes = list(nodes_set - last_nodes_set)
        layer_nodes = [nodes] + layer_nodes
        layer_eids = [eids] + layer_eids

    # layer_nodes[0] is the last snapshot taken, i.e. every node reached.
    all_nodes = layer_nodes[0]
    from_reindex = {x: i for i, x in enumerate(all_nodes)}
    node_index = graph_kernel.map_nodes(node_index, from_reindex)
    sample_index = np.array(all_nodes, dtype="int64")

    graph_list = []
    for eid_snapshot, edge_snapshot in zip(layer_eids, layer_edges):
        sg = subgraph(
            graph, nodes=all_nodes, eid=eid_snapshot, edges=edge_snapshot)
        graph_list.append((sg, sample_index, node_index))
    return graph_list