def test_neighbor_sampler():
    """Check the edge ids returned by ``neighbor_sampler`` under a fixed seed.

    Two calls are made on the same inputs, first with a float ``size`` of
    ``1.0`` and then with an integer ``size`` of ``3``. The expected id lists
    depend on the RNG state, so the cases must run in this order after the
    seed is set.
    """
    torch.manual_seed(1234)

    seeds = torch.tensor([0, 1])
    offsets = torch.tensor([0, 3, 7])

    cases = (
        (1.0, [0, 2, 1, 5, 6, 3, 4]),
        (3, [1, 0, 2, 4, 5, 6]),
    )
    for size, expected in cases:
        sampled = neighbor_sampler(seeds, offsets, size=size)
        assert sampled.tolist() == expected
def __produce_subgraph__(self, b_id):
    r"""Produces a :obj:`Data` object holding the subgraph data for a given
    mini-batch :obj:`b_id`.

    Args:
        b_id (Tensor): Node indices of the mini-batch; they seed the first
            sampling hop.

    Returns:
        Data: Holds the de-duplicated, locally relabeled :obj:`edge_index`,
        one edge identifier per kept edge (:obj:`e_id`), the node indices
        of the subgraph (:obj:`n_id`), the mini-batch (:obj:`b_id`), its
        relabeled positions (:obj:`sub_b_id`) and :obj:`num_nodes`.
    """
    n_ids = [b_id]
    e_ids = []
    edge_indices = []

    for hop in range(self.num_hops):
        # Sample around the nodes discovered in the previous hop.
        e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
        n_id = self.edge_index_j.index_select(0, e_id)
        n_id = n_id.unique(sorted=False)
        n_ids.append(n_id)
        # `e_assoc` translates sampler positions into columns of
        # `self.data.edge_index`.
        e_ids.append(self.e_assoc.index_select(0, e_id))
        edge_index = self.data.edge_index.index_select(1, e_ids[-1])
        edge_indices.append(edge_index)

    n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
    # `self.tmp` is used as a lookup table from node index to local position.
    self.tmp[n_id] = torch.arange(n_id.size(0))
    e_id = torch.cat(e_ids, dim=0)
    edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

    # Encode every (row, col) pair as a single integer so that edges sampled
    # in several hops collapse to one entry.
    num_nodes = n_id.size(0)
    idx = edge_index[0] * num_nodes + edge_index[1]
    idx, inv = idx.unique(sorted=False, return_inverse=True)
    # Integer floor division: `/` would yield floating point row indices.
    edge_index = torch.stack([idx // num_nodes, idx % num_nodes], dim=0)
    # Keep one edge identifier per unique edge.
    e_id = e_id.new_zeros(edge_index.size(1)).scatter_(0, inv, e_id)

    return Data(edge_index=edge_index, e_id=e_id, n_id=n_id, b_id=b_id,
                sub_b_id=self.tmp[b_id], num_nodes=num_nodes)
def __produce_subgraph__(self, data):
    r"""Produces a :obj:`Data` object holding the subgraph data for a given
    mini-batch.

    Args:
        data: Indexable mini-batch whose first two entries are the item and
            user node indices (:obj:`b_id`, :obj:`u_id`) and whose third
            entry holds the labels.

    Returns:
        Data: Holds the locally relabeled :obj:`edge_index`, the node
        indices of the subgraph (:obj:`n_id`), the relabeled positions of
        the batch items and users (:obj:`target_items`,
        :obj:`target_users`) and the untouched :obj:`labels`.
    """
    b_id, u_id = data[:2]
    labels = data[2]

    # Items and users together seed the first sampling hop.
    n_ids = [torch.cat((b_id, u_id))]
    edge_indices = []

    for hop in range(self.num_hops):
        e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
        n_id = self.edge_index_j.index_select(0, e_id)
        n_ids.append(n_id.unique(sorted=False))
        # `e_assoc` translates sampler positions into columns of
        # `self.edge_index`.
        e_id = self.e_assoc.index_select(0, e_id)
        edge_index = self.edge_index.index_select(1, e_id)
        # Only the first two rows are kept as the edge endpoints.
        edge_indices.append(edge_index[:2, :])

    # All nodes touched by any hop form the subgraph.
    n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
    # `self.tmp` is used as a lookup table from node index to local position.
    self.tmp[n_id] = torch.arange(n_id.size(0))
    edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

    return Data(edge_index=edge_index, n_id=n_id,
                target_items=self.tmp[b_id], target_users=self.tmp[u_id],
                labels=labels)
def __produce_bipartite_data_flow__(self, n_id):
    r"""Produces a :obj:`DataFlow` object with a bipartite assignment
    matrix for a given mini-batch :obj:`n_id`.

    Args:
        n_id (Tensor): Node indices of the mini-batch; they seed the first
            sampling hop.

    Returns:
        DataFlow: One appended block per hop, each holding the nodes reached
        in that hop, the positions of the previous hop's nodes among them
        (:obj:`None` without self-loops), the edge identifiers (:obj:`None`
        if self-loops are added but were not all part of the initial graph)
        and the relabeled bipartite :obj:`edge_index`.
    """
    data_flow = DataFlow(n_id, self.flow)

    for hop in range(self.num_hops):
        e_id = neighbor_sampler(n_id, self.cumdeg, self.size[hop])

        new_n_id = self.edge_index_j.index_select(0, e_id)
        e_id = self.e_assoc[e_id]

        if self.add_self_loops:
            new_n_id = torch.cat([new_n_id, n_id], dim=0)
            new_n_id, inv = new_n_id.unique(sorted=False,
                                            return_inverse=True)
            # The current nodes were appended last, so the tail of `inv`
            # gives their positions inside `new_n_id`.
            res_n_id = inv[-n_id.size(0):]
        else:
            new_n_id = new_n_id.unique(sorted=False)
            res_n_id = None

        edges = [None, None]

        edge_index_i = self.edge_index[self.i, e_id]
        if self.add_self_loops:
            edge_index_i = torch.cat([edge_index_i, n_id], dim=0)

        # `self.tmp` is reused as a relabeling table: first for the current
        # nodes, then (below) for the newly reached ones. The order of these
        # writes and reads must not change.
        self.tmp[n_id] = torch.arange(n_id.size(0))
        edges[self.i] = self.tmp[edge_index_i]

        edge_index_j = self.edge_index[self.j, e_id]
        if self.add_self_loops:
            edge_index_j = torch.cat([edge_index_j, n_id], dim=0)

        self.tmp[new_n_id] = torch.arange(new_n_id.size(0))
        edges[self.j] = self.tmp[edge_index_j]

        edge_index = torch.stack(edges, dim=0)

        e_id = self.e_id[e_id]
        if self.add_self_loops:
            if self.edge_index_loop.size(1) == self.data.num_nodes:
                # Only set `e_id` if all self-loops were initially passed
                # to the graph.
                e_id = torch.cat([e_id, self.e_id_loop[n_id]])
            else:
                e_id = None
                if torch_geometric.is_debug_enabled():
                    warnings.warn(
                        ('Could not add edge identifiers to the DataFlow '
                         'object due to missing initial self-loops. '
                         'Please make sure that your graph already '
                         'contains self-loops in case you want to use '
                         'edge-conditioned operators.'))

        n_id = new_n_id

        data_flow.append(n_id, res_n_id, e_id, edge_index)

    return data_flow
def __produce_subgraph__(self, data, h_item):
    r"""Produces a :obj:`Data` object holding the subgraph data for a given
    mini-batch, optionally extended with "hint" edges towards hub items.

    Args:
        data: Indexable mini-batch whose first two entries are the item and
            user node indices (:obj:`b_id`, :obj:`u_id`) and whose third
            entry holds the labels.
        h_item: Iterable of hub item indices. Only read when
            :obj:`self.use_hint` equals the string :obj:`'True'`.

    Returns:
        Data: Holds the de-duplicated, locally relabeled :obj:`edge_index`,
        the node indices of the subgraph (:obj:`n_id`), the relabeled
        positions of the batch items and users (:obj:`target_items`,
        :obj:`target_users`) and :obj:`labels`. With hints enabled it
        additionally holds :obj:`h_edge_index`, i.e. :obj:`edge_index`
        plus the de-duplicated hint edges.
    """
    b_id, u_id = data[:2]
    labels = data[2]
    # The flag is stored as a string, not a bool.
    use_hint = self.use_hint == 'True'

    # Items and users together seed the first sampling hop.
    n_ids = [torch.cat((b_id, u_id))]
    edge_indices = []

    for hop in range(self.num_hops):
        e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
        n_id = self.edge_index_j.index_select(0, e_id)
        n_id = n_id.unique(sorted=False)
        n_ids.append(n_id)
        # `e_assoc` translates sampler positions into columns of
        # `self.edge_index`.
        e_id = self.e_assoc.index_select(0, e_id)
        edge_index = self.edge_index.index_select(1, e_id)
        # Only the first two rows are kept as the edge endpoints.
        edge_indices.append(edge_index[:2, :])

    if use_hint:
        # NOTE(review): membership is tested against the nodes of the LAST
        # hop only, not against all sampled nodes -- confirm this is intended.
        last_hop_nodes = set(n_id.tolist())
        h_edge_list = []
        h_n_id = []
        for hub in h_item:
            hub_id = int(hub)
            users = self.edge_index[0, self.edge_index[1, :] == hub]
            for user in users.tolist():
                if user in last_hop_nodes:
                    h_edge_list.append([user, hub_id])
                    h_n_id.append(user)
        # An explicit integer dtype keeps empty lists usable as indices
        # (an empty `torch.tensor([])` would default to floating point).
        n_ids.append(torch.tensor(h_n_id, dtype=torch.long))
        n_ids.append(torch.as_tensor(h_item, dtype=torch.long))

    n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
    # `self.tmp` is used as a lookup table from node index to local position.
    self.tmp[n_id] = torch.arange(n_id.size(0))
    edge_index = self.tmp[torch.cat(edge_indices, dim=1)]
    num_nodes = n_id.size(0)

    def unique_edges(edges):
        # Encode every (row, col) pair as a single integer so that
        # duplicated edges collapse to one entry, then decode again.
        key = (edges[0] * num_nodes + edges[1]).unique(sorted=False)
        return torch.stack([key // num_nodes, key % num_nodes], dim=0)

    edge_index = unique_edges(edge_index)

    if not use_hint:
        return Data(edge_index=edge_index, n_id=n_id,
                    target_items=self.tmp[b_id],
                    target_users=self.tmp[u_id], labels=labels)

    # Reshape so that "no hint edges" yields a valid (2, 0) index tensor.
    hint_edges = torch.tensor(h_edge_list, dtype=torch.long)
    hint_edges = hint_edges.reshape(-1, 2).t()
    h_edge_index = torch.cat((edge_index, self.tmp[hint_edges]), 1)
    h_edge_index = unique_edges(h_edge_index)

    return Data(edge_index=edge_index, h_edge_index=h_edge_index,
                n_id=n_id, target_items=self.tmp[b_id],
                target_users=self.tmp[u_id], labels=labels)
def __produce_bipartite_data_flow__(self, n_id):
    r"""Builds a :obj:`DataFlow` with one bipartite assignment block per
    hop, starting from the mini-batch nodes :obj:`n_id`.

    Each appended block consists of the nodes reached in that hop, the
    positions of the previous hop's nodes among them (:obj:`None` when no
    self-loops are added), the edge identifiers and the relabeled
    bipartite :obj:`edge_index`.
    """
    flow = DataFlow(n_id, self.flow)

    for hop in range(self.num_hops):
        sampled = neighbor_sampler(n_id, self.cumdeg, self.size[hop])
        reached = self.edge_index_j.index_select(0, sampled)

        # `e_assoc` translates sampler positions into columns of
        # `self.edge_index` and entries of `self.e_id`.
        columns = self.e_assoc[sampled]
        side_i = self.edge_index[self.i, columns]
        side_j = self.edge_index[self.j, columns]
        edge_ids = self.e_id[columns]

        if self.add_self_loops:
            reached, inverse = torch.cat([reached, n_id], dim=0).unique(
                sorted=False, return_inverse=True)
            # The current nodes were appended last, so the tail of the
            # inverse mapping gives their positions inside `reached`.
            res_n_id = inverse[-n_id.size(0):]
            side_i = torch.cat([side_i, n_id], dim=0)
            side_j = torch.cat([side_j, n_id], dim=0)
            edge_ids = torch.cat([edge_ids, self.e_id_loop[n_id]])
        else:
            reached = reached.unique(sorted=False)
            res_n_id = None

        # `self.tmp` serves as relabeling table twice in a row; each lookup
        # must happen right after its own table write.
        relabeled = [None, None]
        self.tmp[n_id] = torch.arange(n_id.size(0))
        relabeled[self.i] = self.tmp[side_i]
        self.tmp[reached] = torch.arange(reached.size(0))
        relabeled[self.j] = self.tmp[side_j]

        n_id = reached
        flow.append(n_id, res_n_id, edge_ids, torch.stack(relabeled, dim=0))

    return flow
def __produce_subgraph__(self, b_id):
    r"""Produces a :obj:`Data` object holding the subgraph data for a given
    mini-batch :obj:`b_id`.

    Args:
        b_id (Tensor): Node indices of the mini-batch; they seed the first
            sampling hop.

    Returns:
        Data: Holds the de-duplicated, locally relabeled :obj:`edge_index`,
        one edge identifier per kept edge (:obj:`e_id`), the node indices
        of the subgraph (:obj:`n_id`), the mini-batch (:obj:`b_id`), its
        relabeled positions (:obj:`sub_b_id`), :obj:`num_nodes` and
        :obj:`full_edge_index` (:obj:`None` unless
        :obj:`self.use_negative_sampling` is set).
    """
    n_ids = [b_id]
    e_ids = []
    edge_indices = []

    for hop in range(self.num_hops):
        # Sample around the nodes discovered in the previous hop.
        e_id = neighbor_sampler(n_ids[-1], self.cumdeg, self.size[hop])
        n_id = self.edge_index_j.index_select(0, e_id)
        n_id = n_id.unique(sorted=False)
        n_ids.append(n_id)
        # `e_assoc` translates sampler positions into columns of
        # `self.data.edge_index`.
        e_ids.append(self.e_assoc.index_select(0, e_id))
        edge_index = self.data.edge_index.index_select(1, e_ids[-1])
        edge_indices.append(edge_index)

    n_id = torch.unique(torch.cat(n_ids, dim=0), sorted=False)
    # `self.tmp` is used as a lookup table from node index to local position.
    self.tmp[n_id] = torch.arange(n_id.size(0))
    e_id = torch.cat(e_ids, dim=0)
    edge_index = self.tmp[torch.cat(edge_indices, dim=1)]

    # Encode every (row, col) pair as a single integer so that edges sampled
    # in several hops collapse to one entry.
    num_nodes = n_id.size(0)
    idx = edge_index[0] * num_nodes + edge_index[1]
    idx, inv = idx.unique(sorted=False, return_inverse=True)
    # Integer floor division: `/` would yield floating point row indices.
    edge_index = torch.stack([idx // num_nodes, idx % num_nodes], dim=0)
    # Keep one edge identifier per unique edge.
    e_id = e_id.new_zeros(edge_index.size(1)).scatter_(0, inv, e_id)

    # n_id: original ID of nodes in the whole sub-graph.
    # b_id: original ID of nodes in the training graph.
    # sub_b_id: sampled ID of nodes in the training graph.

    # Get full-subgraph for negative sampling.
    # Will be deleted at __call__.
    if self.use_negative_sampling:
        adj, _ = self.adj.saint_subgraph(n_id)
        row, col, _ = adj.coo()
        full_edge_index = torch.stack([row, col], dim=0)
    else:
        full_edge_index = None

    return Data(edge_index=edge_index, e_id=e_id, n_id=n_id, b_id=b_id,
                sub_b_id=self.tmp[b_id], full_edge_index=full_edge_index,
                num_nodes=num_nodes)
def __produce__(self, n_id):
    r"""Builds a :obj:`DataFlow` with one block per hop, starting from the
    mini-batch nodes :obj:`n_id`.

    Each appended block consists of the nodes reached in that hop, the
    edge identifiers (:obj:`None` when self-loops are added) and the
    relabeled :obj:`edge_index`.
    """
    flow = DataFlow(n_id, self.flow)

    for hop in range(self.num_hops):
        sampled = neighbor_sampler(n_id, self.cumdeg, self.size[hop])
        reached = self.edge_index_j.index_select(0, sampled)

        # `e_assoc` translates sampler positions into columns of
        # `self.data.edge_index`.
        columns = self.e_assoc[sampled]
        side_i = self.data.edge_index[self.i, columns]
        side_j = self.data.edge_index[self.j, columns]

        if self.add_self_loops:
            reached = torch.cat([reached, n_id], dim=0)
            side_i = torch.cat([side_i, n_id], dim=0)
            side_j = torch.cat([side_j, n_id], dim=0)
            # Remove the edge identifier when adding self-loops to prevent
            # misused behavior.
            edge_ids = None
        else:
            edge_ids = columns
        reached = reached.unique(sorted=False)

        # `self.tmp` serves as relabeling table twice in a row; each lookup
        # must happen right after its own table write.
        relabeled = [None, None]
        self.tmp[n_id] = torch.arange(n_id.size(0))
        relabeled[self.i] = self.tmp[side_i]
        self.tmp[reached] = torch.arange(reached.size(0))
        relabeled[self.j] = self.tmp[side_j]

        n_id = reached
        flow.append(n_id, edge_ids, torch.stack(relabeled, dim=0))

    return flow
def __produce_bipartite_data_flow__(self, n_id):
    r"""Builds a :obj:`DataFlow` with one bipartite assignment block per
    hop, starting from the mini-batch nodes :obj:`n_id`.

    The node list grows from hop to hop: newly reached nodes are appended
    after all nodes collected so far. Each appended block consists of the
    grown node list, the number of nodes that were present before the hop,
    the sampled edge ids and the renumbered :obj:`edge_index`.
    """
    flow = DataFlow(n_id)
    collected = n_id

    for hop in range(self.num_hops):
        e_id = neighbor_sampler(n_id, self.cumdeg, self.nsample[hop])
        sampled_edges = self.edge_index[:, e_id].to(torch.long)

        # Both rows get the nodes collected so far appended.
        first_row = torch.cat([sampled_edges[0], collected])
        second_row = torch.cat([sampled_edges[1], collected])

        # The second row is renumbered against the node list as it was
        # BEFORE this hop, so this call must precede the extension below.
        second_relabeled = self.__renumerate__(second_row, collected)

        n_id = sampled_edges[0].unique(sorted=False)
        # Target nodes are placed first: the nodes collected so far keep
        # their leading positions and `res_size` records how many there are.
        res_size = collected.size(0)
        collected = torch.cat([collected, n_id])

        # The first row is renumbered against the extended node list.
        first_relabeled = self.__renumerate__(first_row, collected)

        edge_index = torch.stack([first_relabeled, second_relabeled], dim=0)
        flow.append(collected, res_size, e_id, edge_index)

    return flow