def get_big_graph(data, num_rels, add_reverse=True):
    """Build a single DGL graph from an array of (subject, relation, object) rows.

    Args:
        data: 2-D array whose transpose unpacks into three rows
            ``(src, rel, dst)``, i.e. one triple per input row.
        num_rels: offset added to a relation id to obtain the id of its
            reverse relation. Only used when ``add_reverse`` is true.
        add_reverse: when true (the default, matching the previously
            hard-coded behaviour) every edge ``(s, r, o)`` is followed by a
            reverse edge ``(o, r + num_rels, s)``; all forward edges come
            first and all reverse edges second.

    Returns:
        A ``dgl.DGLGraph`` carrying:
          * ``ndata['id']``   - original node id of every in-graph node, shape (N, 1)
          * ``ndata['norm']`` - output of ``comp_deg_norm`` as a column, shape (N, 1)
          * ``edata['type_s']`` - relation id per edge
          * ``edata['type_o']`` - (``add_reverse`` only) relation id per edge
            with forward/reverse ids swapped relative to ``type_s``
          * ``ids`` - dict mapping in-graph node index -> original node id
    """
    src, rel, dst = data.transpose()

    # uniq_v holds the distinct original node ids; a node's position in
    # uniq_v is its index inside the graph. `edges` holds, for every src/dst
    # endpoint, that position, so uniq_v[edges] reproduces (src, dst).
    uniq_v, edges = np.unique((src, dst), return_inverse=True)
    src, dst = np.reshape(edges, (2, -1))

    g = dgl.DGLGraph()
    g.add_nodes(len(uniq_v))

    if add_reverse:
        # Forward edges first, then the same edges with endpoints swapped.
        src, dst = np.concatenate((src, dst)), np.concatenate((dst, src))
    g.add_edges(src, dst)

    # comp_deg_norm is expected to return a NumPy array (it is wrapped with
    # torch.from_numpy below).
    norm = comp_deg_norm(g)
    g.ndata.update({
        'id': torch.from_numpy(uniq_v).long().view(-1, 1),
        'norm': torch.from_numpy(norm).view(-1, 1),
    })

    if add_reverse:
        rel_s = np.concatenate((rel, rel + num_rels))
        rel_o = np.concatenate((rel + num_rels, rel))
        g.edata['type_s'] = torch.LongTensor(rel_s)
        g.edata['type_o'] = torch.LongTensor(rel_o)
    else:
        g.edata['type_s'] = torch.LongTensor(rel)

    # In-graph node index -> original node id.
    g.ids = dict(enumerate(uniq_v))
    return g
def get_per_graph_ent_embeds(self, t_list, graph_train_list, val=False):
    """Encode a list of graphs and return the node embeddings split per graph.

    Args:
        t_list: one time value per graph, paired with ``graph_train_list``.
        graph_train_list: graphs carrying ``ndata['id']`` and
            ``edata['type_s']``.
        val: when true the graphs are used as-is; otherwise every graph is
            replaced by a randomly sampled edge subgraph.

    Returns:
        The encoder's ``ndata['h']`` split into one chunk per input graph
        (chunk sizes are the node counts of ``graph_train_list``).
    """
    if val:
        sampled_graph_list = graph_train_list
    else:
        sampled_graph_list = []
        for full_graph in graph_train_list:
            edge_types = full_graph.edata['type_s']
            edge_count = full_graph.edges()[0].shape[0]
            half = int(edge_count / 2)
            # Sample half of the ids in [0, half) and pair each with the id
            # `half` positions later.
            # NOTE(review): presumably edge i + half is the reverse of edge i
            # - confirm against the graph builder.
            picked = np.random.choice(np.arange(half),
                                      size=int(0.5 * half),
                                      replace=False)
            edge_ids = np.concatenate((picked, picked + half))

            sub = full_graph.edge_subgraph(edge_ids, preserve_nodes=True)
            deg_norm = comp_deg_norm(sub)
            sub.ndata.update({
                'id': full_graph.ndata['id'],
                'norm': torch.from_numpy(deg_norm).view(-1, 1),
            })
            sub.edata['type_s'] = edge_types[edge_ids]
            sub.ids = full_graph.ids
            sampled_graph_list.append(sub)

    # Sinusoidal time embedding per node: sin(t * w + b).
    per_graph_time = []
    for t, graph in zip(t_list, graph_train_list):
        node_ids = graph.ndata['id']
        freq = self.w_ent_embeds[node_ids].view(-1, self.embed_size)
        phase = self.b_ent_embeds[node_ids].view(-1, self.embed_size)
        per_graph_time.append(torch.sin(t * freq + phase))

    batched_graph = dgl.batch(sampled_graph_list)
    time_embeds = torch.cat(per_graph_time, dim=0)
    static_embeds = self.ent_embeds[batched_graph.ndata['id']].view(
        -1, self.embed_size)
    # Node input feature = [static embedding | time embedding].
    batched_graph.ndata['h'] = torch.cat([static_embeds, time_embeds], dim=-1)
    if self.use_cuda:
        move_dgl_to_cuda(batched_graph)

    node_sizes = [len(graph.nodes()) for graph in graph_train_list]
    encoded = self.ent_encoder(batched_graph, reverse=False)
    return encoded.ndata['h'].split(node_sizes)
def get_per_graph_ent_embeds(self, t_list, graph_train_list, val=False):
    """Encode a list of graphs using time-modulated entity embeddings.

    Args:
        t_list: one time value per graph, paired with ``graph_train_list``.
        graph_train_list: graphs carrying ``ndata['id']`` and
            ``edata['type_s']``.
        val: when true the graphs are used as-is; otherwise half of each
            graph's edges are sampled uniformly without replacement.

    Returns:
        The encoder's ``ndata['h']`` split into one chunk per input graph.
    """
    if val:
        sampled_graph_list = graph_train_list
    else:
        sampled_graph_list = []
        for full_graph in graph_train_list:
            edge_types = full_graph.edata['type_s']
            edge_count = full_graph.edges()[0].shape[0]
            total_idx = np.random.choice(np.arange(edge_count),
                                         size=int(0.5 * edge_count),
                                         replace=False)

            sub = full_graph.edge_subgraph(total_idx, preserve_nodes=True)
            norm_col = torch.from_numpy(comp_deg_norm(sub)).view(-1, 1)
            sub.ndata.update({'id': full_graph.ndata['id'], 'norm': norm_col})
            sub.edata['norm'] = node_norm_to_edge_norm(sub, norm_col)
            sub.edata['type_s'] = edge_types[total_idx]
            sub.ids = full_graph.ids
            sampled_graph_list.append(sub)

    ent_embeds = []
    for t, graph in zip(t_list, graph_train_list):
        node_ids = graph.ndata['id']
        static_part = self.ent_embeds[node_ids].view(-1, self.embed_size)

        # Temporal modulation sin(t * w + b) for the temporal slice.
        freq = self.w_temp_ent_embeds[node_ids].view(
            -1, self.temporal_embed_size)
        phase = self.b_temp_ent_embeds[node_ids].view(
            -1, self.temporal_embed_size)
        temporal_part = torch.sin(t * freq + phase)

        # The leading static_embed_size columns are multiplied by one (left
        # untouched); the remaining columns are scaled by the temporal term.
        keep = static_part.new_ones(static_part.shape[0],
                                    self.static_embed_size)
        scale = torch.cat((keep, temporal_part), dim=-1)
        ent_embeds.append(static_part * scale)

    batched_graph = dgl.batch(sampled_graph_list)
    batched_graph.ndata['h'] = torch.cat(ent_embeds, dim=0)
    if self.use_cuda:
        move_dgl_to_cuda(batched_graph)

    node_sizes = [len(graph.nodes()) for graph in graph_train_list]
    encoded = self.ent_encoder(batched_graph)
    return encoded.ndata['h'].split(node_sizes)
def get_per_graph_ent_embeds(self, g_batched_list_t, node_sizes,
                             first_prev_graph_embeds,
                             second_prev_graph_embeds, val=False):
    """Run the two-layer entity encoder over a batch of graphs.

    Args:
        g_batched_list_t: graphs carrying ``ndata['id']`` and
            ``edata['type_s']``.
        node_sizes: chunk sizes used to split the encoder outputs back into
            per-graph pieces.
        first_prev_graph_embeds: forwarded unchanged to ``self.ent_encoder``.
        second_prev_graph_embeds: forwarded unchanged to ``self.ent_encoder``.
        val: when true the graphs are used as-is; otherwise every graph is
            replaced by a randomly sampled edge subgraph.

    Returns:
        A pair ``(first_layer_splits, second_layer_splits)``: the ``ndata['h']``
        of each graph returned by the encoder, split by ``node_sizes``.
    """
    if val:
        sampled_graph_list = g_batched_list_t
    else:
        sampled_graph_list = []
        for full_graph in g_batched_list_t:
            edge_types = full_graph.edata['type_s']
            edge_count = full_graph.edges()[0].shape[0]
            half = int(edge_count / 2)
            # Sample half of the ids in [0, half) and pair each with the id
            # `half` positions later.
            # NOTE(review): presumably edge i + half is the reverse of edge i
            # - confirm against the graph builder.
            picked = np.random.choice(np.arange(half),
                                      size=int(0.5 * half),
                                      replace=False)
            edge_ids = np.concatenate((picked, picked + half))

            sub = full_graph.edge_subgraph(edge_ids, preserve_nodes=True)
            norm_col = torch.from_numpy(comp_deg_norm(sub)).view(-1, 1)
            sub.ndata.update({'id': full_graph.ndata['id'], 'norm': norm_col})
            sub.edata['norm'] = node_norm_to_edge_norm(sub, norm_col)
            sub.edata['type_s'] = edge_types[edge_ids]
            sub.ids = full_graph.ids
            sampled_graph_list.append(sub)

    batched_graph = dgl.batch(sampled_graph_list)
    node_ids = batched_graph.ndata['id']
    batched_graph.ndata['h'] = self.ent_embeds[node_ids].view(
        -1, self.embed_size)
    if self.use_cuda:
        move_dgl_to_cuda(batched_graph)

    first_layer_graph, second_layer_graph = self.ent_encoder(
        batched_graph, first_prev_graph_embeds, second_prev_graph_embeds)
    first_split = first_layer_graph.ndata['h'].split(node_sizes)
    second_split = second_layer_graph.ndata['h'].split(node_sizes)
    return first_split, second_split
def get_per_graph_ent_embeds(self, g_batched_list_t, cur_h, node_sizes,
                             val=False):
    """Encode a batch of graphs with a per-graph vector appended to every node.

    Args:
        g_batched_list_t: graphs carrying ``ndata['id']`` and
            ``edata['type_s']``.
        cur_h: indexable by graph position; ``cur_h[i]`` is broadcast to every
            node of the i-th graph (expanded to ``(size, self.embed_size)``).
        node_sizes: number of nodes per graph; also the chunk sizes of the
            returned split.
        val: when true the graphs are used as-is; otherwise every graph is
            replaced by a randomly sampled edge subgraph.

    Returns:
        The encoder's ``ndata['h']`` split by ``node_sizes``.
    """
    if val:
        sampled_graph_list = g_batched_list_t
    else:
        sampled_graph_list = []
        for full_graph in g_batched_list_t:
            edge_types = full_graph.edata['type_s']
            edge_count = full_graph.edges()[0].shape[0]
            half = int(edge_count / 2)
            # Sample half of the ids in [0, half) and pair each with the id
            # `half` positions later.
            # NOTE(review): presumably edge i + half is the reverse of edge i
            # - confirm against the graph builder.
            picked = np.random.choice(np.arange(half),
                                      size=int(0.5 * half),
                                      replace=False)
            edge_ids = np.concatenate((picked, picked + half))

            sub = full_graph.edge_subgraph(edge_ids, preserve_nodes=True)
            deg_norm = comp_deg_norm(sub)
            sub.ndata.update({
                'id': full_graph.ndata['id'],
                'norm': torch.from_numpy(deg_norm).view(-1, 1),
            })
            sub.edata['type_s'] = edge_types[edge_ids]
            sub.ids = full_graph.ids
            sampled_graph_list.append(sub)

    batched_graph = dgl.batch(sampled_graph_list)

    # Repeat each graph's vector once per node of that graph.
    repeated = []
    for graph_pos, size in enumerate(node_sizes):
        row = cur_h[graph_pos].unsqueeze(0)
        repeated.append(row.expand(size, self.embed_size))
    expanded_h = torch.cat(repeated, dim=0)

    static_embeds = self.ent_embeds[batched_graph.ndata['id']].view(
        -1, self.embed_size)
    batched_graph.ndata['h'] = torch.cat([static_embeds, expanded_h], dim=-1)
    if self.use_cuda:
        move_dgl_to_cuda(batched_graph)

    encoded = self.ent_encoder(batched_graph, reverse=False)
    return encoded.ndata['h'].split(node_sizes)
def get_batch_graph_embeds(self, g_batched_list_t, full, rate):
    """Batch a list of graphs and attach the initial entity embeddings.

    Args:
        g_batched_list_t: graphs carrying ``ndata['id']`` and
            ``edata['type_s']``.
        full: when true the graphs are batched as-is; otherwise each graph is
            replaced by a uniformly sampled edge subgraph.
        rate: fraction of edges kept per graph when ``full`` is false
            (``int(rate * num_edges)`` edges, drawn without replacement).

    Returns:
        The batched DGL graph with ``ndata['h']`` set to the rows of
        ``self.ent_embeds`` selected by ``ndata['id']``.
    """
    if full:
        sampled_graph_list = g_batched_list_t
    else:
        sampled_graph_list = []
        for full_graph in g_batched_list_t:
            edge_types = full_graph.edata['type_s']
            edge_count = full_graph.edges()[0].shape[0]
            total_idx = np.random.choice(np.arange(edge_count),
                                         size=int(rate * edge_count),
                                         replace=False)

            sub = full_graph.edge_subgraph(total_idx, preserve_nodes=True)
            norm_col = torch.from_numpy(comp_deg_norm(sub)).view(-1, 1)
            sub.ndata.update({'id': full_graph.ndata['id'], 'norm': norm_col})
            sub.edata['norm'] = node_norm_to_edge_norm(sub, norm_col)
            sub.edata['type_s'] = edge_types[total_idx]
            sub.ids = full_graph.ids
            sampled_graph_list.append(sub)

    batched_graph = dgl.batch(sampled_graph_list)
    node_ids = batched_graph.ndata['id']
    batched_graph.ndata['h'] = self.ent_embeds[node_ids].view(
        -1, self.embed_size)
    return batched_graph
def get_per_graph_ent_embeds(self, t_list, graph_train_list, val=False):
    """Encode a list of graphs with a time-aware encoder.

    Args:
        t_list: forwarded unchanged to ``self.ent_encoder`` together with the
            per-graph node counts.
        graph_train_list: graphs carrying ``ndata['id']`` and
            ``edata['type_s']``.
        val: when true the graphs are used as-is; otherwise half of each
            graph's edges are sampled uniformly without replacement.

    Returns:
        The encoder's ``ndata['h']`` split into one chunk per input graph.
    """
    if val:
        sampled_graph_list = graph_train_list
    else:
        # TODO (carried over from the original): revisit the paired
        # forward/reverse "half" sampling scheme; edges are currently drawn
        # uniformly from the whole edge set instead.
        sampled_graph_list = []
        for full_graph in graph_train_list:
            edge_types = full_graph.edata['type_s']
            edge_count = full_graph.edges()[0].shape[0]
            total_idx = np.random.choice(np.arange(edge_count),
                                         size=int(0.5 * edge_count),
                                         replace=False)

            sub = full_graph.edge_subgraph(total_idx, preserve_nodes=True)
            norm_col = torch.from_numpy(comp_deg_norm(sub)).view(-1, 1)
            sub.ndata.update({'id': full_graph.ndata['id'], 'norm': norm_col})
            sub.edata['norm'] = node_norm_to_edge_norm(sub, norm_col)
            sub.edata['type_s'] = edge_types[total_idx]
            sub.ids = full_graph.ids
            sampled_graph_list.append(sub)

    batched_graph = dgl.batch(sampled_graph_list)
    node_ids = batched_graph.ndata['id']
    batched_graph.ndata['h'] = self.ent_embeds[node_ids].view(
        -1, self.embed_size)
    if self.use_cuda:
        move_dgl_to_cuda(batched_graph)

    node_sizes = [len(graph.nodes()) for graph in graph_train_list]
    encoded = self.ent_encoder(batched_graph, t_list, node_sizes)
    return encoded.ndata['h'].split(node_sizes)
def sample_subgraph(self, cur_time, target_time):
    """Sample an edge subgraph of the training graph stored for ``cur_time``.

    Each edge is kept independently with probability ``1 - drop_rate``, where
    the drop rates are read from ``self.drop_rate_cache[target_time][cur_time]``.

    Args:
        cur_time: key into ``self.graph_dict_train`` selecting the graph.
        target_time: first key into ``self.drop_rate_cache``.

    Returns:
        The sampled subgraph (all nodes preserved) with ``ndata['id']``,
        ``ndata['norm']``, ``edata['norm']``, ``edata['type_s']`` and ``ids``
        populated.
    """
    full_graph = self.graph_dict_train[cur_time]
    edge_types = full_graph.edata['type_s']
    edge_count = full_graph.edges()[0].shape[0]

    # One Bernoulli draw per entry of the drop-rate vector; 1 means "keep".
    # NOTE(review): presumably one drop rate per edge - confirm against the
    # code that fills drop_rate_cache.
    drop_rates = self.drop_rate_cache[target_time][cur_time]
    keep_prob = 1 - torch.tensor(drop_rates)
    keep_mask = torch.bernoulli(keep_prob).eq(1)
    sampled_idx = torch.arange(edge_count)[keep_mask]

    sg = full_graph.edge_subgraph(sampled_idx, preserve_nodes=True)
    norm_col = torch.from_numpy(comp_deg_norm(sg)).view(-1, 1)
    sg.ndata.update({'id': full_graph.ndata['id'], 'norm': norm_col})
    sg.edata['norm'] = node_norm_to_edge_norm(sg, norm_col)
    sg.edata['type_s'] = edge_types[sampled_idx]
    sg.ids = full_graph.ids
    return sg
def get_per_graph_ent_embeds(self, g_batched_list_t, time_batched_list_t,
                             node_sizes, time_diff_tensor,
                             first_prev_graph_embeds,
                             second_prev_graph_embeds, val=False):
    """Run the two-layer encoder over graphs with time-modulated embeddings.

    Args:
        g_batched_list_t: graphs carrying ``ndata['id']`` and
            ``edata['type_s']``.
        time_batched_list_t: one time value per graph, paired with
            ``g_batched_list_t``.
        node_sizes: chunk sizes used to split the encoder outputs back into
            per-graph pieces.
        time_diff_tensor: forwarded unchanged to ``self.ent_encoder``.
        first_prev_graph_embeds: forwarded unchanged to ``self.ent_encoder``.
        second_prev_graph_embeds: forwarded unchanged to ``self.ent_encoder``.
        val: when true the graphs are used as-is; otherwise half of each
            graph's edges are sampled uniformly without replacement.

    Returns:
        A pair ``(first_layer_splits, second_layer_splits)``: the ``ndata['h']``
        of each graph returned by the encoder, split by ``node_sizes``.
    """
    if val:
        sampled_graph_list = g_batched_list_t
    else:
        sampled_graph_list = []
        for full_graph in g_batched_list_t:
            edge_types = full_graph.edata['type_s']
            edge_count = full_graph.edges()[0].shape[0]
            # Uniform sampling over all edges (forward/reverse pairs are not
            # kept together).
            total_idx = np.random.choice(np.arange(edge_count),
                                         size=int(0.5 * edge_count),
                                         replace=False)

            sub = full_graph.edge_subgraph(total_idx, preserve_nodes=True)
            norm_col = torch.from_numpy(comp_deg_norm(sub)).view(-1, 1)
            sub.ndata.update({'id': full_graph.ndata['id'], 'norm': norm_col})
            sub.edata['norm'] = node_norm_to_edge_norm(sub, norm_col)
            sub.edata['type_s'] = edge_types[total_idx]
            sub.ids = full_graph.ids
            sampled_graph_list.append(sub)

    ent_embeds = []
    for t, graph in zip(time_batched_list_t, g_batched_list_t):
        node_ids = graph.ndata['id']
        static_part = self.ent_embeds[node_ids].view(-1, self.embed_size)

        # Temporal modulation sin(t * w + b) for the temporal slice.
        freq = self.w_temp_ent_embeds[node_ids].view(
            -1, self.temporal_embed_size)
        phase = self.b_temp_ent_embeds[node_ids].view(
            -1, self.temporal_embed_size)
        temporal_part = torch.sin(t * freq + phase)

        # The leading static_embed_size columns are multiplied by one (left
        # untouched); the remaining columns are scaled by the temporal term.
        keep = static_part.new_ones(static_part.shape[0],
                                    self.static_embed_size)
        scale = torch.cat((keep, temporal_part), dim=-1)
        ent_embeds.append(static_part * scale)

    batched_graph = dgl.batch(sampled_graph_list)
    batched_graph.ndata['h'] = torch.cat(ent_embeds, dim=0)
    if self.use_cuda:
        move_dgl_to_cuda(batched_graph)

    first_layer_graph, second_layer_graph = self.ent_encoder(
        batched_graph, first_prev_graph_embeds, second_prev_graph_embeds,
        time_diff_tensor)
    first_split = first_layer_graph.ndata['h'].split(node_sizes)
    second_split = second_layer_graph.ndata['h'].split(node_sizes)
    return first_split, second_split
def get_train_val_test_graph_at_t(triples, num_rels, add_reverse=False):
    """Build train / validation / test DGL graphs over one shared node set.

    The three splits are concatenated so that node indices are assigned once
    (via ``np.unique``) and are therefore identical across the three graphs.

    Args:
        triples: mapping with keys ``'train'``, ``'valid'`` and ``'test'``;
            each value converts (via ``np.array``) to an array with one
            ``(subject, relation, object)`` row per triple. Any split may be
            empty.
        num_rels: offset added to a relation id to obtain the id of its
            reverse relation. Only used when ``add_reverse`` is true.
        add_reverse: when true, the training graph additionally receives a
            reverse edge for every training triple and gets both
            ``edata['type_s']`` and ``edata['type_o']``; the validation and
            test graphs are unaffected. Defaults to false, the previously
            hard-coded behaviour.

    Returns:
        ``(g_train, g_val, g_test)``. Every graph has all nodes of the shared
        node set, ``ndata['id']``, ``ndata['norm']``, ``edata['type_s']`` and
        an ``ids`` dict mapping in-graph node index -> original node id.
        Graphs built on the non-reverse path also carry ``edata['norm']``.

    Raises:
        ValueError: if all three splits are empty.
    """
    train_triples = np.array(triples['train'])
    val_triples = np.array(triples['valid'])
    test_triples = np.array(triples['test'])

    # An empty split converts to an array of shape (0,), which cannot be
    # concatenated with (n, 3) arrays. Skipping empty splits handles every
    # combination (the previous try/except fallback missed some and could
    # leave total_triples unbound).
    non_empty = [
        part for part in (train_triples, val_triples, test_triples)
        if part.shape[0] > 0
    ]
    if not non_empty:
        raise ValueError(
            'cannot build graphs: train, valid and test splits are all empty')
    total_triples = np.concatenate(non_empty, axis=0)

    src_total, rel_total, dst_total = total_triples.transpose()
    # uniq_v holds the distinct original node ids; a node's position in
    # uniq_v is its in-graph index. `edges` holds that position for every
    # src/dst endpoint, so uniq_v[edges] reproduces (src_total, dst_total).
    uniq_v, edges = np.unique((src_total, dst_total), return_inverse=True)
    src, dst = np.reshape(edges, (2, -1))

    g_train = dgl.DGLGraph()
    g_val = dgl.DGLGraph()
    g_test = dgl.DGLGraph()

    # Rows were concatenated in train, valid, test order; an empty split has
    # length 0 and therefore an empty slice.
    n_train = len(train_triples)
    n_val = len(val_triples)
    train_rows = slice(0, n_train)
    val_rows = slice(n_train, n_train + n_val)
    test_rows = slice(n_train + n_val, None)

    src_train, rel_train, dst_train = (
        src[train_rows], rel_total[train_rows], dst[train_rows])
    src_val, rel_val, dst_val = (
        src[val_rows], rel_total[val_rows], dst[val_rows])
    src_test, rel_test, dst_test = (
        src[test_rows], rel_total[test_rows], dst[test_rows])

    if add_reverse:
        # Training graph only: forward edges first, then reversed edges.
        src_train, dst_train = (np.concatenate((src_train, dst_train)),
                                np.concatenate((dst_train, src_train)))
        g_train.add_nodes(len(uniq_v))
        g_train.add_edges(src_train, dst_train)
        norm = comp_deg_norm(g_train)
        rel_o = np.concatenate((rel_train + num_rels, rel_train))
        rel_s = np.concatenate((rel_train, rel_train + num_rels))
        g_train.ndata.update({
            'id': torch.from_numpy(uniq_v).long().view(-1, 1),
            # comp_deg_norm's result is wrapped with torch.from_numpy
            # everywhere else in this function; do the same here.
            'norm': torch.from_numpy(norm).view(-1, 1),
        })
        g_train.edata['type_s'] = torch.LongTensor(rel_s)
        g_train.edata['type_o'] = torch.LongTensor(rel_o)
        # In-graph node index -> original node id, the same direction used
        # for the validation and test graphs below.
        g_train.ids = dict(enumerate(uniq_v))

        pending = [
            (g_test, src_test, rel_test, dst_test),
            (g_val, src_val, rel_val, dst_val),
        ]
    else:
        pending = [
            (g_train, src_train, rel_train, dst_train),
            (g_test, src_test, rel_test, dst_test),
            (g_val, src_val, rel_val, dst_val),
        ]

    for graph, cur_src, cur_rel, cur_dst in pending:
        graph.add_nodes(len(uniq_v))
        graph.add_edges(cur_src, cur_dst)
        node_norm = comp_deg_norm(graph)
        graph.ndata.update({
            'id': torch.from_numpy(uniq_v).long().view(-1, 1),
            'norm': torch.from_numpy(node_norm).view(-1, 1),
        })
        graph.edata['norm'] = node_norm_to_edge_norm(
            graph, torch.from_numpy(node_norm).view(-1, 1))
        graph.edata['type_s'] = torch.LongTensor(cur_rel)
        # In-graph node index -> original node id.
        graph.ids = dict(enumerate(uniq_v))

    return g_train, g_val, g_test