def create_graph(num_part, dist_graph_path, hetero):
    # ``graph_name`` is expected to be defined at module scope.
    if not hetero:
        g = dgl.rand_graph(10000, 42000)
        g.ndata['feat'] = F.unsqueeze(F.arange(0, g.number_of_nodes()), 1)
        g.edata['feat'] = F.unsqueeze(F.arange(0, g.number_of_edges()), 1)
        partition_graph(g, graph_name, num_part, dist_graph_path)
    else:
        from scipy import sparse as spsp
        num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
        etypes = [('n1', 'r1', 'n2'),
                  ('n1', 'r2', 'n3'),
                  ('n2', 'r3', 'n3')]
        edges = {}
        for etype in etypes:
            src_ntype, _, dst_ntype = etype
            arr = spsp.random(num_nodes[src_ntype], num_nodes[dst_ntype],
                              density=0.001, format='coo', random_state=100)
            edges[etype] = (arr.row, arr.col)
        g = dgl.heterograph(edges, num_nodes)
        g.nodes['n1'].data['feat'] = F.unsqueeze(
            F.arange(0, g.number_of_nodes('n1')), 1)
        g.edges['r1'].data['feat'] = F.unsqueeze(
            F.arange(0, g.number_of_edges('r1')), 1)
        partition_graph(g, graph_name, num_part, dist_graph_path)
def knn_graphE(x, k, istrain=False):
    """Transform the given point set, whose coordinates are given as a matrix,
    into a directed graph in which the predecessors of each point are its
    k nearest neighbors.

    If a 3D tensor is given instead, each 2D slice along the first axis is
    transformed into a separate graph, and the graphs are then unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.

        If 2D, each row of ``x`` corresponds to a node.

        If 3D, a k-NN graph is constructed for each slice and the graphs
        are unioned.
    k : int
        The number of neighbors.
    istrain : bool
        If True, with probability 0.5 the k neighbors are drawn at random
        from the ``round(1.5 * k)`` nearest candidates (the nearest point is
        always kept), which adds randomness during training.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)
    if istrain and np.random.rand() > 0.5:
        k_indices = F.argtopk(dist, round(1.5 * k), 2, descending=False)
        rand_k = np.random.permutation(round(1.5 * k) - 1)[0:k - 1] + 1  # 0 + random k-1
        rand_k = np.append(rand_k, 0)
        k_indices = k_indices[:, :, rand_k]  # add 0
    else:
        k_indices = F.argtopk(dist, k, 2, descending=False)

    dst = F.copy_to(k_indices, F.cpu())
    src = F.zeros_like(dst) + F.reshape(F.arange(0, n_points), (1, -1, 1))

    per_sample_offset = F.reshape(F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset
    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))))

    g = DGLGraph(adj, readonly=True)
    return g
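A minimal usage sketch for knn_graphE, assuming the module it lives in already imports numpy as np, scipy.sparse as sparse, the DGL backend as F (PyTorch backend), and DGLGraph; the wrapper name, point-cloud shape, and k below are illustrative only.

def _example_knn_graphE():
    # Hypothetical input: 128 random points in R^3, 8 nearest neighbors each.
    points = F.tensor(np.random.randn(128, 3).astype(np.float32))
    g_eval = knn_graphE(points, k=8)                 # deterministic 8-NN graph
    g_train = knn_graphE(points, k=8, istrain=True)  # may sample 8 of the 12 nearest
    # each point contributes exactly k edges (including its self-loop)
    print(g_eval.number_of_nodes(), g_eval.number_of_edges())  # expect 128 and 128 * 8
    print(g_train.number_of_nodes(), g_train.number_of_edges())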
def generate_rand_graph(n, func_name):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
    g = dgl.DGLGraph(arr, readonly=True)
    num_rels = 10
    entity_emb = F.uniform((g.number_of_nodes(), 10), F.float32, F.cpu(), 0, 1)
    if func_name == 'RotatE':
        entity_emb = F.uniform((g.number_of_nodes(), 20), F.float32, F.cpu(), 0, 1)
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        rel_emb = F.uniform((num_rels, 10 * 10), F.float32, F.cpu(), 0, 1)
    g.ndata['id'] = F.arange(0, g.number_of_nodes())
    rel_ids = np.random.randint(0, num_rels, g.number_of_edges(), dtype=np.int64)
    g.edata['id'] = F.tensor(rel_ids, F.int64)
    # TransR has an additional projection_emb
    if func_name == 'TransR':
        args = {'gpu': -1, 'lr': 0.1}
        args = dotdict(args)
        projection_emb = ExternalEmbedding(args, 10, 10 * 10, F.cpu())
        return g, entity_emb, rel_emb, (12.0, projection_emb, 10, 10)
    elif func_name == 'TransE':
        return g, entity_emb, rel_emb, (12.0)
    elif func_name == 'RESCAL':
        return g, entity_emb, rel_emb, (10, 10)
    elif func_name == 'RotatE':
        return g, entity_emb, rel_emb, (12.0, 1.0)
    else:
        return g, entity_emb, rel_emb, None
def start_server(args):
    """Start kvstore service"""
    server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

    my_server = KVServer(server_id=args.server_id,
                         server_namebook=server_namebook,
                         num_client=args.num_client)

    # ``num_entries`` is expected to be defined at module scope.
    data = F.zeros((num_entries, args.dim_size), F.float32, F.cpu())
    g2l = F.zeros(num_entries * args.num_servers, F.int64, F.cpu())
    start = num_entries * my_server.get_machine_id()
    end = num_entries * (my_server.get_machine_id() + 1)
    g2l[start:end] = F.arange(0, num_entries)

    partition = np.arange(args.num_servers)
    partition = F.tensor(np.repeat(partition, num_entries))

    if my_server.get_id() % my_server.get_group_count() == 0:  # master server
        my_server.set_global2local(name='entity_embed', global2local=g2l)
        my_server.init_data(name='entity_embed', data_tensor=data)
        my_server.set_partition_book(name='entity_embed', partition_book=partition)
    else:
        my_server.set_global2local(name='entity_embed')
        my_server.init_data(name='entity_embed')
        my_server.set_partition_book(name='entity_embed')

    my_server.print()
    my_server.start()
def __init__(self, dataset, args):
    pickle_name = 'graph_all.pickle'
    if args.pickle_graph and os.path.exists(
            os.path.join(args.data_path, args.dataset, pickle_name)):
        with open(os.path.join(args.data_path, args.dataset, pickle_name),
                  'rb') as graph_file:
            g = pickle.load(graph_file)
        print('Load pickled graph.')
    else:
        src = np.concatenate(
            (dataset.train[0], dataset.valid[0], dataset.test[0]))
        etype_id = np.concatenate(
            (dataset.train[1], dataset.valid[1], dataset.test[1]))
        dst = np.concatenate(
            (dataset.train[2], dataset.valid[2], dataset.test[2]))
        coo = sp.sparse.coo_matrix(
            (np.ones(len(src)), (src, dst)),
            shape=[dataset.n_entities, dataset.n_entities])
        g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
        g.ndata['id'] = F.arange(0, g.number_of_nodes())
        g.edata['id'] = F.tensor(etype_id, F.int64)
        if args.pickle_graph:
            with open(os.path.join(args.data_path, args.dataset, pickle_name),
                      'wb') as graph_file:
                pickle.dump(g, graph_file)
    self.g = g

    self.num_train = len(dataset.train[0])
    self.num_valid = len(dataset.valid[0])
    self.num_test = len(dataset.test[0])

    if args.eval_percent < 1:
        self.valid = np.random.randint(
            0, self.num_valid,
            size=(int(self.num_valid * args.eval_percent),)) + self.num_train
    else:
        self.valid = np.arange(self.num_train,
                               self.num_train + self.num_valid)
    print('|valid|:', len(self.valid))

    if args.eval_percent < 1:
        self.test = np.random.randint(
            0, self.num_test,
            size=(int(self.num_test * args.eval_percent),))
        self.test += self.num_train + self.num_valid
    else:
        self.test = np.arange(self.num_train + self.num_valid,
                              self.g.number_of_edges())
    print('|test|:', len(self.test))

    self.num_valid = len(self.valid)
    self.num_test = len(self.test)
def __init__(self, dataset, args):
    triples = dataset.train + dataset.valid + dataset.test
    pickle_name = "graph_all.pickle"
    if args.pickle_graph and os.path.exists(
            os.path.join(args.data_path, args.dataset, pickle_name)):
        with open(os.path.join(args.data_path, args.dataset, pickle_name),
                  "rb") as graph_file:
            g = pickle.load(graph_file)
        print("Load pickled graph.")
    else:
        src = [t[0] for t in triples]
        etype_id = [t[1] for t in triples]
        dst = [t[2] for t in triples]
        coo = sp.sparse.coo_matrix(
            (np.ones(len(src)), (src, dst)),
            shape=[dataset.n_entities, dataset.n_entities])
        g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
        g.ndata["id"] = F.arange(0, g.number_of_nodes())
        g.edata["id"] = F.tensor(etype_id, F.int64)
        if args.pickle_graph:
            with open(os.path.join(args.data_path, args.dataset, pickle_name),
                      "wb") as graph_file:
                pickle.dump(g, graph_file)
    self.g = g

    self.num_train = len(dataset.train)
    self.num_valid = len(dataset.valid)
    self.num_test = len(dataset.test)

    if args.eval_percent < 1:
        self.valid = (np.random.randint(
            0, self.num_valid,
            size=(int(self.num_valid * args.eval_percent),)) + self.num_train)
    else:
        self.valid = np.arange(self.num_train,
                               self.num_train + self.num_valid)
    print("|valid|:", len(self.valid))

    if args.eval_percent < 1:
        self.test = np.random.randint(
            0, self.num_test,
            size=(int(self.num_test * args.eval_percent),))
        self.test += self.num_train + self.num_valid
    else:
        self.test = np.arange(self.num_train + self.num_valid,
                              self.g.number_of_edges())
    print("|test|:", len(self.test))

    self.num_valid = len(self.valid)
    self.num_test = len(self.test)
def __init__(self, train_data, rank, batch_size, shuffle, rel_weight,
             neg_sample_size, chunk_size, exclude_positive=False,
             replacement=False, reset=True, drop_last=True):
    # seed_edges are the indices of the triples
    g = train_data.g
    seed_edges = train_data.edge_parts[rank]
    if seed_edges is None:
        seed_edges = F.arange(0, g.number_of_edges())
    assert batch_size % chunk_size == 0, \
        'batch size {} must be divisible by chunk size {} to enable chunk negative sampling'.format(
            batch_size, chunk_size)

    self.rels = g.edata['tid'][seed_edges]
    heads, tails = g.all_edges(order='eid')
    self.heads = heads[seed_edges]
    self.tails = tails[seed_edges]
    self.node_pool = g.nodes()
    self.reset = reset
    self.replacement = replacement
    # self.chunk_size = chunk_size
    # self.neg_sample_size = neg_sample_size
    # TODO mask all false negative rels
    self.exclude_positive = exclude_positive
    self.drop_last = drop_last
    # might be replaced by a relation weight vector if one is provided
    self.rel_weight = th.ones(len(self.rels), dtype=th.float32) \
        if rel_weight is None else rel_weight[seed_edges]

    # shuffle data
    if shuffle:
        # MARK - whether to shuffle data or shuffle indices only?
        self.node_pool = self.node_pool[th.randperm(len(self.node_pool))]
        idx = th.randperm(len(self.rels))
        self.rels = self.rels[idx]
        self.heads = self.heads[idx]
        self.tails = self.tails[idx]
        # the relation weights need to be shuffled together to stay consistent
        self.rel_weight = self.rel_weight[idx]

    self.batch_size = batch_size
    self.pool_size = self.batch_size // chunk_size * neg_sample_size
    self.iter_idx = 0
    self.pool_idx = 0
    self.step = 0
def generate_rand_graph(n):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64)
    g = dgl.DGLGraph(arr, readonly=True)
    num_rels = 10
    entity_emb = F.uniform((g.number_of_nodes(), 10), F.float32, F.cpu(), 0, 1)
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), 0, 1)
    g.ndata['id'] = F.arange(0, g.number_of_nodes())
    rel_ids = np.random.randint(0, num_rels, g.number_of_edges(), dtype=np.int64)
    g.edata['id'] = F.tensor(rel_ids, F.int64)
    return g, entity_emb, rel_emb
def ConstructGraph(edges, n_entities, args):
    pickle_name = 'graph_train.pickle'
    if args.pickle_graph and os.path.exists(
            os.path.join(args.data_path, args.dataset, pickle_name)):
        with open(os.path.join(args.data_path, args.dataset, pickle_name),
                  'rb') as graph_file:
            g = pickle.load(graph_file)
        print('Load pickled graph.')
    else:
        src, etype_id, dst = edges
        coo = sp.sparse.coo_matrix((np.ones(len(src)), (src, dst)),
                                   shape=[n_entities, n_entities])
        g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
        g.ndata['id'] = F.arange(0, g.number_of_nodes())
        g.edata['id'] = F.tensor(etype_id, F.int64)
        if args.pickle_graph:
            with open(os.path.join(args.data_path, args.dataset, pickle_name),
                      'wb') as graph_file:
                pickle.dump(g, graph_file)
    return g
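A minimal usage sketch for ConstructGraph above, assuming the module-level imports it relies on (numpy as np, scipy as sp, dgl, the backend F, os, pickle); the wrapper name, the toy triples, and the argparse.Namespace fields are made-up illustrations of what the function reads from args.

def _example_construct_graph():
    import argparse
    # Hypothetical toy dataset: three entities, two relation types, three triples.
    src = np.array([0, 1, 2])
    etype_id = np.array([0, 1, 0])
    dst = np.array([1, 2, 0])
    toy_args = argparse.Namespace(pickle_graph=False,  # skip the on-disk cache
                                  data_path='data', dataset='toy')
    g = ConstructGraph((src, etype_id, dst), n_entities=3, args=toy_args)
    print(g.number_of_nodes(), g.number_of_edges())  # expect 3 and 3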
def segmented_knn_graph(x, k, segs):
    """Transform the given point sets, whose coordinates are given as a
    matrix, into a directed graph in which the predecessors of each point
    are its k nearest neighbors.

    The point matrices are concatenated along the first axis and segmented
    by ``segs``.  Each segment is transformed into a separate graph, and the
    graphs are then unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.
    k : int
        The number of neighbors.
    segs : iterable of int
        Number of points in each point set.  Must sum up to the number of
        rows in ``x``.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    n_total_points, _ = F.shape(x)
    offset = np.insert(np.cumsum(segs), 0, 0)

    h_list = F.split(x, segs, 0)
    dst = [
        F.argtopk(pairwise_squared_distance(h_g), k, 1, descending=False) +
        offset[i]
        for i, h_g in enumerate(h_list)
    ]
    dst = F.cat(dst, 0)
    src = F.arange(0, n_total_points).unsqueeze(1).expand(n_total_points, k)

    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))  # !!! fix shape
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))),
        shape=(n_total_points, n_total_points))

    g = DGLGraph(adj, readonly=True)
    return g
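And a companion sketch for segmented_knn_graph, under the same assumptions as the knn_graphE example (PyTorch backend, module-level np/F/sparse/DGLGraph imports); the wrapper name and segment sizes are illustrative.

def _example_segmented_knn_graph():
    # Hypothetical input: two point clouds of 30 and 50 points stacked row-wise.
    clouds = F.tensor(np.random.randn(80, 3).astype(np.float32))
    g = segmented_knn_graph(clouds, k=4, segs=[30, 50])
    # neighbors are only searched within each segment
    print(g.number_of_nodes(), g.number_of_edges())  # expect 80 and 80 * 4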
def check_topk_score2(score_model, exclude_mode):
    num_entity = 40
    num_rels = 4

    # Build a bidirectional ring over num_entity nodes (each node connects to
    # its successor and predecessor), replicated once per relation type.
    src = F.arange(0, num_entity)
    dst1 = src + 1
    dst1[num_entity - 1] = 0
    dst2 = src - 1
    dst2[0] = num_entity - 1
    src = F.cat([src, src], dim=0)
    dst = F.cat([dst1, dst2], dim=0)
    src = F.cat([src, src, src, src], dim=0)
    dst = F.cat([dst, dst, dst, dst], dim=0)
    etype = F.cat([th.full((num_entity * 2,), 0, dtype=th.long),
                   th.full((num_entity * 2,), 1, dtype=th.long),
                   th.full((num_entity * 2,), 2, dtype=th.long),
                   th.full((num_entity * 2,), 3, dtype=th.long)],
                  dim=0)
    g = dgl.graph((src, dst))
    g.edata['tid'] = etype

    _check_topk_score2(score_model, g, num_entity, num_rels, exclude_mode)
def ConstructGraph(edges, n_entities, i, args):
    pickle_name = "graph_train_{}.pickle".format(i)
    if args.pickle_graph and os.path.exists(
            os.path.join(args.data_path, args.dataset, pickle_name)):
        with open(os.path.join(args.data_path, args.dataset, pickle_name),
                  "rb") as graph_file:
            g = pickle.load(graph_file)
        print("Load pickled graph.")
    else:
        src = [t[0] for t in edges]
        etype_id = [t[1] for t in edges]
        dst = [t[2] for t in edges]
        coo = sp.sparse.coo_matrix((np.ones(len(src)), (src, dst)),
                                   shape=[n_entities, n_entities])
        g = dgl.DGLGraph(coo, readonly=True, sort_csr=True)
        g.ndata["id"] = F.arange(0, g.number_of_nodes())
        g.edata["id"] = F.tensor(etype_id, F.int64)
        if args.pickle_graph:
            with open(os.path.join(args.data_path, args.dataset, pickle_name),
                      "wb") as graph_file:
                pickle.dump(g, graph_file)
    return g
def run_topk_emb2(sfunc, sim_func, emb_model): hidden_dim = 32 num_head = 40 num_tail = 40 num_emb = 80 with tempfile.TemporaryDirectory() as tmpdirname: emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1) create_emb_file(Path(tmpdirname), 'entity.npy', emb.numpy()) create_emb_file(Path(tmpdirname), 'relation.npy', emb.numpy()) emb_model.load(Path(tmpdirname)) head = F.arange(0, num_head) tail = F.arange(num_head, num_head+num_tail) result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc, pair_ws=True) scores = [] head_ids = [] tail_ids = [] for i in range(head.shape[0]): j = i hemb = F.take(emb, head[i], 0) temb = F.take(emb, tail[j], 0) score = sim_func(hemb, temb) scores.append(F.asnumpy(score)) head_ids.append(F.asnumpy(head[i])) tail_ids.append(F.asnumpy(tail[j])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_tail, r1_score = result1[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_tail, tail_ids) print('pass pair wise') head = F.arange(0, num_head) tail = F.arange(num_head, num_head+num_tail) result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc) assert len(result1) == 1 scores = [] head_ids = [] tail_ids = [] for i in range(head.shape[0]): for j in range(tail.shape[0]): hemb = F.take(emb, head[i], 0) temb = F.take(emb, tail[j], 0) score = sim_func(hemb, temb) scores.append(F.asnumpy(score)) head_ids.append(F.asnumpy(head[i])) tail_ids.append(F.asnumpy(tail[j])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_tail, r1_score = result1[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_tail, tail_ids) emb_ids = F.arange(0, num_emb) result1 = emb_model.embed_sim(emb_ids, emb_ids, 'entity', sfunc=sfunc, bcast=True) result2 = emb_model.embed_sim(embed_type='entity', sfunc=sfunc, bcast=True) assert len(result1) == emb_ids.shape[0] assert len(result2) == emb_ids.shape[0] for i in range(emb_ids.shape[0]): scores = [] head_ids = [] tail_ids = [] for j in range(emb_ids.shape[0]): hemb = F.take(emb, emb_ids[i], 0) temb = F.take(emb, emb_ids[j], 0) score = sim_func(hemb, temb) score = F.asnumpy(score) scores.append(score) tail_ids.append(F.asnumpy(emb_ids[j])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = np.full((10,), F.asnumpy(emb_ids[i])) tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_tail, r1_score = result1[i] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_tail, tail_ids) r2_head, r2_tail, r2_score = result2[i] np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r2_tail, tail_ids) print('pass all')
def topK(self, head=None, rel=None, tail=None, exec_mode='all', k=10):
    if head is None:
        head = F.arange(0, self.model.num_entity)
    else:
        head = F.tensor(head)
    if rel is None:
        rel = F.arange(0, self.model.num_rel)
    else:
        rel = F.tensor(rel)
    if tail is None:
        tail = F.arange(0, self.model.num_entity)
    else:
        tail = F.tensor(tail)

    num_head = F.shape(head)[0]
    num_rel = F.shape(rel)[0]
    num_tail = F.shape(tail)[0]

    if exec_mode == 'triplet_wise':
        result = []
        assert num_head == num_rel, \
            'For triplet wise execution mode, head, relation and tail lists should have the same length'
        assert num_head == num_tail, \
            'For triplet wise execution mode, head, relation and tail lists should have the same length'

        raw_score = self.model.score(head, rel, tail, triplet_wise=True)
        score = self.score_func(raw_score)
        idx = F.arange(0, num_head)

        sidx = F.argsort(score, dim=0, descending=True)
        sidx = sidx[:k]
        score = score[sidx]
        idx = idx[sidx]

        result.append((F.asnumpy(head[idx]),
                       F.asnumpy(rel[idx]),
                       F.asnumpy(tail[idx]),
                       F.asnumpy(score)))
    elif exec_mode == 'all':
        result = []
        raw_score = self.model.score(head, rel, tail)
        score = self.score_func(raw_score)
        idx = F.arange(0, num_head * num_rel * num_tail)

        sidx = F.argsort(score, dim=0, descending=True)
        sidx = sidx[:k]
        score = score[sidx]
        idx = idx[sidx]

        tail_idx = idx % num_tail
        idx = floor_divide(idx, num_tail)
        rel_idx = idx % num_rel
        idx = floor_divide(idx, num_rel)
        head_idx = idx % num_head

        result.append((F.asnumpy(head[head_idx]),
                       F.asnumpy(rel[rel_idx]),
                       F.asnumpy(tail[tail_idx]),
                       F.asnumpy(score)))
    elif exec_mode == 'batch_head':
        result = []
        for i in range(num_head):
            raw_score = self.model.score(F.unsqueeze(head[i], 0), rel, tail)
            score = self.score_func(raw_score)
            idx = F.arange(0, num_rel * num_tail)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            idx = idx[sidx]

            tail_idx = idx % num_tail
            idx = floor_divide(idx, num_tail)
            rel_idx = idx % num_rel

            result.append((np.full((k,), F.asnumpy(head[i])),
                           F.asnumpy(rel[rel_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))
    elif exec_mode == 'batch_rel':
        result = []
        for i in range(num_rel):
            raw_score = self.model.score(head, F.unsqueeze(rel[i], 0), tail)
            score = self.score_func(raw_score)
            idx = F.arange(0, num_head * num_tail)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            idx = idx[sidx]

            tail_idx = idx % num_tail
            idx = floor_divide(idx, num_tail)
            head_idx = idx % num_head

            result.append((F.asnumpy(head[head_idx]),
                           np.full((k,), F.asnumpy(rel[i])),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))
    elif exec_mode == 'batch_tail':
        result = []
        for i in range(num_tail):
            raw_score = self.model.score(head, rel, F.unsqueeze(tail[i], 0))
            score = self.score_func(raw_score)
            idx = F.arange(0, num_head * num_rel)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            idx = idx[sidx]

            rel_idx = idx % num_rel
            idx = floor_divide(idx, num_rel)
            head_idx = idx % num_head

            result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(rel[rel_idx]),
                           np.full((k,), F.asnumpy(tail[i])),
                           F.asnumpy(score)))
    else:
        assert False, 'unknown execution mode type {}'.format(exec_mode)

    return result
def topK(self, head=None, tail=None, bcast=False, pair_ws=False, k=10):
    if head is None:
        head = F.arange(0, self.emb.shape[0])
    else:
        head = F.tensor(head)
    if tail is None:
        tail = F.arange(0, self.emb.shape[0])
    else:
        tail = F.tensor(tail)

    head_emb = self.emb[head]
    tail_emb = self.emb[tail]

    if pair_ws is True:
        result = []
        batch_size = self.batch_size
        # calculate scores in chunks
        score = []
        num_head = head.shape[0]
        num_tail = tail.shape[0]

        for i in range((num_head + batch_size - 1) // batch_size):
            sh_emb = head_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            sh_emb = F.copy_to(sh_emb, self.device)
            st_emb = tail_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            st_emb = F.copy_to(st_emb, self.device)
            score.append(F.copy_to(self.sim_func(sh_emb, st_emb, pw=True), F.cpu()))
        score = F.cat(score, dim=0)

        sidx = F.argsort(score, dim=0, descending=True)
        sidx = sidx[:k]
        score = score[sidx]
        result.append((F.asnumpy(head[sidx]),
                       F.asnumpy(tail[sidx]),
                       F.asnumpy(score)))
    else:
        num_head = head.shape[0]
        num_tail = tail.shape[0]
        batch_size = self.batch_size

        # calculate scores in chunks
        score = []
        for i in range((num_head + batch_size - 1) // batch_size):
            sh_emb = head_emb[i * batch_size : (i + 1) * batch_size
                              if (i + 1) * batch_size < num_head
                              else num_head]
            sh_emb = F.copy_to(sh_emb, self.device)
            s_score = []
            for j in range((num_tail + batch_size - 1) // batch_size):
                st_emb = tail_emb[j * batch_size : (j + 1) * batch_size
                                  if (j + 1) * batch_size < num_tail
                                  else num_tail]
                st_emb = F.copy_to(st_emb, self.device)
                s_score.append(F.copy_to(self.sim_func(sh_emb, st_emb), F.cpu()))
            score.append(F.cat(s_score, dim=1))
        score = F.cat(score, dim=0)

        if bcast is False:
            result = []
            idx = F.arange(0, num_head * num_tail)
            score = F.reshape(score, (num_head * num_tail,))

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            idx = idx[sidx]
            tail_idx = idx % num_tail
            idx = floor_divide(idx, num_tail)
            head_idx = idx % num_head

            result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))
        else:  # bcast at head
            result = []
            for i in range(num_head):
                i_score = score[i]

                sidx = F.argsort(i_score, dim=0, descending=True)
                idx = F.arange(0, num_tail)
                i_idx = sidx[:k]
                i_score = i_score[i_idx]
                idx = idx[i_idx]

                result.append((np.full((k,), F.asnumpy(head[i])),
                               F.asnumpy(tail[idx]),
                               F.asnumpy(i_score)))

    return result
def dist_train_test(args, model, train_sampler, entity_pb, relation_pb, l2g, rank=0, rel_parts=None, cross_rels=None, barrier=None): if args.num_proc > 1: th.set_num_threads(args.num_thread) client = connect_to_kvstore(args, entity_pb, relation_pb, l2g) client.barrier() train_time_start = time.time() train(args, model, train_sampler, None, rank, rel_parts, cross_rels, barrier, client) client.barrier() print('Total train time {:.3f} seconds'.format(time.time() - train_time_start)) model = None if client.get_id() % args.num_client == 0: # pull full model from kvstore args.num_test_proc = args.num_client dataset_full = get_dataset(args.data_path, args.dataset, args.format) print('Full data n_entities: ' + str(dataset_full.n_entities)) print("Full data n_relations: " + str(dataset_full.n_relations)) model_test = load_model(None, args, dataset_full.n_entities, dataset_full.n_relations) eval_dataset = EvalDataset(dataset_full, args) if args.test: model_test.share_memory() if args.neg_sample_size_test < 0: args.neg_sample_size_test = dataset_full.n_entities args.eval_filter = not args.no_eval_filter if args.neg_deg_sample_eval: assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges." if args.neg_chunk_size_valid < 0: args.neg_chunk_size_valid = args.neg_sample_size_valid if args.neg_chunk_size_test < 0: args.neg_chunk_size_test = args.neg_sample_size_test print("Pull relation_emb ...") relation_id = F.arange(0, model_test.n_relations) relation_data = client.pull(name='relation_emb', id_tensor=relation_id) model_test.relation_emb.emb[relation_id] = relation_data print("Pull entity_emb ... ") # split model into 100 small parts start = 0 percent = 0 entity_id = F.arange(0, model_test.n_entities) count = int(model_test.n_entities / 100) end = start + count while True: print("Pull %d / 100 ..." 
% percent) if end >= model_test.n_entities: end = -1 tmp_id = entity_id[start:end] entity_data = client.pull(name='entity_emb', id_tensor=tmp_id) model_test.entity_emb.emb[tmp_id] = entity_data if end == -1: break start = end end += count percent += 1 if args.save_emb is not None: if not os.path.exists(args.save_emb): os.mkdir(args.save_emb) model_test.save_emb(args.save_emb, args.dataset) if args.test: args.num_thread = 1 test_sampler_tails = [] test_sampler_heads = [] for i in range(args.num_test_proc): test_sampler_head = eval_dataset.create_sampler( 'test', args.batch_size_eval, args.neg_sample_size_test, args.neg_chunk_size_test, args.eval_filter, mode='chunk-head', num_workers=args.num_thread, rank=i, ranks=args.num_test_proc) test_sampler_tail = eval_dataset.create_sampler( 'test', args.batch_size_eval, args.neg_sample_size_test, args.neg_chunk_size_test, args.eval_filter, mode='chunk-tail', num_workers=args.num_thread, rank=i, ranks=args.num_test_proc) test_sampler_heads.append(test_sampler_head) test_sampler_tails.append(test_sampler_tail) eval_dataset = None dataset_full = None print("Run test, test processes: %d" % args.num_test_proc) queue = mp.Queue(args.num_test_proc) procs = [] for i in range(args.num_test_proc): proc = mp.Process(target=test_mp, args=(args, model_test, [ test_sampler_heads[i], test_sampler_tails[i] ], i, 'Test', queue)) procs.append(proc) proc.start() total_metrics = {} metrics = {} logs = [] for i in range(args.num_test_proc): log = queue.get() logs = logs + log for metric in logs[0].keys(): metrics[metric] = sum([log[metric] for log in logs]) / len(logs) for k, v in metrics.items(): print('Test average {} at [{}/{}]: {}'.format( k, args.step, args.max_step, v)) for proc in procs: proc.join() if client.get_id() == 0: client.shut_down()
def _check_topk_score2(score_model, g, num_entity, num_rels, exclude_mode): hidden_dim = 32 num_entity = 40 num_rels = 4 with tempfile.TemporaryDirectory() as tmpdirname: entity_emb, rel_emb = generate_rand_emb(score_model.model_name, num_entity, num_rels, hidden_dim, 'none') create_emb_file(Path(tmpdirname), 'entity.npy', entity_emb.numpy()) create_emb_file(Path(tmpdirname), 'relation.npy', rel_emb.numpy()) score_model.load(Path(tmpdirname)) score_model.attach_graph(g) score_func = score_model._score_func head = F.arange(0, num_entity // 2) rel = F.arange(0, num_rels) tail = F.arange(num_entity // 2, num_entity) # exec_model==triplet_wise tw_rel = np.random.randint(0, num_rels, num_entity // 2) tw_rel = F.tensor(tw_rel) result1 = score_model.link_predict(head, tw_rel, tail, exec_mode='triplet_wise', exclude_mode=exclude_mode, batch_size=16) assert len(result1) == 1 scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, tw_rel[i], 0) temb = F.unsqueeze(F.take(entity_emb, tail[i], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(tw_rel[i])) tail_ids.append(F.asnumpy(tail[i])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] if exclude_mode is None or exclude_mode == 'mask': idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] if exclude_mode == 'mask': mask = np.zeros((10,)) for i in range(10): if (head_ids[i] + 1) % num_entity == tail_ids[i] or \ (head_ids[i] - 1) % num_entity == tail_ids[i]: mask[i] = 1 else: c_head_idx = [] c_rel_idx = [] c_tail_idx = [] c_score_topk = [] cur_idx = 0 while len(c_head_idx) < 10: c_idx = idx[cur_idx] cur_idx += 1 if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \ (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]: continue c_head_idx.append(head_ids[c_idx]) c_tail_idx.append(tail_ids[c_idx]) c_rel_idx.append(rel_ids[c_idx]) c_score_topk.append(scores[c_idx]) head_ids = F.tensor(c_head_idx) rel_ids = F.tensor(c_rel_idx) tail_ids = F.tensor(c_tail_idx) score_topk = F.tensor(c_score_topk) r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[0] np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) if exclude_mode == 'mask': np.testing.assert_allclose(r1_mask, mask) else: assert r1_mask is None # exec_mode==all result1 = score_model.link_predict(head, rel, tail, topk=20, exclude_mode=exclude_mode, batch_size=16) result2 = score_model.link_predict(head=head, tail=tail, topk=20, exclude_mode=exclude_mode, batch_size=16) assert len(result1) == 1 assert len(result2) == 1 scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for j in range(rel.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = 
np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] if exclude_mode is None or exclude_mode == 'mask': idx = idx[:20] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] if exclude_mode == 'mask': mask = np.zeros((20,)) for i in range(20): if (head_ids[i] + 1) % num_entity == tail_ids[i] or \ (head_ids[i] - 1) % num_entity == tail_ids[i]: mask[i] = 1 else: c_head_idx = [] c_rel_idx = [] c_tail_idx = [] c_score_topk = [] cur_idx = 0 while len(c_head_idx) < 20: c_idx = idx[cur_idx] cur_idx += 1 if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \ (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]: continue c_head_idx.append(head_ids[c_idx]) c_tail_idx.append(tail_ids[c_idx]) c_rel_idx.append(rel_ids[c_idx]) c_score_topk.append(scores[c_idx]) head_ids = F.tensor(c_head_idx) rel_ids = F.tensor(c_rel_idx) tail_ids = F.tensor(c_tail_idx) score_topk = F.tensor(c_score_topk) r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[0] r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) if exclude_mode == 'mask': np.testing.assert_allclose(r1_mask, mask) np.testing.assert_allclose(r2_mask, mask) else: assert r1_mask is None assert r2_mask is None result1 = score_model.link_predict(head, rel, tail, exec_mode='batch_rel', exclude_mode=exclude_mode, batch_size=16) result2 = score_model.link_predict(head=head, tail=tail, exec_mode='batch_rel', exclude_mode=exclude_mode, batch_size=16) assert len(result1) == num_rels assert len(result2) == num_rels for j in range(rel.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] if exclude_mode is None or exclude_mode == 'mask': idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] if exclude_mode == 'mask': mask = np.full((10,), False) for i in range(10): if (head_ids[i] + 1) % num_entity == tail_ids[i] or \ (head_ids[i] - 1) % num_entity == tail_ids[i]: mask[i] = True else: c_head_idx = [] c_rel_idx = [] c_tail_idx = [] c_score_topk = [] cur_idx = 0 while len(c_head_idx) < 10: c_idx = idx[cur_idx] cur_idx += 1 if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \ (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]: continue c_head_idx.append(head_ids[c_idx]) c_tail_idx.append(tail_ids[c_idx]) c_rel_idx.append(rel_ids[c_idx]) c_score_topk.append(scores[c_idx]) head_ids = 
F.tensor(c_head_idx) rel_ids = F.tensor(c_rel_idx) tail_ids = F.tensor(c_tail_idx) score_topk = F.tensor(c_score_topk) r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[j] r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[j] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) if exclude_mode == 'mask': np.testing.assert_allclose(r1_mask, mask) np.testing.assert_allclose(r2_mask, mask) else: assert r1_mask is None assert r2_mask is None head = F.arange(0, num_entity) rel = F.arange(0, num_rels) tail = F.arange(0, num_entity) result1 = score_model.link_predict(head, rel, tail, exec_mode='batch_head', exclude_mode=exclude_mode, batch_size=16) result2 = score_model.link_predict(exec_mode='batch_head', exclude_mode=exclude_mode, batch_size=16) assert len(result1) == num_entity assert len(result2) == num_entity for i in range(head.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for j in range(rel.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] if exclude_mode is None or exclude_mode == 'mask': idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] if exclude_mode == 'mask': mask = np.full((10,), False) for l in range(10): if (head_ids[l] + 1) % num_entity == tail_ids[l] or \ (head_ids[l] - 1) % num_entity == tail_ids[l]: mask[l] = True else: c_head_idx = [] c_rel_idx = [] c_tail_idx = [] c_score_topk = [] cur_idx = 0 while len(c_head_idx) < 10: c_idx = idx[cur_idx] cur_idx += 1 if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \ (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]: continue c_head_idx.append(head_ids[c_idx]) c_tail_idx.append(tail_ids[c_idx]) c_rel_idx.append(rel_ids[c_idx]) c_score_topk.append(scores[c_idx]) head_ids = F.tensor(c_head_idx) rel_ids = F.tensor(c_rel_idx) tail_ids = F.tensor(c_tail_idx) score_topk = F.tensor(c_score_topk) r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[i] r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[i] np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) if exclude_mode == 'mask': np.testing.assert_allclose(r1_mask, mask) np.testing.assert_allclose(r2_mask, mask) else: assert r1_mask is None assert r2_mask is None result1 = score_model.link_predict(head, rel, tail, exec_mode='batch_tail', 
exclude_mode=exclude_mode) result2 = score_model.link_predict(exec_mode='batch_tail', exclude_mode=exclude_mode) assert len(result1) == num_entity assert len(result2) == num_entity for k in range(tail.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for j in range(rel.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] if exclude_mode is None or exclude_mode == 'mask': idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] if exclude_mode == 'mask': mask = np.full((10,), False) for l in range(10): if (head_ids[l] + 1) % num_entity == tail_ids[l] or \ (head_ids[l] - 1) % num_entity == tail_ids[l]: mask[l] = True else: c_head_idx = [] c_rel_idx = [] c_tail_idx = [] c_score_topk = [] cur_idx = 0 while len(c_head_idx) < 10: c_idx = idx[cur_idx] cur_idx += 1 if (head_ids[c_idx] + 1) % num_entity == tail_ids[c_idx] or \ (head_ids[c_idx] - 1) % num_entity == tail_ids[c_idx]: continue c_head_idx.append(head_ids[c_idx]) c_tail_idx.append(tail_ids[c_idx]) c_rel_idx.append(rel_ids[c_idx]) c_score_topk.append(scores[c_idx]) head_ids = F.tensor(c_head_idx) rel_ids = F.tensor(c_rel_idx) tail_ids = F.tensor(c_tail_idx) score_topk = F.tensor(c_score_topk) r1_head, r1_rel, r1_tail, r1_score, r1_mask = result1[k] r2_head, r2_rel, r2_tail, r2_score, r2_mask = result2[k] np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) if exclude_mode == 'mask': np.testing.assert_allclose(r1_mask, mask) np.testing.assert_allclose(r2_mask, mask) else: assert r1_mask is None assert r2_mask is None
def run_topk_emb(sfunc, sim_func, create_emb_sim=create_kge_emb_sim): hidden_dim = 32 num_head = 40 num_tail = 40 num_emb = 80 emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1) head = F.arange(0, num_head) tail = F.arange(num_head, num_head+num_tail) sim_infer = create_emb_sim(emb, sfunc) result1 = sim_infer.topK(head, tail, pair_ws=True) scores = [] head_ids = [] tail_ids = [] for i in range(head.shape[0]): j = i hemb = F.take(emb, head[i], 0) temb = F.take(emb, tail[j], 0) score = sim_func(hemb, temb) scores.append(F.asnumpy(score)) head_ids.append(F.asnumpy(head[i])) tail_ids.append(F.asnumpy(tail[j])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_tail, r1_score = result1[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_tail, tail_ids) print('pass pair wise') head = F.arange(0, num_head) tail = F.arange(num_head, num_head+num_tail) result1 = sim_infer.topK(head, tail) assert len(result1) == 1 scores = [] head_ids = [] tail_ids = [] for i in range(head.shape[0]): for j in range(tail.shape[0]): hemb = F.take(emb, head[i], 0) temb = F.take(emb, tail[j], 0) score = sim_func(hemb, temb) scores.append(F.asnumpy(score)) head_ids.append(F.asnumpy(head[i])) tail_ids.append(F.asnumpy(tail[j])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_tail, r1_score = result1[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_tail, tail_ids) emb_ids = F.arange(0, num_emb) result1 = sim_infer.topK(emb_ids, emb_ids, bcast=True) result2 = sim_infer.topK(bcast=True) assert len(result1) == emb_ids.shape[0] assert len(result2) == emb_ids.shape[0] for i in range(emb_ids.shape[0]): scores = [] head_ids = [] tail_ids = [] for j in range(emb_ids.shape[0]): hemb = F.take(emb, emb_ids[i], 0) temb = F.take(emb, emb_ids[j], 0) score = sim_func(hemb, temb) score = F.asnumpy(score) scores.append(score) tail_ids.append(F.asnumpy(emb_ids[j])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = np.full((10,), F.asnumpy(emb_ids[i])) tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_tail, r1_score = result1[i] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_tail, tail_ids) r2_head, r2_tail, r2_score = result2[i] np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r2_tail, tail_ids) print('pass all')
def check_topk_score(model_name): hidden_dim = 32 gamma = 12.0 num_entity = 40 num_rels = 4 score_model = ScoreInfer(-1, 'config', 'path', 'none') if model_name == 'TransE' or \ model_name =='TransE_l1' or \ model_name == 'TransE_l2' or \ model_name == 'DistMult' or \ model_name == 'ComplEx': model = InferModel('cpu', model_name, hidden_dim, batch_size=16) elif model_name == 'RESCAL': model = InferModel('cpu', model_name, hidden_dim) elif model_name == 'RotatE': model = InferModel('cpu', model_name, hidden_dim, double_entity_emb=True) entity_emb, rel_emb = generate_rand_emb(model_name, num_entity, num_rels, hidden_dim, 'none') model.entity_emb = InferEmbedding('cpu') model.entity_emb.emb = entity_emb model.relation_emb = InferEmbedding('cpu') model.relation_emb.emb = rel_emb score_model.model = model score_func = model.score_func head = F.arange(0, num_entity // 2) rel = F.arange(0, num_rels) tail = F.arange(num_entity // 2, num_entity) # exec_model==triplet_wise tw_rel = np.random.randint(0, num_rels, num_entity // 2) tw_rel = F.tensor(tw_rel) result1 = score_model.topK(head, tw_rel, tail, exec_mode='triplet_wise') assert len(result1) == 1 scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, tw_rel[i], 0) temb = F.unsqueeze(F.take(entity_emb, tail[i], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(tw_rel[i])) tail_ids.append(F.asnumpy(tail[i])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) # exec_mode==all result1 = score_model.topK(head, rel, tail, k=20) result2 = score_model.topK(head=head, tail=tail, k=20) assert len(result1) == 1 assert len(result2) == 1 scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for j in range(rel.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:20] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[0] r2_head, r2_rel, r2_tail, r2_score = result2[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) 
np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) result1 = score_model.topK(head, rel, tail, exec_mode='batch_rel') result2 = score_model.topK(head=head, tail=tail, exec_mode='batch_rel') assert len(result1) == num_rels assert len(result2) == num_rels for j in range(rel.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[j] r2_head, r2_rel, r2_tail, r2_score = result2[j] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) head = F.arange(0, num_entity) rel = F.arange(0, num_rels) tail = F.arange(0, num_entity) result1 = score_model.topK(head, rel, tail, exec_mode='batch_head') result2 = score_model.topK(exec_mode='batch_head') assert len(result1) == num_entity assert len(result2) == num_entity for i in range(head.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for j in range(rel.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[i] r2_head, r2_rel, r2_tail, r2_score = result2[i] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) result1 = score_model.topK(head, rel, tail, exec_mode='batch_tail') result2 = score_model.topK(exec_mode='batch_tail') assert len(result1) == num_entity assert len(result2) == num_entity for k in range(tail.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): 
for j in range(rel.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[k] r2_head, r2_rel, r2_tail, r2_score = result2[k] np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) np.testing.assert_allclose(r1_score, score_topk, rtol=1e-6, atol=1e-6) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-6, atol=1e-6)
def dist_train_test(args, model, train_sampler, entity_pb, relation_pb, l2g, rank=0, rel_parts=None, cross_rels=None, barrier=None): if args.num_proc > 1: th.set_num_threads(args.num_thread) client = connect_to_kvstore(args, entity_pb, relation_pb, l2g) client.barrier() train_time_start = time.time() train(args, model, train_sampler, None, rank, rel_parts, cross_rels, barrier, client) total_train_time = time.time() - train_time_start client.barrier() # Release the memory of local model model = None if (client.get_machine_id() == 0) and (client.get_id() % args.num_client == 0): # pull full model from kvstore # Pull model from kvstore args.num_test_proc = args.num_client dataset_full = dataset = get_dataset(args.data_path, args.dataset, args.format, args.delimiter, args.data_files) args.train = False args.valid = False args.test = True args.strict_rel_part = False args.soft_rel_part = False args.async_update = False args.eval_filter = not args.no_eval_filter if args.neg_deg_sample_eval: assert not args.eval_filter, "if negative sampling based on degree, we can't filter positive edges." print('Full data n_entities: ' + str(dataset_full.n_entities)) print("Full data n_relations: " + str(dataset_full.n_relations)) eval_dataset = EvalDataset(dataset_full, args) if args.neg_sample_size_eval < 0: args.neg_sample_size_eval = dataset_full.n_entities args.batch_size_eval = get_compatible_batch_size( args.batch_size_eval, args.neg_sample_size_eval) model_test = load_model(args, dataset_full.n_entities, dataset_full.n_relations) print("Pull relation_emb ...") relation_id = F.arange(0, model_test.n_relations) relation_data = client.pull(name='relation_emb', id_tensor=relation_id) model_test.relation_emb.emb[relation_id] = relation_data print("Pull entity_emb ... ") # split model into 100 small parts start = 0 percent = 0 entity_id = F.arange(0, model_test.n_entities) count = int(model_test.n_entities / 100) end = start + count while True: print("Pull model from kvstore: %d / 100 ..." % percent) if end >= model_test.n_entities: end = -1 tmp_id = entity_id[start:end] entity_data = client.pull(name='entity_emb', id_tensor=tmp_id) model_test.entity_emb.emb[tmp_id] = entity_data if end == -1: break start = end end += count percent += 1 if not args.no_save_emb: print("save model to %s ..." 
% args.save_path) save_model(args, model_test) print('Total train time {:.3f} seconds'.format(total_train_time)) if args.test: model_test.share_memory() start = time.time() test_sampler_tails = [] test_sampler_heads = [] for i in range(args.num_test_proc): test_sampler_head = eval_dataset.create_sampler( 'test', args.batch_size_eval, args.neg_sample_size_eval, args.neg_sample_size_eval, args.eval_filter, mode='chunk-head', num_workers=args.num_workers, rank=i, ranks=args.num_test_proc) test_sampler_tail = eval_dataset.create_sampler( 'test', args.batch_size_eval, args.neg_sample_size_eval, args.neg_sample_size_eval, args.eval_filter, mode='chunk-tail', num_workers=args.num_workers, rank=i, ranks=args.num_test_proc) test_sampler_heads.append(test_sampler_head) test_sampler_tails.append(test_sampler_tail) eval_dataset = None dataset_full = None print("Run test, test processes: %d" % args.num_test_proc) queue = mp.Queue(args.num_test_proc) procs = [] for i in range(args.num_test_proc): proc = mp.Process(target=test_mp, args=(args, model_test, [ test_sampler_heads[i], test_sampler_tails[i] ], i, 'Test', queue)) procs.append(proc) proc.start() total_metrics = {} metrics = {} logs = [] for i in range(args.num_test_proc): log = queue.get() logs = logs + log for metric in logs[0].keys(): metrics[metric] = sum([log[metric] for log in logs]) / len(logs) print("-------------- Test result --------------") for k, v in metrics.items(): print('Test average {} : {}'.format(k, v)) print("-----------------------------------------") for proc in procs: proc.join() print('testing takes {:.3f} seconds'.format(time.time() - start)) client.shut_down() # shut down kvserver
def part_edge(self, rank, world_size, mode):
    edges = self.edge_parts[rank]
    if edges is None:
        edges = F.arange(0, self.g.number_of_edges())
    return edges
def check_topk_score(model_name): hidden_dim = 32 gamma = 12.0 num_entity = 40 num_rels = 4 entity_emb, rel_emb = generate_rand_emb(model_name, num_entity, num_rels, hidden_dim, 'none') score_model, score_func = create_score_infer(model_name, entity_emb, rel_emb) head = F.arange(0, num_entity // 2) rel = F.arange(0, num_rels) tail = F.arange(num_entity // 2, num_entity) # exec_model==triplet_wise tw_rel = np.random.randint(0, num_rels, num_entity // 2) tw_rel = F.tensor(tw_rel) result1 = score_model.topK(head, tw_rel, tail, exec_mode='triplet_wise') assert len(result1) == 1 scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, tw_rel[i], 0) temb = F.unsqueeze(F.take(entity_emb, tail[i], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(tw_rel[i])) tail_ids.append(F.asnumpy(tail[i])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) # exec_mode==all result1 = score_model.topK(head, rel, tail, k=20) result2 = score_model.topK(head=head, tail=tail, k=20) assert len(result1) == 1 assert len(result2) == 1 scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for j in range(rel.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:20] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[0] r2_head, r2_rel, r2_tail, r2_score = result2[0] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) result1 = score_model.topK(head, rel, tail, exec_mode='batch_rel') result2 = score_model.topK(head=head, tail=tail, exec_mode='batch_rel') assert len(result1) == num_rels assert len(result2) == num_rels for j in range(rel.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), 
dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[j] r2_head, r2_rel, r2_tail, r2_score = result2[j] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) head = F.arange(0, num_entity) rel = F.arange(0, num_rels) tail = F.arange(0, num_entity) result1 = score_model.topK(head, rel, tail, exec_mode='batch_head') result2 = score_model.topK(exec_mode='batch_head') assert len(result1) == num_entity assert len(result2) == num_entity for i in range(head.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for j in range(rel.shape[0]): for k in range(tail.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[i] r2_head, r2_rel, r2_tail, r2_score = result2[i] np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5) np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) result1 = score_model.topK(head, rel, tail, exec_mode='batch_tail') result2 = score_model.topK(exec_mode='batch_tail') assert len(result1) == num_entity assert len(result2) == num_entity for k in range(tail.shape[0]): scores = [] head_ids = [] rel_ids = [] tail_ids = [] for i in range(head.shape[0]): for j in range(rel.shape[0]): hemb = F.take(entity_emb, head[i], 0) remb = F.take(rel_emb, rel[j], 0) temb = F.unsqueeze(F.take(entity_emb, tail[k], 0), dim=0) edge = FakeEdge(hemb, temb, remb) score = F.asnumpy(score_func.edge_func(edge)['score']) scores.append(score) head_ids.append(F.asnumpy(head[i])) rel_ids.append(F.asnumpy(rel[j])) tail_ids.append(F.asnumpy(tail[k])) scores = np.asarray(scores) scores = scores.reshape(scores.shape[0]) head_ids = np.asarray(head_ids) rel_ids = np.asarray(rel_ids) tail_ids = np.asarray(tail_ids) idx = np.argsort(scores) idx = idx[::-1] idx = idx[:10] head_ids = 
head_ids[idx] rel_ids = rel_ids[idx] tail_ids = tail_ids[idx] score_topk = scores[idx] r1_head, r1_rel, r1_tail, r1_score = result1[k] r2_head, r2_rel, r2_tail, r2_score = result2[k] np.testing.assert_allclose(r1_head, head_ids) np.testing.assert_allclose(r2_head, head_ids) np.testing.assert_allclose(r1_rel, rel_ids) np.testing.assert_allclose(r2_rel, rel_ids) np.testing.assert_allclose(r1_tail, tail_ids) np.testing.assert_allclose(r2_tail, tail_ids) np.testing.assert_allclose(r1_score, score_topk, rtol=1e-6, atol=1e-6) np.testing.assert_allclose(r2_score, score_topk, rtol=1e-6, atol=1e-6)