def train(self, train_data, tester_val, tester_tst):
    head, tail, rela = train_data
    n_train = len(head)

    if self.args.optim == 'adam' or self.args.optim == 'Adam':
        self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
        self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
    else:
        self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
    scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

    n_epoch = self.args.n_epoch
    n_batch = self.args.n_batch
    best_mrr = 0
    best_str = ''

    for epoch in range(n_epoch):
        start = time.time()
        self.epoch = epoch
        # shuffle the training triples each epoch
        rand_idx = torch.randperm(n_train)
        head = head[rand_idx].cuda()
        tail = tail[rand_idx].cuda()
        rela = rela[rand_idx].cuda()

        epoch_loss = 0
        for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
            self.model.zero_grad()
            loss = self.model.forward(h, t, r)
            loss += self.args.lamb * self.model.regul
            loss.backward()
            self.optimizer.step()
            self.prox_operator()
            epoch_loss += loss.data.cpu().numpy()
        self.time_tot += time.time() - start
        scheduler.step()

        if (epoch + 1) % self.args.epoch_per_test == 0:
            # output performance
            valid_mrr, valid_mr, valid_10 = tester_val()
            test_mrr, test_mr, test_10 = tester_tst()
            out_str = '%d\t\t%.4f\t%.4f\t%.4f\n' % (epoch + 1, test_mr, test_mrr, test_10)
            # output the best performance info
            if valid_mrr > best_mrr:
                best_mrr = valid_mrr
                best_str = out_str
            # early stop if the best validation MRR is still below the threshold
            if best_mrr < self.args.thres:
                print('\tearly stopped in Epoch:{}, best_mrr:{}'.format(epoch + 1, best_mrr),
                      self.model.struct)
                return best_str
    return best_mrr, best_str
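# The trainers in this file all iterate via `batch_by_size`, which is defined
# elsewhere in the project. A minimal sketch of its assumed behavior follows:
# it walks the aligned sequences in consecutive chunks of `batch_size`,
# yielding a single tensor when one sequence is given and a tuple otherwise.
# The real helper may differ (e.g. in how it treats the final partial batch).
def batch_by_size(batch_size, *tensors, n_sample=None):
    if n_sample is None:
        n_sample = len(tensors[0])
    for start in range(0, n_sample, batch_size):
        chunk = tuple(t[start:start + batch_size] for t in tensors)
        yield chunk[0] if len(chunk) == 1 else chunk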
def test_link(self, test_data, n_ent, heads, tails, filt=True):
    mrr_tot = 0.
    mr_tot = 0
    hit_tot = np.zeros((3,))
    count = 0

    for batch_h, batch_t, batch_r in batch_by_size(self.args.test_batch_size, *test_data):
        batch_size = batch_h.size(0)
        head_val = Variable(batch_h.unsqueeze(1).expand(batch_size, n_ent).cuda())
        tail_val = Variable(batch_t.unsqueeze(1).expand(batch_size, n_ent).cuda())
        rela_val = Variable(batch_r.unsqueeze(1).expand(batch_size, n_ent).cuda())
        all_val = Variable(torch.arange(0, n_ent).unsqueeze(0).expand(batch_size, n_ent)
                           .type(torch.LongTensor).cuda())
        batch_head_scores = self.model.score(all_val, tail_val, rela_val).data
        batch_tail_scores = self.model.score(head_val, all_val, rela_val).data

        # for each positive triple, rank its head and tail against all entities
        for h, t, r, head_score, tail_score in zip(batch_h, batch_t, batch_r,
                                                   batch_head_scores, batch_tail_scores):
            h_idx = int(h.data.cpu().numpy())
            t_idx = int(t.data.cpu().numpy())
            r_idx = int(r.data.cpu().numpy())
            if filt:  # filtered setting: mask all other known positives
                if tails[(h_idx, r_idx)]._nnz() > 1:
                    tmp = tail_score[t_idx].data.cpu().numpy()
                    idx = tails[(h_idx, r_idx)]._indices()
                    tail_score[idx] = 1e20  # lower scores rank better in this variant
                    tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                if heads[(t_idx, r_idx)]._nnz() > 1:
                    tmp = head_score[h_idx].data.cpu().numpy()
                    idx = heads[(t_idx, r_idx)]._indices()
                    head_score[idx] = 1e20
                    head_score[h_idx] = torch.from_numpy(tmp).cuda()
            mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            count += 2

    logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                 float(mrr_tot) / count, float(mr_tot) / count,
                 hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count, count)
    return (float(mrr_tot) / count, mr_tot / count,
            hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count)
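# `mrr_mr_hitk` is also defined elsewhere; a plausible sketch is below. It is
# written for the sigmoid/softmax variants later in this file, where a higher
# score is better and filtered positives are masked with 0.0; the variant
# above masks with 1e20, implying ascending order there, so treat the sort
# direction as an assumption.
def mrr_mr_hitk(scores, target, k=(1, 3, 10)):
    # 1-based rank of the target entity under descending scores
    rank = int((scores > scores[target]).sum().item()) + 1
    hits = np.array([1.0 if rank <= kk else 0.0 for kk in k])
    return 1.0 / rank, rank, hits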
def train(self, train_data, valid_data, tester_val, tester_tst, tester_trip=None):
    self.tester_val = tester_val

    if self.args.optim == 'adam' or self.args.optim == 'Adam':
        self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
        self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
    else:
        self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
    scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

    n_epoch = self.args.n_epoch
    n_batch = self.args.n_batch
    self.best_mrr = 0

    n_train = train_data.size(0)
    n_valid = valid_data.size(0)
    self.good_struct = []

    for epoch in range(n_epoch):
        self.model.train()
        start = time.time()
        self.epoch = epoch
        rand_idx = torch.randperm(n_train)
        if self.GPU:
            train_data = train_data[rand_idx].cuda()
        else:
            train_data = train_data[rand_idx]

        epoch_loss = 0
        for facts in batch_by_size(n_batch, train_data, n_sample=n_train):
            self.model.zero_grad()
            # dispatch on fact arity: binary vs. ternary relations
            if self.n_arity == 3:
                loss = self.model.forward(facts, self.op_idx)
                loss.backward()
            elif self.n_arity == 4:
                loss = self.model.forward_tri(facts, self.op_idx)
                loss.backward()
            # KGE step
            self.optimizer.step()
            self.prox_operator()
            epoch_loss += loss.data.cpu().numpy()
        scheduler.step()
        self.time_tot += time.time() - start
        print("Epoch: %d/%d, Loss=%.8f, Time=%.4f" %
              (epoch + 1, n_epoch, epoch_loss / n_train, time.time() - start))

        if (epoch + 1) % self.args.epoch_per_test == 0:
            valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
            test_mrr, test_mr, test_1, test_3, test_10 = tester_tst()
            if tester_trip is None:
                out_str = '%d\t%.2f %.2f \t%.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (
                    epoch, self.time_tot, epoch_loss / n_train,
                    valid_mrr, valid_mr, valid_1, valid_3, valid_10,
                    test_mrr, test_mr, test_1, test_3, test_10)
                with open(self.args.perf_file, 'a') as f:
                    f.write(out_str)
            if test_mrr > self.best_mrr:
                self.best_mrr = test_mrr

    with open(self.args.perf_file, 'a') as f:
        f.write("arch:" + str(self.op_idx.tolist()) + "\n")
        f.write("best mrr:" + str(self.best_mrr) + "\n")
    return self.best_mrr
def evaluate(self, test_data, e1_sp, e2_sp, e3_sp, arch=None):
    mrr_tot = 0.
    mr_tot = 0
    hit_tot = np.zeros((3,))
    count = 0

    self.model.eval()
    max_idx = self.op_idx
    for facts in batch_by_size(self.args.test_batch_size, test_data):
        if self.GPU:
            r, e1, e2, e3 = (facts[:, 0].cuda(), facts[:, 1].cuda(),
                             facts[:, 2].cuda(), facts[:, 3].cuda())
        else:
            r, e1, e2, e3 = facts[:, 0], facts[:, 1], facts[:, 2], facts[:, 3]

        length = self.n_dim // self.K
        # batch-norm + dropout on the embeddings, reshaped into K chunks of `length`
        r_embed = self.model.bnr(self.model.rel_embed(r)).view(-1, self.K, length)
        e1_embed = self.model.input_dropout(self.model.bne(self.model.ent_embed(e1))).view(-1, self.K, length)
        e2_embed = self.model.input_dropout(self.model.bne(self.model.ent_embed(e2))).view(-1, self.K, length)
        e3_embed = self.model.input_dropout(self.model.bne(self.model.ent_embed(e3))).view(-1, self.K, length)

        # score each position of the 4-ary fact against all entities
        e1_scores = F.softmax(self.model.tri_neg_other(r_embed, e2_embed, e3_embed, max_idx, 1), dim=1).data
        e2_scores = F.softmax(self.model.tri_neg_other(r_embed, e1_embed, e3_embed, max_idx, 2), dim=1).data
        e3_scores = F.softmax(self.model.tri_neg_other(r_embed, e1_embed, e2_embed, max_idx, 3), dim=1).data

        for idx in range(len(r)):
            r_idx = int(r[idx].data.cpu().numpy())
            e1_idx = int(e1[idx].data.cpu().numpy())
            e2_idx = int(e2[idx].data.cpu().numpy())
            e3_idx = int(e3[idx].data.cpu().numpy())

            # filtered setting: zero out other known positives, keep the target's score
            if e1_sp[(r_idx, e2_idx, e3_idx)]._nnz() > 1:
                tmp = e1_scores[idx][e1_idx].data.cpu().numpy()
                indic = e1_sp[(r_idx, e2_idx, e3_idx)]._indices()
                e1_scores[idx][indic] = 0.0
                if self.GPU:
                    e1_scores[idx][e1_idx] = torch.from_numpy(tmp).cuda()
                else:
                    e1_scores[idx][e1_idx] = torch.from_numpy(tmp)
            if e2_sp[(r_idx, e1_idx, e3_idx)]._nnz() > 1:
                tmp = e2_scores[idx][e2_idx].data.cpu().numpy()
                indic = e2_sp[(r_idx, e1_idx, e3_idx)]._indices()
                e2_scores[idx][indic] = 0.0
                if self.GPU:
                    e2_scores[idx][e2_idx] = torch.from_numpy(tmp).cuda()
                else:
                    e2_scores[idx][e2_idx] = torch.from_numpy(tmp)
            if e3_sp[(r_idx, e1_idx, e2_idx)]._nnz() > 1:
                tmp = e3_scores[idx][e3_idx].data.cpu().numpy()
                indic = e3_sp[(r_idx, e1_idx, e2_idx)]._indices()
                e3_scores[idx][indic] = 0.0
                if self.GPU:
                    e3_scores[idx][e3_idx] = torch.from_numpy(tmp).cuda()
                else:
                    e3_scores[idx][e3_idx] = torch.from_numpy(tmp)

            mrr, mr, hit = mrr_mr_hitk(e1_scores[idx], e1_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            mrr, mr, hit = mrr_mr_hitk(e2_scores[idx], e2_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            mrr, mr, hit = mrr_mr_hitk(e3_scores[idx], e3_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            count += 3

    logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                 float(mrr_tot) / count, float(mr_tot) / count,
                 hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count, count)
    return (float(mrr_tot) / count, float(mr_tot) / count,
            hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count)
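# The filtered setting above looks positives up in dictionaries of sparse
# tensors (`heads`, `tails`, `e1_sp`, ...). Their construction is not shown in
# this file; a hedged sketch for the binary case, with assumed names, is:
from collections import defaultdict

def build_filter_index(all_triples, n_ent):
    # all_triples: iterable of (h, t, r) index triples over train/valid/test
    tails_of = defaultdict(list)
    for h, t, r in all_triples:
        tails_of[(h, r)].append(t)
    tails = {}
    for (h, r), ts in tails_of.items():
        idx = torch.LongTensor([ts])            # shape (1, nnz)
        val = torch.ones(len(ts))
        tails[(h, r)] = torch.sparse_coo_tensor(idx, val, (n_ent,))
    return tails  # tails[(h, r)]._indices() lists every known true tail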
def test_link(self, test_data, n_ent, heads, tails, filt=True, arch=None):
    mrr_tot = 0.
    mr_tot = 0
    hit_tot = np.zeros((3,))
    count = 0

    max_idx = self.op_idx
    for facts in batch_by_size(self.args.test_batch_size, test_data):
        if self.GPU:
            batch_h = facts[:, 0].cuda()
            batch_t = facts[:, 1].cuda()
            batch_r = facts[:, 2].cuda()
        else:
            batch_h = facts[:, 0]
            batch_t = facts[:, 1]
            batch_r = facts[:, 2]

        length = self.n_dim // self.K
        h_embed = self.model.ent_embed(batch_h).view(-1, self.K, length)
        t_embed = self.model.ent_embed(batch_t).view(-1, self.K, length)
        r_embed = self.model.rel_embed(batch_r).view(-1, self.K, length)
        # score every entity as head (resp. tail) for each test triple
        head_scores = torch.sigmoid(self.model.bin_neg_other(r_embed, t_embed, max_idx, 1)).data
        tail_scores = torch.sigmoid(self.model.bin_neg_other(r_embed, h_embed, max_idx, 2)).data

        for h, t, r, head_score, tail_score in zip(batch_h, batch_t, batch_r,
                                                   head_scores, tail_scores):
            h_idx = int(h.data.cpu().numpy())
            t_idx = int(t.data.cpu().numpy())
            r_idx = int(r.data.cpu().numpy())
            if filt:  # filtered setting: zero out all other known positives
                if tails[(h_idx, r_idx)]._nnz() > 1:
                    tmp = tail_score[t_idx].data.cpu().numpy()
                    idx = tails[(h_idx, r_idx)]._indices()
                    tail_score[idx] = 0.0
                    if self.GPU:
                        tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        tail_score[t_idx] = torch.from_numpy(tmp)
                if heads[(t_idx, r_idx)]._nnz() > 1:
                    tmp = head_score[h_idx].data.cpu().numpy()
                    idx = heads[(t_idx, r_idx)]._indices()
                    head_score[idx] = 0.0
                    if self.GPU:
                        head_score[h_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        head_score[h_idx] = torch.from_numpy(tmp)
            mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            count += 2

    if arch is None:
        logging.info('Test_MRR=%f, Test_MR=%f, Test_H=%f %f %f, Count=%d',
                     float(mrr_tot) / count, float(mr_tot) / count,
                     hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count, count)
    return (float(mrr_tot) / count, float(mr_tot) / count,
            hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count)
def test_link(self, struct, test, randint, test_data, n_ent, heads, tails, filt=True):
    mrr_tot = 0.
    mr_tot = 0.
    hit_tot = np.zeros((3,))
    count = 0
    self.n_cluster = len(struct)
    test_batch_size = self.args.n_batch

    head, tail, rela = test_data
    # permute the test triples, reproducibly when a seed is given
    if randint is None:
        rand_idx = torch.randperm(len(head))
    else:
        np.random.seed(randint)
        rand_idx = torch.LongTensor(np.random.permutation(len(head)))
    if self.GPU:
        head = head[rand_idx].cuda()
        tail = tail[rand_idx].cuda()
        rela = rela[rand_idx].cuda()
    else:
        head = head[rand_idx]
        tail = tail[rand_idx]
        rela = rela[rand_idx]

    for batch_h, batch_t, batch_r in batch_by_size(test_batch_size, head, tail, rela):
        if self.GPU:
            batch_h = batch_h.cuda()
            batch_t = batch_t.cuda()
            batch_r = batch_r.cuda()

        h_embed = self.model.ent_embed(batch_h).view(-1, self.K, self.n_dim // self.K)
        t_embed = self.model.ent_embed(batch_t).view(-1, self.K, self.n_dim // self.K)
        length = self.n_dim // self.K

        # build a relation table of size (n_rel, 2K+1, length): index 0 is the
        # zero block, 1..K the positive chunks, K+1..2K the negated chunks
        rel_embed_pos = self.model.rel_embed.weight.view(-1, self.K, length)
        rel_embed_neg = -rel_embed_pos
        if self.GPU:
            rel_embed_zeros = torch.zeros(self.n_rel, 1, length).cuda()
        else:
            rel_embed_zeros = torch.zeros(self.n_rel, 1, length)
        self.rel_embed_2K_1 = torch.cat((rel_embed_zeros, rel_embed_pos, rel_embed_neg), 1)

        # combine the per-cluster structures into one relation embedding
        if self.GPU:
            self.r_embed = torch.zeros(self.n_rel, self.K * self.K, length).cuda()
        else:
            self.r_embed = torch.zeros(self.n_rel, self.K * self.K, length)
        for i_rc in range(self.n_cluster):
            max_idx_list = struct[i_rc]
            self.r_embed[self.cluster_rela_dict[i_rc], :, :] = \
                self.rel_embed_2K_1[self.cluster_rela_dict[i_rc]][:, max_idx_list, :]
        self.r_embed = self.r_embed.view(-1, self.K, self.K, length)
        self.r_embed = self.r_embed[batch_r, :, :, :]

        head_scores = torch.sigmoid(self.model.test_head(self.r_embed, t_embed)).data
        tail_scores = torch.sigmoid(self.model.test_tail(h_embed, self.r_embed)).data

        for h, t, r, head_score, tail_score in zip(batch_h, batch_t, batch_r,
                                                   head_scores, tail_scores):
            h_idx = int(h.data.cpu().numpy())
            t_idx = int(t.data.cpu().numpy())
            r_idx = int(r.data.cpu().numpy())
            if filt:  # filtered setting: zero out all other known positives
                if tails[(h_idx, r_idx)]._nnz() > 1:
                    tmp = tail_score[t_idx].data.cpu().numpy()
                    idx = tails[(h_idx, r_idx)]._indices()
                    tail_score[idx] = 0.0
                    if self.GPU:
                        tail_score[t_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        tail_score[t_idx] = torch.from_numpy(tmp)
                if heads[(t_idx, r_idx)]._nnz() > 1:
                    tmp = head_score[h_idx].data.cpu().numpy()
                    idx = heads[(t_idx, r_idx)]._indices()
                    head_score[idx] = 0.0
                    if self.GPU:
                        head_score[h_idx] = torch.from_numpy(tmp).cuda()
                    else:
                        head_score[h_idx] = torch.from_numpy(tmp)
            mrr, mr, hit = mrr_mr_hitk(tail_score, t_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            mrr, mr, hit = mrr_mr_hitk(head_score, h_idx)
            mrr_tot += mrr
            mr_tot += mr
            hit_tot += hit
            count += 2

        if not test:
            break  # during search, evaluate on a single mini-batch only

    return (float(mrr_tot) / count, float(mr_tot) / count,
            hit_tot[0] / count, hit_tot[1] / count, hit_tot[2] / count)
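# A toy illustration of the (2K+1)-way signed relation table built above, with
# made-up sizes (n_rel=1, K=2, length=3). Index 0 picks the zero block, indices
# 1..K the positive chunks, and K+1..2K the negated chunks, so a searched
# structure is just a list of K*K such indices per relation cluster.
n_rel, K, length = 1, 2, 3
pos = torch.arange(n_rel * K * length, dtype=torch.float).view(n_rel, K, length)
table = torch.cat((torch.zeros(n_rel, 1, length), pos, -pos), 1)  # (n_rel, 2K+1, length)
struct_row = [0, 2, 4, 1]          # K*K choices; each value lies in [0, 2K]
r_embed = table[:, struct_row, :]  # (n_rel, K*K, length), then .view(-1, K, K, length)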
def train_stand(self, train_data, valid_data, derived_struct, rela_cluster, mrr):
    self.rela_to_dict(rela_cluster)

    head, tail, rela = train_data
    n_train = len(head)

    if self.args.optim == 'adam' or self.args.optim == 'Adam':
        self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
        self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
    else:
        self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
    scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

    n_batch = self.args.n_batch
    best_mrr = 0
    best_str = ''
    start = time.time()

    for epoch in range(self.args.n_stand_epoch):
        rand_idx = torch.randperm(n_train)
        if self.GPU:
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()
        else:
            head = head[rand_idx]
            tail = tail[rand_idx]
            rela = rela[rand_idx]

        epoch_loss = 0
        n_iters = 0
        # train model weights under the derived structure
        for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
            self.model.zero_grad()
            loss = self.model.forward(derived_struct, h, t, r, self.cluster_rela_dict)
            loss += self.args.lamb * self.model.regul
            loss.backward()
            self.optimizer.step()
            self.prox_operator()
            epoch_loss += loss.data.cpu().numpy()
            n_iters += 1
        scheduler.step()
        print("Epoch: %d/%d, Loss=%.2f, Stand Time=%.2f" %
              (epoch + 1, self.args.n_stand_epoch, epoch_loss / n_train, time.time() - start))

        if (epoch + 1) % 5 == 0:
            test, randint = True, None
            valid_mrr, valid_mr, valid_1, valid_3, valid_10 = self.tester_val(derived_struct, test, randint)
            test_mrr, test_mr, test_1, test_3, test_10 = self.tester_tst(derived_struct, test, randint)
            out_str = '%d \t %.2f \t %.2f \t %.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (
                epoch, self.time_tot, epoch_loss / n_train,
                valid_mrr, valid_mr, valid_1, valid_3, valid_10,
                test_mrr, test_mr, test_1, test_3, test_10)
            # output the best performance info
            if test_mrr > best_mrr:
                best_mrr = test_mrr
                best_str = out_str
            with open(self.args.perf_file, 'a+') as f:
                f.write(out_str)

    with open(self.args.perf_file, 'a+') as f:
        f.write("best performance:" + best_str + "\n")
        f.write("struct:" + str(derived_struct) + "\n")
        f.write("rela:" + str(rela_cluster) + "\n")
    return best_mrr
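# `prox_operator` is invoked after every optimizer step but defined elsewhere.
# A plausible (assumed) proximal step projects entity embeddings back onto the
# L2 unit ball, a common constraint in KGE training; the project's actual
# operator may differ.
def prox_operator(self):
    with torch.no_grad():
        w = self.model.ent_embed.weight
        norms = w.norm(p=2, dim=-1, keepdim=True).clamp(min=1.0)
        w.div_(norms)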
def train_oas(self, train_data, valid_data, derived_struct):
    head, tail, rela = train_data
    n_train = len(head)

    if self.args.optim == 'adam' or self.args.optim == 'Adam':
        self.optimizer = Adam(self.model.parameters(), lr=self.args.lr)
    elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
        self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr)
    else:
        self.optimizer = SGD(self.model.parameters(), lr=self.args.lr)
    scheduler = ExponentialLR(self.optimizer, self.args.decay_rate)

    n_batch = self.args.n_batch
    for epoch in range(self.args.n_oas_epoch):
        start = time.time()
        rand_idx = torch.randperm(n_train)
        if self.GPU:
            head = head[rand_idx].cuda()
            tail = tail[rand_idx].cuda()
            rela = rela[rand_idx].cuda()
        else:
            head = head[rand_idx]
            tail = tail[rand_idx]
            rela = rela[rand_idx]

        epoch_loss = 0
        n_iters = 0
        # train model weights
        for h, t, r in batch_by_size(n_batch, head, tail, rela, n_sample=n_train):
            self.model.zero_grad()
            loss = self.model.forward(derived_struct, h, t, r, self.cluster_rela_dict)
            loss += self.args.lamb * self.model.regul
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
            self.optimizer.step()
            self.prox_operator()
            epoch_loss += loss.data.cpu().numpy()
            n_iters += 1
        scheduler.step()

        # re-cluster relations when using the "scu" clustering scheme
        if self.cluster_way == "scu":
            self.rela_cluster = self.cluster()
            self.rela_cluster_history.append(self.rela_cluster)
            self.rela_to_dict(self.rela_cluster)

        # train controller
        self.train_controller()

        # derive structs; evaluating the derived architecture is unnecessary in
        # the search procedure, so it is excluded from the search timer
        self.time_tot += time.time() - start
        derived_struct, test_mrr = self.derive(sample_num=1)
        print("Epoch: %d/%d, Search Time=%.2f, Loss=%.2f, Sampled Val MRR=%.8f, Tst MRR=%.8f" %
              (epoch + 1, self.args.n_oas_epoch, self.time_tot, epoch_loss / n_train,
               self.derived_raward_history[-1], test_mrr))
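# `derive` and `train_controller` live elsewhere in the project. As a rough,
# assumed sketch, `derive` samples candidate structures from the controller,
# scores each on a validation mini-batch via self.tester_val, records the best
# reward, and returns the winning structure; every API below is an assumption.
def derive(self, sample_num=1):
    best_struct, best_val = None, -1.0
    for _ in range(sample_num):
        struct = self.controller.sample()                    # assumed controller API
        val_mrr, _, _, _, _ = self.tester_val(struct, False, None)
        if val_mrr > best_val:
            best_struct, best_val = struct, val_mrr
    self.derived_raward_history.append(best_val)             # attribute name as used above
    test_mrr, _, _, _, _ = self.tester_tst(best_struct, True, None)
    return best_struct, test_mrr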
def train(self, train_data, caches, corrupter, tester_val, tester_tst):
    head, tail, rela = train_data
    # useful information related to the negative-sampling cache
    head_idx, tail_idx, self.head_cache, self.tail_cache, self.head_pos, self.tail_pos = caches
    n_train = len(head)

    if self.args.optim == 'adam' or self.args.optim == 'Adam':
        self.optimizer = Adam(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
    elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
        self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
    else:
        self.optimizer = SGD(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)

    n_epoch = self.args.n_epoch
    n_batch = self.args.n_batch
    best_mrr = 0
    best_str = ''

    for epoch in range(n_epoch):
        start = time.time()
        self.epoch = epoch
        rand_idx = torch.randperm(n_train)
        head = head[rand_idx].cuda()
        tail = tail[rand_idx].cuda()
        rela = rela[rand_idx].cuda()
        head_idx = head_idx[rand_idx.numpy()]
        tail_idx = tail_idx[rand_idx.numpy()]

        epoch_loss = 0
        if self.args.save and epoch == self.args.s_epoch:
            self.save(os.path.join(self.args.task_dir, self.args.model + '.mdl'))

        for h, t, r, h_idx, t_idx in batch_by_size(n_batch, head, tail, rela,
                                                   head_idx, tail_idx, n_sample=n_train):
            self.model.zero_grad()
            h_rand, t_rand = self.neg_sample(h, t, r, h_idx, t_idx,
                                             self.args.sample, self.args.loss)

            # Bernoulli sampling to select between (h', r, t) and (h, r, t');
            # the mask must live on the same device as the CUDA index tensors
            prob = corrupter.bern_prob[r]
            selection = torch.bernoulli(prob).type(torch.ByteTensor).cuda()
            n_h = torch.LongTensor(h.cpu().numpy()).cuda()
            n_t = torch.LongTensor(t.cpu().numpy()).cuda()
            n_r = torch.LongTensor(r.cpu().numpy()).cuda()
            if n_h.size() != h_rand.size():
                n_h = n_h.unsqueeze(1).expand_as(h_rand)
                n_t = n_t.unsqueeze(1).expand_as(h_rand)
                n_r = n_r.unsqueeze(1).expand_as(h_rand)
                h = h.unsqueeze(1)
                r = r.unsqueeze(1)
                t = t.unsqueeze(1)
            n_h[selection] = h_rand[selection]
            n_t[~selection] = t_rand[~selection]

            if not (self.args.sample == 'bern'):
                self.update_cache(h, t, r, h_idx, t_idx)

            if self.args.loss == 'point':
                p_loss = self.model.point_loss(h, t, r, 1)
                n_loss = self.model.point_loss(n_h, n_t, n_r, -1)
                loss = p_loss + n_loss
            else:
                loss = self.model.pair_loss(h, t, r, n_h, n_t)
            loss.backward()
            self.optimizer.step()
            epoch_loss += loss.data.cpu().numpy()

        # get the time of each epoch
        self.time_tot += time.time() - start
        print("Epoch: %d/%d, Loss=%.8f, Time=%.4f" %
              (epoch + 1, n_epoch, epoch_loss / n_train, time.time() - start))

        if (epoch + 1) % self.args.epoch_per_test == 0:
            # output performance
            valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
            test_mrr, test_mr, test_1, test_3, test_10 = tester_tst()
            out_str = '%d\t%.2f\t%.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (
                epoch, self.time_tot,
                valid_mrr, valid_mr, valid_1, valid_3, valid_10,
                test_mrr, test_mr, test_1, test_3, test_10)
            with open(self.args.perf_file, 'a') as f:
                f.write(out_str)
            # remove false negatives from the cache
            if self.args.remove:
                self.remove_positive(self.args.remove)
            # output the best performance info
            if valid_mrr > best_mrr:
                best_mrr = valid_mrr
                best_str = out_str
    return best_str
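# `corrupter.bern_prob` holds, per relation, the probability of corrupting the
# head rather than the tail, following the Bernoulli trick of Wang et al.
# (2014): tph / (tph + hpt). A compact, assumed construction (move the result
# to GPU as needed):
from collections import defaultdict

class BernCorrupter:
    def __init__(self, triples, n_rel):
        # triples: iterable of (h, t, r) index triples
        heads_per = defaultdict(set)   # (r, t) -> set of heads
        tails_per = defaultdict(set)   # (r, h) -> set of tails
        for h, t, r in triples:
            heads_per[(r, t)].add(h)
            tails_per[(r, h)].add(t)
        self.bern_prob = torch.zeros(n_rel)
        for r in range(n_rel):
            tph = np.mean([len(s) for (rr, _), s in tails_per.items() if rr == r] or [1.0])
            hpt = np.mean([len(s) for (rr, _), s in heads_per.items() if rr == r] or [1.0])
            self.bern_prob[r] = tph / (tph + hpt)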
def train(self, train_data, caches, corrupter, tester_val, tester_tst):
    heads, tails, relas = train_data
    # useful information related to the negative-sampling cache
    head_idxs, tail_idxs, self.head_cache, self.tail_cache, self.head_pos, self.tail_pos = caches
    self.head_score = np.random.randn(len(self.head_cache), self.args.N_1)
    self.tail_score = np.random.randn(len(self.tail_cache), self.args.N_1)
    n_train = len(heads)

    if self.args.optim == 'adam' or self.args.optim == 'Adam':
        self.optimizer = Adam(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
    elif self.args.optim == 'adagrad' or self.args.optim == 'Adagrad':
        self.optimizer = Adagrad(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)
    else:
        self.optimizer = SGD(self.model.parameters(), lr=self.args.lr, weight_decay=self.weight_decay)

    n_epoch = self.args.n_epoch
    n_batch = self.args.n_batch
    best_mrr = 0
    best_str = ''
    losses = []

    for epoch in range(n_epoch):
        start = time.time()
        self.epoch = epoch

        # positive sampling: winsorize the cached scores at the 20th/80th
        # percentiles, rescale to [0, 1], sharpen by alpha_1, then softmax
        logits = self.cache_score
        quant_lo = np.quantile(logits, 0.2)
        quant_hi = np.quantile(logits, 0.8)
        logits[logits < quant_lo] = quant_lo
        logits[logits > quant_hi] = quant_hi
        logits = (logits - quant_lo) / (quant_hi - quant_lo)
        logits = logits * self.args.alpha_1
        probb = np.exp(logits) / np.exp(logits).sum()
        if epoch == 0:
            # use uniform sampling for the first epoch
            probb = np.ones((n_train,)) / n_train
        indices = np.random.choice(n_train, n_train, replace=False, p=probb)
        rand_idx = torch.LongTensor(indices)
        head = heads[rand_idx].cuda()
        tail = tails[rand_idx].cuda()
        rela = relas[rand_idx].cuda()
        head_idx = head_idxs[indices]
        tail_idx = tail_idxs[indices]

        epoch_loss = 0
        if self.args.save and epoch == self.args.s_epoch:
            self.save(os.path.join(self.args.task_dir, self.args.model + '.mdl'))

        iters = 0
        for h, t, r, h_idx, t_idx, idx in batch_by_size(n_batch, head, tail, rela,
                                                        head_idx, tail_idx, indices,
                                                        n_sample=n_train):
            self.model.zero_grad()
            h_rand, t_rand = self.neg_sample(h, t, r, h_idx, t_idx,
                                             self.args.sample, self.args.loss)

            # Bernoulli sampling to select between (h', r, t) and (h, r, t')
            prob = corrupter.bern_prob[r]
            selection = torch.bernoulli(prob).type(torch.ByteTensor).cuda()
            n_h = torch.LongTensor(h.cpu().numpy()).cuda()
            n_t = torch.LongTensor(t.cpu().numpy()).cuda()
            n_r = torch.LongTensor(r.cpu().numpy()).cuda()
            if n_h.size() != h_rand.size():
                n_h = n_h.unsqueeze(1).expand_as(h_rand)
                n_t = n_t.unsqueeze(1).expand_as(h_rand)
                n_r = n_r.unsqueeze(1).expand_as(h_rand)
                h = h.unsqueeze(1)
                r = r.unsqueeze(1)
                t = t.unsqueeze(1)
            n_h[selection] = h_rand[selection]
            n_t[~selection] = t_rand[~selection]

            # refresh the cache lazily, once every `lazy` iterations
            if not (self.args.sample == 'bern') and iters % self.args.lazy == 0:
                self.update_cache(h, t, r, idx, h_idx, t_idx)

            if self.args.loss == 'point':
                p_loss = torch.sum(self.model.point_loss(h, t, r, 1))
                n_loss = torch.sum(self.model.point_loss(n_h, n_t, n_r, -1))
                loss = p_loss + n_loss
            else:
                loss = self.model.pair_loss(h, t, r, n_h, n_t)
            loss.backward()
            self.optimizer.step()
            self.remove_nan()
            epoch_loss += loss.data.cpu().numpy()
            iters += 1

        # get the time of each epoch
        self.time_tot += time.time() - start
        losses.append(round(epoch_loss / n_train, 4))

        if (epoch + 1) % self.args.epoch_per_test == 0:
            # output performance
            valid_mrr, valid_mr, valid_1, valid_3, valid_10 = tester_val()
            test_mrr, test_mr, test_1, test_3, test_10 = tester_tst()
            out_str = '%d\t%.2f\t%.4f %.1f %.4f %.4f %.4f\t%.4f %.1f %.4f %.4f %.4f\n' % (
                epoch, self.time_tot,
                valid_mrr, valid_mr, valid_1, valid_3, valid_10,
                test_mrr, test_mr, test_1, test_3, test_10)
            with open(self.args.perf_file, 'a') as f:
                f.write(out_str)
            # remove false negatives from the cache
            if self.args.remove:
                self.remove_positive(self.args.remove)
            # output the best performance info
            if valid_mrr > best_mrr:
                best_mrr = valid_mrr
                best_str = out_str
    return best_mrr, best_str
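# Standalone illustration of the score-based positive sampling in the trainer
# above: cached scores are winsorized at the 20th/80th percentiles, rescaled
# to [0, 1], sharpened by a temperature alpha_1 (value assumed here), and
# converted to a categorical distribution that drives a weighted permutation
# of the training set.
logits = np.random.randn(10)
lo, hi = np.quantile(logits, 0.2), np.quantile(logits, 0.8)
logits = np.clip(logits, lo, hi)
logits = (logits - lo) / (hi - lo) * 2.0   # alpha_1 = 2.0, assumed
p = np.exp(logits) / np.exp(logits).sum()
order = np.random.choice(len(p), len(p), replace=False, p=p)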