def evaluate(model, data_test, config, word2id, entity2id, epoch=0, model_path=None):
    """Report test-set perplexities for ``model``.

    Every batch of ``data_test`` is passed to ``run``. The per-sentence
    values it returns are summed over all batches, divided by the number of
    examples (reduced by the ``*_neg_num`` counts ``run`` reports for the
    word / local / only-two variants) and exponentiated.

    Args:
        model: Model to evaluate. Its ``is_inference`` attribute is set to
            ``False`` once the loop has finished.
        data_test: Iterable of batches. Each batch must support
            ``batch['query_text']``; its length is used as the batch size.
        config: Not used by this function; kept for call compatibility.
        word2id: Not used by this function; kept for call compatibility.
        entity2id: Not used by this function; kept for call compatibility.
        epoch: Not used by this function; kept for call compatibility.
        model_path: Optional checkpoint path. When given, the state dict is
            loaded before evaluation and ``SystemExit`` is raised after the
            perplexities have been printed.

    Returns:
        Tuple ``(ppx, ppx_word, ppx_local, ppx_only_two)``.
    """
    import sys  # function-scope import so the block does not rely on site's exit()

    if model_path is not None:
        model.load_state_dict(torch.load(model_path))

    sentence_ppx_loss = 0
    sentence_ppx_word_loss = 0
    sentence_ppx_local_loss = 0
    sentence_ppx_only_two_loss = 0
    word_cut = use_cuda(torch.Tensor([0]))
    local_cut = use_cuda(torch.Tensor([0]))
    only_two_cut = use_cuda(torch.Tensor([0]))
    data_len = 0

    # Nothing is back-propagated here, so skip building autograd graphs.
    with torch.no_grad():
        for iteration, batch in enumerate(data_test):
            (decoder_loss, sentence_ppx, sentence_ppx_word, sentence_ppx_local,
             sentence_ppx_only_two, word_neg_num, local_neg_num,
             only_two_neg_num) = run(model, batch)

            sentence_ppx_loss += torch.sum(sentence_ppx).data
            sentence_ppx_word_loss += torch.sum(sentence_ppx_word).data
            sentence_ppx_local_loss += torch.sum(sentence_ppx_local).data
            sentence_ppx_only_two_loss += torch.sum(sentence_ppx_only_two).data

            word_cut += word_neg_num
            local_cut += local_neg_num
            only_two_cut += only_two_neg_num

            if iteration % 50 == 0:
                print("iteration for evaluate:", iteration, "Loss:", decoder_loss.data)
            data_len += len(batch['query_text'])

    model.is_inference = False

    # Compute each perplexity once and reuse it for printing and returning.
    ppx = np.exp(sentence_ppx_loss.cpu() / data_len)
    ppx_word = np.exp(sentence_ppx_word_loss.cpu() / (data_len - int(word_cut)))
    ppx_local = np.exp(sentence_ppx_local_loss.cpu() / (data_len - int(local_cut)))
    ppx_only_two = np.exp(
        sentence_ppx_only_two_loss.cpu() / (data_len - int(only_two_cut)))

    print(' perplexity on test set:', ppx, ppx_word, ppx_local, ppx_only_two)

    if model_path is not None:
        # Stand-alone checkpoint evaluation: terminate after reporting.
        sys.exit()

    return ppx, ppx_word, ppx_local, ppx_only_two
def train(config, model, data_train, data_test, word2id, entity2id, model_optimizer):
    """Train ``model`` for ``config.num_epoch`` epochs, evaluating after each.

    Per epoch: every batch of ``data_train`` goes through ``run``, the
    decoder loss is back-propagated with gradient-norm clipping, the training
    perplexities are printed, a checkpoint is written to
    ``config.result_dir_name`` and the test perplexities returned by
    ``evaluate`` are appended to ``result.txt`` in the same directory.

    Args:
        config: Provides ``num_epoch``, ``max_gradient_norm`` and
            ``result_dir_name``.
        model: Model being optimised.
        data_train: Iterable of training batches; ``batch['query_text']``
            gives the batch size.
        data_test: Test data forwarded to ``evaluate``.
        word2id: Forwarded to ``evaluate``.
        entity2id: Forwarded to ``evaluate``.
        model_optimizer: Optimizer that steps ``model``'s parameters.
    """
    for epoch in range(config.num_epoch):
        print("epoch: ", epoch)

        sentence_ppx_loss = 0
        sentence_ppx_word_loss = 0
        sentence_ppx_local_loss = 0
        sentence_ppx_only_two_loss = 0
        word_cut = use_cuda(torch.Tensor([0]))
        local_cut = use_cuda(torch.Tensor([0]))
        only_two_cut = use_cuda(torch.Tensor([0]))
        data_len = 0

        for iteration, batch in enumerate(data_train):
            (decoder_loss, sentence_ppx, sentence_ppx_word, sentence_ppx_local,
             sentence_ppx_only_two, word_neg_num, local_neg_num,
             only_two_neg_num) = run(model, batch)

            sentence_ppx_loss += torch.sum(sentence_ppx).data
            sentence_ppx_word_loss += torch.sum(sentence_ppx_word).data
            sentence_ppx_local_loss += torch.sum(sentence_ppx_local).data
            sentence_ppx_only_two_loss += torch.sum(sentence_ppx_only_two).data

            word_cut += word_neg_num
            local_cut += local_neg_num
            only_two_cut += only_two_neg_num

            model_optimizer.zero_grad()
            decoder_loss.backward()
            # clip_grad_norm_ is the in-place successor of the deprecated
            # clip_grad_norm; arguments and effect are the same.
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_gradient_norm)
            model_optimizer.step()

            if iteration % 50 == 0:
                print("iteration:", iteration, "Loss:", decoder_loss.data)
            data_len += len(batch['query_text'])

        print("perplexity for epoch", epoch + 1, ":",
              np.exp(sentence_ppx_loss.cpu() / data_len),
              " ppx_word: ",
              np.exp(sentence_ppx_word_loss.cpu() / (data_len - int(word_cut))),
              " ppx_local: ",
              np.exp(sentence_ppx_local_loss.cpu() / (data_len - int(local_cut))),
              " ppx_only_two: ",
              np.exp(sentence_ppx_only_two_loss.cpu() / (data_len - int(only_two_cut))))

        torch.save(
            model.state_dict(),
            config.result_dir_name + '/' + '_epoch_' + str(epoch + 1) + '.pkl')

        ppx, ppx_word, ppx_local, ppx_only_two = evaluate(
            model, data_test, config, word2id, entity2id, epoch + 1)

        # The context manager closes the results file even if write() raises.
        with open(config.result_dir_name + '/result.txt', 'a') as ppx_f:
            ppx_f.write(
                "epoch " + str(epoch + 1) + " ppx: " + str(ppx)
                + " ppx_word: " + str(ppx_word) + " ppx_local: " + str(ppx_local)
                + " ppx_only_two: " + str(ppx_only_two) + '\n')
def main():
    """Entry point: build data and model, then train or evaluate.

    Reads ``config.yml``, prepares vocabulary and data, builds a
    ``ConceptFlow`` model with an Adam optimizer, appends the configuration
    to ``<result_dir_name>/result.txt`` and then either evaluates the
    checkpoint at ``config.test_model_path`` (when ``config.is_train`` equals
    ``False``) or starts training.
    """
    import sys  # function-scope import so the block does not rely on site's exit()

    config = Config('config.yml')
    config.list_all_member()

    raw_vocab = prepare_data(config)
    (word2id, entity2id, vocab, embed, entity_vocab, entity_embed,
     relation_vocab, relation_embed, entity_relation_embed) = build_vocab(
        config.data_dir, raw_vocab, config=config)
    data_train, data_test = get_data(config, word2id, entity2id)

    model = use_cuda(ConceptFlow(config, embed, entity_relation_embed))
    model_optimizer = torch.optim.Adam(model.parameters(), lr=config.lr_rate)

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(config.result_dir_name, exist_ok=True)

    # Close the file before train() re-opens it in append mode, so the
    # buffered configuration lines are on disk ahead of the epoch results.
    with open(config.result_dir_name + '/result.txt', 'a') as ppx_f:
        for name, value in vars(config).items():
            ppx_f.write('%s = %s' % (name, value) + '\n')

    # `== False` is kept on purpose: a None/unset flag still means "train".
    if config.is_train == False:  # noqa: E712
        evaluate(model, data_test, config, word2id, entity2id, 0,
                 model_path=config.test_model_path)
        sys.exit()

    train(config, model, data_train, data_test, word2id, entity2id, model_optimizer)
def _pick_gpu_with_most_free_memory(excluded=(1,)):
    """Return ``(index, free_bytes)`` of the GPU with the most free memory.

    Devices whose index is in ``excluded`` are not considered. Returns
    ``(-1, 0)`` when no device qualifies.
    """
    # NOTE(review): the original code skipped device 1 unconditionally; the
    # reason is not visible here, so it is kept as the default.
    pynvml.nvmlInit()
    try:
        best_index, best_free = -1, 0
        for i in range(pynvml.nvmlDeviceGetCount()):
            if i in excluded:
                continue
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            if info.free > best_free:
                best_free = info.free
                best_index = i
    finally:
        # Release NVML once the query is done.
        pynvml.nvmlShutdown()
    return best_index, best_free


def main():
    """Entry point: pick a GPU, build data and model, then train or evaluate.

    Chooses the GPU with the most free memory and exits with status 0 if
    none has at least 1e9 bytes free. The choice is exported through
    ``CUDA_VISIBLE_DEVICES``. The configuration is then appended to
    ``config.result_dir_name`` and the model is either evaluated from
    ``config.test_model_path`` (when ``config.is_train`` equals ``False``)
    or trained.
    """
    # choose gpu with sufficient memory
    device_index, max_space = _pick_gpu_with_most_free_memory()
    if max_space < 1e9:
        print("no gpu with sufficient memory currently.")
        sys.exit(0)
    print("Running on device %d" % device_index)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device_index)

    config = Config('config.yml')
    config.list_all_member()

    raw_vocab, data_train, data_test = prepare_data(config)
    (word2id, entity2id, vocab, embed, entity_vocab, entity_embed,
     relation_vocab, relation_embed, entity_relation_embed,
     adj_table) = build_vocab(config.data_dir, raw_vocab, config=config)

    model = use_cuda(
        ConceptFlow(config, embed, entity_relation_embed, adj_table))
    model_optimizer = torch.optim.Adam(model.parameters(), lr=config.lr_rate)

    writer = SummaryWriter(config.tb_path)
    try:
        # Close the file before train() appends to the same path, so the
        # configuration lines are flushed ahead of the epoch results.
        with open(config.result_dir_name, 'a') as ppx_f:
            for name, value in vars(config).items():
                ppx_f.write('%s = %s' % (name, value) + '\n')

        # `== False` is kept on purpose: a None/unset flag still means "train".
        if config.is_train == False:  # noqa: E712
            evaluate(model, data_test, config, word2id, entity2id, 0, writer,
                     model_path=config.test_model_path)
            sys.exit()

        train(config, model, data_train, data_test, word2id, entity2id,
              model_optimizer, writer)
    finally:
        # Flush pending events, including on the SystemExit paths above.
        writer.close()
def main():
    """Entry point: generate responses for the test set from a checkpoint.

    Reads ``config.yml``, prepares vocabulary and test data, builds a
    ``ConceptFlow`` model, makes sure ``config.generated_path`` exists and
    calls ``generate`` with the checkpoint at ``config.test_model_path``.
    """
    config = Config('config.yml')
    config.list_all_member()

    raw_vocab, _, data_test = prepare_data(config)
    # Only some of the returned tables are needed for generation; the full
    # unpack is kept so an arity change in build_vocab still fails loudly.
    (word2id, entity2id, _vocab, embed, _entity_vocab, _entity_embed,
     _relation_vocab, _relation_embed, entity_relation_embed) = build_vocab(
        config.data_dir, raw_vocab, config=config)

    # No optimizer is built: this path never updates parameters.
    model = use_cuda(ConceptFlow(config, embed, entity_relation_embed))

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(config.generated_path, exist_ok=True)

    generate(model, data_test, config, word2id, entity2id,
             model_path=config.test_model_path)
def evaluate(model, data_test, config, word2id, entity2id, epoch = 0, is_test = False, model_path = None):
    """Report test-set perplexities for ``model``.

    ``data_test`` is consumed in consecutive slices of ``config.batch_size``
    examples; each slice is passed to ``run`` with ``model.is_inference`` set
    to ``True``. The per-sentence values are summed, averaged and
    exponentiated.

    Args:
        model: Model to evaluate. ``is_inference`` is ``True`` during the
            loop and reset to ``False`` afterwards.
        data_test: Sliceable sequence of test examples.
        config: Provides ``batch_size`` (and ``generated_text_name`` for the
            nested writer).
        word2id: Token-to-id mapping, forwarded to ``run``.
        entity2id: Entity-to-id mapping, forwarded to ``run``.
        epoch: Only used in the output file name of the nested writer.
        is_test: Not used by this function; kept for call compatibility.
        model_path: Optional checkpoint path. When given, the state dict is
            loaded before evaluation and ``SystemExit`` is raised after the
            perplexities have been printed.

    Returns:
        Tuple ``(ppx, ppx_word, ppx_local, ppx_only_two)``.
    """
    import sys  # function-scope import so the block does not rely on site's exit()

    if model_path is not None:
        model.load_state_dict(torch.load(model_path))

    sentence_ppx_loss = 0
    sentence_ppx_word_loss = 0
    sentence_ppx_local_loss = 0
    sentence_ppx_only_two_loss = 0
    word_cut = use_cuda(torch.Tensor([0]))
    local_cut = use_cuda(torch.Tensor([0]))
    only_two_cut = use_cuda(torch.Tensor([0]))
    model.is_inference = True

    id2word = {index: word for word, index in word2id.items()}

    # NOTE(review): nothing in this function calls write_batch_res_text; it
    # is kept so generated text can be dumped again if a call is restored.
    def write_batch_res_text(word_index, id2word, selector = None):
        """Append one JSON line per decoded sentence to the epoch's text file."""
        decoder_len = len(word_index[0])
        with open(config.generated_text_name + '_' + str(epoch) + '.txt', 'a') as w:
            if selector is None:
                return
            records = []
            for i in range(len(word_index)):
                tokens = []
                for j in range(decoder_len):
                    # Id 2 ends the sentence (presumably the EOS id - confirm
                    # against the vocabulary).
                    if word_index[i][j] == 2:
                        break
                    tokens.append(id2word[word_index[i][j]])
                records.append({
                    'res_text': tokens,
                    'local': [tok for j, tok in enumerate(tokens) if selector[i][j] == 1],
                    'only_two': [tok for j, tok in enumerate(tokens) if selector[i][j] == 2],
                })
            for record in records:
                w.write(json.dumps(record) + '\n')

    # Nothing is back-propagated here, so skip building autograd graphs.
    with torch.no_grad():
        for iteration in range(len(data_test) // config.batch_size):
            start = iteration * config.batch_size
            (decoder_loss, sentence_ppx, sentence_ppx_word, sentence_ppx_local,
             sentence_ppx_only_two, word_index, word_neg_num, local_neg_num,
             only_two_neg_num, selector) = run(
                model, data_test[start:start + config.batch_size], config,
                word2id, entity2id, model.is_inference)

            sentence_ppx_loss += torch.sum(sentence_ppx).data
            sentence_ppx_word_loss += torch.sum(sentence_ppx_word).data
            sentence_ppx_local_loss += torch.sum(sentence_ppx_local).data
            sentence_ppx_only_two_loss += torch.sum(sentence_ppx_only_two).data

            word_cut += word_neg_num
            local_cut += local_neg_num
            only_two_cut += only_two_neg_num

            if iteration % 50 == 0:
                print("iteration for evaluate:", iteration, "Loss:", decoder_loss.data)

    model.is_inference = False

    # NOTE(review): the loop skips a trailing partial batch, yet the averages
    # divide by len(data_test). Left unchanged so reported numbers stay
    # comparable with earlier runs - confirm whether this is intended.
    total = len(data_test)
    ppx = np.exp(sentence_ppx_loss.cpu() / total)
    ppx_word = np.exp(sentence_ppx_word_loss.cpu() / (total - int(word_cut)))
    ppx_local = np.exp(sentence_ppx_local_loss.cpu() / (total - int(local_cut)))
    ppx_only_two = np.exp(
        sentence_ppx_only_two_loss.cpu() / (total - int(only_two_cut)))

    print(' perplexity on test set:', ppx, ppx_word, ppx_local, ppx_only_two)

    if model_path is not None:
        # Stand-alone checkpoint evaluation: terminate after reporting.
        sys.exit()

    return ppx, ppx_word, ppx_local, ppx_only_two
def train(config, model, data_train, data_test, word2id, entity2id, model_optimizer, writer):
    """Train ``model`` for ``config.num_epoch`` epochs, evaluating after each.

    ``data_train`` is consumed in consecutive slices of ``config.batch_size``
    examples. For every slice the sum of the decoder and retrieval losses
    returned by ``run`` is back-propagated with gradient-norm clipping. Losses
    and perplexities are sent to ``writer`` and appended to
    ``config.log_dir``; the test metrics returned by ``evaluate`` are
    appended to ``config.result_dir_name``.

    Args:
        config: Provides ``num_epoch``, ``batch_size``, ``max_gradient_norm``,
            ``log_dir`` and ``result_dir_name``.
        model: Model being optimised.
        data_train: Sliceable sequence of training examples.
        data_test: Test data forwarded to ``evaluate``.
        word2id: Forwarded to ``run`` and ``evaluate``.
        entity2id: Forwarded to ``run`` and ``evaluate``.
        model_optimizer: Optimizer that steps ``model``'s parameters.
        writer: Object exposing ``add_scalar(tag, value, step)``.
    """
    count = 0  # global step across all epochs, used as the scalar x-axis
    start_time = time.time()

    for epoch in range(config.num_epoch):
        print("epoch: ", epoch)
        with open(config.log_dir, 'a') as f:
            f.write("epoch %d\n" % (epoch + 1))

        sentence_ppx_loss = 0
        sentence_ppx_word_loss = 0
        sentence_ppx_local_loss = 0
        word_cut = use_cuda(torch.Tensor([0]))
        local_cut = use_cuda(torch.Tensor([0]))

        for iteration in range(len(data_train) // config.batch_size):
            count += 1
            start = iteration * config.batch_size
            data = data_train[start:start + config.batch_size]

            (decoder_loss, retrieval_loss, sentence_ppx, sentence_ppx_word,
             sentence_ppx_local, word_neg_num, entity_neg_num) = run(
                model, data, config, word2id, entity2id)

            sentence_ppx_loss += torch.sum(sentence_ppx).data
            sentence_ppx_word_loss += torch.sum(sentence_ppx_word).data
            sentence_ppx_local_loss += torch.sum(sentence_ppx_local).data

            word_cut += word_neg_num
            local_cut += entity_neg_num

            model_optimizer.zero_grad()
            loss = decoder_loss + retrieval_loss
            loss.backward()
            # clip_grad_norm_ is the in-place successor of the deprecated
            # clip_grad_norm; arguments and effect are the same.
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_gradient_norm)
            model_optimizer.step()

            writer.add_scalar('train_loss/decoding_loss', decoder_loss.data, count)
            writer.add_scalar('train_loss/retrieval_loss', retrieval_loss.data, count)

            if count % 50 == 0:
                print("iteration:", iteration, "decode loss:", decoder_loss.data,
                      "retr loss:", retrieval_loss.data)
                print("time used: %ds" % (time.time() - start_time))
                with open(config.log_dir, 'a') as f:
                    f.write(
                        "iteration: %d decode loss: %.4f retr loss: %.4f total loss: %.4f\n"
                        % (iteration, decoder_loss.data, retrieval_loss.data, loss.data))

        # NOTE(review): the loop skips a trailing partial batch, yet the
        # averages divide by len(data_train). Left unchanged - confirm intent.
        ppl = np.exp(sentence_ppx_loss.cpu() / len(data_train))
        word_ppl = np.exp(
            sentence_ppx_word_loss.cpu() / (len(data_train) - int(word_cut)))
        entity_ppl = np.exp(
            sentence_ppx_local_loss.cpu() / (len(data_train) - int(local_cut)))

        writer.add_scalar('train_ppl/ppl', ppl, epoch + 1)
        writer.add_scalar('train_ppl/word_ppl', word_ppl, epoch + 1)
        # Tag spelling matches the test-side tag 'test_ppl/entity_ppl'.
        writer.add_scalar('train_ppl/entity_ppl', entity_ppl, epoch + 1)

        print("perplexity for epoch", epoch + 1, ":", ppl, " ppx_word: ", word_ppl,
              " ppx_entity: ", entity_ppl)
        with open(config.log_dir, 'a') as f:
            f.write(
                "perplexity for epoch%d: %.2f word ppl: %.2f entity ppl: %.2f\n"
                % (epoch + 1, ppl, word_ppl, entity_ppl))

        # NOTE: checkpoint saving is disabled in this variant:
        # torch.save(model.state_dict(), config.model_save_name + '_epoch_' + str(epoch + 1) + '.pkl')

        ppx, ppx_word, ppx_entity, recall = evaluate(
            model, data_test, config, word2id, entity2id, epoch + 1, writer)

        # The context manager closes the results file even if write() raises.
        with open(config.result_dir_name, 'a') as ppx_f:
            ppx_f.write("test entity recall for epoch %d: %.4f\n" % (epoch + 1, recall))
            ppx_f.write(
                "epoch " + str(epoch + 1) + " ppx: " + str(ppx)
                + " ppx_word: " + str(ppx_word) + " ppx_entity: " + str(ppx_entity)
                + '\n')
def evaluate(model, data_test, config, word2id, entity2id, epoch, writer, is_test=False, model_path=None):
    """Report test-set perplexities and entity recall/precision for ``model``.

    ``data_test`` is consumed in consecutive slices of ``config.batch_size``
    examples; each slice is passed to ``run`` with ``model.is_inference`` set
    to ``True``. When ``config.to_generate`` is truthy the decoded token ids
    of every slice are appended, as JSON lines, to
    ``<config.generated_text_name>_<epoch>.txt``.

    Args:
        model: Model to evaluate. ``is_inference`` is ``True`` during the
            loop and reset to ``False`` afterwards.
        data_test: Sliceable sequence of test examples.
        config: Provides ``batch_size``, ``to_generate``,
            ``generated_text_name`` and ``log_dir``.
        word2id: Token-to-id mapping; forwarded to ``run`` and inverted for
            text output.
        entity2id: Entity-to-id mapping, forwarded to ``run``.
        epoch: Step used for the per-epoch scalars and the output file name.
        writer: Object exposing ``add_scalar(tag, value, step)``.
        is_test: Not used by this function; kept for call compatibility.
        model_path: Optional checkpoint path. A truthy value is loaded before
            evaluation; any non-``None`` value makes the function print the
            three perplexities and raise ``SystemExit`` without logging
            further metrics.

    Returns:
        Tuple ``(ppl, word_ppl, entity_ppl, entity_recall)``.
    """
    import sys  # function-scope import so the block does not rely on site's exit()

    if model_path:
        model.load_state_dict(torch.load(model_path))

    sentence_ppx_loss = 0
    sentence_ppx_word_loss = 0
    sentence_ppx_local_loss = 0
    entity_recall = 0
    entity_precision = 0
    total_graph_size = 0
    word_cut = use_cuda(torch.Tensor([0]))
    local_cut = use_cuda(torch.Tensor([0]))
    count = 0
    model.is_inference = True

    id2word = {index: word for word, index in word2id.items()}

    def write_batch_res_text(word_index, id2word, selector=None):
        """Append one ``{'res_text': [...]}`` JSON line per decoded sentence."""
        decoder_len = len(word_index[0])
        with open(config.generated_text_name + '_' + str(epoch) + '.txt', 'a') as w:
            records = []
            for i in range(len(word_index)):
                tokens = []
                for j in range(decoder_len):
                    # Id 2 ends the sentence (presumably the EOS id - confirm
                    # against the vocabulary).
                    if word_index[i][j] == 2:
                        break
                    tokens.append(id2word[word_index[i][j]])
                records.append({'res_text': tokens})
            for record in records:
                w.write(json.dumps(record) + '\n')

    iter_time = len(data_test) // config.batch_size
    # Nothing is back-propagated here, so skip building autograd graphs.
    with torch.no_grad():
        for iteration in range(iter_time):
            count += 1
            start = iteration * config.batch_size
            data = data_test[start:start + config.batch_size]

            (decoder_loss, sentence_ppx, sentence_ppx_word, sentence_ppx_local,
             word_neg_num, entity_neg_num, recall, precision, graph_size,
             word_index) = run(model, data, config, word2id, entity2id,
                               model.is_inference)

            sentence_ppx_loss += torch.sum(sentence_ppx).data
            sentence_ppx_word_loss += torch.sum(sentence_ppx_word).data
            sentence_ppx_local_loss += torch.sum(sentence_ppx_local).data
            entity_recall += recall
            entity_precision += precision
            total_graph_size += graph_size / config.batch_size

            word_cut += word_neg_num
            local_cut += entity_neg_num

            if config.to_generate:
                write_batch_res_text(word_index, id2word)

            writer.add_scalar('test_loss/', decoder_loss.data, count)
            if count % 50 == 0:
                print("iteration for evaluate:", count, "loss:", decoder_loss.data)

    # Averages over the number of processed batches.
    entity_recall /= count
    entity_precision /= count
    total_graph_size /= count
    model.is_inference = False

    # NOTE(review): the loop skips a trailing partial batch, yet the averages
    # divide by len(data_test). Left unchanged - confirm intent.
    total = len(data_test)
    ppl = np.exp(sentence_ppx_loss.cpu() / total)
    word_ppl = np.exp(sentence_ppx_word_loss.cpu() / (total - int(word_cut)))
    entity_ppl = np.exp(sentence_ppx_local_loss.cpu() / (total - int(local_cut)))

    if model_path is not None:
        # Stand-alone checkpoint evaluation: report and terminate.
        print('perplexity on test set:', ppl, word_ppl, entity_ppl)
        sys.exit()

    print('perplexity on test set:', ppl, "word ppl: ", word_ppl, 'entity ppl: ', entity_ppl)
    print("response entity recall: ", entity_recall, ' precision: ', entity_precision,
          "graph size:", total_graph_size)

    writer.add_scalar('test_ppl/ppl', ppl, epoch)
    writer.add_scalar('test_ppl/word_ppl', word_ppl, epoch)
    writer.add_scalar('test_ppl/entity_ppl', entity_ppl, epoch)
    writer.add_scalar('recall', entity_recall, epoch)
    writer.add_scalar('precision', entity_precision, epoch)
    writer.add_scalar('graph_size', total_graph_size, epoch)

    with open(config.log_dir, 'a') as f:
        f.write(
            "perplexity on testset: %.2f word ppl: %.2f entity ppl: %.2f\n"
            % (ppl, word_ppl, entity_ppl))
        f.write("response entity recall: %.2f; precision: %.2f\n"
                % (entity_recall, entity_precision))

    return ppl, word_ppl, entity_ppl, entity_recall