def evaluate_model(evalparams):
    torch.manual_seed(evalparams.seed)
    random.seed(1234)
    if evalparams.cpu:
        evalparams.cuda = False
    elif evalparams.cuda:
        torch.cuda.manual_seed(evalparams.seed)

    # load opt
    print(evalparams.model_dir, evalparams.model)
    model_file = evalparams.model_dir + "/" + evalparams.model
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = evalparams.model_dir + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model."

    # load data
    data_file = opt['data_dir'] + '/{}.json'.format(evalparams.dataset)
    print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size']))
    batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

    helper.print_config(opt)
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)

    # save probability scores
    if len(evalparams.out) > 0:
        helper.ensure_dir(os.path.dirname(evalparams.out))
        with open(evalparams.out, 'wb') as outfile:
            pickle.dump(all_probs, outfile)
        print("Prediction scores saved to {}.".format(evalparams.out))

    print("Evaluation ended.")
    return (batch.gold(), predictions, model)
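# Minimal usage sketch for evaluate_model (assumptions: the attribute names below
# mirror the evaluation arguments used elsewhere in this repo; 'saved_models/00'
# and 'test' are hypothetical placeholder values, not paths from the original code).
# eval_args = argparse.Namespace(model_dir='saved_models/00', model='best_model.pt',
#                                dataset='test', out='', seed=1234, cuda=True, cpu=False)
# gold_labels, predictions, model = evaluate_model(eval_args)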
def load_best_model(model_dir, model_type="predictor"):
    model_file = model_dir + "/best_model.pt"
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    if model_type == "predictor":
        predictor = Predictor(model_opt)
        model = Trainer(model_opt, predictor, model_type=model_type)
    else:
        selector = Selector(model_opt)
        model = Trainer(model_opt, selector, model_type=model_type)
    model.load(model_file)
    helper.print_config(model_opt)
    return model
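# Usage sketch for load_best_model (assumption: 'saved_models/predictor' and
# 'saved_models/selector' are hypothetical checkpoint directories, each containing
# a best_model.pt written by one of the training loops below).
# predictor_trainer = load_best_model('saved_models/predictor', model_type='predictor')
# selector_trainer = load_best_model('saved_models/selector', model_type='selector')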
dev_batch = DataLoader(opt['data_dir'] + '/dev.json', opt['batch_size'], opt, vocab,
                       evaluation=True)

model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
model_save_dir = opt['save_dir'] + '/' + model_id
opt['model_save_dir'] = model_save_dir
helper.ensure_dir(model_save_dir, verbose=True)

# save config
helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
vocab.save(model_save_dir + '/vocab.pkl')
file_logger = helper.FileLogger(
    model_save_dir + '/' + opt['log'],
    header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")

# print model info
helper.print_config(opt)

# model
if not opt['load']:
    trainer = GCNTrainer(opt, emb_matrix=emb_matrix)
else:
    # load pretrained model
    model_file = opt['model_file']
    print("Loading model from {}".format(model_file))
    model_opt = torch_utils.load_config(model_file)
    model_opt['optim'] = opt['optim']
    trainer = GCNTrainer(model_opt)
    trainer.load(model_file)

id2label = dict([(v, k) for k, v in label2id.items()])
dev_score_history = []
def transre_search(ffn, connect, hidden_dim, trans_layers, multi_heads, ffn_ex_size,
                   initial, final):
    opt['weighted'] = False
    opt['rnn'] = False
    opt['ffn'] = ffn
    opt['connect'] = connect
    opt['hidden_dim'] = hidden_dim
    opt['trans_layers'] = trans_layers
    opt['multi_heads'] = multi_heads
    opt['ffn_ex_size'] = ffn_ex_size
    opt['initial'] = initial
    opt['final'] = final

    id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_name = str(opt['optim']) + '_' + str(opt['lr']) + str(ffn) + '_' + str(connect) + '_' \
        + str(hidden_dim) + '_' + str(trans_layers) + '_' + str(multi_heads) + '_' \
        + str(ffn_ex_size) + '_' + str(initial) + '_' + str(final)
    model_name = model_name + str(opt['memo'])
    model_name = str(id) + '_' + model_name
    model_save_dir = opt['save_dir'] + '/' + model_name
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")
    helper.print_config(opt)

    if not opt['load']:
        trainer = TransTrainer(opt, emb_matrix=emb_matrix)
    else:
        # load pre-trained model
        model_file = opt['model_file']
        print("Loading model from {}".format(model_file))
        model_opt = torch_utils.load_config(model_file)
        model_opt['optim'] = opt['optim']
        trainer = TransTrainer(model_opt)
        trainer.load(model_file)

    id2label = dict([(v, k) for k, v in label2id.items()])  # the classification labels
    dev_score_history = []
    dev_loss_history = []
    current_lr = opt['lr']
    global_step = 0
    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']
    best_result = "unknown"
    file_logger.log(str(opt['memo']))

    for epoch in range(1, opt['num_epoch'] + 1):
        train_loss = 0
        epoch_start_time = time.time()
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss, norm = trainer.update(batch)
            train_loss += loss
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                                        opt['num_epoch'], loss, duration, current_lr))

        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss, _ = trainer.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]

        train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        acc, dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)
        print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))
        dev_score = dev_f1
        file_logger.log("{}\t{:.3f}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
            epoch, acc, train_loss, dev_loss, dev_score,
            max([dev_score] + dev_score_history)))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        trainer.save(model_file, epoch)
        if epoch == 1 or dev_score > max(dev_score_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            best_result = (model_name, dev_score)
            print("new best model saved.")
            file_logger.log("new best model saved at epoch {}: {:.2f}\t{:.2f}\t{:.2f}".format(
                epoch, dev_p * 100, dev_r * 100, dev_score * 100))
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # lr schedule
        if len(dev_score_history) > opt['decay_epoch'] and dev_score <= dev_score_history[-1] \
                and opt['optim'] in ['sgd', 'adagrad', 'adadelta']:
            current_lr *= opt['lr_decay']
            trainer.update_lr(current_lr)

        dev_score_history += [dev_score]
        dev_loss_history += [dev_loss]
        epoch_end_time = time.time()
        print("epoch time {:.3f}".format(epoch_end_time - epoch_start_time))

    return best_result
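# Grid-search driver sketch for transre_search (assumption: the value grids and the
# string settings below are illustrative placeholders, not the configurations used
# in the original experiments).
# from itertools import product
# results = []
# for ffn, hidden_dim, trans_layers in product([True, False], [200, 360], [2, 4]):
#     results.append(transre_search(ffn, connect='residual', hidden_dim=hidden_dim,
#                                   trans_layers=trans_layers, multi_heads=4,
#                                   ffn_ex_size=4, initial='relu', final='relu'))
# print(sorted(results, key=lambda r: r[1], reverse=True)[:3])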
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/')
    parser.add_argument('--save_dir', type=str, default='saved_models')
    # Model parameters
    parser.add_argument('--emb_dim', type=int, default=50, help='Word embedding dimension.')
    parser.add_argument('--pos_dim', type=int, default=5, help='Position embedding dimension.')
    parser.add_argument('--pos_limit', type=int, default=30, help='Position embedding length limit.')
    parser.add_argument('--num_conv', type=int, default=230, help='The number of convolutional filters.')
    parser.add_argument('--win_size', type=int, default=3, help='Convolutional filter size.')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='The rate at which a parameter is randomly set to 0.')
    parser.add_argument('--lr', type=float, default=0.01, help='Applies to SGD.')
    parser.add_argument('--num_epoch', type=int, default=15)
    parser.add_argument('--num_rand_start', type=int, default=30)
    parser.add_argument('--penal_scalar', type=int, default=500)
    parser.add_argument('--adaplr', dest='adaplr', action='store_true',
                        help='Use bag-size adaptive learning rate.')
    parser.add_argument('--no-adaplr', dest='adaplr', action='store_false')
    parser.set_defaults(adaplr=True)
    parser.add_argument('--adaplr_beta1', type=float, default=20.0)
    parser.add_argument('--adaplr_beta2', type=float, default=25.0)
    parser.add_argument('--sen_file', type=str, default='sentential_DEV.txt',
                        help='Sentential eval dataset.')
    parser.add_argument('--heldout_eval', type=bool, default=False,
                        help='Perform heldout evaluation after each epoch.')
    parser.add_argument('--save_each_epoch', type=bool, default=False,
                        help='Save the checkpoint of each epoch.')
    # parser.add_argument('--seed', type=int, default=666)
    parser.add_argument('--trial_exp', dest='trial', action='store_true',
                        help='Use partial training data.')
    parser.set_defaults(trial=False)
    parser.add_argument('--num_trial', type=int, default=10000)
    parser.add_argument('--log_step', type=int, default=20000)
    parser.add_argument('--num_exp', type=int, default=0)
    parser.add_argument('--cuda', type=bool, default=torch.cuda.is_available())
    parser.add_argument('--cpu', action='store_true', help='Ignore CUDA.')
    args = parser.parse_args()
    if args.cpu:
        args.cuda = False

    # # Set random seed
    # torch.manual_seed(args.seed)
    # np.random.seed(args.seed)
    # random.seed(args.seed)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    # if args.cuda:
    #     torch.cuda.manual_seed(args.seed)

    # make opt
    opt = vars(args)
    opt['train_file'] = opt['data_dir'] + '/' + 'train.txt'
    opt['test_file'] = opt['data_dir'] + '/' + 'test.txt'
    opt['sen_dev_file'] = opt['data_dir'] + '/' + 'sentential_DEV.txt'
    opt['vocab_file'] = opt['data_dir'] + '/' + 'vec.bin'
    opt['rel_file'] = opt['data_dir'] + '/' + 'relation2id.txt'
    if opt['data_dir'].split('/')[-1] != '':
        opt['data_name'] = opt['data_dir'].split('/')[-1]
    else:
        opt['data_name'] = opt['data_dir'].split('/')[-2]

    # Pretrained word embedding
    print "\nLoading pretrained word embedding"
    w2v_model = gensim.models.KeyedVectors.load_word2vec_format(opt['vocab_file'], binary=True)
    word_list = [u'UNK'] + w2v_model.index2word
    word_vec = w2v_model.syn0
    word2id = {}
    for id, word in enumerate(word_list):
        word2id[word] = id
    assert opt['emb_dim'] == w2v_model.syn0.shape[1]

    # Read from relation2id.txt to build a dictionary: rel2id
    rel2id = {}
    with open(opt['rel_file'], 'rb') as f:
        for item in f:
            [relation, id] = item.strip('\n').split(' ')
            rel2id[relation] = int(id)
    id2rel = [''] * len(rel2id)
    for relation, rel_id in rel2id.items():
        id2rel[rel_id] = relation
    opt['num_rel'] = len(rel2id)
    opt['vocab_size'] = len(word_list)

    # Load data
    all_data = loader.DataLoader(opt, word2id, rel2id)
    opt['pos_e1_size'] = all_data.pos_max_e1 - all_data.pos_min_e1 + 1
    opt['pos_e2_size'] = all_data.pos_max_e2 - all_data.pos_min_e2 + 1
    opt['pos_min_e1'] = all_data.pos_min_e1
    opt['pos_min_e2'] = all_data.pos_min_e2
    opt['EP_num_train'] = len(all_data.bags_train)
    opt['EP_num_test'] = len(all_data.bags_test)
    assert opt['pos_e1_size'] == opt['pos_e2_size']
    helper.check_dir(opt['save_dir'])
    helper.print_config(opt)

    # Get KB disagreement penalty
    kb_score_all = kb_info.get_MIT_MID_score(all_data.bags_train, all_data.train_bags_label,
                                             opt, rel2id, id2rel)
    # Get hamming score
    ham_score_all = kb_info.getting_hamming_score(all_data.bags_train,
                                                  all_data.train_bags_label, opt)

    # Build the model
    PCNN_NMAR_model = PCNN_NMAR(word_vec, opt)
    if opt['cuda']:
        PCNN_NMAR_model.cuda()
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(PCNN_NMAR_model.parameters(), lr=opt['lr'])

    print "Training starts."
    for epoch in xrange(opt['num_epoch']):
        opt['epoch'] = epoch
        start_time = time.time()
        total_loss = np.float64(0.0)
        train_part = all_data.bags_train.keys()[:]
        if opt['trial']:
            train_part = train_part[:opt['num_trial']]
        random.shuffle(train_part)

        for index, bag_name in enumerate(train_part):
            if index > 0 and index % opt['log_step'] == 0:
                print '{}: train examples {}/{} (epoch {}/{}), loss = {:.6f} '.format(
                    datetime.now(), index, opt['EP_num_train'], epoch + 1,
                    opt['num_epoch'], total_loss)
            optimizer.zero_grad()
            sentence_list = all_data.bags_train[bag_name]
            target = all_data.train_bags_label[bag_name]
            kb_score = kb_score_all[bag_name]
            ham_score = ham_score_all[bag_name]
            BPable_loss, loss_augmented = PCNN_NMAR_model(sentence_list, target, all_data,
                                                          kb_score, ham_score)
            # Check if there is a search error
            assert loss_augmented >= 0
            total_loss += loss_augmented

            # Apply bag-size adaptive learning rate
            if opt['adaplr']:
                if len(sentence_list) <= opt['adaplr_beta1']:
                    adaplr_scalar = 1
                elif len(sentence_list) <= opt['adaplr_beta2']:
                    adaplr_scalar = float(opt['adaplr_beta1']) / len(sentence_list)
                else:
                    adaplr_scalar = (float(opt['adaplr_beta1']) / len(sentence_list)) ** 2
                BPable_loss = BPable_loss * adaplr_scalar
            BPable_loss.backward()
            optimizer.step()

        stop_time = time.time()
        print 'For epoch {}/{}, training time: {}, training loss: {:.6f}'.format(
            epoch + 1, opt['num_epoch'], stop_time - start_time, total_loss)

        # Sentential evaluation
        sen_AUC = PCNN_NMAR_model.sentential_eval(opt['sen_dev_file'], all_data, rel2id, id2rel)
        print 'The sentential AUC of the P/R curve on the DEV set: {:.3f}'.format(sen_AUC)

        # Heldout evaluation
        if opt['heldout_eval']:
            recall, precision = PCNN_NMAR_model.heldout_eval(all_data)
            heldout_AUC = metrics.auc(recall, precision) if len(recall) != 0 else 0
            print "The heldout AUC of the P/R curve: {:.4f}".format(heldout_AUC)

        # Save parameters in each epoch
        model_file = opt['save_dir'] + '/' + opt['data_name'] + '_' + \
            'lr{}_penal{}_epoch{}.tar'.format(opt['lr'], opt['penal_scalar'], epoch)
        # print model_file
        if opt['save_each_epoch']:
            torch.save({'state_dict': PCNN_NMAR_model.state_dict(), 'config': opt}, model_file)
        best_file = opt['save_dir'] + '/' + opt['data_name'] + '_' + \
            'lr{}_penal{}_best_model.tar'.format(opt['lr'], opt['penal_scalar'])
        if epoch == 0 or best_AUC < sen_AUC:
            best_AUC = sen_AUC
            torch.save({'state_dict': PCNN_NMAR_model.state_dict(), 'config': opt}, best_file)
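# Sketch of the bag-size adaptive learning-rate scaling used in the training loop
# above, factored into a standalone helper for clarity (the helper name and the
# example values are illustrative additions, not part of the original code).
def bag_adaplr_scalar(bag_size, beta1=20.0, beta2=25.0):
    # Bags with up to beta1 sentences keep the full gradient; between beta1 and
    # beta2 the loss is scaled down linearly in 1/bag_size; beyond beta2 quadratically.
    if bag_size <= beta1:
        return 1.0
    elif bag_size <= beta2:
        return float(beta1) / bag_size
    else:
        return (float(beta1) / bag_size) ** 2

# Example: a 40-sentence bag contributes (20/40)**2 = 0.25 of its loss.
# print bag_adaplr_scalar(40)  # 0.25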
def train_unbiased_model(args, biased_batch_probs):
    # make opt
    opt = vars(args)
    opt["num_class"] = len(constant.LABEL_TO_ID)

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    # load data
    print("Loading data from {} with batch size {}...".format(opt["data_dir"], opt["batch_size"]))
    train_batch = DataLoader(opt["data_dir"] + "/" + args.data_name, opt["batch_size"], opt,
                             vocab, evaluation=False)
    dev_batch = DataLoader(opt["data_dir"] + "/dev.json", opt["batch_size"], opt, vocab,
                           evaluation=True)

    model_id = opt["id"] if len(opt["id"]) > 1 else "0" + opt["id"]
    model_save_dir = opt["save_dir"] + "/" + model_id
    opt["model_save_dir"] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + "/config.json", verbose=True)
    vocab.save(model_save_dir + "/vocab.pkl")
    file_logger = helper.FileLogger(model_save_dir + "/" + opt["log"],
                                    header="# epoch\ttrain_loss\tdev_loss\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    dev_f1_history = []
    current_lr = opt["lr"]
    global_step = 0
    global_start_time = time.time()
    format_str = "{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}"
    max_steps = len(train_batch) * opt["num_epoch"]

    # start training
    for epoch in range(1, opt["num_epoch"] + 1):
        train_loss = 0
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = model.update(batch, torch.tensor(biased_batch_probs[i]).cuda())
            train_loss += loss
            if global_step % opt["log_step"] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                                        opt["num_epoch"], loss, duration, current_lr))

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)
        with open("label.txt", "w+") as f:
            f.write(str(dev_batch.gold()))

        train_loss = train_loss / train_batch.num_examples * opt["batch_size"]  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt["batch_size"]
        print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(epoch, train_loss, dev_loss, dev_f1))

        # save
        model_file = model_save_dir + "/checkpoint_epoch_{}.pt".format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + "/best_model.pt")
            print("new best model saved.")
        if epoch % opt["save_epoch"] != 0:
            os.remove(model_file)

        # lr schedule
        if (len(dev_f1_history) > 10 and dev_f1 <= dev_f1_history[-1]
                and opt["optim"] in ["sgd", "adagrad"]):
            current_lr *= opt["lr_decay"]
            model.update_lr(current_lr)
        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
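# Calling sketch for train_unbiased_model (assumptions: biased_batch_probs is a
# list with one probability array per training batch, produced beforehand by a
# biased model; args is the usual argparse namespace used in this repo, and
# num_train_batches / np.ones(args.batch_size) are illustrative placeholders).
# biased_batch_probs = [np.ones(args.batch_size) for _ in range(num_train_batches)]
# train_unbiased_model(args, biased_batch_probs)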
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/')
    # parser.add_argument('--train_file', type=str, default='data/train.txt')
    # parser.add_argument('--test_file', type=str, default='data/test.txt')
    # parser.add_argument('--vocab_file', type=str, default='data/vec.bin')
    # parser.add_argument('--rel_file', type=str, default='data/relation2id.txt')
    parser.add_argument('--save_dir', type=str, default='saved_models')
    # Model parameters
    parser.add_argument('--emb_dim', type=int, default=50, help='Word embedding dimension.')
    parser.add_argument('--pos_dim', type=int, default=5, help='Position embedding dimension.')
    parser.add_argument('--pos_limit', type=int, default=30, help='Position embedding length limit.')
    parser.add_argument('--num_conv', type=int, default=230, help='The number of convolutional filters.')
    parser.add_argument('--win_size', type=int, default=3, help='Convolutional filter size.')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='The rate at which a parameter is randomly set to 0.')
    parser.add_argument('--lr', type=float, default=0.001, help='Applies to SGD.')
    parser.add_argument('--num_epoch', type=int, default=15)
    parser.add_argument('--num_trial', type=int, default=50000)
    parser.add_argument('--trial', type=bool, default=False)
    parser.add_argument('--cuda', type=bool, default=torch.cuda.is_available())
    parser.add_argument('--cpu', action='store_true', help='Ignore CUDA.')
    args = parser.parse_args()
    if args.cpu:
        args.cuda = False

    # make opt
    opt = vars(args)
    opt['train_file'] = opt['data_dir'] + '/' + 'train.txt'
    opt['test_file'] = opt['data_dir'] + '/' + 'test.txt'
    opt['vocab_file'] = opt['data_dir'] + '/' + 'vec.bin'
    opt['rel_file'] = opt['data_dir'] + '/' + 'relation2id.txt'

    # Pretrained word embedding
    print "Load pretrained word embedding"
    w2v_model = gensim.models.KeyedVectors.load_word2vec_format(opt['vocab_file'], binary=True)
    word_list = [u'UNK'] + w2v_model.index2word
    word_vec = w2v_model.syn0
    word_map = {}
    for id, word in enumerate(word_list):
        word_map[word] = id
    assert opt['emb_dim'] == w2v_model.syn0.shape[1]

    # Read from relation2id.txt to build a dictionary: rel_map
    rel_map = {}
    with open(opt['rel_file'], 'rb') as f:
        for item in f:
            [relation, id] = item.strip('\n').split(' ')
            rel_map[relation] = int(id)
    opt['num_rel'] = len(rel_map)
    opt['vocab_size'] = len(word_list)

    # Load data
    all_data = loader.DataLoader(opt['train_file'], opt['test_file'], opt, word_map, rel_map)
    opt['pos_e1_size'] = all_data.pos_max_e1 - all_data.pos_min_e1 + 1
    opt['pos_e2_size'] = all_data.pos_max_e2 - all_data.pos_min_e2 + 1
    opt['pos_min_e1'] = all_data.pos_min_e1
    opt['pos_min_e2'] = all_data.pos_min_e2
    assert opt['pos_e1_size'] == opt['pos_e2_size']
    helper.check_dir(opt['save_dir'])
    helper.print_config(opt)

    PCNN_ATT_model = PCNN_ATT(word_vec, opt)
    PCNN_ATT_model.cuda()
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(PCNN_ATT_model.parameters(), lr=opt['lr'])
    start_time = time.time()
    print "Training starts."
    for epoch in xrange(opt['num_epoch']):
        print 'The running time of epoch %d:' % (epoch),
        total_loss = torch.Tensor([0]).cuda()
        if opt['trial']:
            train_part = all_data.bags_train.keys()[:opt['num_trial']]
        else:
            train_part = all_data.bags_train.keys()[:]
        shuffle(train_part)

        for index, bag_name in enumerate(train_part):
            # if index % 10000 == 0:
            #     print 'index == ', index
            optimizer.zero_grad()
            sentence_list = all_data.bags_train[bag_name]
            target = int(all_data.train_rel[sentence_list[0]])
            try:
                log_probs = PCNN_ATT_model(sentence_list, target, all_data)
            except:
                print index, len(sentence_list)
                raise
            target = autograd.Variable(torch.LongTensor([target]).cuda())
            loss = loss_function(log_probs, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.data

        # Eval and get the AUC
        recall, precision = PCNN_ATT_model.test(all_data)
        test_AUC = metrics.auc(recall, precision)

        # Save parameters in each epoch
        model_file = opt['save_dir'] + '/checkpoint_epoch_%s.tar' % epoch
        torch.save({'state_dict': PCNN_ATT_model.state_dict()}, model_file)
        best_file = opt['save_dir'] + '/best_model.tar'
        if epoch == 0 or best_AUC < test_AUC:
            best_AUC = test_AUC
            torch.save({'state_dict': PCNN_ATT_model.state_dict()}, best_file)

        stop_time = time.time()
        print '%f; the total loss: %f; the AUC of P/R curve: %f' % (
            stop_time - start_time, total_loss.cpu().numpy()[0], test_AUC)
        start_time = stop_time
def main():
    # set top-level random seeds
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    if args.cpu:
        args.cuda = False
    elif args.cuda:
        # force a fixed random seed for reproducibility;
        # the same seed is also applied to numpy in every file
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    # make opt
    opt = vars(args)
    opt['num_class'] = len(constant.LABEL_TO_ID)

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    # In some previous experiments a smaller vocab improved performance, but that was
    # in a different project (albeit on the same data); here it seems much harder to
    # get this to work. Uncomment the following line if this is solved:
    # new_vocab_size = 30000
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    # load data
    print("Loading data from {} with batch size {}...".format(opt['data_dir'], opt['batch_size']))
    train_batch = DataLoader(opt['data_dir'] + '/train.json', opt['batch_size'], opt, vocab,
                             evaluation=False)
    dev_batch = DataLoader(opt['data_dir'] + '/dev.json', opt['batch_size'], opt, vocab,
                           evaluation=True)

    model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_save_dir = opt['save_dir'] + '/' + model_id
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_p\tdev_r\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    dev_f1_history = []
    current_lr = opt['lr']
    global_step = 0
    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']

    # setup the scheduler for lr decay
    # (this doesn't seem to work well compared to the schedule used below)
    # scheduler = ReduceLROnPlateau(model.optimizer, mode='min', factor=opt['lr_decay'], patience=1)

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        # TODO: if lr warmup is used, the lr console output is not updated
        print("Current params: "
              + " heads-" + str(opt["n_head"])
              + " enc_layers-" + str(opt["num_layers_encoder"])
              + " drop-" + str(opt["dropout"])
              + " scaled_drop-" + str(opt["scaled_dropout"])
              + " lr-" + str(opt["lr"])
              + " lr_decay-" + str(opt["lr_decay"])
              + " max_grad_norm-" + str(opt["max_grad_norm"]))
        print(" weight_no_rel-" + str(opt["weight_no_rel"])
              + " weight_rest-" + str(opt["weight_rest"])
              + " attn-" + str(opt["attn"])
              + " attn_dim-" + str(opt["attn_dim"])
              + " obj_sub_pos-" + str(opt["obj_sub_pos"])
              + " new_residual-" + str(opt["new_residual"]))
        print(" use_batch_norm-" + str(opt["use_batch_norm"])
              + " relative_positions-" + str(opt["relative_positions"])
              + " decay_epoch-" + str(opt["decay_epoch"])
              + " use_lemmas-" + str(opt["use_lemmas"])
              + " hidden_self-" + str(opt["hidden_self"]))

        train_loss = 0
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = model.update(batch)
            train_loss += float(loss)
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                                        opt['num_epoch'], loss, duration, current_lr))
            # do garbage collection, as per
            # https://discuss.pytorch.org/t/best-practices-for-maximum-gpu-utilization/13863/6
            del loss

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += float(loss)
            del loss
        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_p, dev_r, dev_f1))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # reduce the learning rate when dev F1 stagnates, after a given number of
        # patience epochs (for some reason this works worse than ReduceLROnPlateau,
        # see the commented-out scheduler above)
        # scheduler.step(dev_loss)
        if opt["optim"] != "noopt_adam" and opt["optim"] != "noopt_nadam":
            # do warmup for sgd only, instead of adam
            do_warmup_trick = False
            if do_warmup_trick:
                # warmup schedule; 1 and 5 first worked reasonably, 10 and 15 are the current values
                current_lr = 10 * (360 ** (-0.5) * min(epoch ** (-0.5), epoch * 15 ** (-1.5)))
                model.update_lr(current_lr)
            else:
                # decay schedule, simulating a patience of opt['decay_epoch'] epochs
                # (a patience of 15 epochs worked best)
                if len(dev_f1_history) > opt['decay_epoch'] and dev_f1 <= dev_f1_history[-1]:
                    current_lr *= opt['lr_decay']
                    model.update_lr(current_lr)
        # else, the learning rate is updated in torch_utils.py

        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
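# Sketch of the warmup/decay schedule behind do_warmup_trick above, factored out for
# clarity (the function name, parameter names, and the example values are illustrative
# additions, not part of the original code).
def warmup_lr(epoch, scale=10.0, model_dim=360, warmup=15):
    # Transformer-style schedule: the lr rises for roughly `warmup` epochs,
    # then decays proportionally to epoch ** -0.5.
    return scale * (model_dim ** -0.5) * min(epoch ** -0.5, epoch * warmup ** -1.5)

# for e in (1, 5, 15, 30):
#     print("epoch {}: lr = {:.6f}".format(e, warmup_lr(e)))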
def train_model(vocab_params, train_params, train_batch, dev_batch, model_id=-1):
    torch.manual_seed(train_params.seed)
    np.random.seed(train_params.seed)
    random.seed(train_params.seed)
    if train_params.cpu:
        train_params.cuda = False
    elif train_params.cuda:
        torch.cuda.manual_seed(train_params.seed)

    # make opt
    opt = vars(vocab_params)
    print(constant.LABEL_TO_ID)
    print(opt)
    opt['num_class'] = len(constant.LABEL_TO_ID)
    # combine all the parameters together
    opt.update(vars(train_params))

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    if model_id == -1:
        model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_save_dir = opt['save_dir'] + '/' + model_id
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(model_save_dir + '/' + opt['log'],
                                    header="# epoch\ttrain_loss\tdev_loss\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    dev_f1_history = []
    current_lr = opt['lr']
    global_step = 0
    global_start_time = time.time()
    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        train_loss = 0
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = model.update(batch)
            train_loss += loss
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                                        opt['num_epoch'], loss, duration, current_lr))

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(epoch, train_loss, dev_loss, dev_f1))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # lr schedule
        if len(dev_f1_history) > 10 and dev_f1 <= dev_f1_history[-1] and \
                opt['optim'] in ['sgd', 'adagrad']:
            current_lr *= opt['lr_decay']
            model.update_lr(current_lr)
        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
def main():
    args = get_parser()

    # set seed and prepare for training
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    if args.cpu:
        args.cuda = False
    elif args.cuda:
        torch.cuda.manual_seed(args.seed)
    init_time = time.time()

    # make opt
    opt = vars(args)
    TEXT, train_batch, dev_batch = load_data(opt['batch_size'], device='cuda:0')
    vocab = TEXT.vocab
    opt['vocab_size'] = len(vocab.stoi)
    emb_matrix = vocab.vectors
    assert emb_matrix.shape[0] == opt['vocab_size']
    assert emb_matrix.shape[1] == opt['emb_dim']

    model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_save_dir = opt['save_dir'] + '/' + str(model_id)
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    path = os.path.join(model_save_dir, 'config.json')
    helper.save_config(opt, path, verbose=True)
    # vocab.save(os.path.join(model_save_dir, 'vocab.pkl'))
    file_logger = helper.FileLogger(
        os.path.join(model_save_dir, opt['log']),
        header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")

    # print model info
    helper.print_config(opt)

    # build model
    if not opt['load']:
        trainer = LSTMTrainer(opt, emb_matrix)
    else:
        model_file = opt['model_file']
        print("Loading model from {}".format(model_file))
        model_opt = torch_utils.load_config(model_file)
        model_opt['optim'] = opt['optim']
        trainer = LSTMTrainer(model_opt)
        trainer.load(model_file)

    dev_score_history = []
    current_lr = opt['lr']
    global_step = 0
    global_start_time = time.time()
    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        train_loss = 0
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = trainer.update(batch)
            train_loss += loss
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                                        opt['num_epoch'], loss, duration, current_lr))

        # eval on dev
        print("Evaluating on dev set ...")
        predictions = []
        golds = []
        dev_loss = 0.0
        for i, batch in enumerate(dev_batch):
            preds, probs, labels, loss = trainer.predict(batch)
            predictions += preds
            golds += labels
            dev_loss += loss
        train_loss = train_loss / len(train_batch)
        dev_loss = dev_loss / len(dev_batch)

        # print(golds)
        # print(predictions)
        print(accuracy_score(golds, predictions))
        dev_roc = roc_auc_score(golds, predictions)
        print("epoch {}: train loss = {:.6f}, dev loss = {:.6f}, dev roc = {:.4f}".format(
            epoch, train_loss, dev_loss, dev_roc))
        dev_score = dev_roc
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_score, max([dev_score] + dev_score_history)))

        # save model
        model_file = os.path.join(model_save_dir, "checkpoint_epoch_{}.pt".format(epoch))
        trainer.save(model_file, epoch)
        if epoch == 1 or dev_score > max(dev_score_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
            file_logger.log("new best model saved at epoch {}: {:.2f}".format(
                epoch, dev_score * 100))
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # lr schedule
        if len(dev_score_history) > opt['decay_epoch'] and dev_score <= dev_score_history[-1] and \
                opt['optim'] in ['sgd', 'adagrad', 'adadelta']:
            current_lr *= opt['lr_decay']
            trainer.update_lr(current_lr)
        dev_score_history += [dev_score]
        print("")

    print("Training ended with {} epochs.".format(epoch))
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str, default='saved_models/', help='Directory of the model.')
    parser.add_argument('--model_name', type=str, default='best_model.tar', help='Name of the model file.')
    parser.add_argument('--data_dir', type=str, default='data/')
    parser.add_argument('--out', type=str, default='', help='Save model predictions to this dir.')
    parser.add_argument('--emb_dim', type=int, default=50, help='Word embedding dimension.')
    parser.add_argument('--pos_dim', type=int, default=5, help='Position embedding dimension.')
    parser.add_argument('--pos_limit', type=int, default=30, help='Position embedding length limit.')
    parser.add_argument('--num_conv', type=int, default=230, help='The number of convolutional filters.')
    parser.add_argument('--win_size', type=int, default=3, help='Convolutional filter size.')
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='The rate at which a parameter is randomly set to 0.')
    parser.add_argument('--lr', type=float, default=0.001, help='Applies to SGD.')
    parser.add_argument('--num_epoch', type=int, default=15)
    parser.add_argument('--seed', type=int, default=666)
    parser.add_argument('--sentential_eval', type=bool, default=False,
                        help='Perform sentential evaluation.')
    parser.add_argument('--sen_file', type=str, default='', help='Sentential eval dataset.')
    parser.add_argument('--heldout_eval', type=bool, default=False,
                        help='Perform heldout evaluation after each epoch.')
    parser.add_argument('--print_config', type=bool, default=False,
                        help='Print out the configuration of the model.')
    parser.add_argument('--tune', type=bool, default=False,
                        help='Perform sentential evaluation for all models in the same directory.')
    parser.add_argument('--cuda', type=bool, default=torch.cuda.is_available())
    # parser.add_argument('--gpu_num', type=int, default=0)
    parser.add_argument('--cpu', action='store_true', help='Ignore CUDA.')
    args = parser.parse_args()
    if args.cpu:
        args.cuda = False

    # make opt
    opt = vars(args)
    opt['train_file'] = opt['data_dir'] + '/' + 'train.txt'
    opt['test_file'] = opt['data_dir'] + '/' + 'test.txt'
    opt['vocab_file'] = opt['data_dir'] + '/' + 'vec.bin'
    opt['rel_file'] = opt['data_dir'] + '/' + 'relation2id.txt'
    if opt['data_dir'].split('/')[-1] != '':
        opt['data_name'] = opt['data_dir'].split('/')[-1]
    else:
        opt['data_name'] = opt['data_dir'].split('/')[-2]

    # Pretrained word embedding
    print "\nLoading pretrained word embedding"
    w2v_model = gensim.models.KeyedVectors.load_word2vec_format(opt['vocab_file'], binary=True)
    word_list = [u'UNK'] + w2v_model.index2word
    word_vec = w2v_model.syn0
    word2id = {}
    for id, word in enumerate(word_list):
        word2id[word] = id
    assert opt['emb_dim'] == w2v_model.syn0.shape[1]

    # Read from relation2id.txt to build a dictionary: rel2id
    rel2id = {}
    with open(opt['rel_file'], 'rb') as f:
        for item in f:
            [relation, id] = item.strip('\n').split(' ')
            rel2id[relation] = int(id)
    id2rel = [''] * len(rel2id)
    for relation, rel_id in rel2id.items():
        id2rel[rel_id] = relation
    opt['num_rel'] = len(rel2id)
    opt['vocab_size'] = len(word_list)

    # Load data
    all_data = loader.DataLoader(opt, word2id, rel2id)
    opt['pos_e1_size'] = all_data.pos_max_e1 - all_data.pos_min_e1 + 1
    opt['pos_e2_size'] = all_data.pos_max_e2 - all_data.pos_min_e2 + 1
    opt['pos_min_e1'] = all_data.pos_min_e1
    opt['pos_min_e2'] = all_data.pos_min_e2
    opt['EP_num_train'] = len(all_data.bags_train)
    opt['EP_num_test'] = len(all_data.bags_test)
    assert opt['pos_e1_size'] == opt['pos_e2_size']

    if opt['tune']:
        model_file_list = sorted(glob.glob(args.model_dir + opt['data_name'] + "*.tar"))
    else:
        model_file_list = [args.model_dir + '/' + args.model_name]

    for model_file in model_file_list:
        # Load input model
        print("Load model: {}".format(model_file.split('/')[-1]))
        PCNN_NMAR_model = PCNN_NMAR(word_vec, opt)
        checkpoint = torch.load(model_file)
        PCNN_NMAR_model.load_state_dict(checkpoint['state_dict'])
        model_config = checkpoint['config']
        if opt['print_config']:
            helper.print_config(model_config)
        if opt['cuda']:
            PCNN_NMAR_model.cuda()

        # Sentential evaluation
        if opt['sentential_eval']:
            print "Sentential evaluation starts."
            sen_file = opt['data_dir'] + '/' + opt['sen_file']
            sen_AUC = PCNN_NMAR_model.sentential_eval(sen_file, all_data, rel2id, id2rel)
            print "The sentential AUC of the P/R curve on {} is {:.3f}".format(opt['sen_file'], sen_AUC)
            print "Sentential evaluation ends.\n"

        # Heldout evaluation
        if opt['heldout_eval']:
            print "Heldout evaluation starts."
            recall, precision = PCNN_NMAR_model.heldout_eval(all_data)
            heldout_AUC = metrics.auc(recall, precision) if len(recall) != 0 else 0
            print "The heldout AUC of the P/R curve is {:.4f}".format(heldout_AUC)
            print "Heldout evaluation ends."