def main():
    args = parse_args()
    print("Loading data file...")
    filename = args.data_dir + '/{}.json'.format(args.dataset)
    with open(filename, 'r') as infile:
        data = json.load(infile)
    labels = [d['relation'] for d in data]

    # read predictions
    # args.pred_files = ['./res_pkl/trans.pkl', './res_pkl/palstm.pkl']
    args.pred_files = ['./res_pkl/palstm.pkl']
    print("Loading {} prediction files...".format(len(args.pred_files)))
    scores_list = []
    for path in args.pred_files:
        with open(path, 'rb') as infile:
            scores = pickle.load(infile)
        scores_list += [scores]

    print("Calculating ensembled predictions...")
    predictions = []
    scores_by_examples = list(zip(*scores_list))
    assert len(scores_by_examples) == len(data)
    for scores in scores_by_examples:
        pred = ensemble(scores)
        predictions += [pred]
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    predictions = [id2label[p] for p in predictions]
    scorer.score(labels, predictions, verbose=True)
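# The ensemble() helper used above is not shown in this snippet. A minimal
# sketch, assuming each element of `scores` is one model's per-class
# probability vector for a single example (as stored in the pickled
# prediction files) and that numpy is imported as np, would average the
# vectors and take the argmax:
def ensemble(scores):
    """Average per-model class probabilities and return the predicted label id."""
    avg = np.mean(np.asarray(list(scores), dtype=float), axis=0)
    return int(np.argmax(avg))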
def _valid_epoch(self):
    self.model.eval()
    val_loss = 0
    preds = []
    labels = []
    with torch.no_grad():
        for idx, inputs in enumerate(self.valid_data_loader):
            org_idx = inputs[-2]
            inputs = [
                item.to(self.device) for item in inputs
                if not isinstance(item, list)
            ]
            outputs, pooling_out = self.model(inputs[:-1])
            val_loss += self.criterion(outputs, inputs[-1])
            pred = F.softmax(outputs, 1).data.cpu().numpy().tolist()
            pred = np.argmax(pred, axis=1)
            preds.extend(pred)
            labels += inputs[-1].tolist()
    preds = [self.data_loader.id2label[pred] for pred in preds]
    labels = [self.data_loader.id2label[label] for label in labels]
    valid_p, valid_r, valid_f1 = scorer.score(labels, preds, verbose=True)
    self.logger.info(
        ' validation precision is : {:.3f}, validation recall is : {:.3f}, validation f1_macro is : {:.3f}, best score is : {}'
        .format(valid_p, valid_r, valid_f1, self.best_score))
    return val_loss / len(self.valid_data_loader), valid_f1
def main():
    args = parse_args()
    print("Loading data file...")
    filename = args.data_dir + '/{}.json'.format(args.dataset)
    with open(filename, 'r') as infile:
        data = json.load(infile)
    labels = [d['relation'] for d in data]

    # read predictions
    print("Loading {} prediction files...".format(len(args.pred_files)))
    scores_list = []
    for path in args.pred_files:
        with open(path, 'rb') as infile:
            scores = pickle.load(infile)
        scores_list += [scores]

    print("Calculating ensembled predictions...")
    predictions = []
    scores_by_examples = list(zip(*scores_list))
    assert len(scores_by_examples) == len(data)
    for scores in scores_by_examples:
        pred = ensemble(scores)
        predictions += [pred]
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    predictions = [id2label[p] for p in predictions]
    precision, recall, f1, mistake = scorer.score(labels, predictions, verbose=True)

    with open("logs.txt", 'a') as f:
        f.write("Temp:" + str(args.temp) + '\tAlpha:' + str(args.alpha) + '\n')
        f.write("Precision:%f\n" % precision)
        f.write("Recall:%f\n" % recall)
        f.write("F1:%f\n" % f1)
        f.write("-------------------\n")
def select_samples(model_p, model_s, dataset_infer, k_samples, args,
                   default_distribution):
    max_upperbound = int(math.ceil(k_samples * args.selector_upperbound))

    # predictor selection
    meta_idxs_p, confidence_idxs_p = model_p.retrieve(
        dataset_infer, len(dataset_infer))  # retrieve all the samples
    print("Infer on predictor: ")
    # track performance of the predictor alone
    gold = [t[2] for t in meta_idxs_p[:k_samples]]
    guess = [t[1] for t in meta_idxs_p[:k_samples]]
    scorer.score(gold, guess, verbose=False)
    scorer.score(gold, guess, verbose=False, NO_RELATION="-1")

    # for self-training
    if args.integrate_method == "p_only":
        return split_samples(dataset_infer, meta_idxs_p[:k_samples],
                             args.batch_size)

    # selector selection
    if args.integrate_method == "s_only" or max_upperbound == 0:
        label_distribution = default_distribution
    else:
        label_distribution = get_relation_distribution(
            meta_idxs_p[:max_upperbound])

    def s_retrieve_fn(k_samples, label_distribution):
        return model_s.retrieve(dataset_infer, k_samples,
                                label_distribution=label_distribution)

    ori_meta_idxs_s, _ = s_retrieve_fn(k_samples, label_distribution)
    print("Infer on selector: ")
    gold = [t[2] for t in ori_meta_idxs_s]
    guess = [t[1] for t in ori_meta_idxs_s]
    scorer.score(gold, guess, verbose=False)
    scorer.score(gold, guess, verbose=False, NO_RELATION="-1")

    # if we only care about performance of the selector
    if args.integrate_method == "s_only":
        return split_samples(dataset_infer, ori_meta_idxs_s)

    # integrate method
    if args.integrate_method == "intersection":
        meta_idxs, confidence_idxs_s = intersect_samples(
            meta_idxs_p, s_retrieve_fn, k_samples, label_distribution)
    else:
        raise NotImplementedError("integrate_method {} not implemented".format(
            args.integrate_method))

    confidence_dict_p = dict(
        (id, confidence) for id, confidence in confidence_idxs_p)
    confidence_dict_s = dict(
        (id, confidence) for id, confidence in confidence_idxs_s)
    return split_samples(dataset_infer, meta_idxs,
                         conf_p=confidence_dict_p,
                         conf_s=confidence_dict_s)
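# get_relation_distribution() is referenced above but not defined in this
# snippet. A minimal sketch, assuming the meta_idxs entries are
# (id, predicted_label, gold_label) tuples as used in select_samples and
# intersect_samples (the name and tuple layout are assumptions, not the
# repo's confirmed API), would normalize predicted-label counts:
from collections import Counter

def get_relation_distribution(meta_idxs):
    """Return {label: fraction} over the predicted labels of the given tuples."""
    counts = Counter(pred for _, pred, _ in meta_idxs)
    total = float(sum(counts.values())) or 1.0
    return {label: n / total for label, n in counts.items()}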
def evaluate_model(evalparams):
    torch.manual_seed(evalparams.seed)
    random.seed(1234)
    if evalparams.cpu:
        evalparams.cuda = False
    elif evalparams.cuda:
        torch.cuda.manual_seed(evalparams.seed)

    # load opt
    print(evalparams.model_dir, evalparams.model)
    # model_file = evalparams.model_dir + "/" + evalparams.model
    model_file = 'best_model.pt'
    print("Loading model from {}".format(model_file))
    opt = torch_utils.load_config(model_file)
    model = RelationModel(opt)
    model.load(model_file)

    # load vocab
    vocab_file = evalparams.model_dir + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    assert opt['vocab_size'] == vocab.size, \
        "Vocab size must match that in the saved model."

    # load data
    data_file = opt['data_dir'] + '/{}.json'.format(evalparams.dataset)
    print("Loading data from {} with batch size {}...".format(
        data_file, opt['batch_size']))
    batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

    helper.print_config(opt)
    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)

    # save probability scores
    if len(evalparams.out) > 0:
        helper.ensure_dir(os.path.dirname(evalparams.out))
        with open(evalparams.out, 'wb') as outfile:
            pickle.dump(all_probs, outfile)
        print("Prediction scores saved to {}.".format(evalparams.out))

    print("Evaluation ended.")
    return (batch.gold(), predictions, model)
def intersect_samples(meta_idxs1, s_retrieve_fn, k_samples, prior_distribution):
    upperbound, meta_idxs, confidence_idxs_s = k_samples, [], []
    while len(meta_idxs) < min(k_samples, len(meta_idxs1)):
        upperbound = math.ceil(1.25 * upperbound)
        ori_meta_idxs_s, confidence_idxs_s = s_retrieve_fn(
            upperbound, prior_distribution)
        meta_idxs = sorted(
            set(meta_idxs1[:upperbound]).intersection(
                set(ori_meta_idxs_s)))[:k_samples]
        if upperbound > k_samples * 30:  # set a limit for the growing upperbound
            break
    print("Infer on combination...")
    scorer.score([actual for _, _, actual in meta_idxs],
                 [pred for _, pred, _ in meta_idxs],
                 verbose=False)
    scorer.score([actual for _, _, actual in meta_idxs],
                 [pred for _, pred, _ in meta_idxs],
                 verbose=False,
                 NO_RELATION="-1")
    return meta_idxs, confidence_idxs_s
def evaluate(model, dataset, evaluate_type='prf', verbose=False):
    rel_stoi, rel_itos = model.opt['rel_stoi'], model.opt['rel_itos']

    iterator_test = data.Iterator(dataset=dataset,
                                  batch_size=model.opt['batch_size'],
                                  repeat=False,
                                  train=True,
                                  shuffle=False,
                                  sort=True,
                                  sort_key=lambda x: -len(x.token),
                                  sort_within_batch=False)

    if evaluate_type == 'prf':
        predictions = []
        all_probs = []
        golds = []
        all_loss = 0
        for batch in iterator_test:
            inputs, target = batch_to_input(batch, model.opt['vocab_pad_id'])
            preds, probs, loss = model.predict(inputs, target)
            predictions += preds
            all_probs += probs
            all_loss += loss
            golds += target.data.tolist()
        predictions = [rel_itos[p] for p in predictions]
        golds = [rel_itos[p] for p in golds]
        p, r, f1 = scorer.score(golds, predictions, verbose=verbose)
        return p, r, f1, all_loss
    elif evaluate_type == 'auc':
        logits, labels = [], []
        for batch in iterator_test:
            inputs, target = batch_to_input(batch, model.opt['vocab_pad_id'])
            logits += model.predict(inputs)[0]
            labels += batch.relation.data.numpy().tolist()
        p, q = 0, 0
        for rel in range(len(rel_itos)):
            if rel == rel_stoi['no_relation']:
                continue
            logits_rel = [logit[rel] for logit in logits]
            labels_rel = [1 if label == rel else 0 for label in labels]
            ranking = list(zip(logits_rel, labels_rel))
            ranking = sorted(ranking, key=lambda x: x[0], reverse=True)
            logits_rel, labels_rel = zip(*ranking)
            p += scorer.AUC(logits_rel, labels_rel)
            q += 1
        dev_auc = p / q * 100
        return dev_auc, None, None, None
def get_scores(data_file, opt, vocab, model):
    print("Loading data from {} with batch size {}...".format(
        data_file, opt["batch_size"]))
    batch = DataLoader(data_file, opt["batch_size"], opt, vocab, evaluation=True)

    # id2label is assumed to be defined at module level, e.g.:
    # id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    predictions = []
    all_probs = []
    for i, b in enumerate(batch):
        preds, probs, attn_weights, _ = model.predict(b)
        predictions += preds
        all_probs += probs
    predictions = [id2label[p] for p in predictions]
    # print("predictions")
    # for a, b in zip(batch.gold(), predictions):
    #     print(f"{a:<28} {b:<28}")
    p, r, f1 = scorer.score(batch.gold(), predictions, verbose=False)
    return p, r, f1
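# A minimal usage sketch for get_scores(), assuming a trained model, its opt
# dict and vocab are already loaded as in the other snippets; the dev.json
# and test.json file names mirror the files used elsewhere in this code and
# are illustrative, not a confirmed entry point of the repo:
dev_p, dev_r, dev_f1 = get_scores(opt['data_dir'] + '/dev.json', opt, vocab, model)
test_p, test_r, test_f1 = get_scores(opt['data_dir'] + '/test.json', opt, vocab, model)
print("dev F1 = {:.4f}, test F1 = {:.4f}".format(dev_f1, test_f1))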
for i, b in enumerate(batch_iter):
    preds, probs, loss = trainer.predict(b)
    predictions += preds
    all_probs += probs
    losses += loss

prob_a = np.array(all_probs)
prob_list.append(prob_a)

# prob_all = (prob_list[0] + prob_list[1] + prob_list[2] + prob_list[3] + prob_list[4]) / 5
prob_all = (prob_list[0] + prob_list[1] + prob_list[2] + prob_list[3]) / 4

# pa_tree_file = 'saved_models/other_method/pa_tree_pro.npy'
# pa_tree_pro = np.load(pa_tree_file)
# prob_all = 0.25 * pa_tree_pro + 0.75 * prob_all
prob_all = torch.from_numpy(prob_all)
pre_out = np.argmax(prob_all.data.cpu().numpy(), axis=1).tolist()
label_out = [id2label[p] for p in pre_out]
p, r, f1 = scorer.score(batch.gold(), label_out, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))
# losses = losses / len(batch_iter)
# print(losses)
print("Evaluation ended.")
print('a')
        duration = time.time() - start_time
        print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                                opt['num_epoch'], loss, duration, current_lr))

# eval on dev
print("Evaluating on dev set...")
dev_predictions = []
dev_loss = 0
for i, batch in enumerate(dev_batch):
    preds, _, loss = trainer.predict(batch)
    dev_predictions += preds
    dev_loss += loss
dev_predictions = [id2label[p] for p in dev_predictions]
dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']

dev_metrics, _ = scorer.score(dev_batch.gold(), dev_predictions)
dev_p, dev_r, dev_f1 = dev_metrics['precision'], dev_metrics['recall'], dev_metrics['f1']
print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
    epoch, train_loss, dev_loss, dev_f1))
dev_score = dev_f1
file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
    epoch, train_loss, dev_loss, dev_score,
    max([dev_score] + dev_score_history)))
current_dev_metrics = {'f1': dev_f1, 'precision': dev_p, 'recall': dev_r}

# eval on test
print("Evaluating on test set...")
test_predictions = []
for i, batch in enumerate(test_batch):
    preds, _, loss = trainer.predict(batch)
print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                        opt['num_epoch'], loss, duration, current_lr))

# eval on train
print("Evaluating on train set...")
train_predictions = []
train_eval_loss = 0
for i, batch in enumerate(train_batch):
    preds, _, loss = trainer.predict(batch)
    train_predictions += preds
    train_eval_loss += loss
train_predictions = [id2label[p] for p in train_predictions]
train_eval_loss = train_eval_loss / train_batch.num_examples * opt['batch_size']

train_p, train_r, train_f1 = scorer.score(train_batch.gold(), train_predictions)
print("epoch {}: train_loss = {:.6f}, train_eval_loss = {:.6f}, train_f1 = {:.4f}"
      .format(epoch, train_loss, train_eval_loss, train_f1))
train_score = train_f1
# file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(epoch, train_loss, train_eval_loss, train_f1))

# eval on dev
print("Evaluating on dev set...")
dev_predictions = []
dev_loss = 0
for i, batch in enumerate(dev_batch):
    preds, _, loss = trainer.predict(batch)
    dev_predictions += preds
    dev_loss += loss
dev_predictions = [id2label[p] for p in dev_predictions]
data_file = opt['data_dir'] + '/test.json'
print("Loading data from {} with batch size {}...".format(
    data_file, opt['batch_size']))
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)

helper.print_config(opt)
id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])

predictions = []
all_probs = []
for i, b in enumerate(batch):
    preds, probs, _ = model.predict(b)
    predictions += preds
    all_probs += probs
predictions = [id2label[p] for p in predictions]

metrics, other_data = scorer.score(batch.gold(), predictions, verbose=True)
p = metrics['precision']
r = metrics['recall']
f1 = metrics['f1']
wrong_indices = other_data['wrong_indices']
correct_indices = other_data['correct_indices']
wrong_predictions = other_data['wrong_predictions']

raw_data = np.array(batch.raw_data)
wrong_data = raw_data[wrong_indices]
correct_data = raw_data[correct_indices]
wrong_ids = [d['id'] for d in wrong_data]
correct_ids = [d['id'] for d in correct_data]
                        opt['num_epoch'], loss, duration, current_lr))

# eval on dev
print("Evaluating on dev set...")
predictions = []
dev_loss = 0
for i, batch in enumerate(dev_batch):
    preds, _, loss = trainer.predict(batch)
    predictions += preds
    dev_loss += loss
predictions = [id2label[p] for p in predictions]

train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)
print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
    epoch, train_loss, dev_loss, dev_f1))
dev_score = dev_f1
file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
    epoch, train_loss, dev_loss, dev_score,
    max([dev_score] + dev_score_history)))

# save
model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
trainer.save(model_file, epoch)
if epoch == 1 or dev_score > max(dev_score_history):
    copyfile(model_file, model_save_dir + '/best_model.pt')
    print("new best model saved.")
    file_logger.log("new best model saved at epoch {}: {:.2f}\t{:.2f}\t{:.2f}".format(
        epoch, dev_p * 100, dev_r * 100, dev_score * 100))
print("Loading model from {}".format(model_file)) opt = torch_utils.load_config(model_file) model = RelationModel(opt) model.load(model_file) # load vocab vocab_file = args.model_dir + '/vocab.pkl' vocab = Vocab(vocab_file, load=True) assert opt['vocab_size'] == vocab.size, "Vocab size must match that in the saved model." # load data data_file = opt['data_dir'] + '/{}.json'.format(args.dataset) print("Loading data from {} with batch size {}...".format(data_file, opt['batch_size'])) eval_batch = BatchLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True) helper.print_config(opt) id2label = dict([(v,k) for k,v in constant.LABEL_TO_ID.items()]) predictions = [] all_probs = [] for batch in eval_batch.data: preds, probs, _ = model.predict(batch) predictions += preds all_probs += probs predictions = [id2label[p] for p in predictions] p, r, f1 = scorer.score(eval_batch.gold(), predictions, verbose=True) print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(args.dataset,p,r,f1)) print("Evaluation ended.")
def transre_search(ffn, connect, hidden_dim, trans_layers, multi_heads,
                   ffn_ex_size, initial, final):
    opt['weighted'] = False
    opt['rnn'] = False
    opt['ffn'] = ffn
    opt['connect'] = connect
    opt['hidden_dim'] = hidden_dim
    opt['trans_layers'] = trans_layers
    opt['multi_heads'] = multi_heads
    opt['ffn_ex_size'] = ffn_ex_size
    opt['initial'] = initial
    opt['final'] = final

    id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_name = str(opt['optim']) + '_' + str(opt['lr']) + str(ffn) + '_' + str(connect) + "_" \
        + str(hidden_dim) + '_' + str(trans_layers) + '_' + str(multi_heads) + '_' \
        + str(ffn_ex_size) + '_' + str(initial) + '_' + str(final)
    model_name = model_name + '' + str(opt['memo'])
    model_name = str(id) + "_" + model_name
    model_save_dir = opt['save_dir'] + '/' + model_name
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_score\tbest_dev_score")
    helper.print_config(opt)

    if not opt['load']:
        trainer = TransTrainer(opt, emb_matrix=emb_matrix)
    else:
        # load a pre-trained model
        model_file = opt['model_file']
        print("Loading model from {}".format(model_file))
        model_opt = torch_utils.load_config(model_file)
        model_opt['optim'] = opt['optim']
        trainer = TransTrainer(model_opt)
        trainer.load(model_file)

    id2label = dict([(v, k) for k, v in label2id.items()])  # the classification result
    dev_score_history = []
    dev_loss_history = []
    current_lr = opt['lr']
    global_step = 0
    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']
    best_result = "unknown"
    file_logger.log(str(opt['memo']))

    for epoch in range(1, opt['num_epoch'] + 1):
        train_loss = 0
        epoch_start_time = time.time()
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss, norm = trainer.update(batch)
            train_loss += loss
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step, max_steps,
                                        epoch, opt['num_epoch'], loss, duration,
                                        current_lr))

        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss, _ = trainer.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]

        train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        acc, dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)
        print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}"
              .format(epoch, train_loss, dev_loss, dev_f1))
        dev_score = dev_f1
        file_logger.log("{}\t{:.3f}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
            epoch, acc, train_loss, dev_loss, dev_score,
            max([dev_score] + dev_score_history)))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        trainer.save(model_file, epoch)
        if epoch == 1 or dev_score > max(dev_score_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            best_result = (model_name, dev_score)
            print("new best model saved.")
            file_logger.log("new best model saved at epoch {}: {:.2f}\t{:.2f}\t{:.2f}".format(
                epoch, dev_p * 100, dev_r * 100, dev_score * 100))
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # lr schedule
        if len(dev_score_history) > opt['decay_epoch'] and \
                dev_score <= dev_score_history[-1] and \
                opt['optim'] in ['sgd', 'adagrad', 'adadelta']:
            current_lr *= opt['lr_decay']
            trainer.update_lr(current_lr)

        dev_score_history += [dev_score]
        dev_loss_history += [dev_loss]
        epoch_end_time = time.time()
        print("epoch time {:.3f}".format(epoch_end_time - epoch_start_time))

    return best_result
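# A minimal usage sketch for transre_search(). The hyperparameter values below
# are illustrative assumptions, not settings from the repo, and the function
# relies on the module-level opt, vocab, emb_matrix, train_batch and dev_batch
# already being set up as in the surrounding code:
results = []
for hidden_dim in (200, 360):
    for trans_layers in (2, 4):
        best = transre_search(ffn=True, connect='residual',
                              hidden_dim=hidden_dim, trans_layers=trans_layers,
                              multi_heads=4, ffn_ex_size=2,
                              initial='xavier', final='linear')
        results.append(best)  # each entry is (model_name, best dev score)
print(results)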
batch = DataLoader(data_file, opt['batch_size'], opt, vocab, evaluation=True)
helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
batch_iter = tqdm(batch)
for i, b in enumerate(batch_iter):
    preds, probs, _ = trainer.predict(b)
    predictions += preds
    all_probs += probs
predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(batch.gold(), predictions)

fjson = open(data_file, 'r')
origin_data = json.load(fjson)
fjson.close()

with open("eval_output.txt", 'a') as f:
    f.write("True Label\tPrediction\tSubject\tObject\tSentence")
    for i in range(len(predictions)):
        if batch.gold()[i] != predictions[i]:
            ss = origin_data[i]['subj_start']
            se = origin_data[i]['subj_end']
            os = origin_data[i]['obj_start']
            oe = origin_data[i]['obj_end']
            token = origin_data[i]['token']
            subj = " ".join(token[ss:se + 1])
def train_unbiased_model(args, biased_batch_probs):
    # make opt
    opt = vars(args)
    opt["num_class"] = len(constant.LABEL_TO_ID)

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    # load data
    print("Loading data from {} with batch size {}...".format(
        opt["data_dir"], opt["batch_size"]))
    train_batch = DataLoader(
        opt["data_dir"] + "/" + args.data_name,
        opt["batch_size"],
        opt,
        vocab,
        evaluation=False,
    )
    dev_batch = DataLoader(opt["data_dir"] + "/dev.json", opt["batch_size"],
                           opt, vocab, evaluation=True)

    model_id = opt["id"] if len(opt["id"]) > 1 else "0" + opt["id"]
    model_save_dir = opt["save_dir"] + "/" + model_id
    opt["model_save_dir"] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + "/config.json", verbose=True)
    vocab.save(model_save_dir + "/vocab.pkl")
    file_logger = helper.FileLogger(
        model_save_dir + "/" + opt["log"],
        header="# epoch\ttrain_loss\tdev_loss\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    dev_f1_history = []
    current_lr = opt["lr"]
    global_step = 0
    global_start_time = time.time()
    format_str = (
        "{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}"
    )
    max_steps = len(train_batch) * opt["num_epoch"]

    # start training
    for epoch in range(1, opt["num_epoch"] + 1):
        train_loss = 0
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = model.update(batch, torch.tensor(biased_batch_probs[i]).cuda())
            train_loss += loss
            if global_step % opt["log_step"] == 0:
                duration = time.time() - start_time
                print(format_str.format(
                    datetime.now(), global_step, max_steps, epoch,
                    opt["num_epoch"], loss, duration, current_lr))

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += loss
        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        f = open("label.txt", "w+")
        f.write(str(dev_batch.gold()))
        f.close()

        train_loss = (train_loss / train_batch.num_examples * opt["batch_size"])  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt["batch_size"]
        print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}"
              .format(epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))

        # save
        model_file = model_save_dir + "/checkpoint_epoch_{}.pt".format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + "/best_model.pt")
            print("new best model saved.")
        if epoch % opt["save_epoch"] != 0:
            os.remove(model_file)

        # lr schedule
        if (len(dev_f1_history) > 10 and dev_f1 <= dev_f1_history[-1]
                and opt["optim"] in ["sgd", "adagrad"]):
            current_lr *= opt["lr_decay"]
            model.update_lr(current_lr)
        dev_f1_history += [dev_f1]

    print("")
    print("Training ended with {} epochs.".format(epoch))
    inputs['words'], inputs['length'] = batch.token
    inputs['pos'] = batch.pos
    inputs['ner'] = batch.ner
    inputs['subj_pst'] = batch.subj_pst
    inputs['obj_pst'] = batch.obj_pst
    inputs['masks'] = torch.eq(batch.token[0], opt['vocab_pad_id'])
    target = batch.relation
    preds, _, loss = model.predict(inputs, target)
    predictions += preds
    dev_loss += loss
    golds += target.data.tolist()
predictions = [RELATION.vocab.itos[p] for p in predictions]
golds = [RELATION.vocab.itos[p] for p in golds]
dev_p, dev_r, dev_f1 = scorer.score(golds, predictions)

# print training information
train_loss = train_loss / len(iterator_train) * opt['batch_size']  # avg loss per batch
dev_loss = dev_loss / len(iterator_dev) * opt['batch_size']
print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
    epoch, train_loss, dev_loss, dev_f1))
file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(
    epoch, train_loss, dev_loss, dev_f1))

# save the current model
model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
model.save(model_file, epoch)
if epoch == 1 or dev_f1 > max(dev_f1_history):
    copyfile(model_file, model_save_dir + '/best_model.pt')
    print("new best model saved.")
if epoch % opt['save_epoch'] != 0:
helper.print_config(opt)
label2id = constant.LABEL_TO_ID
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
all_ids = []
for i, b in enumerate(loaded):
    preds, probs, _, ids = trainer.predict_with_confidence(b)
    predictions += preds
    all_probs += probs
    all_ids += ids
predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(loaded.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))

if args.trace_file is not None:
    print(f'Creating trace file "{args.trace_file}"')
    with open(args.trace_file, 'w', encoding='utf-8', newline='') as trace_file:
        csv_writer = csv.writer(trace_file)
        csv_writer.writerow(['id', 'gold', 'predicted', 'probability'])
        for id, gold, prediction, probability in zip(all_ids, loaded.gold(),
                                                     predictions, all_probs):
            csv_writer.writerow([id, gold, prediction, probability])
sent_predictions = []
batch_iter = tqdm(batch)
for i, b in enumerate(batch_iter):
    preds, probs, _, sent_preds = trainer.predict(b)
    predictions += preds
    all_probs += probs
    sent_predictions += sent_preds

lens = [len(p) for p in predictions]
predictions = [[id2label[l + 1]] for p in predictions for l in p]
sent_predictions = [sent_id2label[p] for p in sent_predictions]
# print(len(predictions))
# print(len(batch.gold()))
p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True,
                        verbose_output=args.per_class == 1)

print('scores from sklearn: ')
macro_f1 = f1_score(batch.gold(), predictions, average='macro')
micro_f1 = f1_score(batch.gold(), predictions, average='micro')
macro_p = precision_score(batch.gold(), predictions, average='macro')
micro_p = precision_score(batch.gold(), predictions, average='micro')
macro_r = recall_score(batch.gold(), predictions, average='macro')
micro_r = recall_score(batch.gold(), predictions, average='micro')

print('micro scores: ')
print('micro P: ', micro_p)
print('micro R: ', micro_r)
print('micro F1: ', micro_f1)
print("")
print("macro scores: ")
model_opt = torch_utils.load_config(model_file)
model_opt['optim'] = opt['optim']
model_opt['lr'] = opt['lr']
model_opt['lr_decay'] = opt['lr_decay']
trainer = GCNTrainer(model_opt)
trainer.load(model_file)
# model_file = "saved_models/02/" + subj + "_" + obj + "_" + "best_model.pt"

print("Evaluating on dev set...")
predictions = []
dev_loss = 0
for i, batch in enumerate(dev_batch):
    preds, probs, loss, samples = trainer.predict(batch)
    predictions += preds
    dev_loss += loss
predictions = [id2label[p] for p in predictions]
dev_p, dev_r, dev_f1 = scorer.score(dev_batch, predictions)

test_loss = 0
predictions = []
for i, batch in enumerate(test_batch):
    preds, _, loss, samples = trainer.predict(batch)
    predictions += preds
    test_loss += loss
predictions = [id2label[p] for p in predictions]
test_loss = test_loss / test_batch.num_examples * opt['batch_size']
test_p, test_r, test_f1 = scorer.score(test_batch, predictions)

score_history += [dev_f1]
test_score_history += [test_f1]
stand = 3
for epoch in range(1, opt['num_epoch'] + 1):
    # if (not train_batch.NoAugData()):
def main():
    # set top-level random seeds
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    if args.cpu:
        args.cuda = False
    elif args.cuda:
        # force random seed for reproducibility
        # also apply same seed to numpy in every file
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    # make opt
    opt = vars(args)
    opt['num_class'] = len(constant.LABEL_TO_ID)

    # load vocab
    vocab_file = opt['vocab_dir'] + '/vocab.pkl'
    vocab = Vocab(vocab_file, load=True)
    # in some previous experiments we saw that a lower vocab size can improve
    # performance, but that was in a completely different project (although on
    # the same data); here it seems much harder to get this to work.
    # uncomment the following line if this is solved:
    # new_vocab_size = 30000
    opt['vocab_size'] = vocab.size
    emb_file = opt['vocab_dir'] + '/embedding.npy'
    emb_matrix = np.load(emb_file)
    assert emb_matrix.shape[0] == vocab.size
    assert emb_matrix.shape[1] == opt['emb_dim']

    # load data
    print("Loading data from {} with batch size {}...".format(
        opt['data_dir'], opt['batch_size']))
    train_batch = DataLoader(opt['data_dir'] + '/train.json', opt['batch_size'],
                             opt, vocab, evaluation=False)
    dev_batch = DataLoader(opt['data_dir'] + '/dev.json', opt['batch_size'],
                           opt, vocab, evaluation=True)

    model_id = opt['id'] if len(opt['id']) > 1 else '0' + opt['id']
    model_save_dir = opt['save_dir'] + '/' + model_id
    opt['model_save_dir'] = model_save_dir
    helper.ensure_dir(model_save_dir, verbose=True)

    # save config
    helper.save_config(opt, model_save_dir + '/config.json', verbose=True)
    vocab.save(model_save_dir + '/vocab.pkl')
    file_logger = helper.FileLogger(
        model_save_dir + '/' + opt['log'],
        header="# epoch\ttrain_loss\tdev_loss\tdev_p\tdev_r\tdev_f1")

    # print model info
    helper.print_config(opt)

    # model
    model = RelationModel(opt, emb_matrix=emb_matrix)

    id2label = dict([(v, k) for k, v in constant.LABEL_TO_ID.items()])
    dev_f1_history = []
    current_lr = opt['lr']
    global_step = 0
    format_str = '{}: step {}/{} (epoch {}/{}), loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'
    max_steps = len(train_batch) * opt['num_epoch']

    # setup the scheduler for lr decay
    # this doesn't seem to work well compared to what we already have
    # scheduler = ReduceLROnPlateau(model.optimizer, mode='min', factor=opt['lr_decay'], patience=1)

    # start training
    for epoch in range(1, opt['num_epoch'] + 1):
        # TODO: if lr warmup is used, the lr console output is not updated
        print(
            "Current params: " + " heads-" + str(opt["n_head"]) +
            " enc_layers-" + str(opt["num_layers_encoder"]),
            " drop-" + str(opt["dropout"]) + " scaled_drop-" +
            str(opt["scaled_dropout"]) + " lr-" + str(opt["lr"]),
            " lr_decay-" + str(opt["lr_decay"]) + " max_grad_norm-" +
            str(opt["max_grad_norm"]))
        print(
            " weight_no_rel-" + str(opt["weight_no_rel"]) + " weight_rest-" +
            str(opt["weight_rest"]) + " attn-" + str(opt["attn"]) +
            " attn_dim-" + str(opt["attn_dim"]),
            " obj_sub_pos-" + str(opt["obj_sub_pos"]) + " new_residual-" +
            str(opt["new_residual"]))
        print(
            " use_batch_norm-" + str(opt["use_batch_norm"]) +
            " relative_positions-" + str(opt["relative_positions"]),
            " decay_epoch-" + str(opt["decay_epoch"]) + " use_lemmas-" +
            str(opt["use_lemmas"]),
            " hidden_self-" + str(opt["hidden_self"]))

        train_loss = 0
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = model.update(batch)
            train_loss += float(loss)
            if global_step % opt['log_step'] == 0:
                duration = time.time() - start_time
                print(format_str.format(datetime.now(), global_step, max_steps,
                                        epoch, opt['num_epoch'], loss, duration,
                                        current_lr))
            # do garbage collection,
            # as per https://discuss.pytorch.org/t/best-practices-for-maximum-gpu-utilization/13863/6
            del loss

        # eval on dev
        print("Evaluating on dev set...")
        predictions = []
        dev_loss = 0
        for i, batch in enumerate(dev_batch):
            preds, _, loss = model.predict(batch)
            predictions += preds
            dev_loss += float(loss)
            del loss
        predictions = [id2label[p] for p in predictions]
        dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions)

        train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
        dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
        print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
            epoch, train_loss, dev_loss, dev_f1))
        file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}\t{:.4f}".format(
            epoch, train_loss, dev_loss, dev_p, dev_r, dev_f1))

        # save
        model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
        model.save(model_file, epoch)
        if epoch == 1 or dev_f1 > max(dev_f1_history):
            copyfile(model_file, model_save_dir + '/best_model.pt')
            print("new best model saved.")
        if epoch % opt['save_epoch'] != 0:
            os.remove(model_file)

        # reduce the learning rate if it stagnates, by a fixed decay rate and
        # within a given epoch patience; for some reason this works worse than
        # the schedule implemented below
        # scheduler.step(dev_loss)
        if opt["optim"] != "noopt_adam" and opt["optim"] != "noopt_nadam":
            # do warm-up for sgd only instead of adam
            do_warmup_trick = False
            if do_warmup_trick:
                # 1 and 5 first worked kind of; 10 and 15 currently
                current_lr = 10 * (360 ** (-0.5) *
                                   min(epoch ** (-0.5), epoch * 15 ** (-1.5)))
                model.update_lr(current_lr)
            else:
                # decay schedule; 15 is best!
                # simulate a patience of x epochs
                if len(dev_f1_history) > opt['decay_epoch'] and dev_f1 <= dev_f1_history[-1]:
                    current_lr *= opt['lr_decay']
                    model.update_lr(current_lr)
        # else, the learning rate is updated in torch_utils.py

        dev_f1_history += [dev_f1]
        print("")

    print("Training ended with {} epochs.".format(epoch))
final_predictions, inst_predictions, aux_predictions = [], [], []
all_final_probs, all_inst_probs, all_aux_probs = [], [], []
for i, b in enumerate(batch):
    final_preds, inst_preds, aux_preds, final_probs, inst_probs, aux_probs = \
        student_model.predict_all(b)
    final_predictions += final_preds
    inst_predictions += inst_preds
    aux_predictions += aux_preds
    all_final_probs += final_probs
    all_inst_probs += inst_probs
    all_aux_probs += aux_probs

final_predictions = [id2label[p] for p in final_predictions]
inst_predictions = [id2label[p] for p in inst_predictions]
aux_predictions = [id2label[p] for p in aux_predictions]

print('\n >> Final Prediction:')
_, _, _ = scorer.score(batch.gold(), final_predictions, verbose=True)
print('\n >> Instance Prediction:')
_, _, _ = scorer.score(batch.gold(), inst_predictions, verbose=True)
print('\n >> Auxiliary Prediction:')
_, _, _ = scorer.score(batch.gold(), aux_predictions, verbose=True)

# save probability scores
# if len(args.out) > 0:
#     outfile = 'saved_models/' + args.model_id + '/' + args.out
#     with open(outfile, 'w') as fw:
#         for f_prob, i_prob, a_prob in zip(all_final_probs, all_inst_probs, all_aux_probs):
#             fw.write(json.dumps([round(p, 4) for p in f_prob]))
#             fw.write('\r\n')
#             fw.write(json.dumps([round(p, 4) for p in i_prob]))
#             fw.write('\r\n')
#             fw.write(json.dumps([round(p, 4) for p in a_prob]))
id2label = dict([(v, k) for k, v in label2id.items()])

predictions = []
all_probs = []
all_ids = []
batch_tuples = zip(*model_data)
batch_tuple_iter = tqdm(batch_tuples)
for i, data in enumerate(batch_tuple_iter):
    preds, probs, ids = evaluator.predict(data, cuda)
    predictions += preds
    all_probs += probs
    all_ids += ids
predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(model_data[0].gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))

if args.trace_file_for_misses is not None:
    print(f'Preparing miss information and writing it to "{args.trace_file_for_misses}"')
    with open(args.trace_file_for_misses, 'w', encoding='utf-8',
              newline='') as trace_file_for_misses:
        csv_writer = csv.writer(trace_file_for_misses)
        csv_writer.writerow(['id', 'gold', 'predicted'])
        for gold, prediction, id in zip(model_data[0].gold(), predictions, all_ids):
    train_loss += loss
    if global_step % opt['log_step'] == 0:
        duration = time.time() - start_time
        print(format_str.format(datetime.now(), global_step, max_steps, epoch,
                                opt['num_epoch'], loss, duration, current_lr))

# eval on dev
print("Evaluating on dev set...")
predictions = []
dev_loss = 0
for i, batch in enumerate(dev_batch):
    preds, _, loss = model.predict(batch)
    predictions += preds
    dev_loss += loss
predictions = [id2label[p] for p in predictions]
current_dev_metrics, _ = scorer.score(dev_batch.gold(), predictions)
dev_f1 = current_dev_metrics['f1']

train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
print("epoch {}: train_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
    epoch, train_loss, dev_loss, dev_f1))
file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.4f}".format(
    epoch, train_loss, dev_loss, dev_f1))

print("Evaluating on test set...")
predictions = []
test_loss = 0
test_preds = []
for i, batch in enumerate(test_batch):
predictions = []
all_probs = []
golds = []
for i, batch in enumerate(iterator_test):
    inputs = {}
    inputs['words'], inputs['length'] = batch.token
    inputs['pos'] = batch.pos
    inputs['ner'] = batch.ner
    inputs['subj_pst'] = batch.subj_pst
    inputs['obj_pst'] = batch.obj_pst
    inputs['masks'] = torch.eq(batch.token[0], opt['vocab_pad_id'])
    target = batch.relation
    preds, probs, _ = model.predict(inputs, target)
    predictions += preds
    all_probs += probs
    golds += target.data.tolist()
predictions = [RELATION.vocab.itos[p] for p in predictions]
golds = [RELATION.vocab.itos[p] for p in golds]
p, r, f1 = scorer.score(golds, predictions, verbose=True)

# save probability scores
if len(args.out) > 0:
    helper.ensure_dir(os.path.dirname(args.out))
    with open(args.out, 'wb') as outfile:
        pickle.dump(all_probs, outfile)
    print("Prediction scores saved to {}.".format(args.out))
print("Evaluation ended.")
trainer.load(model_file)
batch = DataLoader([data_file], opt['batch_size'], opt, vocab,
                   evaluation=True, corefresolve=True)
batch_iter = tqdm(batch)

all_probs = []
samples = []
for i, b in enumerate(batch_iter):
    preds, probs, _, sample = trainer.predict(b)
    predictions += preds
    all_probs += probs
    # effsum += lab_eff
    # lab_nums += lab_num
    samples = samples + sample
key += batch.gold()

# with open('samples.json', 'w') as f:
#     json.dump(samples, f, indent=4)
predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(batch, predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))
print("Evaluation ended.")
dev_loss = 0
for i, batch in enumerate(dev_batch):
    preds, _, loss, _ = trainer.predict(batch)
    predictions += preds
    dev_loss += loss
predictions = [[id2label[l + 1]] for p in predictions for l in p]

train_loss = train_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
train_sent_loss = train_sent_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
train_dep_path_loss = train_dep_path_loss / train_batch.num_examples * opt['batch_size']  # avg loss per batch
dev_loss = dev_loss / dev_batch.num_examples * opt['batch_size']
dev_p, dev_r, dev_f1 = scorer.score(dev_batch.gold(), predictions, method='macro')
print("epoch {}: train_loss = {:.6f}, train_sent_loss = {:.6f}, "
      "train_dep_path_loss = {:.6f}, dev_loss = {:.6f}, dev_f1 = {:.4f}".format(
          epoch, train_loss, train_sent_loss, train_dep_path_loss, dev_loss, dev_f1))
dev_score = dev_f1
file_logger.log("{}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.6f}\t{:.4f}\t{:.4f}".format(
    epoch, train_loss, train_sent_loss, train_dep_path_loss, dev_loss,
    dev_score, max([dev_score] + dev_score_history)))

# save
model_file = model_save_dir + '/checkpoint_epoch_{}.pt'.format(epoch)
trainer.save(model_file, epoch)
if epoch == 1 or dev_score > max(dev_score_history):
    copyfile(model_file, model_save_dir + '/best_model.pt')
    print("new best model saved.")
predictions = []
all_ids = []
for i, batch_tuple in enumerate(
        zip(*[model_stuff.data for model_stuff in models_stuff])):
    preds, ids = evaluator.predict(batch_tuple, cuda)
    all_ids += ids
    predictions += preds
predictions = [id2label[p] for p in predictions]
p, r, f1 = scorer.score(ud.data.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(args.dataset, p, r, f1))

if args.trace_file_for_misses is not None:
    print(f'Preparing miss information and writing it to "{args.trace_file_for_misses}"')
    with open(args.trace_file_for_misses, 'w', encoding='utf-8',
              newline='') as trace_file_for_misses:
        csv_writer = csv.writer(trace_file_for_misses)
        csv_writer.writerow(['id', 'gold', 'predicted'])
        for gold, prediction, id in zip(ud.data.gold(), predictions, all_ids):
            if gold != prediction:
                csv_writer.writerow([id, gold, prediction])
    # for k in range(attn_list[layer][bat, :, :, :].size(0)):
    #     head = k
    #     attn_mat = attn_list[layer][bat, :, :, :][head, :, :]
    #     token_id = b[0][bat, :].data.cpu().numpy()
    #     token = viz_token(token_id, id2word)
    #     name = "layer" + str(layer) + "_" + "head" + str(head)
    #     # print(token)
    #     label = b[13][bat]
    #     viz_att(token, attn_mat.data.cpu().numpy(), name, label.data.cpu().numpy())
    #     print(name + ".svg saved")
    predictions += preds
    all_probs += probs
predictions = [id2label[p] for p in predictions]
acc, p, r, f1 = scorer.score(batch.gold(), predictions, verbose=True)
print("{} set evaluate result: {:.2f}\t{:.2f}\t{:.2f}".format(
    args.dataset, p, r, f1))
print("Evaluation ended.")

# args.out = "./new_trans.pkl"
# with open(args.out, 'wb') as outfile:
#     pickle.dump(all_probs, outfile)
# print("Prediction scores saved to {}.".format(args.out))

# predictions = []
# all_probs = []
# for i, b in enumerate(batch_iter):
#     preds, probs, _ = trainer2.predict(b)