def main():
    """Run the pipeline steps selected by the hard-coded ``stage``.

    Steps are numbered 0 (prepare data), 1 (split data), 2 (train),
    3 (predict) and 4 (metrics).  Every step whose number is greater than
    or equal to ``stage`` is executed, in that order.
    """
    n_folds = 5
    isconcat = True

    # Model / prediction files of the current experiment.  Notes kept from
    # earlier runs (prediction file -> result):
    #   'lamdbanet-b512-pred.dat'      acc1:76  lambdanet max pooling
    #   'lambdanet-b512-pred.dat'      acc1:60  lambdanet avg pooling
    #   'rank-pred.dat'                acc1:65  svm
    #   'lambdanet-b512-l01-pred.dat'  acc1:74  lambdanet max pooling
    modelname = 'lambdanet-b512-l01-model.h5'
    predfilename = 'lambdanet-b512-l01-pred.dat'

    stage = 3

    if stage <= 0:
        utils.prepare_data(vocab_size)

    if stage <= 1:
        utils.split_data(n=n_folds)

    if stage <= 2:
        train_lambda(n=n_folds, isconcat=isconcat, modelname=modelname)

    if stage <= 3:
        predict(n=n_folds,
                isconcat=isconcat,
                modelname=modelname,
                predfilename=predfilename)

    if stage <= 4:
        utils.calc_metric(n=n_folds, predfilename=predfilename)
        utils.calc_metric_method(n=n_folds, predfilename=predfilename)
def eval(model, iterator, fname, write):
    """Evaluate trigger prediction and optionally write a report file.

    Runs ``model.predict_triggers`` over every batch of ``iterator`` with
    gradients disabled, dumps one ``word<TAB>gold<TAB>predicted`` line per
    token to the scratch file ``temp`` and prints precision / recall / F1
    (as returned by ``calc_metric``) for trigger classification and for
    trigger identification.

    Args:
        model: model exposing ``eval()`` and ``predict_triggers(...)``.
        iterator: iterable of 8-tuples
            ``(tokens_2d, triggers_2d, entities_3d, postags_2d, adj,
            seqlen_1d, words, triggers)``.
        fname: path of the report file (only used when ``write`` is true).
        write: when true, the token dump followed by the metric summary is
            written to ``fname``.

    Returns:
        The two-line metric summary string.
    """
    model.eval()

    words_all, triggers_all, triggers_hat_all = [], [], []
    with torch.no_grad():
        for batch in iterator:
            (tokens_2d, triggers_2d, entities_3d, postags_2d, adj, seqlen_1d,
             words, triggers) = batch
            trigger_logits, trigger_hat_2d = model.predict_triggers(
                tokens_2d=tokens_2d,
                entities_3d=entities_3d,
                postags_2d=postags_2d,
                seqlen_1d=seqlen_1d,
                adjm=adj)
            words_all.extend(words)
            triggers_all.extend(triggers)
            triggers_hat_all.extend(trigger_hat_2d.cpu().numpy().tolist())

    triggers_true, triggers_pred = [], []
    try:
        with open('temp', 'w') as fout:
            for i, (words, triggers, triggers_hat) in enumerate(
                    zip(words_all, triggers_all, triggers_hat_all)):
                # Predictions are cut to the sentence length before being
                # mapped back to label strings.
                triggers_hat = triggers_hat[:len(words)]
                triggers_hat = [idx2trigger[hat] for hat in triggers_hat]

                # [(ith sentence, t_start, t_end, t_type_str)]
                triggers_true.extend(
                    [(i, *item) for item in find_triggers(triggers)])
                triggers_pred.extend(
                    [(i, *item) for item in find_triggers(triggers_hat)])

                for w, t, t_h in zip(words, triggers, triggers_hat):
                    fout.write('{}\t{}\t{}\n'.format(w, t, t_h))
                fout.write("\n")

        print('[trigger classification]')
        trigger_p, trigger_r, trigger_f1 = calc_metric(triggers_true,
                                                       triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p, trigger_r,
                                                     trigger_f1))

        print('[trigger identification]')
        # Identification ignores the trigger type: keep (sentence, start, end).
        triggers_true = [(item[0], item[1], item[2]) for item in triggers_true]
        triggers_pred = [(item[0], item[1], item[2]) for item in triggers_pred]
        trigger_p_, trigger_r_, trigger_f1_ = calc_metric(triggers_true,
                                                          triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p_, trigger_r_,
                                                     trigger_f1_))

        metric = '[trigger classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p, trigger_r, trigger_f1)
        metric += '[trigger identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p_, trigger_r_, trigger_f1_)

        if write:
            # Read through a context manager so the handle is closed
            # deterministically instead of being left to the GC.
            with open('temp', 'r') as fin:
                result = fin.read()
            with open(fname, 'w') as fout:
                fout.write("{}\n".format(result))
                fout.write(metric)
    finally:
        # The scratch file is removed even when evaluation fails half-way.
        if os.path.exists('temp'):
            os.remove('temp')

    return metric
def main():
    """Run the cross-validation pipeline from the hard-coded ``stage`` on.

    Steps: 0 convert to SVM format, 1 split data, 2 cross-validation,
    3 metrics.  A step runs when ``stage`` is less than or equal to its
    number.
    """
    n_folds = 5
    stage = 1

    # (step number, action) pairs, executed in order.
    pipeline = (
        (0, lambda: convert_to_svm_format()),
        (1, lambda: split_data(cross_project=True, n=n_folds)),
        (2, lambda: cross_validation(n=n_folds)),
        (3, lambda: utils.calc_metric(n=n_folds)),
    )
    for step, run_step in pipeline:
        if stage <= step:
            run_step()
def eval(model, iterator, fname):
    """Evaluate token-level trigger predictions and print P/R/F1.

    Predictions are collected for every batch of ``iterator`` with gradients
    disabled, reduced to head tokens (``is_heads == 1``), and dumped as
    ``word<TAB>gold<TAB>predicted`` lines to the scratch file ``temp``.  The
    first and last token of each sentence are not written.  The dump is then
    parsed back to compute classification metrics with ``calc_metric``; for
    identification every label index >= 2 is collapsed to 2 first.  When the
    classification F1 exceeds 0.69 the dump is saved to
    ``fname + ".P.._R.._F.."``.

    Args:
        model: callable model returning a 3-tuple whose last item holds the
            predicted label indices.
        iterator: iterable of 8-tuple batches.
        fname: prefix of the result file.
    """
    model.eval()

    Words, Is_heads, Triggers, Y_hat = [], [], [], []
    with torch.no_grad():
        for batch in iterator:
            (tokens_x_2d, entities_x_3d, triggers_y_2d, arguments_y_2d,
             seqlens_1d, is_heads_2d, words_2d, triggers_2d) = batch

            _, _, y_hat = model(tokens_x_2d, entities_x_3d, triggers_y_2d)

            Words.extend(words_2d)
            Is_heads.extend(is_heads_2d)
            Triggers.extend(triggers_2d)
            Y_hat.extend(y_hat.cpu().numpy().tolist())

    try:
        # save
        with open('temp', 'w') as fout:
            for words, is_heads, triggers, y_hat in zip(Words, Is_heads,
                                                        Triggers, Y_hat):
                y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
                preds = [idx2trigger[hat] for hat in y_hat]
                # words / triggers are token sequences here (they are sliced
                # and zipped per token below), so report their lengths
                # directly; calling .split() on them would raise while
                # building the message and hide the real mismatch.
                assert len(preds) == len(words) == len(triggers), \
                    'len(preds)={}, len(words)={}, len(triggers)={}'.format(
                        len(preds), len(words), len(triggers))
                for w, t, p in zip(words[1:-1], triggers[1:-1], preds[1:-1]):
                    fout.write(f"{w}\t{t}\t{p}\n")
                fout.write("\n")

        # Re-read the dump to obtain gold / predicted label indices.
        y_true, y_pred = [], []
        with open('temp', 'r') as fin:
            lines = fin.read().splitlines()
        for line in lines:
            if len(line) > 0:
                columns = line.split('\t')
                y_true.append(trigger2idx[columns[1]])
                y_pred.append(trigger2idx[columns[2]])

        precision, recall, f1 = calc_metric(y_true, y_pred)

        if f1 > 0.69:
            final = fname + ".P%.2f_R%.2f_F%.2f" % (precision, recall, f1)
            with open('temp', 'r') as fin:
                result = fin.read()
            with open(final, 'w') as fout:
                fout.write(f"{result}\n")
    finally:
        # Always clean up the scratch file, also when evaluation fails.
        if os.path.exists('temp'):
            os.remove('temp')

    print('[classification]\t\tP={:.3f}\tR={:.3f}\tF1={:.3f}'.format(
        precision, recall, f1))

    # Identification: all label indices >= 2 are treated as one class.
    y_true = [2 if label >= 2 else label for label in y_true]
    y_pred = [2 if label >= 2 else label for label in y_pred]
    precision, recall, f1 = calc_metric(y_true, y_pred)
    print('[identification]\t\tP={:.3f}\tR={:.3f}\tF1={:.3f}'.format(
        precision, recall, f1))
def find_threshold(threshold, _file_name_list, _label_list, _mse_list):
    """Return the F1 score obtained with ``threshold`` as decision boundary.

    An item is predicted positive (1) when its MSE is greater than or equal
    to ``threshold`` and negative (0) otherwise.  The predictions are scored
    against ``_label_list`` with ``calc_metric``, whose result is indexed
    with the keys ``'tp'``, ``'fp'`` and ``'fn'``.

    Args:
        threshold: MSE decision boundary.
        _file_name_list: one entry per item; only its length is used.
        _label_list: ground-truth labels passed to ``calc_metric``.
        _mse_list: MSE value per item.

    Returns:
        The F1 score, or ``np.nan`` when precision, recall or F1 cannot be
        computed because a denominator is zero.
    """
    # One prediction per file name, exactly as many as the original loop
    # produced.  The former per-item ``all_info`` records were never read.
    thresh_result = [
        1 if _mse_list[i] >= threshold else 0
        for i in range(len(_file_name_list))
    ]

    res = calc_metric(_label_list, thresh_result)

    try:
        precision = res['tp'] / (res['tp'] + res['fp'])
        recall = res['tp'] / (res['tp'] + res['fn'])
        return 2 * precision * recall / (precision + recall)
    except ZeroDivisionError:
        return np.nan
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Run one validation pass and return the per-batch averages.

    For each batch the loss is computed on the raw model output, and
    ``utils.calc_metric`` is evaluated on the sigmoid of that output with
    the given ``threshold``.

    Returns:
        Tuple ``(loss, acc, f1, f2, g2, cm)``, each being the sum over all
        batches divided by the number of batches.
    """
    model.eval()

    n_batches = 0
    loss_total = 0
    acc_total = f1_total = f2_total = g2_total = 0
    cm_total = 0

    with torch.no_grad():
        for inputs, target in val_dataloader:
            inputs = inputs.to(device)
            target = target.to(device)

            output = model(inputs)
            loss_total += criterion(output, target).item()
            n_batches += 1

            probabilities = torch.sigmoid(output)
            acc, f1, f2, g2, cm = utils.calc_metric(target, probabilities,
                                                    threshold)
            acc_total += acc
            f1_total += f1
            f2_total += f2
            g2_total += g2
            cm_total += cm

    return (loss_total / n_batches,
            acc_total / n_batches,
            f1_total / n_batches,
            f2_total / n_batches,
            g2_total / n_batches,
            cm_total / n_batches)
def compare(pre_path, gt_path):
    """Score ``pre_path`` against ``gt_path`` and save the raw results.

    ``calc_metric`` is expected to return an array whose column means can be
    formatted as mse / psnr / ssim (assumption based on the format string;
    confirm against ``calc_metric``).  The full result array is saved next
    to ``pre_path`` with the averaged values embedded in the file name.
    """
    print('{} v.s. {}'.format(pre_path, gt_path))

    results = calc_metric(pre_path, gt_path)

    column_means = results.mean(axis=0)
    avg_results = 'mse[{}]_psnr[{}]_ssim[{}]'.format(*column_means)

    output_path = '{}_{}.npy'.format(pre_path, avg_results)
    np.save(output_path, results)

    print('\t' + avg_results)
def eval(self, input, input_chr, labels):
    """Evaluate the model and log accuracy, precision, recall, F1 and loss.

    The data is shuffled with ``utils.shuffle_data`` and processed in
    ``input_chr.shape[0] // self.batch_size`` full batches; samples that do
    not fill a complete batch are not evaluated.  Per-batch metrics from
    ``utils.calc_metric`` are averaged over the batches.

    :param input: word-level inputs; index 0 / 1 along the second axis
        select the first / second element of each pair.
    :param input_chr: character-level inputs, laid out like ``input``.
    :param labels: labels; ``argmax`` along axis 1 gives the class index.
    :return: the mean loss over all evaluated batches, or ``None`` when
        ``input`` is empty or holds fewer samples than one batch.
    """
    if input.size == 0:
        return None

    logging.info("Evaluating on the validation set...")

    num_batches = input_chr.shape[0] // self.batch_size
    if num_batches == 0:
        # Without this guard the averages below divide by zero.
        logging.warning(
            "Validation set is smaller than one batch; skipping evaluation.")
        return None

    input, input_chr, labels = utils.shuffle_data(
        [input, input_chr, labels])

    acc, prec, rec, f1, loss_sum = 0, 0, 0, 0, 0
    for b in range(num_batches):
        batch = slice(b * self.batch_size, (b + 1) * self.batch_size)

        word_1 = input[batch][:, 0]
        char_1 = input_chr[batch][:, 0]
        word_2 = input[batch][:, 1]
        char_2 = input_chr[batch][:, 1]
        label = labels[batch]

        loss, pred = self.sess.run(
            [self.loss, self.softmax], {
                self.word_embedding_input_1: word_1,
                self.chr_embedding_input_1: char_1,
                self.word_embedding_input_2: word_2,
                self.chr_embedding_input_2: char_2,
                self.labels: label
            })

        # Update metric
        a, p, r, f = utils.calc_metric(np.argmax(pred, axis=1),
                                       np.argmax(label, axis=1))
        acc += a
        prec += p
        rec += r
        f1 += f
        loss_sum += loss

    logging.info("Accuracy {:.3f}%".format(acc / num_batches * 100))
    logging.info("Weighted Macro Precision {:.3f}%".format(
        prec / num_batches * 100))
    logging.info("Weighted Macro Recall {:.3f}%".format(rec / num_batches *
                                                        100))
    logging.info(" Weighted Macro F1 {:.3f}%".format(f1 / num_batches * 100))
    logging.info("Average loss {:.5f}\n".format(loss_sum / num_batches))

    return loss_sum / num_batches
def evaluation(sess, model):
    """Score the module-level data ``x`` and report metrics against ``y``.

    Anomaly scores are collected batch by batch from ``model.eval``, turned
    into a single-column array, and used to print AUROC and
    precision / recall / F1 and to draw a precision-recall curve.
    """
    collected = []
    for _, batch_data in DataInput(x, test_batch_size):
        collected.extend(model.eval(sess, batch_data))
    ano_scores = np.array(collected).reshape(-1, 1)

    # AUROC
    auroc = calc_auroc(y, ano_scores)
    print('Eval_auroc:{:.4f}'.format(auroc))

    # Precision / recall / F1
    prec, rec, f1 = calc_metric(y, ano_scores)
    print('Prec:{:.4f}\tRec:{:.4f}\tF1:{:.4f}\n'.format(prec, rec, f1))

    draw_prc(y, ano_scores, key='ResDEAAE_' + 'cross-e')
def eval_token_level_all(arguments_true, arguments_pred):
    """Print token-level precision / recall / F1 for argument spans.

    Both inputs are lists of
    ``(i, t_type_str, a_start, a_end, a_type_idx)`` tuples.  Each span is
    expanded into one ``(i, t_type_str, index, a_type_idx)`` tuple for every
    ``index`` in ``range(a_start + 1, a_end + 1)`` before the two lists are
    handed to ``calc_metric``.
    """

    def _expand(spans):
        # One entry per token index covered by each span.
        return [(i, t_type_str, index, a_type_idx)
                for i, t_type_str, a_start, a_end, a_type_idx in spans
                for index in range(a_start + 1, a_end + 1)]

    new_argu_true = _expand(arguments_true)
    new_argu_pred = _expand(arguments_pred)

    p, r, f = calc_metric(new_argu_true, new_argu_pred)
    print('Precison = {}\n Recall = {}\n F1 = {}\n'.format(p, r, f))
def eval(self, input, input_chr, labels):
    """Log accuracy, precision, recall and F1 for ready-made predictions.

    ``input`` is used directly as the prediction scores: the arg-max along
    axis 1 of ``input`` and of ``labels`` is passed to
    ``utils.calc_metric``.  ``input_chr`` is not used.

    :param input: prediction scores, one row per sample.
    :param input_chr: unused.
    :param labels: label scores / one-hot rows, one row per sample.
    :return: None
    """
    predicted_classes = np.argmax(input, axis=1)
    expected_classes = np.argmax(labels, axis=1)
    acc, prec, rec, f1 = utils.calc_metric(predicted_classes,
                                           expected_classes)

    report = (
        ("Accuracy {:.3f}%", acc),
        ("Macro Precision {:.3f}%", prec),
        ("Macro Recall {:.3f}%", rec),
        ("Macro F1 {:.3f}%\n", f1),
    )
    for template, value in report:
        logging.info(template.format(value * 100))
def evaluation(sess, model, ratio):
    """Evaluate on normal data plus a ratio-dependent subset of anomalies.

    The anomaly subset is the first part returned by ``_split_dataset`` for
    ``mapping_ratio[ratio]``; it is appended to the module-level normal data
    before scoring.  AUROC and precision / recall / F1 are printed.
    """
    anomaly_part, _ = _split_dataset(ano, ano_label, mapping_ratio[ratio])
    sub_ano, sub_ano_label = anomaly_part

    x = np.concatenate((norm, sub_ano), axis=0)
    y = np.concatenate((norm_label, sub_ano_label), axis=0)

    collected = []
    for _, batch_data in DataInput(x, test_batch_size):
        collected.extend(model.eval(sess, batch_data))
    ano_scores = np.array(collected).reshape(-1, 1)

    # AUROC
    auroc = calc_auroc(y, ano_scores)
    print('Anomaly ratio:{:.4f}\tEval_auroc:{:.4f}'.format(ratio, auroc))

    # Precision / recall / F1
    prec, rec, f1 = calc_metric(y, ano_scores)
    print('Prec:{:.4f}\tRec:{:.4f}\tF1:{:.4f}\n'.format(prec, rec, f1))
def evaluation(sess, model):
    """Score the module-level data ``x``, persist the scores and report.

    The labels ``y`` and the single-column score array are pickled to
    ``scores.pkl``; AUROC and precision / recall / F1 are printed and a
    precision-recall curve is drawn.
    """
    collected = []
    for _, batch_data in DataInput(x, test_batch_size):
        collected.extend(model.eval(sess, batch_data))
    ano_scores = np.array(collected).reshape(-1, 1)

    # Keep labels and scores for offline analysis.
    with open('scores.pkl', 'wb') as f:
        pickle.dump((y, ano_scores), f, pickle.HIGHEST_PROTOCOL)

    # AUROC
    auroc = calc_auroc(y, ano_scores)
    print('Eval_auroc:{:.4f}'.format(auroc))

    # Precision / recall / F1
    prec, rec, f1 = calc_metric(y, ano_scores)
    print('Prec:{:.4f}\tRec:{:.4f}\tF1:{:.4f}\n'.format(prec, rec, f1))

    draw_prc(y, ano_scores, key='DEAAE_' + method)
def _eval(sess, model, test_data, label):
    """Score ``test_data`` and checkpoint the model on a new best AUROC.

    Returns:
        Tuple ``(auroc, prec, rec, f1)``.  When ``auroc`` exceeds the
        module-level ``best_auroc``, that global is updated and the model is
        saved to ``'<save_path>/ckpt'``.
    """
    global best_auroc

    collected = []
    for _, batch_data in DataInput(test_data, test_batch_size):
        collected.extend(model.eval(sess, batch_data))
    ano_scores = np.array(collected).reshape(-1, 1)

    auroc = calc_auroc(label, ano_scores)
    prec, rec, f1 = calc_metric(label, ano_scores)

    if auroc > best_auroc:
        best_auroc = auroc
        model.save(sess, '{}/ckpt'.format(save_path))

    return auroc, prec, rec, f1
def find_outer_thresh(_gt, _mse_pred):
    """Return the integer threshold in ``[0, 100)`` with the best F1.

    For every candidate threshold all scores of ``_mse_pred`` are binarised
    (1 when strictly greater than the threshold, else 0) and compared with
    the matching entries of ``_gt`` through ``calc_metric``, whose result is
    indexed with the keys ``'tp'``, ``'fp'`` and ``'fn'``.

    Args:
        _gt: nested sequence of ground-truth labels, indexed ``[i][j]``.
        _mse_pred: nested sequence of scores with the same layout.

    Returns:
        The threshold with the highest F1; candidates whose F1 is undefined
        (zero denominator) are ignored.

    Raises:
        ValueError: from ``np.nanargmax`` when no candidate has a defined F1.
    """
    mse_min = 0
    mse_max = 100
    mse_grid = range(mse_min, mse_max)

    # Flattening does not depend on the threshold, so do it once instead of
    # once per candidate.
    flat_scores = []
    flat_gt = []
    for i, row in enumerate(_mse_pred):
        for j, score in enumerate(row):
            flat_scores.append(score)
            flat_gt.append(_gt[i][j])

    f1s_list = []
    for grid in mse_grid:
        tmp_pred = [1 if score > grid else 0 for score in flat_scores]
        res = calc_metric(flat_gt, tmp_pred)
        try:
            precision = res['tp'] / (res['tp'] + res['fp'])
            recall = res['tp'] / (res['tp'] + res['fn'])
            f1s_list.append(2 * precision * recall / (precision + recall))
        except ZeroDivisionError:
            f1s_list.append(np.nan)

    return list(mse_grid)[int(np.nanargmax(f1s_list))]
def _eval(sess, model, test_data, label):
    """Score ``test_data`` and checkpoint the model on a new best AUPRC.

    ``model.eval`` returns a 3-tuple per batch of which only the first item
    (the anomaly scores) is used.

    Returns:
        Tuple ``(prec, rec, f1, auprc)``.  The module-level ``best_f1`` and
        ``best_auprc`` are raised when beaten; the model is saved to
        ``'<save_path>/ckpt'`` only when ``best_auprc`` improves.
    """
    global best_f1
    global best_auprc

    collected = []
    for _, batch_test_data in DataInput(test_data, test_batch_size):
        batch_scores, _, _ = model.eval(sess, batch_test_data)
        collected.extend(batch_scores)
    ano_scores = np.array(collected).reshape(-1, 1)

    prec, rec, f1 = calc_metric(label, ano_scores)
    _auprc = calc_auc(label, ano_scores)

    if f1 > best_f1:
        best_f1 = f1

    if _auprc > best_auprc:
        best_auprc = _auprc
        model.save(sess, '{}/ckpt'.format(save_path))

    return prec, rec, f1, _auprc
def train_epoch(model, optimizer, criterion, train_dataloader, show_interval=10):
    """Train for one epoch and return the per-batch averages.

    Each batch performs a forward pass, back-propagation and an optimizer
    step; ``utils.calc_metric`` is evaluated on the sigmoid of the model
    output.  Every ``show_interval`` batches the current batch's loss and
    metrics are printed.

    Returns:
        Tuple ``(loss, acc, f1, f2, g2, cm)``, each being the sum over all
        batches divided by the number of batches.
    """
    model.train()

    n_batches = 0
    loss_total = 0
    acc_total = f1_total = f2_total = g2_total = 0
    cm_total = 0

    for inputs, target in train_dataloader:
        inputs = inputs.to(device)
        target = target.to(device)

        # Reset gradients, then forward / backward / update.
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        loss_total += loss.item()
        n_batches += 1

        acc, f1, f2, g2, cm = utils.calc_metric(target, torch.sigmoid(output))
        acc_total += acc
        f1_total += f1
        f2_total += f2
        g2_total += g2
        cm_total += cm

        # n_batches is at least 1 here, so only the interval matters.
        if n_batches % show_interval == 0:
            print("%d,loss:%.3e acc:%.3f f1:%.3f f2:%.3f g2:%.3f cm:%.3f " %
                  (n_batches, loss.item(), acc, f1, f2, g2, cm))

    return (loss_total / n_batches,
            acc_total / n_batches,
            f1_total / n_batches,
            f2_total / n_batches,
            g2_total / n_batches,
            cm_total / n_batches)
def run_train(ps_hosts, worker_hosts, job_name, task_index, model_f, data_path,
              output_path, param_path):
    """Run one task (parameter server or worker) of a distributed training job.

    A ``"ps"`` task only joins the server.  A ``"worker"`` task loads the
    data, builds the input pipeline and an MLP classifier, and trains inside
    a ``MonitoredTrainingSession`` that checkpoints to ``output_path``.  The
    chief worker (``task_index == 0``) additionally tracks a sliding window
    of training accuracy / loss and periodically evaluates on the validation
    split.

    Args:
        ps_hosts: comma-separated parameter-server addresses.
        worker_hosts: comma-separated worker addresses.
        job_name: ``"ps"`` or ``"worker"``; any other value does nothing
            beyond creating the server.
        task_index: index of this task within its job.
        model_f: not used by this function.
        data_path: path handed to ``read_data``.
        output_path: checkpoint directory of the monitored session.
        param_path: JSON file with ``train_ratio``, ``valid_ratio``,
            ``dataset_shuffle_buffer_size``, ``batch_size``, ``n_epoch``,
            ``learning_rate`` and ``model_param``.
    """
    # ======================================
    # Variables
    ps_hosts = ps_hosts.split(",")
    worker_hosts = worker_hosts.split(",")

    param_dict = load_json(param_path)

    # Create a cluster from the parameter server and worker hosts.
    cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})

    # Create and start a server for the local task.
    server = tf.train.Server(cluster, job_name=job_name, task_index=task_index)

    if job_name == "ps":
        server.join()
    elif job_name == "worker":
        # Load Data
        (X_train, Y_train, X_valid, Y_valid, _,
         _) = read_data(data_path, param_dict['train_ratio'],
                        param_dict['valid_ratio'])

        print("=" * 30)
        print("X_train shape: {}".format(X_train.shape))
        print("Y_train shape: {}".format(Y_train.shape))
        print("X_valid shape: {}".format(X_valid.shape))
        print("Y_valid shape: {}".format(Y_valid.shape))
        print("=" * 30)

        # Inference output dimension
        output_dim = len(Y_train[0])

        # Check is_chief
        is_chief = task_index == 0

        # Assigns ops to the local worker by default.
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % task_index,
                    cluster=cluster)):
            # Build model...

            # Datasets
            train_X_dataset = tf.data.Dataset.from_tensor_slices(X_train)
            train_Y_dataset = tf.data.Dataset.from_tensor_slices(Y_train)
            train_dataset = tf.data.Dataset.zip(
                (train_X_dataset, train_Y_dataset))
            train_dataset = train_dataset.shuffle(
                param_dict['dataset_shuffle_buffer_size']).batch(
                    param_dict['batch_size']).repeat(param_dict['n_epoch'])

            if is_chief:
                valid_X_dataset = tf.data.Dataset.from_tensor_slices(X_valid)
                valid_Y_dataset = tf.data.Dataset.from_tensor_slices(Y_valid)
                valid_dataset = tf.data.Dataset.zip(
                    (valid_X_dataset, valid_Y_dataset))
                valid_dataset = valid_dataset.shuffle(
                    param_dict['dataset_shuffle_buffer_size']).batch(
                        param_dict['batch_size'])

            # Feedable Iterator
            handle = tf.placeholder(tf.string, shape=[])
            iterator = tf.data.Iterator.from_string_handle(
                handle, train_dataset.output_types,
                train_dataset.output_shapes)

            # Iterators
            train_iterator = train_dataset.make_one_shot_iterator()
            train_handle_tensor = train_iterator.string_handle()

            if is_chief:
                valid_iterator = valid_dataset.make_initializable_iterator()
                valid_handle_tensor = valid_iterator.string_handle()

            X, Y = iterator.get_next()

            is_training = tf.placeholder_with_default(False,
                                                      shape=None,
                                                      name="is_training")

            global_step = tf.contrib.framework.get_or_create_global_step()

            logits = mlp(X=X,
                         output_dim=output_dim,
                         is_training=is_training,
                         **param_dict['model_param'])

            Y_pred = slim.softmax(logits)

            loss = slim.losses.softmax_cross_entropy(logits, Y)
            accuracy, correct = calc_metric(Y, Y_pred)

            train_op = tf.train.AdamOptimizer(
                param_dict['learning_rate']).minimize(loss,
                                                      global_step=global_step)

            tf.add_to_collection('X', X)
            tf.add_to_collection('Y_pred', Y_pred)

        # The StopAtStepHook handles stopping after running given steps.
        # hooks = [tf.train.StopAtStepHook(last_step=1000000)]

        # The MonitoredTrainingSession takes care of session initialization,
        # restoring from a checkpoint, saving to a checkpoint, and closing when done
        # or an error occurs.
        with tf.train.MonitoredTrainingSession(
                master=server.target,
                is_chief=is_chief,
                checkpoint_dir=output_path,
                # hooks=hooks,
        ) as mon_sess:
            # Get dataset handle
            train_handle = mon_sess.run(train_handle_tensor)
            # The validation iterator (and its handle tensor) only exists on
            # the chief; fetching it unconditionally raised NameError on
            # every other worker.
            if is_chief:
                valid_handle = mon_sess.run(valid_handle_tensor)

            # Metric window
            acc_window = [0.] * TRAIN_METRIC_WINDOW
            loss_window = [0.] * TRAIN_METRIC_WINDOW

            batch_i = 0
            while not mon_sess.should_stop():
                # Run a training step asynchronously.
                mon_sess.run(train_op,
                             feed_dict={
                                 is_training: True,
                                 handle: train_handle,
                             })

                if is_chief:
                    # NOTE(review): this fetch pulls one more batch from the
                    # training iterator in inference mode - confirm that
                    # consuming training data here is intended.
                    train_accuracy, train_loss = mon_sess.run(
                        [accuracy, loss],
                        feed_dict={
                            is_training: False,
                            handle: train_handle,
                        })
                    acc_window = acc_window[1:] + [train_accuracy]
                    loss_window = loss_window[1:] + [train_loss]

                    if batch_i % VERBOSE_INTERVAL == 0:
                        recent_mean_train_accuracy = sum(acc_window) / len(
                            acc_window)
                        recent_mean_train_loss = sum(loss_window) / len(
                            loss_window)

                        valid_i = 0
                        valid_correct = 0
                        valid_loss = 0
                        valid_total_num = 0

                        # Full pass over the validation set.
                        mon_sess.run(valid_iterator.initializer)
                        while True:
                            try:
                                (batch_Y_pred, batch_valid_correct,
                                 batch_valid_loss) = mon_sess.run(
                                     [Y_pred, correct, loss],
                                     feed_dict={
                                         is_training: False,
                                         handle: valid_handle,
                                     })
                                curr_batch_num = batch_Y_pred.shape[0]
                                valid_correct += batch_valid_correct.sum()
                                # Weight the batch loss by batch size so the
                                # final value is a per-sample mean.
                                valid_loss += batch_valid_loss * curr_batch_num
                                valid_total_num += curr_batch_num
                                valid_i += 1
                            except tf.errors.OutOfRangeError:
                                break

                        valid_accuracy = valid_correct / valid_total_num
                        valid_loss = valid_loss / valid_total_num

                        print("-" * 30)
                        print("recent_mean_train_accuracy : {}".format(
                            recent_mean_train_accuracy))
                        print("recent_mean_train_loss : {}".format(
                            recent_mean_train_loss))
                        print("valid_accuracy : {}".format(valid_accuracy))
                        print("valid_loss : {}".format(valid_loss))

                batch_i += 1
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# test sets from a trusted source.
test_set = pickle.load(f)
x_test, y_test = test_set
print('test set', x_test.shape)

with tf.Session() as sess:
    model = BiWGAN(input_dim, method, weight, degree)

    # Initialise variables, then overwrite them with the saved checkpoint.
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    model.restore(sess, '{}/ckpt'.format(save_path))

    # model.eval returns a 3-tuple; only the anomaly scores are kept.
    collected = []
    for _, batch_test_data in DataInput(x_test, test_batch_size):
        batch_scores, _, _ = model.eval(sess, batch_test_data)
        collected.extend(batch_scores)
    ano_scores = np.array(collected).reshape(-1, 1)

    # Precision / recall / F1 with calc_metric's percentile set to 80.
    prec, rec, f1 = calc_metric(y_test, ano_scores, percentile=80)

    # Area under the precision-recall curve.
    auprc = calc_auc(y_test, ano_scores)

    print('Prec:{:.4f} | Rec:{:.4f} | F1:{:.4f} | AUPRC:{:.4f}'.format(
        prec, rec, f1, auprc))
def eval(model, iterator, fname):
    """Evaluate joint trigger / argument prediction and write a report.

    Every ``(test, labels)`` batch of ``iterator`` is passed to ``model``
    with gradients disabled.  Per sentence, the tokens with gold and
    predicted trigger labels plus the gold and predicted argument
    dictionaries are dumped to the scratch file ``temp``.  Precision,
    recall and F1 from ``calc_metric`` are printed for trigger / argument
    classification and identification, and the dump followed by the metric
    summary is written to ``fname + ".P.._R.._F.."`` (trigger
    classification scores).

    Args:
        model: callable returning an 8-tuple per batch.
        iterator: iterable of ``(test, labels)`` pairs; ``test[3]`` holds
            the words, ``test[4]`` the gold triggers and ``test[-1]`` the
            gold argument dictionaries.
        fname: prefix of the report file.

    Returns:
        Tuple ``(metric, trigger_f1, argument_f1)`` where ``metric`` is the
        four-line summary string.
    """
    model.eval()

    words_all, triggers_all, triggers_hat_all = [], [], []
    arguments_all, arguments_hat_all = [], []

    with torch.no_grad():
        for test, labels in iterator:
            (trigger_logits, trigger_entities_hat_2d, triggers_y_2d,
             argument_hidden_logits, arguments_y_1d, argument_hidden_hat_1d,
             argument_hat_2d, argument_keys) = model(test, labels)

            words_all.extend(test[3])
            triggers_all.extend(test[4])
            triggers_hat_all.extend(
                trigger_entities_hat_2d.cpu().numpy().tolist())
            arguments_2d = test[-1]
            arguments_all.extend(arguments_2d)

            if len(argument_keys) > 0:
                arguments_hat_all.extend(argument_hat_2d)
            else:
                # No argument candidates: one empty prediction per sentence.
                batch_size = len(arguments_2d)
                argument_hat_2d = [{'events': {}} for _ in range(batch_size)]
                arguments_hat_all.extend(argument_hat_2d)

    triggers_true, triggers_pred = [], []
    arguments_true, arguments_pred = [], []
    try:
        with open('temp', 'w', encoding='utf-8') as fout:
            for i, (words, triggers, triggers_hat, arguments,
                    arguments_hat) in enumerate(
                        zip(words_all, triggers_all, triggers_hat_all,
                            arguments_all, arguments_hat_all)):
                triggers_hat = triggers_hat[:len(words)]
                triggers_hat = [
                    idx2trigger_entities[hat] for hat in triggers_hat
                ]

                # [(ith sentence, t_start, t_end, t_type_str)]
                triggers_true_, entities_true = find_triggers(
                    triggers[:len(words)])
                triggers_pred_, entities_pred = find_triggers(triggers_hat)
                triggers_true.extend([(i, *item) for item in triggers_true_])
                triggers_pred.extend([(i, *item) for item in triggers_pred_])

                # NOTE(review): the argument tuples below carry no sentence
                # index, so identical tuples from different sentences are
                # indistinguishable to calc_metric - confirm this is intended.
                for trigger in arguments['events']:
                    t_start, t_end, t_type_str = trigger
                    for argument in arguments['events'][trigger]:
                        a_start, a_end, a_type_idx = argument
                        arguments_true.append(
                            (t_type_str, a_start, a_end, a_type_idx))

                for trigger in arguments_hat['events']:
                    t_start, t_end, t_type_str = trigger
                    # Skip predictions that reach past the sentence end.
                    if t_start >= len(words) or t_end >= len(words):
                        continue
                    for argument in arguments_hat['events'][trigger]:
                        a_start, a_end, a_type_idx = argument
                        if a_start >= len(words) or a_end >= len(words):
                            continue
                        arguments_pred.append(
                            (t_type_str, a_start, a_end, a_type_idx))

                for w, t, t_h in zip(words, triggers, triggers_hat):
                    fout.write('{}\t{}\t{}\n'.format(w, t, t_h))
                fout.write('#arguments#{}\n'.format(arguments['events']))
                fout.write('#arguments_hat#{}\n'.format(
                    arguments_hat['events']))
                fout.write("\n")

        print('[trigger classification]')
        trigger_p, trigger_r, trigger_f1 = calc_metric(triggers_true,
                                                       triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p, trigger_r,
                                                     trigger_f1))

        print('[argument classification]')
        argument_p, argument_r, argument_f1 = calc_metric(arguments_true,
                                                          arguments_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(argument_p, argument_r,
                                                     argument_f1))

        print('[trigger identification]')
        # Identification drops the last tuple field (the type).
        triggers_true = [(item[0], item[1], item[2]) for item in triggers_true]
        triggers_pred = [(item[0], item[1], item[2]) for item in triggers_pred]
        trigger_p_, trigger_r_, trigger_f1_ = calc_metric(triggers_true,
                                                          triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p_, trigger_r_,
                                                     trigger_f1_))

        print('[argument identification]')
        arguments_true = [(item[0], item[1], item[2])
                          for item in arguments_true]
        arguments_pred = [(item[0], item[1], item[2])
                          for item in arguments_pred]
        argument_p_, argument_r_, argument_f1_ = calc_metric(
            arguments_true, arguments_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(argument_p_, argument_r_,
                                                     argument_f1_))

        metric = '[trigger classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p, trigger_r, trigger_f1)
        metric += '[argument classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            argument_p, argument_r, argument_f1)
        metric += '[trigger identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p_, trigger_r_, trigger_f1_)
        metric += '[argument identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            argument_p_, argument_r_, argument_f1_)

        final = fname + ".P%.2f_R%.2f_F%.2f" % (trigger_p, trigger_r,
                                                trigger_f1)
        # Read through a context manager so the handle is closed
        # deterministically.
        with open('temp', 'r', encoding='utf-8') as fin:
            result = fin.read()
        with open(final, 'w', encoding='utf-8') as fout:
            fout.write("{}\n".format(result))
            fout.write(metric)
    finally:
        # Remove the scratch file even when evaluation fails half-way.
        if os.path.exists('temp'):
            os.remove('temp')

    return metric, trigger_f1, argument_f1
gt, pred_pos, pred_mse = check_detect_result(
    video_frame,
    video_name,
    selected_threshold,
    experiment_type='test',
)
# TODO: find threshold using validation set

# Accumulate the confusion counts of the position-based predictions.
for i, truth in enumerate(gt):
    res = calc_metric(truth, pred_pos[i])
    tp_pos += res['tp']
    fn_pos += res['fn']
    tn_pos += res['tn']
    fp_pos += res['fp']

# Accumulate the confusion counts of the MSE-based predictions; each row is
# copied into a plain list before it is scored.
for i, truth in enumerate(gt):
    tmp_mse = [pred_mse[i][j] for j in range(len(pred_mse[i]))]
    res = calc_metric(truth, tmp_mse)
    tp_mse += res['tp']
    fn_mse += res['fn']
    tn_mse += res['tn']
    fp_mse += res['fp']
def eval(model, iterator, fname):
    """Evaluate context-aware trigger prediction and write a report.

    For every batch, mask and length matrices for the previous / next
    sentence are built and passed to ``model.predict_triggers`` with
    gradients disabled.  Tokens with gold and predicted trigger labels are
    dumped to the scratch file ``temp``; precision, recall and F1 from
    ``calc_metric`` are printed for trigger classification and
    identification, and the dump followed by the metric summary is written
    to ``fname + ".P.._R.._F.."``.

    Args:
        model: model exposing ``eval()`` and ``predict_triggers(...)``.
        iterator: iterable of 14-tuple batches (see the unpacking below).
        fname: prefix of the report file.

    Returns:
        Dict with the keys ``"trigger classification"`` and
        ``"trigger identification"``, each mapping to ``[P, R, F1]``.
    """
    # Width of every mask / length row; presumably the encoder hidden size -
    # TODO confirm against the model.
    feature_dim = 768

    model.eval()

    words_all, triggers_all, triggers_hat_all = [], [], []

    with torch.no_grad():
        for batch in iterator:
            (tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d,
             arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d,
             pre_sent_tokens_x, next_sent_tokens_x, pre_sent_len,
             next_sent_len, maxlen) = batch

            pre_sent_flags = []
            next_sent_flags = []
            pre_sent_len_mat = []
            next_sent_len_mat = []

            # Rows of ones for real tokens followed by rows of zeros up to
            # maxlen, plus a row repeating the sentence length.
            for sent_len in pre_sent_len:
                flags = ([[1] * feature_dim] * sent_len +
                         [[0] * feature_dim] * (maxlen - sent_len))
                pre_sent_flags.append(flags)
                pre_sent_len_mat.append([sent_len] * feature_dim)

            for sent_len in next_sent_len:
                flags = ([[1] * feature_dim] * sent_len +
                         [[0] * feature_dim] * (maxlen - sent_len))
                next_sent_flags.append(flags)
                next_sent_len_mat.append([sent_len] * feature_dim)

            trigger_logits, triggers_y_2d, trigger_hat_2d = model.predict_triggers(
                tokens_x_2d=tokens_x_2d,
                entities_x_3d=entities_x_3d,
                postags_x_2d=postags_x_2d,
                head_indexes_2d=head_indexes_2d,
                triggers_y_2d=triggers_y_2d,
                arguments_2d=arguments_2d,
                pre_sent_tokens_x=pre_sent_tokens_x,
                next_sent_tokens_x=next_sent_tokens_x,
                pre_sent_flags=pre_sent_flags,
                next_sent_flags=next_sent_flags,
                pre_sent_len_mat=pre_sent_len_mat,
                next_sent_len_mat=next_sent_len_mat)

            words_all.extend(words_2d)
            triggers_all.extend(triggers_2d)
            triggers_hat_all.extend(trigger_hat_2d.cpu().numpy().tolist())

    triggers_true, triggers_pred = [], []
    try:
        with open('temp', 'w', encoding='utf-8') as fout:
            for i, (words, triggers, triggers_hat) in enumerate(
                    zip(words_all, triggers_all, triggers_hat_all)):
                triggers_hat = triggers_hat[:len(words)]
                triggers_hat = [idx2trigger[hat] for hat in triggers_hat]

                # [(ith sentence, t_start, t_end, t_type_str)]
                triggers_true.extend(
                    [(i, *item) for item in find_triggers(triggers)])
                triggers_pred.extend(
                    [(i, *item) for item in find_triggers(triggers_hat)])

                # The first and last entry of ``words`` are not written.
                for w, t, t_h in zip(words[1:-1], triggers, triggers_hat):
                    fout.write('{}\t{}\t{}\n'.format(w, t, t_h))
                fout.write("\n")

        print('[trigger classification]')
        trigger_p, trigger_r, trigger_f1 = calc_metric(triggers_true,
                                                       triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p, trigger_r,
                                                     trigger_f1))

        print('[trigger identification]')
        # Identification ignores the trigger type.
        triggers_true = [(item[0], item[1], item[2]) for item in triggers_true]
        triggers_pred = [(item[0], item[1], item[2]) for item in triggers_pred]
        trigger_p_, trigger_r_, trigger_f1_ = calc_metric(triggers_true,
                                                          triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p_, trigger_r_,
                                                     trigger_f1_))

        metric = '[trigger classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p, trigger_r, trigger_f1)
        metric += '[trigger identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p_, trigger_r_, trigger_f1_)

        final = fname + ".P%.2f_R%.2f_F%.2f" % (trigger_p, trigger_r,
                                                trigger_f1)
        metric_2 = {
            "trigger classification": [trigger_p, trigger_r, trigger_f1],
            "trigger identification": [trigger_p_, trigger_r_, trigger_f1_]
        }

        # The dump was written as UTF-8, so it must be read back - and the
        # report written - as UTF-8 too; relying on the locale default can
        # fail or garble text on non-UTF-8 systems.
        with open('temp', 'r', encoding='utf-8') as fin:
            result = fin.read()
        with open(final, 'w', encoding='utf-8') as fout:
            fout.write("{}\n".format(result))
            fout.write(metric)
    finally:
        # Remove the scratch file even when evaluation fails half-way.
        if os.path.exists('temp'):
            os.remove('temp')

    return metric_2
def eval(model, iterator, fname):
    """Evaluate trigger and argument prediction and write a report.

    Triggers are predicted with ``model.module.predict_triggers`` and, when
    argument candidates exist, arguments with
    ``model.module.predict_arguments`` (gradients disabled).  Per sentence,
    the tokens with gold / predicted trigger labels and the gold / predicted
    arguments (argument type ids rendered through ``idx2argument``) are
    dumped to the scratch file ``temp``.  Precision, recall and F1 from
    ``calc_metric`` are printed for trigger / argument classification and
    identification; the dump followed by the metric summary is written to
    ``fname + ".trigger-F.. argument-F.."``.

    Args:
        model: wrapper exposing the real model as ``model.module``.
        iterator: iterable of 10-tuple batches (see the unpacking below).
        fname: prefix of the report file.

    Returns:
        Tuple ``(metric, trigger_f1, argument_f1)`` where ``metric`` is the
        four-line summary string.
    """
    model.eval()

    words_all, triggers_all, triggers_hat_all = [], [], []
    arguments_all, arguments_hat_all = [], []

    with torch.no_grad():
        for batch in iterator:
            (tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d,
             arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d,
             adjm) = batch

            (trigger_loss, triggers_y_2d, trigger_hat_2d, argument_hidden,
             argument_keys) = model.module.predict_triggers(
                 tokens_x_2d=tokens_x_2d,
                 entities_x_3d=entities_x_3d,
                 postags_x_2d=postags_x_2d,
                 head_indexes_2d=head_indexes_2d,
                 triggers_y_2d=triggers_y_2d,
                 arguments_2d=arguments_2d,
                 adjm=adjm)

            words_all.extend(words_2d)
            triggers_all.extend(triggers_2d)
            triggers_hat_all.extend(trigger_hat_2d.cpu().numpy().tolist())
            arguments_all.extend(arguments_2d)

            if len(argument_keys) > 0:
                (argument_loss, arguments_y_2d, argument_hat_1d,
                 argument_hat_2d) = model.module.predict_arguments(
                     argument_hidden, argument_keys, arguments_2d, adjm)
                arguments_hat_all.extend(argument_hat_2d)
            else:
                # No argument candidates: one empty prediction per sentence.
                batch_size = len(arguments_2d)
                argument_hat_2d = [{'events': {}} for _ in range(batch_size)]
                arguments_hat_all.extend(argument_hat_2d)

    triggers_true, triggers_pred = [], []
    arguments_true, arguments_pred = [], []
    try:
        with open('temp', 'w', encoding="utf-8") as fout:
            for i, (words, triggers, triggers_hat, arguments,
                    arguments_hat) in enumerate(
                        zip(words_all, triggers_all, triggers_hat_all,
                            arguments_all, arguments_hat_all)):
                triggers_hat = triggers_hat[:len(words)]
                triggers_hat = [idx2trigger[hat] for hat in triggers_hat]

                # [(ith sentence, t_start, t_end, t_type_str)]
                triggers_true.extend(
                    [(i, *item) for item in find_triggers(triggers)])
                triggers_pred.extend(
                    [(i, *item) for item in find_triggers(triggers_hat)])

                # [(ith sentence, t_start, t_end, t_type_str, a_start, a_end, a_type_idx)]
                for trigger in arguments['events']:
                    t_start, t_end, t_type_str = trigger
                    for argument in arguments['events'][trigger]:
                        a_start, a_end, a_type_idx = argument
                        arguments_true.append(
                            (i, t_start, t_end, t_type_str, a_start, a_end,
                             a_type_idx))

                for trigger in arguments_hat['events']:
                    t_start, t_end, t_type_str = trigger
                    for argument in arguments_hat['events'][trigger]:
                        a_start, a_end, a_type_idx = argument
                        arguments_pred.append(
                            (i, t_start, t_end, t_type_str, a_start, a_end,
                             a_type_idx))

                # The first and last entry of ``words`` are not written.
                for w, t, t_h in zip(words[1:-1], triggers, triggers_hat):
                    fout.write('{}\t{}\t{}\n'.format(w, t, t_h))

                # Convert argument type ids to strings for the report.  The
                # converted lists are fresh copies: rewriting the gold lists
                # in place would leave strings where later passes over the
                # same data expect ids.
                # e.g. [(0, 5, 25), (8, 19, 17), (20, 21, 29)]
                arg_write = {
                    arg_key: [(tup[0], tup[1], idx2argument[tup[2]])
                              for tup in arg]
                    for arg_key, arg in arguments['events'].items()
                }
                arghat_write = {
                    arg_key: [(tup[0], tup[1], idx2argument[tup[2]])
                              for tup in arg]
                    for arg_key, arg in arguments_hat['events'].items()
                }

                fout.write('#真实值#\t{}\n'.format(arg_write))
                fout.write('#预测值#\t{}\n'.format(arghat_write))
                fout.write("\n")

        print('[trigger classification]')
        trigger_p, trigger_r, trigger_f1 = calc_metric(triggers_true,
                                                       triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p, trigger_r,
                                                     trigger_f1))

        print('[argument classification]')
        argument_p, argument_r, argument_f1 = calc_metric(arguments_true,
                                                          arguments_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(argument_p, argument_r,
                                                     argument_f1))

        print('[trigger identification]')
        # Identification drops the last tuple field (the type).
        triggers_true = [(item[0], item[1], item[2]) for item in triggers_true]
        triggers_pred = [(item[0], item[1], item[2]) for item in triggers_pred]
        trigger_p_, trigger_r_, trigger_f1_ = calc_metric(triggers_true,
                                                          triggers_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p_, trigger_r_,
                                                     trigger_f1_))

        print('[argument identification]')
        arguments_true = [(item[0], item[1], item[2], item[3], item[4],
                           item[5]) for item in arguments_true]
        arguments_pred = [(item[0], item[1], item[2], item[3], item[4],
                           item[5]) for item in arguments_pred]
        argument_p_, argument_r_, argument_f1_ = calc_metric(
            arguments_true, arguments_pred)
        print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(argument_p_, argument_r_,
                                                     argument_f1_))

        metric = '[trigger classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p, trigger_r, trigger_f1)
        metric += '[argument classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            argument_p, argument_r, argument_f1)
        metric += '[trigger identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            trigger_p_, trigger_r_, trigger_f1_)
        metric += '[argument identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
            argument_p_, argument_r_, argument_f1_)

        final = fname + ".trigger-F%.2f argument-F%.2f" % (trigger_f1,
                                                          argument_f1)
        # Read through a context manager so the handle is closed
        # deterministically.
        with open('temp', 'r', encoding="utf-8") as fin:
            result = fin.read()
        with open(final, 'w', encoding="utf-8") as fout:
            fout.write("{}\n".format(result))
            fout.write(metric)
    finally:
        # Remove the scratch file even when evaluation fails half-way.
        if os.path.exists('temp'):
            os.remove('temp')

    return metric, trigger_f1, argument_f1
def eval_module(model, iterator, fname, module, idx2argument):
    """Score trigger predictions and the arguments of a single module.

    The model is run over ``iterator`` with gradients disabled, a per-token
    dump is written to a file named ``temp`` in the working directory, and
    precision / recall / F1 are printed for triggers and arguments.

    Args:
        model: wrapper whose ``module`` attribute provides
            ``predict_triggers``, ``module_predict_arguments`` and
            ``meta_classifier``.
        iterator: iterable of 9-element batches (unpacked below).
        fname: not used by this function; kept for interface compatibility.
        module: value compared against ``idx2argument[a_type_idx]`` to pick
            the gold arguments that are scored.
        idx2argument: lookup from argument type index to the value compared
            with ``module``.

    Returns:
        tuple: ``(metric, argument_f1, num_proposed, num_correct, num_gold)``
        where ``metric`` is the formatted trigger-classification line and the
        remaining values come from ``calc_metric(..., num_flag=True)``.
    """
    model.eval()

    sent_words, gold_tags, pred_tag_ids, gold_args, pred_args = [], [], [], [], []

    with torch.no_grad():
        for batch in iterator:
            (tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d,
             arguments_2d, seqlens_1d, head_indexes_2d, words_2d,
             triggers_2d) = batch

            (trigger_logits, triggers_y_2d, trigger_hat_2d, argument_hidden,
             argument_keys, trigger_info,
             auxiliary_feature) = model.module.predict_triggers(
                 tokens_x_2d=tokens_x_2d,
                 entities_x_3d=entities_x_3d,
                 postags_x_2d=postags_x_2d,
                 head_indexes_2d=head_indexes_2d,
                 triggers_y_2d=triggers_y_2d,
                 arguments_2d=arguments_2d)

            sent_words.extend(words_2d)
            gold_tags.extend(triggers_2d)
            pred_tag_ids.extend(trigger_hat_2d.cpu().numpy().tolist())
            gold_args.extend(arguments_2d)

            if len(argument_keys) > 0:
                argument_logits, _, argument_hat_1d, _ = model.module.module_predict_arguments(
                    argument_hidden, argument_keys, arguments_2d, module)
                # The meta classifier's third output replaces the raw
                # per-module argument prediction.
                _, _, batch_pred_args = model.module.meta_classifier(
                    argument_keys, arguments_2d, trigger_info,
                    argument_logits, argument_hat_1d, auxiliary_feature,
                    module)
            else:
                # No candidate pairs in this batch: one empty event dict per
                # sentence keeps the lists aligned.
                batch_pred_args = [{'events': {}} for _ in range(len(arguments_2d))]
            pred_args.extend(batch_pred_args)

    triggers_true, triggers_pred, arguments_true, arguments_pred = [], [], [], []

    with open('temp', 'w') as fout:
        sentences = zip(sent_words, gold_tags, pred_tag_ids, gold_args, pred_args)
        for sent_id, (words, triggers, hat_ids, arguments, arguments_hat) in enumerate(sentences):
            triggers_hat = [idx2trigger[tag_id] for tag_id in hat_ids[:len(words)]]

            # (sentence index, t_start, t_end, t_type_str)
            triggers_true.extend(
                (sent_id, *span) for span in find_triggers(triggers))
            triggers_pred.extend(
                (sent_id, *span) for span in find_triggers(triggers_hat))

            # Relaxed metric: trigger offsets are left out of the key, and
            # only gold arguments belonging to `module` are counted, all
            # recorded under the fixed label 2.
            for (_t_start, _t_end, t_type_str), gold_roles in arguments['events'].items():
                for a_start, a_end, a_type_idx in gold_roles:
                    if idx2argument[a_type_idx] == module:
                        arguments_true.append(
                            (sent_id, t_type_str, a_start, a_end, 2))

            # Predictions are kept with whatever type index they carry.
            for (_t_start, _t_end, t_type_str), pred_roles in arguments_hat['events'].items():
                for a_start, a_end, a_type_idx in pred_roles:
                    arguments_pred.append(
                        (sent_id, t_type_str, a_start, a_end, a_type_idx))

            # words[1:-1] drops the first and last entry of each sentence.
            fout.writelines(
                '{}\t{}\t{}\n'.format(w, t, t_h)
                for w, t, t_h in zip(words[1:-1], triggers, triggers_hat))
            fout.write('#arguments#{}\n'.format(arguments['events']))
            fout.write('#arguments_hat#{}\n'.format(arguments_hat['events']))
            fout.write("\n")

    score_line = 'P={:.3f}\tR={:.3f}\tF1={:.3f}'

    print('[trigger classification]')
    trigger_p, trigger_r, trigger_f1 = calc_metric(triggers_true, triggers_pred)
    print(score_line.format(trigger_p, trigger_r, trigger_f1))

    print('[argument classification]')
    (argument_p, argument_r, argument_f1,
     num_proposed, num_correct, num_gold) = calc_metric(
         arguments_true, arguments_pred, num_flag=True)
    print(score_line.format(argument_p, argument_r, argument_f1))

    metric = '[trigger classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
        trigger_p, trigger_r, trigger_f1)

    return metric, argument_f1, num_proposed, num_correct, num_gold
def eval(model, iterator, fname):
    """Evaluate trigger and argument extraction and write a report file.

    The model is run over ``iterator`` with gradients disabled.  A per-token
    dump is staged in a scratch file named ``temp`` in the working directory
    and then copied, followed by the metric summary, into
    ``fname + ".P<p>_R<r>_F<f1>"`` (trigger classification scores).  The
    scratch file is removed afterwards.

    Args:
        model: wrapper whose ``module`` attribute provides
            ``predict_triggers`` and ``predict_arguments``.
        iterator: iterable of 9-element batches (unpacked below).
        fname: prefix of the report file path.

    Returns:
        str: four lines of tab-separated P/R/F1 values, in the order trigger
        classification, argument classification, trigger identification,
        argument identification.
    """
    model.eval()

    words_all, triggers_all, triggers_hat_all, arguments_all, arguments_hat_all = [], [], [], [], []

    with torch.no_grad():
        for i, batch in enumerate(iterator):
            (tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d,
             arguments_2d, seqlens_1d, head_indexes_2d, words_2d,
             triggers_2d) = batch

            (trigger_logits, triggers_y_2d, trigger_hat_2d, argument_hidden,
             argument_keys) = model.module.predict_triggers(
                 tokens_x_2d=tokens_x_2d,
                 entities_x_3d=entities_x_3d,
                 postags_x_2d=postags_x_2d,
                 head_indexes_2d=head_indexes_2d,
                 triggers_y_2d=triggers_y_2d,
                 arguments_2d=arguments_2d)

            words_all.extend(words_2d)
            triggers_all.extend(triggers_2d)
            triggers_hat_all.extend(trigger_hat_2d.cpu().numpy().tolist())
            arguments_all.extend(arguments_2d)

            if len(argument_keys) > 0:
                (argument_logits, arguments_y_1d, argument_hat_1d,
                 argument_hat_2d) = model.module.predict_arguments(
                     argument_hidden, argument_keys, arguments_2d)
                arguments_hat_all.extend(argument_hat_2d)
            else:
                # No candidate pairs: one empty event dict per sentence keeps
                # the prediction list aligned with the gold list.
                batch_size = len(arguments_2d)
                argument_hat_2d = [{'events': {}} for _ in range(batch_size)]
                arguments_hat_all.extend(argument_hat_2d)

    triggers_true, triggers_pred, arguments_true, arguments_pred = [], [], [], []

    # NOTE(review): the scratch file has a fixed name in the working
    # directory, so two evaluations running in the same directory would
    # overwrite each other's dump.
    with open('temp', 'w') as fout:
        for i, (words, triggers, triggers_hat, arguments, arguments_hat) in enumerate(
                zip(words_all, triggers_all, triggers_hat_all, arguments_all,
                    arguments_hat_all)):
            triggers_hat = triggers_hat[:len(words)]
            triggers_hat = [idx2trigger[hat] for hat in triggers_hat]

            # [(ith sentence, t_start, t_end, t_type_str)]
            triggers_true.extend([(i, *item) for item in find_triggers(triggers)])
            triggers_pred.extend([(i, *item) for item in find_triggers(triggers_hat)])

            # [(ith sentence, t_start, t_end, t_type_str, a_start, a_end, a_type_idx)]
            for trigger in arguments['events']:
                t_start, t_end, t_type_str = trigger
                for argument in arguments['events'][trigger]:
                    a_start, a_end, a_type_idx = argument
                    arguments_true.append(
                        (i, t_start, t_end, t_type_str, a_start, a_end, a_type_idx))

            for trigger in arguments_hat['events']:
                t_start, t_end, t_type_str = trigger
                for argument in arguments_hat['events'][trigger]:
                    a_start, a_end, a_type_idx = argument
                    arguments_pred.append(
                        (i, t_start, t_end, t_type_str, a_start, a_end, a_type_idx))

            # words[1:-1] drops the first and last entry of each sentence.
            for w, t, t_h in zip(words[1:-1], triggers, triggers_hat):
                fout.write('{}\t{}\t{}\n'.format(w, t, t_h))
            fout.write('#arguments#{}\n'.format(arguments['events']))
            fout.write('#arguments_hat#{}\n'.format(arguments_hat['events']))
            fout.write("\n")

    print('[trigger classification]')
    trigger_p, trigger_r, trigger_f1 = calc_metric(triggers_true, triggers_pred)
    print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p, trigger_r, trigger_f1))

    print('[argument classification]')
    argument_p, argument_r, argument_f1 = calc_metric(arguments_true, arguments_pred)
    print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(argument_p, argument_r, argument_f1))

    # Identification ignores the type: keep only sentence index and offsets.
    print('[trigger identification]')
    triggers_true = [(item[0], item[1], item[2]) for item in triggers_true]
    triggers_pred = [(item[0], item[1], item[2]) for item in triggers_pred]
    trigger_p_, trigger_r_, trigger_f1_ = calc_metric(triggers_true, triggers_pred)
    print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(trigger_p_, trigger_r_, trigger_f1_))

    # Same for arguments: the trailing a_type_idx is dropped.
    print('[argument identification]')
    arguments_true = [(item[0], item[1], item[2], item[3], item[4], item[5])
                      for item in arguments_true]
    arguments_pred = [(item[0], item[1], item[2], item[3], item[4], item[5])
                      for item in arguments_pred]
    argument_p_, argument_r_, argument_f1_ = calc_metric(
        arguments_true, arguments_pred)
    print('P={:.3f}\tR={:.3f}\tF1={:.3f}'.format(argument_p_, argument_r_, argument_f1_))

    metric = '[trigger classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
        trigger_p, trigger_r, trigger_f1)
    metric += '[argument classification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
        argument_p, argument_r, argument_f1)
    metric += '[trigger identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
        trigger_p_, trigger_r_, trigger_f1_)
    metric += '[argument identification]\tP={:.3f}\tR={:.3f}\tF1={:.3f}\n'.format(
        argument_p_, argument_r_, argument_f1_)

    final = fname + ".P%.2f_R%.2f_F%.2f" % (trigger_p, trigger_r, trigger_f1)
    with open(final, 'w') as fout:
        # Read the scratch dump through a context manager so its handle is
        # closed deterministically before os.remove() below, instead of being
        # left to the garbage collector.
        with open("temp", "r") as fin:
            result = fin.read()
        fout.write("{}\n".format(result))
        fout.write(metric)

    os.remove("temp")
    return metric
def train(model, iterator, optimizer, hp):
    """Train ``model`` for one pass over ``iterator``, printing metrics every 100 steps.

    For each batch the trigger loss and, when candidate trigger/entity pairs
    exist, the argument loss weighted by ``hp.LOSS_alpha`` are summed,
    back-propagated, gradient-clipped to a norm of 3.0 and applied.
    Predictions are collected and, at every step where ``i % 100 == 0``,
    scored with ``calc_metric`` and printed; the collection is then reset.

    Glossary (translated from the original author's notes):
      Arguments (roles)
        argument_keys   -- (correctly) predicted trigger paired with a gold entity
        arguments_2d    -- gold trigger / gold role / gold entity
        arguments_y_2d  -- label data fed to the CRF, [dim0, seq_len]
        argument_hat_1d -- CRF output
        argument_hat_2d -- dict form built from argument_keys and argument_hat_1d
      Triggers
        trigger_hat_2d  -- triggers predicted by the CRF
        triggers_y_2d   -- gold triggers

    Args:
        model: wrapper whose ``module`` attribute provides
            ``predict_triggers`` and ``predict_arguments``.
        iterator: iterable of 10-element batches (unpacked below).
        optimizer: optimizer stepping ``model``'s parameters.
        hp: object exposing ``LOSS_alpha``, the weight of the argument loss.

    Returns:
        None. Progress is reported through ``print``.
    """
    model.train()

    # Sentences seen since the last monitoring point.
    words_all, triggers_all, triggers_hat_all, arguments_all, arguments_hat_all = [], [], [], [], []

    for i, batch in enumerate(iterator):
        (tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d,
         arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d,
         adjm) = batch
        optimizer.zero_grad()

        # Per the original note: CRF loss, trigger labels, predicted
        # triggers, entity-event concatenated tensor, (7-tuple) keys.
        (trigger_loss, triggers_y_2d, trigger_hat_2d, argument_hidden,
         argument_keys) = model.module.predict_triggers(
             tokens_x_2d=tokens_x_2d,
             entities_x_3d=entities_x_3d,
             postags_x_2d=postags_x_2d,
             head_indexes_2d=head_indexes_2d,
             triggers_y_2d=triggers_y_2d,
             arguments_2d=arguments_2d,
             adjm=adjm)

        has_arguments = len(argument_keys) > 0
        if has_arguments:
            (argument_loss, arguments_y_2d, argument_hat_1d,
             argument_hat_2d) = model.module.predict_arguments(
                 argument_hidden, argument_keys, arguments_2d, adjm)
            loss = trigger_loss + hp.LOSS_alpha * argument_loss
        else:
            loss = trigger_loss
            # No candidate pairs: one empty event dict per sentence keeps the
            # prediction list aligned with the gold list.
            argument_hat_2d = [{'events': {}} for _ in range(len(arguments_2d))]

        loss.backward()
        # Clipping must see the gradients of this step, so it runs after
        # backward() and before step(); run before backward(), right after
        # zero_grad(), it never touched the gradients the optimizer used.
        nn.utils.clip_grad_norm_(model.parameters(), 3.0)
        optimizer.step()

        # ---- training accuracy bookkeeping ----
        words_all.extend(words_2d)
        triggers_all.extend(triggers_2d)
        triggers_hat_all.extend(trigger_hat_2d.cpu().numpy().tolist())
        arguments_all.extend(arguments_2d)
        arguments_hat_all.extend(argument_hat_2d)

        if i % 100 == 0:  # monitoring
            # Expand the collected sentences into metric tuples once per
            # monitoring point. Doing this on every step re-expanded the
            # whole window each time, duplicating tuples and costing
            # quadratic work.
            triggers_true, triggers_pred, arguments_true, arguments_pred = [], [], [], []
            for ii, (words, triggers, triggers_hat, arguments, arguments_hat) in enumerate(
                    zip(words_all, triggers_all, triggers_hat_all,
                        arguments_all, arguments_hat_all)):
                triggers_hat = triggers_hat[:len(words)]
                triggers_hat = [idx2trigger[hat] for hat in triggers_hat]

                # [(ith sentence, t_start, t_end, t_type_str)]
                triggers_true.extend(
                    [(ii, *item) for item in find_triggers(triggers)])
                triggers_pred.extend(
                    [(ii, *item) for item in find_triggers(triggers_hat)])

                # [(ith sentence, t_start, t_end, t_type_str, a_start, a_end, a_type_idx)]
                for trigger in arguments['events']:
                    t_start, t_end, t_type_str = trigger
                    for argument in arguments['events'][trigger]:
                        a_start, a_end, a_type_idx = argument
                        arguments_true.append(
                            (ii, t_start, t_end, t_type_str, a_start, a_end, a_type_idx))

                for trigger in arguments_hat['events']:
                    t_start, t_end, t_type_str = trigger
                    for argument in arguments_hat['events'][trigger]:
                        a_start, a_end, a_type_idx = argument
                        arguments_pred.append(
                            (ii, t_start, t_end, t_type_str, a_start, a_end, a_type_idx))

            trigger_p, trigger_r, trigger_f1 = calc_metric(triggers_true, triggers_pred)
            argument_p, argument_r, argument_f1 = calc_metric(arguments_true, arguments_pred)

            # Reset the window every 100 steps.
            words_all, triggers_all, triggers_hat_all, arguments_all, arguments_hat_all = [], [], [], [], []

            if has_arguments:
                print("【识别到事件】step: {}, loss: {:.3f}, trigger_loss:{:.3f}, argument_loss:{:.3f}".format(
                          i, loss.item(), trigger_loss.item(), argument_loss.item()),
                      '【trigger】 P={:.3f} R={:.3f} F1={:.3f}'.format(
                          trigger_p, trigger_r, trigger_f1),
                      '【argument】 P={:.3f} R={:.3f} F1={:.3f}'.format(
                          argument_p, argument_r, argument_f1))
            else:
                print("【未识别到事件】step: {}, loss: {:.3f} ".format(i, loss.item()),
                      '【trigger】 P={:.3f} R={:.3f} F1={:.3f}'.format(
                          trigger_p, trigger_r, trigger_f1))
def eval_token_level(arguments_true, arguments_pred):
    """Print token-level statistics for overlapping and non-overlapping gold arguments.

    Each input item is a tuple ``(i, t_type_str, a_start, a_end, a_type_idx)``
    (layout taken from the original docstring:
    ``arguments_true.append((i, t_type_str, a_start, a_end, a_type_idx))``).
    Every span is expanded into one ``(i, t_type_str, index, a_type_idx)``
    tuple per ``index`` in ``range(a_start + 1, a_end + 1)``.

    The expanded gold tuples are split by ``find_overlap`` into overlapping
    groups and non-overlapping indices, and each part is scored against the
    full gold list with ``calc_metric``.  The expanded predictions are built
    but not used by any later statement.

    Returns:
        None. All results are printed.
    """
    new_argu_true = []
    new_argu_pred = []
    # Expand gold spans to token level.
    for item in arguments_true:
        i, t_type_str, a_start, a_end, a_type_idx = item
        for index in range(a_start + 1, a_end + 1):
            new_argu_true.append((i, t_type_str, index, a_type_idx))
    # Expand predicted spans the same way (result currently unused below).
    for item in arguments_pred:
        i, t_type_str, a_start, a_end, a_type_idx = item
        for index in range(a_start + 1, a_end + 1):
            new_argu_pred.append((i, t_type_str, index, a_type_idx))
    # overlap_part_index is iterated below as groups of indices into
    # new_argu_true; argu_dict is not used in this function.
    overlap_part_index, no_overlap_part_index, argu_dict = find_overlap(
        new_argu_true)
    print('Length of the overlapping:\n {}'.format(len(overlap_part_index)))
    print('Length of the NON-overlapping:\n {}'.format(
        len(no_overlap_part_index)))

    # Non-overlapping part: gold tuples whose position is listed in
    # no_overlap_part_index, scored against the full gold list.
    total_len = len(new_argu_true)
    no_overlap_part = [
        new_argu_true[i] for i in range(total_len)
        if i in no_overlap_part_index
    ]
    p, r, f = calc_metric(new_argu_true, no_overlap_part)
    print('Precison = {}\n Recall = {}\n F1 = {}\n'.format(p, r, f))

    # Overlapping part: for every group, collect the argument type names that
    # differ from the type of the group's first member.
    overlap_part_select = []
    mismatch_count = 0
    mismatch_set = []
    for items in overlap_part_index:
        temp_a_type = new_argu_true[items[0]][-1]
        mismatch = []
        mismatch.append(idx2argument[temp_a_type])
        for item in items:
            if temp_a_type != new_argu_true[item][-1]:
                # print('*** Mismatch ***')
                # print('*** Current A_TYPE = {}***'.format(temp_a_type))
                # print(new_argu_true[item])
                mismatch.append(idx2argument[new_argu_true[item][-1]])
                mismatch_count += 1
        # NOTE(review): block nesting was reconstructed from whitespace-
        # collapsed text. This append is placed after the inner loop, so it
        # keeps the last member of each group; confirm against the original
        # indentation (it may belong inside the loop or the `if`).
        overlap_part_select.append(new_argu_true[item])
        mismatch = set(mismatch)
        mismatch_set.append(mismatch)
    print('MISMATCH COUNT = {}'.format(mismatch_count))
    print('MISMATCH_SET = {}'.format(mismatch_set))

    # Earlier variant, kept for reference: pick one random member per group.
    # for items in overlap_part_index:
    #     temp_len = len(items)
    #     select_index = items[random.randint(0, temp_len-1)]
    #     try:
    #         overlap_part_select.append(new_argu_true[select_index])
    #     except:
    #         print(select_index)
    #         print(total_len)
    p, r, f = calc_metric(new_argu_true, overlap_part_select)
    print('Precison = {}\n Recall = {}\n F1 = {}\n'.format(p, r, f))
nr_gt = len(centroids1) nr_det = len(centroids2) thresh = 0.5 # combine both boxes total_spines1 = len(centroids1) total_spines2 = len(centroids2) # both_spines contains all spines and their IoM, with keys of centroids2 both_spines = OrderedDict() # boxes1 are GT, boxes2 are like Predictions # -> compare each box2 vs all boxes of boxes1 for key in centroids2.keys(): # all stacks are combined therefore no comparing to same stack necessary! # VORAUSSETZUNG: NUR EIN SPINE IN DIESER POSITION IN 3D! # Andernfalls muss noch die z-Achse berucksichtigt werden! all_dist = [(key, other_key, calc_metric( centroids2[key], centroids1[other_key], args.metric)) for other_key in centroids1.keys()] all_dist.sort(key=lambda x: x[2], reverse=True) # correct centroid with highest IoM if len(all_dist) == 0: continue best_key, best_other_key, best_metric = all_dist[0] if best_metric >= thresh: both_spines[best_key] = (best_other_key, best_metric) del centroids1[best_other_key] print(f"{'# spines':^13s}|{nr_gt:^10d}|{nr_det:^10d}|{len(both_spines):^10d}") total_spines.append(total_spines1) total_both_spines.append(len(both_spines)) precision = np.array(total_both_spines)/total_spines2