validation_losses.append(np.mean(loss_validation))

f1 = 0
em = 0
for i in range(len(estimated_end_index)):
    if answer_start_batch_actual[i] == -1:
        # Unanswerable question: index 0 is reserved for the "no answer"
        # prediction, so a 0 boundary counts as fully correct.
        # (The original condition tested estimated_start_index[i] twice;
        # the second operand is assumed to have meant the end index.)
        if estimated_start_index[i] == 0 or estimated_end_index[i] == 0:
            f1 += 1.0
            em += 1.0
    else:
        # Shift predictions back by one to undo the "no answer" offset.
        estimated_start_index[i] -= 1
        estimated_end_index[i] -= 1
        f1 += get_f1_from_tokens(
            answer_start_batch_actual[i], answer_end_batch_actual[i],
            estimated_start_index[i], estimated_end_index[i],
            context_batch_validation[i], D)
        em += get_exact_match_from_tokens(
            answer_start_batch_actual[i], answer_end_batch_actual[i],
            estimated_start_index[i], estimated_end_index[i],
            context_batch_validation[i], D)

f1score.append(f1 / len(estimated_end_index))
emscore.append(em / len(estimated_end_index))
# print("f1 score: ", f1 / len(estimated_end_index))
print("F1 mean on validation: ", np.mean(f1score))
print("EM mean on validation: ", np.mean(emscore))
print("Mean validation loss on epoch: ", np.mean(validation_losses))
val_loss_means.append(np.mean(validation_losses))
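# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the two span scorers called above.
# These are hypothetical reconstructions: only the call signatures are taken
# from the loops in this file, and `D` is assumed to be an index-to-word
# vocabulary used to turn token ids back into words. The F1 follows the
# standard SQuAD token-overlap definition; exact match compares boundaries.
# ---------------------------------------------------------------------------
import collections


def get_f1_from_tokens(gold_start, gold_end, pred_start, pred_end,
                       context_tokens, D):
    """Token-overlap F1 between the gold span and the predicted span."""
    gold = [D[t] for t in context_tokens[gold_start:gold_end + 1]]
    pred = [D[t] for t in context_tokens[pred_start:pred_end + 1]]
    if len(gold) == 0 or len(pred) == 0:
        # An empty span only matches another empty span.
        return float(gold == pred)
    common = collections.Counter(gold) & collections.Counter(pred)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred)
    recall = num_same / len(gold)
    return 2 * precision * recall / (precision + recall)


def get_exact_match_from_tokens(gold_start, gold_end, pred_start, pred_end,
                                context_tokens, D):
    """1.0 if the predicted span boundaries match the gold span exactly."""
    return float(gold_start == pred_start and gold_end == pred_end)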
        dropout_keep_rate: 1
    })

all_answers = [qas["all_answers"] for qas in batch]

# Calculate the F1 score across the batch, taking the best match over
# all gold answers for each question.
f1 = 0
for i in range(CONFIG.BATCH_SIZE):
    f1_score_answers = []
    for true_answer in all_answers[i]:
        f1_score_answers.append(get_f1_from_tokens(
            true_answer["answer_start"], true_answer["answer_end"],
            estimated_start_index[i], estimated_end_index[i],
            context_batch[i], D))
    f1 += max(f1_score_answers)

f1score_curr = f1 / CONFIG.BATCH_SIZE
print("Current f1 score: ", f1score_curr)
f1score.append(f1score_curr)
print("Tested (", iteration, "/", len(padded_data), ")")

if f1score:
    results = dict()
    results["average"] = np.mean(f1score)
    results["max"] = np.max(f1score)
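# ---------------------------------------------------------------------------
# Toy sanity check (hypothetical data) illustrating the max-over-gold-answers
# convention used in the test loop above: a prediction is scored by its best
# F1 against any of the provided gold spans. Here `D` is a simple id-to-word
# list standing in for the real vocabulary.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    D = ["the", "cat", "sat", "on", "mat"]
    context = [0, 1, 2, 3, 0, 4]  # "the cat sat on the mat"
    gold_answers = [{"answer_start": 1, "answer_end": 2},   # "cat sat"
                    {"answer_start": 1, "answer_end": 3}]   # "cat sat on"
    pred_start, pred_end = 1, 3                              # "cat sat on"
    best = max(get_f1_from_tokens(a["answer_start"], a["answer_end"],
                                  pred_start, pred_end, context, D)
               for a in gold_answers)
    print("best F1:", best)  # 1.0, since the second gold span matches exactly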