# (fragment: inside the per-fold cross-validation loop indexed by i)
    test_pred = predict(trained_model, test_iter)
    dev_preds[:, i] = (np.array(dev_pred) >= delta).astype(int)
    test_preds[:, i] = (np.array(test_pred) >= delta).astype(int)

# Ensemble the folds: average the per-fold binary votes and take the majority
dev = pd.read_csv(os.path.join(TEMP_DIRECTORY, DEV_FILE), sep='\t')
dev["predictions"] = (dev_preds.mean(axis=1) > 0.5).astype(int)

test = pd.read_csv(os.path.join(TEMP_DIRECTORY, TEST_FILE), sep='\t')
test["subtask_a"] = le.inverse_transform((test_preds.mean(axis=1) > 0.5).astype(int))

# Performing the evaluation
(tn, fp, fn, tp), accuracy, weighted_f1, macro_f1, weighted_recall, weighted_precision = \
    evaluatation_scores(dev, 'encoded_subtask_a', "predictions")

dev.to_csv(os.path.join(TEMP_DIRECTORY, DEV_RESULT_FILE),
           header=True, sep='\t', index=False, encoding='utf-8')

# Write the submission file (id, label) and zip it for upload
test = test[["id", "subtask_a"]]
test.to_csv(os.path.join(TEMP_DIRECTORY, SUBMISSION_FOLDER, RESULT_FILE),
            header=False, sep=',', index=False, encoding='utf-8')
shutil.make_archive(os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE), 'zip',
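# The evaluatation_scores helper used throughout these scripts is defined
# elsewhere in the repository. As a rough guide to what it returns, here is
# a minimal sketch assuming it wraps sklearn.metrics; the body (and the
# _sketch suffix) are assumptions, not the repository's implementation.
def evaluatation_scores_sketch(df, real_column, prediction_column):
    from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                                 precision_score, recall_score)
    y_true, y_pred = df[real_column], df[prediction_column]
    # Binary confusion matrix flattened to (tn, fp, fn, tp)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return ((tn, fp, fn, tp),
            accuracy_score(y_true, y_pred),
            f1_score(y_true, y_pred, average='weighted'),
            f1_score(y_true, y_pred, average='macro'),
            recall_score(y_true, y_pred, average='weighted'),
            precision_score(y_true, y_pred, average='weighted'))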
def run_hasoc_experiment():
    if not os.path.exists(TEMP_DIRECTORY):
        os.makedirs(TEMP_DIRECTORY)

    full = pd.read_csv(HASOC_DATA_PATH, sep='\t')

    le = LabelEncoder()
    train, test = train_test_split(full, test_size=0.2, random_state=SEED)

    train['label'] = le.fit_transform(train["task_1"])
    train = train[['text', 'label']]
    train['text'] = train['text'].apply(lambda x: remove_names(x))
    train['text'] = train['text'].apply(lambda x: remove_urls(x))

    # Reuse the encoder fitted on the training labels; refitting on the test
    # split could silently remap the classes.
    test['label'] = le.transform(test["task_1"])
    test = test[['text', 'label']]
    test['text'] = test['text'].apply(lambda x: remove_names(x))
    test['text'] = test['text'].apply(lambda x: remove_urls(x))

    # Create a ClassificationModel
    model = ClassificationModel(MODEL_TYPE, MODEL_NAME, args=hasoc_args,
                                use_cuda=torch.cuda.is_available())
    # You can set class weights by using the optional weight argument

    # Train the model
    logging.info("Started Training")
    if hasoc_args["evaluate_during_training"]:
        # Hold out 10% of the training data for evaluation during training
        train, eval_df = train_test_split(train, test_size=0.1, random_state=SEED)
        model.train_model(train, eval_df=eval_df, f1=sklearn.metrics.f1_score,
                          accuracy=sklearn.metrics.accuracy_score)
    else:
        model.train_model(train, f1=sklearn.metrics.f1_score,
                          accuracy=sklearn.metrics.accuracy_score)
    logging.info("Finished Training")

    # Evaluate the model
    test_sentences = test['text'].tolist()
    if hasoc_args["evaluate_during_training"]:
        # Reload the best checkpoint saved during training
        model = ClassificationModel(MODEL_TYPE, hasoc_args["best_model_dir"],
                                    args=hasoc_args,
                                    use_cuda=torch.cuda.is_available())

    predictions, raw_outputs = model.predict(test_sentences)
    test['predictions'] = predictions

    (tn, fp, fn, tp), accuracy, weighted_f1, macro_f1, weighted_recall, weighted_precision = \
        evaluatation_scores(test, 'label', "predictions")

    test.to_csv(os.path.join(TEMP_DIRECTORY, RESULT_FILE),
                header=True, sep='\t', index=False, encoding='utf-8')

    logging.info("Confusion Matrix (tn, fp, fn, tp) {} {} {} {}".format(tn, fp, fn, tp))
    logging.info("Accuracy {}".format(accuracy))
    logging.info("Weighted F1 {}".format(weighted_f1))
    logging.info("Macro F1 {}".format(macro_f1))
    logging.info("Weighted Recall {}".format(weighted_recall))
    logging.info("Weighted Precision {}".format(weighted_precision))

    return hasoc_args['best_model_dir']
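# remove_names and remove_urls above are preprocessing helpers defined
# elsewhere in the repository. A plausible regex-based sketch of the intended
# behaviour; the bodies (and the _sketch suffixes) are assumptions, not the
# repository's implementation.
import re

def remove_urls_sketch(text):
    # Drop http(s) links and bare www. links
    return re.sub(r'(https?://\S+|www\.\S+)', '', text)

def remove_names_sketch(text):
    # Drop @-mentions, which carry user names in tweets
    return re.sub(r'@\w+', '', text)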
# (fragment: inside the per-fold loop; the enclosing for name, param loop
# freezes the BERT encoder parameters)
        if name.startswith('bert'):
            param.requires_grad = False
    if i == 0:
        print_model(model)

    criterion = criterion.to(device)
    trained_model, trained_losses, valid_losses = fit(
        model, train_iter, valid_iter, optimizer, criterion, scheduler,
        N_EPOCHS, os.path.join(path, MODEL_NAME), GRADUALLY_UNFREEZE, FREEZE_FOR)
    draw_graph(n_epohs=N_EPOCHS, valid_losses=valid_losses,
               trained_losses=trained_losses, path=os.path.join(path, GRAPH_NAME))

    # Tune the decision threshold on the validation set, then apply it to the
    # test predictions for this fold
    delta = threshold_search(trained_model, valid_iter)
    test_pred, test_id = predict(trained_model, test_iter)
    test_preds[:, i] = (np.array(test_pred) >= delta).astype(int)

test = pd.read_csv(os.path.join(TEMP_DIRECTORY, TEST_FILE), sep='\t')
# Majority vote over the per-fold binary predictions
test["predictions"] = (test_preds.mean(axis=1) > 0.5).astype(int)

# Performing the evaluation
(tn, fp, fn, tp), accuracy, weighted_f1, weighted_recall, weighted_precision = \
    evaluatation_scores(test, 'encoded_subtask_a', "predictions")

logging.info("Confusion Matrix (tn, fp, fn, tp) {} {} {} {}".format(tn, fp, fn, tp))
logging.info("Accuracy {}".format(accuracy))
logging.info("Weighted F1 {}".format(weighted_f1))
logging.info("Weighted Recall {}".format(weighted_recall))
logging.info("Weighted Precision {}".format(weighted_precision))
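# threshold_search above tunes the binary decision threshold delta on the
# validation iterator. A common implementation sweeps candidate thresholds
# and keeps the one that maximises validation F1. This sketch works on raw
# probability/label arrays rather than the model and iterator the
# repository's version takes; its signature and body are assumptions.
def threshold_search_sketch(y_true, y_proba):
    from sklearn.metrics import f1_score
    best_delta, best_f1 = 0.5, 0.0
    for delta in np.arange(0.1, 0.9, 0.01):
        score = f1_score(y_true, (np.asarray(y_proba) >= delta).astype(int))
        if score > best_f1:
            best_delta, best_f1 = delta, score
    return best_delta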
logging.info("Started Evaluation") dev_sentences = dev['text'].tolist() if turkish_args["evaluate_during_training"]: model = ClassificationModel(MODEL_TYPE, turkish_args["best_model_dir"], args=turkish_args, use_cuda=torch.cuda.is_available()) dev_predictions, raw_outputs = model.predict(dev_sentences) dev['predictions'] = dev_predictions ( tn, fp, fn, tp ), accuracy, weighted_f1, macro_f1, weighted_recall, weighted_precision = evaluatation_scores( dev, 'label', "predictions") dev.to_csv(os.path.join(TEMP_DIRECTORY, DEV_RESULT_FILE), header=True, sep='\t', index=False, encoding='utf-8') logging.info("Confusion Matrix (tn, fp, fn, tp) {} {} {} {}".format( tn, fp, fn, tp)) logging.info("Accuracy {}".format(accuracy)) logging.info("Weighted F1 {}".format(weighted_f1)) logging.info("Macro F1 {}".format(macro_f1)) logging.info("Weighted Recall {}".format(weighted_recall)) logging.info("Weighted Precision {}".format(weighted_precision))
    test_predictions, test_raw_outputs = model.predict(test_sentences)
    test['predictions'] = test_predictions

    # Map encoded labels back to their original string form
    dev['predictions'] = decode(dev['predictions'])
    dev['class'] = decode(dev['class'])
    test['predictions'] = decode(test['predictions'])
    if INCLUDE_RAW_PREDICTIONS:
        dev['raw-predictions'] = decode(dev['raw-predictions'])
        test['raw-predictions'] = decode(test['raw-predictions'])

    time.sleep(5)

    print("Started Evaluation")
    results = evaluatation_scores(dev, 'class', 'predictions', labels, pos_label)
    print_results(results)
    save_eval_results(results, os.path.join(TEMP_DIRECTORY, DEV_EVAL_FILE))

    if INCLUDE_RAW_PREDICTIONS:
        print("Evaluation - Raw Outputs")
        results = evaluatation_scores(dev, 'class', 'raw-predictions', labels, pos_label)
        print_results(results)
        save_eval_results(results,
                          os.path.join(TEMP_DIRECTORY, TAG_RAW + "-" + DEV_EVAL_FILE))

    dev.to_csv(os.path.join(TEMP_DIRECTORY, DEV_RESULT_FILE),
               header=True, sep='\t', index=False, encoding='utf-8')
    test.to_csv(os.path.join(TEMP_DIRECTORY, TEST_RESULT_FILE),
                header=True, sep='\t', index=False, encoding='utf-8')

    # Write one prediction per line to the submission file
    output_file = open(os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE), 'w', encoding='utf-8')
    test_preds = test['predictions']
    for pred in test_preds: