Example #1
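# Tail of a per-fold loop (the loop header and the dev-set prediction call are
# cut off in this excerpt): each fold's thresholded 0/1 predictions are stored
# in column i of dev_preds / test_preds, then majority-voted further down.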
    test_pred = predict(trained_model, test_iter)

    dev_preds[:, i] = (np.array(dev_pred) >= delta).astype(int)
    test_preds[:, i] = (np.array(test_pred) >= delta).astype(int)

dev = pd.read_csv(os.path.join(TEMP_DIRECTORY, DEV_FILE), sep='\t')
dev["predictions"] = (dev_preds.mean(axis=1) > 0.5).astype(int)

test = pd.read_csv(os.path.join(TEMP_DIRECTORY, TEST_FILE), sep='\t')
test["subtask_a"] = le.inverse_transform(
    (test_preds.mean(axis=1) > 0.5).astype(int))

# Performing the evaluation
(
    tn, fp, fn, tp
), accuracy, weighted_f1, macro_f1, weighted_recall, weighted_precision = evaluatation_scores(
    dev, 'encoded_subtask_a', "predictions")
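
# For reference, a minimal sketch of the metric bundle the project-specific
# evaluatation_scores helper is assumed to return; the real implementation
# lives elsewhere in the repo, and the name below is illustrative only.
from sklearn.metrics import (confusion_matrix, accuracy_score, f1_score,
                             recall_score, precision_score)


def evaluation_scores_sketch(frame, label_column, prediction_column):
    y_true = frame[label_column]
    y_pred = frame[prediction_column]
    # Binary confusion matrix flattened to (tn, fp, fn, tp).
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return ((tn, fp, fn, tp),
            accuracy_score(y_true, y_pred),
            f1_score(y_true, y_pred, average='weighted'),
            f1_score(y_true, y_pred, average='macro'),
            recall_score(y_true, y_pred, average='weighted'),
            precision_score(y_true, y_pred, average='weighted'))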

dev.to_csv(os.path.join(TEMP_DIRECTORY, DEV_RESULT_FILE),
           header=True,
           sep='\t',
           index=False,
           encoding='utf-8')

test = test[["id", "subtask_a"]]
test.to_csv(os.path.join(TEMP_DIRECTORY, SUBMISSION_FOLDER, RESULT_FILE),
            header=False,
            sep=',',
            index=False,
            encoding='utf-8')

shutil.make_archive(os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE), 'zip',
                    os.path.join(TEMP_DIRECTORY, SUBMISSION_FOLDER))  # assumed archive root: the submission folder written above

def run_hasoc_experiment():
    if not os.path.exists(TEMP_DIRECTORY): os.makedirs(TEMP_DIRECTORY)

    full = pd.read_csv(HASOC_DATA_PATH, sep='\t')

    le = LabelEncoder()
    train, test = train_test_split(full, test_size=0.2, random_state=SEED)
    train['label'] = le.fit_transform(train["task_1"])
    train = train[['text', 'label']]
    train['text'] = train['text'].apply(lambda x: remove_names(x))
    train['text'] = train['text'].apply(lambda x: remove_urls(x))

    test['label'] = le.transform(test["task_1"])  # reuse the label mapping fitted on the training split
    test = test[['text', 'label']]
    test['text'] = test['text'].apply(lambda x: remove_names(x))
    test['text'] = test['text'].apply(lambda x: remove_urls(x))

    # Create a ClassificationModel
    model = ClassificationModel(
        MODEL_TYPE,
        MODEL_NAME,
        args=hasoc_args,
        use_cuda=torch.cuda.is_available()
    )  # You can set class weights by using the optional weight argument
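    # Hedged illustration of the class-weight option mentioned above: the
    # optional `weight` argument of ClassificationModel takes one weight per
    # class. The values below are illustrative only, not tuned for HASOC.
    # model = ClassificationModel(MODEL_TYPE,
    #                             MODEL_NAME,
    #                             weight=[1.0, 2.0],
    #                             args=hasoc_args,
    #                             use_cuda=torch.cuda.is_available())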

    # Train the model
    logging.info("Started Training")

    if hasoc_args["evaluate_during_training"]:
        train, eval_df = train_test_split(train,
                                          test_size=0.1,
                                          random_state=SEED)
        model.train_model(train,
                          eval_df=eval_df,
                          f1=sklearn.metrics.f1_score,
                          accuracy=sklearn.metrics.accuracy_score)

    else:
        model.train_model(train,
                          f1=sklearn.metrics.f1_score,
                          accuracy=sklearn.metrics.accuracy_score)

    logging.info("Finished Training")
    # Evaluate the model
    test_sentences = test['text'].tolist()

    if hasoc_args["evaluate_during_training"]:
        model = ClassificationModel(MODEL_TYPE,
                                    hasoc_args["best_model_dir"],
                                    args=hasoc_args,
                                    use_cuda=torch.cuda.is_available())

    predictions, raw_outputs = model.predict(test_sentences)

    test['predictions'] = predictions

    (
        tn, fp, fn, tp
    ), accuracy, weighted_f1, macro_f1, weighted_recall, weighted_precision = evaluatation_scores(
        test, 'label', "predictions")

    test.to_csv(os.path.join(TEMP_DIRECTORY, RESULT_FILE),
                header=True,
                sep='\t',
                index=False,
                encoding='utf-8')

    logging.info("Confusion Matrix (tn, fp, fn, tp) {} {} {} {}".format(
        tn, fp, fn, tp))
    logging.info("Accuracy {}".format(accuracy))
    logging.info("Weighted F1 {}".format(weighted_f1))
    logging.info("Macro F1 {}".format(macro_f1))
    logging.info("Weighted Recall {}".format(weighted_recall))
    logging.info("Weighted Precision {}".format(weighted_precision))

    return hasoc_args['best_model_dir']
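
# Hedged usage sketch (assumed entry point, not part of the original file):
# run_hasoc_experiment returns the directory of its best checkpoint, which a
# downstream experiment could plug in as its MODEL_NAME.
# best_checkpoint_dir = run_hasoc_experiment()
# logging.info("Best HASOC checkpoint stored at {}".format(best_checkpoint_dir))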
    # Assumed enclosing loop (cut off in this excerpt): freeze the BERT encoder.
    for name, param in model.named_parameters():
        if name.startswith('bert'):
            param.requires_grad = False

    if i == 0: print_model(model)
    criterion = criterion.to(device)

    trained_model, trained_losses, valid_losses = fit(model, train_iter, valid_iter, optimizer, criterion, scheduler,
                                                      N_EPOCHS, os.path.join(path, MODEL_NAME), GRADUALLY_UNFREEZE,
                                                      FREEZE_FOR)

    draw_graph(n_epohs=N_EPOCHS, valid_losses=valid_losses, trained_losses=trained_losses,
               path=os.path.join(path, GRAPH_NAME))

    delta = threshold_search(trained_model, valid_iter)
    test_pred, test_id = predict(trained_model, test_iter)

    test_preds[:, i] = (np.array(test_pred) >= delta).astype(int)
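
# Hedged sketch of the cutoff selection assumed behind the project's
# threshold_search helper (the real helper takes the trained model and a
# validation iterator; this sketch assumes the validation probabilities and
# gold labels were already collected, and its name is illustrative only).
import numpy as np
from sklearn.metrics import f1_score


def threshold_search_sketch(valid_probabilities, valid_labels):
    # Scan candidate probability thresholds and keep the one with the best F1.
    candidates = np.arange(0.10, 0.91, 0.01)
    scores = [f1_score(valid_labels, (valid_probabilities >= t).astype(int))
              for t in candidates]
    return float(candidates[int(np.argmax(scores))])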

test = pd.read_csv(os.path.join(TEMP_DIRECTORY, TEST_FILE), sep='\t')
test["predictions"] = (test_preds.mean(axis=1) > 0.5).astype(int)

# Performing the evaluation
(tn, fp, fn, tp), accuracy, weighted_f1, weighted_recall, weighted_precision = evaluatation_scores(test,
                                                                                                   'encoded_subtask_a',
                                                                                                   "predictions")
logging.info("Confusion Matrix (tn, fp, fn, tp) {} {} {} {}".format(tn, fp, fn, tp))
logging.info("Accuracy {}".format(accuracy))
logging.info("Weighted F1 {}".format(weighted_f1))
logging.info("Weighted Recall {}".format(weighted_recall))
logging.info("Weighted Precision {}".format(weighted_precision))
logging.info("Started Evaluation")
dev_sentences = dev['text'].tolist()

if turkish_args["evaluate_during_training"]:
    model = ClassificationModel(MODEL_TYPE,
                                turkish_args["best_model_dir"],
                                args=turkish_args,
                                use_cuda=torch.cuda.is_available())

dev_predictions, raw_outputs = model.predict(dev_sentences)

dev['predictions'] = dev_predictions

(
    tn, fp, fn, tp
), accuracy, weighted_f1, macro_f1, weighted_recall, weighted_precision = evaluatation_scores(
    dev, 'label', "predictions")

dev.to_csv(os.path.join(TEMP_DIRECTORY, DEV_RESULT_FILE),
           header=True,
           sep='\t',
           index=False,
           encoding='utf-8')

logging.info("Confusion Matrix (tn, fp, fn, tp) {} {} {} {}".format(
    tn, fp, fn, tp))
logging.info("Accuracy {}".format(accuracy))
logging.info("Weighted F1 {}".format(weighted_f1))
logging.info("Macro F1 {}".format(macro_f1))
logging.info("Weighted Recall {}".format(weighted_recall))
logging.info("Weighted Precision {}".format(weighted_precision))
test_predictions, test_raw_outputs = model.predict(test_sentences)
test['predictions'] = test_predictions

dev['predictions'] = decode(dev['predictions'])
dev['class'] = decode(dev['class'])

test['predictions'] = decode(test['predictions'])

if INCLUDE_RAW_PREDICTIONS:
    dev['raw-predictions'] = decode(dev['raw-predictions'])
    test['raw-predictions'] = decode(test['raw-predictions'])
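
# Hedged sketch of the mapping assumed behind the project's decode helper: it
# turns encoded class ids back into their string labels. The id-to-label
# mapping and the function name below are illustrative only.
def decode_sketch(encoded_series, id_to_label=None):
    mapping = id_to_label if id_to_label is not None else {0: "class_0", 1: "class_1"}
    return encoded_series.map(mapping)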

time.sleep(5)

print("Started Evaluation")
results = evaluatation_scores(dev, 'class', 'predictions', labels, pos_label)
print_results(results)
save_eval_results(results, os.path.join(TEMP_DIRECTORY, DEV_EVAL_FILE))

if INCLUDE_RAW_PREDICTIONS:
    print("Evaluation - Raw Outputs")
    results = evaluatation_scores(dev, 'class', 'raw-predictions', labels, pos_label)
    print_results(results)
    save_eval_results(results, os.path.join(TEMP_DIRECTORY, TAG_RAW + "-" + DEV_EVAL_FILE))

dev.to_csv(os.path.join(TEMP_DIRECTORY, DEV_RESULT_FILE), header=True, sep='\t', index=False, encoding='utf-8')
test.to_csv(os.path.join(TEMP_DIRECTORY, TEST_RESULT_FILE), header=True, sep='\t', index=False, encoding='utf-8')

output_file = open(os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE), 'w', encoding='utf-8')
test_preds = test['predictions']
for pred in test_preds:
    output_file.write(str(pred) + "\n")  # assumed submission format: one prediction per line
output_file.close()