Пример #1
0
    train = fit(train, 'labels')
    test = fit(test, 'labels')
# print(train)
if transformer_config["evaluate_during_training"]:
    if transformer_config["n_fold"] > 1:
        test_preds = np.zeros((len(test), transformer_config["n_fold"]))
        for i in range(transformer_config["n_fold"]):

            if os.path.exists(
                    transformer_config['output_dir']) and os.path.isdir(
                        transformer_config['output_dir']):
                shutil.rmtree(transformer_config['output_dir'])

            model = QuestModel(MODEL_TYPE,
                               MODEL_NAME,
                               num_labels=NUM_LABELS,
                               use_cuda=torch.cuda.is_available(),
                               args=transformer_config)
            train, eval_df = train_test_split(train,
                                              test_size=0.11,
                                              random_state=SEED * i)
            # model.train_model(train, eval_df=eval_df)
            if NUM_LABELS == 1:
                model.train_model(train,
                                  eval_df=eval_df,
                                  pearson_corr=pearson_corr,
                                  spearman_corr=spearman_corr,
                                  mae=mean_absolute_error)
            else:
                model.train_model(train,
                                  eval_df=eval_df,
Пример #2
0
# Apply the project's `fit` transform to the 'labels' column of both frames
# (its exact effect is defined elsewhere in the project).
train = fit(train, 'labels')
test = fit(test, 'labels')

if transformer_config["evaluate_during_training"]:
    if transformer_config["n_fold"] > 1:
        # One column of test-set predictions per fold.
        test_preds = np.zeros((len(test), transformer_config["n_fold"]))
        for i in range(transformer_config["n_fold"]):

            # Start every fold from a clean output directory.
            # os.path.isdir() is already False for a missing path, so no
            # separate existence check is needed.
            if os.path.isdir(transformer_config['output_dir']):
                shutil.rmtree(transformer_config['output_dir'])

            model = QuestModel(MODEL_TYPE,
                               MODEL_NAME,
                               num_labels=1,
                               use_cuda=torch.cuda.is_available(),
                               args=transformer_config)
            # Split into fold-local names and leave `train` untouched:
            # rebinding `train` to the split's training part would make every
            # later fold draw from an ever smaller pool (90%, 81%, ...).
            # NOTE(review): SEED * i is 0 for the first fold whatever SEED is;
            # confirm that is intended.
            fold_train, eval_df = train_test_split(train,
                                                   test_size=0.1,
                                                   random_state=SEED * i)
            model.train_model(fold_train,
                              eval_df=eval_df,
                              pearson_corr=pearson_corr,
                              spearman_corr=spearman_corr,
                              mae=mean_absolute_error)
            # Replace the in-memory model with one loaded from the directory
            # configured as "best_model_dir".
            model = QuestModel(MODEL_TYPE,
                               transformer_config["best_model_dir"],
                               num_labels=1,
                               use_cuda=torch.cuda.is_available(),
                               args=transformer_config)
Пример #3
0
def _make_model(model_name):
    """Build a single-label QuestModel from `model_name` using `train_config`."""
    return QuestModel(MODEL_TYPE,
                      model_name,
                      num_labels=1,
                      use_cuda=torch.cuda.is_available(),
                      args=train_config)


def _train_with_holdout(train, random_state, metrics):
    """Train on a 90/10 split of `train` and return the reloaded model.

    `train` itself is never rebound, so the caller can reuse the full frame
    for the next fold. The returned model is loaded from
    `train_config['best_model_dir']` after training finishes.
    """
    model = _make_model(MODEL_NAME)
    fold_train, eval_df = train_test_split(train,
                                           test_size=0.1,
                                           random_state=random_state)
    model.train_model(fold_train, eval_df=eval_df, **metrics)
    return _make_model(train_config['best_model_dir'])


def main():
    """Train a QuestModel, predict on the test set and write the results.

    Command-line options: --results_fname, --train_path, --test_path,
    --inject_features and --output_dir. The model's output, best-model and
    cache directories are placed under --output_dir, as are the resulting
    ``<results_fname>.tsv`` predictions file and ``<results_fname>.png``
    scatter plot.

    Depending on `train_config`:

    * ``evaluate_during_training`` with ``n_fold > 1``: train one model per
      fold on a fresh 90/10 split of the full training data and average the
      per-fold test predictions.
    * ``evaluate_during_training`` with a single fold: train once on a 90/10
      split.
    * otherwise: train on all of the training data without a hold-out set.

    Raises:
        TypeError: if --output_dir is omitted, because ``os.path.join`` then
            receives ``None``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--results_fname')
    parser.add_argument('--train_path')
    parser.add_argument('--test_path')
    parser.add_argument('--inject_features')
    parser.add_argument('--output_dir')
    args = parser.parse_args()

    train_config.update({
        'output_dir':
        os.path.join(args.output_dir, 'outputs'),
        'best_model_dir':
        os.path.join(args.output_dir, 'best_model'),
        'cache_dir':
        os.path.join(args.output_dir, 'cache_dir')
    })

    train, test = read_data_files(args.train_path,
                                  args.test_path,
                                  inject_features=args.inject_features)

    # Metric callables forwarded as keyword arguments to every
    # train_model / eval_model call.
    metrics = {
        'pearson_corr': pearson_corr,
        'spearman_corr': spearman_corr,
        'mae': mean_absolute_error,
    }

    if train_config['evaluate_during_training']:
        n_fold = train_config['n_fold']
        if n_fold > 1:
            # One column of test-set predictions per fold.
            test_preds = np.zeros((len(test), n_fold))
            for i in range(n_fold):
                # Start every fold from a clean output directory.
                # os.path.isdir() is already False for a missing path.
                if os.path.isdir(train_config['output_dir']):
                    shutil.rmtree(train_config['output_dir'])

                # Each fold splits the full `train` frame. Rebinding `train`
                # to the split's training part would shrink the pool on
                # every iteration.
                # NOTE(review): SEED * i is 0 for the first fold whatever
                # SEED is; confirm that is intended.
                model = _train_with_holdout(train, SEED * i, metrics)
                _, model_outputs, _ = model.eval_model(test, **metrics)
                test_preds[:, i] = model_outputs

            test['predictions'] = test_preds.mean(axis=1)

        else:
            model = _train_with_holdout(train, SEED, metrics)
            _, model_outputs, _ = model.eval_model(test, **metrics)
            test['predictions'] = model_outputs
    else:
        # No hold-out set: train on everything and evaluate the in-memory
        # model directly (nothing is reloaded from best_model_dir).
        model = _make_model(MODEL_NAME)
        model.train_model(train,
                          inject_features=args.inject_features,
                          **metrics)
        _, model_outputs, _ = model.eval_model(test, **metrics)
        test['predictions'] = model_outputs

    # Apply the project's `un_fit` transform to both columns before export
    # (presumably the inverse of the label scaling; verify against un_fit).
    test = un_fit(test, 'labels')
    test = un_fit(test, 'predictions')
    test.to_csv(os.path.join(args.output_dir,
                             '{}.tsv'.format(args.results_fname)),
                header=True,
                sep='\t',
                index=False,
                encoding='utf-8')
    draw_scatterplot(
        test, 'labels', 'predictions',
        os.path.join(args.output_dir, '{}.png'.format(args.results_fname)),
        MODEL_TYPE + ' ' + MODEL_NAME)