#         parts = line[:-1].split('\t')
#         label = int(parts[0])
#         string = parts[1]
#         augmented_sentences = eda.get_delete_sentences(string, n_aug=n_aug, alpha_rd=alpha)

#         cosine_sim_list = utils_bert.compute_sent_similarities(string, augmented_sentences)
#         dataset_cosine_sim_list += cosine_sim_list

#     return dataset_cosine_sim_list

if __name__ == "__main__":

    # Each entry pairs a similarity routine with the label used in the report.
    for method, name in [
            #(compute_swap_similarity, 'swap'),
        (compute_insert_similarity, 'insert'),
            # (compute_insertswap_similarity, 'insertswap'),
    ]:

        # Pool cosine similarities across every dataset before reporting.
        pooled_sims = []
        for dataset_name in ['sst2', 'subj', 'trec']:

            # Locate the 'delete'-augmented training file for this dataset.
            data_folder = config.data_folders[dataset_name]
            delete_folder = utils_config.make_exp_folder(data_folder, 'delete')
            delete_train_txt_path, _, _, _ = utils_config.get_txt_paths(
                delete_folder)
            pooled_sims.extend(method(delete_train_txt_path))

        print(
            f"{name} has sim {mean(pooled_sims):.3f} stdev {stdev(pooled_sims):.3f}"
        )
# ---------- Example #2 ----------
import config, utils_config, utils_mlp_a1_grad_sim_classification
from pathlib import Path

# Experiment configuration (module-level constants).
dataset_name = 'sst2'
data_folder = config.data_folders[dataset_name]
output_folder = Path("outputs")
exp_id = '9b'
# SST-2 is a binary (positive/negative) sentiment task.
num_classes = 2
# Checkpoint to resume from; "e10_va0.7460" presumably encodes
# epoch 10 / validation accuracy 0.7460 -- confirm against the training code.
resume_checkpoint_path = Path(
    "checkpoints/sst2_vanilla_mlp_50_seednum0/e10_va0.7460.pt")

if __name__ == "__main__":

    # Resolve the four dataset file locations once, up front.
    paths = utils_config.get_txt_paths(data_folder)
    train_txt_path, train_embedding_path, test_txt_path, test_embedding_path = paths

    for train_subset in [500]:  #, 20, 50]:

        # Train (num_seeds=1 run) and collect the eight accuracy statistics.
        results = utils_mlp_a1_grad_sim_classification.train_mlp_multiple(
            train_txt_path,
            train_embedding_path,
            test_txt_path,
            test_embedding_path,
            num_classes=num_classes,
            dataset_name=dataset_name,
            exp_id=exp_id,
            train_subset=train_subset,
            resume_checkpoint_path=resume_checkpoint_path,
            num_seeds=1,
        )
        (mean_val_acc, stdev_acc, mean_conf_acc, stdev_conf_acc,
         mlp_val_acc, mlp_stdev_acc, mlp_conf_acc,
         mlp_stdev_conf_acc) = results
# ---------- Example #3 ----------
import config, utils_bert, utils_config, utils_svm, utils_processing

# Evaluate against the SST-2 dataset only.
data_folder = config.data_folders['sst2']

if __name__ == "__main__":

    # Resolve the four canonical dataset paths (train/test text + embeddings).
    train_txt_path, train_embedding_path, test_txt_path, test_embedding_path = utils_config.get_txt_paths(
        data_folder)
    # One-off embedding precomputation; kept commented for reference.
    # utils_bert.get_embedding_dict(train_txt_path, train_embedding_path)
    # utils_bert.get_embedding_dict(test_txt_path, test_embedding_path)

    # "insert"-augmented evaluation set. Plain string literals here: the
    # originals were f-strings with no placeholders (ruff F541).
    insert_folder = utils_config.make_exp_folder(data_folder, "insert-eval")
    _, _, insert_test_txt_path, insert_test_embedding_path = utils_config.get_txt_paths(
        insert_folder)
    # utils_processing.augment_insert(test_txt_path, insert_test_txt_path, n_aug=2, alpha=0.3)
    # utils_bert.get_embedding_dict(insert_test_txt_path, insert_test_embedding_path)

    # "swap"-augmented evaluation set, built the same way.
    swap_folder = utils_config.make_exp_folder(data_folder, "swap-eval")
    _, _, swap_test_txt_path, swap_test_embedding_path = utils_config.get_txt_paths(
        swap_folder)
    # utils_processing.augment_swap(test_txt_path, swap_test_txt_path, n_aug=2, alpha=0.2)
    # utils_bert.get_embedding_dict(swap_test_txt_path, swap_test_embedding_path)

    # utils_svm.evaluate_svm_baselines(   train_txt_path,
    #                                     test_txt_path,
    #                                     train_embedding_path,
    #                                     test_embedding_path,
    #                                     insert_test_txt_path,
    #                                     insert_test_embedding_path,
    #                                     swap_test_txt_path,
    #                                     swap_test_embedding_path,