Example #1
def test_FireBERT_base(task, set, reps=1, sample=1, hparams_default={}, tf=False):

    # prepare hyperparameters
    hparams = hparams_default

    # load the right processor class
    if task == "MNLI":
        processor = MnliProcessor({'sample_percent':sample}) # negative number means abs number of samples, not percent
        adv_processor = MnliProcessor({'sample_percent':sample}) # negative number means abs number of samples, not percent
    elif task == "IMDB":
        processor = ImdbProcessor({'sample_percent':sample})
        adv_processor = ImdbProcessor({'sample_percent':sample})

    lightning = "_on_lightning" if not tf else ""
    # now instantiate the models - one for the regular set
    model = FireBERT_base(load_from='resources/models/'+task+lightning+'/pytorch_model.bin', 
                        processor=processor, 
                        hparams=hparams_default)

    dataset, examples = processor.load_and_cache_examples("data/"+task, example_set=set)
    model.set_test_dataset(dataset, examples)

    # one for the adversarial set

    model_adv = FireBERT_base(load_from='resources/models/'+task+lightning+'/pytorch_model.bin', 
                        processor=processor, 
                        hparams=hparams_default)

    dataset_adv, examples_adv = adv_processor.load_and_cache_examples("data/"+task, example_set="adv_"+set)
    model_adv.set_test_dataset(dataset_adv, examples_adv)

    #
    # now test them both, and log results
    #
    trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
    trainer.test(model)
    result = trainer.tqdm_metrics

    f = open("results/base/hparams-results.csv", "a+")
    print(task, ",", set, ",", sample, ',"',hparams,'",',result['avg_test_acc'], sep="", file=f)
    f.close()

    trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
    trainer.test(model_adv)
    result = trainer.tqdm_metrics

    f = open("results/base/hparams-results.csv", "a+")
    print(task, ",", "adv_"+set, ",", sample, ',"',hparams,'",',result['avg_test_acc'], sep="", file=f)
    f.close()


    print("baseline data logged.")
    elapsed_time()
    print()
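
# Usage sketch (assumption, not part of the original module): run the baseline
# test on a 3% sample of the IMDB dev set. Assumes the module-level imports the
# function relies on (torch, pytorch_lightning as pl, MnliProcessor,
# ImdbProcessor, FireBERT_base) and the elapsed_time() helper are in scope.
test_FireBERT_base("IMDB", "dev", sample=3, hparams_default={'batch_size': 32})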
Example #2
def test_FireBERT_FVE(task, set, reps=1, sample=1, hparams_default={}, hparams_lists=None, lightning=''):

    # prepare hyperparameters
    hparams = hparams_default

    # load the right processor class
    if task == "MNLI":
        processor = MnliProcessor({'sample_percent':sample}) # negative number means abs number of samples, not percent
    elif task == "IMDB":
        processor = ImdbProcessor({'sample_percent':sample})

    # now instantiate the models
    model = FireBERT_FVE(load_from='resources/models/'+task+lightning+'/pytorch_model.bin', 
                        processor=processor, 
                        hparams=hparams_default)
    processor.set_tokenizer(model.tokenizer)

    dataset, examples = processor.load_and_cache_examples("data/"+task, example_set=set)
    model.set_test_dataset(dataset, examples)

    #adv set
    # load the right processor class
    if task == "MNLI":
        adv_processor = MnliProcessor({'sample_percent':sample}) # negative number means abs number of samples, not percent
    elif task == "IMDB":
        adv_processor = ImdbProcessor({'sample_percent':sample})

    model_adv = FireBERT_FVE(load_from='resources/models/'+task+lightning+'/pytorch_model.bin', 
                        processor=processor, 
                        hparams=hparams_default)
    adv_processor.set_tokenizer(model.tokenizer)

    dataset_adv, examples_adv = adv_processor.load_and_cache_examples("data/"+task, example_set="adv_"+set)
    model_adv.set_test_dataset(dataset_adv, examples_adv)

    for i in range(reps):
        if hparams_lists is None:
            print("FireBERT_FVE specific test", task, set)
        else:
            print("FireBERT_FVE hparam test", task, set)
            print("{")
            for item in hparams_lists.items():
                key = item[0]
                values = item[1]
                hparams[key] = random.choice(values)
                print("  '"+key+"':",str(hparams[key])+",")
            print("}")

        # set the new hparams
        model.update_hparams(hparams)
        model_adv.update_hparams(hparams)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(model)
        result1 = trainer.tqdm_metrics

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        trainer.test(model_adv)
        result2 = trainer.tqdm_metrics

        f = open("results/five/hparams-results.csv", "a+")
        print(task, ",", "adv_"+set, ",", sample, ',"',hparams,'",',result1['avg_test_acc'],",",result2['avg_test_acc'], sep="", file=f)
        f.close()

        print("iteration",i,"logged.")
        elapsed_time()
        print()
    
        if hparams_lists is None:
            break
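
# Usage sketch (assumption): a small random search over FVE hyperparameters,
# drawing one value per key on each repetition. The keys shown are illustrative
# (they appear among the FSE hyperparameters in later examples), not a
# definitive FVE search space.
test_FireBERT_FVE("IMDB", "dev", reps=10, sample=3,
                  hparams_default={'batch_size': 32},
                  hparams_lists={'perturb_words': [2, 3, 5],
                                 'candidates_per_word': [5, 10, 15]})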
Example #3
def test():
    import numpy

    import torch
    import pytorch_lightning as pl

    from processors import ImdbProcessor
    from bert_base_model import LightningBertForSequenceClassification

    # prepare hyperparameters

    gradient_accumulation_steps = 1
    learning_rate = 2e-5
    weight_decay = 0.0
    adam_epsilon = 1e-8

    max_steps = -1  # if -1 then calculate number of training steps based on the length of the train set
    warmup_steps = 0

    num_train_epochs = 5
    batch_size = 16

    len_train_set = 40000

    if max_steps > 0:
        num_train_epochs = max_steps // (len_train_set //
                                         gradient_accumulation_steps) + 1
        num_training_steps = max_steps
    else:
        num_training_steps = len_train_set // gradient_accumulation_steps * num_train_epochs
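        # with the defaults above: 40000 // 1 * 5 = 200,000 optimizer steps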

    hparams = {
        'learning_rate': learning_rate,
        'adam_epsilon': adam_epsilon,
        'weight_decay': weight_decay,
        'warmup_steps': warmup_steps,
        'num_training_steps': num_training_steps,
        'batch_size': batch_size,
    }

    model = LightningBertForSequenceClassification(processor=ImdbProcessor(),
                                                   hparams=hparams)
    processor = model.get_processor()
    train_dataset, _ = processor.load_and_cache_examples("data/IMDB",
                                                         example_set='train')
    val_dataset, _ = processor.load_and_cache_examples("data/IMDB",
                                                       example_set='dev')
    test_dataset, _ = processor.load_and_cache_examples("data/IMDB",
                                                        example_set='test')

    model.set_train_dataset(train_dataset)
    model.set_val_dataset(val_dataset)
    model.set_test_dataset(test_dataset)

    from pytorch_lightning.logging import TensorBoardLogger

    save_root_path = 'models/IMDB_on_lightning/'
    tensor_logger = TensorBoardLogger(save_dir=save_root_path + 'logs',
                                      version=10,
                                      name='imdb_finetuning')
    checkpoint_save_path = save_root_path + 'checkpoints/'

    from pytorch_lightning.callbacks import ModelCheckpoint

    checkpoint_callback = ModelCheckpoint(filepath=checkpoint_save_path,
                                          verbose=True,
                                          monitor='val_loss',
                                          mode='min')
    amp_opt_level = 'O1'  # https://nvidia.github.io/apex/amp.html#opt-levels
    max_grad_norm = 1.0

    trainer = pl.Trainer(default_save_path=checkpoint_save_path,
                         logger=tensor_logger,
                         gpus=(-1 if torch.cuda.is_available() else None),
                         max_epochs=num_train_epochs,
                         amp_level=amp_opt_level,
                         gradient_clip_val=max_grad_norm,
                         checkpoint_callback=checkpoint_callback)

    trainer.fit(model)
Example #4
# hyperparameter dict; the values referenced below are defined earlier in the script
hparams = {
    'use_full_example': use_full_example,
    'leave_alone': leave_alone,
    'random_out_of': random_out_of,
    'judge_bert': judge_bert
}

print(hparams)

proc_hparams = {}
# delete this next line to run full 100%
proc_hparams.update({'sample_percent': 3, 'randomize': False})

# instantiate the model used for SWITCH
switch_model = LightningBertForSequenceClassification(
    load_from='resources/models/IMDB/pytorch_model.bin',
    processor=ImdbProcessor(),
    hparams={'batch_size': 6})
switch_model.cuda()

model = FireBERT_FCT(switch_model=switch_model,
                     processor=ImdbProcessor(hparams=proc_hparams),
                     hparams=hparams)

processor = model.get_processor()

train_dataset, train_examples = processor.load_and_cache_examples(
    "data/IMDB", example_set='train')
val_dataset, _ = processor.load_and_cache_examples("data/IMDB",
                                                   example_set='dev')
test_dataset, _ = processor.load_and_cache_examples("data/IMDB",
                                                    example_set='test')
Example #5
def test_iter_FireBERT_FSE(set='mnli', sample_pct=3):

    import gc
    import random
    gc.enable()

    print("Simple FireBERT_FSE tests")
    # create a FireBERT_FSE classifier for MNLI

    u = random.randint(2, 20)
    p = random.randint(2, 15)
    c = random.randint(3, 15)
    t = random.randint(3, 20)
    b = 32
    random_bit = random.getrandbits(1)
    v = bool(random_bit)

    # prepare hyperparameters
    hparams = {
        'use_USE': True,
        'USE_method': "filter",
        'USE_multiplier': u,
        'stop_words': True,
        'perturb_words': p,
        'candidates_per_word': c,
        'total_alternatives': t,
        'match_pos': True,
        'batch_size': b,
        'verbose': False,
        'vote_avg_logits': v
    }
    if set == 'mnli':
        # now instantiate the MNLI model
        processor = MnliProcessor({'sample_percent': sample_pct})
        model = FireBERT_FSE(
            load_from='resources/models/MNLI/pytorch_model.bin',
            processor=processor,
            hparams=hparams)

        processor = model.get_processor()

        dataset, examples = processor.load_and_cache_examples(
            "data/MNLI", example_set='dev')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("MNLI: Start testing dev set")
        trainer.test(model)
        resulta = trainer.tqdm_metrics

        dataset, examples = processor.load_and_cache_examples(
            "data/MNLI", example_set='adv_dev')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("MNLI: Start testing adversarial dev set")
        trainer.test(model)
        resultb = trainer.tqdm_metrics

        fname = "results/fse/mnli-hparams-results.txt"
        f = open(fname, "a")
        f.write(
            str(u) + "-" + str(p) + "-" + str(c) + "-" + str(t) + "-" +
            str(b) + "-" + str(v) + "," + str(resulta['avg_test_acc']) + "," +
            str(resultb['avg_test_acc']))
        f.write("\n")
        f.close()
        processor = None
        dataset = None
        examples = None
        model = None
        trainer = None
        gc.collect()
    elif set == 'imdb':
        # now instantiate the IMDB model
        processor = ImdbProcessor({
            'sample_percent': sample_pct
        })  # negative number means abs number of samples, not percent

        model = FireBERT_FSE(
            load_from='resources/models/IMDB/pytorch_model.bin',
            processor=processor,
            hparams=hparams)

        processor = model.get_processor()

        dataset, examples = processor.load_and_cache_examples(
            "data/IMDB", example_set='dev')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("IMDB: Start testing dev set")
        trainer.test(model)
        resulta = trainer.tqdm_metrics

        dataset, examples = processor.load_and_cache_examples(
            "data/IMDB", example_set='adv_dev')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("IMDB: Start testing adversarial dev set")
        trainer.test(model)
        resultb = trainer.tqdm_metrics

        fname = "results/fse/imdb-hparams-results.txt"
        f = open(fname, "a")
        f.write(
            str(u) + "-" + str(p) + "-" + str(c) + "-" + str(t) + "-" +
            str(b) + "-" + str(v) + "," + str(resulta['avg_test_acc']) + "," +
            str(resultb['avg_test_acc']))
        f.write("\n")
        f.close()
        processor = None
        dataset = None
        examples = None
        model = None
        trainer = None
        gc.collect()
    else:
        print("Invalid data set selected!")
Example #6
def test_param_FireBERT_FSE(hparams, dset='mnli', sample_pct=3):
    '''This function exists specifically to reproduce results from randomized
    control trials. Pass in hparams as an argument.'''

    u = hparams['use_USE']
    um = hparams['USE_method']
    p = hparams['perturb_words']
    c = hparams['candidates_per_word']
    t = hparams['total_alternatives']
    v = hparams['vote_avg_logits']

    print("Simple Static FireBERT_FSE test")
    if dset == 'mnli':
        # now instantiate the MNLI model
        processor = MnliProcessor({'sample_percent': sample_pct})
        model = FireBERT_FSE(
            load_from='resources/models/MNLI/pytorch_model.bin',
            processor=processor,
            hparams=hparams)

        processor = model.get_processor()

        dataset, examples = processor.load_and_cache_examples(
            "data/MNLI", example_set='dev')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("Start testing")
        trainer.test(model)
        resulta = trainer.tqdm_metrics

        dataset, examples = processor.load_and_cache_examples(
            "data/MNLI", example_set='adv_test')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("Start testing")
        trainer.test(model)
        resultb = trainer.tqdm_metrics

        print("Results: ")
        print("Regular results", resulta['avg_test_acc'])
        print("Adversarial results", resultb['avg_test_acc'])

        fname = str("results/fse/ind-result-mnli-" + str(u) + str(um) + "-" +
                    str(p) + "-" + str(c) + "-" + str(t) + "-" + str(v) +
                    ".txt")
        f = open(fname, "a")
        f.write(
            str(u) + "-" + str(um) + "-" + str(p) + "-" + str(c) + "-" +
            str(t) + "-" + str(v) + "," + str(resulta['avg_test_acc']) + "," +
            str(resultb['avg_test_acc']))
        f.write("\n")
        f.close()
        processor = None
        dataset = None
        examples = None
        model = None
        trainer = None

    elif dset == 'imdb':
        # now instantiate the IMDB model
        processor = ImdbProcessor({
            'sample_percent': sample_pct
        })  # negative number means abs number of samples, not percent

        model = FireBERT_FSE(
            load_from='resources/models/IMDB/pytorch_model.bin',
            processor=processor,
            hparams=hparams)

        processor = model.get_processor()

        dataset, examples = processor.load_and_cache_examples(
            "data/IMDB", example_set='dev')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("Start testing")
        trainer.test(model)
        resulta = trainer.tqdm_metrics

        dataset, examples = processor.load_and_cache_examples(
            "data/IMDB", example_set='adv_test')
        model.set_test_dataset(dataset, examples)

        trainer = pl.Trainer(gpus=(-1 if torch.cuda.is_available() else None))
        print("Start testing")
        trainer.test(model)
        resultb = trainer.tqdm_metrics

        print("Results: ")
        print("Regular results", resulta['avg_test_acc'])
        print("Adversarial results", resultb['avg_test_acc'])

        fname = str("results/fse/ind-result-imdb-" + str(u) + str(um) + "-" +
                    str(p) + "-" + str(c) + "-" + str(t) + "-" + str(v) +
                    ".txt")
        f = open(fname, "a")
        f.write(
            str(u) + "-" + str(um) + "-" + str(p) + "-" + str(c) + "-" +
            str(t) + "-" + str(v) + "," + str(resulta['avg_test_acc']) + "," +
            str(resultb['avg_test_acc']))
        f.write("\n")
        f.close()
        processor = None
        dataset = None
        examples = None
        model = None
        trainer = None

    else:
        print("Invalid data set selected!")
Example #7
def debug():
    from processors import MnliProcessor, ImdbProcessor
    from firebert_fse import FireBERT_FSE

    print("Simple SWITCH tests (through FSE)")
    # create a FireBERT_FSE classifier for MNLI

    # prepare hyperparameters
    hparams = {
        'batch_size': 32,
        'leave_alone': 0,
        'random_out_of': 0,
        'judge_bert': False
    }

    # now instantiate the model
    model = FireBERT_FSE(load_from='resources/models/MNLI/pytorch_model.bin',
                         processor=MnliProcessor(),
                         hparams=hparams)

    print()
    print()
    print()
    data = [{
        'premise':
        "Conceptually cream skimming has two basic dimensions - product and geography.",
        'hypothesis':
        "Product and geography are what make cream skimming work.",
        'label': "neutral"
    }, {
        'premise':
        "He writes that it 's the first time he 's added such a track .",
        'hypothesis': "This is the first time he 's added such a track .",
        'label': "neutral"
    }, {
        'premise': "The new rights are nice enough .",
        'hypothesis': "Everyone really likes the newest benefits .",
        'label': "neutral"
    }, {
        'premise':
        "This site includes a list of all award winners and a searchable database of Government Executive articles.",
        'hypothesis':
        "The Government Executive articles housed on the website are not able to be searched.",
        'label': "contradiction"
    }, {
        'premise':
        "yeah i i think my favorite restaurant is always been the one closest  you know the closest as long as it's it meets the minimum criteria you know of good food",
        'hypothesis':
        "My favorite restaurants are always at least a hundred miles away from my house.",
        'label': "contradiction"
    }, {
        'premise':
        "Calcutta seems to be the only other production center having any pretensions to artistic creativity at all, but ironically you're actually more likely to see the works of Satyajit Ray or Mrinal Sen shown in Europe or North America than in India itself.",
        'hypothesis':
        "Most of Mrinal Sen's work can be found in European collections.",
        'label': "neutral"
    }]

    def top_n(index, words, n):
        result = [words[i] for i in index if i != -1]
        result = [
            word for word in result if word not in model.switch.stop_words
        ]
        return result[:n]

    for d in data:
        # make a "feature" tensor out of those
        example, input_ids, attention_mask, token_type_ids, label = \
            model.processor.make_single_example_with_features(d["premise"], d["hypothesis"], d["label"])

        # use SWITCH to figure out word importance within the list
        word_indices, token_indices, word_list = \
            model.switch.get_important_indices_from_example(example, input_ids, token_type_ids, attention_mask)

        print("Premise:", d["premise"])
        print("Original hypothesis:", d["hypothesis"], "(original label: ",
              d['label'], ")")

        print("Top 5 hypothesis words (new):", top_n(word_indices, word_list,
                                                     5))
        print()

    print()

    print()
    print("IMDB test")
    print()

    # prepare hyperparameters
    hparams = {
        'batch_size': 32,
        'leave_alone': 0,
        'random_out_of': 0,
        'judge_bert': False,
        'perturb_words': 2
    }

    # now instantiate the model
    model = FireBERT_FSE(load_from='resources/models/IMDB/pytorch_model.bin',
                         processor=ImdbProcessor(),
                         hparams=hparams)

    text = "This movie is truly fun for the whole family. Adults and kids will totally enjoy it!"
    label = 1

    # make a "feature" tensor out of those
    example, input_ids, attention_mask, token_type_ids, label = \
        model.processor.make_single_example_with_features(text, None, label)

    texts = model.switch.generate_candidates_from_example(example)
    print(text)
    for t in texts:
        print(":", t)
    print()

    # prepare new hyperparameters
    hparams = {
        'batch_size': 32,
        'leave_alone': 0,
        'random_out_of': 0,
        'judge_bert': False,
        'perturb_words': 5
    }

    # make the model use the new hparams
    model.update_hparams(hparams)

    texts = model.switch.generate_candidates_from_example(example)
    print(text)
    for t in texts:
        print(":", t)