def test_finetune_last():
    """ finetuning using 'last'. """
    dataset_path = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(dataset_path, vocab)

    print('Loading model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH)
    print(model)
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='last', nb_epochs=1)
    print("Finetune last SS-Youtube 1 epoch acc: {}".format(acc))

    assert acc >= min_acc
def test_finetune_full():
    """ finetuning using 'full'. """
    DATASET_PATH = ROOT_PATH + '/data/SS-Youtube/raw.pickle'
    nb_classes = 2
    # The Keras and PyTorch implementations of the Adam optimizer differ
    # slightly, which changes the results a bit, so we lower the minimum
    # accuracy required to pass this test.
    # See e.g. https://discuss.pytorch.org/t/suboptimal-convergence-when-compared-with-tensorflow-model/5099/11
    min_acc = 0.68

    with open(VOCAB_PATH, 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)

    print('Loading pyTorch model from {}.'.format(PRETRAINED_PATH))
    model = torchmoji_transfer(nb_classes, PRETRAINED_PATH,
                               extend_embedding=data['added'])
    print(model)
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='full', nb_epochs=1)
    print("Finetune full SS-Youtube 1 epoch acc: {}".format(acc))

    assert acc >= min_acc
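# The two tests above exercise the 'last' and 'full' finetuning strategies on
# the same SS-Youtube benchmark; only the 'full' variant extends the vocabulary
# (extend_with=10000) and therefore the embedding layer
# (extend_embedding=data['added']). Both index into the dict returned by
# load_benchmark(). The helper below is an illustrative sketch rather than part
# of the test suite: it only checks the keys these tests actually use, and the
# parallel-splits assumption in the last line is inferred from how finetune()
# is called.
def _check_benchmark_dict(data):
    # Keys referenced by the tests in this file.
    for key in ('texts', 'labels', 'batch_size', 'added'):
        assert key in data, 'load_benchmark() result is missing {!r}'.format(key)
    # 'texts' and 'labels' are handed to finetune() side by side, so they are
    # expected to be parallel collections (e.g. matching train/val/test splits).
    assert len(data['texts']) == len(data['labels'])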
model = torchmoji_transfer(nb_model_classes, weight_path,
                           extend_embedding=data['added'])
print(model)

# Training
print('Training: {}'.format(path))
if use_f1_score:
    model, result = class_avg_finetune(model, data['texts'],
                                       data['labels'], nb_classes,
                                       data['batch_size'], FINETUNE_METHOD,
                                       verbose=VERBOSE)
else:
    model, result = finetune(model, data['texts'], data['labels'],
                             nb_classes, data['batch_size'],
                             FINETUNE_METHOD, metric='acc',
                             verbose=VERBOSE)

# Write results
if use_f1_score:
    print('Overall F1 score (dset = {}): {}'.format(dset, result))
    with open('{}/{}_{}_{}_results.txt'.format(RESULTS_DIR, dset,
                                               FINETUNE_METHOD, rerun_iter),
              "w") as f:
        f.write("F1: {}\n".format(result))
else:
    print('Test accuracy (dset = {}): {}'.format(dset, result))
    with open('{}/{}_{}_{}_results.txt'.format(RESULTS_DIR, dset,
                                               FINETUNE_METHOD, rerun_iter),
              "w") as f:
        f.write("Acc: {}\n".format(result))
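# The fragment above comes from a larger benchmark-driver script and relies on
# names defined elsewhere in it: the loop variables (path, dset, data,
# nb_classes, nb_model_classes, weight_path, use_f1_score, rerun_iter) and the
# constants FINETUNE_METHOD, VERBOSE and RESULTS_DIR. The lines below are a
# hedged sketch of that missing setup, not the original script: the import
# paths mirror the other files in this section, the class_avg_finetune module
# path is an assumption, and the constant values are placeholders.
from torchmoji.model_def import torchmoji_transfer
from torchmoji.finetuning import load_benchmark, finetune
from torchmoji.class_avg_finetuning import class_avg_finetune  # assumed path

FINETUNE_METHOD = 'last'   # placeholder; any of the methods used in this section
VERBOSE = 1                # placeholder
RESULTS_DIR = 'results'    # placeholder directory for the *_results.txt files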
4) Unfreeze all layers and train entire model.
"""
from __future__ import print_function
import example_helper
import json

from torchmoji.model_def import torchmoji_transfer
from torchmoji.global_variables import PRETRAINED_PATH
from torchmoji.finetuning import (
    load_benchmark,
    finetune)

DATASET_PATH = '../data/kaggle-insults/raw.pickle'
nb_classes = 2

with open('../model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

# Load dataset. Extend the existing vocabulary with up to 10000 tokens from
# the training dataset.
data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)

# Set up model and finetune. Note that we have to extend the embedding layer
# with the number of tokens added to the vocabulary.
model = torchmoji_transfer(nb_classes, PRETRAINED_PATH,
                           extend_embedding=data['added'])
print(model)
model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                      data['batch_size'], method='chain-thaw')
print('Acc: {}'.format(acc))
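# The example above is wired to the 'chain-thaw' strategy, but finetune() is the
# same entry point the tests at the top of this section call with method='last'
# and method='full', so trying another strategy only means swapping that one
# argument. The commented call below is purely an illustration of that change,
# not part of the original script:
# model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
#                       data['batch_size'], method='full')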