from __future__ import print_function
import json
from deepmoji.model_def import deepmoji_transfer
from deepmoji.finetuning import (
    load_benchmark,
    finetune,
    change_trainable,
    freeze_layers)
from deepmoji.global_variables import PRETRAINED_PATH, NB_TOKENS


def test_finetune_full():
    """ Finetuning using 'full'. """
    DATASET_PATH = '../data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.65

    with open('../model/vocabulary.json', 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)
    model = deepmoji_transfer(nb_classes, data['maxlen'], PRETRAINED_PATH,
                              extend_embedding=data['added'])
    model.summary()
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='full', nb_epochs=1)

    print("Finetune full SS-Youtube 1 epoch acc: {}".format(acc))
    assert acc >= min_acc
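
# A minimal sketch (not part of the test suite) of the dict layout that
# load_benchmark appears to return, inferred from how the tests index it:
# 'texts' and 'labels' hold (train, val, test) splits, and 'added' counts
# the vocabulary tokens added via extend_with. describe_benchmark_data is a
# hypothetical helper used only for illustration.
def describe_benchmark_data(data):
    (X_train, X_val, X_test) = data['texts']
    print('train/val/test sizes: {}/{}/{}'.format(
        len(X_train), len(X_val), len(X_test)))
    print('maxlen: {}, batch_size: {}, added tokens: {}'.format(
        data['maxlen'], data['batch_size'], data['added']))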

def test_deepmoji_transfer_extend_embedding():
    """ Defining deepmoji with extension. """
    extend_with = 50
    model = deepmoji_transfer(5, 30, weight_path=PRETRAINED_PATH,
                              extend_embedding=extend_with)
    embedding_layer = model.layers[1]
    assert embedding_layer.input_dim == NB_TOKENS + extend_with

def test_change_trainable():
    """ change_trainable() changes trainability of layers. """
    model = deepmoji_transfer(5, 30)
    change_trainable(model.layers[0], False)
    assert not model.layers[0].trainable
    change_trainable(model.layers[0], True)
    assert model.layers[0].trainable
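
# Usage sketch: change_trainable can reproduce the 'last' setup by freezing
# everything except the final (softmax) layer. This is an illustration, not
# the library's own finetuning code; freeze_all_but_last is hypothetical.
def freeze_all_but_last(model):
    for layer in model.layers[:-1]:
        change_trainable(layer, False)
    change_trainable(model.layers[-1], True)
    return model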

def test_freeze_layers():
    """ Correct layers are frozen. """
    model = deepmoji_transfer(5, 30)
    keyword = 'softmax'

    model = freeze_layers(model, unfrozen_keyword=keyword)

    for layer in model.layers:
        if layer is not None and len(layer.trainable_weights):
            if keyword in layer.name:
                assert layer.trainable
            else:
                assert not layer.trainable
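
# Sketch of the keyword-based pattern that test_freeze_layers verifies: every
# layer whose name lacks the keyword becomes untrainable. The actual
# freeze_layers implementation may differ; freeze_by_keyword is hypothetical.
def freeze_by_keyword(model, unfrozen_keyword='softmax'):
    for layer in model.layers:
        change_trainable(layer, unfrozen_keyword in layer.name)
    return model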

def test_finetune_last():
    """ Finetuning using 'last'. """
    DATASET_PATH = '../data/SS-Youtube/raw.pickle'
    nb_classes = 2
    min_acc = 0.65

    with open('../model/vocabulary.json', 'r') as f:
        vocab = json.load(f)

    data = load_benchmark(DATASET_PATH, vocab)
    model = deepmoji_transfer(nb_classes, data['maxlen'], PRETRAINED_PATH)
    model.summary()
    model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                          data['batch_size'], method='last', nb_epochs=1)

    print("Finetune last SS-Youtube 1 epoch acc: {}".format(acc))
    assert acc >= min_acc
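
# Follow-up sketch: evaluating a finetuned model on the held-out test split
# (index 2 of the splits above). Assumes a Keras-style model whose predict()
# returns class probabilities; accuracy_on_test_split is a hypothetical
# helper.
import numpy as np


def accuracy_on_test_split(model, data):
    X_test, y_test = data['texts'][2], data['labels'][2]
    pred = np.argmax(model.predict(X_test), axis=1)
    return float(np.mean(pred == np.asarray(y_test)))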
""" from __future__ import print_function import example_helper import json from deepmoji.model_def import deepmoji_transfer from deepmoji.global_variables import PRETRAINED_PATH from deepmoji.finetuning import ( load_benchmark, finetune) DATASET_PATH = '../data/kaggle-insults/raw.pickle' nb_classes = 2 with open('../model/vocabulary.json', 'r') as f: vocab = json.load(f) # Load dataset. Extend the existing vocabulary with up to 10000 tokens from # the training dataset. data = load_benchmark(DATASET_PATH, vocab, extend_with=10000) # Set up model and finetune. Note that we have to extend the embedding layer # with the number of tokens added to the vocabulary. model = deepmoji_transfer(nb_classes, data['maxlen'], PRETRAINED_PATH, extend_embedding=data['added']) model.summary() model, acc = finetune(model, data['texts'], data['labels'], nb_classes, data['batch_size'], method='chain-thaw', verbose=2) print('Acc: {}'.format(acc))
from __future__ import print_function
import example_helper
import json
from deepmoji.finetuning import load_benchmark
from deepmoji.class_avg_finetuning import class_avg_finetune
from deepmoji.model_def import deepmoji_transfer
from deepmoji.global_variables import PRETRAINED_PATH

DATASET_PATH = '../data/SE0714/raw.pickle'
nb_classes = 3

with open('../model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

# Load dataset. Extend the existing vocabulary with up to 10000 tokens from
# the training dataset.
data = load_benchmark(DATASET_PATH, vocab, extend_with=10000)

# Set up model and finetune. Note that we have to extend the embedding layer
# with the number of tokens added to the vocabulary.
#
# Also note that when using class average F1 to evaluate, the model has to be
# defined with two classes, since the model will be trained for each class
# separately.
model = deepmoji_transfer(2, data['maxlen'], PRETRAINED_PATH,
                          extend_embedding=data['added'])
model.summary()

# For finetuning, however, pass in the actual number of classes.
model, f1 = class_avg_finetune(model, data['texts'], data['labels'],
                               nb_classes, data['batch_size'],
                               method='last')
print('F1: {}'.format(f1))
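
# Sketch of the class average F1 metric reported above: one binary model is
# trained per class and the per-class F1 scores are averaged. Shown with
# sklearn for clarity; the library presumably computes an equivalent
# internally, and class_average_f1 is a hypothetical helper.
from sklearn.metrics import f1_score


def class_average_f1(y_true_per_class, y_pred_per_class):
    scores = [f1_score(y_t, y_p)
              for y_t, y_p in zip(y_true_per_class, y_pred_per_class)]
    return sum(scores) / len(scores)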
if FINETUNE_METHOD == 'last':
    extend_with = 0
elif FINETUNE_METHOD in ['new', 'full', 'chain-thaw']:
    extend_with = 10000
else:
    raise ValueError('Finetuning method not recognised!')

# Load dataset.
data = load_benchmark(path, vocab, extend_with=extend_with)

(X_train, y_train) = (data['texts'][0], data['labels'][0])
(X_val, y_val) = (data['texts'][1], data['labels'][1])
(X_test, y_test) = (data['texts'][2], data['labels'][2])

weight_path = PRETRAINED_PATH if FINETUNE_METHOD != 'new' else None
nb_model_classes = 2 if use_f1_score else nb_classes
model = deepmoji_transfer(
    nb_model_classes,
    data['maxlen'],
    weight_path,
    extend_embedding=data['added'])
model.summary()

# Training
print('Training: {}'.format(path))
if use_f1_score:
    model, result = class_avg_finetune(model, data['texts'],
                                       data['labels'], nb_classes,
                                       data['batch_size'],
                                       FINETUNE_METHOD,
                                       verbose=VERBOSE)
else:
    model, result = finetune(model, data['texts'], data['labels'],
                             nb_classes, data['batch_size'],
                             FINETUNE_METHOD, metric='acc',
                             verbose=VERBOSE)
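
# Summary sketch of how the branches above configure each finetuning method
# (an illustrative mapping, not a structure the library defines):
FINETUNE_SETUP = {
    'last': {'weights': PRETRAINED_PATH, 'extend_with': 0},
    'full': {'weights': PRETRAINED_PATH, 'extend_with': 10000},
    'chain-thaw': {'weights': PRETRAINED_PATH, 'extend_with': 10000},
    'new': {'weights': None, 'extend_with': 10000},
}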
"""Finetuning example: train the DeepMoji model on the SS-Youtube dataset
using the 'last' method.

The 'last' method does the following:
0) Load all weights except for the softmax layer. Do not add tokens to the
   vocabulary and do not extend the embedding layer.
1) Freeze all layers except for the softmax layer.
2) Train.
"""
from __future__ import print_function
import example_helper
import json
from deepmoji.model_def import deepmoji_transfer
from deepmoji.global_variables import PRETRAINED_PATH
from deepmoji.finetuning import (
    load_benchmark,
    finetune)

DATASET_PATH = '../data/SS-Youtube/raw.pickle'
nb_classes = 2

with open('../model/vocabulary.json', 'r') as f:
    vocab = json.load(f)

# Load dataset.
data = load_benchmark(DATASET_PATH, vocab)

# Set up model and finetune
model = deepmoji_transfer(nb_classes, data['maxlen'], PRETRAINED_PATH)
model.summary()
model, acc = finetune(model, data['texts'], data['labels'], nb_classes,
                      data['batch_size'], method='last')
print('Acc: {}'.format(acc))
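
# Follow-up sketch: running the finetuned classifier on new text. Assumes
# SentenceTokenizer from deepmoji.sentence_tokenizer, set up with the same
# vocabulary and maxlen used for finetuning; the example sentence is
# illustrative.
import numpy as np
from deepmoji.sentence_tokenizer import SentenceTokenizer

st = SentenceTokenizer(vocab, data['maxlen'])
tokenized, _, _ = st.tokenize_sentences([u'I love how this video ends!'])
print('Predicted class: {}'.format(
    np.argmax(model.predict(tokenized), axis=1)))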