def test_text_cnn_trainer_regression_test():
    utils.set_seed_everywhere()

    test_args = Namespace(
        # Model hyper-parameters
        max_sequence_length=50,
        dim_model=128,
        num_filters=128,
        window_sizes=[3, 5, 7],
        num_classes=2,
        dropout=0.5,
        # Training hyper-parameters
        num_epochs=4,
        learning_rate=1.e-6,
        batch_size=64)

    train_loader, vocab = text_cnn_dataset.TextCNNDataset.get_training_dataloader(
        test_args)
    model = text_cnn.TextCNN(vocab_size=len(vocab),
                             dim_model=test_args.dim_model,
                             num_filters=test_args.num_filters,
                             window_sizes=test_args.window_sizes,
                             num_classes=test_args.num_classes,
                             dropout=test_args.dropout)
    trainer = train.TextCNNTrainer(test_args, vocab.mask_index, model,
                                   train_loader, vocab, True)
    trainer.run()
    losses = trainer.loss_cache

    # last loss across initial epochs should be converging
    assert losses[0].data >= losses[-1].data
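
# Illustrative sketch only: a plausible implementation of the seeding helper called at the
# top of each test, assuming it pins the Python, NumPy, and PyTorch RNGs to a fixed seed so
# the cached loss trajectory is reproducible. The actual
# nlpmodels.utils.utils.set_seed_everywhere may differ (default seed, CUDA handling, etc.).
def _set_seed_everywhere_sketch(seed: int = 0) -> None:
    import random

    import numpy as np
    import torch

    random.seed(seed)        # Python stdlib RNG
    np.random.seed(seed)     # NumPy RNG (used for the mock inputs below)
    torch.manual_seed(seed)  # PyTorch CPU (and default CUDA) RNG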
def test_gpt_trainer_regression_test():
    utils.set_seed_everywhere()

    test_args = Namespace(
        # Model hyper-parameters
        num_layers_per_stack=2,
        dim_model=12,
        dim_ffn=48,
        num_heads=2,
        block_size=64,
        dropout=0.1,
        # Training hyper-parameters
        num_epochs=5,
        learning_rate=0.0,
        batch_size=64,
    )

    train_loader, vocab = gpt_dataset.GPTDataset.get_training_dataloader(
        test_args)
    model = gpt.GPT(vocab_size=len(vocab),
                    num_layers_per_stack=test_args.num_layers_per_stack,
                    dim_model=test_args.dim_model,
                    dim_ffn=test_args.dim_ffn,
                    num_heads=test_args.num_heads,
                    block_size=test_args.block_size,
                    dropout=test_args.dropout)
    trainer = train.GPTTrainer(test_args, vocab.mask_index, model,
                               train_loader, vocab, True)
    trainer.run()
    losses = trainer.loss_cache

    # last loss across initial epochs should be converging
    assert losses[0].data > losses[-1].data
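
# Hypothetical helper (not part of nlpmodels): the trainer regression tests in this file
# all end with the same "first cached loss >= last cached loss" check on trainer.loss_cache.
# A shared assertion like this sketch could centralize that check; the name and the
# assumption that loss_cache holds scalar tensors are illustrative only.
def _assert_loss_converging(losses) -> None:
    assert len(losses) >= 2, "need at least two cached losses to compare"
    first, last = float(losses[0].data), float(losses[-1].data)
    assert first >= last, f"loss did not decrease: first={first:.6f}, last={last:.6f}"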
def test_word2vec_trainer_regression_test():
    utils.set_seed_everywhere()

    test_args = Namespace(
        # Skip-gram data hyper-parameters
        context_window_size=2,
        subsample_t=10.e-200,
        # Model hyper-parameters
        embedding_size=300,
        negative_sample_size=15,
        # Training hyper-parameters
        num_epochs=2,
        learning_rate=1.e-3,
        batch_size=4096,
    )

    train_dataloader, vocab = skipgram_dataset.SkipGramDataset.get_training_dataloader(
        test_args.context_window_size, test_args.subsample_t, test_args.batch_size)
    word_frequencies = torch.from_numpy(vocab.get_word_frequencies())
    model = word2vec.SkipGramNSModel(len(vocab), test_args.embedding_size,
                                     test_args.negative_sample_size,
                                     word_frequencies)
    trainer = train.Word2VecTrainer(test_args, model, train_dataloader, True)
    trainer.run()
    losses = trainer.loss_cache

    # last loss across initial epochs should be converging
    assert losses[0].data > losses[-1].data
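
# Background sketch (an assumption about SkipGramNSModel, not the library's code): the model
# above receives the vocabulary's word frequencies together with negative_sample_size k.
# In the original word2vec formulation, negatives are drawn from the unigram distribution
# raised to the 3/4 power; a minimal version of that sampling step could look like this.
def _sample_negatives_sketch(word_frequencies, batch_size: int, k: int):
    import torch

    noise_dist = word_frequencies.float().pow(0.75)
    noise_dist /= noise_dist.sum()  # normalize to a probability distribution
    # Draw k negative word indices per example in the batch.
    return torch.multinomial(noise_dist, batch_size * k,
                             replacement=True).view(batch_size, k)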
def test_transformer_regression_test():
    utils.set_seed_everywhere()

    test_2_args = Namespace(
        num_layers_per_stack=2,
        dim_model=512,
        dim_ffn=2048,
        num_heads=8,
        max_sequence_length=20,
        dropout=0.1,
    )

    # mock dataset
    src_tokens = [["the", "cow", "jumped", "over", "the", "moon"],
                  ["the", "british", "are", "coming"]]
    tgt_tokens = [["la", "vache", "a", "sauté", "sur", "la", "lune"],
                  ["les", "britanniques", "arrivent"]]
    batch_size = len(src_tokens)
    dictionary_source = NLPVocabulary.build_vocabulary(src_tokens)
    dictionary_target = NLPVocabulary.build_vocabulary(tgt_tokens)
    max_seq_length = 20
    src_padded = TransformerDataset.padded_string_to_integer(
        src_tokens, max_seq_length, dictionary_source)
    tgt_padded = TransformerDataset.padded_string_to_integer(
        tgt_tokens, max_seq_length + 1, dictionary_target)
    data = TransformerBatch(torch.LongTensor(src_padded),
                            torch.LongTensor(tgt_padded))
    model = transformer.Transformer(len(dictionary_source),
                                    len(dictionary_target),
                                    test_2_args.num_layers_per_stack,
                                    test_2_args.dim_model, test_2_args.dim_ffn,
                                    test_2_args.num_heads,
                                    test_2_args.max_sequence_length,
                                    test_2_args.dropout)

    # push through model
    y_hat = model(data)

    # expected output
    expected_output = transformer_regression_test_data.TRANSFORMER_REGRESSION_TEST_DATA

    # assert y_hat is within eps
    eps = 1.e-4
    assert np.allclose(y_hat.data.numpy(), expected_output.data.numpy(), atol=eps)
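
# Illustrative note (workflow assumption, not repo tooling): TRANSFORMER_REGRESSION_TEST_DATA
# above is a frozen "golden" forward pass. If the architecture or the seeding changes
# intentionally, the fixture can be regenerated from the same seeded setup and re-frozen,
# for example:
#
#     utils.set_seed_everywhere()
#     y_hat = model(data)
#     torch.save(y_hat.detach(), "transformer_regression_test_data.pt")  # hypothetical path
#
# and the saved tensor reloaded into tests.test_data.transformer_regression_test_data.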
def test_regression_test_cnn():
    utils.set_seed_everywhere()

    test_2_args = Namespace(
        vocab_size=300,
        # Model hyper-parameters
        max_sequence_length=200,  # Important parameter; makes a big difference on output.
        dim_model=3,  # embedding size; I tried 300 -> 50
        num_filters=100,  # output filters from the convolution
        window_sizes=[3, 5],  # filter sizes; total filters = len(window_sizes) * num_filters
        num_classes=2,  # binary classification problem
        dropout=0.5,  # 0.5 from the original implementation, kind of high
        # Training hyper-parameters
        num_epochs=3,  # 30 in the original implementation
        learning_rate=1.e-4,  # choosing the LR is important, often paired with a scheduler
        batch_size=64  # from the original implementation
    )

    # mock dataset
    src_tokens = torch.randint(0, test_2_args.vocab_size - 1,
                               size=(test_2_args.batch_size,
                                     test_2_args.max_sequence_length))
    model = text_cnn.TextCNN(test_2_args.vocab_size, test_2_args.dim_model,
                             test_2_args.num_filters, test_2_args.window_sizes,
                             test_2_args.num_classes, test_2_args.dropout)

    # push through model
    y_hat = model((None, src_tokens))

    # expected output
    expected_output = cnn_regression_test_data.CNN_REGRESSION_TEST_DATA

    # assert y_hat is within eps
    eps = 1.e-4
    assert np.allclose(y_hat.data.numpy(), expected_output.data.numpy(), atol=eps)
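
# Optional sanity check (illustrative sketch, not repo code): beyond matching the frozen
# tensor, the TextCNN forward pass above is expected to produce one row of class scores per
# example, i.e. a (batch_size, num_classes) output for the (None, src_tokens) input tuple.
# The assumption about the output shape is mine, not stated by the library.
def _check_text_cnn_output_shape_sketch(y_hat, batch_size: int, num_classes: int) -> None:
    assert y_hat.shape == (batch_size, num_classes)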
def test_transformer_trainer_regression_test():
    utils.set_seed_everywhere()

    test_args = Namespace(
        # Model hyper-parameters
        num_layers_per_stack=2,
        dim_model=512,
        dim_ffn=2048,
        num_heads=8,
        max_sequence_length=20,
        dropout=0.1,
        # Label smoothing loss function hyper-parameters
        label_smoothing=0.1,
        # Training hyper-parameters
        num_epochs=10,
        learning_rate=0.0,
        batch_size=128,
    )

    train_dataloader, vocab_source, vocab_target = transformer_dataset.TransformerDataset.get_training_dataloader(
        test_args)
    vocab_source_size = len(vocab_source)
    vocab_target_size = len(vocab_target)
    model = transformer.Transformer(vocab_source_size, vocab_target_size,
                                    test_args.num_layers_per_stack,
                                    test_args.dim_model, test_args.dim_ffn,
                                    test_args.num_heads,
                                    test_args.max_sequence_length,
                                    test_args.dropout)
    trainer = train.TransformerTrainer(test_args, vocab_target_size,
                                       vocab_target.mask_index, model,
                                       train_dataloader, True)
    trainer.run()
    losses = trainer.loss_cache

    # last loss across initial epochs should be converging
    assert losses[0].data >= losses[-1].data
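
# Background sketch (an assumption about the loss, not the trainer's exact code): with
# label_smoothing=0.1 the target distribution for each position typically places
# 1 - smoothing mass on the gold token and spreads the rest over the remaining vocabulary,
# excluding the padding index. A minimal construction of such a smoothed target looks like
# this; the exclusion of two slots (gold token and padding) is one common convention.
def _smoothed_targets_sketch(targets, vocab_size: int, pad_index: int,
                             smoothing: float = 0.1):
    import torch

    # (batch * seq_len,) gold indices -> (batch * seq_len, vocab_size) distributions
    smooth_value = smoothing / (vocab_size - 2)
    dist = torch.full((targets.size(0), vocab_size), smooth_value)
    dist.scatter_(1, targets.unsqueeze(1), 1.0 - smoothing)
    dist[:, pad_index] = 0.0
    return dist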
def test_word2vec_regression_test():
    utils.set_seed_everywhere()

    test_2_args = Namespace(
        # Model hyper-parameters
        embedding_size=300,
        negative_sample_size=20,  # k examples used in the negative sampling loss function
        # Training hyper-parameters
        batch_size=4096,
        # Vocabulary
        vocab_size=1000,
    )

    word_frequencies = torch.from_numpy(np.random.rand(1000))
    mock_input_1 = torch.randint(0, test_2_args.vocab_size - 1,
                                 size=(test_2_args.batch_size, ))
    mock_input_2 = torch.randint(0, test_2_args.vocab_size - 1,
                                 size=(test_2_args.batch_size, ))
    data = (mock_input_1, mock_input_2)
    model = word2vec.SkipGramNSModel(test_2_args.vocab_size,
                                     test_2_args.embedding_size,
                                     test_2_args.negative_sample_size,
                                     word_frequencies)
    loss = model(data)

    # expected output
    expected_output = word2vec_regression_test_data.WORD2VEC_REGRESSION_TEST_DATA

    # assert the loss is within eps of the expected output
    eps = 1.e-4
    assert np.allclose(loss.data.numpy(), expected_output.data.numpy(), atol=eps)
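
# Background sketch (an assumption, not the model's code): a skip-gram negative-sampling
# loss of the kind SkipGramNSModel appears to return scores each observed (center, context)
# pair against k sampled negatives, roughly
# -log sigma(u_ctx . v_c) - sum_k log sigma(-u_neg . v_c). A minimal per-batch version,
# with the embedding lookups passed in explicitly, could look like this.
def _sgns_loss_sketch(center_vecs, context_vecs, negative_vecs):
    # center_vecs, context_vecs: (batch, dim); negative_vecs: (batch, k, dim)
    import torch
    import torch.nn.functional as F

    pos_score = torch.sum(center_vecs * context_vecs, dim=1)                   # (batch,)
    neg_score = torch.bmm(negative_vecs, center_vecs.unsqueeze(2)).squeeze(2)  # (batch, k)
    loss = -F.logsigmoid(pos_score) - F.logsigmoid(-neg_score).sum(dim=1)
    return loss.mean()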
from argparse import Namespace

import torch
import numpy as np

from nlpmodels.models import transformer
from nlpmodels.utils import utils
from nlpmodels.utils.elt.transformer_batch import TransformerBatch
from nlpmodels.utils.elt.transformer_dataset import TransformerDataset
from nlpmodels.utils.vocabulary import NLPVocabulary
from tests.test_data import transformer_regression_test_data

utils.set_seed_everywhere()


def test_input_output_dims_transformer():
    test_1_args = Namespace(
        num_layers_per_stack=2,
        dim_model=512,
        dim_ffn=2048,
        num_heads=8,
        max_sequence_length=20,
        dropout=0.1,
    )

    # mock dataset
    src_tokens = [["the", "cow", "jumped", "over", "the", "moon"],
                  ["the", "british", "are", "coming"]]
    tgt_tokens = [["la", "vache", "a", "sauté", "sur", "la", "lune"],
                  ["les", "britanniques", "arrivent"]]
    batch_size = len(src_tokens)
    dictionary_source = NLPVocabulary.build_vocabulary(src_tokens)