def test_update_freq(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_update_freq') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, 'fconv_iwslt_de_en', ['--update-freq', '3']) generate_main(data_dir)
def test_levenshtein_transformer(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory( "test_levenshtein_transformer" ) as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir, ["--joined-dictionary"]) train_translation_model( data_dir, "levenshtein_transformer", [ "--apply-bert-init", "--early-exit", "6,6,6", "--criterion", "nat_loss", ], task="translation_lev", ) generate_main( data_dir, [ "--task", "translation_lev", "--iter-decode-max-iter", "9", "--iter-decode-eos-penalty", "0", "--print-step", ], )
def test_raw(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_fconv_raw') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir, ['--dataset-impl', 'raw']) train_translation_model(data_dir, 'fconv_iwslt_de_en', ['--dataset-impl', 'raw']) generate_main(data_dir, ['--dataset-impl', 'raw'])
def test_max_positions(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_max_positions') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) with self.assertRaises(Exception) as context: train_translation_model( data_dir, 'fconv_iwslt_de_en', ['--max-target-positions', '5'], ) self.assertTrue( 'skip this example with --skip-invalid-size-inputs-valid-test' in str(context.exception)) train_translation_model( data_dir, 'fconv_iwslt_de_en', [ '--max-target-positions', '5', '--skip-invalid-size-inputs-valid-test' ], ) with self.assertRaises(Exception) as context: generate_main(data_dir) generate_main(data_dir, ['--skip-invalid-size-inputs-valid-test'])
def test_mixture_of_experts(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_moe') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, 'transformer_iwslt_de_en', [ '--task', 'translation_moe', '--user-dir', 'examples/translation_moe/src', '--method', 'hMoElp', '--mean-pool-gating-network', '--num-experts', '3', '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', ]) generate_main(data_dir, [ '--task', 'translation_moe', '--user-dir', 'examples/translation_moe/src', '--method', 'hMoElp', '--mean-pool-gating-network', '--num-experts', '3', '--gen-expert', '0' ])
def test_optimizers(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_optimizers') as data_dir: # Use just a bit of data and tiny model to keep this test runtime reasonable create_dummy_data(data_dir, num_examples=10, maxlen=5) preprocess_translation_data(data_dir) optimizers = [ 'adafactor', 'adam', 'nag', 'adagrad', 'sgd', 'adadelta' ] last_checkpoint = os.path.join(data_dir, 'checkpoint_last.pt') for optimizer in optimizers: if os.path.exists(last_checkpoint): os.remove(last_checkpoint) train_translation_model(data_dir, 'lstm', [ '--required-batch-size-multiple', '1', '--encoder-layers', '1', '--encoder-hidden-size', '32', '--decoder-layers', '1', '--optimizer', optimizer, ]) generate_main(data_dir)
def test_iterative_nonautoregressive_transformer(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory( 'test_iterative_nonautoregressive_transformer' ) as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir, ['--joined-dictionary']) train_translation_model( data_dir, 'iterative_nonautoregressive_transformer', [ '--apply-bert-init', '--src-embedding-copy', '--criterion', 'nat_loss', '--noise', 'full_mask', '--stochastic-approx', '--dae-ratio', '0.5', '--train-step', '3' ], task='translation_lev') generate_main(data_dir, [ '--task', 'translation_lev', '--iter-decode-max-iter', '9', '--iter-decode-eos-penalty', '0', '--print-step', ])
def test_fp16(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory("test_fp16") as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, "fconv_iwslt_de_en", ["--fp16"]) generate_main(data_dir)
def test_transformer_pointer_generator(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory( 'test_transformer_pointer_generator') as data_dir: create_dummy_data(data_dir) preprocess_summarization_data(data_dir) train_translation_model(data_dir, 'transformer_pointer_generator', [ '--user-dir', 'examples/pointer_generator/src', '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', '--alignment-layer', '-1', '--alignment-heads', '1', '--source-position-markers', '0', ], run_validation=True) generate_main(data_dir)
def test_fconv_self_att_wp(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory( 'test_fconv_self_att_wp') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) config = [ '--encoder-layers', '[(128, 3)] * 2', '--decoder-layers', '[(128, 3)] * 2', '--decoder-attention', 'True', '--encoder-attention', 'False', '--gated-attention', 'True', '--self-attention', 'True', '--project-input', 'True', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', '--decoder-out-embed-dim', '8', '--multihead-self-attention-nheads', '2' ] train_translation_model(data_dir, 'fconv_self_att_wp', config) generate_main(data_dir) # fusion model os.rename(os.path.join(data_dir, 'checkpoint_last.pt'), os.path.join(data_dir, 'pretrained.pt')) config.extend([ '--pretrained', 'True', '--pretrained-checkpoint', os.path.join(data_dir, 'pretrained.pt'), '--save-dir', os.path.join(data_dir, 'fusion_model'), ]) train_translation_model(data_dir, 'fconv_self_att_wp', config)
def test_multilingual_transformer(self): # test with all combinations of encoder/decoder lang tokens encoder_langtok_flags = [[], ['--encoder-langtok', 'src'], ['--encoder-langtok', 'tgt']] decoder_langtok_flags = [[], ['--decoder-langtok']] with contextlib.redirect_stdout(StringIO()): for i in range(len(encoder_langtok_flags)): for j in range(len(decoder_langtok_flags)): enc_ltok_flag = encoder_langtok_flags[i] dec_ltok_flag = decoder_langtok_flags[j] with tempfile.TemporaryDirectory(f'test_multilingual_transformer_{i}_{j}') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model( data_dir, arch='multilingual_transformer', task='multilingual_translation', extra_flags=[ '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', ] + enc_ltok_flag + dec_ltok_flag, lang_flags=['--lang-pairs', 'in-out,out-in'], run_validation=True, extra_valid_flags=enc_ltok_flag + dec_ltok_flag, ) generate_main( data_dir, extra_flags=[ '--task', 'multilingual_translation', '--lang-pairs', 'in-out,out-in', '--source-lang', 'in', '--target-lang', 'out', ] + enc_ltok_flag + dec_ltok_flag, )
def test_dynamicconv(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_dynamicconv') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, 'lightconv_iwslt_de_en', [ '--encoder-conv-type', 'dynamic', '--decoder-conv-type', 'dynamic', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', ]) generate_main(data_dir)
def test_transformer(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_transformer') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, 'transformer_iwslt_de_en', [ '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', ], run_validation=True) generate_main(data_dir)
def test_lstm(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_lstm') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, 'lstm_wiseman_iwslt_de_en', [ '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', '--decoder-out-embed-dim', '8', ]) generate_main(data_dir)
def test_fp16_multigpu(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory("test_fp16") as data_dir: log = os.path.join(data_dir, "train.log") create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model( data_dir, "fconv_iwslt_de_en", ["--fp16", "--log-file", log], world_size=min(torch.cuda.device_count(), 2), ) generate_main(data_dir) assert os.path.exists(log)
def test_transformer_relative_positional_embeddings(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_transformer') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model( data_dir, 'transformer_rel_pos_embeddings', [ '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', '--max-relative-pos', '5' ], run_validation=True) generate_main(data_dir)
def test_lstm_lm_residuals(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_lstm_lm_residuals') as data_dir: create_dummy_data(data_dir) preprocess_lm_data(data_dir) train_language_model( data_dir, 'lstm_lm', ['--add-bos-token', '--residuals'], run_validation=True, ) eval_lm_main(data_dir) generate_main(data_dir, [ '--task', 'language_modeling', '--sample-break-mode', 'eos', '--tokens-per-sample', '500', ])
def test_insertion_transformer(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_insertion_transformer') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir, ['--joined-dictionary']) train_translation_model(data_dir, 'insertion_transformer', [ '--apply-bert-init', '--criterion', 'nat_loss', '--noise', 'random_mask' ], task='translation_lev') generate_main(data_dir, [ '--task', 'translation_lev', '--iter-decode-max-iter', '9', '--iter-decode-eos-penalty', '0', '--print-step', ])
def test_lstm_bidirectional(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_lstm_bidirectional') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, 'lstm', [ '--encoder-layers', '2', '--encoder-bidirectional', '--encoder-hidden-size', '16', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', '--decoder-out-embed-dim', '8', '--decoder-layers', '2', ]) generate_main(data_dir)
def test_fsdp_checkpoint_generate(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory("test_fsdp_sharded") as data_dir: log = os.path.join(data_dir, "train.log") create_dummy_data(data_dir) preprocess_translation_data(data_dir) world_size = min(torch.cuda.device_count(), 2) train_translation_model( data_dir, "fconv_iwslt_de_en", ["--log-file", log, "--ddp-backend", "fully_sharded"], world_size=world_size, ) generate_main(data_dir) assert os.path.exists(log)
def test_fconv_lm(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_fconv_lm') as data_dir: create_dummy_data(data_dir) preprocess_lm_data(data_dir) train_language_model(data_dir, 'fconv_lm', [ '--decoder-layers', '[(850, 3)] * 2 + [(1024,4)]', '--decoder-embed-dim', '280', '--optimizer', 'nag', '--lr', '0.1', ]) eval_lm_main(data_dir) generate_main(data_dir, [ '--task', 'language_modeling', '--sample-break-mode', 'eos', '--tokens-per-sample', '500', ])
def test_multilingual_translation_latent_depth(self): # test with latent depth in encoder, decoder, or both encoder_latent_layer = [[], ['--encoder-latent-layer']] decoder_latent_layer = [[], ['--decoder-latent-layer']] with contextlib.redirect_stdout(StringIO()): for i in range(len(encoder_latent_layer)): for j in range(len(decoder_latent_layer)): if i == 0 and j == 0: continue enc_ll_flag = encoder_latent_layer[i] dec_ll_flag = decoder_latent_layer[j] with tempfile.TemporaryDirectory(f'test_multilingual_translation_latent_depth_{i}_{j}') as data_dir: create_dummy_data(data_dir) preprocess_translation_data( data_dir, extra_flags=['--joined-dictionary'] ) train_translation_model( data_dir, arch='latent_multilingual_transformer', task='multilingual_translation_latent_depth', extra_flags=[ '--user-dir', 'examples/latent_depth/src', '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', '--share-encoders', '--share-decoders', '--sparsity-weight', '0.1', ] + enc_ll_flag + dec_ll_flag, lang_flags=['--lang-pairs', 'in-out,out-in'], run_validation=True, extra_valid_flags=['--user-dir', 'examples/latent_depth/src'] + enc_ll_flag + dec_ll_flag, ) generate_main( data_dir, extra_flags=[ '--user-dir', 'examples/latent_depth/src', '--task', 'multilingual_translation_latent_depth', '--lang-pairs', 'in-out,out-in', '--source-lang', 'in', '--target-lang', 'out', ] + enc_ll_flag + dec_ll_flag, )
def test_alignment(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_alignment') as data_dir: create_dummy_data(data_dir, alignment=True) preprocess_translation_data(data_dir, ['--align-suffix', 'align']) train_translation_model( data_dir, 'transformer_align', [ '--encoder-layers', '2', '--decoder-layers', '2', '--encoder-embed-dim', '8', '--decoder-embed-dim', '8', '--load-alignments', '--alignment-layer', '1', '--criterion', 'label_smoothed_cross_entropy_with_alignment' ], run_validation=True, ) generate_main(data_dir)
def test_transformer_fp16(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory("test_transformer") as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model( data_dir, "transformer_iwslt_de_en", [ "--encoder-layers", "2", "--decoder-layers", "2", "--encoder-embed-dim", "64", "--decoder-embed-dim", "64", "--fp16", ], run_validation=True, ) generate_main(data_dir)
def test_generation(self): with contextlib.redirect_stdout(StringIO()): with tempfile.TemporaryDirectory('test_sampling') as data_dir: create_dummy_data(data_dir) preprocess_translation_data(data_dir) train_translation_model(data_dir, 'fconv_iwslt_de_en') generate_main(data_dir, [ '--sampling', '--temperature', '2', '--beam', '2', '--nbest', '2', ]) generate_main(data_dir, [ '--sampling', '--sampling-topk', '3', '--beam', '2', '--nbest', '2', ]) generate_main(data_dir, [ '--sampling', '--sampling-topp', '0.2', '--beam', '2', '--nbest', '2', ]) generate_main(data_dir, [ '--diversity-rate', '0.5', '--beam', '6', ]) with self.assertRaises(ValueError): generate_main(data_dir, [ '--diverse-beam-groups', '4', '--match-source-len', ]) generate_main(data_dir, ['--prefix-size', '2']) generate_main(data_dir, ['--retain-dropout'])