# Imports needed by the tests in this excerpt (the class definitions and
# module-level fixtures they belong to are not shown here).
import os

from parlai.core.opt import Opt
import parlai.utils.testing as testing_utils


def test_badinput(self):
    """
    Ensures model doesn't crash on malformed inputs.
    """
    testing_utils.train_model(
        dict(
            task='integration_tests:bad_example',
            model='transformer/generator',
            batchsize=10,
            datatype='train:ordered:stream',
            num_epochs=1,
            numthreads=1,
            no_cuda=True,
            embedding_size=16,
            skip_generation=True,
            hiddensize=16,
        )
    )

def test_train_fixed(self):
    args = self._get_args()
    args['candidates'] = 'fixed'
    args['encode_candidate_vecs'] = False
    valid, test = testing_utils.train_model(args)
    threshold = self._get_threshold()
    self.assertGreaterEqual(valid['hits@1'], threshold)

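# The _get_args()/_get_threshold() helpers used above and below are defined on
# the test class but are not shown in this excerpt. A minimal sketch of the
# shape these tests assume follows; all names and values here are assumptions
# mirroring options used elsewhere in this file, not the real helpers.
def _get_args(self):
    # base options shared by the candidate-ranking tests
    return dict(
        task='integration_tests:candidate',
        model='transformer/ranker',
        optimizer='adamax',
        learningrate=7e-3,
        batchsize=32,
        num_epochs=4,
        n_layers=1,
        n_heads=1,
        ffn_size=32,
        embedding_size=32,
    )

def _get_threshold(self):
    # minimum hits@1 the toy task is expected to reach
    return 0.9
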
def test_badinput(self):
    """
    Ensures model doesn't crash on malformed inputs.
    """
    # LR is a module-level constant defined elsewhere (not shown in this excerpt)
    testing_utils.train_model(
        dict(
            task='integration_tests:bad_example',
            model='seq2seq',
            learningrate=LR,
            batchsize=10,
            datatype='train:ordered:stream',
            num_epochs=1,
            numthreads=1,
            embeddingsize=16,
            hiddensize=16,
            inference='greedy',
        )
    )

def _run(self, **kwargs):
    opt = {**self.BASE_ARGS, **kwargs}
    valid_report, test_report = testing_utils.train_model(opt)
    assert valid_report['unique'] == NUM_TEST
    assert valid_report['times_seen'] == 1
    assert test_report['unique'] == NUM_TEST
    assert test_report['times_seen'] == 1
    return valid_report, test_report

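# BASE_ARGS and NUM_TEST are module-level fixtures not shown in this excerpt.
# A minimal sketch of what _run() assumes: NUM_TEST is the number of unique
# examples in the task, and each example should be seen exactly once per
# epoch. The values below are illustrative assumptions, not the real fixtures.
NUM_TEST = 100
BASE_ARGS = dict(
    task='integration_tests',
    model='repeat_label',
    datatype='train:ordered',
    num_epochs=1,
    batchsize=1,
)
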
def test_train_batch_all(self):
    args = self._get_args()
    args['candidates'] = 'batch-all-cands'
    args['eval_candidates'] = 'batch-all-cands'
    valid, test = testing_utils.train_model(args)
    threshold = self._get_threshold()
    self.assertGreaterEqual(valid['hits@1'], threshold)

def test_train_inline(self):
    args = self._get_args()
    args['candidates'] = 'inline'
    args['eval_candidates'] = 'inline'
    valid, test = testing_utils.train_model(args)
    threshold = self._get_threshold()
    self.assertGreaterEqual(valid['hits@1'], threshold)

def test_eval_vocab(self):
    args = self._get_args()
    args['eval_candidates'] = 'vocab'
    args['encode_candidate_vecs'] = True
    valid, test = testing_utils.train_model(args)
    # accuracy should be zero: none of the vocab candidates should be the
    # correct label
    self.assertEqual(valid['hits@100'], 0)

def test_training(self):
    valid, test = testing_utils.train_model(
        {'model': 'starspace', 'task': 'integration_tests', 'num_epochs': 1.0}
    )
    assert valid['hits@1'] > 0.5
    assert test['hits@1'] > 0.5

def test_text_task(self):
    """
    Test that model correctly handles text task.
    """
    args = BASE_ARGS.copy()
    args.update(TEXT_ARGS)

    valid, test = testing_utils.train_model(args)
    self.assertLessEqual(
        valid['ppl'], 1.5, 'failed to train image_seq2seq on text task'
    )

def test_unigram(self):
    valid, test = testing_utils.train_model(
        {
            'model': 'test_agents/unigram',
            'task': 'integration_tests',
            'num_epochs': 1.0,
            'batchsize': 32,
            'truncate': 4,
        }
    )
    assert valid['f1'] > 0

def test_ranker(self):
    testing_utils.train_model(
        {
            'task': 'integration_tests',
            'model': 'transformer/ranker',
            'candidates': 'batch',
            'model_parallel': True,
            **MODEL_OPTS,
        }
    )
    # data_parallel and model_parallel are mutually exclusive, so combining
    # them should raise
    with self.assertRaises(RuntimeError):
        testing_utils.train_model(
            {
                'task': 'integration_tests',
                'model': 'transformer/ranker',
                'data_parallel': True,
                'model_parallel': True,
                'candidates': 'batch',
                **MODEL_OPTS,
            }
        )

def test_resuming(self):
    """
    Test saving and resuming training.
    """
    with testing_utils.tempdir() as tmpdir:
        model_file = os.path.join(tmpdir, 'model')
        valid1, test1 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests:candidate',
                model='transformer/ranker',
                optimizer='adamax',
                learningrate=7e-3,
                batchsize=32,
                num_epochs=1,
                n_layers=1,
                n_heads=1,
                ffn_size=32,
                embedding_size=32,
                warmup_updates=1,
                lr_scheduler='invsqrt',
            )
        )
        valid2, test2 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests:candidate',
                model='transformer/ranker',
                num_epochs=1,
            )
        )
        # make sure the number of updates is being tracked correctly
        self.assertGreater(
            valid2['total_train_updates'],
            valid1['total_train_updates'],
            'Number of updates is not increasing',
        )
        # make sure the learning rate is decreasing
        self.assertLess(
            valid2['lr'], valid1['lr'], 'Learning rate is not decreasing'
        )

def test_babi(self):
    valid, test = testing_utils.train_model(
        {
            'task': 'babi:task1k:1',
            'model': 'ir_baseline',
            'batchsize': 1,
            'datatype': 'train:ordered',
            'num_epochs': 1,
        }
    )
    assert valid['f1'] == 0.41
    assert test['f1'] >= 0.437

def test_integration(self):
    valid, test = testing_utils.train_model(
        {
            'task': 'integration_tests',
            'model': 'ir_baseline',
            'batchsize': 1,
            'datatype': 'train:ordered',
            'num_epochs': 1,
        }
    )
    assert valid['f1'] >= 0.99
    assert test['f1'] >= 0.99

def test_fast_final_eval(self):
    valid, test = testing_utils.train_model(
        {
            'task': 'integration_tests',
            'validation_max_exs': 10,
            'model': 'repeat_label',
            'short_final_eval': True,
            'num_epochs': 1.0,
        }
    )
    self.assertEqual(valid['exs'], 10, 'Validation exs is wrong')
    self.assertEqual(test['exs'], 10, 'Test exs is wrong')

def test_topk(self):
    """Test topk generation."""
    # Topk is inherently stochastic, just ensure no crash.
    testing_utils.train_model(
        dict(
            task='integration_tests:nocandidate',
            model='transformer/generator',
            optimizer='adamax',
            learningrate=7e-3,
            batchsize=32,
            num_epochs=20,
            n_layers=1,
            n_heads=1,
            ffn_size=32,
            embedding_size=32,
            inference='topk',
            topk=5,
            beam_size=5,
        )
    )

def test_resuming_memeff2safe(self):
    """
    Test switching from memory efficient fp16 to safe fp16.
    """
    with testing_utils.tempdir() as tmpdir:
        model_file = os.path.join(tmpdir, 'model')
        valid1, test1 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests',
                model='transformer/generator',
                optimizer='adam',
                fp16=True,
                fp16_impl='mem_efficient',
                learningrate=1e-3,
                batchsize=32,
                num_epochs=0.25,
                n_layers=1,
                n_heads=1,
                ffn_size=32,
                embedding_size=32,
                warmup_updates=1,
                lr_scheduler='invsqrt',
                skip_generation=True,
            )
        )
        valid2, test2 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests',
                model='transformer/generator',
                fp16_impl='safe',
                num_epochs=0.5,
            )
        )
        # make sure the number of updates is being tracked correctly
        self.assertGreater(
            valid2['total_train_updates'],
            valid1['total_train_updates'],
            'Number of updates is not increasing',
        )

def test_multitasking_metrics_macro(self):
    valid, test = testing_utils.train_model(
        {
            'task': 'integration_tests:candidate,'
            'integration_tests:multiturnCandidate',
            'model': 'random_candidate',
            'num_epochs': 0.5,
            'aggregate_micro': False,
        }
    )

    task1_acc = valid['integration_tests:candidate/accuracy']
    task2_acc = valid['integration_tests:multiturnCandidate/accuracy']
    total_acc = valid['accuracy']
    # metrics should be averaged equally across tasks
    self.assertEqual(
        total_acc,
        0.5 * (task1_acc.value() + task2_acc.value()),
        'Task accuracy is averaged incorrectly',
    )

def test_multitask(self):
    """
    Test that model can handle multiple inputs.
    """
    args = BASE_ARGS.copy()
    args.update(MULTITASK_ARGS)

    valid, test = testing_utils.train_model(args)
    self.assertLessEqual(
        valid['ppl'], 5.0, 'failed to train image_seq2seq on image+text task'
    )

def test_train_model(self):
    """
    Check the training script doesn't crash.
    """
    opt = {
        'model': 'projects.self_feeding.self_feeding_agent:SelfFeedingAgent',
        'task': 'self_feeding:all',
        'max_train_time': 120,
        'dia_train': 'train_hh131k_hb60k.txt',
        'n_layers': 2,
        'n_heads': 2,
        'candidates': 'batch',
        'validation_metric': 'dia_acc',
        'optimizer': 'adamax',
        'learningrate': 0.0025,
        'ffn_size': 32,
        'batchsize': 32,
        'embeddings_scale': False,
    }
    testing_utils.train_model(opt)

def test_eval_inline(self):
    args = self._get_args()
    args['eval_candidates'] = 'inline'
    valid, test = testing_utils.train_model(args)
    threshold = self._get_threshold()
    self.assertGreaterEqual(
        valid['hits@1'], threshold, "valid hits@1 = {}".format(valid['hits@1'])
    )

def test_train_batch_all(self):
    args = self._get_args()
    args['candidates'] = 'batch-all-cands'
    valid, test = testing_utils.train_model(args)
    threshold = self._get_threshold()
    self.assertGreaterEqual(
        valid['hits@1'], threshold, "valid hits@1 = {}".format(valid['hits@1'])
    )

def test_multitasking_metrics(self):
    valid, test = testing_utils.train_model(
        {
            'task': 'integration_tests:candidate,'
            'integration_tests:multiturnCandidate',
            'model': 'random_candidate',
            'num_epochs': 0.5,
            'aggregate_micro': True,
        }
    )
    task1_acc = valid['integration_tests:candidate/accuracy']
    task2_acc = valid['integration_tests:multiturnCandidate/accuracy']
    total_acc = valid['accuracy']
    # task 2 is 4 times the size of task 1
    self.assertAlmostEqual(
        total_acc.value(),
        (task1_acc.value() + 4 * task2_acc.value()) / 5,
        4,
        'Task accuracy is averaged incorrectly',
    )

    valid, test = testing_utils.train_model(
        {
            'task': 'integration_tests:candidate,'
            'integration_tests:multiturnCandidate',
            'model': 'random_candidate',
            'num_epochs': 0.5,
            'aggregate_micro': False,
        }
    )
    task1_acc = valid['integration_tests:candidate/accuracy']
    task2_acc = valid['integration_tests:multiturnCandidate/accuracy']
    total_acc = valid['accuracy']
    # metrics should be averaged equally across tasks
    self.assertAlmostEqual(
        total_acc.value(),
        (task1_acc.value() + task2_acc.value()) / 2,
        4,
        'Task accuracy is averaged incorrectly',
    )

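# For reference, with per-task example counts n1, n2 and accuracies a1, a2,
# the two aggregation modes checked above reduce to:
#   micro (aggregate_micro=True):  (n1 * a1 + n2 * a2) / (n1 + n2)
#   macro (aggregate_micro=False): (a1 + a2) / 2
# In these tests task 2 contributes 4x as many examples as task 1, hence the
# (a1 + 4 * a2) / 5 expectation in the micro case.
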
def test_resuming_fp32(self):
    """
    Test resuming in FP32 after training with FP16.
    """
    with testing_utils.tempdir() as tmpdir:
        model_file = os.path.join(tmpdir, 'model')
        valid1, test1 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests:candidate',
                model='transformer/ranker',
                optimizer='adam',
                fp16=True,
                fp16_impl='mem_efficient',
                learningrate=7e-3,
                batchsize=32,
                num_epochs=0.1,
                n_layers=1,
                n_heads=1,
                ffn_size=32,
                embedding_size=32,
                warmup_updates=1,
                lr_scheduler='invsqrt',
            )
        )
        valid2, test2 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests:candidate',
                model='transformer/ranker',
                num_epochs=0.25,
                fp16=False,
            )
        )
        # make sure the number of updates is being tracked correctly
        self.assertGreater(
            valid2['total_train_updates'],
            valid1['total_train_updates'],
            'Number of updates is not increasing',
        )

def test_train_fixed(self):
    args = self._get_args()
    args['candidates'] = 'fixed'
    args['encode_candidate_vecs'] = False
    valid, test = testing_utils.train_model(args)
    threshold = self._get_threshold()
    self.assertGreaterEqual(
        valid['hits@1'], threshold, "valid hits@1 = {}".format(valid['hits@1'])
    )

def test_bart(self):
    valid, test = testing_utils.train_model(
        dict(
            task='integration_tests:nocandidate',
            model='bart',
            dict_file='zoo:bart/bart_large/model.dict',
            optimizer='sgd',
            learningrate=1,
            batchsize=4,
            num_epochs=1,
        )
    )
    self.assertAlmostEqual(test['ppl'], 1.0, places=2)

def test_short_multiobj_training(self):
    opt = {
        'model': 'projects.light_whoami.agents.multi_objective:MultiObjectiveGeneratorAgent',
        'n_multiobjective_heads': 4,
        'n_multiobjective_layers': 2,
        **COMMON_OPT,
        'task': 'projects.light_whoami.task.agents:MultiObjectiveTeacher',
    }
    opt.pop('num_examples')
    opt.update(TRAIN_COMMON_OPT)
    for choice in [
        'decoder_final_layer',
        'encoder_final_layer',
        'encoder_and_decoder',
    ]:
        opt['multiobjective_latent_representation'] = choice
        testing_utils.train_model(opt)

def test_multitask(self):
    """
    Test that model correctly handles multiple inputs.

    Random chance is 10%, so this should be able to get much better than that
    very quickly.
    """
    args = Opt({**self.base_args, **self.multitask_args})
    valid, test = testing_utils.train_model(args)
    assert (
        valid['accuracy'] > 0.2
    ), f'ImagePolyencoderAgent val-set accuracy on a simple task was {valid["accuracy"].value():0.2f}.'

def test_image_task(self):
    """
    Test that model correctly handles image task.

    No training, only eval.
    """
    args = BASE_ARGS.copy()
    args.update(IMAGE_ARGS)

    valid, test = testing_utils.train_model(args)
    self.assertLessEqual(
        valid['ppl'], 8.6, 'failed to train image_seq2seq on image task'
    )

def test_resuming_reduce_on_plateau(self):
    """
    Reduce on Plateau can be tricky when combined with warmup.

    See: https://github.com/facebookresearch/ParlAI/pull/1812
    """
    with testing_utils.tempdir() as tmpdir:
        model_file = os.path.join(tmpdir, 'model')
        valid1, test1 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests:candidate',
                model='transformer/ranker',
                optimizer='adamax',
                learningrate=7e-3,
                batchsize=32,
                num_epochs=1,
                n_layers=1,
                n_heads=1,
                ffn_size=32,
                embedding_size=32,
                warmup_updates=1,
                lr_scheduler='reduceonplateau',
            )
        )
        valid2, test2 = testing_utils.train_model(
            dict(
                model_file=model_file,
                task='integration_tests:candidate',
                model='transformer/ranker',
                num_epochs=1,
                lr_scheduler='reduceonplateau',
            )
        )
        # make sure the learning rate was not spuriously driven down on resume
        self.assertGreater(
            valid2['lr'], 1e-5, 'Learning rate should not be that low when resuming'
        )