def test_file_inference(self):
    """
    Test --inference with older model files.
    """
    testing_utils.download_unittest_models()

    with testing_utils.capture_output():
        pp = ParlaiParser(True, True)
        opt = pp.parse_args(
            ['--model-file', 'zoo:unittest/transformer_generator2/model']
        )
        agent = create_agent(opt, True)
        self.assertEqual(agent.opt['inference'], 'greedy')

    with testing_utils.capture_output():
        pp = ParlaiParser(True, True)
        opt = pp.parse_args(
            [
                '--model-file',
                'zoo:unittest/transformer_generator2/model',
                '--beam-size',
                '5',
            ],
            print_args=False,
        )
        agent = create_agent(opt, True)
        self.assertEqual(agent.opt['inference'], 'beam')
def test_help(self):
    with testing_utils.capture_output() as output:
        script.superscript_main(args=['help'])
        assert 'test_script' in output.getvalue()
        assert 'hidden_script' not in output.getvalue()

    with testing_utils.capture_output() as output:
        script.superscript_main(args=['helpall'])
        assert 'test_script' in output.getvalue()
        assert 'hidden_script' in output.getvalue()
def test_help(self):
    with testing_utils.capture_output() as output:
        script.superscript_main(args=['help'])
        assert 'test_script' in output.getvalue()
        assert 'hidden_script' not in output.getvalue()
        # showing help for the super command, not the subcommand
        assert '--foo' not in output.getvalue()

    with testing_utils.capture_output() as output:
        script.superscript_main(args=['helpall'])
        assert 'test_script' in output.getvalue()
        assert 'hidden_script' in output.getvalue()
def test_download_multiprocess_chunks(self):
    # Tests that the three finish downloading but may finish in any order
    urls = [
        'https://parl.ai/downloads/mnist/mnist.tar.gz',
        'https://parl.ai/downloads/mnist/mnist.tar.gz.BAD',
        'https://parl.ai/downloads/mnist/mnist.tar.gz.BAD',
    ]

    with testing_utils.capture_output() as stdout:
        download_results = build_data.download_multiprocess(
            urls, self.datapath, dest_filenames=self.dest_filenames, chunk_size=1
        )
        stdout = stdout.getvalue()

    output_filenames, output_statuses, output_errors = zip(*download_results)

    self.assertIn('mnist0.tar.gz', output_filenames, f'missing file:\n{stdout}')
    self.assertIn('mnist1.tar.gz', output_filenames, f'missing file:\n{stdout}')
    self.assertIn('mnist2.tar.gz', output_filenames, f'missing file:\n{stdout}')
    self.assertIn(200, output_statuses, f'unexpected error code:\n{stdout}')
    self.assertIn(403, output_statuses, f'unexpected error code:\n{stdout}')
def run_selfchat(self):
    """
    Run selfchat for each model.
    """
    for model in self.models:
        try:
            torch.cuda.empty_cache()
        except Exception:
            pass
        self._print_progress(f'Running self-chat for {model}')
        outfile = self._get_selfchat_log_path(model)
        if not os.path.exists(outfile):
            config = self._get_selfchat_config(model)
            with capture_output():
                parser = self_chat_setup_args()
                parser.set_params(**config)
                opt = parser.parse_args(args=[])
            self_chat(opt)
            if os.path.exists(outfile):
                self._print_progress(f'Chats saved to {outfile} for {model}')
        else:
            self._print_progress(f'Chats already exist in {outfile}, moving on...')
        self.chat_files[model] = outfile
def test_gpt2_bpe_tokenize(self):
    with testing_utils.capture_output():
        opt = Opt({'dict_tokenizer': 'gpt2', 'datapath': './data'})
        agent = DictionaryAgent(opt)
    self.assertEqual(
        # grinning face emoji
        agent.gpt2_tokenize(u'Hello, ParlAI! \U0001f600'),
        [
            'Hello',
            ',',
            r'\xc4\xa0Par',
            'l',
            'AI',
            '!',
            r'\xc4\xa0\xc3\xb0\xc5\x81\xc4\xba',
            r'\xc4\xa2',
        ],
    )
    self.assertEqual(
        agent.vec2txt(
            agent.tok2ind[w]
            for w in [
                'Hello',
                ',',
                r'\xc4\xa0Par',
                'l',
                'AI',
                '!',
                r'\xc4\xa0\xc3\xb0\xc5\x81\xc4\xba',
                r'\xc4\xa2',
            ]
        ),
        # grinning face emoji
        u'Hello, ParlAI! \U0001f600',
    )
def setup_teardown(self):
    """
    Call code to set up and tear down tests.

    Run this only once because we'll be running all analysis code before checking
    any results.
    """
    outputs = {}

    for case, flag_string in self.CASES.items():
        # Paths
        analysis_samples_folder = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'analysis_samples', case
        )
        analysis_outputs_folder = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'test_model_chat_analysis',
        )
        outputs[f'{case}__expected_stdout_path'] = os.path.join(
            analysis_outputs_folder, f'{case}__test_stdout.txt'
        )
        prefixes = ['results', 'worker_results']

        with testing_utils.tempdir() as tmpdir:
            # Run analysis
            with testing_utils.capture_output() as output:
                arg_string = f"""\
--results-folders {analysis_samples_folder}
--output-folder {tmpdir} \
{flag_string}
"""
                parser_ = ModelChatResultsCompiler.setup_args()
                args_ = parser_.parse_args(arg_string.split())
                ModelChatResultsCompiler(vars(args_)).compile_and_save_results()
                stdout = output.getvalue()

            # Define output structure
            filtered_stdout = '\n'.join(
                [line for line in stdout.split('\n') if not line.endswith('.csv')]
            )
            # Don't track lines that record where a file was saved to, because
            # filenames are timestamped
            outputs[f'{case}__stdout'] = filtered_stdout
            for prefix in prefixes:
                results_path = list(
                    glob.glob(os.path.join(tmpdir, f'{prefix}_*'))
                )[0]
                with open(results_path) as f:
                    outputs[f'{case}__{prefix}'] = f.read()

    yield outputs
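# The function below is a minimal, hypothetical sketch (not taken from the original
# test file) of how a test could consume the setup_teardown generator above, assuming
# it is registered as a module- or class-scoped pytest fixture. The test name is an
# assumption for illustration only; it relies solely on the keys that the fixture
# stores in its `outputs` dict.
def test_stdout_matches_expected(self, setup_teardown):
    outputs = setup_teardown
    for case in self.CASES:
        # Compare the filtered stdout captured during analysis against the
        # checked-in expected stdout for this case.
        with open(outputs[f'{case}__expected_stdout_path']) as f:
            expected_stdout = f.read()
        assert outputs[f'{case}__stdout'] == expected_stdout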
def test_final_extra_eval_and_save_json(self):
    """
    Test "final_extra_valid_opt_filepath". Happens to test that saving reports as
    json works too.

    We copy train_model from testing_utils to directly access train loop.
    """
    import parlai.scripts.train_model as tms

    def get_tl(tmpdir):
        final_opt = Opt(
            {
                'task': 'integration_tests',
                'datatype': 'valid',
                'validation_max_exs': 30,
                'short_final_eval': True,
            }
        )
        final_opt.save(os.path.join(tmpdir, "final_opt.opt"))

        opt = Opt(
            {
                'task': 'integration_tests',
                'validation_max_exs': 10,
                'model': 'repeat_label',
                'model_file': os.path.join(tmpdir, 'model'),
                'short_final_eval': True,
                'num_epochs': 1.0,
                'final_extra_opt': str(os.path.join(tmpdir, "final_opt.opt")),
            }
        )
        parser = tms.setup_args()
        parser.set_params(**opt)
        popt = parser.parse_args([])
        for k, v in opt.items():
            popt[k] = v
        return tms.TrainLoop(popt)

    with testing_utils.capture_output(), testing_utils.tempdir() as tmpdir:
        tl = get_tl(tmpdir)
        _, _ = tl.train()

        with open(os.path.join(tmpdir, 'model.trainstats')) as f:
            data = json.load(f)
            print(data)
            self.assertEqual(
                data["final_valid_report"]["exs"],
                10,
                "Validation exs saved incorrectly",
            )
            self.assertEqual(
                data["final_extra_valid_report"]["exs"],
                30,
                "Final validation exs saved incorrectly",
            )
def test_inference(self):
    """Test --inference with simple options."""
    with testing_utils.capture_output():
        upgraded = TorchGeneratorAgent.upgrade_opt({'beam_size': 1})
        self.assertEqual(upgraded['inference'], 'greedy')

        upgraded = TorchGeneratorAgent.upgrade_opt({'beam_size': 5})
        self.assertEqual(upgraded['inference'], 'beam')
def setUpClass(cls):
    # go ahead and download things here
    with testing_utils.capture_output():
        parser = display_data.setup_args()
        parser.set_defaults(**END2END_OPTIONS)
        opt = parser.parse_args(print_args=False)
        opt['num_examples'] = 1
        display_data.display_data(opt)
def setUpClass(cls):
    """Set up the test by downloading the model/data."""
    with testing_utils.capture_output():
        parser = display_data.setup_args()
        parser.set_defaults(**MODEL_OPTIONS)
        opt = parser.parse_args(print_args=False)
        opt['num_examples'] = 1
        display_data.display_data(opt)
def test_help(self):
    helptext = _TestScript.help()
    assert 'My Description' in helptext
    assert '--foo' in helptext
    assert '--bar' not in helptext

    with testing_utils.capture_output() as output:
        with self.assertRaises(SystemExit):
            _TestScript.main('--help')
        assert '--foo' in output.getvalue()
        assert '--bar' not in output.getvalue()

    with testing_utils.capture_output() as output:
        with self.assertRaises(SystemExit):
            _TestScript.main('--helpall')
        assert '--foo' in output.getvalue()
        assert '--bar' in output.getvalue()
def test_train_model_with_no_dict_file(self):
    """Ensure training a model requires a dict_file or model_file."""
    import parlai.scripts.train_model as tms

    with testing_utils.capture_output():
        parser = tms.setup_args()
        parser.set_params(task='babi:task1k:1', model='seq2seq')
        popt = parser.parse_args(print_args=False)
        with self.assertRaises(RuntimeError):
            tms.TrainLoop(popt)
def get_teacher_act(defaults, teacher_processed=False, agent_to=None):
    parser = train_setup_args()
    parser.set_defaults(**defaults)
    opt = parser.parse_args()
    build_dict(opt)
    with testing_utils.capture_output() as _:
        teacher = create_task_agent_from_taskname(opt)[0]
        agent = create_agent(opt)
        act = teacher.act()
        if teacher_processed:
            return act, agent
        return agent.observe(act), agent
def test_resume_checkpoint(self):
    """
    Make sure when resuming training that model uses appropriate mf.

    Copy train_model from testing_utils to directly access agent.
    """
    import parlai.scripts.train_model as tms

    def get_popt_and_tl(opt):
        parser = tms.setup_args()
        parser.set_params(**opt)
        popt = parser.parse_args(print_args=False)
        for k, v in opt.items():
            popt[k] = v
        return popt, tms.TrainLoop(popt)

    def get_opt(init_mf, mf):
        return {
            'task': 'integration_tests',
            'init_model': init_mf,
            'model': 'parlai.agents.test_agents.dummy_torch_agent:MockTorchAgent',
            'model_file': mf,
            'num_epochs': 3,
            'validation_every_n_epochs': 1,
            'save_after_valid': True,
            'log_every_n_secs': 10,
        }

    with capture_output():
        with tempdir() as tmpdir:
            # First train model with init_model path set
            mf = os.path.join(tmpdir, 'model')
            init_mf = os.path.join(tmpdir, 'init_model')
            with open(init_mf, 'w') as f:
                f.write(' ')
            opt = get_opt(init_mf, mf)
            popt, tl = get_popt_and_tl(opt)
            agent = tl.agent
            # init model file should be set appropriately
            init_model_file, is_finetune = agent._get_init_model(popt, None)
            self.assertEqual(init_model_file, init_mf)
            self.assertTrue(is_finetune)
            valid, test = tl.train()

            # now, train the model for another epoch
            opt = get_opt('{}.checkpoint'.format(mf), mf)
            opt['load_from_checkpoint'] = True
            popt, tl = get_popt_and_tl(opt)
            agent = tl.agent
            init_model_file, is_finetune = agent._get_init_model(popt, None)
            self.assertEqual(init_model_file, '{}.checkpoint'.format(mf))
            self.assertFalse(is_finetune)
def test_verify_data(self):
    parser = setup_args()
    opt = parser.parse_args([])
    changed_task_files = [
        fn
        for fn in testing_utils.git_changed_files()
        if testing_utils.is_new_task_filename(fn)
    ]
    if not changed_task_files:
        return

    found_errors = False
    for file in changed_task_files:
        task = file.split('/')[-2]
        module_name = "%s.tasks.%s.agents" % ('parlai', task)
        task_module = importlib.import_module(module_name)
        subtasks = [
            ':'.join([task, x])
            for x in dir(task_module)
            if x.endswith('Teacher') and x not in BASE_TEACHERS
        ]

        if testing_utils.is_this_circleci():
            if len(subtasks) == 0:
                continue
            self.fail(
                'test_verify_data plays poorly with CircleCI. Please run '
                '`python tests/datatests/test_new_tasks.py` locally and '
                'paste the output in your pull request.'
            )

        for subt in subtasks:
            parser = setup_args()
            opt = parser.parse_args(args=['--task', subt])
            opt['task'] = subt
            try:
                with testing_utils.capture_output():
                    text, log = verify(opt)
            except Exception:
                found_errors = True
                traceback.print_exc()
                print("Got above exception in {}".format(subt))
            for key in KEYS:
                if log[key] != 0:
                    print('There are {} {} in {}.'.format(log[key], key, subt))
                    found_errors = True

    if found_errors:
        self.fail(
            "Please fix the above listed errors, or describe in the PR why "
            "you do not expect them to pass."
        )
def _convert_task_to_conversations(self, model: str):
    """
    Convert task data to conversations format.
    """
    self._print_progress(
        f'Converting task data to conversations format for {model}'
    )
    config = self._get_task_conversion_config(model)
    with capture_output():
        parser = convert_task_setup_args()
        parser.set_params(**config)
        opt = parser.parse_args(args=[])
    convert_task_data(opt)
def run_display_test(defaults, ep_and_ex_counts):
    with testing_utils.capture_output() as f:
        parser = display_setup_args()
        parser.set_defaults(**defaults)
        opt = parser.parse_args()
        display_data(opt)
    str_output = f.getvalue()
    self.assertTrue(
        '[ loaded {} episodes with a total of {} examples ]'.format(
            ep_and_ex_counts[0], ep_and_ex_counts[1]
        )
        in str_output,
        'PytorchDataTeacher multitasking failed with '
        'following args: {}'.format(opt),
    )
def test_output(self):
    """Does display_data reach the end of the loop?"""
    with testing_utils.capture_output() as stdout:
        parser = ParlaiParser()
        opt = parser.parse_args(['--task', 'babi:task1k:1'], print_args=False)
        opt['num_examples'] = 1
        display_data(opt)

    str_output = stdout.getvalue()
    self.assertGreater(len(str_output), 0, "Output is empty")
    self.assertIn("[babi:task1k:1]:", str_output, "Babi task did not print")
    self.assertIn("~~", str_output, "Example output did not complete")
def test_pyt_preprocess(self):
    """
    Test that the preprocess functionality works with the PytorchDataTeacher with a
    sample TorchAgent (here, the Seq2seq model).

    This tests whether the action provided by the preprocessed teacher is equivalent
    to the agent's observation after the agent processes it.
    """

    def get_teacher_act(defaults, teacher_processed=False, agent_to=None):
        parser = train_setup_args()
        parser.set_defaults(**defaults)
        opt = parser.parse_args()
        build_dict(opt)
        with testing_utils.capture_output() as _:
            teacher = create_task_agent_from_taskname(opt)[0]
            agent = create_agent(opt)
            act = teacher.act()
            if teacher_processed:
                return act, agent
            return agent.observe(act), agent

    with testing_utils.capture_output() as _, testing_utils.tempdir() as tmpdir:
        defaults = unit_test_parser_defaults.copy()
        defaults['batch_size'] = 1
        defaults['datatype'] = 'train:stream:ordered'

        # Get processed act from agent
        defaults['model_file'] = os.path.join(tmpdir, 'model')
        defaults['dict_file'] = os.path.join(tmpdir, 'model.dict')
        agent_processed_observation, agent1 = get_teacher_act(defaults)

        # Get preprocessed act from teacher
        defaults['model_file'] = os.path.join(tmpdir, 'model')
        defaults['dict_file'] = os.path.join(tmpdir, 'model.dict')
        defaults['pytorch_preprocess'] = True
        teacher_processed_act, agent2 = get_teacher_act(
            defaults, teacher_processed=True
        )  # noqa: E501

        for key in agent_processed_observation:
            val1 = agent_processed_observation[key]
            val2 = teacher_processed_act[key]
            if isinstance(val1, torch.Tensor):
                self.assertTrue(
                    bool(torch.all(torch.eq(val1, val2))),
                    '{} is not equal to {}'.format(val1, val2),
                )
            else:
                self.assertEqual(val1, val2)
def test_seed_messages_from_file(self):
    with testing_utils.capture_output() as output:
        with NamedTemporaryFile() as tmpfile:
            tmpfile.write(b'howdy\nunique message')
            tmpfile.seek(0)
            SelfChat.main(
                model='fixed_response',
                fixed_response='hi',
                seed_messages_from_file=tmpfile.name,
                num_self_chats=10,
                selfchat_max_turns=2,
            )
    output = output.getvalue()
    assert 'howdy' in output
    assert 'unique message' in output
def _run_display_test(self, kwargs):
    with testing_utils.capture_output() as stdout:
        parser = setup_args()
        parser.set_defaults(**kwargs)
        opt = parser.parse_args([])
        agent = RepeatLabelAgent(opt)
        world = create_task(opt, agent)
        display(opt)

    str_output = stdout.getvalue()
    self.assertTrue(
        '[ loaded {} episodes with a total of {} examples ]'.format(
            world.num_episodes(), world.num_examples()
        )
        in str_output,
        'Wizard of Wikipedia failed with following args: {}'.format(opt),
    )
def _run_selfchat(self, config_id: str):
    """
    Run self-chat for model.

    :param config_id:
        id in config
    """
    self._print_progress(f'Running self-chat for {config_id}')
    config = self._get_selfchat_config(config_id)
    with capture_output():
        parser = self_chat_setup_args()
        parser.set_params(**config)
        opt = parser.parse_args(args=[])
    self_chat(opt)
def test_display_model(self):
    from parlai.scripts.display_model import DisplayModel

    with testing_utils.capture_output() as output:
        DisplayModel.main(
            model='fixed_response',
            fixed_response='1 2 3 4',
            task='integration_tests',
            verbose=True,
        )
    output = output.getvalue()
    assert 'metrics' in output
    assert 'accuracy' in output
    assert '1 2 3 4' in output
def test_recommendations_single(self):
    """Test whether recommended args work for non-group."""
    parser = ParlaiParser()
    parser.add_argument(
        '-bs',
        '--batchsize',
        default=1,
        type=int,
        help='batch size for minibatch training schemes',
        recommended="10",
    )
    with testing_utils.capture_output() as _:
        parser.parse_args()
    help_str = parser.format_help()
    assert re.search(r'--batchsize[^\n]*\n[^\n]*\(recommended: 10\)', help_str)
def _convert_task_to_conversations(self, config_id: str):
    """
    Convert task data to conversations format.

    :param config_id:
        id in config
    """
    self._print_progress(
        f'Converting task data to conversations format for {config_id}'
    )
    config = self._get_task_conversion_config(config_id)
    with capture_output():
        parser = convert_task_setup_args()
        parser.set_params(**config)
        opt = parser.parse_args(args=[], print_args=False)
    convert_task_data(opt)
def get_agent(**kwargs):
    r"""
    Return opt-initialized agent.

    :param kwargs:
        any kwargs you want to set using parser.set_params(\*\*kwargs)
    """
    if 'no_cuda' not in kwargs:
        kwargs['no_cuda'] = True
    from parlai.core.params import ParlaiParser

    parser = ParlaiParser()
    MockTorchAgent.add_cmdline_args(parser)
    parser.set_params(**kwargs)
    opt = parser.parse_args(print_args=False)
    with testing_utils.capture_output():
        return MockTorchAgent(opt)
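# A minimal usage sketch (not part of the original file) for get_agent() above. The
# option names passed here ('batchsize' and 'truncate') are ordinary ParlAI parser
# flags and the values are illustrative only; the helper forces no_cuda=True unless
# the caller overrides it.
def _example_get_agent_usage():
    agent = get_agent(batchsize=4, truncate=8)
    # no_cuda was injected by the helper because it was not supplied
    assert agent.opt['no_cuda'] is True
    return agent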
def _test_opt_step_opts(self, update_freq: int):
    """
    Test -tstep, -vstep, -lstep.

    :param update_freq:
        update frequency

    We copy train_model from testing_utils to directly access train loop.
    """
    import parlai.scripts.train_model as tms

    num_train_steps = 1001
    num_validations = 10
    num_logs = 100

    def get_tl(tmpdir):
        opt = {
            'task': 'integration_tests',
            'model': 'parlai.agents.test_agents.test_agents:MockTrainUpdatesAgent',
            'model_file': os.path.join(tmpdir, 'model'),
            'dict_file': os.path.join(tmpdir, 'model.dict'),
            # step opts
            'max_train_steps': num_train_steps,
            'validation_every_n_steps': int(num_train_steps / num_validations),
            'log_every_n_steps': int(num_train_steps / num_logs),
            'update_freq': update_freq,
        }
        parser = tms.setup_args()
        parser.set_params(**opt)
        popt = parser.parse_args([])
        for k, v in opt.items():
            popt[k] = v
        return tms.TrainLoop(popt)

    with testing_utils.capture_output(), testing_utils.tempdir() as tmpdir:
        tl = get_tl(tmpdir)
        valid, _ = tl.train()

        self.assertEqual(
            tl.valid_reports[-1]['total_train_updates'], num_train_steps - 1
        )
        self.assertEqual(len(tl.valid_reports), num_validations)
        self.assertEqual(
            len(tl.train_reports), num_logs + num_validations
        )  # log every valid as well
def test_fill_stack(self, file_regression: FileRegressionFixture):
    """
    Check the expected output when filling up the stack.

    Request image/model slots from the stack, and check that the behavior is as
    expected.
    """
    seed = 0
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    with testing_utils.tempdir() as tmpdir:
        # Params
        opt = {
            'evals_per_image_model_combo': 2,
            'models': ['model_1', 'model_2'],
            'num_images': 3,
            'stack_folder': tmpdir,
        }
        num_stack_slots = (
            opt['evals_per_image_model_combo']
            * len(opt['models'])
            * opt['num_images']
        )
        num_workers = 5
        worker_id_to_remove = '2'
        stack_idx_to_remove_worker_from = 0

        # Create the stack
        stack = ImageStack(opt)

        with testing_utils.capture_output() as output:
            for _ in range(num_stack_slots):
                worker_id = random.randrange(num_workers)
                _ = stack.get_next_image(str(worker_id))
                print('STACK: ', stack.stack)
            stack.remove_worker_from_stack(
                worker=worker_id_to_remove,
                stack_idx=stack_idx_to_remove_worker_from,
            )
            print('STACK: ', stack.stack)
            stdout = output.getvalue()

        # Check the output against what it should be
        file_regression.check(contents=stdout)
def test_train_model(self):
    """
    Check the training script doesn't crash.
    """
    import projects.controllable_dialogue.train_controllable_seq2seq as tcs2s

    parser = tcs2s.setup_args()
    # make it much smaller just for testing
    parser.set_params(
        max_train_time=120,
        validation_max_exs=128,
        batchsize=16,
        truncate=32,
        short_final_eval=True,
    )
    with testing_utils.capture_output():
        opt = parser.parse_args()
        tcs2s.TrainLoop(opt).train()