def setup_teardown(self):
    """
    Call code to set up and tear down tests.

    Run this only once because we'll be running all Fast ACUTE code before
    checking any results.

    The method is a generator: everything up to the ``yield`` is setup, and the
    ``finally`` clause is teardown. (Presumably it is registered as a pytest
    yield-fixture; the decorator is not visible in this block.)

    Yields:
        dict: ``'state'`` holds the value returned by
        ``self._get_agent_state(task_data=self.TASK_DATA)``, and
        ``'results_folder'`` holds ``runner.results_path`` after the analysis
        step has run.
    """

    self._setup()

    # Set up common temp directory
    root_dir = tempfile.mkdtemp()

    # Everything after the temp directory exists is guarded, so that a failure
    # during setup (not just a normal teardown) still cleans up after itself.
    try:
        # Define output structure
        outputs = {}

        # Set up config
        test_overrides = [
            f'+mephisto.blueprint.config_path={self.TASK_DIRECTORY}/task_config/model_config_dataset.json',
            f'+mephisto.blueprint.models=\"{self.MODEL_STRING}\"',
            '+mephisto.blueprint.model_pairs=""',
            '+mephisto.blueprint.num_task_data_episodes=500',
            '+mephisto.blueprint.selfchat_max_turns=6',
        ]
        # TODO: clean this up when Hydra has support for recursive defaults
        self._set_up_config(
            blueprint_type=FAST_ACUTE_BLUEPRINT_TYPE,
            task_directory=self.TASK_DIRECTORY,
            overrides=self._get_common_overrides(root_dir) + test_overrides,
        )
        self.config.mephisto.blueprint.model_pairs = None
        # TODO: hack to manually set mephisto.blueprint.model_pairs to None. Remove
        # when Hydra releases support for recursive defaults

        # Run Fast ACUTEs
        runner = FastAcuteExecutor(self.config)
        runner.compile_chat_logs()
        runner.set_up_acute_eval()
        # The executor's args replace the blueprint config before the server
        # is started.
        self.config.mephisto.blueprint = runner.fast_acute_args
        self._set_up_server()
        outputs['state'] = self._get_agent_state(task_data=self.TASK_DATA)

        # Run analysis
        runner.analyze_results(args=f'--mephisto-root {self.database_path}')
        outputs['results_folder'] = runner.results_path

        yield outputs
    finally:
        # All code here will be run upon teardown, and also if setup raised.
        # NOTE(review): assumes self._teardown() tolerates a partially
        # completed setup -- confirm against its definition.
        try:
            self._teardown()
        finally:
            # Tear down temp file, even if self._teardown() itself failed
            shutil.rmtree(root_dir)
def setup_teardown(self):
    """
    Call code to set up and tear down tests.

    Run this only once because we'll be running all Fast ACUTE code before
    checking any results.

    The method is a generator: everything up to the ``yield`` is setup, and the
    ``finally`` clause is teardown. (Presumably it is registered as a pytest
    yield-fixture; the decorator is not visible in this block.)

    Yields:
        dict: ``'state'`` holds the value returned by
        ``self._get_agent_state(task_data=self.TASK_DATA)``, and
        ``'results_folder'`` holds ``runner.results_path`` after the analysis
        step has run.
    """

    self._setup()

    # Set up common temp directory
    root_dir = tempfile.mkdtemp()

    # Everything after the temp directory exists is guarded, so that a failure
    # during setup (not just a normal teardown) still cleans up after itself.
    try:
        # Copy over expected self-chat files
        shutil.copytree(
            os.path.join(self.TASK_DIRECTORY, 'task_config', 'self_chats'),
            os.path.join(root_dir, 'self_chats'),
        )

        # Define output structure
        outputs = {}

        # Set up config
        test_overrides = ['mephisto.blueprint.use_existing_self_chat_files=True']
        self._set_up_config(
            task_directory=self.TASK_DIRECTORY,
            overrides=self._get_common_overrides(root_dir) + test_overrides,
            config_name=FAST_ACUTE_CONFIG_NAME,
        )

        # Run Fast ACUTEs
        runner = FastAcuteExecutor(self.config)
        runner.compile_chat_logs()
        runner.set_up_acute_eval()
        # The executor's args replace the blueprint config before the server
        # is started.
        self.config.mephisto.blueprint = runner.fast_acute_args
        self._set_up_server()
        outputs['state'] = self._get_agent_state(task_data=self.TASK_DATA)

        # Run analysis
        runner.analyze_results(args=f'--mephisto-root {self.database_path}')
        outputs['results_folder'] = runner.results_path

        yield outputs
    finally:
        # All code here will be run upon teardown, and also if setup raised.
        # NOTE(review): assumes self._teardown() tolerates a partially
        # completed setup -- confirm against its definition.
        try:
            self._teardown()
        finally:
            # Tear down temp file, even if self._teardown() itself failed
            shutil.rmtree(root_dir)
def setup_teardown(self):
    """
    Call code to set up and tear down tests.

    Run this only once because we'll be running all Fast ACUTE code before
    checking any results.

    The method is a generator: everything up to the ``yield`` is setup, and the
    ``finally`` clause is teardown. (Presumably it is registered as a pytest
    yield-fixture; the decorator is not visible in this block.)

    Yields:
        dict: ``'state'`` holds the value returned by
        ``self._get_agent_state(task_data=self.TASK_DATA)``, and
        ``'results_folder'`` holds ``runner.results_path`` after the analysis
        step has run.
    """

    self._setup()

    # Set up common temp directory
    root_dir = tempfile.mkdtemp()

    # Everything after the temp directory exists is guarded, so that a failure
    # during setup (not just a normal teardown) still cleans up after itself.
    try:
        # Params
        config_path = os.path.join(root_dir, 'config.json')

        # Copy over expected self-chat files
        shutil.copytree(
            os.path.join(self.TASK_DIRECTORY, 'task_config', 'self_chats'),
            os.path.join(root_dir, 'self_chats'),
        )

        # Define output structure
        outputs = {}

        # Run Fast ACUTEs and analysis on the base task

        # Set up config
        # The model_pairs override below is built from exactly two models.
        assert len(self.MODELS) == 2
        test_overrides = [
            f'+mephisto.blueprint.config_path={config_path}',
            '+mephisto.blueprint.models=""',
            f'+mephisto.blueprint.model_pairs={self.MODELS[0]}:{self.MODELS[1]}',
        ]
        # TODO: clean this up when Hydra has support for recursive defaults
        self._set_up_config(
            blueprint_type=FAST_ACUTE_BLUEPRINT_TYPE,
            task_directory=self.TASK_DIRECTORY,
            overrides=self._get_common_overrides(root_dir) + test_overrides,
        )
        self.config.mephisto.blueprint.models = None
        # TODO: hack to manually set mephisto.blueprint.models to None. Remove when
        # Hydra releases support for recursive defaults

        # Save the config file: one entry per model, pointing at its self-chat
        # log path. Named model_config to keep it distinct from self.config.
        blueprint_args = self.config.mephisto.blueprint
        model_config = {
            model: {
                'log_path': FastAcuteExecutor.get_relative_selfchat_log_path(
                    root_dir=blueprint_args.root_dir,
                    model=model,
                    task=blueprint_args.task,
                ),
                'is_selfchat': True,
            }
            for model in self.MODELS
        }
        with open(config_path, 'w') as f:
            json.dump(model_config, f)

        # Run Fast ACUTEs
        runner = FastAcuteExecutor(self.config)
        runner.compile_chat_logs()
        runner.set_up_acute_eval()
        # The executor's args replace the blueprint config before the server
        # is started.
        self.config.mephisto.blueprint = runner.fast_acute_args
        self._set_up_server()
        outputs['state'] = self._get_agent_state(task_data=self.TASK_DATA)

        # Run analysis
        runner.analyze_results(args=f'--mephisto-root {self.database_path}')
        outputs['results_folder'] = runner.results_path

        yield outputs
    finally:
        # All code here will be run upon teardown, and also if setup raised.
        # NOTE(review): assumes self._teardown() tolerates a partially
        # completed setup -- confirm against its definition.
        try:
            self._teardown()
        finally:
            # Tear down temp file, even if self._teardown() itself failed
            shutil.rmtree(root_dir)