def test_report_render(self):
    """
    Test rendering of nice reports.
    """
    report_s = nice_report({'foo': 3})
    assert "foo" in report_s
    assert "3" in report_s
    assert nice_report({}) == ""
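# Usage sketch (not part of the source): calling nice_report directly. The
# import path parlai.core.metrics is an assumption and may differ by version.
from parlai.core.metrics import nice_report

print(nice_report({'foo': 3}))  # renders a small report string containing "foo" and "3"
print(repr(nice_report({})))    # an empty report renders as the empty string ""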
def log(self):
    """
    Output a training log entry.
    """
    opt = self.opt
    if opt['display_examples']:
        print(self.world.display() + '\n~~')
    logs = []
    # get report
    train_report = self.world.report()
    train_report = self._sync_metrics(train_report)
    self.world.reset_metrics()

    # time elapsed
    logs.append('time:{}s'.format(np.floor(self.train_time.time())))
    logs.append('total_exs:{}'.format(self._total_exs))

    if self._total_epochs >= 0:
        # only log epoch progress when the epoch count is actually tracked
        logs.append('epochs:{}'.format(round(self._total_epochs, 2)))

    time_left = self._compute_eta(self._total_epochs, self.train_time.time())
    if time_left is not None:
        logs.append('time_left:{}s'.format(max(0, np.ceil(time_left))))

    log = '[ {} ] {}'.format(' '.join(logs), nice_report(train_report))
    print(log)
    self.log_time.reset()

    if opt['tensorboard_log'] and is_primary_worker():
        self.tb_logger.log_metrics('train', self.parleys, train_report)
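# A plausible sketch (not the source implementation) of an ETA helper like
# _compute_eta: extrapolate remaining time linearly from the fraction of epochs
# completed. The name, signature, and formula here are illustrative only.
def _compute_eta_sketch(epochs_completed, time_elapsed, max_epochs):
    if epochs_completed <= 0 or max_epochs <= 0:
        return None  # no progress yet, or no epoch budget to extrapolate against
    progress = min(epochs_completed / max_epochs, 1.0)
    # remaining time = elapsed time scaled by the remaining fraction of work
    return time_elapsed * (1 - progress) / progress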
def run(self):
    self.opt['no_cuda'] = True
    if 'ordered' not in self.opt['datatype'] and 'train' in self.opt['datatype']:
        self.opt['datatype'] = self.opt['datatype'] + ':ordered'
    agent = create_agent(self.opt)
    agent.opt.log()
    num_examples = self.opt['num_examples']
    field = self.opt['field'] + '_vec'
    if num_examples < 0:
        num_examples = float('inf')
    assert self.opt['batchsize'] == 1
    assert isinstance(agent, TorchAgent)

    world = create_task(self.opt, agent)
    teacher = world.get_task_agent()

    # set up logging
    log_every_n_secs = self.opt.get('log_every_n_secs', -1)
    if log_every_n_secs <= 0:
        log_every_n_secs = float('inf')
    log_time = TimeLogger()

    lengths = []

    cnt = 0
    total = min(teacher.num_examples(), num_examples)
    while not teacher.epoch_done() and cnt < num_examples:
        act = teacher.act()
        processed = agent.observe(act)
        try:
            text_vec = processed[field]
        except KeyError:
            raise KeyError(f"Pick one of {list(processed.keys())}")
        if text_vec is not None and (
            not self.opt['final_only'] or act.get('episode_done')
        ):
            cnt += 1
            lengths.append(float(len(text_vec)))
        agent.self_observe({})

        if log_time.time() > log_every_n_secs:
            report = self._compute_stats(lengths)
            text, report = log_time.log(report['exs'], total, report)
            logging.info(text)

    report = self._compute_stats(lengths)
    print(nice_report(report))
    return report
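# A plausible sketch (not the source) of what a helper like _compute_stats
# might return for the collected vector lengths: an example count plus simple
# summary statistics. The keys and exact statistics are assumptions.
import numpy as np

def _compute_stats_sketch(lengths):
    arr = np.asarray(lengths) if lengths else np.zeros(1)
    return {
        'exs': len(lengths),
        'min': float(arr.min()),
        'max': float(arr.max()),
        'mean': float(arr.mean()),
        'p99': float(np.percentile(arr, 99)),
    }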
def eval_model(opt, print_parser=None):
    """
    Evaluates a model.

    :param opt: tells the evaluation function how to run
    :param print_parser: if provided, prints the options that are set within the
        model after loading the model
    :return: the final result of calling report()
    """
    random.seed(42)
    if 'train' in opt['datatype'] and 'evalmode' not in opt['datatype']:
        raise ValueError(
            'You should use --datatype train:evalmode if you want to evaluate on '
            'the training set.'
        )

    if opt['save_world_logs'] and not opt['report_filename']:
        raise RuntimeError(
            'In order to save model replies, please specify the save path '
            'with --report-filename'
        )

    # load model and possibly print opt
    agent = create_agent(opt, requireModelExists=True)
    if print_parser:
        # show args after loading model
        print_parser.opt = agent.opt
        print_parser.print_args()

    tasks = opt['task'].split(',')
    reports = []
    for task in tasks:
        task_report = _eval_single_world(opt, agent, task)
        reports.append(task_report)

    report = aggregate_named_reports(
        dict(zip(tasks, reports)), micro_average=opt.get('aggregate_micro', False)
    )

    # print announcements and report
    print_announcements(opt)
    print(
        '[ Finished evaluating tasks {} using datatype {} ]'.format(
            tasks, opt.get('datatype', 'N/A')
        )
    )
    print(nice_report(report))
    _save_eval_stats(opt, report)

    return report
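# Illustrative call pattern (assumed, not from the source): build opt with
# ParlAI's argument parser and evaluate. The task and model-file values are
# placeholders; exact parser setup varies by script and version.
from parlai.core.params import ParlaiParser

parser = ParlaiParser(add_parlai_args=True, add_model_args=True)
opt = parser.parse_args(['--task', 'babi:task1k:1', '--model-file', '/path/to/model'])
report = eval_model(opt, print_parser=parser)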
def log(self):
    """
    Output a training log entry.
    """
    opt = self.opt
    if opt['display_examples']:
        print(self.world.display() + '\n~~')
    logs = []
    # get report
    train_report = self.world.report()
    train_report = self._sync_metrics(train_report)
    self.world.reset_metrics()

    train_report_trainstats = dict_report(train_report)
    train_report_trainstats['total_epochs'] = self._total_epochs
    train_report_trainstats['total_exs'] = self._total_exs
    train_report_trainstats['parleys'] = self.parleys
    train_report_trainstats['train_steps'] = self._train_steps
    train_report_trainstats['train_time'] = self.train_time.time()
    self.train_reports.append(train_report_trainstats)

    # time elapsed
    logs.append(f'time:{self.train_time.time():.0f}s')
    logs.append(f'total_exs:{self._total_exs}')
    logs.append(f'total_steps:{self._train_steps}')

    if self._total_epochs >= 0:
        # only log epoch progress when the epoch count is actually tracked
        logs.append(f'epochs:{self._total_epochs:.2f}')

    time_left = self._compute_eta(
        self._total_epochs, self.train_time.time(), self._train_steps
    )
    if time_left is not None:
        logs.append(f'time_left:{max(0, time_left):.0f}s')

    log = '{}\n{}\n'.format(' '.join(logs), nice_report(train_report))
    logging.info(log)
    self.log_time.reset()
    self._last_log_steps = 0

    if opt['tensorboard_log'] and is_primary_worker():
        self.tb_logger.log_metrics('train', self.parleys, train_report)
    if opt['wandb_log'] and is_primary_worker():
        self.wb_logger.log_metrics('train', self.parleys, train_report)

    return train_report
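# Example (illustrative values only) of one entry appended to
# self.train_reports above: the metric report flattened by dict_report into
# plain Python values, plus the train-loop bookkeeping added by log().
#
# {
#     'exs': 1280,            # metrics flattened out of the world report
#     'loss': 2.31,
#     'total_epochs': 0.53,   # bookkeeping added by log()
#     'total_exs': 1280,
#     'parleys': 40,
#     'train_steps': 40,
#     'train_time': 812.4,
# }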
def eval_model(opt):
    """
    Evaluates a model.

    :param opt: tells the evaluation function how to run
    :return: the final result of calling report()
    """
    random.seed(42)
    if 'train' in opt['datatype'] and 'evalmode' not in opt['datatype']:
        raise ValueError(
            'You should use --datatype train:evalmode if you want to evaluate on '
            'the training set.'
        )
    if opt['save_world_logs'] and not opt['report_filename']:
        raise RuntimeError(
            'In order to save model replies, please specify the save path '
            'with --report-filename'
        )

    # load model and possibly print opt
    agent = create_agent(opt, requireModelExists=True)
    agent.opt.log()

    tasks = opt['task'].split(',')
    reports = []
    for task in tasks:
        task_report = _eval_single_world(opt, agent, task)
        reports.append(task_report)

    report = aggregate_named_reports(
        dict(zip(tasks, reports)), micro_average=opt.get('aggregate_micro', False)
    )

    # print announcements and report
    print_announcements(opt)
    logging.info(
        f'Finished evaluating tasks {tasks} using datatype {opt.get("datatype")}'
    )
    print(nice_report(report))
    _save_eval_stats(opt, report)

    return report
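# Illustrative CLI invocation that ends up in eval_model (the model path and
# flag spellings are placeholders; check `parlai eval_model --help` for the
# exact options available in your version):
#
#   parlai eval_model --task convai2 --model-file /path/to/model \
#       --datatype valid --report-filename eval_report.json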
def obtain_stats(opt):
    report = verify(opt)
    print(nice_report(report))
    return report
def _save_outputs(self, opt, world, logger, episode_metrics):
    if is_distributed():  # flatten everything intelligently if need be
        world_report = aggregate_unnamed_reports(all_gather_list(world.report()))
        episode_metrics_unflattened = all_gather_list(episode_metrics)
        flattened = []
        for rank_elem in episode_metrics_unflattened:
            for elem in rank_elem:
                flattened.append(elem)
        episode_metrics = flattened
    else:
        world_report = world.report()
    logging.report("Final report:\n" + nice_report(world_report))

    report = dict_report(world_report)

    def get_episode_report(goal, episode_metric):
        metrics_dict = dict_report(episode_metric.report())
        metrics_dict["goal"] = goal
        return metrics_dict

    report["tod_metrics"] = [get_episode_report(g, e) for g, e in episode_metrics]

    if "report_filename" in opt and opt["report_filename"] is not None:
        if len(world_report) == 0:
            logging.warning("Report is empty; not saving report")

        report_fname = f"{opt['report_filename']}.json"
        # Save report
        if not is_distributed() or is_primary_worker():
            with PathManager.open(report_fname, "w") as f:
                logging.info(f"Saving model report to {report_fname}")
                json.dump({"opt": opt, "report": report}, f, indent=4)
                f.write("\n")  # for jq

    if "world_logs" in opt and opt["world_logs"] is not None:
        if is_distributed():  # save separately, then aggregate together
            rank = get_rank()
            log_outfile_part = f"{opt['world_logs']}_{opt['save_format']}_{rank}.jsonl"
            logger.write(log_outfile_part, world, file_format=opt["save_format"])
            sync_object(None)
            if is_primary_worker():
                log_outfile = f"{opt['world_logs']}_{opt['save_format']}.jsonl"
                log_outfile_metadata = (
                    f"{opt['world_logs']}_{opt['save_format']}.metadata"
                )
                with open(log_outfile, "w+") as outfile:
                    for rank in range(num_workers()):
                        log_outfile_part = (
                            f"{opt['world_logs']}_{opt['save_format']}_{rank}.jsonl"
                        )
                        with open(log_outfile_part) as infile:
                            for line in infile:
                                json_blob = json.loads(line.strip())
                                if len(json_blob["dialog"]) < 2:
                                    # skip when we don't have generation
                                    continue
                                json_blob["metadata_path"] = log_outfile_metadata
                                outfile.write(json.dumps(json_blob))
                                outfile.write("\n")
                        log_output_part_metadata = (
                            f"{opt['world_logs']}_{opt['save_format']}_{rank}.metadata"
                        )
                        if rank == 0:
                            copyfile(log_output_part_metadata, log_outfile_metadata)
                        os.remove(log_outfile_part)
                        os.remove(log_output_part_metadata)
        else:
            log_outfile = f"{opt['world_logs']}_{opt['save_format']}.jsonl"
            logger.write(log_outfile, world, file_format=opt["save_format"])

    return report
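# Behavioral note (equivalent rewrite, not a change to the source): the nested
# flatten loop above can be expressed with itertools.chain.from_iterable, which
# concatenates each rank's list of episode metrics into a single flat list.
import itertools

episode_metrics = list(itertools.chain.from_iterable(episode_metrics_unflattened))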