def _report_benchmark(self, summary, start_time_sec, wall_time_sec):
  """Reports pretraining benchmark metrics parsed from a training summary.

  Args:
    summary: Dict of training results; must contain 'train_loss',
      'masked_lm_accuracy', 'next_sentence_accuracy' and
      'total_training_steps'.
    start_time_sec: Start of the benchmark execution in seconds, used to
      derive the startup time.
    wall_time_sec: Duration of the benchmark execution in seconds.
  """
  examples_per_sec = self.timer_callback.get_examples_per_sec(
      FLAGS.train_batch_size * FLAGS.steps_per_loop)
  metrics = []
  metrics.append({'name': 'train_loss', 'value': summary['train_loss']})
  metrics.append({'name': 'example_per_second', 'value': examples_per_sec})
  metrics.append({
      'name': 'startup_time',
      'value': self.timer_callback.get_startup_time(start_time_sec),
  })
  metrics.append({
      'name': 'masked_lm_accuracy',
      'value': summary['masked_lm_accuracy'],
      'min_value': MIN_MLM_ACCURACY,
      'max_value': MAX_MLM_ACCURACY,
  })
  metrics.append({
      'name': 'next_sentence_accuracy',
      'value': summary['next_sentence_accuracy'],
      'min_value': MIN_NSP_ACCURACY,
      'max_value': MAX_NSP_ACCURACY,
  })
  self.report_benchmark(
      iters=summary['total_training_steps'],
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_core.get_nondefault_flags_as_str()})
def _report_benchmark(self, stats, wall_time_sec, max_value=None, min_value=None):
  """Reports benchmark results by writing to a local protobuf file.

  Args:
    stats: Dict returned from Keras models with known entries.
    wall_time_sec: Duration of the benchmark execution in seconds.
    max_value: Highest passing level for the training loss.
    min_value: Lowest passing level for the training loss.
  """
  metrics = [
      {
          'name': 'training_loss',
          'value': stats['training_loss'],
          'min_value': min_value,
          'max_value': max_value,
      },
      # The next two metrics are placeholders to avoid PerfZero failure.
      {'name': 'exp_per_second', 'value': 0.0},
      {'name': 'startup_time', 'value': 9999.},
  ]
  self.report_benchmark(
      iters=-1,
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_core.get_nondefault_flags_as_str()})
def _report_benchmark(self, stats, wall_time_sec, min_accuracy, max_accuracy):
  """Reports benchmark results by writing to a local protobuf file.

  Args:
    stats: Dict returned from BERT models with known entries.
    wall_time_sec: Duration of the benchmark execution in seconds.
    min_accuracy: Minimum classification accuracy constraint used to
      verify correctness of the model.
    max_accuracy: Maximum classification accuracy constraint used to
      verify correctness of the model.
  """
  metrics = []
  metrics.append({'name': 'training_loss', 'value': stats['train_loss']})
  metrics.append({
      'name': 'exp_per_second',
      'value': self.timer_callback.get_examples_per_sec(
          FLAGS.train_batch_size),
  })
  if 'eval_metrics' in stats:
    metrics.append({
        'name': 'eval_accuracy',
        'value': stats['eval_metrics'],
        'min_value': min_accuracy,
        'max_value': max_accuracy,
    })
  self.report_benchmark(
      iters=stats['total_training_steps'],
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_core.get_nondefault_flags_as_str()})
def _report_benchmark(self, stats, wall_time_sec, top_1_max=None, top_1_min=None, total_batch_size=None, log_steps=None, warmup=1):
  """Reports benchmark results by writing to a local protobuf file.

  Args:
    stats: Dict returned from Keras models with known entries.
    wall_time_sec: Duration of the benchmark execution in seconds.
    top_1_max: Highest passing level for top_1 accuracy.
    top_1_min: Lowest passing level for top_1 accuracy.
    total_batch_size: Global batch size.
    log_steps: How often the log was created for
      stats['step_timestamp_log'].
    warmup: Number of entries in stats['step_timestamp_log'] to ignore.
  """
  metrics = []
  if 'eval_acc' in stats:
    metrics.append({
        'name': 'accuracy_top_1',
        'value': stats['eval_acc'],
        'min_value': top_1_min,
        'max_value': top_1_max,
    })
    metrics.append({'name': 'eval_loss', 'value': stats['eval_loss']})
  metrics.append({'name': 'top_1_train_accuracy', 'value': stats['train_acc']})
  metrics.append({'name': 'train_loss', 'value': stats['train_loss']})

  time_log = stats.get('step_timestamp_log')
  if warmup and time_log and len(time_log) > warmup:
    # The first entry in time_log is the start of step 1; each remaining
    # entry records the end of a logged step.
    elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
    num_examples = total_batch_size * log_steps * (len(time_log) - warmup - 1)
    metrics.append({'name': 'exp_per_second', 'value': num_examples / elapsed})

  if 'avg_exp_per_second' in stats:
    metrics.append({
        'name': 'avg_exp_per_second',
        'value': stats['avg_exp_per_second'],
    })

  self.report_benchmark(
      iters=-1,
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_core.get_nondefault_flags_as_str()})
def _run_and_report_benchmark(self, top_1_train_min=0.923, top_1_train_max=0.93, warmup=1, log_steps=100):
  """Runs the Shakespeare model and reports benchmark results.

  Average epoch time is calculated by skipping the first epoch. This
  average ignores time spent between epochs and is recorded by begin and
  end epoch. To skip the accuracy check set `top_1_train_min=None`.

  Args:
    top_1_train_min: Lowest passing value, or None to skip the accuracy
      check.
    top_1_train_max: Highest passing value.
    warmup: Number of entries in `timestamp_log` to ignore.
    log_steps: How often the log was created for `timestamp_log`.
  """
  total_batch_size = FLAGS.batch_size
  metrics = []
  start_time_sec = time.time()
  stats = shakespeare_main.run(FLAGS)
  wall_time_sec = time.time() - start_time_sec

  # Bug fix: compare against None explicitly. A threshold of 0.0 is falsy
  # but is still a valid constraint; truthiness silently skipped the check.
  if top_1_train_min is not None:
    metrics.append({
        'name': 'accuracy_top_1_train',
        'value': stats['history']['RecallAt1'][-1],
        'min_value': top_1_train_min,
        'max_value': top_1_train_max,
    })

  # Look for the time history callback which was used during keras.fit.
  for callback in stats['callbacks']:
    if not isinstance(callback, keras_utils.TimeHistory):
      continue
    epoch_timings = callback.epoch_runtime_log
    # Guard against a single-epoch run, where epoch_timings[1:] is empty
    # and the average would divide by zero.
    if len(epoch_timings) > 1:
      average_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])
      metrics.append({'name': 'avg_epoch_time', 'value': average_time})

    # First entry in timestamp_log is the start of step 1. The rest of the
    # entries are the end of each step recorded.
    time_log = callback.timestamp_log
    elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
    num_examples = (
        total_batch_size * log_steps * (len(time_log) - warmup - 1))
    examples_per_sec = num_examples / elapsed
    metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})

  flags_str = flags_core.get_nondefault_flags_as_str()
  self.report_benchmark(
      iters=-1,
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_str})
def _run_and_report_benchmark(self, bleu_max=None, bleu_min=None, log_steps=None, total_batch_size=None, warmup=1):
  """Trains the Transformer task and reports benchmark results.

  Args:
    bleu_max: Highest passing level for the BLEU score.
    bleu_min: Lowest passing level for the BLEU score.
    log_steps: How often the log was created for
      stats['step_timestamp_log'].
    total_batch_size: Global batch size.
    warmup: Number of entries in stats['step_timestamp_log'] to ignore.
  """
  start_time_sec = time.time()
  task = transformer_main.TransformerTask(FLAGS)
  stats = task.train()
  wall_time_sec = time.time() - start_time_sec

  metrics = []
  if 'bleu_uncased' in stats:
    if 'bleu_uncased_history' in stats:
      # Report the best score seen during training rather than only the
      # final one, plus the iteration it was achieved at.
      best_iteration, best_score = max(
          stats['bleu_uncased_history'], key=lambda entry: entry[1])
      metrics.extend([
          {
              'name': 'bleu_uncased',
              'value': best_score,
              'min_value': bleu_min,
              'max_value': bleu_max,
          },
          {'name': 'bleu_best_score_iteration', 'value': best_iteration},
          {'name': 'bleu_uncased_last', 'value': stats['bleu_uncased']},
      ])
    else:
      metrics.append({
          'name': 'bleu_uncased',
          'value': stats['bleu_uncased'],
          'min_value': bleu_min,
          'max_value': bleu_max,
      })

  time_log = stats.get('step_timestamp_log')
  if warmup and time_log and len(time_log) > warmup:
    # First entry in time_log is the start of step 1; each remaining
    # entry records the end of a logged step.
    elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
    num_examples = total_batch_size * log_steps * (len(time_log) - warmup - 1)
    metrics.append({'name': 'exp_per_second', 'value': num_examples / elapsed})

  if 'avg_exp_per_second' in stats:
    metrics.append({
        'name': 'avg_exp_per_second',
        'value': stats['avg_exp_per_second'],
    })

  self.report_benchmark(
      iters=-1,
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_core.get_nondefault_flags_as_str()})
def _run_and_report_benchmark(self, summary_path: str):
  """Runs BERT pretraining on TPU and reports results from its summary.

  Args:
    summary_path: Path to the JSON training summary written by the run.
  """
  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy='tpu', tpu_address=FLAGS.tpu)
  logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str())

  start_time_sec = time.time()
  run_pretraining.run_bert_pretrain(
      strategy=strategy, custom_callbacks=self.timer_callback)
  wall_time_sec = time.time() - start_time_sec

  with tf.io.gfile.GFile(summary_path, 'rb') as reader:
    summary = json.loads(reader.read().decode('utf-8'))
  self._report_benchmark(summary, start_time_sec, wall_time_sec)
def _report_benchmark(self, stats, start_time_sec, wall_time_sec, min_ap, max_ap, warmup):
  """Reports benchmark results by writing to a local protobuf file.

  Args:
    stats: Dict returned from Detection models with known entries.
    start_time_sec: The start of the benchmark execution in seconds.
    wall_time_sec: The duration of the benchmark execution in seconds.
    min_ap: Minimum detection AP constraint to verify correctness of the
      model.
    max_ap: Maximum detection AP constraint to verify correctness of the
      model.
    warmup: Number of time log entries to ignore when computing
      examples/sec.
  """
  metrics = [{'name': 'total_loss', 'value': stats['total_loss']}]

  if self.timer_callback:
    metrics.append({
        'name': 'exp_per_second',
        'value': self.timer_callback.get_examples_per_sec(warmup),
    })
    metrics.append({
        'name': 'startup_time',
        'value': self.timer_callback.get_startup_time(start_time_sec),
    })
  else:
    # Without a timer callback there is no throughput data; report a
    # placeholder value.
    metrics.append({'name': 'exp_per_second', 'value': 0.0})

  # NOTE(review): the guard checks 'eval_metrics' but reads stats['AP'];
  # presumably both keys are populated together -- confirm with callers.
  if 'eval_metrics' in stats:
    metrics.append({
        'name': 'AP',
        'value': stats['AP'],
        'min_value': min_ap,
        'max_value': max_ap,
    })

  self.report_benchmark(
      iters=stats['total_steps'],
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_core.get_nondefault_flags_as_str()})
def _report_benchmark(self, stats, wall_time_sec, min_ap, max_ap, train_batch_size=None):
  """Reports benchmark results by writing to a local protobuf file.

  Args:
    stats: Dict returned from Detection models with known entries.
    wall_time_sec: Duration of the benchmark execution in seconds.
    min_ap: Minimum detection AP constraint to verify correctness of the
      model.
    max_ap: Maximum detection AP constraint to verify correctness of the
      model.
    train_batch_size: Train batch size used for computing exp_per_second.
      Defaults to FLAGS.train_batch_size when not given.
  """
  metrics = [{
      'name': 'total_loss',
      'value': stats['total_loss'],
  }]
  if self.timer_callback:
    # Bug fix: train_batch_size was documented as required for computing
    # exp_per_second but was previously ignored in favor of
    # FLAGS.train_batch_size. Honor the parameter, keeping the FLAGS
    # value as a backward-compatible fallback.
    batch_size = (
        train_batch_size
        if train_batch_size is not None else FLAGS.train_batch_size)
    metrics.append({
        'name': 'exp_per_second',
        'value': self.timer_callback.get_examples_per_sec(batch_size),
    })
  else:
    # No timer callback available; report a placeholder throughput.
    metrics.append({
        'name': 'exp_per_second',
        'value': 0.0,
    })
  # NOTE(review): the guard checks 'eval_metrics' but reads stats['AP'];
  # presumably both keys are populated together -- confirm with callers.
  if 'eval_metrics' in stats:
    metrics.append({
        'name': 'AP',
        'value': stats['AP'],
        'min_value': min_ap,
        'max_value': max_ap,
    })
  flags_str = flags_core.get_nondefault_flags_as_str()
  self.report_benchmark(
      iters=stats['total_steps'],
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_str})
def _report_benchmark(self, stats, wall_time_sec, top_1_max=None, top_1_min=None):
  """Reports benchmark results by writing to a local protobuf file.

  Args:
    stats: Dict returned from estimator models with known entries.
    wall_time_sec: Duration of the benchmark execution in seconds.
    top_1_max: Highest passing level for top_1 accuracy.
    top_1_min: Lowest passing level for top_1 accuracy.
  """
  examples_per_sec_hook = next(
      (h for h in stats['train_hooks']
       if isinstance(h, hooks.ExamplesPerSecondHook)), None)

  eval_results = stats['eval_results']
  metrics = []
  if 'accuracy' in eval_results:
    metrics.append({
        'name': 'accuracy_top_1',
        'value': eval_results['accuracy'].item(),
        'min_value': top_1_min,
        'max_value': top_1_max,
    })
  if 'accuracy_top_5' in eval_results:
    metrics.append({
        'name': 'accuracy_top_5',
        'value': eval_results['accuracy_top_5'].item(),
    })

  if examples_per_sec_hook:
    samples = examples_per_sec_hook.current_examples_per_sec_list
    # ExamplesPerSecondHook skips the first 10 steps.
    metrics.append({
        'name': 'exp_per_second',
        'value': sum(samples) / len(samples),
    })

  self.report_benchmark(
      iters=eval_results.get('global_step', None),
      wall_time=wall_time_sec,
      metrics=metrics,
      extras={'flags': flags_core.get_nondefault_flags_as_str()})
def test_get_nondefault_flags_as_str(self):
  """Checks that the nondefault-flag string tracks explicit flag changes."""
  defaults = dict(
      clean=True,
      data_dir="abc",
      hooks=["LoggingTensorHook"],
      stop_threshold=1.5,
      use_synthetic_data=False)
  flags_core.set_defaults(**defaults)
  flags_core.parse_flags()

  def assert_flags_str(expected):
    self.assertEqual(flags_core.get_nondefault_flags_as_str(), expected)

  expected_flags = ""
  assert_flags_str(expected_flags)

  flags.FLAGS.clean = False
  expected_flags += "--noclean"
  assert_flags_str(expected_flags)

  flags.FLAGS.data_dir = "xyz"
  expected_flags += " --data_dir=xyz"
  assert_flags_str(expected_flags)

  flags.FLAGS.hooks = ["aaa", "bbb", "ccc"]
  expected_flags += " --hooks=aaa,bbb,ccc"
  assert_flags_str(expected_flags)

  flags.FLAGS.stop_threshold = 3.
  expected_flags += " --stop_threshold=3.0"
  assert_flags_str(expected_flags)

  flags.FLAGS.use_synthetic_data = True
  expected_flags += " --use_synthetic_data"
  assert_flags_str(expected_flags)

  # Explicitly setting a flag back to its default value must remove it
  # from the string.
  flags.FLAGS.use_synthetic_data = False
  expected_flags = expected_flags[:-len(" --use_synthetic_data")]
  assert_flags_str(expected_flags)