Example #1
def _setup_outputs(root_output_dir, experiment_name, hparam_dict):
  """Set up directories for experiment loops, write hyperparameters to disk."""

  if not experiment_name:
    raise ValueError('experiment_name must be specified.')

  create_if_not_exists(root_output_dir)

  checkpoint_dir = os.path.join(root_output_dir, 'checkpoints', experiment_name)
  create_if_not_exists(checkpoint_dir)
  checkpoint_mngr = checkpoint_manager.FileCheckpointManager(checkpoint_dir)

  results_dir = os.path.join(root_output_dir, 'results', experiment_name)
  create_if_not_exists(results_dir)
  csv_file = os.path.join(results_dir, 'experiment.metrics.csv')
  metrics_mngr = csv_manager.CSVMetricsManager(csv_file)

  summary_logdir = os.path.join(root_output_dir, 'logdir', experiment_name)
  create_if_not_exists(summary_logdir)
  summary_writer = tf.summary.create_file_writer(summary_logdir)

  if hparam_dict:
    hparam_dict['metrics_file'] = metrics_mngr.metrics_filename
    hparams_file = os.path.join(results_dir, 'hparams.csv')
    utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)
    with summary_writer.as_default():
      hp.hparams({k: v for k, v in hparam_dict.items() if v is not None})

  logging.info('Writing...')
  logging.info('    checkpoints to: %s', checkpoint_dir)
  logging.info('    metrics csv to: %s', metrics_mngr.metrics_filename)
  logging.info('    summaries to: %s', summary_logdir)

  return checkpoint_mngr, metrics_mngr, summary_writer
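Example #1 calls a `create_if_not_exists` helper that is not shown. A minimal sketch, assuming the directory-creation semantics implied by the name and the `tf.io.gfile` usage elsewhere in these examples:

import tensorflow as tf
from absl import logging

def create_if_not_exists(path):
  # Create `path` (and any missing parents); ignore the error if it exists.
  try:
    tf.io.gfile.makedirs(path)
  except tf.errors.OpError:
    logging.info('Skipping creation of directory [%s], already exists.', path)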
Example #2
    def test_fn_writes_metrics(self):
        experiment_name = 'test_metrics'
        iterative_process = _build_federated_averaging_process()

        def client_datasets_fn(round_num):
            del round_num
            return _federated_data()

        def test_fn(model):
            keras_model = _compiled_keras_model_builder()
            model.assign_weights_to(keras_model)
            batch = next(iter(_create_tf_dataset_for_client(5)))
            return {'loss': keras_model.evaluate(batch['x'], batch['y'])}

        root_output_dir = self.get_temp_dir()
        training_loop.run(iterative_process=iterative_process,
                          train_client_datasets_fn=client_datasets_fn,
                          evaluation_fn=_evaluation_fn(),
                          total_rounds=1,
                          experiment_name=experiment_name,
                          root_output_dir=root_output_dir,
                          rounds_per_eval=10,
                          test_fn=test_fn)

        csv_file = os.path.join(root_output_dir, 'results', experiment_name,
                                'experiment.metrics.csv')
        metrics_manager = csv_manager.CSVMetricsManager(csv_file)
        fieldnames, metrics = metrics_manager.get_metrics()
        self.assertLen(metrics, 2)
        self.assertIn('eval/sparse_categorical_accuracy/example_weighted',
                      fieldnames)
        self.assertIn('test/loss', fieldnames)
Example #3
    def test_fn_writes_metrics(self):
        experiment_name = 'test_metrics'
        iterative_process = _build_federated_averaging_process()
        batch = _batch_fn()
        federated_data = [[batch]]

        def client_datasets_fn(round_num):
            del round_num
            return federated_data

        def evaluate(model):
            keras_model = tff.simulation.models.mnist.create_keras_model(
                compile_model=True)
            model.assign_weights_to(keras_model)
            return {'loss': keras_model.evaluate(batch.x, batch.y)}

        root_output_dir = self.get_temp_dir()
        training_loop.run(iterative_process=iterative_process,
                          client_datasets_fn=client_datasets_fn,
                          validation_fn=evaluate,
                          total_rounds=1,
                          experiment_name=experiment_name,
                          root_output_dir=root_output_dir,
                          rounds_per_eval=10,
                          test_fn=evaluate)

        csv_file = os.path.join(root_output_dir, 'results', experiment_name,
                                'experiment.metrics.csv')
        metrics_manager = csv_manager.CSVMetricsManager(csv_file)
        fieldnames, metrics = metrics_manager.get_metrics()
        self.assertLen(metrics, 2)
        self.assertIn('eval/loss', fieldnames)
        self.assertIn('test/loss', fieldnames)
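Examples #2 and #3 rely on helpers such as `_batch_fn` that are not shown. A minimal sketch of `_batch_fn`, consistent with the `batch.x` / `batch.y` access above; the MNIST shapes (784 features, integer labels) are assumptions based on `tff.simulation.models.mnist.create_keras_model`:

import collections
import numpy as np

def _batch_fn():
    # One synthetic MNIST-shaped batch; the values are arbitrary.
    Batch = collections.namedtuple('Batch', ['x', 'y'])
    return Batch(x=np.ones([1, 784], dtype=np.float32),
                 y=np.ones([1, 1], dtype=np.int64))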
Example #4
    def test_reload_of_csvfile(self):
        temp_dir = self.get_temp_dir()
        csv_file = os.path.join(temp_dir, 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        csv_mngr.update_metrics(0, _create_scalar_metrics())
        csv_mngr.update_metrics(5, _create_scalar_metrics())

        new_csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        fieldnames, metrics = new_csv_mngr.get_metrics()
        self.assertCountEqual(fieldnames, ['round_num', 'a/b', 'a/c'])
        self.assertLen(metrics, 2,
                       'There should be 2 rows (for rounds 0 and 5).')
        self.assertEqual(5, metrics[-1]['round_num'],
                         'Last metrics are for round 5.')

        self.assertEqual(set(os.listdir(temp_dir)), set(['metrics.csv']))
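Many of the following examples call `_create_scalar_metrics`, which is not shown. A minimal sketch, inferred from the flattened 'a/b' / 'a/c' fieldnames asserted above and the values 1.0 and 2.0 asserted in Example #12:

import collections

def _create_scalar_metrics():
    # Nested metrics that CSVMetricsManager flattens to 'a/b' and 'a/c'.
    return collections.OrderedDict(
        a=collections.OrderedDict(b=1.0, c=2.0))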
Example #5
    def test_rows_are_cleared_is_reflected_in_saved_file(self):
        temp_dir = self.get_temp_dir()
        csv_file = os.path.join(temp_dir, 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)

        csv_mngr.update_metrics(0, _create_scalar_metrics())
        csv_mngr.update_metrics(5, _create_scalar_metrics())
        csv_mngr.update_metrics(10, _create_scalar_metrics())

        with tf.io.gfile.GFile(csv_file, 'r') as csvfile:
            num_lines_before = len(csvfile.readlines())

        # The CSV file should have 4 lines: one for the fieldnames, and one for
        # each of the 3 calls to `update_metrics`.
        self.assertEqual(num_lines_before, 4)

        csv_mngr.clear_rounds_after(last_valid_round_num=7)

        with tf.io.gfile.GFile(csv_file, 'r') as csvfile:
            num_lines_after = len(csvfile.readlines())

        # The CSV file should now have 3 lines: one for the fieldnames, and one
        # for each of the 2 calls to `update_metrics` with round_num <= 7.
        self.assertEqual(num_lines_after, 3)
Example #6
    def test_rows_are_cleared_and_last_round_num_is_reset(self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)

        csv_mngr.update_metrics(0, _create_scalar_metrics())
        csv_mngr.update_metrics(5, _create_scalar_metrics())
        csv_mngr.update_metrics(10, _create_scalar_metrics())
        _, metrics = csv_mngr.get_metrics()
        self.assertLen(metrics, 3,
                       'There should be 3 rows (for rounds 0, 5, and 10).')

        csv_mngr.clear_rounds_after(last_valid_round_num=7)

        _, metrics = csv_mngr.get_metrics()
        self.assertLen(
            metrics, 2,
            'After clearing all rounds after last_valid_round_num=7, should be 2 '
            'rows of metrics (for rounds 0 and 5).')
        self.assertEqual(5, metrics[-1]['round_num'],
                         'Last metrics retained are for round 5.')

        # The internal state of the manager knows the last round number is 7, so it
        # raises an exception if a user attempts to add new metrics at round 7, ...
        with self.assertRaises(ValueError):
            csv_mngr.update_metrics(7, _create_scalar_metrics())

        # ... but allows a user to add new metrics at a round number greater than 7.
        csv_mngr.update_metrics(8, _create_scalar_metrics())  # (No exception.)
Example #7
    def test_update_metrics_adds_empty_str_if_previous_column_not_provided(
            self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        csv_mngr.update_metrics(0, _create_scalar_metrics_with_extra_column())
        csv_mngr.update_metrics(1, _create_scalar_metrics())
        _, metrics = csv_mngr.get_metrics()
        self.assertEqual(metrics[1]['a/d'], '')
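Examples #7 and #15 also use `_create_scalar_metrics_with_extra_column`, not shown. A minimal sketch, assuming it simply extends `_create_scalar_metrics` with an 'a/d' entry; the value 3.0 is an assumption, as the tests only assert the key:

def _create_scalar_metrics_with_extra_column():
    metrics = _create_scalar_metrics()
    metrics['a']['d'] = 3.0  # Hypothetical value; tests check only the 'a/d' key.
    return metrics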
Example #8
    def test_clear_rounds_after_raises_value_error_if_round_num_is_negative(
            self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        csv_mngr.update_metrics(0, _create_scalar_metrics())

        with self.assertRaises(ValueError):
            csv_mngr.clear_rounds_after(last_valid_round_num=-1)
Example #9
    def test_column_names_with_list(self):
        metrics_to_append = {'a': [3, 4, 5], 'b': 6}
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        csv_mngr.update_metrics(0, metrics_to_append)
        fieldnames, _ = csv_mngr.get_metrics()
        self.assertCountEqual(['a/0', 'a/1', 'a/2', 'b', 'round_num'],
                              fieldnames)
Example #10
    def test_update_metrics_raises_value_error_if_round_num_is_out_of_order(
            self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)

        csv_mngr.update_metrics(1, _create_scalar_metrics())

        with self.assertRaises(ValueError):
            csv_mngr.update_metrics(0, _create_scalar_metrics())
Example #11
    def test_clear_rounds_after_raises_runtime_error_if_no_metrics(self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)

        # Clear is allowed with no metrics if no rounds have yet completed.
        csv_mngr.clear_rounds_after(last_valid_round_num=0)

        with self.assertRaises(RuntimeError):
            # With no metrics written yet, any last_valid_round_num other than
            # 0 raises an exception.
            csv_mngr.clear_rounds_after(last_valid_round_num=1)
Example #12
    def test_update_metrics_returns_flat_dict_with_scalars(self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        input_data_dict = _create_scalar_metrics()
        appended_data_dict = csv_mngr.update_metrics(0, input_data_dict)
        self.assertEqual(
            collections.OrderedDict({
                'a/b': 1.0,
                'a/c': 2.0,
                'round_num': 0.0
            }), appended_data_dict)
Example #13
    def test_get_metrics_with_nonscalars_returns_list_of_lists(self):
        metrics_to_append = {
            'a': tf.ones([1], dtype=tf.int32),
            'b': tf.zeros([2, 2], dtype=tf.int32)
        }
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        csv_mngr.update_metrics(0, metrics_to_append)
        _, metrics = csv_mngr.get_metrics()
        self.assertEqual(metrics[0]['a'], '[1]')
        self.assertEqual(metrics[0]['b'], '[[0, 0], [0, 0]]')
Example #14
    def test_nonscalar_metrics_are_appended(self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        _, metrics = csv_mngr.get_metrics()
        self.assertEmpty(metrics)

        csv_mngr.update_metrics(0, _create_nonscalar_metrics())
        _, metrics = csv_mngr.get_metrics()
        self.assertLen(metrics, 1)

        csv_mngr.update_metrics(1, _create_nonscalar_metrics())
        _, metrics = csv_mngr.get_metrics()
        self.assertLen(metrics, 2)
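A minimal sketch of the `_create_nonscalar_metrics` helper used here, inferred from the expected 'a/b' / 'a/c' tensors asserted in Example #17:

def _create_nonscalar_metrics():
    # Nested tensor-valued metrics, flattened to 'a/b' and 'a/c'.
    return collections.OrderedDict(
        a=collections.OrderedDict(b=tf.ones([1]), c=tf.zeros([2, 2])))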
Example #15
    def test_update_metrics_adds_column_if_previously_unseen_metric_added(
            self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        csv_mngr.update_metrics(0, _create_scalar_metrics())
        fieldnames, metrics = csv_mngr.get_metrics()
        self.assertCountEqual(fieldnames, ['round_num', 'a/b', 'a/c'])
        self.assertNotIn('a/d', metrics[0].keys())

        csv_mngr.update_metrics(1, _create_scalar_metrics_with_extra_column())
        fieldnames, metrics = csv_mngr.get_metrics()
        self.assertCountEqual(fieldnames, ['round_num', 'a/b', 'a/c', 'a/d'])
        self.assertEqual(metrics[0]['a/d'], '')
Example #16
    def test_constructor_raises_value_error_if_csvfile_is_invalid(self):
        metrics_missing_round_num = _create_scalar_metrics()
        temp_dir = self.get_temp_dir()
        # This csvfile is 'invalid' in that it was not originally created by an
        # instance of CSVMetricsManager, and is missing a column for
        # round_num.
        invalid_csvfile = os.path.join(temp_dir, 'invalid_metrics.csv')
        with tf.io.gfile.GFile(invalid_csvfile, 'w') as csvfile:
            writer = csv.DictWriter(
                csvfile, fieldnames=metrics_missing_round_num.keys())
            writer.writeheader()
            writer.writerow(metrics_missing_round_num)

        with self.assertRaises(ValueError):
            csv_manager.CSVMetricsManager(invalid_csvfile)
Example #17
    def test_update_metrics_returns_flat_dict_with_nonscalars(self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        input_data_dict = _create_nonscalar_metrics()
        appended_data_dict = csv_mngr.update_metrics(0, input_data_dict)
        expected_dict = collections.OrderedDict({
            'a/b': tf.ones([1]),
            'a/c': tf.zeros([2, 2]),
            'round_num': 0.0
        })
        self.assertListEqual(list(expected_dict.keys()),
                             list(appended_data_dict.keys()))
        self.assertEqual(expected_dict['round_num'],
                         appended_data_dict['round_num'])
        self.assertAllEqual(expected_dict['a/b'], appended_data_dict['a/b'])
        self.assertAllEqual(expected_dict['a/c'], appended_data_dict['a/c'])
Example #18
def _setup_outputs(root_output_dir,
                   experiment_name,
                   hparam_dict,
                   rounds_per_profile=0):
    """Set up directories for experiment loops, write hyperparameters to disk."""

    if not experiment_name:
        raise ValueError('experiment_name must be specified.')

    create_if_not_exists(root_output_dir)

    checkpoint_dir = os.path.join(root_output_dir, 'checkpoints',
                                  experiment_name)
    create_if_not_exists(checkpoint_dir)
    checkpoint_mngr = checkpoint_manager.FileCheckpointManager(checkpoint_dir)

    results_dir = os.path.join(root_output_dir, 'results', experiment_name)
    create_if_not_exists(results_dir)
    csv_file = os.path.join(results_dir, 'experiment.metrics.csv')
    metrics_mngr = csv_manager.CSVMetricsManager(csv_file)

    summary_logdir = os.path.join(root_output_dir, 'logdir', experiment_name)
    tb_mngr = tensorboard_manager.TensorBoardManager(
        summary_dir=summary_logdir)

    if hparam_dict:
        hparam_dict['metrics_file'] = metrics_mngr.metrics_filename
        hparams_file = os.path.join(results_dir, 'hparams.csv')
        utils_impl.atomic_write_to_csv(pd.Series(hparam_dict), hparams_file)
        tb_mngr.update_hparams(
            {k: v
             for k, v in hparam_dict.items() if v is not None})

    logging.info('Writing...')
    logging.info('    checkpoints to: %s', checkpoint_dir)
    logging.info('    metrics csv to: %s', metrics_mngr.metrics_filename)
    logging.info('    summaries to: %s', summary_logdir)

    @contextlib.contextmanager
    def profiler(round_num):
        if rounds_per_profile > 0 and round_num % rounds_per_profile == 0:
            with tf.profiler.experimental.Profile(summary_logdir):
                yield
        else:
            yield

    return checkpoint_mngr, metrics_mngr, tb_mngr, profiler
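A hypothetical caller of this `_setup_outputs` variant, showing how the returned `profiler` context manager wraps selected rounds; everything besides `_setup_outputs` itself is illustrative:

checkpoint_mngr, metrics_mngr, tb_mngr, profiler = _setup_outputs(
    root_output_dir='/tmp/exp',
    experiment_name='demo',
    hparam_dict={'lr': 0.1},
    rounds_per_profile=10)

for round_num in range(1, 101):
    with profiler(round_num):
        # Rounds 10, 20, ... are traced by tf.profiler; other rounds run
        # unprofiled through the plain `yield` branch.
        pass  # e.g. state, metrics = iterative_process.next(state, train_data)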
Example #19
    def test_column_names(self):
        csv_file = os.path.join(self.get_temp_dir(), 'metrics.csv')
        csv_mngr = csv_manager.CSVMetricsManager(csv_file)
        csv_mngr.update_metrics(0, _create_scalar_metrics())
        fieldnames, _ = csv_mngr.get_metrics()
        self.assertCountEqual(['a/b', 'a/c', 'round_num'], fieldnames)
Example #20
    def test_csvfile_is_saved(self):
        temp_dir = self.get_temp_dir()
        csv_file = os.path.join(temp_dir, 'metrics.csv')
        csv_manager.CSVMetricsManager(csv_file)
        self.assertEqual(set(os.listdir(temp_dir)), set(['metrics.csv']))