  def test_failure_with_invalid_context_type(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    with self.assertRaises(TypeError):
      # `tf.int32` is not a `tff.Type`.
      bad_context_tff_type = tf.int32
      federated_p13n_eval = p13n_eval.build_personalization_eval(
          model_fn,
          p13n_fn_dict,
          _evaluate_fn,
          context_tff_type=bad_context_tff_type)

    with self.assertRaises(TypeError):
      # `context_tff_type` is provided but `context` is not provided.
      context_tff_type = computation_types.to_type(tf.int32)
      federated_p13n_eval = p13n_eval.build_personalization_eval(
          model_fn,
          p13n_fn_dict,
          _evaluate_fn,
          context_tff_type=context_tff_type)
      federated_p13n_eval(zero_model_weights, [
          _create_client_input(train_scale=1.0, test_scale=1.0, context=None),
          _create_client_input(train_scale=1.0, test_scale=2.0, context=None)
      ])
  def test_construction_calls_model_fn(self):
    # Assert that building the process does not call `model_fn` too many
    # times. `model_fn` can potentially be expensive (loading weights,
    # processing, etc.).
    mock_model_fn = mock.Mock(side_effect=model_examples.LinearRegression)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)
    p13n_eval.build_personalization_eval(
        mock_model_fn, p13n_fn_dict, _evaluate_fn, max_num_clients=1)
    # TODO(b/186451541): reduce the number of calls to model_fn.
    self.assertEqual(mock_model_fn.call_count, 3)
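
The `mock.Mock(side_effect=...)` pattern above lets the factory keep building real models while recording how often it is invoked. A minimal sketch with plain `unittest.mock` and no TFF; here the built-in `list` stands in for the model constructor:

from unittest import mock

factory = mock.Mock(side_effect=list)  # each call still constructs a real object
factory()  # returns an actual `list` instance, not a Mock
factory()
assert factory.call_count == 2  # the mock records every invocation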
Example #3
  def test_failure_with_invalid_baseline_eval_fn(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    with self.assertRaises(TypeError):
      # `baseline_evaluate_fn` should be a callable.
      bad_baseline_evaluate_fn = 6
      p13n_eval.build_personalization_eval(model_fn, p13n_fn_dict,
                                           bad_baseline_evaluate_fn)
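
For contrast with the failure above, the builder only requires that `baseline_evaluate_fn` be callable. A hedged stub with the `(model, test_data) -> metrics` shape that the rest of this file exercises via `_evaluate_fn` (the stub name and metric are illustrative, not the module's actual helper):

import collections

def stub_baseline_evaluate_fn(model, test_data):
  del model, test_data  # unused; a real evaluator would compute metrics here
  return collections.OrderedDict(loss=0.0)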
  def test_failure_with_invalid_model_fn(self):
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)
    with self.assertRaises(TypeError):
      # `model_fn` should be a callable.
      bad_model_fn = 6
      p13n_eval.build_personalization_eval(bad_model_fn, p13n_fn_dict,
                                           _evaluate_fn)

    with self.assertRaises(TypeError):
      # `model_fn` should be a callable that returns a `tff.learning.Model`.
      bad_model_fn = lambda: 6
      p13n_eval.build_personalization_eval(bad_model_fn, p13n_fn_dict,
                                           _evaluate_fn)
  def test_failure_with_invalid_p13n_fns(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    with self.assertRaises(TypeError):
      # `personalize_fn_dict` should be an `OrderedDict`.
      bad_p13n_fn_dict = {'a': 6}
      p13n_eval.build_personalization_eval(model_fn, bad_p13n_fn_dict,
                                           _evaluate_fn)

    with self.assertRaises(TypeError):
      # `personalize_fn_dict` should be an `OrderedDict` that maps a `string`
      # to a `callable`.
      bad_p13n_fn_dict = collections.OrderedDict(a=6)
      p13n_eval.build_personalization_eval(model_fn, bad_p13n_fn_dict,
                                           _evaluate_fn)

    with self.assertRaises(TypeError):
      # `personalize_fn_dict` should be an `OrderedDict` that maps a `string`
      # to a `callable` that, when called, returns another `callable`.
      bad_p13n_fn_dict = collections.OrderedDict(x=lambda: 2)
      p13n_eval.build_personalization_eval(model_fn, bad_p13n_fn_dict,
                                           _evaluate_fn)

    with self.assertRaises(ValueError):
      # `personalize_fn_dict` should not use `baseline_metrics` as a key.
      bad_p13n_fn_dict = collections.OrderedDict(baseline_metrics=lambda: 2)
      p13n_eval.build_personalization_eval(model_fn, bad_p13n_fn_dict,
                                           _evaluate_fn)
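
Putting the four checks above together: a structurally valid `personalize_fn_dict` is an `OrderedDict` that maps strings (other than the reserved key `baseline_metrics`) to zero-argument builders, each returning the actual personalization callable. A minimal sketch; the inner signature is an assumption inferred from how these tests feed train data, test data, and an optional `context`:

import collections

def build_personalize_fn():

  def personalize_fn(model, train_data, test_data, context=None):
    # A real strategy trains `model` on `train_data` and evaluates it on
    # `test_data`; this stub only demonstrates the expected shape.
    del model, train_data, test_data, context
    return collections.OrderedDict(num_examples=0)

  return personalize_fn

good_p13n_fn_dict = collections.OrderedDict(my_strategy=build_personalize_fn)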
  def test_success_with_valid_context(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    # Build the p13n eval with an extra `context` argument.
    context_tff_type = computation_types.to_type(tf.int32)
    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn, context_tff_type=context_tff_type)

    # Perform p13n eval on two clients with different `context` values.
    results = federated_p13n_eval(zero_model_weights, [
        _create_client_input(train_scale=1.0, test_scale=1.0, context=2),
        _create_client_input(train_scale=1.0, test_scale=2.0, context=5)
    ])

    bs1_metrics = results['batch_size_1']
    bs2_metrics = results['batch_size_2']

    # Number of training examples is `3 + context` for both clients.
    # Note: the order is not preserved due to `federated_sample`, but the order
    # should be consistent across different personalization strategies.
    self.assertAllEqual(sorted(bs1_metrics['num_examples']), [5, 8])
    self.assertAllEqual(bs1_metrics['num_examples'],
                        bs2_metrics['num_examples'])
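
A quick check of the arithmetic behind the assertion above: each client sees 3 base training examples plus its `context` value, and only the ordering across clients is uncertain:

assert sorted([3 + 2, 3 + 5]) == [5, 8]  # contexts were 2 and 5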
Example #7
  def test_success_with_directly_constructed_model(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn)

    # Perform p13n eval on two clients: their train data are equivalent, but
    # the test data have different scales.
    results = federated_p13n_eval(zero_model_weights, [
        _create_client_input(train_scale=1.0, test_scale=1.0),
        _create_client_input(train_scale=1.0, test_scale=2.0)
    ])

    # Check if the baseline metrics are correct.
    baseline_metrics = results['baseline_metrics']
    # Number of test examples is 3 for both clients.
    self.assertAllEqual(baseline_metrics['num_examples'], [3, 3])
    # Number of test batches is 3 for both clients, because `_evaluate_fn`,
    # which computes the baseline metrics, uses a default batch size of 1.
    self.assertAllEqual(sorted(baseline_metrics['num_batches']), [3, 3])
    # The initial weights are all zeros. The average loss can be computed as:
    # Client 1, 0.5*(1 + 1 + 1)/3 = 0.5; Client 2, 0.5*(4 + 4 + 4)/3 = 2.0.
    # Note: the order is not preserved due to `federated_sample`.
    self.assertAllEqual(sorted(baseline_metrics['loss']), [0.5, 2.0])
    if baseline_metrics['loss'][0] == 0.5:
      client_1_idx, client_2_idx = 0, 1
    else:
      client_1_idx, client_2_idx = 1, 0

    # Check if the metrics of `batch_size_1` are correct.
    bs1_metrics = results['batch_size_1']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(bs1_metrics['num_examples'], [3, 3])
    bs1_test_outputs = bs1_metrics['test_outputs']
    # Number of test examples is also 3 for both clients.
    self.assertAllEqual(bs1_test_outputs['num_examples'], [3, 3])
    # Number of test batches is 1 for both clients since the test batch size
    # is 3.
    self.assertAllEqual(bs1_test_outputs['num_batches'], [1, 1])
    # Both clients' weights become [-3, -3, -1] after training, which gives an
    # average loss of 24 for Client 1 and 88.5 for Client 2.
    self.assertAlmostEqual(bs1_test_outputs['loss'][client_1_idx], 24.0)
    self.assertAlmostEqual(bs1_test_outputs['loss'][client_2_idx], 88.5)

    # Check if the metrics of `batch_size_2` are correct.
    bs2_metrics = results['batch_size_2']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(bs2_metrics['num_examples'], [3, 3])
    bs2_test_outputs = bs2_metrics['test_outputs']
    # Number of test examples is also 3 for both clients.
    self.assertAllEqual(bs2_test_outputs['num_examples'], [3, 3])
    # Number of test batches is 1 for both clients since the test batch size
    # is 3.
    self.assertAllEqual(bs2_test_outputs['num_batches'], [1, 1])
    # Both clients' weights become [0, 0, 1] after training, which gives an
    # average loss of 0 for Client 1 and 0.5 for Client 2.
    self.assertAlmostEqual(bs2_test_outputs['loss'][client_1_idx], 0.0)
    self.assertAlmostEqual(bs2_test_outputs['loss'][client_2_idx], 0.5)
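
The comments above pin down what `_evaluate_fn` must report: `num_examples`, `num_batches` (with a default batch size of 1), and an average `loss`. A rough sketch of such a helper, assuming the legacy `tff.learning.Model.forward_pass` API; this illustrates the contract, not the test module's actual implementation:

import collections

def evaluate_fn(model, dataset, batch_size=1):
  # Accumulate counts and an example-weighted loss over the batched dataset.
  num_examples, num_batches, loss_sum = 0, 0, 0.0
  for batch in dataset.batch(batch_size):
    outputs = model.forward_pass(batch, training=False)
    batch_examples = int(outputs.num_examples)
    num_examples += batch_examples
    num_batches += 1
    loss_sum += float(outputs.loss) * batch_examples  # `loss` is a batch mean
  return collections.OrderedDict(
      num_examples=num_examples,
      num_batches=num_batches,
      loss=loss_sum / num_examples)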
  def test_failure_with_invalid_sample_size(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    with self.assertRaises(TypeError):
      # `max_num_clients` should be an `int`.
      bad_num_clients = 1.0
      p13n_eval.build_personalization_eval(
          model_fn, p13n_fn_dict, _evaluate_fn, max_num_clients=bad_num_clients)

    with self.assertRaises(ValueError):
      # `max_num_clients` should be a positive `int`.
      bad_num_clients = 0
      p13n_eval.build_personalization_eval(
          model_fn, p13n_fn_dict, _evaluate_fn, max_num_clients=bad_num_clients)
Example #9
  def test_success_with_directly_constructed_model(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn)

    # Perform p13n eval on two clients with different batch sizes.
    results = federated_p13n_eval(
        zero_model_weights,
        [_create_client_input(1, 1),
         _create_client_input(2, 3)])
    results = results._asdict(recursive=True)

    # Check if the baseline metrics are correct.
    baseline_metrics = results['baseline_metrics']
    # Average loss is 0.5 * (1 + 1 + 1)/3 = 0.5.
    self.assertAllEqual(baseline_metrics['loss'], [0.5, 0.5])
    # Number of test examples is 3 for both clients.
    self.assertAllEqual(baseline_metrics['num_examples'], [3, 3])
    # Number of test batches is 3 and 1.
    # Note: the order is not preserved due to `federated_sample`.
    self.assertAllEqual(sorted(baseline_metrics['num_batches']), [1, 3])
    if baseline_metrics['num_batches'][0] == 3:
      client_1_idx, client_2_idx = 0, 1
    else:
      client_1_idx, client_2_idx = 1, 0

    # Check if the metrics of `sgd_opt` are correct.
    sgd_metrics = results['sgd_opt']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(sgd_metrics['num_examples'], [3, 3])
    sgd_test_outputs = sgd_metrics['test_outputs']
    # Number of test examples is also 3 for both clients.
    self.assertAllEqual(sgd_test_outputs['num_examples'], [3, 3])
    # Client 1's weights become [-3, -3, -1], which gives average loss 24.
    # Client 2's weights become [0, 0, 1], which gives average loss 0.
    self.assertAlmostEqual(sgd_test_outputs['loss'][client_1_idx], 24.0)
    self.assertAlmostEqual(sgd_test_outputs['loss'][client_2_idx], 0.0)
    # Number of test batches should have the same order as baseline metrics.
    self.assertAllEqual(sgd_test_outputs['num_batches'],
                        baseline_metrics['num_batches'])

    # Check if the metrics of `adam_opt` are correct.
    adam_metrics = results['adam_opt']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(adam_metrics['num_examples'], [3, 3])
    adam_test_outputs = adam_metrics['test_outputs']
    # Number of test examples is also 3 for both clients.
    self.assertAllEqual(adam_test_outputs['num_examples'], [3, 3])
    # Number of test batches should have the same order as baseline metrics.
    self.assertAllEqual(adam_test_outputs['num_batches'],
                        baseline_metrics['num_batches'])
Example #10
  def test_success_with_model_constructed_from_keras(self):

    def model_fn():
      inputs = tf.keras.Input(shape=(2,))  # feature dim = 2
      outputs = tf.keras.layers.Dense(1)(inputs)
      keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
      input_spec = collections.OrderedDict([
          ('x', tf.TensorSpec([None, 2], dtype=tf.float32)),
          ('y', tf.TensorSpec([None, 1], dtype=tf.float32))
      ])
      return keras_utils.from_keras_model(
          keras_model,
          input_spec=input_spec,
          loss=tf.keras.losses.MeanSquaredError())

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=0.5)

    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn)

    # Perform p13n eval on two clients: their train data are equivalent, but the
    # test data have different scales.
    results = federated_p13n_eval(zero_model_weights, [
        _create_client_input(train_scale=1.0, test_scale=1.0),
        _create_client_input(train_scale=1.0, test_scale=2.0)
    ])
    results = results._asdict(recursive=True)

    # Check if the baseline metrics are correct.
    baseline_metrics = results['baseline_metrics']
    # The initial weights are all zeros. The MeanSquaredError (MSE) is:
    # Client 1, (1 + 1 + 1)/3 = 1.0; Client 2, (4 + 4 + 4)/3 = 4.0.
    # Note: the order is not preserved due to `federated_sample`.
    self.assertAllEqual(sorted(baseline_metrics['loss']), [1.0, 4.0])

    # Check if the metrics of `batch_size_1` are correct.
    bs1_metrics = results['batch_size_1']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(bs1_metrics['num_examples'], [3, 3])
    bs1_test_outputs = bs1_metrics['test_outputs']
    # Both clients' weights become [-3, -3, -1] after training, which gives MSE
    # 48 for Client 1 and 177 for Client 2.
    self.assertAllClose(sorted(bs1_test_outputs['loss']), [48.0, 177.0])

    # Check if the metrics of `batch_size_2` are correct.
    bs2_metrics = results['batch_size_2']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(bs2_metrics['num_examples'], [3, 3])
    bs2_test_outputs = bs2_metrics['test_outputs']
    # Both clients' weights become [0, 0, 1] after training, which gives MSE 0
    # for Client 1 and 1.0 for Client 2.
    self.assertAllClose(sorted(bs2_test_outputs['loss']), [0.0, 1.0])
Example #11
  def test_failure_with_batched_datasets(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn)

    with self.assertRaises(TypeError):
      # `client_input` should not contain batched datasets.
      bad_client_input = collections.OrderedDict(
          train_data=_create_dataset(scale=1.0).batch(1),
          test_data=_create_dataset(scale=1.0).batch(1))
      federated_p13n_eval(zero_model_weights, [bad_client_input])
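
By contrast, a well-formed client input passes unbatched datasets and lets each personalization strategy do its own batching. A minimal sketch reusing the names from the test above:

good_client_input = collections.OrderedDict(
    train_data=_create_dataset(scale=1.0),
    test_data=_create_dataset(scale=1.0))
federated_p13n_eval(zero_model_weights, [good_client_input])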
Example #12
  def test_success_with_model_constructed_from_keras(self):

    def model_fn():
      inputs = tf.keras.Input(shape=(2,))  # feature dim = 2
      outputs = tf.keras.layers.Dense(1)(inputs)
      keras_model = tf.keras.Model(inputs=inputs, outputs=outputs)
      dummy_batch = collections.OrderedDict([
          ('x', np.zeros([1, 2], dtype=np.float32)),
          ('y', np.zeros([1, 1], dtype=np.float32))
      ])
      return keras_utils.from_keras_model(keras_model, dummy_batch,
                                          tf.keras.losses.MeanSquaredError())

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=0.5)

    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn)

    # Perform p13n eval on two clients with different batch sizes.
    results = federated_p13n_eval(
        zero_model_weights,
        [_create_client_input(1, 1),
         _create_client_input(2, 3)])
    results = results._asdict(recursive=True)

    # Check if the baseline metrics are correct.
    baseline_metrics = results['baseline_metrics']
    # The MeanSquaredError (MSE) is (1 + 1 + 1)/3 = 1.0.
    self.assertAllEqual(baseline_metrics['loss'], [1.0, 1.0])

    # Check if the metrics of `sgd_opt` are correct.
    sgd_metrics = results['sgd_opt']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(sgd_metrics['num_examples'], [3, 3])
    sgd_test_outputs = sgd_metrics['test_outputs']
    # Client 1's weights become [-3, -3, -1], which gives MSE 48.
    # Client 2's weights become [0, 0, 1], which gives MSE 0.
    self.assertAllClose(sorted(sgd_test_outputs['loss']), [0.0, 48.0])

    # Check if the metrics of `adam_opt` are correct.
    adam_metrics = results['adam_opt']
    # Number of training examples is 3 for both clients.
    self.assertAllEqual(adam_metrics['num_examples'], [3, 3])
  def test_success_with_small_sample_size(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn, max_num_clients=1)

    # Perform p13n eval on two clients.
    results = federated_p13n_eval(zero_model_weights, [
        _create_client_input(train_scale=1.0, test_scale=1.0),
        _create_client_input(train_scale=1.0, test_scale=2.0)
    ])
    # The results should only contain metrics from one client.
    self.assertAllEqual(len(results['baseline_metrics']['loss']), 1)
    self.assertAllEqual(len(results['batch_size_1']['test_outputs']['loss']), 1)
    self.assertAllEqual(len(results['batch_size_2']['test_outputs']['loss']), 1)
Example #14
  def test_success_with_small_sample_size(self):

    def model_fn():
      return model_examples.LinearRegression(feature_dim=2)

    zero_model_weights = _create_zero_model_weights(model_fn)
    p13n_fn_dict = _create_p13n_fn_dict(learning_rate=1.0)

    federated_p13n_eval = p13n_eval.build_personalization_eval(
        model_fn, p13n_fn_dict, _evaluate_fn, max_num_samples=1)

    # Perform p13n eval on two clients with different batch sizes.
    results = federated_p13n_eval(
        zero_model_weights,
        [_create_client_input(1, 1),
         _create_client_input(2, 3)])
    results = results._asdict(recursive=True)

    # The results should only contain metrics from one client.
    self.assertAllEqual(len(results['baseline_metrics']['loss']), 1)
    self.assertAllEqual(len(results['sgd_opt']['test_outputs']['loss']), 1)
    self.assertAllEqual(len(results['adam_opt']['test_outputs']['loss']), 1)