def test_preprocess_element_spec(self):
  """Verifies the element spec of an autoencoder-preprocessed dataset."""
  raw_dataset = tf.data.Dataset.from_tensor_slices(TEST_DATA)
  autoencoder_preprocess = emnist_dataset.create_preprocess_fn(
      num_epochs=1,
      batch_size=20,
      shuffle_buffer_size=1,
      emnist_task='autoencoder')
  result_ds = autoencoder_preprocess(raw_dataset)

  # Both components of each element are expected to be batches of
  # 784-length float32 vectors with an unknown batch dimension.
  flat_batch_spec = tf.TensorSpec(shape=(None, 784), dtype=tf.float32)
  self.assertEqual(result_ds.element_spec, (flat_batch_spec, flat_batch_spec))
def test_preprocess_returns_correct_element(self):
  """Verifies the first batch from `digit_recognition` preprocessing."""
  raw_dataset = tf.data.Dataset.from_tensor_slices(TEST_DATA)
  digit_preprocess = emnist_dataset.create_preprocess_fn(
      num_epochs=1,
      batch_size=20,
      shuffle_buffer_size=1,
      emnist_task='digit_recognition')
  first_batch = next(iter(digit_preprocess(raw_dataset)))

  # The expected batch is a pair: an all-zero float32 tensor of shape
  # (1, 28, 28, 1) and an all-zero int32 tensor of shape (1,).
  expected_first = tf.zeros(shape=(1, 28, 28, 1), dtype=tf.float32)
  expected_second = tf.zeros(shape=(1,), dtype=tf.int32)
  self.assertAllClose(
      self.evaluate(first_batch), (expected_first, expected_second))
def test_preprocess_returns_correct_element(self):
  """Verifies the spec and first batch from `autoencoder` preprocessing."""
  raw_dataset = tf.data.Dataset.from_tensor_slices(TEST_DATA)
  autoencoder_preprocess = emnist_dataset.create_preprocess_fn(
      num_epochs=1,
      batch_size=20,
      shuffle_buffer_size=1,
      emnist_task='autoencoder')
  result_ds = autoencoder_preprocess(raw_dataset)

  # Static check: both components are float32 batches of 784-length vectors.
  flat_batch_spec = tf.TensorSpec(shape=(None, 784), dtype=tf.float32)
  self.assertEqual(result_ds.element_spec, (flat_batch_spec, flat_batch_spec))

  # Value check: the first batch is expected to be a pair of all-ones tensors
  # of shape (1, 784).
  first_batch = next(iter(result_ds))
  all_ones = tf.ones(shape=(1, 784), dtype=tf.float32)
  self.assertAllClose(self.evaluate(first_batch), (all_ones, all_ones))
def configure_training(
    task_spec: training_specs.TaskSpec) -> training_specs.RunnerSpec:
  """Builds a `RunnerSpec` for federated training of an EMNIST autoencoder.

  The function loads the federated EMNIST training data and a centralized
  test dataset, creates the client preprocessing function, wraps the
  autoencoder Keras model as a TFF model, and hands it to
  `task_spec.iterative_process_builder`. The resulting iterative process is
  composed with a dataset computation so that it is compatible with
  `federated_research.utils.training_loop`.

  Args:
    task_spec: A `TaskSpec` supplying the iterative process builder, the
      number of client epochs per round, the client batch size, the number of
      clients per round and the sampling random seed.

  Returns:
    A `RunnerSpec` holding the composed training process, the client sampling
    function, and the validation and test functions (both backed by the same
    centralized evaluation).
  """
  emnist_task = 'autoencoder'

  federated_train, _ = tff.simulation.datasets.emnist.load_data(
      only_digits=False)
  _, centralized_test = emnist_dataset.get_centralized_datasets(
      only_digits=False, emnist_task=emnist_task)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=task_spec.client_epochs_per_round,
      batch_size=task_spec.client_batch_size,
      emnist_task=emnist_task)
  input_spec = train_preprocess_fn.type_signature.result.element

  model_builder = emnist_ae_models.create_autoencoder_model
  loss_builder = functools.partial(
      tf.keras.losses.MeanSquaredError,
      reduction=tf.keras.losses.Reduction.SUM)

  def metrics_builder():
    return [tf.keras.metrics.MeanSquaredError()]

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  iterative_process = task_spec.iterative_process_builder(tff_model_fn)

  if not hasattr(federated_train, 'dataset_computation'):
    # Without a dataset computation, clients are sampled as datasets and the
    # preprocessing function itself is composed with the iterative process.
    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        train_preprocess_fn, iterative_process)
    client_sampling_fn = training_utils.build_client_datasets_fn(
        dataset=federated_train,
        clients_per_round=task_spec.clients_per_round,
        random_seed=task_spec.sampling_random_seed)
  else:
    # Otherwise clients are sampled by id, and the dataset for each id is
    # built and preprocessed inside the composed computation.
    @tff.tf_computation(tf.string)
    def build_train_dataset_from_client_id(client_id):
      client_dataset = federated_train.dataset_computation(client_id)
      return train_preprocess_fn(client_dataset)

    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        build_train_dataset_from_client_id, iterative_process)
    client_ids_fn = training_utils.build_sample_fn(
        federated_train.client_ids,
        size=task_spec.clients_per_round,
        replace=False,
        random_seed=task_spec.sampling_random_seed)

    def client_sampling_fn(x):
      # We convert the output to a list (instead of an np.ndarray) so that it
      # can be used as input to the iterative process.
      return list(client_ids_fn(x))

  # Expose the weight accessor of the original process on the composed one.
  training_process.get_model_weights = iterative_process.get_model_weights

  test_fn = training_utils.build_centralized_evaluate_fn(
      eval_dataset=centralized_test,
      model_builder=model_builder,
      loss_builder=loss_builder,
      metrics_builder=metrics_builder)

  def validation_fn(model_weights, round_num):
    # Validation ignores the round number and reuses the test evaluation.
    del round_num
    return test_fn(model_weights)

  return training_specs.RunnerSpec(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn)
def configure_training(task_spec: training_specs.TaskSpec,
                       model: str = 'cnn') -> training_specs.RunnerSpec:
  """Builds a `RunnerSpec` for federated EMNIST character recognition.

  The function loads the federated EMNIST training data and a centralized
  test dataset, creates the client preprocessing function, wraps the selected
  Keras model as a TFF model, and hands it to
  `task_spec.iterative_process_builder`. The resulting iterative process is
  composed with a per-client dataset computation so that it is compatible
  with `federated_research.utils.training_loop`.

  Args:
    task_spec: A `TaskSpec` supplying the iterative process builder, the
      number of client epochs per round, the client batch size, the number of
      clients per round and the client sampling random seed.
    model: Which model to train. Must be `cnn` (a CNN model) or `2nn` (a
      densely connected 2-layer model).

  Returns:
    A `RunnerSpec` holding the composed training process, the client id
    sampling function, and the validation and test functions.

  Raises:
    ValueError: If `model` is neither `cnn` nor `2nn`.
  """
  emnist_task = 'digit_recognition'

  federated_train, _ = tff.simulation.datasets.emnist.load_data(
      only_digits=False)
  _, centralized_test = emnist_dataset.get_centralized_datasets(
      only_digits=False, emnist_task=emnist_task)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=task_spec.client_epochs_per_round,
      batch_size=task_spec.client_batch_size,
      emnist_task=emnist_task)
  input_spec = train_preprocess_fn.type_signature.result.element

  if model not in ('cnn', '2nn'):
    raise ValueError(
        'Cannot handle model flag [{!s}], must be one of {!s}.'.format(
            model, EMNIST_MODELS))
  if model == 'cnn':
    keras_model_fn = emnist_models.create_conv_dropout_model
  else:
    keras_model_fn = emnist_models.create_two_hidden_layer_model
  model_builder = functools.partial(keras_model_fn, only_digits=False)

  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy

  def metrics_builder():
    return [tf.keras.metrics.SparseCategoricalAccuracy()]

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  iterative_process = task_spec.iterative_process_builder(tff_model_fn)

  @tff.tf_computation(tf.string)
  def build_train_dataset_from_client_id(client_id):
    client_dataset = federated_train.dataset_computation(client_id)
    return train_preprocess_fn(client_dataset)

  training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
      build_train_dataset_from_client_id, iterative_process)

  client_ids_fn = tff.simulation.build_uniform_sampling_fn(
      federated_train.client_ids,
      size=task_spec.clients_per_round,
      replace=False,
      random_seed=task_spec.client_datasets_random_seed)

  def client_sampling_fn(x):
    # We convert the output to a list (instead of an np.ndarray) so that it
    # can be used as input to the iterative process.
    return list(client_ids_fn(x))

  # Expose the weight accessor of the original process on the composed one.
  training_process.get_model_weights = iterative_process.get_model_weights

  evaluate_fn = tff.learning.build_federated_evaluation(tff_model_fn)

  def test_fn(state):
    # The centralized test dataset is evaluated as a single-entry list.
    weights = iterative_process.get_model_weights(state)
    return evaluate_fn(weights, [centralized_test])

  def validation_fn(state, round_num):
    # Validation ignores the round number and matches the test evaluation.
    del round_num
    return test_fn(state)

  return training_specs.RunnerSpec(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn)
def main(argv):
  """Runs flag-configured federated averaging on EMNIST character recognition.

  The function loads and preprocesses EMNIST, builds the Keras model selected
  by `FLAGS.model`, constructs a model-update aggregator from the compression
  and DP flags, builds a federated averaging process with uniform client
  weighting, and runs it through `training_loop.run`.

  Args:
    argv: Command-line arguments; more than one entry is rejected.

  Raises:
    app.UsageError: If `argv` contains more than one entry.
    ValueError: If `FLAGS.model` is not one of `cnn`, `2nn` or `1m_cnn`.
  """
  if len(argv) > 1:
    raise app.UsageError('Expected no command-line arguments, '
                         'got: {}'.format(argv))

  emnist_task = 'digit_recognition'
  # Load the data with the same `only_digits` setting used to build the model
  # below, so the dataset and the model cannot disagree when the flag is set.
  emnist_train, _ = tff.simulation.datasets.emnist.load_data(
      only_digits=FLAGS.only_digits)
  _, emnist_test = emnist_dataset.get_centralized_datasets(
      only_digits=FLAGS.only_digits, emnist_task=emnist_task)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=FLAGS.client_epochs_per_round,
      batch_size=FLAGS.client_batch_size,
      emnist_task=emnist_task)
  input_spec = train_preprocess_fn.type_signature.result.element

  if FLAGS.model == 'cnn':
    model_builder = functools.partial(
        emnist_models.create_conv_dropout_model,
        only_digits=FLAGS.only_digits)
  elif FLAGS.model == '2nn':
    model_builder = functools.partial(
        emnist_models.create_two_hidden_layer_model,
        only_digits=FLAGS.only_digits)
  elif FLAGS.model == '1m_cnn':
    model_builder = functools.partial(
        create_1m_cnn_model, only_digits=FLAGS.only_digits)
  else:
    raise ValueError('Cannot handle model flag [{!s}].'.format(FLAGS.model))

  logging.info('Training model:')
  # `Model.summary()` returns None and prints to stdout by default; route each
  # summary line through the logger instead of logging the None return value.
  model_builder().summary(print_fn=logging.info)

  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy
  metrics_builder = lambda: [tf.keras.metrics.SparseCategoricalAccuracy()]

  compression_dict = utils_impl.lookup_flag_values(compression_flags)
  dp_dict = utils_impl.lookup_flag_values(dp_flags)

  # Most logic for deciding what baseline to run is here.
  aggregation_factory = fl_utils.build_aggregator(
      compression_flags=compression_dict,
      dp_flags=dp_dict,
      num_clients=len(emnist_train.client_ids),
      num_clients_per_round=FLAGS.clients_per_round,
      num_rounds=FLAGS.total_rounds,
      client_template=model_builder().trainable_variables)

  def tff_model_fn():
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        loss=loss_builder(),
        input_spec=input_spec,
        metrics=metrics_builder())

  server_optimizer_fn = lambda: utils_impl.create_optimizer_from_flags('server')
  client_optimizer_fn = lambda: utils_impl.create_optimizer_from_flags('client')

  iterative_process = tff.learning.build_federated_averaging_process(
      model_fn=tff_model_fn,
      server_optimizer_fn=server_optimizer_fn,
      client_weighting=tff.learning.ClientWeighting.UNIFORM,
      client_optimizer_fn=client_optimizer_fn,
      model_update_aggregation_factory=aggregation_factory)

  @tff.tf_computation(tf.string)
  def build_train_dataset_from_client_id(client_id):
    client_dataset = emnist_train.dataset_computation(client_id)
    return train_preprocess_fn(client_dataset)

  training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
      build_train_dataset_from_client_id, iterative_process)

  # Expose the weight accessor of the original process on the composed one.
  training_process.get_model_weights = iterative_process.get_model_weights

  client_ids_fn = functools.partial(
      tff.simulation.build_uniform_sampling_fn(
          emnist_train.client_ids,
          replace=False,
          random_seed=FLAGS.client_datasets_random_seed),
      size=FLAGS.clients_per_round)
  # We convert the output to a list (instead of an np.ndarray) so that it can
  # be used as input to the iterative process.
  client_sampling_fn = lambda x: list(client_ids_fn(x))

  evaluate_fn = tff.learning.build_federated_evaluation(tff_model_fn)

  def test_fn(state):
    # The centralized test dataset is evaluated as a single-entry list.
    return evaluate_fn(
        iterative_process.get_model_weights(state), [emnist_test])

  def validation_fn(state, round_num):
    # Validation ignores the round number and matches the test evaluation.
    del round_num
    return evaluate_fn(
        iterative_process.get_model_weights(state), [emnist_test])

  training_loop.run(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn,
      total_rounds=FLAGS.total_rounds,
      experiment_name=FLAGS.experiment_name,
      root_output_dir=FLAGS.root_output_dir,
      rounds_per_eval=FLAGS.rounds_per_eval,
      rounds_per_checkpoint=FLAGS.rounds_per_checkpoint)
def configure_training(task_spec: training_specs.TaskSpec,
                       eval_spec: Optional[training_specs.EvalSpec] = None,
                       model: str = 'cnn') -> training_specs.RunnerSpec:
  """Builds a `RunnerSpec` for federated EMNIST character recognition.

  The function loads the federated EMNIST data, creates the client
  preprocessing function, wraps the selected Keras model as a TFF model, and
  hands it to `task_spec.iterative_process_builder`. The resulting iterative
  process is composed with a dataset computation so that it is compatible
  with `federated_research.utils.training_loop`. Validation and testing use
  federated evaluation when `eval_spec` is given and centralized evaluation
  otherwise.

  Args:
    task_spec: A `TaskSpec` supplying the iterative process builder, the
      number of client epochs per round, the client batch size, the number of
      clients per round and the sampling random seed.
    eval_spec: An optional `EvalSpec` configuring federated evaluation (client
      batch size, clients per validation/test round, sampling seed). If set
      to None, centralized evaluation is used for validation and testing.
    model: Which model to train. Must be `cnn` (a CNN model) or `2nn` (a
      densely connected 2-layer model).

  Returns:
    A `RunnerSpec` holding the composed training process, the client sampling
    function, and the validation and test functions.

  Raises:
    ValueError: If `model` is neither `cnn` nor `2nn`.
  """
  emnist_task = 'digit_recognition'

  federated_train, federated_test = tff.simulation.datasets.emnist.load_data(
      only_digits=False)

  train_preprocess_fn = emnist_dataset.create_preprocess_fn(
      num_epochs=task_spec.client_epochs_per_round,
      batch_size=task_spec.client_batch_size,
      emnist_task=emnist_task)
  input_spec = train_preprocess_fn.type_signature.result.element

  if model not in ('cnn', '2nn'):
    raise ValueError(
        'Cannot handle model flag [{!s}], must be one of {!s}.'.format(
            model, EMNIST_MODELS))
  if model == 'cnn':
    keras_model_fn = emnist_models.create_conv_dropout_model
  else:
    keras_model_fn = emnist_models.create_two_hidden_layer_model
  model_builder = functools.partial(keras_model_fn, only_digits=False)

  loss_builder = tf.keras.losses.SparseCategoricalCrossentropy

  def metrics_builder():
    return [tf.keras.metrics.SparseCategoricalAccuracy()]

  def tff_model_fn() -> tff.learning.Model:
    return tff.learning.from_keras_model(
        keras_model=model_builder(),
        input_spec=input_spec,
        loss=loss_builder(),
        metrics=metrics_builder())

  iterative_process = task_spec.iterative_process_builder(tff_model_fn)

  # Never request more training clients per round than the module-level total.
  clients_per_train_round = min(task_spec.clients_per_round,
                                TOTAL_NUM_TRAIN_CLIENTS)

  if hasattr(federated_train, 'dataset_computation'):
    # Clients are sampled by id, and the dataset for each id is built and
    # preprocessed inside the composed computation.
    @tff.tf_computation(tf.string)
    def build_train_dataset_from_client_id(client_id):
      client_dataset = federated_train.dataset_computation(client_id)
      return train_preprocess_fn(client_dataset)

    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        build_train_dataset_from_client_id, iterative_process)
    client_ids_fn = training_utils.build_sample_fn(
        federated_train.client_ids,
        size=clients_per_train_round,
        replace=False,
        random_seed=task_spec.sampling_random_seed)

    def client_sampling_fn(x):
      # We convert the output to a list (instead of an np.ndarray) so that it
      # can be used as input to the iterative process.
      return list(client_ids_fn(x))
  else:
    # Clients are sampled as datasets and the preprocessing function itself
    # is composed with the iterative process.
    training_process = tff.simulation.compose_dataset_computation_with_iterative_process(
        train_preprocess_fn, iterative_process)
    client_sampling_fn = training_utils.build_client_datasets_fn(
        dataset=federated_train,
        clients_per_round=clients_per_train_round,
        random_seed=task_spec.sampling_random_seed)

  # Expose the weight accessor of the original process on the composed one.
  training_process.get_model_weights = iterative_process.get_model_weights

  if eval_spec:

    def cap_eval_clients(requested):
      # None means "use every test client"; otherwise cap at the total.
      if requested is None:
        return TOTAL_NUM_TEST_CLIENTS
      return min(requested, TOTAL_NUM_TEST_CLIENTS)

    clients_per_validation_round = cap_eval_clients(
        eval_spec.clients_per_validation_round)
    clients_per_test_round = cap_eval_clients(eval_spec.clients_per_test_round)

    test_preprocess_fn = emnist_dataset.create_preprocess_fn(
        num_epochs=1,
        batch_size=eval_spec.client_batch_size,
        shuffle_buffer_size=1,
        emnist_task=emnist_task)
    preprocessed_test = federated_test.preprocess(test_preprocess_fn)

    def eval_metrics_builder():
      crossentropy = tf.keras.metrics.SparseCategoricalCrossentropy()
      accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
      return [crossentropy, accuracy]

    federated_eval_fn = training_utils.build_federated_evaluate_fn(
        model_builder=model_builder, metrics_builder=eval_metrics_builder)

    validation_client_sampling_fn = training_utils.build_client_datasets_fn(
        preprocessed_test,
        clients_per_validation_round,
        random_seed=eval_spec.sampling_random_seed)
    test_client_sampling_fn = training_utils.build_client_datasets_fn(
        preprocessed_test,
        clients_per_test_round,
        random_seed=eval_spec.sampling_random_seed)

    def validation_fn(model_weights, round_num):
      sampled_clients = validation_client_sampling_fn(round_num)
      return federated_eval_fn(model_weights, sampled_clients)

    def test_fn(model_weights):
      # We fix the round number to get deterministic behavior.
      sampled_clients = test_client_sampling_fn(0)
      return federated_eval_fn(model_weights, sampled_clients)

  else:
    _, central_emnist_test = emnist_dataset.get_centralized_datasets(
        only_digits=False, emnist_task=emnist_task)

    test_fn = training_utils.build_centralized_evaluate_fn(
        eval_dataset=central_emnist_test,
        model_builder=model_builder,
        loss_builder=loss_builder,
        metrics_builder=metrics_builder)

    def validation_fn(model_weights, round_num):
      # Validation ignores the round number and reuses the test evaluation.
      del round_num
      return test_fn(model_weights)

  return training_specs.RunnerSpec(
      iterative_process=training_process,
      client_datasets_fn=client_sampling_fn,
      validation_fn=validation_fn,
      test_fn=test_fn)