def test_non_supported_task_raises(self):
    """Checks that an unrecognized `emnist_task` is rejected with a `ValueError`."""
    expected_message = (
        'emnist_task must be one of "character_recognition" or "autoencoder".'
    )
    # Build the spec outside the `with` so only `create_preprocess_fn` is
    # allowed to satisfy the expected failure.
    spec = client_spec.ClientSpec(num_epochs=1, batch_size=1)
    with self.assertRaisesRegex(ValueError, expected_message):
        emnist_preprocessing.create_preprocess_fn(spec, emnist_task='bad_task')
def test_non_supported_task_raises(self):
    """Checks that an unrecognized `emnist_task` is rejected with a `ValueError`."""
    expected_message = (
        'emnist_task must be one of "digit_recognition" or "autoencoder".'
    )
    preprocess_kwargs = {
        'num_epochs': 1,
        'batch_size': 1,
        'shuffle_buffer_size': 1,
        'emnist_task': 'bad_task',
    }
    with self.assertRaisesRegex(ValueError, expected_message):
        emnist_preprocessing.create_preprocess_fn(**preprocess_kwargs)
def create_character_recognition_task_from_datasets(
    train_client_spec: client_spec.ClientSpec,
    eval_client_spec: Optional[client_spec.ClientSpec],
    model_id: Union[str, CharacterRecognitionModel],
    only_digits: bool,
    train_data: client_data.ClientData,
    test_data: client_data.ClientData) -> baseline_task.BaselineTask:
    """Creates a baseline task for character recognition on EMNIST.

    Args:
      train_client_spec: A `tff.simulation.baselines.ClientSpec` specifying how
        to preprocess train client data.
      eval_client_spec: An optional `tff.simulation.baselines.ClientSpec`
        specifying how to preprocess evaluation client data. If set to `None`,
        the evaluation datasets will use a batch size of 64 with no extra
        preprocessing.
      model_id: A string identifier for a character recognition model. Must be
        one of 'cnn_dropout', 'cnn', or '2nn'. These correspond respectively to
        a CNN model with dropout, a CNN model with no dropout, and a densely
        connected network with two hidden layers of width 200.
      only_digits: A boolean indicating whether to use the smaller EMNIST-10
        dataset with only 10 numeric classes (`True`) or the full EMNIST-62
        dataset containing 62 alphanumeric classes (`False`). The value is
        forwarded to the model constructor.
      train_data: A `tff.simulation.datasets.ClientData` used for training.
      test_data: A `tff.simulation.datasets.ClientData` used for testing.

    Returns:
      A `tff.simulation.baselines.BaselineTask`.
    """
    emnist_task = 'character_recognition'

    # Default evaluation spec: one pass over the data in batches of 64, with a
    # shuffle buffer of size 1.
    if eval_client_spec is None:
        eval_client_spec = client_spec.ClientSpec(
            num_epochs=1, batch_size=64, shuffle_buffer_size=1)

    train_preprocess_fn = emnist_preprocessing.create_preprocess_fn(
        train_client_spec, emnist_task=emnist_task)
    eval_preprocess_fn = emnist_preprocessing.create_preprocess_fn(
        eval_client_spec, emnist_task=emnist_task)

    task_datasets = task_data.BaselineTaskDatasets(
        train_data=train_data,
        test_data=test_data,
        validation_data=None,
        train_preprocess_fn=train_preprocess_fn,
        eval_preprocess_fn=eval_preprocess_fn)

    def model_fn() -> model.Model:
        # A fresh Keras model, loss and metric are constructed on every call.
        return keras_utils.from_keras_model(
            keras_model=_get_character_recognition_model(model_id, only_digits),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            input_spec=task_datasets.element_type_structure,
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])

    return baseline_task.BaselineTask(task_datasets, model_fn)
def create_autoencoder_task_from_datasets(
    train_client_spec: client_spec.ClientSpec,
    eval_client_spec: Optional[client_spec.ClientSpec],
    train_data: client_data.ClientData,
    test_data: client_data.ClientData) -> baseline_task.BaselineTask:
    """Builds the EMNIST autoencoder baseline task from the given datasets.

    Args:
      train_client_spec: A `tff.simulation.baselines.ClientSpec` describing how
        training client data is preprocessed.
      eval_client_spec: An optional `tff.simulation.baselines.ClientSpec`
        describing how evaluation client data is preprocessed. When `None`, a
        spec with one epoch, batch size 64 and a shuffle buffer of size 1 is
        used instead.
      train_data: A `tff.simulation.datasets.ClientData` used for training.
      test_data: A `tff.simulation.datasets.ClientData` used for testing.

    Returns:
      A `tff.simulation.baselines.BaselineTask`.
    """
    task_name = 'autoencoder'

    if eval_client_spec is None:
        eval_client_spec = client_spec.ClientSpec(
            num_epochs=1, batch_size=64, shuffle_buffer_size=1)

    # Train function first, then eval, matching the order of the specs.
    preprocess_train, preprocess_eval = [
        emnist_preprocessing.create_preprocess_fn(spec, emnist_task=task_name)
        for spec in (train_client_spec, eval_client_spec)
    ]

    datasets = task_data.BaselineTaskDatasets(
        train_data=train_data,
        test_data=test_data,
        validation_data=None,
        train_preprocess_fn=preprocess_train,
        eval_preprocess_fn=preprocess_eval)

    def model_fn() -> model.Model:
        # Everything is rebuilt on each call so no Keras objects are shared.
        autoencoder = emnist_models.create_autoencoder_model()
        mse_loss = tf.keras.losses.MeanSquaredError()
        element_spec = datasets.element_type_structure
        eval_metrics = [
            tf.keras.metrics.MeanSquaredError(),
            tf.keras.metrics.MeanAbsoluteError(),
        ]
        return keras_utils.from_keras_model(
            keras_model=autoencoder,
            loss=mse_loss,
            input_spec=element_spec,
            metrics=eval_metrics)

    return baseline_task.BaselineTask(datasets, model_fn)
def test_ds_length_with_max_elements(self, max_elements):
    """Checks the preprocessed length is `min(repeat count, max_elements)`.

    The length is whatever `_compute_length_of_dataset` reports for the
    preprocessed dataset (one epoch, batch size 1).
    """
    num_repeats = 10
    dataset = tf.data.Dataset.from_tensor_slices(TEST_DATA).repeat(num_repeats)
    spec = client_spec.ClientSpec(
        num_epochs=1, batch_size=1, max_elements=max_elements)
    preprocessed = emnist_preprocessing.create_preprocess_fn(spec)(dataset)

    actual_length = _compute_length_of_dataset(preprocessed)
    expected_length = min(num_repeats, max_elements)
    self.assertEqual(actual_length, expected_length)
def test_ds_length_is_ceil_num_epochs_over_batch_size(
        self, num_epochs, batch_size):
    """Checks the preprocessed length equals `ceil(num_epochs / batch_size)`.

    The length is whatever `_compute_length_of_dataset` reports for the
    preprocessed `TEST_DATA` dataset.
    """
    dataset = tf.data.Dataset.from_tensor_slices(TEST_DATA)
    spec = client_spec.ClientSpec(num_epochs=num_epochs, batch_size=batch_size)
    preprocessed = emnist_preprocessing.create_preprocess_fn(spec)(dataset)

    actual_length = _compute_length_of_dataset(preprocessed)
    expected_length = tf.cast(
        tf.math.ceil(num_epochs / batch_size), tf.int32)
    self.assertEqual(actual_length, expected_length)
def test_autoencoder_preprocess_returns_correct_elements(self):
    """Checks the element spec and first element for the 'autoencoder' task.

    Both components of each element are expected to be float32 tensors of
    shape (batch, 784), and the first element is expected to be all ones.
    """
    dataset = tf.data.Dataset.from_tensor_slices(TEST_DATA)
    spec = client_spec.ClientSpec(
        num_epochs=1, batch_size=20, shuffle_buffer_size=1)
    preprocess = emnist_preprocessing.create_preprocess_fn(
        spec, emnist_task='autoencoder')
    preprocessed = preprocess(dataset)

    # Input and target share the same flattened spec.
    flat_spec = tf.TensorSpec(shape=(None, 784), dtype=tf.float32)
    self.assertEqual(preprocessed.element_spec, (flat_spec, flat_spec))

    first_element = next(iter(preprocessed))
    ones = tf.ones(shape=(1, 784), dtype=tf.float32)
    self.assertAllClose(self.evaluate(first_element), (ones, ones))
def test_digit_recognition_preprocess_returns_correct_elements(self):
    """Checks the element spec and first element for 'digit_recognition'.

    Elements are expected to be (float32 images of shape (batch, 28, 28, 1),
    int32 labels of shape (batch,)), and the first element is expected to be
    all zeros.
    """
    dataset = tf.data.Dataset.from_tensor_slices(TEST_DATA)
    preprocess = emnist_preprocessing.create_preprocess_fn(
        num_epochs=1,
        batch_size=20,
        shuffle_buffer_size=1,
        emnist_task='digit_recognition')
    preprocessed = preprocess(dataset)

    image_spec = tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)
    label_spec = tf.TensorSpec(shape=(None,), dtype=tf.int32)
    self.assertEqual(preprocessed.element_spec, (image_spec, label_spec))

    first_element = next(iter(preprocessed))
    zero_images = tf.zeros(shape=(1, 28, 28, 1), dtype=tf.float32)
    zero_labels = tf.zeros(shape=(1,), dtype=tf.int32)
    self.assertAllClose(
        self.evaluate(first_element), (zero_images, zero_labels))
def test_preprocess_fn_with_negative_epochs_raises(self):
    """Checks that a negative `num_epochs` is rejected with a `ValueError`."""
    expected_message = 'num_epochs must be a positive integer'
    invalid_kwargs = {
        'num_epochs': -2,
        'batch_size': 1,
        'shuffle_buffer_size': 1,
    }
    with self.assertRaisesRegex(ValueError, expected_message):
        emnist_preprocessing.create_preprocess_fn(**invalid_kwargs)