def test_metric_correctness(self, distribution):
  with self.cached_session():
    keras.backend.set_image_data_format('channels_last')
    num_samples = 10000

    x_train = np.random.randint(0, 2, num_samples)
    x_train = np.reshape(x_train, (num_samples, 1))
    y_train = x_train
    x_train = x_train.astype('float32')
    y_train = y_train.astype('float32')

    # Create identity model.
    model = keras.Sequential()
    model.add(
        keras.layers.Dense(1, input_shape=(1,), kernel_initializer='ones'))
    model.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=gradient_descent.GradientDescentOptimizer(0.5),
        metrics=[keras.metrics.BinaryAccuracy()],
        distribute=distribution)

    batch_size = 64
    if not distributed_training_utils.global_batch_size_supported(
        distribution):
      batch_size //= distribution.num_replicas_in_sync
    train_dataset = dataset_ops.Dataset.from_tensor_slices((x_train, y_train))
    train_dataset = batch_wrapper(train_dataset, batch_size, distribution)

    history = model.fit(x=train_dataset, epochs=2, steps_per_epoch=10)
    self.assertEqual(history.history['binary_accuracy'], [1.0, 1.0])
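# `batch_wrapper` is used by the tests in this section but not defined here.
# A minimal sketch of what it is assumed to do, based on how it is called
# (optionally repeat the dataset, then batch it, dropping the remainder for
# strategies such as TPU that need fully defined input shapes):
def batch_wrapper(dataset, batch_size, distribution, repeat=None):
  if repeat:
    dataset = dataset.repeat(repeat)
  # Assumption: TPUs require fully defined shapes, so drop the last partial
  # batch; other strategies can take a ragged final batch.
  if isinstance(distribution, tpu_strategy.TPUStrategy):
    return dataset.batch(batch_size, drop_remainder=True)
  return dataset.batch(batch_size)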
def test_calculating_input_params_no_steps_with_batch_size(self, distribution):
  # Calculate the per_replica_batch_size scaling factor for strategies
  # that use per_core_batch_size.
  replica_scale_factor = 1.0
  if not distributed_training_utils.global_batch_size_supported(distribution):
    replica_scale_factor = distribution.num_replicas_in_sync

  with self.cached_session():
    input_64_samples = np.zeros((64, 3), dtype=np.float32)

    # Computed steps is correct for the specified batch size.
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=None, batch_size=16)
    self.assertEqual(batch_size, 16)
    self.assertEqual(steps, 4 // replica_scale_factor)

    # Computed steps is correct for the specified batch size.
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=None, batch_size=32)
    self.assertEqual(batch_size, 32)
    self.assertEqual(steps, 2 // replica_scale_factor)

    # The number of samples is not divisible by the global batch size.
    with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
      distributed_training_utils.get_input_params(
          distribution, input_64_samples, steps=None, batch_size=20)

    # The number of samples is not divisible by the global batch size.
    with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
      distributed_training_utils.get_input_params(
          distribution, input_64_samples, steps=None, batch_size=3)
def test_calculating_input_params_no_steps_no_batch_size(self, distribution):
  # Calculate the per_replica_batch_size scaling factor for strategies
  # that use per_core_batch_size.
  replica_scale_factor = 1.0
  if not distributed_training_utils.global_batch_size_supported(distribution):
    replica_scale_factor = distribution.num_replicas_in_sync

  with self.cached_session():
    # Input samples of different sizes.
    input_20_samples = np.zeros((20, 3), dtype=np.float32)
    input_63_samples = np.zeros((63, 3), dtype=np.float32)
    input_64_samples = np.zeros((64, 3), dtype=np.float32)

    # The default global batch size 32 for an input with 64 samples runs in
    # 2 steps.
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=None, batch_size=None)
    self.assertEqual(batch_size, 32 // replica_scale_factor)
    self.assertEqual(steps, 2)

    # Computed global batch size 20 is lower than 32 if we pass fewer samples.
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_20_samples, steps=None, batch_size=None)
    self.assertEqual(batch_size, 20 // replica_scale_factor)
    self.assertEqual(steps, 1)

    # The default global batch size 32 cannot be used with 63 samples.
    with self.assertRaisesRegexp(ValueError, 'not divisible by batch size'):
      distributed_training_utils.get_input_params(
          distribution, input_63_samples, steps=None, batch_size=None)
def get_batch_size(global_batch_size, distribution):
  batch_size = global_batch_size
  # TODO(b/118776054): Use global batch size for Keras/DS support.
  use_per_core_batch_size = (
      distribution and
      not distributed_training_utils.global_batch_size_supported(distribution))
  if use_per_core_batch_size:
    batch_size //= distribution.num_replicas_in_sync
  return batch_size
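# Illustrative usage (hypothetical numbers): with a strategy that reports
# num_replicas_in_sync == 4 and does not support a global batch size,
# get_batch_size(64, strategy) returns 64 // 4 == 16, i.e. the per-replica
# batch size; with a global-batch-size strategy it returns 64 unchanged, and
# with distribution=None it is a no-op.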
def test_calculating_input_params_with_steps_no_batch_size(self, distribution):
  # Calculate the per_replica_batch_size scaling factor for strategies
  # that use per_core_batch_size.
  replica_scale_factor = 1.0
  if not distributed_training_utils.global_batch_size_supported(distribution):
    replica_scale_factor = distribution.num_replicas_in_sync

  with self.cached_session():
    # Input samples of different sizes.
    input_63_samples = np.zeros((63, 3), dtype=np.float32)
    input_64_samples = np.zeros((64, 3), dtype=np.float32)

    # Computed global batch size is correct for 1 specified step.
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=1, batch_size=None)
    self.assertEqual(batch_size, 64 // replica_scale_factor)
    self.assertEqual(steps, 1)

    # Computed global batch size is correct for 2 specified steps.
    steps, batch_size = distributed_training_utils.get_input_params(
        distribution, input_64_samples, steps=2, batch_size=None)
    self.assertEqual(batch_size, 32 // replica_scale_factor)
    self.assertEqual(steps, 2)

    # All samples cannot be consumed in the specified number of steps.
    with self.assertRaisesRegexp(ValueError, 'not divisible by steps'):
      distributed_training_utils.get_input_params(
          distribution, input_63_samples, steps=2, batch_size=None)

    # This case differs between strategies because the supported batch size
    # is global for some strategies and per-replica for others.
    if replica_scale_factor == 1:
      # Computed global batch size is correct even if it is not shardable.
      steps, batch_size = distributed_training_utils.get_input_params(
          distribution, input_63_samples, steps=3, batch_size=None)
      self.assertEqual(batch_size, 21)
      self.assertEqual(steps, 3)
    else:
      # Computed global batch size cannot be sharded across replicas.
      with self.assertRaisesRegexp(ValueError,
                                   'could not be sharded evenly '
                                   'across the sync replicas'):
        distributed_training_utils.get_input_params(
            distribution, input_63_samples, steps=1, batch_size=None)
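# A minimal sketch (not the real implementation) of the inference rules the
# three get_input_params tests above exercise, for the global-batch-size case
# (replica_scale_factor == 1). The per-replica scaling and sharding checks are
# omitted; the error messages mirror the ones asserted above, everything else
# is an assumption:
def _sketch_get_input_params(num_samples, steps=None, batch_size=None,
                             default_global_batch_size=32):
  if batch_size is None:
    if steps is None:
      # No hints given: fall back to the default global batch size, capped
      # by the number of samples (e.g. 20 samples -> batch size 20).
      batch_size = min(default_global_batch_size, num_samples)
    else:
      # Steps given: spread all samples evenly across the requested steps.
      if num_samples % steps:
        raise ValueError('The number of samples is not divisible by steps')
      batch_size = num_samples // steps
  # Either way, the samples must fill a whole number of batches.
  if num_samples % batch_size:
    raise ValueError('The number of samples is not divisible by batch size')
  return num_samples // batch_size, batch_size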
def get_correctness_test_inputs(use_numpy, use_validation_data,
                                with_distribution, x_train, y_train,
                                x_predict):
  """Generates the inputs for the correctness check of Keras with DS."""
  training_epochs = 2
  global_batch_size = 64
  batch_size = global_batch_size
  # TODO(b/118776054): Use global batch size for Keras/DS support.
  use_per_core_batch_size = (
      with_distribution and
      not distributed_training_utils.global_batch_size_supported(
          with_distribution))
  if use_per_core_batch_size:
    batch_size //= with_distribution.num_replicas_in_sync

  if use_numpy:
    training_inputs = {
        'batch_size': batch_size,
        'x': x_train,
        'y': y_train,
        'epochs': training_epochs,
        'shuffle': False,
    }

    if use_validation_data:
      eval_inputs = None
      training_inputs['validation_data'] = (x_train, y_train)
    else:
      eval_inputs = {
          'batch_size': batch_size,
          'x': x_train,
          'y': y_train,
      }
    predict_inputs = {
        'x': np.array(x_predict, dtype=np.float32),
    }
  else:
    # For dataset inputs, we do not pass batch_size to
    # keras.fit/evaluate/predict. The batch size is part of the dataset.
    train_dataset = dataset_ops.Dataset.from_tensor_slices(
        (x_train, y_train))
    x = batch_wrapper(
        train_dataset, batch_size, with_distribution, repeat=training_epochs)

    training_inputs = {
        'batch_size': None,
        'x': x,
        'y': None,
        'epochs': training_epochs,
        'shuffle': False,
        'steps_per_epoch': len(x_train) // global_batch_size,
    }
    if use_validation_data:
      eval_inputs = None  # Remove the eval_inputs
      eval_dataset = dataset_ops.Dataset.from_tensor_slices(
          (x_train, y_train))
      x = batch_wrapper(eval_dataset, batch_size, with_distribution)
      training_inputs['validation_data'] = x
      training_inputs['validation_steps'] = 5
    else:
      eval_inputs = {
          'batch_size': None,
          'x': x,
          'y': None,
          'steps': 20,
      }

    predict_batch_size = len(x_predict)
    if use_per_core_batch_size:
      predict_batch_size //= with_distribution.num_replicas_in_sync
    predict_dataset = dataset_ops.Dataset.from_tensor_slices(x_predict)
    predict_dataset = batch_wrapper(predict_dataset, predict_batch_size,
                                    with_distribution)
    predict_inputs = {
        'steps': 1,
        'x': predict_dataset,
    }

  return training_inputs, eval_inputs, predict_inputs
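# The three dicts returned above are designed to be splatted directly into
# the Keras entry points. A sketch of the intended call sites (the `model`,
# `distribution`, and data objects are assumed to come from the surrounding
# test):
#
#   training_inputs, eval_inputs, predict_inputs = get_correctness_test_inputs(
#       use_numpy, use_validation_data, distribution, x_train, y_train,
#       x_predict)
#   model.fit(**training_inputs)
#   if eval_inputs is not None:
#     model.evaluate(**eval_inputs)
#   model.predict(**predict_inputs)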