class ConvLSTM1DTest(keras_parameterized.TestCase):

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          data_format=['channels_first', 'channels_last'],
          return_sequences=[True, False]))
  def test_conv_lstm(self, data_format, return_sequences):
    num_row = 3
    filters = 3
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    sequence_len = 2
    if data_format == 'channels_first':
      inputs = np.random.rand(num_samples, sequence_len, input_channel,
                              input_num_row)
    else:
      inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                              input_channel)

    # Test for return state:
    x = keras.Input(batch_shape=inputs.shape)
    kwargs = {
        'data_format': data_format,
        'return_sequences': return_sequences,
        'return_state': True,
        'stateful': True,
        'filters': filters,
        'kernel_size': num_row,
        'padding': 'valid',
    }
    layer = keras.layers.ConvLSTM1D(**kwargs)
    layer.build(inputs.shape)
    outputs = layer(x)
    _, states = outputs[0], outputs[1:]
    self.assertEqual(len(states), 2)
    model = keras.models.Model(x, states[0])
    state = model.predict(inputs)

    self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)

    # Test for output shape:
    testing_utils.layer_test(
        keras.layers.ConvLSTM1D,
        kwargs={
            'data_format': data_format,
            'return_sequences': return_sequences,
            'filters': filters,
            'kernel_size': num_row,
            'padding': 'valid'
        },
        input_shape=inputs.shape)
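# Illustrative sketch (not part of the original test suite; assumes the
# public `tf.keras` API): the shape contract that test_conv_lstm exercises.
# With `padding='valid'` and `kernel_size=3` over 5 spatial rows, the spatial
# axis shrinks to 3, and `return_sequences` decides whether the time axis of
# length 2 is kept.
def _convlstm1d_shape_sketch():
  import numpy as np
  import tensorflow as tf

  x = np.random.rand(1, 2, 5, 2)  # (samples, time, rows, channels)
  seq = tf.keras.layers.ConvLSTM1D(
      filters=3, kernel_size=3, padding='valid', return_sequences=True)
  print(seq(x).shape)   # (1, 2, 3, 3): one output per timestep
  last = tf.keras.layers.ConvLSTM1D(filters=3, kernel_size=3, padding='valid')
  print(last(x).shape)  # (1, 3, 3): only the final timestep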
class MergeLayersTest(keras_parameterized.TestCase):

  def test_merge_add(self):
    i1 = keras.layers.Input(shape=(4, 5))
    i2 = keras.layers.Input(shape=(4, 5))
    i3 = keras.layers.Input(shape=(4, 5))

    add_layer = keras.layers.Add()
    o = add_layer([i1, i2, i3])
    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
    model = keras.models.Model([i1, i2, i3], o)
    model.run_eagerly = testing_utils.should_run_eagerly()

    x1 = np.random.random((2, 4, 5))
    x2 = np.random.random((2, 4, 5))
    x3 = np.random.random((2, 4, 5))
    out = model.predict([x1, x2, x3])
    self.assertEqual(out.shape, (2, 4, 5))
    self.assertAllClose(out, x1 + x2 + x3, atol=1e-4)

    self.assertEqual(
        add_layer.compute_mask([i1, i2, i3], [None, None, None]), None)
    self.assertTrue(
        np.all(
            K.eval(
                add_layer.compute_mask(
                    [i1, i2], [K.variable(x1), K.variable(x2)]))))

    with self.assertRaisesRegex(ValueError, '`mask` should be a list.'):
      add_layer.compute_mask([i1, i2, i3], x1)
    with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'):
      add_layer.compute_mask(i1, [None, None, None])
    with self.assertRaisesRegex(ValueError, ' should have the same length.'):
      add_layer.compute_mask([i1, i2, i3], [None, None])

  def test_merge_subtract(self):
    i1 = keras.layers.Input(shape=(4, 5))
    i2 = keras.layers.Input(shape=(4, 5))
    i3 = keras.layers.Input(shape=(4, 5))

    subtract_layer = keras.layers.Subtract()
    o = subtract_layer([i1, i2])
    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()

    x1 = np.random.random((2, 4, 5))
    x2 = np.random.random((2, 4, 5))
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 4, 5))
    self.assertAllClose(out, x1 - x2, atol=1e-4)

    self.assertEqual(subtract_layer.compute_mask([i1, i2], [None, None]), None)
    self.assertTrue(
        np.all(
            K.eval(
                subtract_layer.compute_mask(
                    [i1, i2], [K.variable(x1), K.variable(x2)]))))

    with self.assertRaisesRegex(ValueError, '`mask` should be a list.'):
      subtract_layer.compute_mask([i1, i2], x1)
    with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'):
      subtract_layer.compute_mask(i1, [None, None])
    with self.assertRaisesRegex(ValueError,
                                'layer should be called on exactly 2 inputs'):
      subtract_layer([i1, i2, i3])
    with self.assertRaisesRegex(ValueError,
                                'layer should be called on exactly 2 inputs'):
      subtract_layer([i1])

  def test_merge_multiply(self):
    i1 = keras.layers.Input(shape=(4, 5))
    i2 = keras.layers.Input(shape=(4, 5))
    i3 = keras.layers.Input(shape=(4, 5))
    o = keras.layers.multiply([i1, i2, i3])
    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
    model = keras.models.Model([i1, i2, i3], o)
    model.run_eagerly = testing_utils.should_run_eagerly()

    x1 = np.random.random((2, 4, 5))
    x2 = np.random.random((2, 4, 5))
    x3 = np.random.random((2, 4, 5))
    out = model.predict([x1, x2, x3])
    self.assertEqual(out.shape, (2, 4, 5))
    self.assertAllClose(out, x1 * x2 * x3, atol=1e-4)

  def test_merge_average(self):
    i1 = keras.layers.Input(shape=(4, 5))
    i2 = keras.layers.Input(shape=(4, 5))
    o = keras.layers.average([i1, i2])
    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()

    x1 = np.random.random((2, 4, 5))
    x2 = np.random.random((2, 4, 5))
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 4, 5))
    self.assertAllClose(out, 0.5 * (x1 + x2), atol=1e-4)

  def test_merge_maximum(self):
    i1 = keras.layers.Input(shape=(4, 5))
    i2 = keras.layers.Input(shape=(4, 5))
    o = keras.layers.maximum([i1, i2])
    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()

    x1 = np.random.random((2, 4, 5))
    x2 = np.random.random((2, 4, 5))
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 4, 5))
    self.assertAllClose(out, np.maximum(x1, x2), atol=1e-4)

  def test_merge_minimum(self):
    i1 = keras.layers.Input(shape=(4, 5))
    i2 = keras.layers.Input(shape=(4, 5))
    o = keras.layers.minimum([i1, i2])
    self.assertListEqual(o.shape.as_list(), [None, 4, 5])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()

    x1 = np.random.random((2, 4, 5))
    x2 = np.random.random((2, 4, 5))
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 4, 5))
    self.assertAllClose(out, np.minimum(x1, x2), atol=1e-4)

  def test_merge_concatenate(self):
    i1 = keras.layers.Input(shape=(4, 5))
    i2 = keras.layers.Input(shape=(4, 5))
    concat_layer = keras.layers.Concatenate(axis=1)
    o = concat_layer([i1, i2])
    self.assertListEqual(o.shape.as_list(), [None, 8, 5])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()

    x1 = np.random.random((2, 4, 5))
    x2 = np.random.random((2, 4, 5))
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 8, 5))
    self.assertAllClose(out, np.concatenate([x1, x2], axis=1), atol=1e-4)

    self.assertEqual(concat_layer.compute_mask([i1, i2], [None, None]), None)
    self.assertTrue(
        np.all(
            K.eval(
                concat_layer.compute_mask(
                    [i1, i2], [K.variable(x1), K.variable(x2)]))))

    # Should work with unit-length input.
    unit_length_o = concat_layer([i1])
    self.assertListEqual(unit_length_o.shape.as_list(), i1.shape.as_list())

    with self.assertRaisesRegex(ValueError, '`mask` should be a list.'):
      concat_layer.compute_mask([i1, i2], x1)
    with self.assertRaisesRegex(ValueError, '`inputs` should be a list.'):
      concat_layer.compute_mask(i1, [None, None])
    with self.assertRaisesRegex(ValueError, 'should have the same length'):
      concat_layer.compute_mask([i1, i2], [None])
    with self.assertRaisesRegex(ValueError,
                                'layer should be called on a list of inputs'):
      concat_layer(i1)

  def test_merge_dot(self):
    i1 = keras.layers.Input(shape=(4,))
    i2 = keras.layers.Input(shape=(4,))
    o = keras.layers.dot([i1, i2], axes=1)
    self.assertListEqual(o.shape.as_list(), [None, 1])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()
    _ = keras.layers.Dot(axes=1).get_config()

    x1 = np.random.random((2, 4))
    x2 = np.random.random((2, 4))
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 1))
    expected = np.zeros((2, 1))
    expected[0, 0] = np.dot(x1[0], x2[0])
    expected[1, 0] = np.dot(x1[1], x2[1])
    self.assertAllClose(out, expected, atol=1e-4)

    # Test with negative tuple of axes.
    o = keras.layers.dot([i1, i2], axes=(-1, -1))
    self.assertListEqual(o.shape.as_list(), [None, 1])
    model = keras.models.Model([i1, i2], o)
    model.run_eagerly = testing_utils.should_run_eagerly()
    out = model.predict([x1, x2])
    self.assertEqual(out.shape, (2, 1))
    self.assertAllClose(out, expected, atol=1e-4)

    # Test compute_output_shape.
    layer = keras.layers.Dot(axes=-1)
    self.assertEqual(layer.compute_output_shape([(4, 5), (4, 5)]), (4, 1))

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(layer=[
          keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply,
          keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average,
          keras.layers.Concatenate
      ]))
  def test_merge_with_ragged_input(self, layer):
    ragged_data = tf.ragged.constant(
        [[1., 1., 1.], [1., 1.], [1., 1., 1., 1.]], ragged_rank=1)
    dense_data = ragged_data.to_tensor()
    input1 = keras.Input(shape=(None,), ragged=True)
    input2 = keras.Input(shape=(None,), ragged=True)
    out = keras.layers.Add()([input1, input2])
    model = keras.models.Model(inputs=[input1, input2], outputs=out)
    out_ragged = model.predict([ragged_data, ragged_data], steps=1)
    out_ragged = ragged_tensor.convert_to_tensor_or_ragged_tensor(
        out_ragged).to_tensor()

    input1 = keras.Input(shape=(None,))
    input2 = keras.Input(shape=(None,))
    out = keras.layers.Add()([input1, input2])
    model = keras.models.Model(inputs=[input1, input2], outputs=out)
    out_dense = model.predict([dense_data, dense_data], steps=1)

    self.assertAllEqual(out_dense, out_ragged)

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(layer=[
          keras.layers.Add, keras.layers.Subtract, keras.layers.Multiply,
          keras.layers.Minimum, keras.layers.Maximum, keras.layers.Average
      ]))
  def test_merge_with_scalar_input(self, layer):
    x1 = np.array((1))
    x2 = np.array((2))
    out = layer()([x1, x2])
    self.assertEqual(out.shape, ())
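# Illustrative sketch (not part of the original test suite; assumes eager
# `tf.keras`): the `Dot` axes semantics checked in test_merge_dot above.
# With two inputs of shape (batch, 4), `axes=1` (equivalently the negative
# form `(-1, -1)`) contracts the feature axis, producing one scalar per
# sample.
def _dot_axes_sketch():
  import numpy as np
  import tensorflow as tf

  x1 = np.random.random((2, 4))
  x2 = np.random.random((2, 4))
  out = tf.keras.layers.Dot(axes=1)([x1, x2])
  print(out.shape)  # (2, 1)
  # Row-wise dot products match np.sum(x1 * x2, axis=1).
  print(np.allclose(out.numpy()[:, 0], np.sum(x1 * x2, axis=1)))  # True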
  if use_dataset:
    if action == "predict":
      input_data = tf.data.Dataset.from_tensor_slices(input_data).batch(
          batch_size)
    else:
      input_data = tf.data.Dataset.from_tensor_slices(
          (input_data, expected_output)).batch(batch_size)
      expected_output = None
  return (input_data, expected_output)


@keras_parameterized.run_with_all_model_types
@keras_parameterized.run_all_keras_modes
@parameterized.named_parameters(
    *testing_utils.generate_combinations_with_testcase_name(
        use_dict=[True, False],
        use_dataset=[True, False],
        action=["predict", "evaluate", "fit"]))
class SparseTensorInputTest(keras_parameterized.TestCase):

  def test_sparse_tensors(self, use_dict, use_dataset, action):
    data = [(tf.SparseTensor([[0, 0, 0], [1, 0, 0], [1, 0, 1]], [1, 2, 3],
                             [2, 1, 3]),
             np.array([[[1, -1, -1]], [[2, 3, -1]]])),
            (tf.SparseTensor([[0, 0, 0], [1, 0, 0], [1, 0, 1], [2, 0, 1]],
                             [5, 6, 7, 8], [3, 1, 4]),
             np.array([[[5, -1, -1, -1]], [[6, 7, -1, -1]],
                       [[-1, 8, -1, -1]]]))]
    # Prepare the model to test.
    input_name = get_input_name(use_dict)
    model_input = input_layer.Input(
        shape=(1, None), sparse=True, name=input_name,
class ConvLSTMTest(keras_parameterized.TestCase):

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          data_format=['channels_first', 'channels_last'],
          return_sequences=[True, False]))
  def test_conv_lstm(self, data_format, return_sequences):
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    if data_format == 'channels_first':
      inputs = np.random.rand(num_samples, sequence_len, input_channel,
                              input_num_row, input_num_col)
    else:
      inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                              input_num_col, input_channel)

    # Test for return state:
    x = keras.Input(batch_shape=inputs.shape)
    kwargs = {
        'data_format': data_format,
        'return_sequences': return_sequences,
        'return_state': True,
        'stateful': True,
        'filters': filters,
        'kernel_size': (num_row, num_col),
        'padding': 'valid'
    }
    layer = keras.layers.ConvLSTM2D(**kwargs)
    layer.build(inputs.shape)
    outputs = layer(x)
    _, states = outputs[0], outputs[1:]
    self.assertEqual(len(states), 2)
    model = keras.models.Model(x, states[0])
    state = model.predict(inputs)

    self.assertAllClose(keras.backend.eval(layer.states[0]), state, atol=1e-4)

    # Test for output shape:
    testing_utils.layer_test(
        keras.layers.ConvLSTM2D,
        kwargs={
            'data_format': data_format,
            'return_sequences': return_sequences,
            'filters': filters,
            'kernel_size': (num_row, num_col),
            'padding': 'valid'
        },
        input_shape=inputs.shape)

  def test_conv_lstm_statefulness(self):
    # Tests for statefulness.
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                            input_num_col, input_channel)

    with self.cached_session():
      model = keras.models.Sequential()
      kwargs = {
          'data_format': 'channels_last',
          'return_sequences': False,
          'filters': filters,
          'kernel_size': (num_row, num_col),
          'stateful': True,
          'batch_input_shape': inputs.shape,
          'padding': 'same'
      }
      layer = keras.layers.ConvLSTM2D(**kwargs)

      model.add(layer)
      model.compile(optimizer='sgd', loss='mse')
      out1 = model.predict(np.ones_like(inputs))

      # Train once so that the states change.
      model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape))
      out2 = model.predict(np.ones_like(inputs))

      # If the state is not reset, output should be different.
      self.assertNotEqual(out1.max(), out2.max())

      # Check that output changes after states are reset
      # (even though the model itself didn't change).
      layer.reset_states()
      out3 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out3.max(), out2.max())

      # Check that container-level reset_states() works.
      model.reset_states()
      out4 = model.predict(np.ones_like(inputs))
      self.assertAllClose(out3, out4, atol=1e-5)

      # Check that the call to `predict` updated the states.
      out5 = model.predict(np.ones_like(inputs))
      self.assertNotEqual(out4.max(), out5.max())

  def test_conv_lstm_regularizers(self):
    # Check regularizers.
    num_row = 3
    num_col = 3
    filters = 2
    num_samples = 1
    input_channel = 2
    input_num_row = 5
    input_num_col = 5
    sequence_len = 2
    inputs = np.random.rand(num_samples, sequence_len, input_num_row,
                            input_num_col, input_channel)

    with self.cached_session():
      kwargs = {
          'data_format': 'channels_last',
          'return_sequences': False,
          'kernel_size': (num_row, num_col),
          'stateful': True,
          'filters': filters,
          'batch_input_shape': inputs.shape,
          'kernel_regularizer': keras.regularizers.L1L2(l1=0.01),
          'recurrent_regularizer': keras.regularizers.L1L2(l1=0.01),
          'activity_regularizer': 'l2',
          'bias_regularizer': 'l2',
          'kernel_constraint': 'max_norm',
          'recurrent_constraint': 'max_norm',
          'bias_constraint': 'max_norm',
          'padding': 'same'
      }

      layer = keras.layers.ConvLSTM2D(**kwargs)
      layer.build(inputs.shape)
      self.assertEqual(len(layer.losses), 3)
      layer(keras.backend.variable(np.ones(inputs.shape)))
      self.assertEqual(len(layer.losses), 4)

  def test_conv_lstm_dropout(self):
    # Check dropout.
    with self.cached_session():
      testing_utils.layer_test(
          keras.layers.ConvLSTM2D,
          kwargs={
              'data_format': 'channels_last',
              'return_sequences': False,
              'filters': 2,
              'kernel_size': (3, 3),
              'padding': 'same',
              'dropout': 0.1,
              'recurrent_dropout': 0.1
          },
          input_shape=(1, 2, 5, 5, 2))

  def test_conv_lstm_cloning(self):
    with self.cached_session():
      model = keras.models.Sequential()
      model.add(keras.layers.ConvLSTM2D(5, 3, input_shape=(None, 5, 5, 3)))

      test_inputs = np.random.random((2, 4, 5, 5, 3))
      reference_outputs = model.predict(test_inputs)
      weights = model.get_weights()

    # Use a new graph to clone the model.
    with self.cached_session():
      clone = keras.models.clone_model(model)
      clone.set_weights(weights)

      outputs = clone.predict(test_inputs)
      self.assertAllClose(reference_outputs, outputs, atol=1e-5)

  def test_conv_lstm_with_initial_state(self):
    num_samples = 32
    sequence_len = 5
    encoder_inputs = keras.layers.Input((None, 32, 32, 3))
    encoder = keras.layers.ConvLSTM2D(
        filters=32,
        kernel_size=(3, 3),
        padding='same',
        return_sequences=False,
        return_state=True)
    _, state_h, state_c = encoder(encoder_inputs)
    encoder_states = [state_h, state_c]

    decoder_inputs = keras.layers.Input((None, 32, 32, 4))
    decoder_lstm = keras.layers.ConvLSTM2D(
        filters=32,
        kernel_size=(3, 3),
        padding='same',
        return_sequences=False,
        return_state=False)
    decoder_outputs = decoder_lstm(decoder_inputs, initial_state=encoder_states)
    output = keras.layers.Conv2D(
        1, (3, 3), padding='same', activation='relu')(decoder_outputs)
    model = keras.Model([encoder_inputs, decoder_inputs], output)
    model.compile(
        optimizer='sgd', loss='mse',
        run_eagerly=testing_utils.should_run_eagerly())
    x_1 = np.random.rand(num_samples, sequence_len, 32, 32, 3)
    x_2 = np.random.rand(num_samples, sequence_len, 32, 32, 4)
    y = np.random.rand(num_samples, 32, 32, 1)
    model.fit([x_1, x_2], y)

    model.predict([x_1, x_2])
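# Illustrative sketch (not part of the original test suite; assumes eager
# `tf.keras` and no dropout, so predictions are deterministic): the
# statefulness contract behind test_conv_lstm_statefulness. A `stateful=True`
# layer carries its state across `predict` calls, and `reset_states()`
# returns it to the initial zero state.
def _convlstm2d_stateful_sketch():
  import numpy as np
  import tensorflow as tf

  model = tf.keras.Sequential([
      tf.keras.layers.ConvLSTM2D(
          2, (3, 3), padding='same', stateful=True,
          batch_input_shape=(1, 2, 5, 5, 2))
  ])
  x = np.ones((1, 2, 5, 5, 2))
  out1 = model.predict(x)  # starts from the zero state
  out2 = model.predict(x)  # starts from the state left by the first call
  model.reset_states()
  out3 = model.predict(x)  # zero state again
  print(np.allclose(out1, out2))  # False
  print(np.allclose(out1, out3))  # True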
class CuDNNTest(keras_parameterized.TestCase):

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          return_sequences=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_return_sequence(self, layer_class, return_sequences):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units, 'return_sequences': return_sequences},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          layer_class=[keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM],
          go_backwards=[True, False]))
  @test_util.run_gpu_only
  def test_cudnn_rnn_go_backward(self, layer_class, go_backwards):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    testing_utils.layer_test(
        layer_class,
        kwargs={'units': units, 'go_backwards': go_backwards},
        input_shape=(num_samples, timesteps, input_size))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_return_state(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size))
    layer = layer_class(units, return_state=True, stateful=True)
    outputs = layer(inputs)
    _, state = outputs[0], outputs[1:]
    self.assertEqual(len(state), num_states)
    model = keras.models.Model(inputs, state[0])
    model.run_eagerly = testing_utils.should_run_eagerly()

    inputs = np.random.random((num_samples, timesteps, input_size))
    state = model.predict(inputs)
    np.testing.assert_allclose(
        keras.backend.eval(layer.states[0]), state, atol=1e-4)

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_time_major_input(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32

    model = keras.models.Sequential()
    model.add(
        keras.layers.Lambda(lambda t: tf.compat.v1.transpose(t, [1, 0, 2])))
    layer = layer_class(units, time_major=True, return_sequences=True)
    model.add(layer)
    model.add(
        keras.layers.Lambda(lambda t: tf.compat.v1.transpose(t, [1, 0, 2])))
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=0.001))
    model.fit(
        np.ones((num_samples, timesteps, input_size)),
        np.ones((num_samples, timesteps, units)))
    out = model.predict(np.ones((num_samples, timesteps, input_size)))
    self.assertEqual(out.shape, (num_samples, timesteps, units))

  @parameterized.named_parameters(
      ('cudnngru', keras.layers.CuDNNGRU),
      ('cudnnlstm', keras.layers.CuDNNLSTM),
  )
  @test_util.run_gpu_only
  def test_specify_initial_state_keras_tensor(self, layer_class):
    input_size = 10
    timesteps = 6
    units = 2
    num_samples = 32
    num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1

    inputs = keras.Input((timesteps, input_size))
    initial_state = [keras.Input((units,)) for _ in range(num_states)]
    layer = layer_class(units)
    if len(initial_state) == 1:
      output = layer(inputs, initial_state=initial_state[0])
    else:
      output = layer(inputs, initial_state=initial_state)
    self.assertTrue(
        any(initial_state[0] is t
            for t in layer._inbound_nodes[0].input_tensors))

    model = keras.models.Model([inputs] + initial_state, output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=0.001),
        run_eagerly=testing_utils.should_run_eagerly())

    inputs = np.random.random((num_samples, timesteps, input_size))
    initial_state = [
        np.random.random((num_samples, units)) for _ in range(num_states)
    ]
    targets = np.random.random((num_samples, units))
    model.fit([inputs] + initial_state, targets)
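# Illustrative sketch (not part of the original test suite; uses the generic
# `tf.keras.layers.LSTM`, which also runs on CPU, instead of the GPU-only
# CuDNN layers): the symbolic initial-state pattern that
# test_specify_initial_state_keras_tensor exercises.
def _initial_state_sketch():
  import numpy as np
  import tensorflow as tf

  inputs = tf.keras.Input((6, 10))
  state_h = tf.keras.Input((2,))
  state_c = tf.keras.Input((2,))
  output = tf.keras.layers.LSTM(2)(inputs, initial_state=[state_h, state_c])
  model = tf.keras.Model([inputs, state_h, state_c], output)
  model.compile(loss='mse', optimizer='rmsprop')
  model.fit(
      [np.random.random((4, 6, 10)),
       np.random.random((4, 2)),
       np.random.random((4, 2))],
      np.random.random((4, 2)))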
class CuDNNV1OnlyTest(keras_parameterized.TestCase):

  @test_util.run_gpu_only
  def test_trainability(self):
    input_size = 10
    units = 2
    for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]:
      layer = layer_class(units)
      layer.build((None, None, input_size))
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)
      layer.trainable = False
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 3)
      self.assertEqual(len(layer.trainable_weights), 0)
      layer.trainable = True
      self.assertEqual(len(layer.weights), 3)
      self.assertEqual(len(layer.trainable_weights), 3)
      self.assertEqual(len(layer.non_trainable_weights), 0)

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'],
          to_cudnn=[True, False],
          bidirectional=[True, False],
          implementation=[1, 2],
          model_nest_level=[1, 2],
          model_type=['seq', 'func']))
  @test_util.run_v1_only('b/120911602, b/112083752')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn(self, rnn_type, to_cudnn,
                                             bidirectional, implementation,
                                             model_nest_level, model_type):
    input_size = 10
    timesteps = 6
    input_shape = (timesteps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # Ensure biases are non-zero and properly converted.
        'bias_initializer': 'random_uniform',
        'implementation': implementation
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    if bidirectional:
      layer = keras.layers.Bidirectional(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    if bidirectional:
      cudnn_layer = keras.layers.Bidirectional(cudnn_layer)

    model = self._make_nested_model(input_shape, layer, model_nest_level,
                                    model_type)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer,
                                          model_nest_level, model_type)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(
        model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4)

  def _make_nested_model(self, input_shape, layer, level=1, model_type='func'):
    # Example: make_nested_seq_model((1,), Dense(10), level=2).summary()
    def make_nested_seq_model(input_shape, layer, level=1):
      model = layer
      for i in range(1, level + 1):
        layers = [keras.layers.InputLayer(input_shape),
                  model] if (i == 1) else [model]
        model = keras.models.Sequential(layers)
        if i > 1:
          model.build((None,) + input_shape)
      return model

    # Example: make_nested_func_model((1,), Dense(10), level=2).summary()
    def make_nested_func_model(input_shape, layer, level=1):
      model_input = keras.layers.Input(input_shape)
      model = layer
      for _ in range(level):
        model = keras.models.Model(model_input, model(model_input))
      return model

    if model_type == 'func':
      return make_nested_func_model(input_shape, layer, level)
    elif model_type == 'seq':
      return make_nested_seq_model(input_shape, layer, level)

  def _convert_model_weights(self, source_model, target_model):
    _, fname = tempfile.mkstemp('.h5')
    source_model.save_weights(fname)
    target_model.load_weights(fname)
    os.remove(fname)

  @parameterized.named_parameters(
      *testing_utils.generate_combinations_with_testcase_name(
          rnn_type=['LSTM', 'GRU'], to_cudnn=[True, False]))
  @test_util.run_v1_only('b/120911602')
  @test_util.run_gpu_only
  def test_load_weights_between_noncudnn_rnn_time_distributed(
      self, rnn_type, to_cudnn):
    # Similar to test_load_weights_between_noncudnn_rnn(), but the input has
    # a different rank due to the use of TimeDistributed. Issue: #10356.
    input_size = 10
    steps = 6
    timesteps = 6
    input_shape = (timesteps, steps, input_size)
    units = 2
    num_samples = 32
    inputs = np.random.random((num_samples, timesteps, steps, input_size))

    rnn_layer_kwargs = {
        'recurrent_activation': 'sigmoid',
        # Ensure biases are non-zero and properly converted.
        'bias_initializer': 'random_uniform',
    }
    if rnn_type == 'LSTM':
      rnn_layer_class = keras.layers.LSTM
      cudnn_rnn_layer_class = keras.layers.CuDNNLSTM
    else:
      rnn_layer_class = keras.layers.GRU
      cudnn_rnn_layer_class = keras.layers.CuDNNGRU
      rnn_layer_kwargs['reset_after'] = True

    layer = rnn_layer_class(units, **rnn_layer_kwargs)
    layer = keras.layers.TimeDistributed(layer)

    cudnn_layer = cudnn_rnn_layer_class(units)
    cudnn_layer = keras.layers.TimeDistributed(cudnn_layer)

    model = self._make_nested_model(input_shape, layer)
    cudnn_model = self._make_nested_model(input_shape, cudnn_layer)

    if to_cudnn:
      self._convert_model_weights(model, cudnn_model)
    else:
      self._convert_model_weights(cudnn_model, model)

    self.assertAllClose(
        model.predict(inputs), cudnn_model.predict(inputs), atol=1e-4)

  @test_util.run_gpu_only
  def test_cudnnrnn_bidirectional(self):
    rnn = keras.layers.CuDNNGRU
    samples = 2
    dim = 2
    timesteps = 2
    output_dim = 2
    mode = 'concat'

    x = np.random.random((samples, timesteps, dim))
    target_dim = 2 * output_dim if mode == 'concat' else output_dim
    y = np.random.random((samples, target_dim))

    # Test with Sequential model.
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim), merge_mode=mode, input_shape=(None, dim)))
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # Test config.
    model.get_config()
    model = keras.models.model_from_json(model.to_json())
    model.summary()

    # Test stacked bidirectional layers.
    model = keras.Sequential()
    model.add(
        keras.layers.Bidirectional(
            rnn(output_dim, return_sequences=True),
            merge_mode=mode,
            input_shape=(None, dim)))
    model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode))
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # Test with functional API.
    inputs = keras.Input((timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim), merge_mode=mode)(inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

    # Bidirectional and stateful.
    inputs = keras.Input(batch_shape=(1, timesteps, dim))
    outputs = keras.layers.Bidirectional(
        rnn(output_dim, stateful=True), merge_mode=mode)(inputs)
    model = keras.Model(inputs, outputs)
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit(x, y, epochs=1, batch_size=1)

  @test_util.run_gpu_only
  def test_preprocess_weights_for_loading_gru_incompatible(self):
    """Test loading weights between incompatible layers.

    Should fail fast with an exception.
""" input_shape = (3, 5) def gru(cudnn=False, **kwargs): layer_class = keras.layers.CuDNNGRU if cudnn else keras.layers.GRUV1 return layer_class(2, input_shape=input_shape, **kwargs) def get_layer_weights(layer): layer.build(input_shape=input_shape) return layer.get_weights() def assert_not_compatible(src, dest, message): with self.assertRaises(ValueError) as ex: keras.saving.hdf5_format.preprocess_weights_for_loading( dest, get_layer_weights(src)) self.assertIn(message, str(ex.exception)) assert_not_compatible( gru(), gru(cudnn=True), 'GRU(reset_after=False) is not compatible with CuDNNGRU') assert_not_compatible( gru(cudnn=True), gru(), 'CuDNNGRU is not compatible with GRU(reset_after=False)') assert_not_compatible( gru(), gru(reset_after=True), 'GRU(reset_after=False) is not compatible with ' 'GRU(reset_after=True)') assert_not_compatible( gru(reset_after=True), gru(), 'GRU(reset_after=True) is not compatible with ' 'GRU(reset_after=False)')