def create_duel_q_network(observation_spec, action_spec, fc_layer_params,
                          a_fc_layer_params, v_fc_layer_params):
  """Constructs a `duel_q_network.DuelQNetwork` from specs and layer params.

  Args:
    observation_spec: Forwarded as the first positional constructor argument.
    action_spec: Forwarded as the second positional constructor argument.
    fc_layer_params: Forwarded as the `fc_layer_params` keyword.
    a_fc_layer_params: Forwarded as the `a_fc_layer_params` keyword.
    v_fc_layer_params: Forwarded as the `v_fc_layer_params` keyword.

  Returns:
    The newly constructed `DuelQNetwork` instance.
  """
  layer_kwargs = {
      'fc_layer_params': fc_layer_params,
      'a_fc_layer_params': a_fc_layer_params,
      'v_fc_layer_params': v_fc_layer_params,
  }
  return duel_q_network.DuelQNetwork(
      observation_spec, action_spec, **layer_kwargs)
def testCorrectOutputShape(self):
  """Checks that the network output has shape [batch_size, num_actions]."""
  batch_size, num_state_dims, num_actions = 3, 5, 2
  states = tf.random.uniform([batch_size, num_state_dims])

  observation_spec = tensor_spec.TensorSpec([num_state_dims], tf.float32)
  # Action values are bounded to [0, 1]; the test expects `num_actions`
  # columns in the output.
  discrete_action_spec = tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)
  network = duel_q_network.DuelQNetwork(
      input_tensor_spec=observation_spec,
      action_spec=discrete_action_spec)

  q_values, _unused_state = network(states)

  actual_shape = q_values.shape.as_list()
  self.assertAllEqual(actual_shape, [batch_size, num_actions])
def testPreprocessingLayersSingleObservations(self):
  """Exercises a single preprocessing layer with no preprocessing combiner."""
  num_state_dims = 5
  observation_spec = tensor_spec.TensorSpec([num_state_dims], tf.float32)
  discrete_action_spec = tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)

  # An identity Lambda layer stands in for the preprocessing step.
  identity_layer = tf.keras.layers.Lambda(lambda x: x)
  network = duel_q_network.DuelQNetwork(
      input_tensor_spec=observation_spec,
      action_spec=discrete_action_spec,
      preprocessing_layers=identity_layer,
      preprocessing_combiner=None)

  observations = tf.ones((3, num_state_dims))
  q_logits, _unused_state = network(observations)

  # Batch of 3 observations, one output column per action value in [0, 1].
  self.assertAllEqual(q_logits.shape.as_list(), [3, 2])
def testChangeHiddenLayers(self):
  """Checks output shape and variable count with custom a/v layer params."""
  batch_size, num_state_dims, num_actions = 3, 5, 2
  states = tf.random.uniform([batch_size, num_state_dims])

  # Both `a_fc_layer_params` and `v_fc_layer_params` get one 20-unit layer.
  stream_layer_params = (20,)
  network = duel_q_network.DuelQNetwork(
      input_tensor_spec=tensor_spec.TensorSpec([num_state_dims], tf.float32),
      action_spec=tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1),
      a_fc_layer_params=stream_layer_params,
      v_fc_layer_params=stream_layer_params)

  q_values, _unused_state = network(states)

  expected_shape = [batch_size, num_actions]
  self.assertAllEqual(q_values.shape.as_list(), expected_shape)
  num_trainable = len(network.trainable_variables)
  self.assertEqual(num_trainable, 8)
def testNetworkVariablesAreReused(self):
  """Calls one network twice on identical inputs and compares the outputs."""
  batch_size = 3
  num_state_dims = 5
  input_shape = [batch_size, num_state_dims]

  # Two separate all-ones tensors, so equal outputs are expected when the
  # second call uses the same variables as the first.
  states = tf.ones(input_shape)
  next_states = tf.ones(input_shape)

  observation_spec = tensor_spec.TensorSpec([num_state_dims], tf.float32)
  discrete_action_spec = tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)
  network = duel_q_network.DuelQNetwork(
      input_tensor_spec=observation_spec,
      action_spec=discrete_action_spec)

  q_values, _unused_state = network(states)
  next_q_values, _unused_next_state = network(next_states)

  init_op = tf.compat.v1.global_variables_initializer()
  self.evaluate(init_op)
  self.assertAllClose(q_values, next_q_values)
def testCombinedFeatureColumnInput(self):
  """Runs online and copied networks on combined feature-column input.

  The observation is a dict with three entries (an indicator column, an
  embedding column and a numeric column) combined by a `DenseFeatures`
  layer passed as `preprocessing_combiner`. The test checks that both the
  online network and its copy produce outputs of shape
  `(batch_size, num_actions)` and that the two outputs agree within the
  given tolerances.
  """
  columns = {}
  state_tensors = {}
  state_specs = {}

  # Categorical feature exposed through an indicator column.
  indicator_key = 'indicator_key'
  vocab_list = [2, 3, 4]
  column1 = tf.feature_column.categorical_column_with_vocabulary_list(
      indicator_key, vocab_list)
  columns[indicator_key] = tf.feature_column.indicator_column(column1)
  state_tensors[indicator_key] = tf.expand_dims([3, 2, 2, 4, 3], -1)
  state_specs[indicator_key] = tensor_spec.TensorSpec([1], tf.int32)

  # Categorical feature exposed through an embedding column.
  embedding_key = 'embedding_key'
  embedding_dim = 3
  vocab_list = [2, 3, 4]
  column2 = tf.feature_column.categorical_column_with_vocabulary_list(
      embedding_key, vocab_list)
  columns[embedding_key] = tf.feature_column.embedding_column(
      column2, embedding_dim)
  state_tensors[embedding_key] = tf.expand_dims([3, 2, 2, 4, 3], -1)
  state_specs[embedding_key] = tensor_spec.TensorSpec([1], tf.int32)

  # Numeric feature; its batch size matches the five categorical values.
  numeric_key = 'numeric_key'
  batch_size = 5
  state_dims = 3
  input_shape = (batch_size, state_dims)
  columns[numeric_key] = tf.feature_column.numeric_column(
      numeric_key, [state_dims])
  state_tensors[numeric_key] = tf.ones(input_shape, tf.int32)
  state_specs[numeric_key] = tensor_spec.TensorSpec([state_dims], tf.int32)

  num_actions = 4
  action_spec = tensor_spec.BoundedTensorSpec(
      [1], tf.int32, 0, num_actions - 1)
  dense_features = tf.compat.v2.keras.layers.DenseFeatures(columns.values())
  online_network = duel_q_network.DuelQNetwork(
      state_specs, action_spec, preprocessing_combiner=dense_features)
  target_network = online_network.copy(name='TargetNetwork')
  q_online, _ = online_network(state_tensors)
  q_target, _ = target_network(state_tensors)

  # The vocabulary-list columns require the table initializer in addition
  # to the variable initializer.
  self.evaluate(tf.compat.v1.global_variables_initializer())
  self.evaluate(tf.compat.v1.initializers.tables_initializer())

  expected_shape = (batch_size, num_actions)
  self.assertEqual(expected_shape, q_online.shape)
  self.assertEqual(expected_shape, q_target.shape)
  # NOTE(review): tolerances of 1.0 make this a very loose comparison;
  # presumably the copy has its own variables -- confirm.
  self.assertAllClose(q_online, q_target, rtol=1.0, atol=1.0)
def testNumericFeatureColumnInput(self):
  """Runs online and copied networks on a single numeric feature column."""
  key = 'feature_key'
  batch_size, state_dims = 3, 5

  numeric_column = tf.feature_column.numeric_column(key, [state_dims])
  state = {key: tf.ones([batch_size, state_dims], tf.int32)}
  state_spec = {key: tensor_spec.TensorSpec([state_dims], tf.int32)}

  # The DenseFeatures layer is passed as the preprocessing combiner for the
  # dict observation.
  dense_features = tf.compat.v2.keras.layers.DenseFeatures([numeric_column])
  discrete_action_spec = tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)
  online_network = duel_q_network.DuelQNetwork(
      input_tensor_spec=state_spec,
      action_spec=discrete_action_spec,
      preprocessing_combiner=dense_features)
  target_network = online_network.copy(name='TargetNetwork')

  q_online, _unused_online_state = online_network(state)
  q_target, _unused_target_state = target_network(state)

  init_op = tf.compat.v1.global_variables_initializer()
  self.evaluate(init_op)
  # Tolerances of 1.0 make this a loose comparison.
  self.assertAllClose(q_online, q_target, rtol=1.0, atol=1.0)
def testEmbeddingFeatureColumnInput(self):
  """Runs online and copied networks on a string-keyed embedding column."""
  key = 'feature_key'
  vocab_list = ['a', 'b']

  categorical_column = (
      tf.feature_column.categorical_column_with_vocabulary_list(
          key, vocab_list))
  embedded_column = tf.feature_column.embedding_column(categorical_column, 3)

  # The input includes 'c', which is not in the vocabulary list.
  feature_tensor = tf.convert_to_tensor(['a', 'b', 'c', 'a', 'c'])
  state = {key: tf.expand_dims(feature_tensor, -1)}
  state_spec = {key: tensor_spec.TensorSpec([1], tf.string)}

  dense_features = tf.compat.v2.keras.layers.DenseFeatures([embedded_column])
  discrete_action_spec = tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)
  online_network = duel_q_network.DuelQNetwork(
      input_tensor_spec=state_spec,
      action_spec=discrete_action_spec,
      preprocessing_combiner=dense_features)
  target_network = online_network.copy(name='TargetNetwork')

  q_online, _unused_online_state = online_network(state)
  q_target, _unused_target_state = target_network(state)

  # Variables first, then the lookup tables, in the same order as before.
  for init_op in (tf.compat.v1.global_variables_initializer(),
                  tf.compat.v1.initializers.tables_initializer()):
    self.evaluate(init_op)
  # Tolerances of 1.0 make this a loose comparison.
  self.assertAllClose(q_online, q_target, rtol=1.0, atol=1.0)
def testAddPreprocessingLayers(self):
  """Checks a tuple observation with per-input layers merged by `Add`."""
  batch_size, num_actions = 3, 2

  # Two inputs: one of shape [batch, 1] and one of shape [batch].
  states = (tf.random.uniform([batch_size, 1]),
            tf.random.uniform([batch_size]))

  first_layer = tf.keras.layers.Dense(4)
  # The second input is reshaped to (1,) per example before its Dense layer.
  second_layer = tf.keras.Sequential([
      tf.keras.layers.Reshape((1,)),
      tf.keras.layers.Dense(4),
  ])
  preprocessing_layers = (first_layer, second_layer)

  observation_spec = (
      tensor_spec.TensorSpec([1], tf.float32),
      tensor_spec.TensorSpec([], tf.float32),
  )
  combiner = tf.keras.layers.Add()
  discrete_action_spec = tensor_spec.BoundedTensorSpec(
      [1], tf.int32, 0, num_actions - 1)
  network = duel_q_network.DuelQNetwork(
      input_tensor_spec=observation_spec,
      preprocessing_layers=preprocessing_layers,
      preprocessing_combiner=combiner,
      action_spec=discrete_action_spec)

  q_values, _unused_state = network(states)

  expected_shape = [batch_size, num_actions]
  self.assertAllEqual(q_values.shape.as_list(), expected_shape)
  # At least 2 variables each for the preprocessing layers.
  num_trainable = len(network.trainable_variables)
  self.assertGreater(num_trainable, 6)