def create_sequential_critic_network(obs_fc_layer_units,
                                     action_fc_layer_units,
                                     joint_fc_layer_units):
    """Build a critic as a `sequential.Sequential` stack of layers.

    The network input is indexed positionally: element 0 is treated as the
    observation and element 1 as the action. Each is passed through its own
    branch, the branch outputs are concatenated, fed through a joint network
    and finally projected to a single unit.

    Args:
      obs_fc_layer_units: Passed to `create_fc_network` for the observation
        branch; when falsy an identity layer is used instead.
      action_fc_layer_units: Passed to `create_fc_network` for the action
        branch; when falsy an identity layer is used instead.
      joint_fc_layer_units: Passed to `create_fc_network` for the joint
        network applied after concatenation; when falsy an identity layer is
        used instead.

    Returns:
      A `sequential.Sequential` network named 'sequential_critic'.
    """

    def split_inputs(inputs):
        # Turn the positional (observation, action) input into a keyed nest so
        # that NestMap can route each entry to its own branch.
        return dict(observation=inputs[0], action=inputs[1])

    def fc_or_identity(layer_units):
        # An empty / None unit spec means "no processing" for that branch.
        if layer_units:
            return create_fc_network(layer_units)
        return create_identity_layer()

    # Branches are built in observation, action, joint order.
    observation_branch = fc_or_identity(obs_fc_layer_units)
    action_branch = fc_or_identity(action_fc_layer_units)
    joint_branch = fc_or_identity(joint_fc_layer_units)

    # Single-unit output head.
    value_head = tf.keras.layers.Dense(1, kernel_initializer='glorot_uniform')

    critic_layers = [
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': observation_branch,
            'action': action_branch,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_branch,
        value_head,
        # InnerReshape is asked to map the inner shape [1] to [].
        inner_reshape.InnerReshape(current_shape=[1], new_shape=[]),
    ]
    return sequential.Sequential(critic_layers, name='sequential_critic')
def create_critic_network(obs_fc_layer_units,
                          action_fc_layer_units,
                          joint_fc_layer_units):
    """Build the DDPG critic as a `sequential.Sequential` network.

    The input is indexed positionally (element 0 is the observation, element 1
    the action). Observation and action go through separate branches, are
    concatenated, passed through a joint network and projected to one unit by
    a linear layer whose kernel is initialized uniformly in [-0.003, 0.003].

    Args:
      obs_fc_layer_units: Passed to `create_fc_network` for the observation
        branch; when falsy an identity layer is used instead.
      action_fc_layer_units: Passed to `create_fc_network` for the action
        branch; when falsy an identity layer is used instead.
      joint_fc_layer_units: Passed to `create_fc_network` for the joint
        network; when falsy an identity layer is used instead.

    Returns:
      A `sequential.Sequential` network.
    """

    def split_inputs(inputs):
        # Key the positional inputs so NestMap can dispatch them per branch.
        return dict(observation=inputs[0], action=inputs[1])

    def branch_for(layer_units):
        # Falsy unit specs fall back to a pass-through layer.
        if not layer_units:
            return create_identity_layer()
        return create_fc_network(layer_units)

    # Branches are built in observation, action, joint order.
    observation_branch = branch_for(obs_fc_layer_units)
    action_branch = branch_for(action_fc_layer_units)
    joint_branch = branch_for(joint_fc_layer_units)

    # Linear single-unit head with a narrow uniform kernel initialization.
    small_uniform = tf.keras.initializers.RandomUniform(
        minval=-0.003, maxval=0.003)
    value_head = tf.keras.layers.Dense(
        1, activation=None, kernel_initializer=small_uniform)

    critic_layers = [
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': observation_branch,
            'action': action_branch,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_branch,
        value_head,
        # InnerReshape is asked to map the inner shape [1] to [].
        inner_reshape.InnerReshape([1], []),
    ]
    return sequential.Sequential(critic_layers)
def testCreateAndCall(self):
    """Builds a NestMap-based Sequential and checks its specs and outputs.

    Verifies the network's state spec, the output spec returned by
    `create_variables`, and the output / next-state shapes when the network
    is called both without and with an explicit state.
    """
    float_spec = lambda shape: tf.TensorSpec(shape=shape, dtype=tf.float32)

    # Sub-networks are created in 'inp1', 'inp2', 'inp3' order.
    dense_branch = tf.keras.layers.Dense(8)
    conv_branch = sequential.Sequential([
        tf.keras.layers.Conv2D(2, 3),
        # Convert 3 inner dimensions to [8] for RNN.
        inner_reshape.InnerReshape([None] * 3, [8]),
    ])
    lstm_branch = tf.keras.layers.LSTM(
        8, return_state=True, return_sequences=True)

    net = sequential.Sequential([
        nest_map.NestMap({
            'inp1': dense_branch,
            'inp2': conv_branch,
            'inp3': lstm_branch,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Add(),
    ])

    # Only the LSTM entry is expected to carry state (two tensors of size 8).
    expected_state_spec = ({
        'inp1': (),
        'inp2': (),
        'inp3': (2 * [float_spec((8,))],),
    },)
    self.assertEqual(net.state_spec, expected_state_spec)

    input_spec = {
        'inp1': float_spec((3,)),
        'inp2': float_spec((4, 4, 2,)),
        'inp3': float_spec((3,)),
    }
    output_spec = net.create_variables(input_spec)
    self.assertEqual(output_spec, float_spec((8,)))

    inputs = {
        'inp1': tf.ones((8, 10, 3), dtype=tf.float32),
        'inp2': tf.ones((8, 10, 4, 4, 2), dtype=tf.float32),
        'inp3': tf.ones((8, 10, 3), dtype=tf.float32),
    }
    expected_output_shape = tf.TensorShape([8, 10, 8])
    expected_state_shapes = ({
        'inp1': (),
        'inp2': (),
        'inp3': (2 * [tf.TensorShape([8, 8])],),
    },)

    # First call: no state supplied.
    output, next_state = net(inputs)
    self.assertEqual(output.shape, expected_output_shape)
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        expected_state_shapes)

    # Test passing in a state.
    output, next_state = net(inputs, next_state)
    self.assertEqual(output.shape, expected_output_shape)
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        expected_state_shapes)
def create_sequential_actor_net(fc_layer_units, action_tensor_spec):
    """Build an actor network that outputs a diagonal Gaussian distribution.

    The network is a stack of tanh Dense layers, followed by two parallel
    Dense projections ('loc' and 'scale') whose outputs are concatenated and
    turned into a `MultivariateNormalDiag`.

    Args:
      fc_layer_units: Iterable of unit counts, one tanh Dense layer per entry.
      action_tensor_spec: Spec whose `shape.num_elements()` gives the width
        of both the mean and the standard-deviation projections.

    Returns:
      A `sequential.Sequential` network whose final layer produces a
      `tfp.distributions.MultivariateNormalDiag`.
    """

    def create_dist(loc_and_scale):
        # The first half of the last axis holds the means, the remainder the
        # pre-softplus scales.
        num_actions = action_tensor_spec.shape.num_elements()
        means = loc_and_scale[..., :num_actions]
        raw_scales = loc_and_scale[..., num_actions:]
        return tfp.distributions.MultivariateNormalDiag(
            loc=means,
            scale_diag=tf.math.softplus(raw_scales),
            validate_args=True)

    def build_means_projection():
        # TODO(b/179510447): align these parameters with Schulman 17.
        kernel_init = tf.keras.initializers.VarianceScaling(scale=0.1)
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=kernel_init,
            name='means_projection_layer')

    def build_std_projection():
        # TODO(b/179510447): align these parameters with Schulman 17.
        kernel_scale = 0.1
        # Inverse softplus of 0.35, so the initial bias maps to 0.35 after the
        # softplus applied in `create_dist`.
        bias_value = np.log(np.exp(0.35) - 1)
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=kernel_scale),
            bias_initializer=tf.keras.initializers.Constant(value=bias_value))

    # One Orthogonal initializer instance is shared by every hidden layer.
    orthogonal_init = tf.keras.initializers.Orthogonal()

    def hidden_layer(units):
        return tf.keras.layers.Dense(
            units, activation=tf.nn.tanh, kernel_initializer=orthogonal_init)

    actor_layers = [hidden_layer(num_units) for num_units in fc_layer_units]
    # Duplicate the hidden features so each projection gets its own copy.
    actor_layers.append(
        tf.keras.layers.Lambda(lambda x: {'loc': x, 'scale': x}))
    actor_layers.append(
        nest_map.NestMap({
            'loc': build_means_projection(),
            'scale': build_std_projection(),
        }))
    actor_layers.append(nest_map.NestFlatten())
    # Concatenate the mean and standard deviation outputs to feed into the
    # distribution layer.
    actor_layers.append(tf.keras.layers.Concatenate(axis=-1))
    # Create the output distribution from the mean and standard deviation.
    actor_layers.append(tf.keras.layers.Lambda(create_dist))

    return sequential.Sequential(actor_layers)
def testNestFlatten(self):
    """Checks that NestFlatten on {'a': 1, 'b': 2} evaluates to [1, 2]."""
    flatten = nest_map.NestFlatten()
    flattened = self.evaluate(flatten({'a': 1, 'b': 2}))
    self.assertEqual(flattened, [1, 2])