def testIncompatibleStructureInputs(self):
    """Structure mismatches must raise with the expected error messages."""
    # The same message is expected from both `create_variables` and a call.
    layers_vs_inputs_regex = (
        r'`self.nested_layers` and `inputs` do not have matching structures')

    # A list of layers combined with a dict `input_spec` at construction.
    with self.assertRaisesRegex(
            TypeError,
            r'`nested_layers` and `input_spec` do not have matching structures'):
        nest_map.NestMap(
            [tf.keras.layers.Dense(8)],
            input_spec={'ick': tf.TensorSpec(8, tf.float32)})

    # A list of layers given a dict spec through `create_variables`.
    with self.assertRaisesRegex(TypeError, layers_vs_inputs_regex):
        list_net = nest_map.NestMap([tf.keras.layers.Dense(8)])
        list_net.create_variables(
            {'ick': tf.TensorSpec((1,), dtype=tf.float32)})

    # A list of layers called directly on a dict of tensors.
    with self.assertRaisesRegex(TypeError, layers_vs_inputs_regex):
        list_net = nest_map.NestMap([tf.keras.layers.Dense(8)])
        list_net({'ick': tf.constant([[1.0]])})

    # A `network_state` whose structure differs from the layer's state spec.
    with self.assertRaisesRegex(
            ValueError,
            r'`network_state` and `state_spec` do not have matching structures'):
        lstm_net = nest_map.NestMap(
            tf.keras.layers.LSTM(8, return_state=True, return_sequences=True))
        bad_state = (tf.ones((1, 1)), ())
        lstm_net(tf.ones((1, 2)), network_state=bad_state)
def testNestedNest(self):
    """Create variables for a NestMap that wraps another NestMap."""
    # Layer structure: {'a': {'b': <Dense>}}
    inner_map = nest_map.NestMap({'b': tf.keras.layers.Dense(8)})
    outer_map = nest_map.NestMap({'a': inner_map})

    # The input spec mirrors the nested layer structure.
    nested_spec = {'a': {'b': tf.TensorSpec((1,), dtype=tf.float32)}}
    outer_map.create_variables(nested_spec)
def create_sequential_critic_network(obs_fc_layer_units,
                                     action_fc_layer_units,
                                     joint_fc_layer_units):
    """Create a sequential critic network.

    Args:
      obs_fc_layer_units: Passed to `create_fc_network` for the observation
        branch; when falsy, `create_identity_layer()` is used instead.
      action_fc_layer_units: Same as above, for the action branch.
      joint_fc_layer_units: Same as above, for the network applied to the
        concatenated branch outputs.

    Returns:
      A `sequential.Sequential` named 'sequential_critic'.
    """

    def _fc_or_identity(layer_units):
        # Falsy unit lists (e.g. None or empty) fall back to a pass-through.
        if layer_units:
            return create_fc_network(layer_units)
        return create_identity_layer()

    def split_inputs(inputs):
        # Element 0 is treated as the observation, element 1 as the action.
        return {'observation': inputs[0], 'action': inputs[1]}

    # Build the sub-networks in a fixed order: observation, action, joint.
    obs_network = _fc_or_identity(obs_fc_layer_units)
    action_network = _fc_or_identity(action_fc_layer_units)
    joint_network = _fc_or_identity(joint_fc_layer_units)

    # Final single-unit projection.
    value_layer = tf.keras.layers.Dense(
        1, kernel_initializer='glorot_uniform')

    critic_layers = [
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': obs_network,
            'action': action_network,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_network,
        value_layer,
        # Reshape the inner [1] dimension produced by `value_layer` to [].
        inner_reshape.InnerReshape(current_shape=[1], new_shape=[]),
    ]
    return sequential.Sequential(critic_layers, name='sequential_critic')
def testPolicySaverCompatibility(self):
    """A policy built on a NestMap network can act and be saved."""
    observation_spec = {
        'a': tf.TensorSpec(4, tf.float32),
        'b': tf.TensorSpec(3, tf.float32),
    }
    time_step_tensor_spec = ts.time_step_spec(observation_spec)

    # One stateful (LSTM) branch and one stateless (Dense) branch.
    lstm_branch = tf.keras.layers.LSTM(
        8, return_state=True, return_sequences=True)
    dense_branch = tf.keras.layers.Dense(8)
    net = nest_map.NestMap({'a': lstm_branch, 'b': dense_branch})
    net.create_variables(observation_spec)

    policy = MyPolicy(time_step_tensor_spec, net)

    # Sample a batch of 5 time steps and check the action shape.
    batched_time_step = tensor_spec.sample_spec_nest(
        time_step_tensor_spec, outer_dims=(5,))
    policy_step = policy.action(batched_time_step)
    self.assertEqual(policy_step.action.shape.as_list(), [5, 8])

    train_step = common.create_variable('train_step')
    saver = policy_saver.PolicySaver(policy, train_step=train_step)
    self.initialize_v1_variables()

    with self.cached_session():
        export_dir = os.path.join(FLAGS.test_tmpdir, 'nest_map_model')
        saver.save(export_dir)
def create_sequential_actor_net(self,
                                fc_layer_units,
                                action_tensor_spec,
                                seed=None):
    """Helper method for creating the actor network.

    Stores a new seed stream on `self._seed_stream` (built from
    `self.seed_stream_class`) and draws layer-initializer seeds from it.

    Args:
      fc_layer_units: Iterable of unit counts, one tanh Dense layer each.
      action_tensor_spec: Spec whose `shape.num_elements()` sizes the means
        projection; also passed to `tanh_and_scale_to_spec`.
      seed: Seed forwarded to the seed stream.

    Returns:
      A `sequential.Sequential` whose last layer builds a
      `tfp.distributions.MultivariateNormalDiag`.
    """
    self._seed_stream = self.seed_stream_class(
        seed=seed, salt='tf_agents_sequential_layers')

    def _get_seed():
        # Draw the next seed; non-None seeds are reduced modulo sys.maxsize.
        next_seed = self._seed_stream()
        return None if next_seed is None else next_seed % sys.maxsize

    def create_dist(loc_and_scale):
        loc = tanh_and_scale_to_spec(loc_and_scale['loc'], action_tensor_spec)
        scale = tf.math.softplus(loc_and_scale['scale'])
        return tfp.distributions.MultivariateNormalDiag(
            loc=loc, scale_diag=scale, validate_args=True)

    def means_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        num_action_elements = action_tensor_spec.shape.num_elements()
        means_initializer = tf.keras.initializers.VarianceScaling(
            scale=0.1, seed=_get_seed())
        return tf.keras.layers.Dense(
            num_action_elements,
            kernel_initializer=means_initializer,
            name='means_projection_layer')

    def std_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        # Inverse softplus of 0.35, so softplus(bias) starts at 0.35.
        std_bias_initializer_value = np.log(np.exp(0.35) - 1)
        return bias_layer.BiasLayer(
            bias_initializer=tf.constant_initializer(
                value=std_bias_initializer_value))

    def no_op_layers():
        return tf.keras.layers.Lambda(lambda x: x)

    # The initializer (and its seed) is created once, here, and shared by
    # every hidden layer built from this partial.
    hidden_initializer = tf.keras.initializers.Orthogonal(seed=_get_seed())
    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=hidden_initializer)

    actor_layers = [dense(num_units) for num_units in fc_layer_units]
    actor_layers.append(means_layers())
    # Fan the means out into a {'loc', 'scale'} nest; the 'scale' branch is
    # fed zeros shaped like the means.
    actor_layers.append(
        tf.keras.layers.Lambda(
            lambda x: {'loc': x, 'scale': tf.zeros_like(x)}))
    actor_layers.append(
        nest_map.NestMap({
            'loc': no_op_layers(),
            'scale': std_layers(),
        }))
    # Create the output distribution from the mean and standard deviation.
    actor_layers.append(tf.keras.layers.Lambda(create_dist))
    return sequential.Sequential(actor_layers)
def create_sequential_critic_net():
    """Build a critic that sums a multi-branch value output with an action output.

    The value side is a dict of the `patch_pre_layer`, `color_pre_layer` and
    `motion_pre_layer` objects defined elsewhere in this file; the action side
    is a single Dense layer.

    Returns:
      A `sequential.Sequential` ending in a single-unit Dense layer.
    """

    def sum_value_and_action_out(value_and_action_out):
        value_branches, action_out = value_and_action_out
        # Join all value-branch outputs along the last axis before adding.
        value_out = tf.concat(tf.nest.flatten(value_branches), axis=-1)
        return tf.reshape(value_out + action_out, [1, -1])

    value_layer_dict = {
        "patch": patch_pre_layer,
        "color": color_pre_layer,
        "motion": motion_pre_layer,
    }
    # NOTE(review): 81 presumably matches the concatenated width of the
    # value branches so the two outputs can be added - confirm.
    action_layer = tf.keras.layers.Dense(81)

    critic_layers = [
        nest_map.NestMap((value_layer_dict, action_layer)),
        tf.keras.layers.Lambda(sum_value_and_action_out),
        tf.keras.layers.Dense(1),
    ]
    return sequential.Sequential(critic_layers)
def create_critic_network(obs_fc_layer_units,
                          action_fc_layer_units,
                          joint_fc_layer_units):
    """Create a critic network for DDPG.

    Args:
      obs_fc_layer_units: Passed to `create_fc_network` for the observation
        branch; when falsy, `create_identity_layer()` is used instead.
      action_fc_layer_units: Same as above, for the action branch.
      joint_fc_layer_units: Same as above, for the network applied to the
        concatenated branch outputs.

    Returns:
      A `sequential.Sequential` mapping an (observation, action) pair to the
      output of a single-unit Dense layer with its inner [1] shape reshaped
      to [].
    """

    def _fc_or_identity(layer_units):
        # Falsy unit lists (e.g. None or empty) fall back to a pass-through.
        if layer_units:
            return create_fc_network(layer_units)
        return create_identity_layer()

    def split_inputs(inputs):
        # Element 0 is treated as the observation, element 1 as the action.
        return {'observation': inputs[0], 'action': inputs[1]}

    obs_network = _fc_or_identity(obs_fc_layer_units)
    action_network = _fc_or_identity(action_fc_layer_units)
    joint_network = _fc_or_identity(joint_fc_layer_units)

    # Final projection: no activation, weights drawn from [-0.003, 0.003].
    small_uniform = tf.keras.initializers.RandomUniform(
        minval=-0.003, maxval=0.003)
    value_fc_layer = tf.keras.layers.Dense(
        1, activation=None, kernel_initializer=small_uniform)

    critic_layers = [
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': obs_network,
            'action': action_network,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_network,
        value_fc_layer,
        inner_reshape.InnerReshape([1], []),
    ]
    return sequential.Sequential(critic_layers)
def testAllZeroLengthStateSpecsShowAsEmptyState(self):
    """A Sequential wrapping only Dense layers reports an empty state spec."""
    dense_branches = {
        'a': tf.keras.layers.Dense(2),
        'b': tf.keras.layers.Dense(3),
    }
    net = sequential_lib.Sequential([nest_map.NestMap(dense_branches)])
    self.assertEqual(net.state_spec, ())
def testNestedNestWithNestedState(self):
    """Outputs and states keep the nested layer structure."""

    def _float_spec(size):
        return tf.TensorSpec(dtype=tf.float32, shape=(size,))

    # Layer structure: (<Dense>, {'a': {'b': <LSTM>}})
    dense_layer = tf.keras.layers.Dense(7)
    lstm_layer = tf.keras.layers.LSTM(
        8, return_state=True, return_sequences=True)
    inner_map = nest_map.NestMap({'b': lstm_layer})
    net = nest_map.NestMap((dense_layer, {'a': inner_map}))

    # TODO(b/177337002): remove the forced tuple wrapping the LSTM
    # state once we make a generic KerasWrapper network and clean up
    # Sequential and NestMap to use that instead of singleton Sequential.
    inputs = (tf.ones((1, 2)), {'a': {'b': tf.ones((1, 2))}})
    initial_state = (
        (),
        {'a': {'b': ((tf.ones((1, 8)), tf.ones((1, 8))),)}},
    )
    out, state = net(inputs, network_state=initial_state)

    expected_out = (_float_spec(7), {'a': {'b': _float_spec(8)}})
    nest_utils.assert_matching_dtypes_and_inner_shapes(
        out,
        expected_out,
        caller=self,
        tensors_name='out',
        specs_name='out_expected')

    # The Dense slot has no state; the LSTM slot is a 1-tuple holding a
    # pair of state specs.
    expected_state = (
        (),
        {'a': {'b': ((_float_spec(8), _float_spec(8)),)}},
    )
    nest_utils.assert_matching_dtypes_and_inner_shapes(
        state,
        expected_state,
        caller=self,
        tensors_name='state',
        specs_name='state_expected')
def testCreateAndCall(self):
    """Build a three-branch NestMap network, then call it with and without state."""
    dense_branch = tf.keras.layers.Dense(8)
    conv_branch = sequential.Sequential([
        tf.keras.layers.Conv2D(2, 3),
        # Convert 3 inner dimensions to [8] for RNN.
        inner_reshape.InnerReshape([None] * 3, [8]),
    ])
    lstm_branch = tf.keras.layers.LSTM(
        8, return_state=True, return_sequences=True)

    net = sequential.Sequential([
        nest_map.NestMap({
            'inp1': dense_branch,
            'inp2': conv_branch,
            'inp3': lstm_branch,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Add(),
    ])

    # Only the LSTM branch contributes state specs.
    lstm_state_spec = tf.TensorSpec(shape=(8,), dtype=tf.float32)
    expected_state_spec = ({
        'inp1': (),
        'inp2': (),
        'inp3': ([lstm_state_spec] * 2,),
    },)
    self.assertEqual(net.state_spec, expected_state_spec)

    input_spec = {
        'inp1': tf.TensorSpec(shape=(3,), dtype=tf.float32),
        'inp2': tf.TensorSpec(shape=(4, 4, 2,), dtype=tf.float32),
        'inp3': tf.TensorSpec(shape=(3,), dtype=tf.float32),
    }
    output_spec = net.create_variables(input_spec)
    self.assertEqual(
        output_spec, tf.TensorSpec(shape=(8,), dtype=tf.float32))

    inputs = {
        'inp1': tf.ones((8, 10, 3), dtype=tf.float32),
        'inp2': tf.ones((8, 10, 4, 4, 2), dtype=tf.float32),
        'inp3': tf.ones((8, 10, 3), dtype=tf.float32),
    }

    def _shapes_of(state):
        return tf.nest.map_structure(lambda t: t.shape, state)

    expected_output_shape = tf.TensorShape([8, 10, 8])
    expected_state_shapes = ({
        'inp1': (),
        'inp2': (),
        'inp3': ([tf.TensorShape([8, 8])] * 2,),
    },)

    output, next_state = net(inputs)
    self.assertEqual(output.shape, expected_output_shape)
    self.assertEqual(_shapes_of(next_state), expected_state_shapes)

    # Test passing in a state.
    output, next_state = net(inputs, next_state)
    self.assertEqual(output.shape, expected_output_shape)
    self.assertEqual(_shapes_of(next_state), expected_state_shapes)
def create_sequential_actor_network(actor_fc_layers, action_tensor_spec):
    """Create a sequential actor network.

    Args:
      actor_fc_layers: Iterable of unit counts; each is passed to the
        `dense` factory defined elsewhere in this file.
      action_tensor_spec: Nest of action specs. The shared trunk output is
        replicated to match this nest, and one
        `_TanhNormalProjectionNetworkWrapper` is built per spec.

    Returns:
      A `sequential.Sequential` whose final layer is a `nest_map.NestMap`
      of projection wrappers structured like `action_tensor_spec`.
    """

    def tile_as_nest(non_nested_output):
        # Place the same tensor at every leaf of the action spec nest.
        return tf.nest.map_structure(
            lambda _: non_nested_output, action_tensor_spec)

    actor_layers = [dense(num_units) for num_units in actor_fc_layers]
    actor_layers.append(tf.keras.layers.Lambda(tile_as_nest))

    projection_nest = tf.nest.map_structure(
        _TanhNormalProjectionNetworkWrapper, action_tensor_spec)
    actor_layers.append(nest_map.NestMap(projection_nest))
    return sequential.Sequential(actor_layers)
def create_sequential_actor_net(fc_layer_units, action_tensor_spec):
    """Helper function for creating the actor network.

    Args:
      fc_layer_units: Iterable of unit counts, one tanh Dense layer each.
      action_tensor_spec: Spec whose `shape.num_elements()` sizes both the
        mean and the standard-deviation projections.

    Returns:
      A `sequential.Sequential` whose last layer builds a
      `tfp.distributions.MultivariateNormalDiag`.
    """

    def create_dist(loc_and_scale):
        # The first `ndims` entries of the last axis are the means; the
        # remainder are pre-softplus scales.
        ndims = action_tensor_spec.shape.num_elements()
        loc = loc_and_scale[..., :ndims]
        scale_diag = tf.math.softplus(loc_and_scale[..., ndims:])
        return tfp.distributions.MultivariateNormalDiag(
            loc=loc, scale_diag=scale_diag, validate_args=True)

    def means_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=0.1),
            name='means_projection_layer')

    def std_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        std_kernel_initializer_scale = 0.1
        # Inverse softplus of 0.35, so softplus(bias) starts at 0.35.
        std_bias_initializer_value = np.log(np.exp(0.35) - 1)
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=std_kernel_initializer_scale),
            bias_initializer=tf.keras.initializers.Constant(
                value=std_bias_initializer_value))

    # One Orthogonal initializer instance is shared by all hidden layers.
    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal())

    actor_layers = [dense(num_units) for num_units in fc_layer_units]
    # Feed the same trunk output to both projection heads.
    actor_layers.append(
        tf.keras.layers.Lambda(lambda x: {'loc': x, 'scale': x}))
    actor_layers.append(
        nest_map.NestMap({
            'loc': means_layers(),
            'scale': std_layers(),
        }))
    actor_layers.append(nest_map.NestFlatten())
    # Concatenate the mean and standard deviation output to feed into the
    # distribution layer.
    actor_layers.append(tf.keras.layers.Concatenate(axis=-1))
    # Create the output distribution from the mean and standard deviation.
    actor_layers.append(tf.keras.layers.Lambda(create_dist))
    return sequential.Sequential(actor_layers)
def create_sequential_critic_net(l2_regularization_weight=0.0,
                                 shared_layer=None):
    """Build a critic that adds a value-branch output to an action-branch output.

    Both branches are single-unit Dense layers with constant initializers
    and an L2 kernel regularizer.

    Args:
      l2_regularization_weight: Weight given to
        `tf.keras.regularizers.l2` for both Dense kernels.
      shared_layer: Optional layer; when truthy it is chained after the
        value Dense layer inside a `sequential.Sequential`.

    Returns:
      A `sequential.Sequential` that maps a (value_input, action_input)
      pair to the flattened sum of the two branch outputs.
    """

    def _constant_dense(kernel_values):
        # Single-unit Dense with a fixed kernel and a zero bias.
        return tf.keras.layers.Dense(
            1,
            kernel_regularizer=tf.keras.regularizers.l2(
                l2_regularization_weight),
            kernel_initializer=tf.initializers.constant(kernel_values),
            bias_initializer=tf.initializers.constant([[0]]))

    def sum_value_and_action_out(value_and_action_out):
        value_out, action_out = value_and_action_out
        return tf.reshape(value_out + action_out, [-1])

    # NOTE(review): the 2x1 kernel suggests the value branch expects
    # two input features - confirm against callers.
    value_layer = _constant_dense([[0], [1]])
    if shared_layer:
        value_layer = sequential.Sequential([value_layer, shared_layer])

    action_layer = _constant_dense([[1]])

    critic_layers = [
        nest_map.NestMap((value_layer, action_layer)),
        tf.keras.layers.Lambda(sum_value_and_action_out),
    ]
    return sequential.Sequential(critic_layers)