def make_networks(
    action_spec: specs.Array,
    policy_layer_sizes: Sequence[int] = (300, 200),
    critic_layer_sizes: Sequence[int] = (400, 300),
) -> Dict[str, snt.Module]:
  """Creates networks used by the agent."""
  num_dimensions = np.prod(action_spec.shape, dtype=int)
  critic_layer_sizes = list(critic_layer_sizes)

  policy_network = snt.Sequential([
      networks.LayerNormMLP(policy_layer_sizes),
      networks.MultivariateNormalDiagHead(num_dimensions),
  ])
  # The multiplexer concatenates the (maybe transformed) observations/actions.
  critic_network = snt.Sequential([
      networks.CriticMultiplexer(
          critic_network=networks.LayerNormMLP(critic_layer_sizes)),
      networks.DiscreteValuedHead(0., 1., 10),
  ])

  return {
      'policy': policy_network,
      'critic': critic_network,
  }
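# Usage sketch (not from the original source; the toy specs below are
# assumptions): both heads return distributions, so a forward pass yields
# objects we can sample from or take statistics of.
action_spec = specs.Array(shape=(2,), dtype=np.float32, name='action')
observation_spec = specs.Array(shape=(8,), dtype=np.float32, name='observation')

nets = make_networks(action_spec)
tf2_utils.create_variables(nets['policy'], [observation_spec])
tf2_utils.create_variables(nets['critic'], [observation_spec, action_spec])

obs = tf2_utils.add_batch_dim(tf2_utils.zeros_like(observation_spec))
action_dist = nets['policy'](obs)      # A TFP distribution over actions.
action = action_dist.sample()
q_dist = nets['critic'](obs, action)   # A discrete-valued return distribution.
q_value = q_dist.mean()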
def test_snapshot_distribution(self):
  """Test that the snapshotter correctly saves/restores snapshots."""
  # Create a test network.
  net1 = snt.Sequential([
      networks.LayerNormMLP([10, 10]),
      networks.MultivariateNormalDiagHead(1)
  ])
  spec = specs.Array([10], dtype=np.float32)
  tf2_utils.create_variables(net1, [spec])

  # Save the test network.
  directory = self.get_tempdir()
  objects_to_save = {'net': net1}
  snapshotter = tf2_savers.Snapshotter(objects_to_save, directory=directory)
  snapshotter.save()

  # Reload the test network.
  net2 = tf.saved_model.load(os.path.join(snapshotter.directory, 'net'))
  inputs = tf2_utils.add_batch_dim(tf2_utils.zeros_like(spec))

  with tf.GradientTape() as tape:
    dist1 = net1(inputs)
    loss1 = tf.math.reduce_sum(dist1.mean() + dist1.variance())
    grads1 = tape.gradient(loss1, net1.trainable_variables)

  with tf.GradientTape() as tape:
    dist2 = net2(inputs)
    loss2 = tf.math.reduce_sum(dist2.mean() + dist2.variance())
    grads2 = tape.gradient(loss2, net2.trainable_variables)

  assert all(tree.map_structure(np.allclose, list(grads1), list(grads2)))
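# Standalone usage sketch (an assumption, separate from the test above):
# Snapshotter used for periodic snapshots during training. `time_delta_minutes`
# is assumed to be a constructor argument; the loop body is a placeholder.
policy = snt.Sequential([
    networks.LayerNormMLP([10, 10]),
    networks.MultivariateNormalDiagHead(1),
])
tf2_utils.create_variables(policy, [specs.Array([10], dtype=np.float32)])

snapshotter = tf2_savers.Snapshotter(
    {'policy': policy}, directory='/tmp/acme_snapshots', time_delta_minutes=5.)
for _ in range(100):
  # ... one learner/training step would go here ...
  snapshotter.save()  # Saves at most once per `time_delta_minutes`.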
def make_networks(
    action_spec: types.NestedSpec,
    policy_layer_sizes: Sequence[int] = (10, 10),
    critic_layer_sizes: Sequence[int] = (10, 10),
) -> Dict[str, snt.Module]:
  """Creates networks used by the agent."""
  num_dimensions = np.prod(action_spec.shape, dtype=int)
  policy_layer_sizes = list(policy_layer_sizes) + [num_dimensions]
  critic_layer_sizes = list(critic_layer_sizes) + [1]

  policy_network = snt.Sequential(
      [networks.LayerNormMLP(policy_layer_sizes), tf.tanh])
  # The multiplexer concatenates the (maybe transformed) observations/actions.
  critic_network = networks.CriticMultiplexer(
      critic_network=networks.LayerNormMLP(critic_layer_sizes))

  return {
      'policy': policy_network,
      'critic': critic_network,
  }
def make_networks(
    action_spec,
    policy_layer_sizes=(10, 10),
    critic_layer_sizes=(10, 10),
):
  """Creates networks used by the agent."""
  num_dimensions = np.prod(action_spec.shape, dtype=int)
  critic_layer_sizes = list(critic_layer_sizes) + [1]

  policy_network = snt.Sequential([
      networks.LayerNormMLP(policy_layer_sizes),
      networks.MultivariateNormalDiagHead(num_dimensions)
  ])
  critic_network = networks.CriticMultiplexer(
      critic_network=networks.LayerNormMLP(critic_layer_sizes))

  return {
      'policy': policy_network,
      'critic': critic_network,
  }
def make_networks(
    action_spec: specs.BoundedArray,
    policy_layer_sizes: Sequence[int] = (256, 256, 256),
    critic_layer_sizes: Sequence[int] = (512, 512, 256),
    vmin: float = -150.,
    vmax: float = 150.,
    num_atoms: int = 51,
) -> Dict[str, types.TensorTransformation]:
  """Creates networks used by the agent."""

  # Get total number of action dimensions from action spec.
  num_dimensions = np.prod(action_spec.shape, dtype=int)

  # Create the shared observation network; here simply a state-less operation.
  observation_network = tf2_utils.batch_concat

  # Create the policy network.
  policy_network = snt.Sequential([
      networks.LayerNormMLP(policy_layer_sizes),
      networks.MultivariateNormalDiagHead(num_dimensions)
  ])

  # The multiplexer concatenates the (maybe transformed) observations/actions.
  multiplexer = networks.CriticMultiplexer(
      critic_network=networks.LayerNormMLP(critic_layer_sizes),
      action_network=networks.ClipToSpec(action_spec))

  # Create the critic network.
  critic_network = snt.Sequential([
      multiplexer,
      networks.DiscreteValuedHead(vmin, vmax, num_atoms),
  ])

  return {
      'policy': policy_network,
      'critic': critic_network,
      'observation': observation_network,
  }
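# Usage sketch (an assumption, not from the original source): wiring the three
# returned networks together the way an Acme agent typically initializes them,
# i.e. the observation network's output spec feeds the policy and critic.
# The toy specs and the use of `tf2_utils.to_sonnet_module` are assumptions.
action_spec = specs.BoundedArray(
    shape=(6,), dtype=np.float32, minimum=-1., maximum=1., name='action')
observation_spec = specs.Array(shape=(24,), dtype=np.float32, name='observation')

agent_networks = make_networks(action_spec)

# Wrap the stateless observation transformation so variables can be created;
# create_variables returns the output spec, which the other networks consume.
observation_network = tf2_utils.to_sonnet_module(agent_networks['observation'])
emb_spec = tf2_utils.create_variables(observation_network, [observation_spec])
tf2_utils.create_variables(agent_networks['policy'], [emb_spec])
tf2_utils.create_variables(agent_networks['critic'], [emb_spec, action_spec])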
def render(env):
  # Render the underlying environment as an RGB array.
  return env.environment.render(mode='rgb_array')


environment_spec = specs.make_environment_spec(env)

# Create D4PG Agent:
# Get total number of action dimensions from action spec.
num_dimensions = np.prod(environment_spec.actions.shape, dtype=int)

# Create shared observation network:
observation_network = tf2_utils.batch_concat

# Create the deterministic policy network:
policy_network = snt.Sequential([
    networks.LayerNormMLP((256, 256, 256), activate_final=True),
    networks.NearZeroInitializedLinear(num_dimensions),
    networks.TanhToSpec(environment_spec.actions)
])

# Create the distributional critic network:
critic_network = snt.Sequential([
    networks.CriticMultiplexer(),
    networks.LayerNormMLP((512, 512, 256), activate_final=True),
    networks.DiscreteValuedHead(vmin=-150., vmax=150., num_atoms=51)
])

# Create logger for agent diagnostics:
agent_logger = loggers.TerminalLogger(label='agent', time_delta=10)

# Create D4PG Agent:
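# Sketch of the agent construction (an assumption, not the original snippet's
# continuation): the d4pg.D4PG keyword arguments and the EnvironmentLoop usage
# below are assumed, as is the noise scale sigma.
import acme
from acme.agents.tf import d4pg

agent = d4pg.D4PG(
    environment_spec=environment_spec,
    policy_network=policy_network,
    critic_network=critic_network,
    observation_network=observation_network,
    sigma=0.3,
    logger=agent_logger,
    checkpoint=False)

# Run the agent in an environment loop for a few episodes.
env_loop = acme.EnvironmentLoop(env, agent)
env_loop.run(num_episodes=10)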