def test_initialisation_multiple_heads():
    """ Test network initialisation with multiple action subspaces i.e. multiple heads. """
    # Set up the (IO) space shapes.
    observation_dim = 10
    # Ensure that the action subspace dimensions sum to the overall num_actions.
    action_subspace_dimensions = (3, 5)
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape((observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = (BoundedTensorSpec(shape=tf.TensorShape((3, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_1",
                                            minimum=0,
                                            maximum=1),
                          BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_2",
                                            minimum=0,
                                            maximum=1))

    # Use TensorSpecs to be compatible with TensorFlow.
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=action_subspace_dimensions,
        hidden_units=(64, ))

    # Ensure that the network has set up some layers.
    assert hasattr(network, "_shared_layers") and network._shared_layers is not None

    del network
def __init__(self, batch_size, episode_length, obs_dim=1, action_type=ActionType.Discrete):
    """Initializes the environment.

    Args:
        batch_size (int): The batch size expected for the actions and observations.
        episode_length (int): The length of each episode.
        obs_dim (int): The dimension of the observation vector.
        action_type (ActionType): Whether the action space is discrete or continuous.
    """
    self._steps = 0
    self._episode_length = episode_length
    super(UnittestEnv, self).__init__()
    self._action_type = action_type
    if action_type == ActionType.Discrete:
        self._action_spec = BoundedTensorSpec(
            shape=(1, ), dtype=tf.int64, minimum=0, maximum=1)
    else:
        self._action_spec = BoundedTensorSpec(
            shape=(1, ), dtype=tf.float32, minimum=[0], maximum=[1])
    self._observation_spec = TensorSpec(shape=(obs_dim, ), dtype=tf.float32)
    self._batch_size = batch_size
    self.reset()
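# Illustrative only: a minimal check of the specs set up above. The `action_spec()` /
# `observation_spec()` accessors are assumed to be provided by the environment base class,
# which is not shown here.
def example_unittest_env_specs():
    env = UnittestEnv(batch_size=2, episode_length=5, obs_dim=3,
                      action_type=ActionType.Continuous)
    assert env.action_spec().dtype == tf.float32  # ActionType.Discrete would yield tf.int64.
    assert env.observation_spec().shape == (3, )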
def save_model():
    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3)
    obs_spec = TensorSpec((7, ), dtype=tf.float32, name='observation')
    action_spec = BoundedTensorSpec((1, ), dtype=tf.int32, minimum=0, maximum=3, name='action')

    actor_net = ActorDistributionRnnNetwork(obs_spec, action_spec, lstm_size=(100, 100))
    value_net = ValueRnnNetwork(obs_spec)

    agent = ppo_agent.PPOAgent(
        time_step_spec=time_step_spec(obs_spec),
        action_spec=action_spec,
        optimizer=optimizer,
        actor_net=actor_net,
        value_net=value_net,
        normalize_observations=True,
        normalize_rewards=True,
        use_gae=True,
        num_epochs=1,
    )

    checkpointer = Checkpointer(
        ckpt_dir='checkpoints/policy',
        max_to_keep=1,
        agent=agent,
        policy=agent.policy,
        global_step=tf.compat.v1.train.get_or_create_global_step())
    checkpointer.initialize_or_restore()

    saver = policy_saver.PolicySaver(agent.policy)
    saver.save('final_policy')
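# A sketch of restoring and querying the policy saved by save_model() above. The observation
# shape (1, 7) matches obs_spec; the rest follows standard TF-Agents SavedModel usage, so treat
# the exact call pattern as an assumption rather than project-verified code.
def load_and_run_policy():
    from tf_agents.trajectories import time_step as ts

    saved_policy = tf.saved_model.load('final_policy')
    policy_state = saved_policy.get_initial_state(batch_size=1)  # The RNN policy carries state.
    first_step = ts.restart(tf.zeros([1, 7], dtype=tf.float32), batch_size=1)
    action_step = saved_policy.action(first_step, policy_state)
    return action_step.action  # An int32 tensor in [0, 3], per action_spec.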
def test_forward_pass_single_head():
    """ Test a forward pass through a single headed action network. """
    # Set up the network as in the single-headed initialisation test.
    num_actions = 5
    observation_dim = 10
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape((observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                           dtype=tf.dtypes.float32,
                                           minimum=0,
                                           maximum=1,
                                           name="action")
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=(num_actions, ),
        hidden_units=(64, ))

    # Test that zeros as input yields zeros as output. This follows from the biases being
    # initialised to zero.
    zeros_input = np.zeros((1, observation_dim))
    zeros_output = network(zeros_input, step_type=None)[0].logits
    assert np.all(zeros_output == 0)

    # Test that random inputs yield non-zero outputs.
    random_input = np.random.random((1, observation_dim))
    random_output = network(random_input, step_type=None)[0].logits
    assert np.all(random_output != 0)
def test_one_hot_categorical_projection_network():
    """ Test the networks used as action heads. This tests initialisation and the forward pass. """
    # Set up for a single action head with 5 actions in the subspace.
    num_actions = 5
    sample_spec = BoundedTensorSpec(shape=tf.TensorShape((num_actions, )),
                                    dtype=tf.dtypes.float32,
                                    minimum=0,
                                    maximum=1,
                                    name="action")
    action_head = OneHotCategoricalProjectionNetwork(sample_spec, num_actions)

    # Test the initialisation.
    assert hasattr(action_head,
                   "_projection_layer") and action_head._projection_layer is not None
    assert hasattr(action_head, "_output_spec") and isinstance(action_head._output_spec,
                                                               DistributionSpec)

    # Test the forward pass (assuming a final output of the shared layers of dimension 64).
    inputs = tf.convert_to_tensor(np.random.random((1, 100, 64)))
    num_batch_dims = 2
    action_dist = action_head(inputs, num_batch_dims)
    assert isinstance(action_dist, tfp.distributions.OneHotCategorical)
    assert action_dist.event_shape == num_actions

    # Ensure that there are two trainable weights, the weights matrix and a bias of a single
    # linear layer.
    assert len(action_head.trainable_weights) == 2
    assert action_head.trainable_weights[0].shape == (64, 5)
    assert action_head.trainable_weights[1].shape == (5, )
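# Optional follow-on check (illustrative): since the head returns a tfp OneHotCategorical with
# batch shape (1, 100) and event shape (5), a sample should be a batch of one-hot action vectors.
def example_sample_from_action_head(action_dist):
    sample = action_dist.sample()
    assert sample.shape == (1, 100, 5)
    # Exactly one action is selected per draw, so each one-hot vector sums to 1.
    assert np.all(tf.reduce_sum(sample, axis=-1).numpy() == 1)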
def test_initialisation_single_head():
    """ Test network initialisation with a single action subspace i.e. one head. """
    # Set up some (IO) space shapes.
    num_actions = 5
    observation_dim = 10

    # Use TensorSpecs to be compatible with TensorFlow.
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape((observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                           dtype=tf.dtypes.float32,
                                           minimum=0,
                                           maximum=1,
                                           name="action")

    # Instantiate the network.
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=(num_actions, ),
        hidden_units=(64, ))

    # Ensure that the network has set up some layers.
    assert hasattr(network, "_shared_layers") and network._shared_layers is not None

    del network
def test_forward_pass_multiple_heads():
    """ Test a forward pass through a multi-headed action network. """
    # Set up the network as in the multi-headed initialisation test.
    batch_size = 1
    observation_dim = 10
    action_subspace_dimensions = (3, 5)
    input_tensor_spec = tf.TensorSpec(shape=tf.TensorShape((observation_dim, )),
                                      dtype=tf.dtypes.float32,
                                      name="input")
    output_tensor_spec = (BoundedTensorSpec(shape=tf.TensorShape((3, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_1",
                                            minimum=0,
                                            maximum=1),
                          BoundedTensorSpec(shape=tf.TensorShape((5, )),
                                            dtype=tf.dtypes.float32,
                                            name="action_subspace_2",
                                            minimum=0,
                                            maximum=1))
    network = MultiHeadedCategoricalActionNetwork(
        input_tensor_spec,
        output_tensor_spec,
        action_subspace_dimensions=action_subspace_dimensions,
        hidden_units=(64, ))

    # Test that zeros as input yields zeros as output, which follows from the biases being
    # initialised to zero.
    # Also test that the network returns values for each head and that the shapes of the outputs
    # are as we would expect. We check logits as these are the network's raw outputs.
    zeros_input = np.zeros((1, observation_dim))
    zeros_output = network(zeros_input, step_type=None)[0]
    assert len(zeros_output) == 2
    assert zeros_output[0].logits.shape == (batch_size, 1, 3)
    assert zeros_output[1].logits.shape == (batch_size, 1, 5)
    assert np.all(zeros_output[0].logits == 0) and np.all(zeros_output[1].logits == 0)

    # Perform the same tests with random inputs, ensuring non-zero outputs.
    random_input = np.random.random((1, observation_dim))
    random_output = network(random_input, step_type=None)[0]
    assert len(random_output) == 2
    assert random_output[0].logits.shape == (batch_size, 1, 3)
    assert random_output[1].logits.shape == (batch_size, 1, 5)
    assert np.all(random_output[0].logits != 0) and np.all(random_output[1].logits != 0)
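# Illustrative extension of the multi-head test above: assuming each head returns a
# OneHotCategorical distribution (as in the projection-network test), the heads can be sampled
# independently, giving one one-hot action per subspace with the shapes asserted above.
def example_sample_multi_head_actions(random_output):
    sub_actions = tuple(dist.sample() for dist in random_output)
    assert sub_actions[0].shape == (1, 1, 3)
    assert sub_actions[1].shape == (1, 1, 5)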
def __init__(self,
             gym_env,
             n_agents,
             discount=1.0,
             spec_dtype_map=None,
             match_obs_space_dtype=True,
             auto_reset=True,
             simplify_box_bounds=True):
    """Wraps a multi-agent gym environment.

    Args:
        gym_env: The gym environment to wrap.
        n_agents: The number of agents acting in the environment.
        discount: Discount passed through to the underlying gym wrapper.
        spec_dtype_map: Optional mapping from gym spaces to spec dtypes.
        match_obs_space_dtype: Whether observations are cast to the spec dtype.
        auto_reset: Whether the environment resets automatically at episode end.
        simplify_box_bounds: Whether scalar Box bounds are simplified.
    """
    self.n_agents = n_agents
    super(MultiagentGymWrapper, self).__init__(gym_env, discount, spec_dtype_map,
                                               match_obs_space_dtype, auto_reset,
                                               simplify_box_bounds)

    # Create a single-agent version of the action spec and then tile it to
    # comply with tf-agents spec requirements.
    single_action_spec = BoundedTensorSpec(shape=(),
                                           dtype=self._action_spec.dtype,
                                           name=self._action_spec.name,
                                           minimum=self._action_spec.minimum,
                                           maximum=self._action_spec.maximum)
    self._action_spec = (single_action_spec, ) * n_agents
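# A hypothetical usage sketch: `make_multiagent_gym_env()` stands in for whatever builds the
# underlying gym environment and is not defined in the code above. After wrapping, there is one
# scalar action spec per agent, as set up at the end of __init__.
def example_wrap_multiagent_env():
    py_env = MultiagentGymWrapper(make_multiagent_gym_env(), n_agents=3)
    assert len(py_env.action_spec()) == 3
    return py_env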
NUM_EVAL_EPISODES = 1
AGENT_NAMES = ['inter0', 'inter1', 'inter2', 'coordinate']
NUM_ITERATIONS = 5
RANDOM_COLLECT_STEPS_INITIAL = 60
AGENTS_COLLECT_STEPS_INITIAL = 60
AGENTS_COLLECT_STEPS_PER_ITERATION = 0
RANDOM_COLLECT_STEPS_PER_ITERATION = 0
LOG_INTERVAL = 1
EVAL_INTERVAL = 5

# Create some specifications for the single (independent) agents and the coordinate agent.
FIXED_STEP_TYPE = tf.convert_to_tensor(0, dtype='int32')
obs_spec4independent_agent = BoundedTensorSpec(shape=(8, ),
                                               dtype=tf.float32,
                                               minimum=0,
                                               maximum=3.4028235e+38,
                                               name='observation')
act_spec4independent_agent = BoundedTensorSpec(shape=(4, ),
                                               dtype=tf.float32,
                                               minimum=0,
                                               maximum=10,
                                               name='action')
q_spec4independent_agent = BoundedTensorSpec(shape=(),
                                             dtype=tf.float32,
                                             minimum=-120,
                                             maximum=0,
                                             name='q_value')
ts_spec4independent_agent = time_step_spec(obs_spec4independent_agent)
obs_spec4coordinate_agent = BoundedTensorSpec(shape=(12, ),