def test_reinforce_agent_init_with_multiple_resource_sets():
    """
    Checks REINFORCE agent construction and initialisation for an environment whose action space
    is split into multiple action subspaces (multiple resource sets).
    """
    # The scenario is fixed here because the expected values asserted below are hard to express
    # as test parameters.
    scenario_name = 'double_reentrant_line_shared_res_homogeneous_cost'

    # Build the environment with default parameters apart from a short episode length.
    # Environment parameters do not affect the test result here.
    scenario = load_scenario(
        scenario_name, job_gen_seed=10, override_env_params={'max_episode_length': 25})
    tf_env, _ = rl_env_from_snc_env(scenario[1], discount_factor=0.99)

    # Create the agent under test and initialise it.
    agent = create_reinforce_agent(tf_env)
    agent.initialize()

    # The action spec is expected to be a pair of bounded specs, each of shape (1, 3).
    action_spec = agent.action_spec
    assert isinstance(action_spec, tuple)
    assert len(action_spec) == 2
    for sub_spec in action_spec:
        assert isinstance(sub_spec, BoundedTensorSpec)
    for sub_spec in action_spec:
        assert sub_spec.shape == tf.TensorShape((1, 3))

    # Remaining properties of the initialised agent.
    assert agent.name == "reinforce_agent"
    assert agent.time_step_spec == tf_env.time_step_spec()
def test_reinforce_agent_play(env_name):
    """
    Builds on the set up and initialisation test by letting the agent act in the environment.

    :param env_name: Name of the scenario to load for the test.
    """
    # Build the environment with default parameters apart from a short episode length.
    # Environment parameters do not affect the test result here.
    scenario = load_scenario(
        env_name, job_gen_seed=10, override_env_params={'max_episode_length': 25})
    tf_env, _ = rl_env_from_snc_env(scenario[1], discount_factor=0.99)

    # Create and initialise the REINFORCE agent.
    agent = create_reinforce_agent(tf_env)
    agent.initialize()

    # Start from a freshly reset environment.
    tf_env.reset()

    num_steps = 5
    for _ in range(num_steps):
        # No state is stored between iterations, so the current TimeStep (a named tuple) is
        # read back from the environment at every step.
        current_step = tf_env.current_time_step()
        policy_step = agent.collect_policy.action(current_step)
        raw_action = policy_step.action

        # A tuple action is joined along the last axis so that it can be checked as one tensor.
        flat_action = (
            tf.concat(raw_action, axis=-1) if isinstance(raw_action, tuple) else raw_action
        )
        # The action is expected to be binary.
        assert snc.is_binary(flat_action)

        # The environment is stepped with the action in its original (unconcatenated) form.
        tf_env.step(raw_action)
def test_reinforce_agent_learning(env_name):
    """
    Builds on the playing test by also running a short training loop.

    Note: This does not test that training improves the policy. It only tests that the training
    loop runs and that the trainable variables change as a result.

    :param env_name: Name of the scenario to load for the test.
    """
    num_training_iterations = 5
    episodes_per_iteration = 2

    # Build the environment with default parameters apart from a short episode length.
    # Environment parameters do not affect the test result here.
    scenario = load_scenario(
        env_name, job_gen_seed=10, override_env_params={'max_episode_length': 25})
    tf_env, _ = rl_env_from_snc_env(scenario[1], discount_factor=0.99)

    # The agent shares the global step as its training step counter.
    step_counter = tf.compat.v1.train.get_or_create_global_step()
    agent = create_reinforce_agent(tf_env, training_step_counter=step_counter)

    # Buffer which stores the collected experience between training calls.
    buffer = TFUniformReplayBuffer(
        data_spec=agent.collect_data_spec,
        batch_size=tf_env.batch_size,
        max_length=1000)

    # Initialise the action network weights etc.
    agent.initialize()

    # A driver handles data collection for the agent. This takes care of much of the TensorFlow
    # set up and solves previous errors with episodes of differing lengths.
    driver = DynamicEpisodeDriver(
        tf_env,
        agent.collect_policy,
        observers=[buffer.add_batch],
        num_episodes=episodes_per_iteration)

    # Initial states of the environment and of the collection policy.
    time_step = tf_env.reset()
    policy_state = agent.collect_policy.get_initial_state(tf_env.batch_size)

    # Snapshot the variables so that the effect of training can be verified afterwards.
    vars_before = deepcopy(agent.trainable_variables)
    assert len(vars_before) > 0, "Agent has no trainable variables."

    # Minimal training loop: collect, train on everything collected, then empty the buffer.
    for _ in range(num_training_iterations):
        time_step, policy_state = driver.run(time_step=time_step, policy_state=policy_state)
        collected = buffer.gather_all()
        agent.train(collected)
        buffer.clear()

    # Every trainable variable must differ from its pre-training snapshot.
    assert not any(
        np.allclose(before.numpy(), after.numpy())
        for before, after in zip(vars_before, agent.trainable_variables)
    )
def get_reinforce_agent(
        env: TFPyEnvironment,
        discount_factor: float,
        debug: bool = False,
        agent_params: Optional[Dict[str, Any]] = None
) -> ReinforceAgent:
    """
    Creates a REINFORCE agent for the given environment, initialises it and returns it.

    :param env: The TensorFlow environment from which the agent is set up (action spaces etc.).
    :param discount_factor: The discount applied to future rewards (passed to the agent as gamma).
    :param debug: Flag which determines whether to include extra TensorBoard logs for debugging.
    :param agent_params: A dictionary of possible overrides for the default TF-Agents agent set up.
    :return: An initialised REINFORCE agent whose train method is wrapped in tf.function.
    """
    # The global step is used as the agent's training step counter.
    step_counter = tf.compat.v1.train.get_or_create_global_step()

    creation_kwargs = dict(
        gamma=discount_factor,
        debug=debug,
        training_step_counter=step_counter,
        agent_params=agent_params,
    )
    reinforce_agent = create_reinforce_agent(env, **creation_kwargs)
    reinforce_agent.initialize()

    # Replace the bound train method with its tf.function-wrapped equivalent.
    reinforce_agent.train = tf.function(reinforce_agent.train)
    return reinforce_agent
def test_rl_simulation_agent_serialisation():
    """
    Checks the custom serialisation of the agent which is used when saving the state of the SNC
    simulator. The custom serialisation exists because TensorFlow objects cannot be serialised
    directly.
    """
    # Build the simulation agent in the same way as the other RLSimulationAgent tests.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # The dictionary representation must contain every expected attribute.
    expected_keys = (
        "_rl_env",
        "_rl_agent",
        "_policy",
        "_is_eval_policy",
        "env",
        "buffer_processing_matrix",
        "constituency_matrix",
        "demand_rate",
        "list_boundary_constraint_matrices",
        "name",
    )
    serialised = sim_agent.to_serializable()
    missing_keys = [key for key in expected_keys if key not in serialised]
    assert not missing_keys

    # The dictionary must be compatible with the json module and the chosen encoder.
    encoded = json.dumps(serialised, cls=NumpyEncoder, indent=4, sort_keys=True)
    assert encoded
def test_rl_simulation_agent_string_representation():
    """
    Tests that the string representation of the simulation agent ends with the agent's name.
    """
    # Set up the agent in the same way as the other RLSimulationAgent tests.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Ensure that the string representation of the agent contains the instance name at the end.
    # str.endswith is used rather than slicing with [-len(name):] because a zero-length name
    # would turn that slice into [-0:], i.e. the whole string, and fail the comparison spuriously.
    assert str(sim_agent).endswith(sim_agent.name)
def test_rl_simulation_agent_discount_factor_reinforce():
    """
    Checks that the discount factor given to a REINFORCE agent is the one reported by the
    RLSimulationAgent which wraps it.
    """
    # The agent's gamma deliberately differs from the environment's discount factor (0.99) so
    # that the assertion identifies which of the two values the wrapper reports.
    agent_gamma = 0.97

    # Set up the agent in the same way as the other RLSimulationAgent tests.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    agent = create_reinforce_agent(rl_env, gamma=agent_gamma)
    agent.initialize()
    del rl_env

    sim_agent = RLSimulationAgent(env, agent, normalise_obs=True)
    assert sim_agent.discount_factor == agent_gamma
def test_reinforce_agent_init(env_name, expected_action_spec_shape):
    """
    Checks REINFORCE agent construction and initialisation.

    :param env_name: Name of the scenario to load for the test.
    :param expected_action_spec_shape: The shape which the agent's action spec should have.
    """
    # Build the environment with default parameters apart from a short episode length.
    # Environment parameters do not affect the test result here.
    scenario = load_scenario(
        env_name, job_gen_seed=10, override_env_params={'max_episode_length': 25})
    tf_env, _ = rl_env_from_snc_env(scenario[1], discount_factor=0.99)

    # Create the agent under test and initialise it.
    agent = create_reinforce_agent(tf_env)
    agent.initialize()

    # Validate the initialisation through some properties of the initialised agent.
    action_spec = agent.action_spec
    assert isinstance(action_spec, BoundedTensorSpec)
    assert action_spec.shape == expected_action_spec_shape
    assert agent.name == "reinforce_agent"
    assert agent.time_step_spec == tf_env.time_step_spec()
def test_rl_simulation_agent_action_mapping():
    """
    Checks that the RL simulation agent, through its SNC interface, accepts a state and returns
    an action of the expected type which the environment can consume.
    """
    # Set up the agent in the same way as the other RLSimulationAgent tests.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Map the initial state of the environment to an action.
    initial_state = env.reset()
    chosen_action = sim_agent.map_state_to_actions(initial_state)

    # First a formal type check, then a practical one by stepping the environment with the action.
    assert isinstance(chosen_action, snc_types.ActionProcess)
    env.step(chosen_action)
def test_rl_simulation_agent_init():
    """
    Test the initialisation of an RL agent with an interface compatible with the SNC simulator.
    """
    # A tf_agents agent can only be instantiated from an RL environment, which in turn requires a
    # standard SNC environment. So an SNC environment is created and wrapped for the TensorFlow
    # agent. The TF environment is deleted once the agent exists, since it is no longer required
    # and to ensure that it is not used inadvertently.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env

    # Wrap the agent for the SNC simulator using information from the environment and the agent.
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Each attribute we want on the agent, paired with the type it should have.
    expected_attribute_types = (
        ("_rl_env", RLControlledRandomWalk),
        ("_rl_agent", TFAgent),
        ("_policy", tf_policy.Base),
        ("_is_eval_policy", bool),
        ("env", ControlledRandomWalk),
        ("buffer_processing_matrix", snc_types.BufferMatrix),
        ("constituency_matrix", snc_types.ConstituencyMatrix),
        ("demand_rate", np.ndarray),
        ("list_boundary_constraint_matrices", list),
        ("name", str),
    )
    for attribute_name, attribute_type in expected_attribute_types:
        assert hasattr(sim_agent, attribute_name)
        assert isinstance(getattr(sim_agent, attribute_name), attribute_type)
def load_rl_agent(
        env: ControlledRandomWalk,
        rl_algorithm: str,
        load_path: str,
        discount_factor: float = 0.99,
        agent_params: Optional[Dict[str, Any]] = None) -> RLSimulationAgent:
    """
    Instantiates an RL agent in the RLSimulationAgent interface for compatibility and loads the
    weights from training into it.

    :param env: The controlled random walk environment for which the agent is required.
    :param rl_algorithm: The name of the RL algorithm used to train the agent. Matched
        case-insensitively against 'reinforce' and 'ppo'.
    :param load_path: Path to a directory where TensorFlow checkpoints have been saved (i.e. where
        the model's weights are saved).
    :param discount_factor: A scalar discount factor to pass to the agent.
    :param agent_params: A dictionary of possible overrides for the default TF-Agents agent set up.
        May be None, in which case no overrides are applied.
    :return: An RL agent initialised with saved weights ready for evaluation.
    :raises NotImplementedError: If rl_algorithm is neither 'reinforce' nor 'ppo'.
    """
    # Lazy import of TensorFlow as if no RL agent is run then it isn't needed.
    import tensorflow as tf

    # Normalise the algorithm name once so that the observation-normalisation decision and the
    # agent selection below always agree (previously only the latter ignored case).
    algorithm = rl_algorithm.lower()

    # Attain a TensorFlow compatible version of the environment.
    # We need a TensorFlow environment to initialise the agent correctly.
    # First determine whether or not to normalise observations, PPO has its own normalisation so we
    # only normalise for reinforce agents or PPO agents where normalisation is turned off.
    # agent_params defaults to None, so fall back to an empty dict before looking up the override.
    normalise_obs = algorithm == 'reinforce' or (
        algorithm == 'ppo'
        and not (agent_params or {}).get('normalize_observations', True)
    )
    tf_env, _ = rl_env.rl_env_from_snc_env(
        env, discount_factor, normalise_observations=normalise_obs)

    # Pick out the correct RL agent from those we have implemented.
    if algorithm == 'reinforce':
        agent = create_reinforce_agent(tf_env, gamma=discount_factor, agent_params=agent_params)
    elif algorithm == 'ppo':
        agent = create_ppo_agent(tf_env, gamma=discount_factor, agent_params=agent_params)
    else:
        raise NotImplementedError(
            "An agent using the RL algorithm requested is not yet implemented")

    # Initialise the agent and load in parameters from the most recent save.
    # Note that this can be adjusted to load in weights from any point in training (so long as they
    # have been saved).
    agent.initialize()
    restorer = tf.train.Checkpoint(agent=agent)
    restore_manager = tf.train.CheckpointManager(restorer, directory=load_path, max_to_keep=20)
    # Attach the trainable variables to the checkpoint object before restoring.
    restorer.listed = agent.trainable_variables
    restoration = restorer.restore(restore_manager.latest_checkpoint)
    restoration.run_restore_ops()

    # Check that the weights have been loaded and that the model from which the weights were saved
    # matches the model which they are being loaded into.
    restoration.assert_nontrivial_match()
    restoration.assert_existing_objects_matched()

    # We name the agent in line with the checkpoint used to restore the weights. This aids in
    # identifying which experiment run is being looked at from log files.
    agent_name = f"RLSimulationAgent - {restore_manager.latest_checkpoint}"

    # Finally wrap the agent for compatibility with the SNC simulator.
    simulation_agent = RLSimulationAgent(env, agent, normalise_obs, name=agent_name)
    return simulation_agent