Example #1
def test_rl_env_from_snc_env_action_space_dims_simple():
    """
    Tests the formation and stability of the action space dimensions of the environment through
    the RL environment pipeline in a simple setting.
    """
    # Set up the environment parameters.
    cost_per_buffer = np.ones((1, 1))
    initial_state = (0,)
    capacity = np.ones((1, 1)) * np.inf
    demand_rate_val = 0.7
    job_conservation_flag = True
    seed = 72

    demand_rate = np.array([demand_rate_val])[:, None]
    buffer_processing_matrix = - np.ones((1, 1))
    constituency_matrix = np.ones((1, 1))
    list_boundary_constraint_matrices = [constituency_matrix]

    # Construct environment.
    job_generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_rate, buffer_processing_matrix, job_gen_seed=seed)
    assert job_generator.routes == {}
    state_initialiser = stinit.DeterministicCRWStateInitialiser(initial_state)
    env = RLControlledRandomWalk(cost_per_buffer, capacity, constituency_matrix, job_generator,
                                 state_initialiser, job_conservation_flag,
                                 list_boundary_constraint_matrices)

    _, action_space_dims = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    _, action_space_dims_tf = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=True)
    assert action_space_dims == action_space_dims_tf
    assert len(env.action_vectors) == sum(action_space_dims)
Example #2
def test_rl_env_from_snc_env_action_space_dims_multiple_resource_sets():
    """
    Tests the formation and stability of the action space dimensions of the environment through
    the RL environment pipeline in a more complex setting.
    """
    # Set the environment name explicitly as the asserted values are specific to this scenario.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name, job_gen_seed=10).env
    rl_env, action_space_dims = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    _, action_space_dims_tf = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=True)
    assert action_space_dims == action_space_dims_tf
    assert len(rl_env.action_vectors) == sum(action_space_dims)
Example #3
def test_rl_simulation_agent_serialisation():
    """
    Test the custom serialisation of the agent used when saving the state of the SNC simulator.
    The customised serialisation was required due to the inability to serialise TensorFlow objects.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Attain the dictionary representation of the agent and test that all the attributes expected
    # are present.
    serialised_agent = sim_agent.to_serializable()
    assert all(attr in serialised_agent for attr in [
        "_rl_env", "_rl_agent", "_policy", "_is_eval_policy", "env",
        "buffer_processing_matrix", "constituency_matrix", "demand_rate",
        "list_boundary_constraint_matrices", "name"
    ])
    # Ensure that the dictionary representation is compatible with the json module and the chosen
    # encoder.
    json_string = json.dumps(serialised_agent,
                             cls=NumpyEncoder,
                             indent=4,
                             sort_keys=True)
    assert bool(json_string)
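
A minimal sketch of what a NumPy-aware JSON encoder of the kind used above typically looks like; the `NumpyArrayEncoder` name and example payload below are illustrative, not the project's actual `NumpyEncoder`.

import json
import numpy as np

class NumpyArrayEncoder(json.JSONEncoder):
    """Illustrative encoder: converts NumPy arrays and scalars to JSON-friendly types."""
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            return obj.item()
        return super().default(obj)

# Example: dump a serialised-agent-style dictionary containing NumPy data.
print(json.dumps({"demand_rate": np.array([[0.7]])}, cls=NumpyArrayEncoder, indent=4))
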
Example #4
def get_environment(env_name: str,
                    agent_name: str,
                    episode_len_to_min_drain_time_ratio: float,
                    terminal_discount_factor: float = 0.7,
                    action_repetitions: int = 1,
                    parallel_environments: int = 8,
                    env_overload_params: Optional[Dict] = None,
                    agent_params: Optional[Dict] = None,
                    seed: Optional[int] = None) \
        -> Tuple[TFPyEnvironment, float, float, int, Tuple[int, ...]]:
    """
    Builds and initialises a TensorFlow environment implementation of the named SNC scenario.

    :param env_name: The name of the scenario to load. Must be in the list of implemented scenarios.
    :param agent_name: The name of the RL agent the environment is to be set up for.
    :param episode_len_to_min_drain_time_ratio: Maximum number of time steps per episode as a
        proportion of the minimal draining time.
    :param terminal_discount_factor: The discount applied to the final time step from which a
        per-step discount factor is calculated.
    :param action_repetitions: Number of time steps each selected action is repeated for.
    :param parallel_environments: Number of environments to run in parallel.
    :param env_overload_params: Dictionary of parameters to override the scenario defaults.
    :param agent_params: Optional dictionary of agent parameters the environment can be adapted for.
    :param seed: Random seed used to initialise the environment.
    :return: A tuple of the TensorFlow environment wrapped for TF-Agents, the per-step discount
        factor, the network load, the maximum episode length and the action space dimensions.
    """
    # Handle some default argument clean up.
    if env_overload_params is None:
        env_overload_params = {}
    if agent_params is None:
        agent_params = {}

    env = scenarios.load_scenario(env_name, seed, env_overload_params).env

    if np.all(env.state_initialiser.initial_state == 0):
        env.max_episode_length = 450
    else:
        if env.state_initialiser.initial_state.ndim == 1:
            initial_state = env.state_initialiser.initial_state.reshape((-1, 1))
        else:
            initial_state = env.state_initialiser.initial_state
        minimal_draining_time = compute_minimal_draining_time_from_env_cvxpy(initial_state, env)
        env.max_episode_length = int(episode_len_to_min_drain_time_ratio * minimal_draining_time)
    discount_factor = np.exp(np.log(terminal_discount_factor) / env.max_episode_length)
    load = np.max(compute_load_workload_matrix(env).load)
    max_ep_len = env.max_episode_length

    # Allow toggling of observation normalisation in the environment.
    # PPO normalises observations internally by default, so in that case normalisation in the
    # environment is not needed and would otherwise be applied twice.
    if agent_name == 'ppo' and agent_params.get('normalize_observations', True):
        normalise_obs_in_env = False
    else:
        normalise_obs_in_env = True

    # Wrap and parallelise environment for tf agents.
    tf_env, action_dims = rl_env_from_snc_env(env,
                                              discount_factor,
                                              action_repetitions,
                                              parallel_environments,
                                              normalise_observations=normalise_obs_in_env)
    return tf_env, discount_factor, load, max_ep_len, action_dims
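
The per-step discount computed above is the T-th root of terminal_discount_factor, so that compounding it over a full episode of max_episode_length steps recovers the terminal discount. A minimal sketch of that relationship (the values below are illustrative):

import numpy as np

terminal_discount_factor = 0.7   # illustrative value, matching the default above
max_episode_length = 450         # illustrative horizon

# gamma is chosen so that gamma ** max_episode_length == terminal_discount_factor.
gamma = np.exp(np.log(terminal_discount_factor) / max_episode_length)
assert np.isclose(gamma, terminal_discount_factor ** (1 / max_episode_length))
assert np.isclose(gamma ** max_episode_length, terminal_discount_factor)
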
Example #5
def test_bellman_pets_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource sets).
    """
    # Set the environment name explicitly as the asserted values are specific to this scenario.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a Bellman PETS agent for the environment.
    bellman_pets_agent = create_bellman_pets_agent(
        env=tf_env,
        reward_model_class=CRWRewardModel,
        initial_state_distribution_model_class=CRWInitialStateModel,
    )

    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(bellman_pets_agent.action_spec, tuple)
    assert len(bellman_pets_agent.action_spec) == 2
    assert isinstance(bellman_pets_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(bellman_pets_agent.action_spec[1], BoundedTensorSpec)
    assert bellman_pets_agent.action_spec[0].shape == tf.TensorShape((1, 3))
    assert bellman_pets_agent.action_spec[1].shape == tf.TensorShape((1, 3))
    assert bellman_pets_agent.name == "PETS_Agent"
    assert bellman_pets_agent.time_step_spec == tf_env.time_step_spec()
Example #6
def test_ppo_agent_play(env_name):
    """
    Extension of the agent set up and initialisation test to include playing episodes.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, action_dims = rl_env_from_snc_env(load_scenario(
        env_name, job_gen_seed=10)[1],
                                              discount_factor=0.99,
                                              normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()

    # Reset the environment
    tf_env.reset()
    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point we capture it from the environment
        # fresh each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Attain our agent's action.
        action_step = ppo_agent.collect_policy.action(time_step)
        # Concatenate the sub-actions (one per resource set) if the action is a tuple.
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action

        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)
        # Play the action out in the environment.
        tf_env.step(action_step.action)
Example #7
def test_reinforce_agent_learning(env_name):
    """
    Extension of the test for an agent playing in the environment to include training.
    Note: This does not test that training improves the policy. It simply tests that the training
    loop runs effectively.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Set up a training step counter.
    global_step = tf.compat.v1.train.get_or_create_global_step()
    # Instantiate a REINFORCE agent
    reinforce_agent = create_reinforce_agent(tf_env,
                                             training_step_counter=global_step)

    # Instantiate a replay buffer.
    replay_buffer = TFUniformReplayBuffer(
        data_spec=reinforce_agent.collect_data_spec,
        batch_size=tf_env.batch_size,
        max_length=1000)

    # Initialise the action network weights etc.
    reinforce_agent.initialize()

    # Use a driver to handle data collection for the agent. This handles a lot of the backend
    # TensorFlow set up and solves previous errors with episodes of differing lengths.
    collect_driver = DynamicEpisodeDriver(tf_env,
                                          reinforce_agent.collect_policy,
                                          observers=[replay_buffer.add_batch],
                                          num_episodes=2)

    # Get the initial states of the agent and environment before training.
    time_step = tf_env.reset()
    policy_state = reinforce_agent.collect_policy.get_initial_state(
        tf_env.batch_size)

    # Take a copy of the variables in order to ensure that training does lead to parameter changes.
    initial_vars = deepcopy(reinforce_agent.trainable_variables)
    assert len(initial_vars) > 0, "Agent has no trainable variables."

    # Set up a minimal training loop to simply test training mechanics work.
    for _ in range(5):
        # Collect experience.
        time_step, policy_state = collect_driver.run(time_step=time_step,
                                                     policy_state=policy_state)
        # Now the replay buffer should have data in it so we can collect the data and train the
        # agent.
        experience = replay_buffer.gather_all()
        reinforce_agent.train(experience)
        # Clear the replay buffer and return to play.
        replay_buffer.clear()

    # Check that training has had some effect
    for v1, v2 in zip(initial_vars, reinforce_agent.trainable_variables):
        assert not np.allclose(v1.numpy(), v2.numpy())
Example #8
def test_reinforce_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource sets).
    """
    # Set the environment name explicitly as the asserted values are specific to this scenario.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'

    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, tuple)
    assert len(reinforce_agent.action_spec) == 2
    assert isinstance(reinforce_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(reinforce_agent.action_spec[1], BoundedTensorSpec)
    assert reinforce_agent.action_spec[0].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.action_spec[1].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()
Example #9
def test_ppo_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource sets).
    """
    # Set the environment name explicitly as the asserted values are specific to this scenario.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(ppo_agent.action_spec, tuple)
    assert len(ppo_agent.action_spec) == 2
    assert isinstance(ppo_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(ppo_agent.action_spec[1], BoundedTensorSpec)
    assert ppo_agent.action_spec[0].shape == tf.TensorShape((3))
    assert ppo_agent.action_spec[1].shape == tf.TensorShape((3))
    assert ppo_agent.name == "PPO_Agent"
    assert ppo_agent.time_step_spec == tf_env.time_step_spec()
Example #10
def test_rl_simulation_agent_normalise_obs_usage_with_normalisation():
    """Ensure that the _normalise_obs property of RLSimulationAgent is used correctly."""
    # Set up the agent as before.
    seed = 72
    state = np.array([100, 100, 100, 100])
    env = load_scenario("klimov_model",
                        job_gen_seed=seed,
                        override_env_params={
                            "initial_state": state
                        }).env
    rl_env, _ = rl_env_from_snc_env(env,
                                    discount_factor=0.99,
                                    normalise_observations=True)
    ppo_agent = MagicMock()
    ppo_agent.discount_factor = 0.99
    ppo_agent._gamma = 0.99
    policy = MagicMock()
    ppo_agent.collect_policy = policy
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=True)
    ppo_sim_agent._rl_env.preprocess_action = MagicMock()
    ppo_sim_agent.map_state_to_actions(state)
    expected_timestep = TimeStep(step_type=StepType(0),
                                 reward=None,
                                 discount=0.99,
                                 observation=state.reshape(1, -1) /
                                 state.sum())
    assert policy.action.call_count == 1
    call_timestep = policy.action.call_args[0][0]
    assert (call_timestep.observation == expected_timestep.observation).all()
Example #11
def test_reinforce_agent_play(env_name):
    """
    Extension of the agent set up and initialisation test to include playing episodes.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()

    # Reset the environment
    tf_env.reset()
    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point we capture it from the environment
        # fresh each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Attain our agent's action.
        action_step = reinforce_agent.collect_policy.action(time_step)
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action

        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)

        # Play the action out in the environment.
        tf_env.step(action_step.action)
Example #12
def test_rl_env_normalise_obs_property():
    """
    Ensure that the normalise_obs property of RLControlledRandomWalk is set and updated correctly.
    """
    # Set the environment name explicitly as the asserted values are specific to this scenario.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name, job_gen_seed=10).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    assert rl_env.normalise_obs is True
    rl_env.normalise_obs = False
    assert rl_env.normalise_obs is False
Example #13
def test_rl_simulation_agent_string_representation():
    """
    Tests that the string representation of the simulation agent is as expected.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)
    # Ensure that the string representation of the agent contains the instance name at the end.
    assert str(sim_agent)[-len(sim_agent.name):] == sim_agent.name
Example #14
def test_rl_simulation_agent_discount_factor_ppo():
    """
    Tests that the discount factor is passed from a PPO agent to an RLSimulationAgent correctly.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env,
                                    discount_factor=0.99,
                                    normalise_observations=False)
    ppo_agent = create_ppo_agent(rl_env, gamma=0.90)
    ppo_agent.initialize()
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=False)
    assert ppo_sim_agent.discount_factor == 0.90
Example #15
def test_rl_simulation_agent_normalise_obs_property():
    """Ensure that the _normalise_obs property of RLSimulationAgent is set correctly."""
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env,
                                    discount_factor=0.99,
                                    normalise_observations=False)
    ppo_agent = create_ppo_agent(rl_env, gamma=0.90)
    ppo_agent.initialize()
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=False)
    assert ppo_sim_agent._normalise_obs is False
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=True)
    assert ppo_sim_agent._normalise_obs is True
Example #16
def test_rl_simulation_agent_discount_factor_reinforce():
    """
    Tests that the discount factor is passed from a REINFORCE agent to an RLSimulationAgent
    correctly.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    reinforce_agent = create_reinforce_agent(rl_env, gamma=0.97)
    reinforce_agent.initialize()
    del rl_env
    reinforce_sim_agent = RLSimulationAgent(env,
                                            reinforce_agent,
                                            normalise_obs=True)
    assert reinforce_sim_agent.discount_factor == 0.97
Example #17
def test_rl_env_normalise_obs_action():
    """
    Ensure that the normalise_obs property of RLControlledRandomWalk is used correctly.
    """
    # Set the environment name explicitly as the asserted values are specific to this scenario.
    env_name = 'klimov_model'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name,
                        job_gen_seed=10,
                        override_env_params={"initial_state": [100, 100, 100, 100]}).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    assert rl_env.normalise_obs is True
    s0_normalised = rl_env.reset()
    assert s0_normalised.tolist() == [0.25, 0.25, 0.25, 0.25]
    rl_env.normalise_obs = False
    s0_unnormalised = rl_env.reset()
    assert s0_unnormalised.tolist() == [100, 100, 100, 100]
Example #18
def test_ppo_agent_init(env_name, expected_action_spec_shape):
    """
    Tests agent set up and initialisation.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()
    # Validate initialisation by checking relevant properties of the initialised agent.
    assert isinstance(ppo_agent.action_spec, BoundedTensorSpec)
    assert ppo_agent.action_spec.shape == expected_action_spec_shape
    assert ppo_agent.name == "PPO_Agent"
    assert ppo_agent.time_step_spec == tf_env.time_step_spec()
Example #19
def test_reinforce_agent_init(env_name, expected_action_spec_shape):
    """
    Tests agent set up and initialisation.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
                                    discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()
    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, BoundedTensorSpec)
    assert reinforce_agent.action_spec.shape == expected_action_spec_shape
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()
Example #20
def test_rl_simulation_agent_action_mapping():
    """
    Tests that the RL Simulation Agent with the SNC interface is able to receive states and produce
    actions both of the expected type and form.
    """
    # Set up the agent as above
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Attain a state and form an action.
    state = env.reset()
    action = sim_agent.map_state_to_actions(state)
    # Ensure that the action is as expected first with a formal assertion and then by passing it
    # to the environment.
    assert isinstance(action, snc_types.ActionProcess)
    env.step(action)
Example #21
def test_rl_simulation_agent_init():
    """
    Test the initialisation of an RL agent with an interface compatible with the SNC simulator.
    """
    # To instantiate an agent from tf_agents we need an RL environment which itself requires a
    # standard SNC environment. We therefore set up an SNC environment and then wrap it for the
    # TensorFlow agent. This TF environment is later deleted since it is no longer required and to
    # ensure that it is not used inadvertently.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    # Wrapping the agent for the SNC simulator using information from the environment and the agent.
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Test that the agent has all of the attributes we want and that they are of the right type.
    assert hasattr(sim_agent, "_rl_env") and isinstance(
        sim_agent._rl_env, RLControlledRandomWalk)
    assert hasattr(sim_agent, "_rl_agent") and isinstance(
        sim_agent._rl_agent, TFAgent)
    assert hasattr(sim_agent, "_policy") and isinstance(
        sim_agent._policy, tf_policy.Base)
    assert hasattr(sim_agent, "_is_eval_policy") and isinstance(
        sim_agent._is_eval_policy, bool)
    assert hasattr(sim_agent, "env") and isinstance(sim_agent.env,
                                                    ControlledRandomWalk)
    assert hasattr(sim_agent, "buffer_processing_matrix") and isinstance(
        sim_agent.buffer_processing_matrix, snc_types.BufferMatrix)
    assert hasattr(sim_agent, "constituency_matrix") and isinstance(
        sim_agent.constituency_matrix, snc_types.ConstituencyMatrix)
    assert hasattr(sim_agent, "demand_rate") and isinstance(
        sim_agent.demand_rate, np.ndarray)
    assert hasattr(sim_agent,
                   "list_boundary_constraint_matrices") and isinstance(
                       sim_agent.list_boundary_constraint_matrices, list)
    assert hasattr(sim_agent, "name") and isinstance(sim_agent.name, str)
Example #22
def test_bellman_pets_agent_init(env_name, expected_action_spec_shape):
    """
    Tests agent set up and initialisation.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name,
                                                  job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PETS agent for the environment.
    bellman_pets_agent = create_bellman_pets_agent(
        env=tf_env,
        reward_model_class=CRWRewardModel,
        initial_state_distribution_model_class=CRWInitialStateModel,
    )

    # Validate initialisation by checking relevant properties of the initialised agent.
    assert isinstance(bellman_pets_agent.action_spec, BoundedTensorSpec)
    assert bellman_pets_agent.action_spec.shape == expected_action_spec_shape
    assert bellman_pets_agent.name == "PETS_Agent"
    assert bellman_pets_agent.time_step_spec == tf_env.time_step_spec()
Example #23
    def __init__(self,
                 env: ControlledRandomWalk,
                 agent: TFAgent,
                 normalise_obs: bool,
                 name: str = "RLSimulationAgent",
                 evaluation: bool = False):
        """
        Sets up the simulation agent from an environment and a standard TensorFlow Agent.

        Note: The environment is not used for simulation, simply for interpreting RL Agent actions.

        :param env: The SNC environment for the simulation.
        :param agent: The fully initialised (and trained) TensorFlow agent.
        :param normalise_obs: Whether observations should be normalised before being passed to the
            agent's policy.
        :param name: Agent identifier.
        :param evaluation: Determines whether to use the greedy (evaluation) policy. Defaults to
            False, i.e. use the collect policy.
        """
        # Attain an RLControlledRandomWalk instance from the ControlledRandomWalk provided.
        # This is used to interpret actions from an RL Agent.
        self.discount_factor = agent._discount_factor if isinstance(agent, PPOAgent) \
            else agent._gamma
        self._rl_env, _ = rl_env_from_snc_env(
            env,
            discount_factor=self.discount_factor,
            for_tf_agent=False,
            normalise_observations=normalise_obs)

        # Set up private properties required for map_state_to_actions
        self._rl_agent = agent
        self._is_eval_policy = evaluation
        self._normalise_obs = normalise_obs
        self._policy = self._rl_agent.policy if self._is_eval_policy \
            else self._rl_agent.collect_policy

        # Call the standard initialiser.
        super().__init__(env, name)
Example #24
def load_rl_agent(
        env: ControlledRandomWalk,
        rl_algorithm: str,
        load_path: str,
        discount_factor: float = 0.99,
        agent_params: Optional[Dict[str, Any]] = None) -> RLSimulationAgent:
    """
    Instantiates an RL agent in the RLSimulationAgent interface for compatibility and loads the
    weights from training into it.

    :param env: The controlled random walk environment for which the agent is required.
    :param rl_algorithm: The name of the RL algorithm used to train the agent.
    :param load_path: Path to a directory where TensorFlow checkpoints have been saved (i.e. where
        the model's weights are saved).
    :param discount_factor: A scalar discount factor to pass to the agent.
    :param agent_params: A dictionary of possible overrides for the default TF-Agents agent set up.
    :return: An RL agent initialised with saved weights ready for evaluation.
    """
    # Lazy import of TensorFlow since it is only needed when an RL agent is run.
    import tensorflow as tf

    # Attain a TensorFlow compatible version of the environment.
    # We need a TensorFlow environment to initialise the agent correctly.
    # First determine whether or not to normalise observations. PPO has its own normalisation, so
    # we only normalise in the environment for REINFORCE agents or for PPO agents whose internal
    # normalisation is turned off.
    normalise_obs = rl_algorithm == 'reinforce' or \
                    (rl_algorithm == 'ppo' and
                     not (agent_params or {}).get('normalize_observations', True))
    tf_env, _ = rl_env.rl_env_from_snc_env(
        env, discount_factor, normalise_observations=normalise_obs)

    # Set up an enumeration of functions which build agents to allow for extending to new agents.
    # Pick out the correct RL agent from those we have implemented.
    if rl_algorithm.lower() == 'reinforce':
        agent = create_reinforce_agent(tf_env,
                                       gamma=discount_factor,
                                       agent_params=agent_params)
    elif rl_algorithm.lower() == 'ppo':
        agent = create_ppo_agent(tf_env,
                                 gamma=discount_factor,
                                 agent_params=agent_params)
    else:
        raise NotImplementedError(
            "An agent using the RL algorithm requested is not yet implemented")

    # Initialise the agent and load in parameters from the most recent save.
    # Note that this can be adjusted to load in weights from any point in training (so long as they
    # have been saved).
    agent.initialize()
    restorer = tf.train.Checkpoint(agent=agent)
    restore_manager = tf.train.CheckpointManager(restorer,
                                                 directory=load_path,
                                                 max_to_keep=20)
    restorer.listed = agent.trainable_variables
    restoration = restorer.restore(restore_manager.latest_checkpoint)
    restoration.run_restore_ops()
    # Check that the weights have been loaded and that the model from which the weights were saved
    # matches the model which they are being loaded into.
    restoration.assert_nontrivial_match()
    restoration.assert_existing_objects_matched()

    # We name the agent in line with the checkpoint used to restore the weights. This aids in
    # identifying which experiment run is being looked at from log files.
    agent_name = f"RLSimulationAgent - {restore_manager.latest_checkpoint}"

    # Finally wrap the agent for compatibility with the SNC simulator.
    simulation_agent = RLSimulationAgent(env,
                                         agent,
                                         normalise_obs,
                                         name=agent_name)
    return simulation_agent
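
For reference, a minimal self-contained sketch of the checkpoint save/restore pattern used in load_rl_agent, with a plain tf.Variable standing in for the agent; the variable values and temporary directory below are illustrative, not part of the original code.

import tempfile

import numpy as np
import tensorflow as tf

ckpt_dir = tempfile.mkdtemp()

# "Training" side: track an object with a Checkpoint and save it via a CheckpointManager.
var = tf.Variable([1.0, 2.0, 3.0])
saver = tf.train.Checkpoint(agent=var)
tf.train.CheckpointManager(saver, directory=ckpt_dir, max_to_keep=20).save()

# "Loading" side: mirror load_rl_agent by restoring into a freshly built object.
restored_var = tf.Variable([0.0, 0.0, 0.0])
restorer = tf.train.Checkpoint(agent=restored_var)
restore_manager = tf.train.CheckpointManager(restorer, directory=ckpt_dir, max_to_keep=20)
restoration = restorer.restore(restore_manager.latest_checkpoint)
restoration.assert_existing_objects_matched()  # same consistency check as in load_rl_agent
assert np.allclose(restored_var.numpy(), [1.0, 2.0, 3.0])
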