def test_rl_simulation_agent_normalise_obs_usage_with_normalisation():
    """Ensure that the _normalise_obs property of RLSimulationAgent is used correctly."""
    # Set up the agent as before.
    seed = 72
    state = np.array([100, 100, 100, 100])
    env = load_scenario("klimov_model",
                        job_gen_seed=seed,
                        override_env_params={"initial_state": state}).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, normalise_observations=True)
    ppo_agent = MagicMock()
    ppo_agent.discount_factor = 0.99
    ppo_agent._gamma = 0.99
    policy = MagicMock()
    ppo_agent.collect_policy = policy
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=True)
    ppo_sim_agent._rl_env.preprocess_action = MagicMock()
    ppo_sim_agent.map_state_to_actions(state)
    expected_timestep = TimeStep(step_type=StepType(0),
                                 reward=None,
                                 discount=0.99,
                                 observation=state.reshape(1, -1) / state.sum())
    assert policy.action.call_count == 1
    call_timestep = policy.action.call_args[0][0]
    assert (call_timestep.observation == expected_timestep.observation).all()

def get_environment(env_name: str,
                    agent_name: str,
                    episode_len_to_min_drain_time_ratio: float,
                    terminal_discount_factor: float = 0.7,
                    action_repetitions: int = 1,
                    parallel_environments: int = 8,
                    env_overload_params: Optional[Dict] = None,
                    agent_params: Optional[Dict] = None,
                    seed: Optional[int] = None) \
        -> Tuple[TFPyEnvironment, float, float, int, Tuple[int, ...]]:
    """
    Builds and initialises a TensorFlow environment implementation of the named scenario.

    :param env_name: The name of the scenario to load. Must be in the list of implemented
        scenarios.
    :param agent_name: The name of the RL agent the environment is to be set up for.
    :param episode_len_to_min_drain_time_ratio: Maximum number of time steps per episode as a
        proportion of the minimal draining time.
    :param terminal_discount_factor: The discount applied to the final time step, from which a
        per-step discount factor is calculated.
    :param action_repetitions: Number of time steps each selected action is repeated for.
    :param parallel_environments: Number of environments to run in parallel.
    :param env_overload_params: Dictionary of parameters to override the scenario defaults.
    :param agent_params: Optional dictionary of agent parameters the environment can be adapted
        for.
    :param seed: Random seed used to initialise the environment.
    :return: The environment wrapped and ready for TensorFlow Agents, along with the per-step
        discount factor, the network load, the maximum episode length and the action space
        dimensions.
    """
    # Handle some default argument clean-up.
    if env_overload_params is None:
        env_overload_params = {}
    if agent_params is None:
        agent_params = {}

    env = scenarios.load_scenario(env_name, seed, env_overload_params).env

    if np.all(env.state_initialiser.initial_state == 0):
        env.max_episode_length = 450
    else:
        if env.state_initialiser.initial_state.ndim == 1:
            initial_state = env.state_initialiser.initial_state.reshape((-1, 1))
        else:
            initial_state = env.state_initialiser.initial_state
        minimal_draining_time = compute_minimal_draining_time_from_env_cvxpy(initial_state, env)
        env.max_episode_length = int(episode_len_to_min_drain_time_ratio * minimal_draining_time)

    # The per-step discount factor is chosen so that it compounds to terminal_discount_factor
    # over max_episode_length steps.
    discount_factor = np.exp(np.log(terminal_discount_factor) / env.max_episode_length)
    load = np.max(compute_load_workload_matrix(env).load)
    max_ep_len = env.max_episode_length

    # Allow toggling of observation normalisation in the environment.
    # PPO typically normalises observations internally as required, so in that case
    # normalisation in the environment is skipped.
    if agent_name == 'ppo' and agent_params.get('normalize_observations', True):
        normalise_obs_in_env = False
    else:
        normalise_obs_in_env = True

    # Wrap and parallelise the environment for TF Agents.
    tf_env, action_dims = rl_env_from_snc_env(env,
                                              discount_factor,
                                              action_repetitions,
                                              parallel_environments,
                                              normalise_observations=normalise_obs_in_env)
    return tf_env, discount_factor, load, max_ep_len, action_dims

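# Illustrative usage sketch of get_environment (not an original test; assumes the
# 'single_server_queue' scenario and hypothetical hyperparameter values). By construction, the
# per-step discount factor gamma satisfies gamma ** max_ep_len == terminal_discount_factor,
# e.g. for the default terminal_discount_factor of 0.7 and max_ep_len == 450,
# gamma = exp(log(0.7) / 450) ≈ 0.99921.
def example_get_environment_usage():
    tf_env, gamma, load, max_ep_len, action_dims = get_environment(
        env_name='single_server_queue',
        agent_name='reinforce',
        episode_len_to_min_drain_time_ratio=0.8,
        seed=10)
    assert np.isclose(gamma ** max_ep_len, 0.7)
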
def test_reinforce_agent_play(env_name):
    """ Extension of the agent set up and initialisation test to include playing episodes. """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
        discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()

    # Reset the environment.
    tf_env.reset()

    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point we capture it from the environment
        # afresh each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Obtain our agent's action.
        action_step = reinforce_agent.collect_policy.action(time_step)
        # Concatenate the action subspaces if there are multiple resource sets.
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action
        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)
        # Play the action out in the environment.
        tf_env.step(action_step.action)

def test_ppo_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource
    sets).
    """
    # Hard-code the environment name for this case as the asserted values are specific to it.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name, job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()

    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(ppo_agent.action_spec, tuple)
    assert len(ppo_agent.action_spec) == 2
    assert isinstance(ppo_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(ppo_agent.action_spec[1], BoundedTensorSpec)
    assert ppo_agent.action_spec[0].shape == tf.TensorShape((3,))
    assert ppo_agent.action_spec[1].shape == tf.TensorShape((3,))
    assert ppo_agent.name == "PPO_Agent"
    assert ppo_agent.time_step_spec == tf_env.time_step_spec()

def test_ppo_agent_play(env_name):
    """ Extension of the agent set up and initialisation test to include playing episodes. """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, action_dims = rl_env_from_snc_env(load_scenario(env_name, job_gen_seed=10)[1],
                                              discount_factor=0.99,
                                              normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()

    # Reset the environment.
    tf_env.reset()

    # Play 5 time steps in the environment.
    for _ in range(5):
        # Since we do not have the state stored at this point we capture it from the environment
        # afresh each time step as a TimeStep object (a named tuple).
        time_step = tf_env.current_time_step()
        # Obtain our agent's action.
        action_step = ppo_agent.collect_policy.action(time_step)
        # Concatenate the action subspaces if there are multiple resource sets.
        if isinstance(action_step.action, tuple):
            action = tf.concat(action_step.action, axis=-1)
        else:
            action = action_step.action
        # Ensure that the action is binary as expected.
        assert snc.is_binary(action)
        # Play the action out in the environment.
        tf_env.step(action_step.action)

def test_reinforce_agent_learning(env_name):
    """
    Extension of the test for an agent playing in the environment to include training.
    Note: This does not test that training improves the policy. It simply tests that the
    training loop runs effectively.
    """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
        discount_factor=0.99)

    # Set up a training step counter.
    global_step = tf.compat.v1.train.get_or_create_global_step()

    # Instantiate a REINFORCE agent.
    reinforce_agent = create_reinforce_agent(tf_env, training_step_counter=global_step)

    # Instantiate a replay buffer.
    replay_buffer = TFUniformReplayBuffer(
        data_spec=reinforce_agent.collect_data_spec,
        batch_size=tf_env.batch_size,
        max_length=1000)

    # Initialise the action network weights etc.
    reinforce_agent.initialize()

    # Use a driver to handle data collection for the agent. This handles a lot of the backend
    # TensorFlow set up and solves previous errors with episodes of differing lengths.
    collect_driver = DynamicEpisodeDriver(tf_env,
                                          reinforce_agent.collect_policy,
                                          observers=[replay_buffer.add_batch],
                                          num_episodes=2)

    # Get the initial states of the agent and environment before training.
    time_step = tf_env.reset()
    policy_state = reinforce_agent.collect_policy.get_initial_state(tf_env.batch_size)

    # Take a copy of the variables in order to ensure that training does lead to parameter
    # changes.
    initial_vars = deepcopy(reinforce_agent.trainable_variables)
    assert len(initial_vars) > 0, "Agent has no trainable variables."

    # Set up a minimal training loop to simply test that the training mechanics work.
    for _ in range(5):
        # Collect experience.
        time_step, policy_state = collect_driver.run(time_step=time_step,
                                                     policy_state=policy_state)
        # Now the replay buffer should have data in it, so we can gather the data and train the
        # agent.
        experience = replay_buffer.gather_all()
        reinforce_agent.train(experience)
        # Clear the replay buffer and return to play.
        replay_buffer.clear()

    # Check that training has had some effect.
    for v1, v2 in zip(initial_vars, reinforce_agent.trainable_variables):
        assert not np.allclose(v1.numpy(), v2.numpy())

def test_rl_simulation_agent_serialisation():
    """
    Test the custom serialisation of the agent used when saving the state of the SNC simulator.
    The customised serialisation was required due to the inability to serialise TensorFlow
    objects.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Obtain the dictionary representation of the agent and test that all the expected
    # attributes are present.
    serialised_agent = sim_agent.to_serializable()
    assert all(attr in serialised_agent for attr in [
        "_rl_env", "_rl_agent", "_policy", "_is_eval_policy", "env",
        "buffer_processing_matrix", "constituency_matrix", "demand_rate",
        "list_boundary_constraint_matrices", "name"
    ])
    # Ensure that the dictionary representation is compatible with the json module and the
    # chosen encoder.
    json_string = json.dumps(serialised_agent, cls=NumpyEncoder, indent=4, sort_keys=True)
    assert bool(json_string)

def test_reinforce_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource
    sets).
    """
    # Hard-code the environment name for this case as the asserted values are specific to it.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
        discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()

    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, tuple)
    assert len(reinforce_agent.action_spec) == 2
    assert isinstance(reinforce_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(reinforce_agent.action_spec[1], BoundedTensorSpec)
    assert reinforce_agent.action_spec[0].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.action_spec[1].shape == tf.TensorShape((1, 3))
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()

def test_bellman_pets_agent_init_with_multiple_resource_sets():
    """
    Tests agent set up and initialisation with multiple action subspaces (multiple resource
    sets).
    """
    # Hard-code the environment name for this case as the asserted values are specific to it.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name, job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate a Bellman PETS agent for the environment.
    bellman_pets_agent = create_bellman_pets_agent(
        env=tf_env,
        reward_model_class=CRWRewardModel,
        initial_state_distribution_model_class=CRWInitialStateModel,
    )

    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(bellman_pets_agent.action_spec, tuple)
    assert len(bellman_pets_agent.action_spec) == 2
    assert isinstance(bellman_pets_agent.action_spec[0], BoundedTensorSpec)
    assert isinstance(bellman_pets_agent.action_spec[1], BoundedTensorSpec)
    assert bellman_pets_agent.action_spec[0].shape == tf.TensorShape((1, 3))
    assert bellman_pets_agent.action_spec[1].shape == tf.TensorShape((1, 3))
    assert bellman_pets_agent.name == "PETS_Agent"
    assert bellman_pets_agent.time_step_spec == tf_env.time_step_spec()

def test_rl_env_normalise_obs_property():
    """
    Ensure that the normalise_obs property of RLControlledRandomWalk is set and updated
    correctly.
    """
    # Hard-code the environment name for this case as the asserted values are specific to it.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name, job_gen_seed=10).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    assert rl_env.normalise_obs is True
    rl_env.normalise_obs = False
    assert rl_env.normalise_obs is False

def test_rl_simulation_agent_string_representation():
    """ Tests that the string representation of the simulation agent is as expected. """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Ensure that the string representation of the agent contains the instance name at the end.
    assert str(sim_agent)[-len(sim_agent.name):] == sim_agent.name

def test_rl_env_from_snc_env_action_space_dims_multiple_resource_sets():
    """
    Tests the formation and stability of the action space dimensions of the environment through
    the RL environment pipeline in a more complex setting.
    """
    # Hard-code the environment name for this case as the asserted values are specific to it.
    env_name = 'double_reentrant_line_shared_res_homogeneous_cost'
    # Set up the environment parameters.
    # Environment parameters do not affect the test result here.
    env = load_scenario(env_name, job_gen_seed=10).env
    rl_env, action_space_dims = rl_env_from_snc_env(env, discount_factor=0.99,
                                                    for_tf_agent=False)
    _, action_space_dims_tf = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=True)
    assert action_space_dims == action_space_dims_tf
    assert len(rl_env.action_vectors) == sum(action_space_dims)

def test_rl_simulation_agent_discount_factor_ppo():
    """
    Tests that the discount factor is passed from a PPO agent to an RLSimulationAgent correctly.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, normalise_observations=False)
    ppo_agent = create_ppo_agent(rl_env, gamma=0.90)
    ppo_agent.initialize()
    del rl_env
    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=False)
    assert ppo_sim_agent.discount_factor == 0.90

def test_rl_simulation_agent_normalise_obs_property():
    """Ensure that the _normalise_obs property of RLSimulationAgent is set correctly."""
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, normalise_observations=False)
    ppo_agent = create_ppo_agent(rl_env, gamma=0.90)
    ppo_agent.initialize()
    del rl_env

    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=False)
    assert ppo_sim_agent._normalise_obs is False

    ppo_sim_agent = RLSimulationAgent(env, ppo_agent, normalise_obs=True)
    assert ppo_sim_agent._normalise_obs is True

def test_rl_simulation_agent_discount_factor_reinforce():
    """
    Tests that the discount factor is passed from a REINFORCE agent to an RLSimulationAgent
    correctly.
    """
    # Set up the agent as before.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    reinforce_agent = create_reinforce_agent(rl_env, gamma=0.97)
    reinforce_agent.initialize()
    del rl_env
    reinforce_sim_agent = RLSimulationAgent(env, reinforce_agent, normalise_obs=True)
    assert reinforce_sim_agent.discount_factor == 0.97

def test_rl_env_normalise_obs_action():
    """
    Ensure that the normalise_obs property of RLControlledRandomWalk is used correctly.
    """
    # Hard-code the environment name for this case as the asserted values are specific to it.
    env_name = 'klimov_model'
    # Set up the environment parameters. The initial state is fixed so that the normalised
    # observations are easy to assert.
    env = load_scenario(env_name,
                        job_gen_seed=10,
                        override_env_params={"initial_state": [100, 100, 100, 100]}).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99, for_tf_agent=False)
    assert rl_env.normalise_obs is True
    s0_normalised = rl_env.reset()
    assert s0_normalised.tolist() == [0.25, 0.25, 0.25, 0.25]
    rl_env.normalise_obs = False
    s0_unnormalised = rl_env.reset()
    assert s0_unnormalised.tolist() == [100, 100, 100, 100]

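# Sketch of the normalisation convention exercised by the test above (pure NumPy, no repo
# dependencies; not an original test): observations are scaled by the sum of the state vector.
def example_observation_normalisation():
    state = np.array([100, 100, 100, 100])
    normalised = state / state.sum()
    assert normalised.tolist() == [0.25, 0.25, 0.25, 0.25]
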
def test_ppo_agent_init(env_name, expected_action_spec_shape):
    """ Tests agent set up and initialisation. """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name, job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate and initialise a PPO agent for the environment.
    ppo_agent = create_ppo_agent(tf_env, num_epochs=10)
    ppo_agent.initialize()

    # Validate initialisation by checking relevant properties of the initialised agent.
    assert isinstance(ppo_agent.action_spec, BoundedTensorSpec)
    assert ppo_agent.action_spec.shape == expected_action_spec_shape
    assert ppo_agent.name == "PPO_Agent"
    assert ppo_agent.time_step_spec == tf_env.time_step_spec()

def get_simple_link_constrained_model():
    cost_per_buffer = np.array([3, 1, 3, 1.5, 3]).reshape(-1, 1)
    param_overrides = dict(alpha1=4.8, mu12=2., mu13=4., mu25=2., mu32=4.5, mu34=1.8, mu35=2.,
                           mu45=1., mu5=7., cost_per_buffer=cost_per_buffer)
    _, env = scenarios.load_scenario('simple_link_constrained_model', 0, param_overrides)
    _, workload_mat, nu = workload.compute_load_workload_matrix(env, 6)
    env.workload_mat = workload_mat
    env.nu = nu
    return env

def test_reinforce_agent_init(env_name, expected_action_spec_shape):
    """ Tests agent set up and initialisation. """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(
        env_name,
        job_gen_seed=10,
        override_env_params={'max_episode_length': 25})[1],
        discount_factor=0.99)

    # Instantiate and initialise a REINFORCE agent for the environment.
    reinforce_agent = create_reinforce_agent(tf_env)
    reinforce_agent.initialize()

    # Validate initialisation by checking some properties of the initialised agent.
    assert isinstance(reinforce_agent.action_spec, BoundedTensorSpec)
    assert reinforce_agent.action_spec.shape == expected_action_spec_shape
    assert reinforce_agent.name == "reinforce_agent"
    assert reinforce_agent.time_step_spec == tf_env.time_step_spec()

def test_rl_simulation_agent_action_mapping():
    """
    Tests that the RL Simulation Agent with the SNC interface is able to receive states and
    produce actions both of the expected type and form.
    """
    # Set up the agent as above.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Obtain a state and form an action.
    state = env.reset()
    action = sim_agent.map_state_to_actions(state)

    # Ensure that the action is as expected, first with a formal assertion and then by passing
    # it to the environment.
    assert isinstance(action, snc_types.ActionProcess)
    env.step(action)

def test_scenario(scenario_name, agent_class):
    """ Run a brief integration test on a given scenario. """
    skip_tests = SKIPPED_TESTS + PULL_MODELS + PUSH_PULL_MODELS
    if scenario_name in skip_tests:
        pytest.skip()
    np.random.seed(SEED_NO)
    _, env = scenarios.load_scenario(scenario_name, SEED_NO)
    # Update parameters for quick tests.
    overrides = {
        "HedgehogHyperParams": {
            "theta_0": 0.5,
            "horizon_drain_time_ratio": 0.1,
            "horizon_mpc_ratio": 0.1,
            "minimum_horizon": 10
        },
        "AsymptoticCovarianceParams": {
            "num_presimulation_steps": 100,
            "num_batch": 20
        }
    }
    if scenario_name in MIP_REQUIRED_MODELS:
        overrides["HedgehogHyperParams"]["mpc_policy_class_name"] = \
            "FeedbackMipFeasibleMpcPolicy"
    ac_params, wk_params, si_params, po_params, hh_params, si_class, dp_params, name \
        = load_agents.get_hedgehog_hyperparams(**overrides)
    discount_factor = 0.95
    if agent_class == BigStepHedgehogAgent:
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params, si_params,
                            po_params, si_class, dp_params, name)
    elif agent_class == BigStepHedgehogGTOAgent:
        agent = agent_class(env, discount_factor, wk_params, hh_params, ac_params, po_params,
                            dp_params, name)
    else:
        assert False, f"Agent class not recognised: {agent_class}"
    simulator = ps.SncSimulator(env, agent, discount_factor=discount_factor)
    simulator.run(num_simulation_steps=SIM_STEPS)

def test_rl_simulation_agent_init():
    """
    Test the initialisation of an RL agent with an interface compatible with the SNC simulator.
    """
    # To instantiate an agent from tf_agents we need an RL environment which itself requires a
    # standard SNC environment. We therefore set up an SNC environment and then wrap it for the
    # TensorFlow agent. This TF environment is later deleted since it is no longer required and
    # to ensure that it is not used inadvertently.
    seed = 72
    env = load_scenario("single_server_queue", job_gen_seed=seed).env
    rl_env, _ = rl_env_from_snc_env(env, discount_factor=0.99)
    rl_agent = create_reinforce_agent(rl_env)
    rl_agent.initialize()
    del rl_env

    # Wrap the agent for the SNC simulator using information from the environment and the agent.
    sim_agent = RLSimulationAgent(env, rl_agent, normalise_obs=True)

    # Test that the agent has all of the attributes we expect and that they are of the right
    # type.
    assert hasattr(sim_agent, "_rl_env") and isinstance(sim_agent._rl_env,
                                                        RLControlledRandomWalk)
    assert hasattr(sim_agent, "_rl_agent") and isinstance(sim_agent._rl_agent, TFAgent)
    assert hasattr(sim_agent, "_policy") and isinstance(sim_agent._policy, tf_policy.Base)
    assert hasattr(sim_agent, "_is_eval_policy") and isinstance(sim_agent._is_eval_policy, bool)
    assert hasattr(sim_agent, "env") and isinstance(sim_agent.env, ControlledRandomWalk)
    assert hasattr(sim_agent, "buffer_processing_matrix") and isinstance(
        sim_agent.buffer_processing_matrix, snc_types.BufferMatrix)
    assert hasattr(sim_agent, "constituency_matrix") and isinstance(
        sim_agent.constituency_matrix, snc_types.ConstituencyMatrix)
    assert hasattr(sim_agent, "demand_rate") and isinstance(sim_agent.demand_rate, np.ndarray)
    assert hasattr(sim_agent, "list_boundary_constraint_matrices") and isinstance(
        sim_agent.list_boundary_constraint_matrices, list)
    assert hasattr(sim_agent, "name") and isinstance(sim_agent.name, str)

def test_bellman_pets_agent_init(env_name, expected_action_spec_shape):
    """ Tests agent set up and initialisation. """
    # Set up environment using default parameters.
    # Environment parameters do not affect the test result here.
    tf_env, _ = rl_env_from_snc_env(load_scenario(env_name, job_gen_seed=10)[1],
                                    discount_factor=0.99,
                                    normalise_observations=False)

    # Instantiate a Bellman PETS agent for the environment.
    bellman_pets_agent = create_bellman_pets_agent(
        env=tf_env,
        reward_model_class=CRWRewardModel,
        initial_state_distribution_model_class=CRWInitialStateModel,
    )

    # Validate initialisation by checking relevant properties of the initialised agent.
    assert isinstance(bellman_pets_agent.action_spec, BoundedTensorSpec)
    assert bellman_pets_agent.action_spec.shape == expected_action_spec_shape
    assert bellman_pets_agent.name == "PETS_Agent"
    assert bellman_pets_agent.time_step_spec == tf_env.time_step_spec()

def run_validation(arguments: argparse.Namespace) -> Dict[str, str]:
    """
    Run the validation on a particular scenario.

    :param arguments: Namespace of experiment parameters.
    """
    assert arguments.env_param_overrides['job_gen_seed'] is not None
    assert arguments.seed is not None
    # Note that if job_gen_seed was not in env_param_overrides, then at this point we will have:
    # arguments.env_param_overrides['job_gen_seed'] == arguments.seed.
    job_gen_seed = arguments.env_param_overrides['job_gen_seed']
    global_seed = arguments.seed + 100
    agent_seed = arguments.seed + 200
    mpc_seed = arguments.seed + 300
    np.random.seed(global_seed)
    print(f"job_gen_seed {job_gen_seed}")
    print(f"global_seed {global_seed}")
    print(f"agent_seed {agent_seed}")
    print(f"mpc_seed {mpc_seed}")

    save_locations = dict()

    # Get scenario.
    _, env = scenarios.load_scenario(arguments.env_name, job_gen_seed,
                                     arguments.env_param_overrides)

    # Initialise an agent counter to ensure that the right checkpoint is loaded for each agent.
    rl_agent_count = 0
    for agent_name in arguments.agents:
        env.reset_with_random_state(job_gen_seed)
        agent_args = {}
        name_alias = agent_name  # Set name of folder storing results to agent_name by default.
        if agent_name in load_agents.HEDGEHOG_AGENTS:
            if arguments.hedgehog_param_overrides is None:
                arguments.hedgehog_param_overrides = dict()
            agent_args['hh_overrides'] = arguments.hedgehog_param_overrides
            agent_args['discount_factor'] = arguments.discount_factor
            agent_args['debug_info'] = arguments.debug_info
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if a name was passed as an agent parameter.
            name_alias = arguments.hedgehog_param_overrides.get('name', agent_name)
        elif agent_name == 'distribution_with_rebalancing_heuristic':
            agent_args['safety_stocks'] = 20 * np.ones(env.state.shape)
        elif agent_name in ['reinforce', 'ppo']:
            agent_args['discount_factor'] = arguments.discount_factor
            if arguments.rl_agent_params:
                # Update agent_args accordingly.
                if rl_agent_count < len(arguments.rl_agent_params):
                    if 'discount_factor' in arguments.rl_agent_params[rl_agent_count]:
                        warn(f'WARNING: Overriding provided discount factor with '
                             f'agent-specific discount factor for {agent_name} agent')
                    agent_args.update(arguments.rl_agent_params[rl_agent_count])
            else:
                if agent_name == "ppo":
                    raise ValueError(
                        "When running a PPO agent you must provide agent parameters.")
                warn("REINFORCE agent being run with default agent parameters.")
            agent_args['rl_checkpoint'] = arguments.rl_checkpoints[rl_agent_count]
            rl_agent_count += 1
        elif agent_name in ('maxweight', 'scheduling_maxweight'):
            if arguments.maxweight_param_overrides is None:
                arguments.maxweight_param_overrides = dict()
            agent_args['overrides'] = arguments.maxweight_param_overrides
            agent_args['agent_seed'] = agent_seed
            agent_args['mpc_seed'] = mpc_seed
            # Replace directory name if a name was passed as an agent parameter.
            name_alias = arguments.maxweight_param_overrides.get('name', agent_name)
        else:
            agent_args['agent_seed'] = agent_seed

        agent = load_agents.get_agent(agent_name, env, **agent_args)
        sim = ps.SncSimulator(env, agent, **arguments.__dict__)

        print(f'\nSimulating {agent.name}...')
        validation_utils.print_agent_params(agent)

        is_hedgehog = isinstance(agent, (BigStepHedgehogAgent,
                                         PureFeedbackStationaryHedgehogAgent,
                                         PureFeedbackMIPHedgehogAgent))
        save_location = f'{arguments.logdir}/{name_alias}'

        run_policy(sim, arguments.num_steps, arguments.server_mode, is_hedgehog, save_location,
                   job_gen_seed)

        if is_hedgehog:
            assert isinstance(agent, (BigStepHedgehogAgent,
                                      PureFeedbackStationaryHedgehogAgent,
                                      PureFeedbackMIPHedgehogAgent))
            validation_utils.print_workload_to_physical_resources_indexes(
                agent.workload_tuple.nu)

        save_locations[agent.name] = save_location
        print(f'Data stored at: {save_location}.')
        print(f'Finished simulating {agent.name}.\n')

    print(f"job_gen_seed: {arguments.env_param_overrides.get('job_gen_seed')}")
    print("End of simulation!")

    if not arguments.server_mode:
        plt.ioff()
        plt.show()
    return save_locations

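# Illustrative sketch (not part of the original script): the Namespace fields that
# run_validation reads, populated with hypothetical values. All field names below are taken
# from the accesses in run_validation itself; the concrete values are assumptions.
example_args = argparse.Namespace(
    env_name='single_server_queue',
    agents=['maxweight'],
    seed=42,
    env_param_overrides={'job_gen_seed': 42},
    discount_factor=0.95,
    debug_info=False,
    hedgehog_param_overrides=None,
    maxweight_param_overrides=None,
    rl_agent_params=None,
    rl_checkpoints=None,
    num_steps=100,
    logdir='/tmp/validation',
    server_mode=True,
)
# save_locations = run_validation(example_args)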