def create_env(env, render=False, shared=False, maddpg=False, evaluate=False):
    """Return, and potentially create, the environment.

    Parameters
    ----------
    env : str or gym.Env
        the environment, or the name of a registered environment.
    render : bool
        whether to render the environment
    shared : bool
        specifies whether agents in an environment are meant to share
        policies. This is solely used by multi-agent Flow environments.
    maddpg : bool
        whether to use an environment variant that is compatible with the
        MADDPG algorithm
    evaluate : bool
        specifies whether this is a training or evaluation environment

    Returns
    -------
    gym.Env or list of gym.Env
        gym-compatible environment(s)
    """
    if env is None:
        # No environment (for evaluation environments).
        return None
    elif isinstance(env, str):
        if env in ENV_ATTRIBUTES.keys():
            env = ENV_ATTRIBUTES[env]["env"](
                evaluate, render, False, shared, maddpg)
        elif env.startswith("multiagent"):
            # multi-agent environments
            env_name = env[11:]
            env = ENV_ATTRIBUTES[env_name]["env"](
                evaluate, render, True, shared, maddpg)
        elif env in ["bottleneck0", "bottleneck1", "bottleneck2",
                     "grid0", "grid1"]:
            # Import the benchmark and fetch its flow_params.
            benchmark = __import__(
                "flow.benchmarks.{}".format(env), fromlist=["flow_params"])
            flow_params = benchmark.flow_params

            # Get the env name and a creator for the environment.
            creator, _ = make_create_env(
                flow_params, version=0, render=render)

            # Create the environment.
            env = creator()
        else:
            # This assumes the environment is registered with OpenAI Gym.
            env = gym.make(env)

    # Reset the environment.
    if env is not None:
        if isinstance(env, list):
            for next_env in env:
                next_env.reset()
        else:
            env.reset()

    return env
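# Usage sketch for create_env above (illustrative, not part of the original
# module). "bottleneck0" is one of the benchmark names the function
# special-cases; any unrecognized string falls through to gym.make, so a
# standard Gym id such as "CartPole-v0" would also work.
env = create_env("bottleneck0", render=False)
print(env.observation_space, env.action_space)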
def ray_runner(self, num_runs, flow_params, version):
    alg_run = 'PPO'
    HORIZON = 10

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = 1
    config['sample_batch_size'] = 50  # arbitrary
    config['train_batch_size'] = 50  # arbitrary
    config['sgd_minibatch_size'] = 10
    config['num_sgd_iter'] = 1
    config['horizon'] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(
        params=flow_params, version=version)

    # Register as rllib env
    register_env(env_name, create_env)

    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(num_runs):
        alg.train()
        checkpoint_path = alg.save('benchmark_tmp')
        # check that the checkpoint's index file was written to disk
        self.assertTrue(os.path.exists('%s.index' % checkpoint_path))
def setup_exps():
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [16, 16]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config["horizon"] = HORIZON
    # default is 0.3; 0.2 was reported to work well, but may still be too
    # large
    config["clip_param"] = 0.15
    # config["vf_loss_coeff"] = 0.2
    # default is 1. When using multitask, vf_loss dropped to ~10 while the
    # policy loss was around 0.01, but a 0.01 coefficient did not estimate
    # the value loss well.

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def __init__(self, flow_params, custom_callables=None):
    """Instantiate the Experiment class.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    custom_callables : dict < str, lambda >
        strings and lambda functions corresponding to some information we
        want to extract from the environment. The lambda will be called at
        each step to extract information from the env and it will be stored
        in a dict keyed by the str.
    """
    self.custom_callables = custom_callables or {}

    # Get the env name and a creator for the environment.
    create_env, _ = make_create_env(flow_params)

    # Create the environment.
    self.env = create_env()

    logging.info(" Starting experiment {} at {}".format(
        self.env.network.name, str(datetime.datetime.utcnow())))

    logging.info("Initializing environment.")
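# Usage sketch for the Experiment class above (illustrative; assumes a
# complete flow_params dict built elsewhere and the standard Flow
# Experiment.run(num_runs, ...) signature). The custom callable records the
# network-wide mean speed at every simulation step.
import numpy as np

exp = Experiment(
    flow_params,
    custom_callables={
        "avg_speed": lambda env: np.mean(
            env.k.vehicle.get_speed(env.k.vehicle.get_ids())),
    })
exp.run(num_runs=1)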
def setup_exps():
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [16, 16]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config["horizon"] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def setup_DQN_exp():
    alg_run = 'DQN'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = n_cpus
    config['train_batch_size'] = horizon * rollouts
    config['gamma'] = discount_rate
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = horizon
    config["hiddens"] = [256]
    config['model'].update({'fcnet_hiddens': [32, 32]})

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['lr'] = tune.grid_search([1e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return PPOTFPolicy, obs_space, act_space, {}

    # Set up a single policy graph that all AVs map to.
    policy_graphs = {'av': gen_policy()}

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policies': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
def setup_exps():
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [16, 16]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.01  # previously 0.02
    config["num_sgd_iter"] = 10
    config["horizon"] = HORIZON
    # default is 0.3; 0.2 was reported to work well, but may still be too
    # large
    config["clip_param"] = 0.15
    # default is 1; the value-function loss is used when fitting the
    # advantage estimate, not when improving the policy, so disable it here
    config["vf_loss_coeff"] = 0

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def setup_exps():
    alg_run = 'PPO'

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def setup_exps(use_inflows=False):
    """Return the relevant components of an RLlib experiment.

    Parameters
    ----------
    use_inflows : bool, optional
        set to True if you would like to run the experiment with inflows of
        vehicles from the edges, and False otherwise

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    # collect the initialization and network-specific parameters based on
    # the choice to use inflows or not
    if use_inflows:
        initial_config, net_params = get_flow_params(
            col_num=N_COLUMNS,
            row_num=N_ROWS,
            additional_net_params=additional_net_params)
    else:
        initial_config, net_params = get_non_flow_params(
            enter_speed=V_ENTER,
            add_net_params=additional_net_params)

    # add the new parameters to flow_params
    flow_params['initial'] = initial_config
    flow_params['net'] = net_params

    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def setup_exps(seeds_file=None):
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [100, 50, 25]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON
    config["entropy_coeff"] = 0.001
    config["lr"] = 1e-5

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0,
                                           seeds_file=seeds_file)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def setup_exps():
    initial_config, net_params = get_non_flow_params(
        add_net_params=additional_net_params)

    flow_params['initial'] = initial_config
    flow_params['net'] = net_params

    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = HORIZON
    config['observation_filter'] = 'NoFilter'

    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)
    register_env(gym_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    policy_graphs = {'av': gen_policy()}

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, gym_name, config
def setup_exps_rllib(flow_params, n_cpus, n_rollouts):
    """Return the relevant components of an RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters (see flow/utils/registry.py)
    n_cpus : int
        number of CPUs to run the experiment over
    n_rollouts : int
        number of rollouts per training iteration

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    horizon = flow_params['env'].horizon

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = deepcopy(agent_cls._default_config)

    config["num_workers"] = n_cpus
    config["num_cpus_per_worker"] = 1
    config["use_pytorch"] = False
    config["num_gpus"] = 0
    config["train_batch_size"] = horizon * n_rollouts
    config["gamma"] = 0.999  # discount rate
    # config["model"].update({"fcnet_hiddens": [32, 32, 32]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config['clip_actions'] = True  # FIXME(ev) temporary ray bug
    config["horizon"] = horizon
    config["callbacks"] = MyCallbacks

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
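# Sketch of how setup_exps_rllib is typically consumed (illustrative only;
# the checkpoint frequency and stop criterion are placeholders, and the
# flow_params dict is assumed to be the module-level one used elsewhere in
# these scripts).
import ray
from ray.tune import run_experiments

alg_run, gym_name, config = setup_exps_rllib(
    flow_params, n_cpus=1, n_rollouts=1)
ray.init(num_cpus=2)
run_experiments({
    flow_params['exp_tag']: {
        'run': alg_run,
        'env': gym_name,
        'config': config,
        'checkpoint_freq': 20,
        'max_failures': 999,
        'stop': {'training_iteration': 200},
    },
})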
def setup_exps():
    # Import the benchmark and fetch its flow_params
    benchmark = __import__("flow.benchmarks.%s" % benchmark_name,
                           fromlist=["flow_params"])
    flow_params = benchmark.flow_params

    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['lr'] = tune.grid_search([1e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    # Set up a single policy graph that all AVs map to.
    policy_graphs = {'av': gen_policy()}

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
def __init__(self, flow_params=flow_params):
    """Instantiate Experiment."""
    # Get the env name and a creator for the environment.
    create_env, _ = make_create_env(flow_params)

    # Create the environment.
    self.env = create_env()

    logging.info(" Starting experiment {} at {}".format(
        self.env.network.name, str(datetime.datetime.utcnow())))

    logging.info("Initializing environment.")
def flow_env(use_inflows=False, render=True, sim_step=1, horizon=2000):
    """Create and return the experiment's environment and its gym name.

    Parameters
    ----------
    use_inflows : bool, optional
        set to True if you would like to run the experiment with inflows of
        vehicles from the edges, and False otherwise
    render : bool, optional
        whether to render the environment
    sim_step : float, optional
        simulation step size, in seconds
    horizon : int, optional
        time horizon of a single rollout

    Returns
    -------
    gym.Env
        the created environment
    str
        name of the gym environment
    """
    global flow_params

    flow_params['sim'] = SumoParams(
        sim_step=sim_step,
        render=render,
        restart_instance=True,  # for long horizon and visualization
        print_warnings=False,
        seed=8021
    )
    flow_params['env'] = EnvParams(
        horizon=horizon,
        additional_params=additional_env_params,
    )

    # collect the initialization and network-specific parameters based on
    # the choice to use inflows or not
    if use_inflows:
        initial_config, net_params = get_flow_params(
            col_num=N_COLUMNS,
            row_num=N_ROWS,
            additional_net_params=additional_net_params)
    else:
        initial_config, net_params = get_non_flow_params(
            enter_speed=V_ENTER,
            add_net_params=additional_net_params)

    # add the new parameters to flow_params
    flow_params['initial'] = initial_config
    flow_params['net'] = net_params

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)

    create_env, gym_name = make_create_env(params=flow_params, version=0)
    env = create_env()
    return env, gym_name
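# Illustrative rollout with the environment returned by flow_env above (not
# from the original source): step the env with randomly sampled actions for
# a short horizon.
env, gym_name = flow_env(render=False, horizon=100)
obs = env.reset()
for _ in range(100):
    obs, reward, done, _ = env.step(env.action_space.sample())
    if done:
        break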
def setup_exps():
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [100, 50, 25]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['sgd_minibatch_size'] = 128
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['horizon'] = HORIZON
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    # Set up one policy graph for the AV and one for the adversary.
    policy_graphs = {'av': gen_policy(), 'adversary': gen_policy()}

    def policy_mapping_fn(agent_id):
        return agent_id

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn)
        }
    })

    return alg_run, env_name, config
def __init__(self, flow_params, custom_callables=None):
    self.custom_callables = custom_callables or {}

    # Get the env name and a creator for the environment.
    create_env, _ = make_create_env(flow_params)

    # Create the environment.
    self.env = create_env()

    logging.info(" Starting experiment {} at {}".format(
        self.env.network.name, str(datetime.datetime.utcnow())))

    logging.info("Initializing environment.")
def __init__(self,
             flow_params,
             multiagent=False,
             shared=False,
             maddpg=False,
             render=False,
             version=0):
    """Create the environment.

    Parameters
    ----------
    flow_params : dict
        environment-specific parameters
    multiagent : bool
        whether the environment is a multi-agent environment
    shared : bool
        whether the policies in the environment are shared or independent.
        This is only relevant if `multiagent` is set to True.
    maddpg : bool
        whether to use an environment variant that is compatible with the
        MADDPG algorithm
    render : bool
        whether to render the environment
    version : int
        environment version number, needed for testing purposes
    """
    # Initialize some variables.
    self.multiagent = multiagent
    self.shared = shared
    self.maddpg = maddpg

    if "full_observation_fn" in flow_params["env"].additional_params:
        self.full_observation_fn = deepcopy(
            flow_params["env"].additional_params["full_observation_fn"])
        del flow_params["env"].additional_params["full_observation_fn"]
    else:
        self.full_observation_fn = None

    # Create the wrapped environment.
    create_env, _ = make_create_env(flow_params, version, render)
    self.wrapped_env = create_env()

    # Collect the IDs of individual vehicles if using a multi-agent env.
    if self.multiagent:
        self.agents = list(self.wrapped_env.reset().keys())

    # for tracking the time horizon
    self.step_number = 0
    self.horizon = self.wrapped_env.env_params.horizon
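# Illustrative construction and stepping of the wrapper defined above. The
# class name FlowEnv is an assumption (substitute the actual wrapper class);
# flow_params is assumed to come from one of the Flow examples. Actions are
# forwarded to the underlying gym environment.
env = FlowEnv(flow_params, multiagent=False, render=False)
obs = env.wrapped_env.reset()
for _ in range(env.horizon):
    obs, reward, done, _ = env.wrapped_env.step(
        env.wrapped_env.action_space.sample())
    if done:
        break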
def __init__(self, env_name, env_params=None, render=False, version=0):
    """Create the environment.

    Parameters
    ----------
    env_name : str
        the name of the environment to create
    env_params : dict
        environment-specific parameters
    render : bool
        whether to render the environment
    version : int
        environment version number, needed for testing purposes

    Raises
    ------
    AssertionError
        if the `env_name` parameter is not valid
    """
    assert env_name in ["ring", "merge", "figure_eight"]

    # default to empty dictionary if not passed
    env_params = env_params or {}

    # get flow-specific parameters
    flow_params = dict()
    if env_name == "merge":
        flow_params = merge(**env_params)
    elif env_name == "ring":
        flow_params = ring(**env_params)
    elif env_name == "figure_eight":
        flow_params = figure_eight(**env_params)

    # create the wrapped environment
    create_env, _ = make_create_env(flow_params, version, render)
    self.wrapped_env = create_env()

    # for tracking the time horizon
    self.step_number = 0
    self.horizon = self.wrapped_env.env_params.horizon
def setup_exps(seeds_file=None):
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["sgd_minibatch_size"] = 128
    config["num_gpus"] = args.num_gpus
    config["gamma"] = 0.998  # discount rate
    config["model"].update({"fcnet_hiddens": [100, 50, 25]})
    # config['lr_schedule'] = [
    #     [0, 1e-4],
    #     [2000000, 5e-5],
    # ]
    config["use_gae"] = True
    config["lambda"] = 0.97
    # config["kl_target"] = 0.02
    config["vf_clip_param"] = 1e6
    config["num_sgd_iter"] = 1
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON
    # config["grad_clip"] = 0.5
    # config["entropy_coeff"] = 0.0001
    config["lr"] = 0.0
    # config["vf_share_layers"] = True
    # config["vf_loss_coeff"] = 0.5

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0,
                                           seeds_file=seeds_file)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def __init__(self, flow_params=flow_params):
    """Instantiate Experiment."""
    # Get the env name and a creator for the environment.
    self.create_env, self.env_name = make_create_env(flow_params)

    # Create the environment.
    self.env = self.create_env()
    self.flow_params = flow_params

    # Register as rllib env
    register_env(self.env_name, self.create_env)

    self.obs_space = self.env.observation_space
    self.act_space = self.env.action_space

    logging.info(" Starting experiment {} at {}".format(
        self.env.network.name, str(datetime.datetime.utcnow())))

    logging.info("Initializing environment.")
def get_compute_action_rllib(path_to_dir, checkpoint_num, alg):
    """Collect the compute_action method from RLlib's serialized files.

    Parameters
    ----------
    path_to_dir : str
        RLlib directory containing training results
    checkpoint_num : int
        checkpoint number / training iteration of the learned policy
    alg : str
        name of the RLlib algorithm that was used during the training
        procedure

    Returns
    -------
    method
        the compute_action method from the algorithm along with the trained
        parameters
    """
    # collect the configuration information from the RLlib checkpoint
    result_dir = path_to_dir if path_to_dir[-1] != '/' else path_to_dir[:-1]
    config = get_rllib_config(result_dir)

    # run on only one cpu for rendering purposes
    ray.init(num_cpus=1)
    config["num_workers"] = 1

    # create and register a gym+rllib env
    flow_params = get_flow_params(config)
    create_env, env_name = make_create_env(
        params=flow_params, version=9999, render=False)
    register_env(env_name, create_env)

    # recreate the agent
    agent_cls = get_agent_class(alg)
    agent = agent_cls(env=env_name, registry=get_registry(), config=config)

    # restore the trained parameters into the policy
    checkpoint = result_dir + '/checkpoint-{}'.format(checkpoint_num)
    agent._restore(checkpoint)

    return agent.compute_action
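# Illustrative replay loop built on get_compute_action_rllib above. The
# result directory and checkpoint number are hypothetical placeholders.
compute_action = get_compute_action_rllib(
    '/tmp/ray/my_experiment', checkpoint_num=100, alg='PPO')

# Rebuild a matching environment from the stored config and roll the
# restored policy out for one episode.
config = get_rllib_config('/tmp/ray/my_experiment')
replay_params = get_flow_params(config)
make_env, _ = make_create_env(params=replay_params, version=0, render=True)
env = make_env()
obs = env.reset()
done = False
while not done:
    obs, _, done, _ = env.step(compute_action(obs))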
def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [20, 15]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 20
    config['lr'] = 1e-4
    config['sgd_minibatch_size'] = 128
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def setup_exps(use_inflows=False):
    """Create and return the environment for this experiment.

    Parameters
    ----------
    use_inflows : bool, optional
        set to True if you would like to run the experiment with inflows of
        vehicles from the edges, and False otherwise

    Returns
    -------
    gym.Env
        the created environment
    """
    # collect the initialization and network-specific parameters based on
    # the choice to use inflows or not
    if use_inflows:
        initial_config, net_params = get_flow_params(
            col_num=N_COLUMNS,
            row_num=N_ROWS,
            additional_net_params=additional_net_params)
    else:
        initial_config, net_params = get_non_flow_params(
            enter_speed=V_ENTER,
            add_net_params=additional_net_params)

    # add the new parameters to flow_params
    flow_params['initial'] = initial_config
    flow_params['net'] = net_params

    create_env, gym_name = make_create_env(params=flow_params, version=0)
    env = create_env()
    return env
config["kl_target"] = 0.02 # target KL divergence config["num_sgd_iter"] = 10 # number of SGD iterations config["horizon"] = HORIZON # rollout horizon # save the flow params for replay flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4) # generating a string version of flow_params config['env_config'][ 'flow_params'] = flow_json # adding the flow_params to config dict config['env_config']['run'] = alg_run # Call the utility function make_create_env to be able to # register the Flow env for this experiment create_env, gym_name = make_create_env(params=flow_params, version=0) # Register as rllib env with Gym register_env(gym_name, create_env) trials = run_experiments({ flow_params["exp_tag"]: { "run": alg_run, "env": gym_name, "config": { **config }, "checkpoint_freq": 1, # number of iterations between checkpoints
def test_make_create_env(self):
    """Test that make_create_env generates an environment with the expected
    flow parameters."""
    # use a flow_params dict derived from flow/benchmarks/figureeight0.py
    vehicles = VehicleParams()
    vehicles.add(
        veh_id="human",
        acceleration_controller=(IDMController, {"noise": 0.2}),
        routing_controller=(ContinuousRouter, {}),
        car_following_params=SumoCarFollowingParams(
            speed_mode="obey_safe_speed",
        ),
        num_vehicles=13)
    vehicles.add(
        veh_id="rl",
        acceleration_controller=(RLController, {}),
        routing_controller=(ContinuousRouter, {}),
        car_following_params=SumoCarFollowingParams(
            speed_mode="obey_safe_speed",
        ),
        num_vehicles=1)

    flow_params = dict(
        exp_tag="figure_eight_0",
        env_name="AccelEnv",
        scenario="Figure8Scenario",
        simulator='traci',
        sim=SumoParams(
            sim_step=0.1,
            render=False,
        ),
        env=EnvParams(
            horizon=1500,
            additional_params={
                "target_velocity": 20,
                "max_accel": 3,
                "max_decel": 3,
                "sort_vehicles": False
            },
        ),
        net=NetParams(
            no_internal_links=False,
            additional_params={
                "radius_ring": 30,
                "lanes": 1,
                "speed_limit": 30,
                "resolution": 40,
            },
        ),
        veh=vehicles,
        initial=InitialConfig(),
        tls=TrafficLightParams(),
    )

    # some random version number for testing
    v = 23434

    # call make_create_env
    create_env, env_name = make_create_env(params=flow_params, version=v)

    # check that the name is correct
    self.assertEqual(env_name, '{}-v{}'.format(flow_params["env_name"], v))

    # create the gym environment
    env = create_env()

    # Note that we expect the port number in sim_params to change, and that
    # this feature is in fact needed to avoid race conditions
    flow_params["sim"].port = env.sim_params.port

    # check that each of the parameters match
    self.assertEqual(env.env_params.__dict__, flow_params["env"].__dict__)
    self.assertEqual(env.sim_params.__dict__, flow_params["sim"].__dict__)
    self.assertEqual(env.scenario.traffic_lights.__dict__,
                     flow_params["tls"].__dict__)
    self.assertEqual(env.net_params.__dict__, flow_params["net"].__dict__)
    self.assertEqual(env.initial_config.__dict__,
                     flow_params["initial"].__dict__)
    self.assertEqual(env.__class__.__name__, flow_params["env_name"])
    self.assertEqual(env.scenario.__class__.__name__,
                     flow_params["scenario"])
def GetTrafficLightEnv(inflow_probability, render=False, evaluate=False):
    # collect the initialization and network-specific parameters
    initial_config, net_params = get_inflow_params(
        col_num=N_COLUMNS,
        row_num=N_ROWS,
        additional_net_params=additional_net_params,
        inflow_probability=inflow_probability)

    flow_params = dict(
        # name of the experiment
        exp_tag='traffic_light_grid',

        # name of the flow environment the experiment is running on
        env_name=TrafficLightGridPOEnv,

        # name of the network class the experiment is running on
        network=DoubleLaneNetwork,

        # simulator that is used by the experiment
        simulator='traci',

        # sumo-related parameters (see flow.core.params.SumoParams)
        sim=SumoParams(
            sim_step=1,
            render=render,
            emission_path="Results",
            restart_instance=True),

        # environment related parameters (see flow.core.params.EnvParams)
        env=EnvParams(
            horizon=HORIZON,
            additional_params=additional_env_params,
            evaluate=evaluate),

        # network-related parameters (see flow.core.params.NetParams and the
        # network's documentation or ADDITIONAL_NET_PARAMS component). This
        # is filled in by get_inflow_params above.
        net=net_params,

        # vehicles to be placed in the network at the start of a rollout
        # (see flow.core.params.VehicleParams)
        veh=vehicles,

        # parameters specifying the positioning of vehicles upon
        # initialization/reset (see flow.core.params.InitialConfig). This is
        # filled in by get_inflow_params above.
        initial=initial_config,
    )

    # Get the env name and a creator for the environment.
    create_env, _ = make_create_env(flow_params)

    # Create the environment.
    env = create_env()
    return env


# An alternative, commented-out configuration that runs the same network
# through the Experiment class:
#
# flow_params = dict(
#     # name of the experiment
#     exp_tag='green_wave',
#     # name of the flow environment the experiment is running on
#     env_name=TrafficLightGridPOEnv,
#     # name of the scenario class the experiment is running on
#     network=DoubleLaneNetwork,
#     # simulator that is used by the experiment
#     simulator='traci',
#     # sumo-related parameters (see flow.core.params.SumoParams)
#     sim=SumoParams(
#         sim_step=0.1,
#         # render=False,
#         render=True,
#         restart_instance=True
#     ),
#     # environment related parameters (see flow.core.params.EnvParams)
#     env=EnvParams(
#         horizon=HORIZON,
#         additional_params=additional_env_params,
#     ),
#     # network-related parameters (see flow.core.params.NetParams and the
#     # scenario's documentation or ADDITIONAL_NET_PARAMS component)
#     net=net_params,
#     # vehicles to be placed in the network at the start of a rollout (see
#     # flow.core.vehicles.Vehicles)
#     veh=vehicles,
#     # parameters specifying the positioning of vehicles upon
#     # initialization/reset (see flow.core.params.InitialConfig)
#     initial=initial_config,
#     # tls=traffic_lights
# )
# exp = Experiment(flow_params)
# exp.run(1, convert_to_csv=False)
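# Illustrative use of GetTrafficLightEnv above (the inflow probability is a
# placeholder value): build the grid environment and take a few random
# steps.
env = GetTrafficLightEnv(inflow_probability=0.1, render=False)
obs = env.reset()
for _ in range(10):
    obs, reward, done, _ = env.step(env.action_space.sample())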
def setup_exps(flow_params):
    """Create the relevant components of a multiagent RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        input flow-parameters

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    # config['simple_optimizer'] = True
    config['gamma'] = 0.9995  # discount rate
    config['model'].update({'fcnet_hiddens': [100, 50, 25]})
    config['lr'] = tune.grid_search([1e-4, 5e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False
    config['observation_filter'] = 'NoFilter'

    gae_lambda = 0.97
    config["use_gae"] = True
    config["lambda"] = gae_lambda
    config["vf_clip_param"] = 1e10
    config["num_sgd_iter"] = 10

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder,
                           sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # register as rllib env
    register_env(env_name, create_env)

    # multiagent configuration
    temp_env = create_env()
    policy_graphs = {
        'av': (PPOTFPolicy, temp_env.observation_space,
               temp_env.action_space, {})
    }

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policies': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] \
        if 'run' in config['env_config'] else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json '
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. '
              'Click the green Play arrow to '
              'continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]
    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [np.zeros(size, np.float32),
                                   np.zeros(size, np.float32)]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [x / y for x, y in
                                     zip(final_outflows, final_inflows)]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id in rets.keys():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret[agent_id], agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))

    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))

    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(
            date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]

        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += " && cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)