def get_compute_action_rllib(path_to_dir, checkpoint_num, alg):
    """Collect the compute_action method from RLlib's serialized files.

    Parameters
    ----------
    path_to_dir : str
        RLlib directory containing training results
    checkpoint_num : int
        checkpoint number / training iteration of the learned policy
    alg : str
        name of the RLlib algorithm that was used during the training
        procedure

    Returns
    -------
    method
        the compute_action method from the algorithm along with the trained
        parameters
    """
    # normalize away a single trailing slash before reading the config
    trimmed_dir = path_to_dir[:-1] if path_to_dir[-1] == '/' else path_to_dir
    config = get_rllib_config(trimmed_dir)

    # a single cpu suffices when the policy is only used for rendering
    ray.init(num_cpus=1)
    config["num_workers"] = 1

    # build and register a gym+rllib environment from the stored flow params
    flow_params = get_flow_params(config)
    env_creator, gym_name = make_create_env(
        params=flow_params, version=9999, render=False)
    register_env(gym_name, env_creator)

    # re-instantiate the agent that was used during training
    agent_cls = get_agent_class(alg)
    agent = agent_cls(env=gym_name, registry=get_registry(), config=config)

    # load the trained weights from the requested checkpoint
    agent._restore('{}/checkpoint-{}'.format(trimmed_dir, checkpoint_num))

    return agent.compute_action
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.
    """
    # normalize away a single trailing slash on the results directory
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        # the pickled config carries the (non-JSON-serializable) multiagent
        # policy objects, so it overrides the JSON copy
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint: the algorithm name may come from the
    # CLI (--run) or from the stored params.json; both must agree if given.
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    # emission output is only requested when --gen_emission was passed
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        # sumo-web3d attaches as a second TraCI client
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        # saving frames requires the direct-rgb renderer regardless of mode
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # reuse the agent's own env when available (except under tests, where
    # TEST_FLAG forces a fresh gym env)
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            # one (hidden, cell) zero-state pair per policy
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [np.zeros(size, np.float32),
                                   np.zeros(size, np.float32)]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # restart the simulation under the visualization sim_params (render
    # mode, emission path, ...)
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        # NOTE(review): `logits` is never used afterwards
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id], state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                # accumulate each actor's reward under its policy's bucket
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        # NOTE(review): recomputed from scratch every rollout; only the last
        # iteration's value survives to the summary below
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [x / y for x, y in
                                     zip(final_outflows, final_inflows)]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                # NOTE(review): prints the whole `ret` dict for every agent;
                # presumably `ret[agent_id]` (or `rew[-1]`) was intended
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    # NOTE(review): prints mean_speed under the "Return:" header; looks like
    # it should print `rets` — confirm before relying on this output
    print(mean_speed)
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        # brief pause so SUMO finishes flushing the emission XML
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        # frames are dumped into timestamped dirs under ~/flow_rendering
        dirs = os.listdir(os.path.expanduser('~')+'/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
def visualizer_rllib(args):
    """Visualize an RLlib result directory (legacy `policy_graphs` variant).

    Replays `args.num_rollouts` episodes of the policy stored under
    `args.result_dir` at checkpoint `args.checkpoint_num`, printing
    return/speed/outflow statistics.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass
    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        # NOTE(review): if get_rllib_pkl raised above, `pkl` is unbound here
        # and this line raises NameError — verify intended behavior
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint: algorithm name from --run or from the
    # stored params.json; both must agree when both are present.
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = False
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        # sumo-web3d attaches as a second TraCI client
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        # saving frames requires the direct-rgb renderer
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.multiagent_envs'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # reuse the agent's own env when available (except under tests, where
    # TEST_FLAG forces a fresh gym env)
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            # one (hidden, cell) zero-state pair per policy
            for key in config['multiagent']['policy_graphs'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # restart the simulation under the visualization sim_params
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        # NOTE(review): `logits` is never used afterwards
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id], state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                # accumulate each actor's reward under its policy's bucket
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                # NOTE(review): prints the whole `ret` dict for every agent;
                # presumably `ret[agent_id]` was intended
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(np.mean(rets),
                                                   np.std(rets)))
    print('Average, std speed: {}, {}'.format(np.mean(mean_speed),
                                              np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        # brief pause so SUMO finishes flushing the emission XML
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        # frames are dumped into timestamped dirs under ~/flow_rendering
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.

    This variant additionally collects per-vehicle speed/acceleration traces
    ("BMIL" instrumentation), plots them with matplotlib, and accumulates an
    estimated power consumption.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        # pickled config carries the non-JSON-serializable multiagent objects
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # for hacks for old pkl files TODO: remove eventually
    if not hasattr(sim_params, 'use_ballistic'):
        sim_params.use_ballistic = False

    # Determine agent and checkpoint: algorithm name from --run or params.json
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        # sumo-web3d attaches as a second TraCI client
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # reuse the agent's own env when available (except under tests)
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to True after initializing agent and env

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            # one (hidden, cell) zero-state pair per policy
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    # bmil edit
    acc_rollout = []
    power = 0
    for i in range(args.num_rollouts):
        vel = []
        # bmil list for collecting data
        timerange = []
        vel_dict = defaultdict(list)
        rl_acc = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            # speeds = vehicles.get_speed(vehicles.get_ids())
            ids = vehicles.get_ids()
            speeds = vehicles.get_speed(ids)

            # BMIL EDIT FOR COLLECTING DATA OF ACCELERATION AND VELOCITY
            # NOTE(review): assumes at least one RL vehicle is present;
            # `act or 0` also maps a legitimate 0.0 accel and None alike to 0
            rl = vehicles.get_rl_ids()[0]
            act = vehicles.get_realized_accel(rl)
            rl_acc.append(act or 0)
            timerange.append(vehicles.get_timestep(ids[-1]) / 100000)

            # only include non-empty speeds
            if speeds:
                vel.append(np.mean(speeds))
                # bmil edit
                for veh_id, speed in zip(ids, speeds):
                    vel_dict[veh_id].append(speed)
                    # accumulate power once the warm-up period has elapsed
                    # (indentation reconstructed — presumably per-vehicle;
                    # TODO confirm against the original file)
                    if vehicles.get_timestep(ids[0]) >= 100000:
                        M = 1200  # mass of average sized vehicle (kg)
                        g = 9.81  # gravitational acceleration (m/s^2)
                        Cr = 0.005  # rolling resistance coefficient
                        Ca = 0.3  # aerodynamic drag coefficient
                        rho = 1.225  # air density (kg/m^3)
                        A = 2.6  # vehicle cross sectional area (m^2)
                        speed = vehicles.get_speed(veh_id)
                        prev_speed = vehicles.get_previous_speed(veh_id)
                        accel = abs(speed - prev_speed) / env.sim_step
                        # longitudinal power model: inertia + rolling
                        # resistance + aerodynamic drag
                        power += M * speed * accel + M * g * Cr * speed + \
                            0.5 * rho * A * Ca * speed**3

            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        # NOTE(review): `logits` is never used afterwards
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id], state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)

            # BMIL EDIT FOR COLLECTING DATA FROM 100000 TO 375000
            # Because 0 - 75000 steps are warm up and 75000 - 100000 steps
            # are process of stabilizing
            if vehicles.get_timestep(rl) >= 100000:
                acc_rollout.append(act)

            state, reward, done, _ = env.step(action)
            if multiagent:
                # accumulate each actor's reward under its policy's bucket
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        # NOTE(review): recomputed every rollout; only the last value is
        # reported in the summary
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                # NOTE(review): prints the whole `ret` dict for every agent
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

        # BMIL EDIT FOR PLOT DATA
        # plots are produced per rollout: non-RL vehicle speeds, the last
        # vehicle's speed, and the RL acceleration trace
        veh = list(vel_dict.keys())
        plt.subplot(3, 1, 1)
        plt.title('Results')
        for v in veh[:-1]:
            plt.plot(timerange, vel_dict[v])
        plt.xlabel('timestep(s)')
        plt.ylabel('speed(m/s)')
        plt.legend(veh[:-1], fontsize=9)
        plt.grid(True)
        # plt.show()

        plt.subplot(3, 1, 2)
        plt.plot(timerange, vel_dict[veh[-1]], color='r')
        plt.xlabel('timestep(s)')
        plt.ylabel('speed(m/s)')
        plt.legend(['lc'] + veh[-1:])
        plt.grid(True)

        plt.subplot(3, 1, 3)
        plt.plot(timerange, rl_acc, color='b')
        plt.xlabel('timestep(s)')
        plt.ylabel('acceleration(m/s^2)')
        plt.grid(True)
        # BASE_DIR = '/home/bmil/BMIL_FLOW_CODE/Graph/'
        # plt.savefig(f'{BASE_DIR}{"__".join(name)}', dpi=400)
        plt.show()

    # BMIL EDIT FOR COMPUTING ACCELERATION's MEAN AND VAR
    # NOTE(review): the comprehension is an identity copy of acc_rollout
    acc_rollout1 = [accarr for accarr in acc_rollout]
    mean_acc_rollout = [np.mean(acc_rollout1)]
    variance_acc_rollout = [np.var(acc_rollout1)]

    print('==== Summary of results ====')
    print("Return:")
    # NOTE(review): prints mean_speed under the "Return:" header; looks like
    # it should print `rets` — confirm before relying on this output
    print(mean_speed)
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # BMIL Edit FOR PRINT ACCEL's MEAN AND VAR
    print("\nAccel, mean (m/s^2):")
    print(mean_acc_rollout)
    print("\nAccel, var (m/s^2):")
    print(variance_acc_rollout)

    print("\nTotal Power Consumption (kgᐧm^2/s^3):")
    print(power)

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        # brief pause so SUMO finishes flushing the emission XML
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)
def visualizer_rllib(args):
    """Plot the trained policy's action surface over (headway, delta-v).

    Instead of rolling out episodes, this variant rebuilds the env/agent from
    the stored results and renders a contour map of compute_action over a
    grid of normalized states (ego speed fixed).
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass
    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        # NOTE(review): if get_rllib_pkl raised above, `pkl` is unbound here
        # and this line raises NameError
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sumo_params = flow_params['sumo']
    setattr(sumo_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint: algorithm name from --run or params.json
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if (args.run and config_run):
        if (args.run != config_run):
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if (args.run):
        agent_cls = get_agent_class(args.run)
    elif (config_run):
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sumo_params.restart_instance = False
    sumo_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo-web3d':
        # sumo-web3d attaches as a second TraCI client
        sumo_params.num_clients = 2
        sumo_params.render = False
    elif args.render_mode == 'drgb':
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
    elif args.render_mode == 'sumo-gui':
        sumo_params.render = False
    elif args.render_mode == 'no-render':
        sumo_params.render = False
    if args.save_render:
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
        sumo_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])
    scenario = scenario_class(name=exp_tag,
                              vehicles=vehicles,
                              net_params=net_params,
                              initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params,
                  sumo_params=sumo_params,
                  scenario=scenario))

    # NOTE(review): LinearLocator/FormatStrFormatter and `fig` are unused
    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator, FormatStrFormatter
    fig = plt.figure()
    # state grid: headway 0-60 m, relative speed -6 to +12 m/s
    h = np.linspace(0, 60, 100)
    Deltav = np.linspace(-6, 12, 100)
    Headway, DELTAV = np.meshgrid(h, Deltav)
    # fix v=20m/s
    xn, yn = Headway.shape
    geta = np.array(Headway)
    for xk in range(xn):
        for yk in range(yn):
            # input state
            # Headway[xk,yk]
            # DELTAV[xk,yk]
            # query the policy with a normalized [speed, delta-v, headway]
            # observation (normalization constants 30, 30, 260 — presumably
            # matching the training env's observation scaling; TODO confirm)
            geta[xk, yk] = agent.compute_action(
                np.array(
                    [3.8 / 30, DELTAV[xk, yk] / 30, Headway[xk, yk] / 260]))
    # NOTE(review): `surf` is unused
    surf = plt.contourf(DELTAV, Headway, geta, 20, cmap=cm.coolwarm)
    plt.colorbar()
    # C = plt.contour(DELTAV, Headway, geta, 20, colors='black')
    # plt.clabel(C, inline = True, fontsize = 10)
    plt.show()

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    # NOTE(review): the triple quote below opens a disabled (string-quoted)
    # movie-saving block whose closing quote lies outside this view —
    # confirm it is terminated in the full file
    '''
def visualizer_rllib(args): result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_config(result_dir) # Run on only one cpu for rendering purposes config['num_workers'] = 1 flow_params = get_flow_params(config) # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0, render=False) register_env(env_name, create_env) # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if (args.run and config_run): if (args.run != config_run): print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if (args.run): agent_cls = get_agent_class(args.run) elif (config_run): agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. 
' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) # Recreate the scenario from the pickled parameters exp_tag = flow_params['exp_tag'] net_params = flow_params['net'] vehicles = flow_params['veh'] initial_config = flow_params['initial'] module = __import__('flow.scenarios', fromlist=[flow_params['scenario']]) scenario_class = getattr(module, flow_params['scenario']) scenario = scenario_class(name=exp_tag, vehicles=vehicles, net_params=net_params, initial_config=initial_config) # Start the environment with the gui turned on and a path for the # emission file module = __import__('flow.envs', fromlist=[flow_params['env_name']]) env_class = getattr(module, flow_params['env_name']) env_params = flow_params['env'] if args.evaluate: env_params.evaluate = True sumo_params = flow_params['sumo'] if args.no_render: sumo_params.render = False else: sumo_params.render = True sumo_params.emission_path = './test_time_rollout/' env = ModelCatalog.get_preprocessor_as_wrapper( env_class(env_params=env_params, sumo_params=sumo_params, scenario=scenario)) # Run the environment in the presence of the pre-trained RL agent for the # requested number of time steps / rollouts rets = [] final_outflows = [] mean_speed = [] for i in range(args.num_rollouts): vel = [] state = env.reset() ret = 0 for _ in range(env_params.horizon): vehicles = env.unwrapped.vehicles vel.append(np.mean(vehicles.get_speed(vehicles.get_ids()))) action = agent.compute_action(state) state, reward, done, _ = env.step(action) ret += reward if done: break rets.append(ret) outflow = vehicles.get_outflow_rate(500) final_outflows.append(outflow) mean_speed.append(np.mean(vel)) print('Round {}, Return: {}'.format(i, ret)) print('Average, std return: {}, {}'.format(np.mean(rets), np.std(rets))) 
print('Average, std speed: {}, {}'.format(np.mean(mean_speed), np.std(mean_speed))) print('Average, std outflow: {}, {}'.format(np.mean(final_outflows), np.std(final_outflows))) # terminate the environment env.unwrapped.terminate() # if prompted, convert the emission file into a csv file if args.emission_to_csv: dir_path = os.path.dirname(os.path.realpath(__file__)) emission_filename = '{0}-emission.xml'.format(scenario.name) emission_path = \ '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) emission_to_csv(emission_path)
def visualizer_rllib(args): result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] # config = get_rllib_config(result_dir + '/..') # pkl = get_rllib_pkl(result_dir + '/..') config = get_rllib_config(result_dir) # TODO(ev) backwards compatibility hack try: pkl = get_rllib_pkl(result_dir) except Exception: pass # check if we have a multiagent scenario but in a # backwards compatible way if config.get('multiagent', {}).get('policy_graphs', {}): multiagent = True config['multiagent'] = pkl['multiagent'] else: multiagent = False # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) # hack for old pkl files # TODO(ev) remove eventually sumo_params = flow_params['sumo'] setattr(sumo_params, 'num_clients', 1) # Create and register a gym+rllib env create_env, env_name = make_create_env( params=flow_params, version=0, render=False) register_env(env_name, create_env) # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if (args.run and config_run): if (args.run != config_run): print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if (args.run): agent_cls = get_agent_class(args.run) elif (config_run): agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. 
' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sumo_params.restart_instance = False sumo_params.emission_path = './test_time_rollout/' # pick your rendering mode if args.render_mode == 'sumo-web3d': sumo_params.num_clients = 2 sumo_params.render = False elif args.render_mode == 'drgb': sumo_params.render = 'drgb' sumo_params.pxpm = 4 elif args.render_mode == 'sumo-gui': sumo_params.render = False elif args.render_mode == 'no-render': sumo_params.render = False if args.save_render: sumo_params.render = 'drgb' sumo_params.pxpm = 4 sumo_params.save_render = True # Recreate the scenario from the pickled parameters exp_tag = flow_params['exp_tag'] net_params = flow_params['net'] vehicles = flow_params['veh'] initial_config = flow_params['initial'] module = __import__('flow.scenarios', fromlist=[flow_params['scenario']]) scenario_class = getattr(module, flow_params['scenario']) scenario = scenario_class( name=exp_tag, vehicles=vehicles, net_params=net_params, initial_config=initial_config) # Start the environment with the gui turned on and a path for the # emission file module = __import__('flow.envs', fromlist=[flow_params['env_name']]) env_class = getattr(module, flow_params['env_name']) env_params = flow_params['env'] env_params.restart_instance = False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = 6000 #可以考虑改成6000 env_params.horizon = 6000 # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) env = ModelCatalog.get_preprocessor_as_wrapper(env_class( env_params=env_params, sumo_params=sumo_params, scenario=scenario)) if multiagent: rets = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func for key in 
config['multiagent']['policy_graphs'].keys(): rets[key] = [] else: rets = [] final_outflows = [] mean_speed = [] for i in range(1):#args.num_rollouts): vel = [] state = env.reset() done = False if multiagent: ret = {key: [0] for key in rets.keys()} else: ret = 0 for _ in range(env_params.horizon): vehicles = env.unwrapped.vehicles vel.append(vehicles.get_speed(vehicles.get_ids())[0])#这里是整体平均速度 if multiagent: action = {} for agent_id in state.keys(): action[agent_id] = agent.compute_action( state[agent_id], policy_id=policy_map_fn(agent_id)) else: print(type(state),state) action = agent.compute_action(state) print(type(action),action) state, reward, done, _ = env.step(action) if multiagent: for actor, rew in reward.items(): ret[policy_map_fn(actor)][0] += rew else: ret += reward if multiagent and done['__all__']: break if not multiagent and done: break if multiagent: for key in rets.keys(): rets[key].append(ret[key]) else: rets.append(ret) outflow = vehicles.get_outflow_rate(500) final_outflows.append(outflow) #mean_speed.append(np.mean(vel))#注意这里 print('Round {}, Return: {}'.format(i, ret)) if multiagent: for agent_id, rew in rets.items(): print('Average, std return: {}, {} for agent {}'.format( np.mean(rew), np.std(rew), agent_id)) else: print('Average, std return: {}, {}'.format( np.mean(rets), np.std(rets))) print('Average, std speed: {}, {}'.format( np.mean(mean_speed), np.std(mean_speed))) print('Average, std outflow: {}, {}'.format( np.mean(final_outflows), np.std(final_outflows))) import matplotlib.pyplot as plt plt.figure() plt.plot(vel) plt.show() # terminate the environment env.unwrapped.terminate() # if prompted, convert the emission file into a csv file if args.emission_to_csv: dir_path = os.path.dirname(os.path.realpath(__file__)) emission_filename = '{0}-emission.xml'.format(scenario.name) emission_path = \ '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) emission_to_csv(emission_path) # if we wanted to save the render, here we create 
the movie '''
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    loads the trained RLlib agent from its checkpoint, replays it in the
    recreated flow environment for ``args.num_rollouts`` rollouts, and prints
    summary statistics (rewards, speeds, inflows/outflows, throughput).
    Optionally converts the emission file to csv and/or assembles a movie
    from saved render frames.

    Parameters
    ----------
    args : argparse.Namespace
        parsed command-line arguments; expected attributes include
        result_dir, checkpoint_num, run, num_rollouts, render_mode,
        gen_emission, evaluate, horizon, save_render
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True

    # specify emission file path
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        # this will be set to true after creating agent and gym
        sim_params.render = False
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        # set to true after initializing agent and env
        env.sim_params.render = True

    # if restart_instance, don't restart here because env.reset will restart
    # later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params)

    # initial hidden state for recurrent (LSTM) policies
    use_lstm = config['model'].get('use_lstm', False)
    if use_lstm:
        state_size = config['model']['lstm_cell_size']
        lstm_state = [np.zeros(state_size), np.zeros(state_size)]
        if multiagent:
            # one independent hidden state per policy
            lstm_state = {
                key: deepcopy(lstm_state)
                for key in config['multiagent']['policies'].keys()
            }

    rewards = []
    if multiagent:
        rewards = defaultdict(list)
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        obs = env.reset()
        kv = env.k.vehicle
        rollout_speeds = []
        rollout_reward = 0
        if multiagent:
            rollout_reward = defaultdict(int)
        for _ in range(env_params.horizon):
            rollout_speeds.append(np.mean(kv.get_speed(kv.get_ids())))
            if multiagent:
                action = {}
                for agent_id in obs.keys():
                    if use_lstm:
                        # BUG FIX: compute_action has no ``obs`` keyword; the
                        # recurrent state is passed via ``state=`` and the
                        # returned RNN state must be written back to
                        # ``lstm_state`` rather than clobbering the
                        # observation dict (matches the correct usage in the
                        # sibling version of this function).
                        action[agent_id], lstm_state[agent_id], logits = \
                            agent.compute_action(
                                obs[agent_id],
                                state=lstm_state[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            obs[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(obs)
            obs, reward, done, _ = env.step(action)
            if multiagent:
                done = done['__all__']
                for agent_id, agent_reward in reward.items():
                    rollout_reward[policy_map_fn(agent_id)] += agent_reward
            else:
                rollout_reward += reward
            if done:
                break
        if multiagent:
            for agent_id, reward in rollout_reward.items():
                rewards[agent_id].append(reward)
                print('rollout %s, agent %s reward: %.5g' %
                      (i, agent_id, reward))
        else:
            rewards.append(rollout_reward)
            print('rollout %s, reward: %.5g' % (i, rollout_reward))
        # nan-safe: speeds are NaN when no vehicles are in the network
        mean_speed.append(np.nanmean(rollout_speeds))
        std_speed.append(np.nanstd(rollout_speeds))
        # Compute rate of inflow / outflow in the last 500 steps
        final_outflows.append(kv.get_outflow_rate(500))
        final_inflows.append(kv.get_inflow_rate(500))

    print('\n==== Summary of results: mean (std) '
          '[rollout1, rollout2, ...] ====')
    mean, std = np.mean, np.std
    if multiagent:
        for agent_id, agent_rewards in rewards.items():
            print('agent %s rewards: %.4g (%.4g) %s' %
                  (agent_id, mean(agent_rewards), std(agent_rewards),
                   agent_rewards))
    else:
        print('rewards: %.4g (%.4g) %s' %
              (mean(rewards), std(rewards), rewards))

    print('mean speeds (m/s): %.4g (%.4g) %s' %
          (mean(mean_speed), std(mean_speed), mean_speed))
    print('std speeds: %.4g (%.4g) %s' %
          (mean(std_speed), std(std_speed), std_speed))

    print('inflows (veh/hr): %.4g (%.4g) %s' %
          (mean(final_inflows), std(final_inflows), final_inflows))
    print('outflows (veh/hr): %.4g (%.4g) %s' %
          (mean(final_outflows), std(final_outflows), final_outflows))

    # Compute throughput efficiency in the last 500 sec of the run.
    # Guard against zero inflow (e.g. closed networks), which would otherwise
    # raise ZeroDivisionError; same guard as the sibling version below.
    if all(in_rate > 1e-5 for in_rate in final_inflows):
        throughput = [
            out_rate / in_rate
            for out_rate, in_rate in zip(final_outflows, final_inflows)
        ]
    else:
        throughput = [0] * len(final_inflows)
    print('throughput efficiency: %.4g (%.4g) %s' %
          (mean(throughput), std(throughput), throughput))

    # terminate the environment
    env.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        # brief pause so the simulator can finish flushing the emission file
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
def visualizer_rllib(args, seed=None):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    replays the trained policy for ``args.num_rollouts`` rollouts while
    printing timing and performance statistics, and optionally produces
    emission csv files, summary plots, and a rendered movie.

    Parameters
    ----------
    args : argparse.Namespace
        parsed command-line arguments
    seed : str, optional
        path to a pickled seed file; when provided it overrides both the
        RLlib seed and the sumo seed (takes precedence over
        ``args.use_seeds``)

    Returns
    -------
    tuple
        (mean return, per-rollout mean speeds, per-rollout inflows,
        per-rollout outflows)
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    config['callbacks'] = MyCallbacks

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # optionally load stored seeds; an explicit ``seed`` argument takes
    # precedence over ``args.use_seeds``
    seed_tmp = None
    if seed:
        with open(seed, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])
    elif args.use_seeds:
        with open(args.use_seeds, 'rb') as f:
            seed_tmp = pickle.load(f)
        config['seed'] = int(seed_tmp['rllib_seed'])

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)
    if seed_tmp:
        # NOTE(review): stored sumo seeds appear to be scaled by 10**6 —
        # confirm against the seed-file writer
        sim_params.seed = int(int(seed_tmp['sumo_seed']) / 10**6)
        print(sim_params.seed)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)

    # Merge with `evaluation_config`.
    evaluation_config = copy.deepcopy(config.get("evaluation_config", {}))
    config = merge_dicts(config, evaluation_config)

    if args.run:
        agent_cls = get_trainable_cls(args.run)
    elif config_run:
        agent_cls = get_trainable_cls(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # override the horizon before the env is registered so the created env
    # picks it up
    if args.horizon:
        config['horizon'] = args.horizon
        flow_params['env'].horizon = args.horizon

    # Create and register a gym+rllib env
    register_time = time.time()
    create_env, env_name = make_create_env(params=flow_params,
                                           version=0,
                                           seeds_file=seed)
    register_env(env_name, create_env)
    register_time = time.time() - register_time
    print("Register Time:", register_time)

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = True
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing (redundant with the pre-registration
    # override above -- kept for safety, same underlying object)
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    create_time = time.time()
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)
    create_time = time.time() - create_time
    print("Create time:", create_time)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        print(config['multiagent']['policy_mapping_fn'])
        policy_map_fn = config['multiagent']['policy_mapping_fn']
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    # initial hidden state for recurrent (LSTM) policies
    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    restart_time = time.time()
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)
    restart_time = time.time() - restart_time
    print("Restart Time:", restart_time)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    if PRINT_TO_SCREEN:
        pp = pprint.PrettyPrinter(indent=2)
        print("config ")
        pp.pprint(config)
        print("flow_params ")
        pp.pprint(flow_params)

    if REALTIME_PLOTS:
        # prepare interactive plots of actions / rewards / states
        plt.ion()
        fig = plt.figure()
        axA = fig.add_subplot(331)
        axA.set_title("Actions")
        axR = fig.add_subplot(332)
        axR.set_title("Rewards")
        axS = fig.add_subplot(333)
        axS.set_title("States")
        axS0 = fig.add_subplot(334)
        axS0.set_title("S0")
        axS1 = fig.add_subplot(335)
        axS1.set_title("S1")
        axS2 = fig.add_subplot(336)
        axS2.set_title("S2")
        axA_hist = fig.add_subplot(337)
        axA_hist.set_title("Actions")
        axR_hist = fig.add_subplot(338)
        axR_hist.set_title("Rewards")
        axS_hist = fig.add_subplot(339)
        axS_hist.set_title("States")
        axS.set_ylim((-2, 3))
        axA.set_ylim((-5, 5))
        axR.set_ylim((-1, 1))
        initialized_plot = False

    # record for visualization purposes
    actions = []
    rewards = []
    states = []
    times = []
    WARMUP = args.warmup  # NOTE(review): currently unused — confirm intent
    run_time = time.time()
    for i in range(args.num_rollouts):
        vel = []
        time_to_exit = 0
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            time_to_exit += 1
            vehicles = env.unwrapped.k.vehicle
            # only record time steps where the mean speed is positive
            if np.mean(vehicles.get_speed(vehicles.get_ids())) > 0:
                vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if SUMMARY_PLOTS:
                # record for visualization purposes
                actions.append(action)
                rewards.append(reward)
                states.append(state)
            if PRINT_TO_SCREEN:
                print("action")
                pp.pprint(action)
                print("reward")
                pp.pprint(reward)
                print("state")
                pp.pprint(state)
                print("after step ")
            if REALTIME_PLOTS:
                # Update plots.
                if not initialized_plot:  # initialize on first step
                    # axes.plot returns a tuple of line objects, thus the comma
                    lineA, = axA.plot([0] * len(action), 'g^')
                    lineR, = axR.plot(0, 'bs')
                    lineS, = axS.plot([0] * len(state), 'r+')
                    initialized_plot = True
                lineA.set_ydata(action)
                lineR.set_ydata(reward)
                lineS.set_ydata(state)
                fig.canvas.draw()
                fig.canvas.flush_events()
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
            # optionally stop early once enough vehicles have exited
            if args.use_delay > 0:
                if vehicles.get_num_arrived() >= args.use_delay:
                    break
        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(5000)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(5000)
        final_inflows.append(inflow)
        times.append(time_to_exit)
        # guard against division by zero when there is no inflow
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))
    run_time = time.time() - run_time

    print('==== Summary of results ====')
    print("Run Time: ", run_time)
    print("Return:")
    env.close()
    return_reward = 0
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
            return_reward = np.mean(rew)
    else:
        print(rets)
        print('Average, std: {:.2f}, {:.5f}'.format(np.mean(rets),
                                                    np.std(rets)))
        return_reward = np.mean(rets)

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(mean_speed),
                                                np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(std_speed),
                                                np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_inflows),
                                                np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {:.2f}, {:.5f}'.format(
        np.mean(throughput_efficiency), np.std(throughput_efficiency)))
    print("Time Delay")
    print(times)
    print("Time for certain number of vehicles to exit {:.2f},{:.5f}".format(
        (np.mean(times)), np.std(times)))

    if args.output:
        # NOTE(review): these rows are ragged (a scalar plus per-rollout
        # lists); np.savetxt requires rows of equal length — confirm the
        # expected output format
        np.savetxt(args.output, [
            return_reward, mean_speed, std_speed, final_inflows,
            final_outflows, times
        ])
    if SUMMARY_PLOTS:
        generateHtmlplots(actions, rewards, states)

    # terminate the environment
    env.unwrapped.terminate()
    env.terminate()

    # BUG FIX: the emission block below needs ``env.network.name``, so grab
    # it before the env is deleted (previously this raised NameError
    # whenever --gen_emission was used).
    network_name = env.network.name if args.gen_emission else None

    # Deleting the env in order to remove sumo process
    del env
    del evaluation_config

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        # brief pause so the simulator can finish flushing the emission file
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(network_name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)

    return return_reward, mean_speed, final_inflows, final_outflows
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.

    Parameters
    ----------
    args : argparse.Namespace
        parsed CLI arguments; the fields read here are ``result_dir``,
        ``run``, ``checkpoint_num``, ``num_rollouts``, ``render_mode``,
        ``save_render``, ``evaluate``, ``horizon`` and ``gen_emission``

    Returns
    -------
    str
        path prefix of the emission output for this run, i.e.
        ``<emission_path>/<network name>``
    """
    # strip a single trailing '/' so checkpoint paths concatenate cleanly
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        # the multiagent policy objects only live in the pkl, not params.json
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            # refuse to proceed when the CLI algorithm disagrees with the
            # one recorded at training time
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/emission/'.format(dir_path)
    # only write emission / summary / tripinfo output when requested
    sim_params.emission_path = emission_path if args.gen_emission else None
    sim_params.summary_path = emission_path if args.gen_emission else None
    sim_params.tripinfo_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # reuse the evaluator's env when available; TEST_FLAG forces gym.make
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to True after initializing agent and env

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                # [hidden state, cell state], both zero-initialized
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                # accumulate each actor's reward under its policy's key
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        # in/outflow rates over the last 500 seconds of this rollout
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))

    # terminate the environment
    env.unwrapped.terminate()

    emission_location = os.path.join(emission_path, env.network.name)
    return emission_location
scenario = BottleneckScenario(name="tcy_base", vehicles=vehicles, net_params=net_params, initial_config=initial_config, traffic_lights=traffic_lights) env = DesiredVelocityEnv(env_params, sim_params, scenario) return BottleneckDensityExperiment(env) if __name__ == '__main__': ray.init(num_cpus=1) result_dir = '/home/user/ray_results/mybottleneckwithoutlane10/PPO_DesiredVelocityEnv-v0_0_2019-03-30_15-47-34wdla6uer/checkpoint_200' config = get_rllib_config(result_dir) try: pkl = get_rllib_pkl(result_dir) except Exception: pass flow_params = get_flow_params(config) sim_params = flow_params['sim'] setattr(sim_params, 'num_clients', 1) # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0, render=False) register_env(env_name, create_env) # Determine agent and checkpoint
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.

    Parameters
    ----------
    args : argparse.Namespace
        parsed CLI arguments; the fields read here are ``result_dir``,
        ``run``, ``checkpoint_num``, ``num_rollouts``, ``render_mode``,
        ``save_render``, ``evaluate``, ``horizon`` and ``gen_emission``

    Side effects: prints per-rollout and summary statistics, appends one
    line of aggregated log2 metrics to ``~/log/log.csv``, optionally shows
    matplotlib speed plots and converts the SUMO emission file to csv.
    """
    # strip a single trailing '/' so checkpoint paths concatenate cleanly
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    # last two path components; used to label the plot and the csv log line
    name = result_dir.split("/")[-2:]

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        # the multiagent policy objects only live in the pkl, not params.json
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # for hacks for old pkl files TODO: remove eventually
    if not hasattr(sim_params, 'use_ballistic'):
        sim_params.use_ballistic = False

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True  # FIXME: this not works

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # reuse the evaluator's env when available; TEST_FLAG forces gym.make
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to True after initializing agent and env

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                # [hidden state, cell state], both zero-initialized
                state_init[key] = [np.zeros(size, np.float32),
                                   np.zeros(size, np.float32)]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    rl_speed = []  # store rl controlled vehicle's speed
    log2_stack = defaultdict(list)  # This dict stores log2 data during rollouts

    if args.evaluate:
        env.unwrapped.env_params.evaluate = True  # To cover bug

    for i in range(args.num_rollouts):
        vel = []
        vel_dict = defaultdict(list)
        timerange = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            ids = vehicles.get_ids()
            rls = vehicles.get_rl_ids()
            speeds = vehicles.get_speed(ids)
            # guard against an empty network on this step; `ids[-1]` used
            # to raise IndexError when no vehicles were present
            if ids:
                timerange.append(vehicles.get_timestep(ids[-1]) / 10000)

            # only include non-empty speeds
            if speeds:
                vel.append(np.mean(speeds))
                for veh_id, speed in zip(ids, speeds):
                    vel_dict[veh_id].append(speed)

            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        # compute_action with state returns (action, state,
                        # logits); the logits are not used here
                        action[agent_id], state_init[agent_id], _ = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                # accumulate each actor's reward under its policy's key
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        # in/outflow rates over the last 500 seconds of this rollout
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [x / y for x, y in
                                     zip(final_outflows, final_inflows)]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                # print this rollout's return for this agent (previously
                # formatted the entire `ret` dict for every agent)
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret[agent_id], agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

        # accumulate the env's log2 diagnostics for this rollout
        log2 = env.unwrapped.log2
        for k in log2:
            log2_stack[k].append(log2[k])

        # plot non-rl's speed and rl's speed graph
        if i == args.num_rollouts - 1 and args.render_mode != "no_render":
            veh = list(vel_dict.keys())
            plt.subplot(2, 1, 1)
            plt.title('/'.join(name))
            for v in veh[:-1]:
                plt.plot(timerange, vel_dict[v])
            plt.xlabel('timestep(s)')
            plt.ylabel('speed(m/s)')
            plt.legend(veh[:-1])
            plt.grid(True)
            # plt.show()
            plt.subplot(2, 1, 2)
            plt.plot(timerange, vel_dict[veh[-1]])
            plt.xlabel('timestep(s)')
            plt.ylabel('speed(m/s)')
            plt.legend(veh[-1:])
            plt.grid(True)
            plt.show()

    # mean speed of each rl vehicle over the last rollout
    rl_speed = [np.mean(vel_dict[rl]) for rl in vehicles.get_rl_ids()]

    for k in log2_stack:
        log2_stack[k] = np.mean(log2_stack[k]).round(3)

    # export the log2_stack
    from time import strftime
    # keep the module name `time` unshadowed: assigning `time = strftime(...)`
    # here used to break the time.sleep() call in the gen_emission block below
    date_str = strftime('%Y-%m-%d')
    flow_autonomous_home = os.path.expanduser('~/log/')
    os.makedirs(flow_autonomous_home, exist_ok=True)
    with open(os.path.join(flow_autonomous_home, 'log.csv'), 'a') as f:
        # `keys` is only needed if the commented header line is re-enabled
        keys = ['\"' + str(k) + '\"' for k in log2_stack.keys()]
        values = ['\"' + str(v) + '\"' for v in log2_stack.values()]
        # f.write('time,name,'+','.join(keys)+'\n')
        f.write(f'{date_str},{name[0]},{",".join(values)}\n')

    print('==== Summary of results ====')
    print("Return:")
    print(mean_speed)
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(
            np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('')
    # bmil edit: per-rl-vehicle mean speed (plain loop, not a side-effect
    # list comprehension)
    rls = vehicles.get_rl_ids()
    for idx in range(len(rls)):
        print(f'{rls[idx]} Speed, mean (m/s): {rl_speed[idx]}')
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        # brief pause so SUMO finishes flushing the emission xml
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.

    Parameters
    ----------
    args : argparse.Namespace
        parsed CLI arguments; the fields read here are ``result_dir``,
        ``run``, ``checkpoint_num``, ``num_rollouts``, ``render_mode``,
        ``save_render``, ``evaluate``, ``horizon`` and ``gen_emission``

    Side effects: prints rollout statistics, writes per-step rollout traces
    to ``output.csv`` and optionally converts the emission file to csv.
    """
    # strip a single trailing '/' so checkpoint paths concatenate cleanly
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        # the multiagent policy objects only live in the pkl, not params.json
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # for hacks for old pkl files TODO: remove eventually
    if not hasattr(sim_params, 'use_ballistic'):
        sim_params.use_ballistic = False

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            # refuse to proceed when the CLI algorithm disagrees with the
            # one recorded at training time
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # reuse the evaluator's env when available; TEST_FLAG forces gym.make
    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        env.sim_params.render = True  # set to True after initializing agent and env

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                # [hidden state, cell state], both zero-initialized
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    # numeric codes for vehicle state names written to output.csv;
    # unknown state names are mapped to -1 below
    state_translation = {
        "inflow": 0,
        "slowing": 1,
        "parking": 2,
        "parked": 3,
        "outflow": 4
    }
    # per-step traces, preallocated for every step of every rollout
    custom_outputs = {
        k: np.zeros(env_params.horizon * args.num_rollouts)
        for k in ["num_rollout", "t", "speed", "position", "state", "reward"]
    }

    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for j in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            speeds = vehicles.get_speed(vehicles.get_ids())

            # only include non-empty speeds
            if speeds:
                vel.append(np.mean(speeds))

            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                # accumulate each actor's reward under its policy's key
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

            # NOTE(review): this recording runs after the break checks above,
            # so the terminal step of an early-ended rollout is never logged
            # — confirm this is intentional
            #"num_rollout", "t", "speed", "position", "state", "reward"]}
            m = i * env_params.horizon + j
            custom_outputs["num_rollout"][m] = i
            custom_outputs["t"][m] = j
            custom_outputs["reward"][m] = reward
            # trace the first vehicle currently in the network
            veh_id = vehicles.get_ids()[0]
            custom_outputs["speed"][m] = vehicles.get_speed(veh_id)
            custom_outputs["position"][m] = vehicles.get_global_position(
                veh_id, env)
            custom_outputs["state"][m] = state_translation.get(
                vehicles.get_state(veh_id), -1)

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        # in/outflow rates over the last 500 seconds of this rollout
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id, rew in rets.items():
                # NOTE(review): formats the whole `ret` dict for every agent;
                # `ret[agent_id]` looks intended — confirm
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret, agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    # NOTE(review): mean_speed is printed under the "Return:" heading;
    # looks like a copy/paste slip — confirm
    print(mean_speed)
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # dump the per-step traces collected above
    df = pd.DataFrame(custom_outputs)
    df.to_csv("output.csv")

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)
def printConfig(args):
    """Load and print the RLlib config stored for a result directory.

    Parameters
    ----------
    args : argparse.Namespace
        parsed CLI arguments; only ``result_dir`` is read here
    """
    # drop a single trailing '/' so the path matches the other helpers
    path = args.result_dir
    if path[-1] == '/':
        path = path[:-1]
    config = get_rllib_config(path)
    print(config)