"""
Original Authors: Sanjeevan Ahilan, Julian Villella, David Rusu
Modified By: Benedikt Kolbeinsson, Jiaze Sun, Pedro Castro
"""
import os

import gym

from hiway.sumo_scenario import SumoScenario
from policy import (Policy, OBSERVATION_SPACE, ACTION_SPACE, reward,
                    observation, action)

# Path to the scenario to test
scenario_path = '../tracks/2lane_sharp_bwd'

scenario = SumoScenario(
    scenario_root=scenario_path,
    random_social_vehicle_count=10)

env = gym.make(
    'gym_hiway:hiway-competition-v0',
    config={
        'sumo_scenario': scenario,
        'headless': False,
        'visdom': False,
        'seed': 41,
        'max_step_length': 10000,
        'observation_space': OBSERVATION_SPACE,
        'action_space': ACTION_SPACE,
        'reward_function': reward,
        'observation_function': observation,
        'action_function': action,
    })
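
# A minimal rollout sketch against the environment created above, assuming the
# standard gym API (env.reset()/env.step()/env.close()) and that ACTION_SPACE
# supports sample(); the episode count and random-action policy are
# illustrative only, not part of the original script.
num_episodes = 3  # hypothetical value for a quick smoke test
for episode in range(num_episodes):
    obs = env.reset()
    done = False
    total_reward = 0.0
    while not done:
        act = ACTION_SPACE.sample()  # replace with a trained Policy's action
        obs, rew, done, info = env.step(act)
        total_reward += rew
    print(f"episode {episode}: total reward {total_reward:.2f}")
env.close()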
def main(args):
    sumo_scenario = SumoScenario(
        scenario_root=os.path.abspath(args.scenario),
        random_social_vehicle_count=args.num_social_vehicles)

    dataset_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../..', 'tracks'))

    # tracks = ['1lane', '1lane_bwd', '1lane_sharp', '1lane_sharp_bwd',
    #           '1lane_new_a', '1lane_new_b', '1lane_new_c',
    #           '1lane_new_a_bwd', '1lane_new_b_bwd', '1lane_new_c_bwd',
    #           '2lane', '2lane_bwd', '2lane_sharp', '2lane_sharp_bwd',
    #           '2lane_new_a', '2lane_new_b', '2lane_new_c',
    #           '2lane_new_a_bwd', '2lane_new_b_bwd', '2lane_new_c_bwd',
    #           '3lane', '3lane_b', '3lane_bwd', '3lane_bwd_b', '3lane_sharp',
    #           '3lane_sharp_b', '3lane_sharp_bwd', '3lane_sharp_bwd_b',
    #           '3lane_new_a', '3lane_new_b', '3lane_new_c',
    #           '3lane_new_a_bwd', '3lane_new_b_bwd', '3lane_new_c_bwd']
    # tracks = ['2lane', '2lane_bwd', '2lane_sharp', '2lane_sharp_bwd',
    #           '2lane_new_a', '2lane_new_b', '2lane_new_c',
    #           '2lane_new_a_bwd', '2lane_new_b_bwd', '2lane_new_c_bwd']
    tracks = [
        '1lane', '2lane_sharp_bwd', '3lane_bwd', '3lane_sharp_bwd_b',
        '1lane_new_a', '2lane_new_b', '3lane_new_c', '1lane_new_c_bwd',
        '2lane_new_a_bwd', '3lane_new_a_bwd', '1lane_new_b', '2lane_new_c',
        '3lane_new_b_bwd', '3lane_sharp_b', '2lane_sharp_bwd'
    ]
    tracks_dir = [os.path.join(dataset_dir, track) for track in tracks]
    num_social_vehicles = [
        10, 10, 10, 50, 15, 15, 70, 20, 30, 100, 15, 50, 70, 25, 10
    ]
    env_setting = list(zip(tracks_dir, num_social_vehicles))
    # note: ensure that len(env_setting) > args.num_workers

    # train each worker with a different environment setting
    class MultiEnv(CompetitionEnv):
        def __init__(self, env_config):
            env_config['sumo_scenario'] = SumoScenario(
                scenario_root=env_setting[env_config.worker_index - 1][0],
                random_social_vehicle_count=env_setting[
                    env_config.worker_index - 1][1])
            super(MultiEnv, self).__init__(config=env_config)

    tune_config = {
        'env': MultiEnv,
        'log_level': 'WARN',
        'num_workers': args.num_workers,
        'horizon': 1000,
        'env_config': {
            'seed': tune.randint(1000),
            'sumo_scenario': sumo_scenario,
            'headless': args.headless,
            'observation_space': OBSERVATION_SPACE,
            'action_space': ACTION_SPACE,
            'reward_function': tune.function(reward),
            'observation_function': tune.function(observation),
            'action_function': tune.function(action),
            'max_step_length': 1000,
        },
        'model': {
            'custom_model': MODEL_NAME,
            'fcnet_activation': "relu"
        },
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end
        },
        # These params are tuned from a fixed starting value.
        "lambda": 0.95,
        "clip_param": 0.2,
        # "lr": 0.0,
        "lr_schedule": [[0, 1e-3], [3000000, 3e-4], [6000000, 1e-4],
                        [9000000, 3e-5], [12000000, 1e-5]],
        "num_sgd_iter": 10,
        "sgd_minibatch_size": 4096,
        "train_batch_size": 131072
    }

    experiment_name = 'rllib_multi_env'
    result_dir = args.result_dir

    checkpoint = None
    if args.checkpoint_num is not None:
        checkpoint = ('{dir}/checkpoint_{n}/checkpoint-{n}'.format(
            dir=result_dir, n=args.checkpoint_num))

    log_dir = os.path.expanduser("~/ray_results")
    print(f"Checkpointing at {log_dir}")

    # for debugging e.g. with pycharm, turn on local_mode
    # ray.init(local_mode=True)

    # scheduler = pbt if args.pbt else None
    scheduler = None  # no population-based training scheduler by default

    analysis = tune.run(
        'PPO',
        name=experiment_name,
        stop={'time_total_s': 60 * 60 * 40},
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=log_dir,
        resume=args.resume_training,
        # restore=checkpoint,
        max_failures=1000,
        num_samples=args.num_samples,
        export_formats=['model', 'checkpoint'],
        config=tune_config,
        scheduler=scheduler,
    )

    print(analysis.dataframe().head())

    logdir = analysis.get_best_logdir('episode_reward_max')
    model_path = os.path.join(logdir, 'model')
    dest_model_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "model")

    if not os.path.exists(dest_model_path):
        shutil.copytree(model_path, dest_model_path)
        print(f"wrote model to: {dest_model_path}")
    else:
        print(f"Model already exists at {dest_model_path}, not overwriting")
        print(f"New model is stored at {model_path}")
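
# A possible command-line entry point for the multi-environment trainer above.
# The flag names mirror the attributes read from `args` in main(), but the
# defaults and help strings below are assumptions, not the original script's.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Multi-scenario PPO training with RLlib/tune')
    parser.add_argument('--scenario', type=str,
                        default='../tracks/2lane_sharp_bwd',
                        help='base scenario used to build the default SumoScenario')
    parser.add_argument('--num_social_vehicles', type=int, default=10)
    parser.add_argument('--num_workers', type=int, default=4,
                        help='must not exceed len(env_setting)')
    parser.add_argument('--headless', action='store_true')
    parser.add_argument('--resume_training', action='store_true')
    parser.add_argument('--num_samples', type=int, default=1)
    parser.add_argument('--result_dir', type=str,
                        default=os.path.expanduser('~/ray_results/rllib_multi_env'))
    parser.add_argument('--checkpoint_num', type=int, default=None)
    main(parser.parse_args())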
def main(args):
    sumo_scenario = SumoScenario(
        scenario_root=os.path.abspath(args.scenario),
        random_social_vehicle_count=args.num_social_vehicles)

    tune_config = {
        'env': CompetitionEnv,
        'log_level': 'WARN',
        'num_workers': 2,
        'horizon': 5000,
        'env_config': {
            'seed': tune.randint(1000),
            'sumo_scenario': sumo_scenario,
            'headless': args.headless,
            'observation_space': OBSERVATION_SPACE,
            'action_space': ACTION_SPACE,
            'reward_function': tune.function(reward),
            'observation_function': tune.function(observation),
            'action_function': tune.function(action),
        },
        'model': {
            'custom_model': MODEL_NAME,
        },
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end
        }
    }

    experiment_name = 'rllib_example'
    log_dir = os.path.expanduser("~/ray_results")
    print(f"Checkpointing at {log_dir}")

    analysis = tune.run(
        'PPO',
        name=experiment_name,
        stop={'time_total_s': 60 * 60},  # 1 hour
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=log_dir,
        resume=args.resume_training,
        max_failures=10,
        num_samples=args.num_samples,
        export_formats=['model', 'checkpoint'],
        config=tune_config,
    )

    print(analysis.dataframe().head())

    logdir = analysis.get_best_logdir('episode_reward_max')
    model_path = os.path.join(logdir, 'model')
    dest_model_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "model")

    if not os.path.exists(dest_model_path):
        shutil.copytree(model_path, dest_model_path)
        print(f"wrote model to: {dest_model_path}")
    else:
        print(f"Model already exists at {dest_model_path}, not overwriting")
        print(f"New model is stored at {model_path}")
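
# A sketch of the episode callbacks referenced in both tune_configs above,
# written against the legacy dict-style RLlib callback API in which each hook
# receives an `info` dict containing an "episode" object. The metric tracked
# here (step count) is purely illustrative; the original callbacks are defined
# elsewhere in the repository.
def on_episode_start(info):
    episode = info["episode"]
    episode.user_data["num_steps"] = 0

def on_episode_step(info):
    episode = info["episode"]
    episode.user_data["num_steps"] += 1

def on_episode_end(info):
    episode = info["episode"]
    # custom_metrics are aggregated by RLlib and show up in the tune results
    episode.custom_metrics["episode_steps"] = episode.user_data["num_steps"]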