def run(use_gui=True, runs=1):
    """Run True Online SARSA(lambda) agents, one per traffic signal, on the 'double' network.

    Args:
        use_gui: Passed through to ``SumoEnvironment`` as ``use_gui``.
        runs: How many runs to execute; runs are numbered starting at 1.

    The agents are created once, before the first run, and are reused by
    every run. At the end of each run ``env.save_csv`` is called with the
    output prefix and the run number.
    """
    csv_prefix = 'outputs/double/sarsa-double'
    env = SumoEnvironment(
        net_file='nets/double/network.net.xml',
        single_agent=False,
        route_file='nets/double/flow.rou.xml',
        out_csv_name=csv_prefix,
        use_gui=use_gui,
        num_seconds=86400,
        yellow_time=3,
        min_green=5,
        max_green=60,
    )

    # Hard-coded switch: when True the agents are bypassed and the
    # environment is stepped with ``None`` as the action.
    fixed_tl = False

    # One independent learner per traffic signal id.
    agents = {}
    for ts_id in env.ts_ids:
        agents[ts_id] = TrueOnlineSarsaLambda(
            env.observation_spaces(ts_id),
            env.action_spaces(ts_id),
            alpha=0.000000001,
            gamma=0.95,
            epsilon=0.05,
            lamb=0.1,
            fourier_order=7,
        )

    for run_number in range(1, runs + 1):
        obs = env.reset()
        done = {'__all__': False}

        if fixed_tl:
            while not done['__all__']:
                _, _, done, _ = env.step(None)
        else:
            while not done['__all__']:
                # Only signals present in the current observation choose an action.
                actions = {}
                for ts_id in obs:
                    actions[ts_id] = agents[ts_id].act(obs[ts_id])

                next_obs, rewards, done, _ = env.step(action=actions)

                for ts_id, ts_next_obs in next_obs.items():
                    agents[ts_id].learn(
                        state=obs[ts_id],
                        action=actions[ts_id],
                        reward=rewards[ts_id],
                        next_state=ts_next_obs,
                        done=done[ts_id],
                    )
                    # Advance this signal's observation for the next step.
                    obs[ts_id] = ts_next_obs

        env.save_csv(csv_prefix, run_number)
def test_api():
    """Run ``check_env`` against a single-agent ``SumoEnvironment``.

    The environment is reset once before the check. ``env.close()`` sits in
    a ``finally`` clause so it is still called when ``check_env`` raises;
    the original exception then propagates unchanged.
    """
    env = SumoEnvironment(
        single_agent=True,
        num_seconds=100000,
        net_file='nets/single-intersection/single-intersection.net.xml',
        route_file='nets/single-intersection/single-intersection.rou.xml',
    )
    # reset() stays outside the try block: close() is only attempted on an
    # environment whose reset completed.
    env.reset()
    try:
        check_env(env)
    finally:
        env.close()
prs.add_argument("-mingreen", dest="min_green", type=int, default=5, required=False, help="Minimum green time.\n") prs.add_argument("-maxgreen", dest="max_green", type=int, default=50, required=False, help="Maximum green time.\n") prs.add_argument("-gui", action="store_true", default=False, help="Run with visualization on SUMO.\n") prs.add_argument("-fixed", action="store_true", default=False, help="Run with fixed timing traffic signals.\n") prs.add_argument("-s", dest="seconds", type=int, default=400000, required=False, help="Number of simulation seconds.\n") prs.add_argument("-runs", dest="runs", type=int, default=1, help="Number of runs.\n") args = prs.parse_args() out_csv = 'outputs/2way-single-intersection/sarsa_lambdavai' write_route_file('nets/2way-single-intersection/single-intersection-gen.rou.xml', 400000, 100000) env = SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml', single_agent=True, route_file=args.route, out_csv_name=out_csv, use_gui=args.gui, num_seconds=args.seconds, min_green=args.min_green, max_green=args.max_green, max_depart_delay=0) for run in range(1, args.runs+1): obs = env.reset() agent = TrueOnlineSarsaLambda(env.observation_space, env.action_space, alpha=args.alpha, gamma=args.gamma, epsilon=args.epsilon, fourier_order=7, lamb=0.9) done = False if args.fixed: while not done: _, _, done, _ = env.step({}) else: while not done:
prs.add_argument("-runs", dest="runs", type=int, default=1, help="Number of runs.\n") args = prs.parse_args() experiment_time = str(datetime.now()).split('.')[0] out_csv = 'outputs/single-intersection/{}_alpha{}_gamma{}_eps{}_decay{}_reward{}'.format( experiment_time, args.alpha, args.gamma, args.epsilon, args.decay, args.reward) env = SumoEnvironment( net_file='nets/single-intersection/single-intersection.net.xml', route_file=args.route, out_csv_name=out_csv, use_gui=args.gui, num_seconds=args.seconds, min_green=args.min_green, max_green=args.max_green, max_depart_delay=0) for run in range(1, args.runs + 1): initial_states = env.reset() ql_agents = { ts: QLAgent(starting_state=env.encode(initial_states[ts], ts), state_space=env.observation_space, action_space=env.action_space, alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy( initial_epsilon=args.epsilon,
import os
import sys

# Abort when SUMO_HOME is not set; otherwise append $SUMO_HOME/tools to
# sys.path ahead of the imports below.
if 'SUMO_HOME' not in os.environ:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
sys.path.append(tools)

from sumo_rl import SumoEnvironment
from sumo_rl.util.gen_route import write_route_file
import traci

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines import A2C


if __name__ == '__main__':
    # Write the route file that the environment below is configured to read.
    write_route_file(
        'nets/2way-single-intersection/single-intersection-gen.rou.xml',
        400000,
        100000,
    )

    # SubprocVecEnv receives a list of zero-argument environment factories;
    # a single factory is supplied here.
    env_factories = [
        lambda: SumoEnvironment(
            net_file='nets/2way-single-intersection/single-intersection.net.xml',
            route_file='nets/2way-single-intersection/single-intersection-gen.rou.xml',
            out_csv_name='outputs/2way-single-intersection/a2c',
            single_agent=True,
            use_gui=False,
            num_seconds=100000,
            min_green=5,
        )
    ]
    env = SubprocVecEnv(env_factories)

    model = A2C(
        MlpPolicy,
        env,
        verbose=1,
        learning_rate=0.001,
        lr_schedule='constant',
    )
    model.learn(total_timesteps=100000)
import sys

# Abort when SUMO_HOME is not set; otherwise append $SUMO_HOME/tools to
# sys.path ahead of the imports below.
if 'SUMO_HOME' not in os.environ:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
sys.path.append(tools)

from sumo_rl import SumoEnvironment
import traci


if __name__ == '__main__':
    # Single-agent environment on the two-way single intersection network.
    env = SumoEnvironment(
        net_file='nets/2way-single-intersection/single-intersection.net.xml',
        route_file='nets/2way-single-intersection/single-intersection-vhvh.rou.xml',
        out_csv_name='outputs/2way-single-intersection/dqn',
        single_agent=True,
        use_gui=False,
        num_seconds=100000,
        max_depart_delay=0,
    )

    # DQN with an MLP policy, trained for 100000 timesteps.
    model = DQN(
        env=env,
        policy="MlpPolicy",
        learning_rate=0.01,
        learning_starts=0,
        train_freq=1,
        target_update_interval=100,
        exploration_initial_eps=0.05,
        exploration_final_eps=0.01,
        verbose=1,
    )
    model.learn(total_timesteps=100000)
prs.add_argument("-s", dest="seconds", type=int, default=80000, required=False, help="Number of simulation seconds.\n") args = prs.parse_args() experiment_time = str(datetime.now()).split('.')[0].replace(' ', '_') scenario = args.network.replace('nets/5x5-Raphael/', '').replace('.net.xml', '') out_csv = f'outputs/5x5-Raphael/{scenario}_{experiment_time}_alpha{args.alpha}_gamma{args.gamma}_eps{args.epsilon}_decay{args.decay}' env = SumoEnvironment(net_file=args.network, route_file=args.route, out_csv_name=out_csv, use_gui=args.gui, num_seconds=args.seconds, min_green=args.min_green, max_green=args.max_green, max_depart_delay=0) initial_states = env.reset() ql_agents = { ts: QLAgent(starting_state=env.encode(initial_states[ts], ts), state_space=env.observation_spaces(ts), action_space=env.action_spaces(ts), alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy( initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay))
import sys

# Abort when SUMO_HOME is not set; otherwise append $SUMO_HOME/tools to
# sys.path ahead of the imports below.
if 'SUMO_HOME' not in os.environ:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
sys.path.append(tools)

import numpy as np
from sumo_rl import SumoEnvironment
import traci

from stable_baselines import DQN

# Single-agent environment on the DiamondNet network, with the SUMO GUI on.
# NOTE(review): unlike net_file, the route_file path has no 'DiamondNet/'
# component - confirm this is the intended location.
env = SumoEnvironment(
    net_file='nets/DiamondNet/DiamondTLs.net.xml',
    single_agent=True,
    route_file='nets/DiamondFlowsAcrossNet.rou.xml',
    out_csv_name='outputs/DiamondNet/dqn',
    use_gui=True,
    num_seconds=5400,
    yellow_time=4,
    min_green=5,
    max_green=60,
    max_depart_delay=0,
)

# DQN with an MLP policy, trained for 100000 timesteps.
model = DQN(
    env=env,
    policy="MlpPolicy",
    learning_rate=1e-3,
    buffer_size=50000,
    exploration_fraction=0.05,
    exploration_final_eps=0.02,
)
model.learn(total_timesteps=100000)
# Abort when SUMO_HOME is not set; otherwise append $SUMO_HOME/tools to
# sys.path ahead of the imports below.
if 'SUMO_HOME' not in os.environ:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
sys.path.append(tools)

import numpy as np
from sumo_rl import SumoEnvironment
import traci

from stable_baselines import DQN

# Single-agent environment on the big-intersection network, GUI disabled.
env = SumoEnvironment(
    net_file='nets/big-intersection/big-intersection.net.xml',
    single_agent=True,
    route_file='nets/big-intersection/routes.rou.xml',
    out_csv_name='outputs/big-intersection/dqn',
    use_gui=False,
    num_seconds=5400,
    yellow_time=4,
    min_green=5,
    max_green=60,
)

# DQN with an MLP policy, trained for 100000 timesteps.
model = DQN(
    env=env,
    policy="MlpPolicy",
    learning_rate=1e-3,
    buffer_size=50000,
    exploration_fraction=0.05,
    exploration_final_eps=0.02,
)
model.learn(total_timesteps=100000)
sys.exit("Please declare the environment variable 'SUMO_HOME'") from sumo_rl import SumoEnvironment from sumo_rl.agents import QLAgent from sumo_rl.exploration import EpsilonGreedy if __name__ == '__main__': alpha = 0.1 gamma = 0.995 decay = 1 runs = 4 env = SumoEnvironment(net_file='nets/4x4-Lucas/4x4.net.xml', route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml', use_gui=False, num_seconds=80000, min_green=5, delta_time=5) initial_states = env.reset() ql_agents = { ts: QLAgent(starting_state=env.encode(initial_states[ts], ts), state_space=env.observation_space, action_space=env.action_space, alpha=alpha, gamma=gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.ts_ids }
import traci from sumo_rl import SumoEnvironment from sumo_rl.agents import QLAgent from sumo_rl.exploration import EpsilonGreedy if __name__ == '__main__': alpha = 0.1 gamma = 0.99 decay = 1 runs = 1 env = SumoEnvironment(net_file='nets/4x4-Lucas/4x4.net.xml', route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml', use_gui=True, num_seconds=80000, max_depart_delay=0) for run in range(1, runs+1): initial_states = env.reset() ql_agents = {ts: QLAgent(starting_state=env.encode(initial_states[ts], ts), state_space=env.observation_space, action_space=env.action_space, alpha=alpha, gamma=gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.ts_ids} infos = [] done = {'__all__': False} while not done['__all__']: actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}
from stable_baselines import A2C


if __name__ == '__main__':
    # Write the route file that the environments below are configured to read.
    write_route_file(
        'nets/2way-single-intersection/single-intersection-gen.rou.xml',
        400000,
        100000,
    )

    # multiprocess environment
    n_cpu = 1
    # One zero-argument environment factory per worker.
    env_factories = [
        lambda: SumoEnvironment(
            net_file='nets/2way-single-intersection/single-intersection.net.xml',
            route_file='nets/2way-single-intersection/single-intersection-gen.rou.xml',
            out_csv_name='outputs/2way-single-intersection/a2c',
            single_agent=True,
            use_gui=False,
            num_seconds=100000,
            min_green=5,
            max_depart_delay=0,
        )
        for _ in range(n_cpu)
    ]
    env = SubprocVecEnv(env_factories)

    model = A2C(
        MlpPolicy,
        env,
        verbose=1,
        learning_rate=0.001,
        lr_schedule='constant',
    )
    model.learn(total_timesteps=100000)
from ray.rllib.agents.a3c.a3c import A3CTrainer from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy from ray.tune.registry import register_env from gym import spaces import numpy as np from sumo_rl import SumoEnvironment import traci if __name__ == '__main__': ray.init() register_env( "4x4grid", lambda _: SumoEnvironment( net_file='nets/4x4-Lucas/4x4.net.xml', route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml', out_csv_name='outputs/4x4grid/a3c', use_gui=False, num_seconds=80000, max_depart_delay=0)) trainer = A3CTrainer( env="4x4grid", config={ "multiagent": { "policies": { '0': (A3CTFPolicy, spaces.Box(low=np.zeros(10), high=np.ones(10)), spaces.Discrete(2), {}) }, "policy_mapping_fn":