Example #1
def run(use_gui=True, runs=1):
    out_csv = 'outputs/double/sarsa-double'

    env = SumoEnvironment(net_file='nets/double/network.net.xml',
                          single_agent=False,
                          route_file='nets/double/flow.rou.xml',
                          out_csv_name=out_csv,
                          use_gui=use_gui,
                          num_seconds=86400,
                          yellow_time=3,
                          min_green=5,
                          max_green=60)

    fixed_tl = False
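    # One True Online SARSA(lambda) agent per traffic signal, each using an
    # order-7 Fourier basis for linear function approximation.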
    agents = {
        ts_id: TrueOnlineSarsaLambda(env.observation_spaces(ts_id),
                                     env.action_spaces(ts_id),
                                     alpha=1e-9,
                                     gamma=0.95,
                                     epsilon=0.05,
                                     lamb=0.1,
                                     fourier_order=7)
        for ts_id in env.ts_ids
    }

    for run in range(1, runs + 1):
        obs = env.reset()
        done = {'__all__': False}

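        # With fixed_tl the signals run their default SUMO program (step(None));
        # otherwise each agent picks a phase and learns from its local reward.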
        if fixed_tl:
            while not done['__all__']:
                _, _, done, _ = env.step(None)
        else:
            while not done['__all__']:
                actions = {
                    ts_id: agents[ts_id].act(obs[ts_id])
                    for ts_id in obs.keys()
                }

                next_obs, r, done, _ = env.step(action=actions)

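                # Update every agent with its own reward, then advance its observation.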
                for ts_id in next_obs.keys():
                    agents[ts_id].learn(state=obs[ts_id],
                                        action=actions[ts_id],
                                        reward=r[ts_id],
                                        next_state=next_obs[ts_id],
                                        done=done[ts_id])
                    obs[ts_id] = next_obs[ts_id]

        env.save_csv(out_csv, run)
Example #2
# Assumed import: check_env here is taken to be stable-baselines3's Gym-API checker.
from stable_baselines3.common.env_checker import check_env
from sumo_rl import SumoEnvironment


def test_api():
    env = SumoEnvironment(single_agent=True,
                          num_seconds=100000,
                          net_file='nets/single-intersection/single-intersection.net.xml',
                          route_file='nets/single-intersection/single-intersection.rou.xml')
    env.reset()
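    # Validate observation/action spaces and the reset/step contract.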
    check_env(env)
    env.close()
Example #3
    prs.add_argument("-mingreen", dest="min_green", type=int, default=5, required=False, help="Minimum green time.\n")
    prs.add_argument("-maxgreen", dest="max_green", type=int, default=50, required=False, help="Maximum green time.\n")
    prs.add_argument("-gui", action="store_true", default=False, help="Run with visualization on SUMO.\n")
    prs.add_argument("-fixed", action="store_true", default=False, help="Run with fixed timing traffic signals.\n")
    prs.add_argument("-s", dest="seconds", type=int, default=400000, required=False, help="Number of simulation seconds.\n")
    prs.add_argument("-runs", dest="runs", type=int, default=1, help="Number of runs.\n")
    args = prs.parse_args()

    out_csv = 'outputs/2way-single-intersection/sarsa_lambdavai'

    write_route_file('nets/2way-single-intersection/single-intersection-gen.rou.xml', 400000, 100000)
    env = SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml',
                          single_agent=True,
                          route_file=args.route,
                          out_csv_name=out_csv,
                          use_gui=args.gui,
                          num_seconds=args.seconds,
                          min_green=args.min_green,
                          max_green=args.max_green,
                          max_depart_delay=0)

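    # A fresh SARSA(lambda) agent is instantiated at the start of every run,
    # so no learned weights carry over between runs.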
    for run in range(1, args.runs+1):
        obs = env.reset()
        agent = TrueOnlineSarsaLambda(env.observation_space, env.action_space, alpha=args.alpha, gamma=args.gamma, epsilon=args.epsilon, fourier_order=7, lamb=0.9)
        
        done = False
        if args.fixed:
            while not done:
                _, _, done, _ = env.step({})
        else:
            while not done:
                action = agent.act(obs)
                next_obs, r, done, _ = env.step(action=action)
                # Same act/step/learn cycle as the multi-agent loop of Example #1.
                agent.learn(state=obs, action=action, reward=r,
                            next_state=next_obs, done=done)
                obs = next_obs

        env.save_csv(out_csv, run)
Example #4
    prs.add_argument("-runs",
                     dest="runs",
                     type=int,
                     default=1,
                     help="Number of runs.\n")
    args = prs.parse_args()
    experiment_time = str(datetime.now()).split('.')[0]
    out_csv = 'outputs/single-intersection/{}_alpha{}_gamma{}_eps{}_decay{}_reward{}'.format(
        experiment_time, args.alpha, args.gamma, args.epsilon, args.decay,
        args.reward)

    env = SumoEnvironment(
        net_file='nets/single-intersection/single-intersection.net.xml',
        route_file=args.route,
        out_csv_name=out_csv,
        use_gui=args.gui,
        num_seconds=args.seconds,
        min_green=args.min_green,
        max_green=args.max_green,
        max_depart_delay=0)

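    # Fresh tabular Q-learning agents every run, one per traffic signal,
    # each seeded with its encoded initial state.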
    for run in range(1, args.runs + 1):
        initial_states = env.reset()
        ql_agents = {
            ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                        state_space=env.observation_space,
                        action_space=env.action_space,
                        alpha=args.alpha,
                        gamma=args.gamma,
                        exploration_strategy=EpsilonGreedy(
                            initial_epsilon=args.epsilon,
                            min_epsilon=args.min_epsilon,
                            decay=args.decay))
            for ts in env.ts_ids
        }
Example #5
import os
import sys
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
from sumo_rl import SumoEnvironment
from sumo_rl.util.gen_route import write_route_file
import traci

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines import A2C


if __name__ == '__main__':

    write_route_file('nets/2way-single-intersection/single-intersection-gen.rou.xml', 400000, 100000)

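    # stable-baselines A2C expects a vectorized env, so even a single worker
    # is wrapped in SubprocVecEnv.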
    env = SubprocVecEnv([lambda: SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml',
                                        route_file='nets/2way-single-intersection/single-intersection-gen.rou.xml',
                                        out_csv_name='outputs/2way-single-intersection/a2c',
                                        single_agent=True,
                                        use_gui=False,
                                        num_seconds=100000,
                                        min_green=5)])

    model = A2C(MlpPolicy, env, verbose=1, learning_rate=0.001, lr_schedule='constant')
    model.learn(total_timesteps=100000)
Example #6
import os
import sys
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
from sumo_rl import SumoEnvironment
import traci

# Assumed: the kwargs below (target_update_interval, exploration_initial_eps)
# match stable-baselines3's DQN, not stable-baselines 2.
from stable_baselines3 import DQN

if __name__ == '__main__':

    env = SumoEnvironment(
        net_file='nets/2way-single-intersection/single-intersection.net.xml',
        route_file='nets/2way-single-intersection/single-intersection-vhvh.rou.xml',
        out_csv_name='outputs/2way-single-intersection/dqn',
        single_agent=True,
        use_gui=False,
        num_seconds=100000,
        max_depart_delay=0)

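    # learning_starts=0 and train_freq=1: the network is updated from the
    # very first step, and on every step thereafter.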
    model = DQN(env=env,
                policy="MlpPolicy",
                learning_rate=0.01,
                learning_starts=0,
                train_freq=1,
                target_update_interval=100,
                exploration_initial_eps=0.05,
                exploration_final_eps=0.01,
                verbose=1)
    model.learn(total_timesteps=100000)
Example #7
    prs.add_argument("-s",
                     dest="seconds",
                     type=int,
                     default=80000,
                     required=False,
                     help="Number of simulation seconds.\n")
    args = prs.parse_args()
    experiment_time = str(datetime.now()).split('.')[0].replace(' ', '_')
    scenario = args.network.replace('nets/5x5-Raphael/',
                                    '').replace('.net.xml', '')
    out_csv = f'outputs/5x5-Raphael/{scenario}_{experiment_time}_alpha{args.alpha}_gamma{args.gamma}_eps{args.epsilon}_decay{args.decay}'

    env = SumoEnvironment(net_file=args.network,
                          route_file=args.route,
                          out_csv_name=out_csv,
                          use_gui=args.gui,
                          num_seconds=args.seconds,
                          min_green=args.min_green,
                          max_green=args.max_green,
                          max_depart_delay=0)

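    # One tabular Q-learning agent per traffic signal, each with its own
    # epsilon-greedy exploration schedule.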
    initial_states = env.reset()
    ql_agents = {
        ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                    state_space=env.observation_spaces(ts),
                    action_space=env.action_spaces(ts),
                    alpha=args.alpha,
                    gamma=args.gamma,
                    exploration_strategy=EpsilonGreedy(
                        initial_epsilon=args.epsilon,
                        min_epsilon=args.min_epsilon,
                        decay=args.decay))
        for ts in env.ts_ids
    }
Example #8
import os
import sys
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
import numpy as np
from sumo_rl import SumoEnvironment
import traci

from stable_baselines import DQN

env = SumoEnvironment(net_file='nets/DiamondNet/DiamondTLs.net.xml',
                      single_agent=True,
                      route_file='nets/DiamondFlowsAcrossNet.rou.xml',
                      out_csv_name='outputs/DiamondNet/dqn',
                      use_gui=True,
                      num_seconds=5400,
                      yellow_time=4,
                      min_green=5,
                      max_green=60,
                      max_depart_delay=0)

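# 50k-transition replay buffer; epsilon is annealed over the first 5% of training.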
model = DQN(env=env,
            policy="MlpPolicy",
            learning_rate=1e-3,
            buffer_size=50000,
            exploration_fraction=0.05,
            exploration_final_eps=0.02)
model.learn(total_timesteps=100000)
Example #9
import os
import sys
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")
import numpy as np
from sumo_rl import SumoEnvironment
import traci

from stable_baselines import DQN

env = SumoEnvironment(net_file='nets/big-intersection/big-intersection.net.xml',
                      single_agent=True,
                      route_file='nets/big-intersection/routes.rou.xml',
                      out_csv_name='outputs/big-intersection/dqn',
                      use_gui=False,
                      num_seconds=5400,
                      yellow_time=4,
                      min_green=5,
                      max_green=60)

model = DQN(
    env=env,
    policy="MlpPolicy",
    learning_rate=1e-3,
    buffer_size=50000,
    exploration_fraction=0.05,
    exploration_final_eps=0.02
)
model.learn(total_timesteps=100000)
Example #10
import os
import sys
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("Please declare the environment variable 'SUMO_HOME'")

from sumo_rl import SumoEnvironment
from sumo_rl.agents import QLAgent
from sumo_rl.exploration import EpsilonGreedy

if __name__ == '__main__':

    alpha = 0.1
    gamma = 0.995
    decay = 1
    runs = 4

    env = SumoEnvironment(net_file='nets/4x4-Lucas/4x4.net.xml',
                          route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                          use_gui=False,
                          num_seconds=80000,
                          min_green=5,
                          delta_time=5)

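    # env.encode() discretizes each signal's observation into a hashable key
    # for the agent's Q-table.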
    initial_states = env.reset()
    ql_agents = {
        ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=alpha,
                    gamma=gamma,
                    exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                       min_epsilon=0.005,
                                                       decay=decay))
        for ts in env.ts_ids
    }
Example #11
import traci
from sumo_rl import SumoEnvironment
from sumo_rl.agents import QLAgent
from sumo_rl.exploration import EpsilonGreedy


if __name__ == '__main__':

    alpha = 0.1
    gamma = 0.99
    decay = 1
    runs = 1

    env = SumoEnvironment(net_file='nets/4x4-Lucas/4x4.net.xml',
                          route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                          use_gui=True,
                          num_seconds=80000,
                          max_depart_delay=0)

    for run in range(1, runs+1):
        initial_states = env.reset()
        ql_agents = {ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                                 state_space=env.observation_space,
                                 action_space=env.action_space,
                                 alpha=alpha,
                                 gamma=gamma,
                                 exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.ts_ids}
        infos = []
        done = {'__all__': False}
        while not done['__all__']:
            actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

            s, r, done, _ = env.step(action=actions)

            # Each agent learns from its own local reward.
            # Assumed QLAgent.learn signature (next_state, reward), as in the sumo-rl examples.
            for agent_id in s.keys():
                ql_agents[agent_id].learn(next_state=env.encode(s[agent_id], agent_id),
                                          reward=r[agent_id])
Example #12
from sumo_rl import SumoEnvironment
from sumo_rl.util.gen_route import write_route_file

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines import A2C

if __name__ == '__main__':

    write_route_file(
        'nets/2way-single-intersection/single-intersection-gen.rou.xml',
        400000, 100000)

    # multiprocess environment
    n_cpu = 1
    env = SubprocVecEnv([
        lambda: SumoEnvironment(
            net_file='nets/2way-single-intersection/single-intersection.net.xml',
            route_file='nets/2way-single-intersection/single-intersection-gen.rou.xml',
            out_csv_name='outputs/2way-single-intersection/a2c',
            single_agent=True,
            use_gui=False,
            num_seconds=100000,
            min_green=5,
            max_depart_delay=0) for _ in range(n_cpu)
    ])

    model = A2C(MlpPolicy,
                env,
                verbose=1,
                learning_rate=0.001,
                lr_schedule='constant')
    model.learn(total_timesteps=100000)
Example #13
import ray
from ray.rllib.agents.a3c.a3c import A3CTrainer
from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy
from ray.tune.registry import register_env
from gym import spaces
import numpy as np
from sumo_rl import SumoEnvironment
import traci

if __name__ == '__main__':
    ray.init()

    register_env(
        "4x4grid", lambda _: SumoEnvironment(
            net_file='nets/4x4-Lucas/4x4.net.xml',
            route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
            out_csv_name='outputs/4x4grid/a3c',
            use_gui=False,
            num_seconds=80000,
            max_depart_delay=0))

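    # A single policy '0' is defined over a 10-dim observation box and
    # 2 discrete phase actions.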
    trainer = A3CTrainer(
        env="4x4grid",
        config={
            "multiagent": {
                "policies": {
                    '0':
                    (A3CTFPolicy, spaces.Box(low=np.zeros(10),
                                             high=np.ones(10)),
                     spaces.Discrete(2), {})
                },
                "policy_mapping_fn":