Example #1
0
                      # Remaining keyword arguments of the SumoEnvironment
                      # constructor call that opens above this fragment.
                      route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',  # SUMO demand/route file
                      use_gui=False,      # run SUMO headless
                      min_green=8,        # minimum green-phase duration (seconds)
                      delta_time=5,       # seconds of simulation between agent decisions
                      num_seconds=80000)  # total simulated seconds per episode

    # One independent training run per iteration.
    for run in range(1, runs + 1):
        env.reset()
        # Initial observation for every traffic-signal agent
        # (PettingZoo-style multi-agent API: env.agents / env.observe).
        initial_states = {ts: env.observe(ts) for ts in env.agents}
        # One tabular Q-learning agent per traffic signal. encode()
        # presumably maps the raw observation to a discrete state usable
        # as a Q-table key -- TODO confirm against the env implementation.
        ql_agents = {
            ts:
            QLAgent(starting_state=env.unwrapped.env.encode(
                initial_states[ts], ts),
                    state_space=env.observation_space(ts),
                    action_space=env.action_space(ts),
                    alpha=alpha,
                    gamma=gamma,
                    exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                       min_epsilon=0.005,
                                                       decay=decay))
            for ts in env.agents
        }
        infos = []
        # AEC loop: agents take turns; env.last() yields the data for the
        # agent whose turn it currently is.
        for agent in env.agent_iter():
            s, r, done, info = env.last()
            # Q-update for the reward earned by this agent's previous
            # action; skipped on the agent's first turn, when no action
            # has been taken yet.
            if ql_agents[agent].action is not None:
                ql_agents[agent].learn(next_state=env.unwrapped.env.encode(
                    s, agent),
                                       reward=r)

            # Choose the next action; a finished agent must submit None.
            action = ql_agents[agent].act() if not done else None
Example #2
0
        # Remaining keyword arguments of the SumoEnvironment constructor
        # call that opens above this fragment; values come from the CLI.
        route_file=args.route,        # SUMO demand/route file
        out_csv_name=out_csv,         # prefix for the metrics CSV output
        use_gui=args.gui,             # toggle the SUMO GUI
        num_seconds=args.seconds,     # total simulated seconds per episode
        min_green=args.min_green,     # minimum green-phase duration (seconds)
        max_green=args.max_green,     # maximum green-phase duration (seconds)
        max_depart_delay=0)           # vehicles are never delayed at insertion

    # One independent training run per iteration.
    for run in range(1, args.runs + 1):
        # reset() returns the initial observation for every traffic signal,
        # keyed by traffic-signal id.
        initial_states = env.reset()
        # One tabular Q-learning agent per traffic-signal id. encode()
        # presumably discretizes the raw observation into a Q-table state
        # -- TODO confirm against the env implementation.
        ql_agents = {
            ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                        state_space=env.observation_space,
                        action_space=env.action_space,
                        alpha=args.alpha,
                        gamma=args.gamma,
                        exploration_strategy=EpsilonGreedy(
                            initial_epsilon=args.epsilon,
                            min_epsilon=args.min_epsilon,
                            decay=args.decay))
            for ts in env.ts_ids
        }

        # The env signals episode termination via the '__all__' key.
        done = {'__all__': False}
        infos = []
        if args.fixed:
            # Fixed-time baseline: step with no actions until the episode
            # ends, letting the signals run their default program.
            while not done['__all__']:
                _, _, done, _ = env.step({})
        else:
            # Learned control: every agent picks an action each step.
            while not done['__all__']:
                actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}
Example #3
0
    # Tabular Q-learning hyperparameters.
    alpha = 0.1   # learning rate
    gamma = 0.99  # discount factor
    decay = 1     # epsilon decay factor (1 means epsilon never decays)
    runs = 1      # number of independent training runs

    # Multi-agent SUMO traffic-signal environment on the 4x4 Lucas grid.
    env = SumoEnvironment(net_file='nets/4x4-Lucas/4x4.net.xml',
                          route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
                          use_gui=True,       # show the SUMO GUI
                          num_seconds=80000,  # total simulated seconds per episode
                          max_depart_delay=0)

    for run in range(1, runs+1):
        # reset() returns the initial observation for every traffic signal,
        # keyed by traffic-signal id.
        initial_states = env.reset()
        # One tabular Q-learning agent per traffic-signal id. encode()
        # presumably discretizes the raw observation into a Q-table state
        # -- TODO confirm against the env implementation.
        ql_agents = {ts: QLAgent(starting_state=env.encode(initial_states[ts], ts),
                                 state_space=env.observation_space,
                                 action_space=env.action_space,
                                 alpha=alpha,
                                 gamma=gamma,
                                 exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.ts_ids}
        infos = []
        # The env signals episode termination via the '__all__' key.
        done = {'__all__': False}
        while not done['__all__']:
            # Every agent selects an action for its current state.
            actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

            s, r, done, info = env.step(action=actions)

            # Q-update for each agent using its own next state and reward.
            for agent_id in s.keys():
                ql_agents[agent_id].learn(next_state=env.encode(s[agent_id], agent_id), reward=r[agent_id])

        # Persist per-run metrics to CSV, tagged with the run index.
        env.save_csv('outputs/4x4/ql_test', run)
        # NOTE(review): close() sits inside the run loop, so with runs > 1
        # the env is closed before the next reset() -- confirm that reset()
        # restarts the SUMO process in that case.
        env.close()