# NOTE(review): whitespace-mangled fragment of a sumo-rl training script using
# the PettingZoo AEC API (env.agent_iter() / env.last(), per-agent act/learn).
# It is INCOMPLETE at both edges: the enclosing environment-constructor call
# opens before this chunk (this line begins mid-argument-list at route_file=),
# and the agent loop is cut off after computing `action` — the matching
# env.step(action) presumably follows outside this view; TODO confirm against
# the full file. Code left byte-identical; reformat once the whole script is
# visible.
route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml', use_gui=False, min_green=8, delta_time=5, num_seconds=80000) for run in range(1, runs + 1): env.reset() initial_states = {ts: env.observe(ts) for ts in env.agents} ql_agents = { ts: QLAgent(starting_state=env.unwrapped.env.encode( initial_states[ts], ts), state_space=env.observation_space(ts), action_space=env.action_space(ts), alpha=alpha, gamma=gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.agents } infos = [] for agent in env.agent_iter(): s, r, done, info = env.last() if ql_agents[agent].action is not None: ql_agents[agent].learn(next_state=env.unwrapped.env.encode( s, agent), reward=r) action = ql_agents[agent].act() if not done else None
# NOTE(review): whitespace-mangled fragment of an argparse-driven sumo-rl
# script (hyperparameters come from `args`, with an `args.fixed` branch that
# runs fixed-time control via env.step({})). INCOMPLETE at both edges: the
# environment-constructor call opens before this chunk (line begins
# mid-argument-list at route_file=), and the Q-learning branch is cut off
# right after building `actions` — the env.step(actions)/learn() calls
# presumably follow outside this view; TODO confirm against the full file.
# Code left byte-identical; reformat once the whole script is visible.
route_file=args.route, out_csv_name=out_csv, use_gui=args.gui, num_seconds=args.seconds, min_green=args.min_green, max_green=args.max_green, max_depart_delay=0) for run in range(1, args.runs + 1): initial_states = env.reset() ql_agents = { ts: QLAgent(starting_state=env.encode(initial_states[ts], ts), state_space=env.observation_space, action_space=env.action_space, alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy( initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids } done = {'__all__': False} infos = [] if args.fixed: while not done['__all__']: _, _, done, _ = env.step({}) else: while not done['__all__']: actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}
# Train one independent tabular Q-learning agent per traffic signal on the
# 4x4 SUMO grid, then dump per-run metrics to CSV.
# NOTE(review): SumoEnvironment, QLAgent and EpsilonGreedy are imported
# elsewhere in this file (sumo-rl); they are not visible in this chunk.
alpha = 0.1    # Q-learning step size
gamma = 0.99   # discount factor
decay = 1      # epsilon decay rate (1 = no decay)
runs = 1       # number of independent training runs

env = SumoEnvironment(
    net_file='nets/4x4-Lucas/4x4.net.xml',
    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
    use_gui=True,
    num_seconds=80000,
    max_depart_delay=0,
)

for run in range(1, runs + 1):
    initial_states = env.reset()

    # One QLAgent per traffic-signal id, seeded with its encoded initial state.
    ql_agents = {
        ts: QLAgent(
            starting_state=env.encode(initial_states[ts], ts),
            state_space=env.observation_space,
            action_space=env.action_space,
            alpha=alpha,
            gamma=gamma,
            exploration_strategy=EpsilonGreedy(
                initial_epsilon=0.05,
                min_epsilon=0.005,
                decay=decay,
            ),
        )
        for ts in env.ts_ids
    }

    infos = []
    done = {'__all__': False}
    while not done['__all__']:
        # Every agent picks an action, the env advances, and each agent
        # learns from its own (next_state, reward) pair.
        actions = {ts: agent.act() for ts, agent in ql_agents.items()}
        s, r, done, info = env.step(action=actions)
        for agent_id in s.keys():
            ql_agents[agent_id].learn(
                next_state=env.encode(s[agent_id], agent_id),
                reward=r[agent_id],
            )

    env.save_csv('outputs/4x4/ql_test', run)
    env.close()