# NOTE(review): whitespace-mangled fragment of a sumo-rl training script using
# the PettingZoo AEC API (env.agent_iter() / env.last(), per-agent act/learn).
# It is INCOMPLETE at both edges: the enclosing environment-constructor call
# opens before this chunk (this line begins mid-argument-list at route_file=),
# and the agent loop is cut off after computing `action` — the matching
# env.step(action) presumably follows outside this view; TODO confirm against
# the full file. Code left byte-identical; reformat once the whole script is
# visible.
route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml', use_gui=False, min_green=8, delta_time=5, num_seconds=80000) for run in range(1, runs + 1): env.reset() initial_states = {ts: env.observe(ts) for ts in env.agents} ql_agents = { ts: QLAgent(starting_state=env.unwrapped.env.encode( initial_states[ts], ts), state_space=env.observation_space(ts), action_space=env.action_space(ts), alpha=alpha, gamma=gamma, exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay)) for ts in env.agents } infos = [] for agent in env.agent_iter(): s, r, done, info = env.last() if ql_agents[agent].action is not None: ql_agents[agent].learn(next_state=env.unwrapped.env.encode( s, agent), reward=r) action = ql_agents[agent].act() if not done else None
# NOTE(review): whitespace-mangled fragment of an argparse-driven sumo-rl
# script (hyperparameters come from `args`, with an `args.fixed` branch that
# runs fixed-time control via env.step({})). INCOMPLETE at both edges: the
# environment-constructor call opens before this chunk (line begins
# mid-argument-list at route_file=), and the Q-learning branch is cut off
# right after building `actions` — the env.step(actions)/learn() calls
# presumably follow outside this view; TODO confirm against the full file.
# Code left byte-identical; reformat once the whole script is visible.
route_file=args.route, out_csv_name=out_csv, use_gui=args.gui, num_seconds=args.seconds, min_green=args.min_green, max_green=args.max_green, max_depart_delay=0) for run in range(1, args.runs + 1): initial_states = env.reset() ql_agents = { ts: QLAgent(starting_state=env.encode(initial_states[ts], ts), state_space=env.observation_space, action_space=env.action_space, alpha=args.alpha, gamma=args.gamma, exploration_strategy=EpsilonGreedy( initial_epsilon=args.epsilon, min_epsilon=args.min_epsilon, decay=args.decay)) for ts in env.ts_ids } done = {'__all__': False} infos = [] if args.fixed: while not done['__all__']: _, _, done, _ = env.step({}) else: while not done['__all__']: actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}
# Train one independent tabular Q-learning agent per traffic signal on the
# 4x4 SUMO grid, then dump per-run metrics to CSV.
# NOTE(review): SumoEnvironment, QLAgent and EpsilonGreedy are imported
# elsewhere in this file (sumo-rl); they are not visible in this chunk.
alpha = 0.1    # Q-learning step size
gamma = 0.99   # discount factor
decay = 1      # epsilon decay rate (1 = no decay)
runs = 1       # number of independent training runs

env = SumoEnvironment(
    net_file='nets/4x4-Lucas/4x4.net.xml',
    route_file='nets/4x4-Lucas/4x4c1c2c1c2.rou.xml',
    use_gui=True,
    num_seconds=80000,
    max_depart_delay=0,
)

for run in range(1, runs + 1):
    initial_states = env.reset()

    # One QLAgent per traffic-signal id, seeded with its encoded initial state.
    ql_agents = {
        ts: QLAgent(
            starting_state=env.encode(initial_states[ts], ts),
            state_space=env.observation_space,
            action_space=env.action_space,
            alpha=alpha,
            gamma=gamma,
            exploration_strategy=EpsilonGreedy(
                initial_epsilon=0.05,
                min_epsilon=0.005,
                decay=decay,
            ),
        )
        for ts in env.ts_ids
    }

    infos = []
    done = {'__all__': False}
    while not done['__all__']:
        # Every agent picks an action, the env advances, and each agent
        # learns from its own (next_state, reward) pair.
        actions = {ts: agent.act() for ts, agent in ql_agents.items()}
        s, r, done, info = env.step(action=actions)
        for agent_id in s.keys():
            ql_agents[agent_id].learn(
                next_state=env.encode(s[agent_id], agent_id),
                reward=r[agent_id],
            )

    env.save_csv('outputs/4x4/ql_test', run)
    env.close()