Example #1
            traci.trafficlight.Phase(35000, 35000, 35000,
                                     "GGGrrr"),  # north-south
            traci.trafficlight.Phase(2000, 2000, 2000, "yyyrrr"),
            traci.trafficlight.Phase(35000, 35000, 35000,
                                     "rrrGGG"),  # west-east
            traci.trafficlight.Phase(2000, 2000, 2000, "rrryyy")
        ])

    for run in range(1, runs + 1):
        initial_states = env.reset()
        ql_agents = {
            ts:
            QLAgent(starting_state=env.encode(initial_states[ts]),
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=alpha,
                    gamma=gamma,
                    exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                       min_epsilon=0.005,
                                                       decay=decay))
            for ts in env.ts_ids
        }
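        # one tabular Q-learning agent per controlled traffic signal, all sharing the same hyperparameters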
        infos = []
        done = {'__all__': False}
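        # interact with the environment until it sets the '__all__' done flag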
        while not done['__all__']:
            actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

            s, r, done, info = env.step(actions=actions)
            infos.append(info)

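            # Q-learning update for every signal, using its encoded next state and its reward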
            for agent_id in ql_agents.keys():
                ql_agents[agent_id].learn(new_state=env.encode(s[agent_id]),
                                          reward=r[agent_id])
Example #2
            traci.trafficlight.Phase(2000, 2000, 2000, "rrrrryrrrrry")
        ])

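    # select the reward signal: average queue length or accumulated waiting time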
    if args.reward == 'queue':
        env._compute_rewards = env._queue_average_reward
    else:
        env._compute_rewards = env._waiting_time_reward

    for run in range(1, args.runs + 1):
        initial_states = env.reset()
        ql_agents = {
            ts: QLAgent(starting_state=initial_states[ts],
                        state_space=env.observation_space,
                        action_space=env.action_space,
                        alpha=args.alpha,
                        gamma=args.gamma,
                        exploration_strategy=EpsilonGreedy(
                            initial_epsilon=args.epsilon,
                            min_epsilon=args.min_epsilon,
                            decay=args.decay))
            for ts in env.ts_ids
        }

        done = False
        infos = []
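        # with --fixed, take no actions so the default fixed-time signal program runs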
        if args.fixed:
            while not done:
                _, _, done, info = env.step({})
                infos.append(info)
        else:
            while not done:
                # assumed continuation (the snippet is cut off here): act, step, then update each agent,
                # mirroring the learning loop of Example #1
                actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}
                s, r, done, info = env.step(actions=actions)
                infos.append(info)
                for agent_id in ql_agents.keys():
                    ql_agents[agent_id].learn(new_state=s[agent_id], reward=r[agent_id])
Example #3
# Assumed imports: module paths follow the usual layout of the CityFlow / sumo-rl
# example scripts and may differ in the original project.
import argparse
import json
import logging
from datetime import datetime

import numpy as np
from tqdm import tqdm

from cityflow_env import CityFlowEnv
from utility import parse_roadnet
from sumo_rl.agents import QLAgent
from sumo_rl.exploration import EpsilonGreedy


def main():
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=3000,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'], help='choose an algorithm')
    parser.add_argument('--inference', action="store_true", help='run inference instead of training')
    parser.add_argument('--ckpt', type=str, help='path of the checkpoint to load for inference')
    parser.add_argument('--epoch', type=int, default=30, help='number of training epochs')
    parser.add_argument('--save_freq', type=int, default=100, help='model saving frequency')

    args = parser.parse_args()


    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    state_size = config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    #state_size = config["state_size"] = 25
    # the single dimension appended to the tail is for the current phase.
    # [vehicle_count for each start lane] + [current_phase]
    logging.info('state size:%s' % state_size)
    config["action_size"] = len(phase_list)
    # override the parsed phase list with the eight fixed CityFlow phase ids
    phase_list = [1, 2, 3, 4, 5, 6, 7, 8]
    # build cityflow environment
    env = CityFlowEnv(config)
    EPISODES = 1
    num_step = config['num_step']
    state_size = config['state_size']
    total_step = 0
    #num_step = 10
    with tqdm(total=EPISODES*args.num_step) as pbar:
        for i in range(1, EPISODES+1):
            logging.info('EPISODE >>:%s' % i)
            episode_length = 1
            env.reset()
            t = 0
            state = env.get_state()
            state = np.array(list(state['start_lane_vehicle_count'].values()) + [
                state['current_phase']])  # a sample state definition
            # print ('state1:', state)
            state = np.reshape(state, [1, state_size])
            print('state2:', state)
            agent = QLAgent(starting_state=env.get_rl_state(),
                            state_space=1,
                            action_space=env.action_space,
                            alpha=0.1,
                            gamma=0.99,
                            exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                               min_epsilon=0.005,
                                                               decay=1.0))

            last_action = phase_list[agent.act(state)]
            print('last action:', last_action)

            print('episode_length:{}, num_step:{}'.format(episode_length, num_step))
            while episode_length < num_step:
                #logging.info('current state:%s' % state)
                logging.info('EPISODE LENGTH >>%s' % episode_length)
                action = agent.act(state)  # index of action
                logging.info('new action:%s' % action)
                action_phase = phase_list[action]  # actual action
                logging.info('action phase:>>%s' % action_phase)
                next_state, reward = env.step(action_phase)  # one step
                logging.info('STATE>>:%s' % next_state)
                logging.info('ACTION PHASE:{}'.format(action_phase))
                logging.info('ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.current_phase_time))
                logging.info('NORM ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.get_elapsed_time()))
                #for n_s in next_state.iteritems():
                #    logging.info(n_s)
                logging.info('REWARD:%s' % reward)

                # last_action_phase = action_phase
                episode_length += 1
                total_step += 1

                pbar.update(1)
                # tabular Q-learning update with the environment's encoded RL state and the step reward
                agent.learn(new_state=env.get_rl_state(), reward=reward)

                env._compute_step_info()

                state = next_state
                logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}"
                             .format(i, EPISODES, total_step, action, reward))
                pbar.set_description(
                    "total_step:{}, episode:{}, episode_step:{}, reward:{}".format(
                        total_step, i, episode_length, reward))

    env.save_csv()
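

if __name__ == '__main__':
    # assumed entry point (likely present below the excerpt in the original script)
    main()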