Example #1
    def run(self):
        self.env = CityFlowEnv(
            path_to_log=self.dic_path["PATH_TO_LOG"],
            path_to_work_directory=self.dic_path["PATH_TO_DATA"],
            dic_traffic_env_conf=self.dic_traffic_env_conf)
        while True:
            command, data = self.remote.recv()
            if command == 'step':
                observation, reward, done, info = self.env.step(data)
                if done:
                    self.env.bulk_log()
                    self.try_reset()
                    # observation = self.try_reset()
                self.remote.send((observation, reward, done, self.task_id, info, self.done))
            elif command == 'reset':
                observation = self.try_reset()
                self.remote.send((observation, self.task_id))
            elif command == 'reset_task':
                self.env.unwrapped.reset_task(data)
                self.remote.send(True)
            elif command == 'close':
                self.remote.close()
                break
            elif command == 'get_spaces':
                self.remote.send((self.env.observation_space,
                                  self.env.action_space))
            elif command == 'bulk_log':
                self.env.bulk_log()
                self.remote.send(True)
            else:
                raise NotImplementedError()
Example #2
    def __init__(self, remote, dic_path, dic_traffic_env_conf, queue, lock):
        super(EnvWorker, self).__init__()
        self.remote = remote
        self.dic_path = dic_path
        self.dic_traffic_env_conf = dic_traffic_env_conf

        self.queue = queue
        self.lock = lock
        self.task_id = None
        self.done = False

        self.env = CityFlowEnv(
            path_to_log=self.dic_path["PATH_TO_LOG"],
            path_to_work_directory=self.dic_path["PATH_TO_DATA"],
            dic_traffic_env_conf=self.dic_traffic_env_conf)
Example #3
def main():
    logging.getLogger().setLevel(logging.INFO)
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=2000)
    parser.add_argument('--ckpt', type=str)
    parser.add_argument('--algo', type=str, default='DQN', choices=['DQN', 'DDQN', 'DuelDQN'], help='choose an algorithm')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size')  # assumed default; config["batch_size"] below reads this


    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = "intersection_1_1"
    config["intersection_id"] = intersection_id
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    
    logging.info(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)
    
    # inference
    agent.load(args.ckpt)
    env.reset()
    state = env.get_state()
    
    for i in range(args.num_step): 
        action = agent.choose_action(state) # index of action
        action_phase = phase_list[action] # actual action
        next_state, reward = env.step(action_phase) # one step

        state = next_state

        # logging
        logging.info("step:{}/{}, action:{}, reward:{}"
                        .format(i, args.num_step, action, reward))
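Example #3 sets state_size to the number of start lanes plus one for the current phase. In the later snippets (Examples #10 and #11) the matching flat state vector is built as [vehicle_count for each start lane] + [current_phase]; a minimal sketch of that conversion follows, assuming the raw state dict exposes the 'start_lane_vehicle_count' and 'current_phase' keys as those snippets do (whether DQNAgent.choose_action expects this flat form depends on the agent implementation):

import numpy as np

def flatten_state(state, state_size):
    # [vehicle_count for each start lane] + [current_phase]
    vec = np.array(list(state['start_lane_vehicle_count'].values())
                   + [state['current_phase']])
    return np.reshape(vec, [1, state_size])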
Example #4
    def single_test_sample(self, policy, task, batch_id, params):
        policy.load_params(params)

        dic_traffic_env_conf = copy.deepcopy(self.dic_traffic_env_conf)
        dic_traffic_env_conf['TRAFFIC_FILE'] = task

        dic_path = copy.deepcopy(self.dic_path)
        dic_path["PATH_TO_LOG"] = os.path.join(
            dic_path['PATH_TO_WORK_DIRECTORY'], 'test_round', task,
            'tasks_round_' + str(batch_id))

        if not os.path.exists(dic_path['PATH_TO_LOG']):
            os.makedirs(dic_path['PATH_TO_LOG'])

        dic_exp_conf = copy.deepcopy(self.dic_exp_conf)

        env = CityFlowEnv(path_to_log=dic_path["PATH_TO_LOG"],
                          path_to_work_directory=dic_path["PATH_TO_DATA"],
                          dic_traffic_env_conf=dic_traffic_env_conf)

        done = False
        state = env.reset()
        step_num = 0
        stop_cnt = 0
        while not done and step_num < int(
                dic_exp_conf["EPISODE_LEN"] /
                dic_traffic_env_conf["MIN_ACTION_TIME"]):
            action_list = []
            for one_state in state:
                action = policy.choose_action(
                    [[one_state]], test=True
                )  # one for multi-state, the other for multi-intersection
                action_list.append(action[0])  # for multi-state

            next_state, reward, done, _ = env.step(action_list)
            state = next_state
            step_num += 1
            stop_cnt += 1
        env.bulk_log()
        write_summary(dic_path, task, self.dic_exp_conf["EPISODE_LEN"],
                      batch_id, self.dic_traffic_env_conf['FLOW_FILE'])
Example #5
    def play(self, round, task, if_gui=False):
        dic_traffic_env_conf = copy.deepcopy(self.dic_traffic_env_conf)
        dic_traffic_env_conf['TRAFFIC_FILE'] = task
        if if_gui:
            dic_traffic_env_conf['SAVEREPLAY'] = True

        env = CityFlowEnv(path_to_log=os.path.join(self.work_path, 'test_round'),
                      path_to_work_directory=self.data_path,
                      dic_traffic_env_conf=dic_traffic_env_conf)

        policy = config.DIC_AGENTS[self.dic_exp_conf['MODEL_NAME']](
            dic_agent_conf=self.dic_agent_conf,
            dic_traffic_env_conf=dic_traffic_env_conf,
            dic_path=None
        )

        params = pkl.load(open(os.path.join(self.model_path, 'params_%d.pkl'%round), 'rb'))
        policy.load_params(params)
        done = False
        state = env.reset()
        step_num = 0
        stop_cnt = 0
        while not done and step_num < int(
                self.dic_exp_conf["EPISODE_LEN"] / dic_traffic_env_conf["MIN_ACTION_TIME"]):
            action_list = []
            for one_state in state:
                action = policy.choose_action([[one_state]],
                                              test=True)  # one for multi-state, the other for multi-intersection
                action_list.append(action[0])  # for multi-state

            next_state, reward, done, _ = env.step(action_list)

            state = next_state
            step_num += 1
            stop_cnt += 1
        env.bulk_log()
Example #6
class EnvWorker(mp.Process):
    def __init__(self, remote, dic_path, dic_traffic_env_conf, queue, lock):
        super(EnvWorker, self).__init__()
        self.remote = remote
        self.dic_path = dic_path
        self.dic_traffic_env_conf = dic_traffic_env_conf

        self.queue = queue
        self.lock = lock
        self.task_id = None
        self.done = False

        self.env = CityFlowEnv(
            path_to_log=self.dic_path["PATH_TO_LOG"],
            path_to_work_directory=self.dic_path["PATH_TO_DATA"],
            dic_traffic_env_conf=self.dic_traffic_env_conf)

    def empty_step(self):
        observation = [{
            'cur_phase': [0],
            'lane_num_vehicle': [0, 0, 0, 0, 0, 0, 0, 0]
        }]
        reward, done = [0.0], True
        return observation, reward, done, []

    def try_reset(self):
        with self.lock:
            try:
                self.task_id = self.queue.get(True)
                self.done = (self.task_id is None)
            except queue.Empty:
                self.done = True
        if not self.done:
            if "test_round" not in self.dic_path['PATH_TO_LOG']:
                new_path_to_log = os.path.join(self.dic_path['PATH_TO_LOG'],
                                               'episode_%d' % (self.task_id))
            else:
                new_path_to_log = self.dic_path['PATH_TO_LOG']
            self.env.modify_path_to_log(new_path_to_log)
            if not os.path.exists(new_path_to_log):
                os.makedirs(new_path_to_log)
            state = self.env.reset()
            #observation = (np.zeros(self.env.observation_space.shape,
            #    dtype=np.float32) if self.done else self.env.reset())
            return state
        else:
            return False

    def run(self):
        while True:
            command, data = self.remote.recv()
            if command == 'step':
                observation, reward, done, info = self.env.step(data)
                if done:
                    self.env.bulk_log()
                    self.try_reset()
                    #observation = self.try_reset()
                self.remote.send(
                    (observation, reward, done, self.task_id, info, self.done))
            elif command == 'reset':
                observation = self.try_reset()
                self.remote.send((observation, self.task_id))
            elif command == 'reset_task':
                self.env.unwrapped.reset_task(data)
                self.remote.send(True)
            elif command == 'close':
                self.remote.close()
                break
            elif command == 'get_spaces':
                self.remote.send(
                    (self.env.observation_space, self.env.action_space))
            elif command == 'bulk_log':
                self.env.bulk_log()
                self.remote.send(True)
            else:
                raise NotImplementedError()
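The worker above is driven entirely by ('command', data) tuples received over its Pipe: 'step', 'reset', 'reset_task', 'get_spaces', 'bulk_log', and 'close'. A minimal parent-side driver sketch, assuming EnvWorker and its CityFlowEnv dependencies are importable and that dic_path and dic_traffic_env_conf are valid configuration dicts (both placeholders here):

import multiprocessing as mp

def drive_one_worker(dic_path, dic_traffic_env_conf, task_list):
    # command channel used by EnvWorker.run(); the worker keeps the child end
    parent_remote, child_remote = mp.Pipe()
    task_queue = mp.Queue()
    lock = mp.Lock()

    for task in task_list:      # tasks consumed one by one in try_reset()
        task_queue.put(task)
    task_queue.put(None)        # None marks the worker as done

    worker = EnvWorker(child_remote, dic_path, dic_traffic_env_conf, task_queue, lock)
    worker.start()

    parent_remote.send(('reset', None))
    state, task_id = parent_remote.recv()

    parent_remote.send(('step', [0]))   # placeholder: one action per intersection
    state, reward, done, task_id, info, worker_done = parent_remote.recv()

    parent_remote.send(('bulk_log', None))
    parent_remote.recv()

    parent_remote.send(('close', None))
    worker.join()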
Example #7
        #'replay_data_path': 'data/frontend/web',
        'num_step': args.num_step,
        'lane_phase_info': parse_roadnet(
            roadnet)  # get lane and phase mapping by parsing the roadnet
    }

    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    config['state_size'] = len(
        config['lane_phase_info'][intersection_id]['start_lane']) + 1
    config['action_size'] = len(phase_list)

    # add visible gpu if necessary
    os.environ["CUDA_VISIBLE_DEVICES"] = ''

    env = CityFlowEnv(config)
    agent = DQNAgent(config)

    # some parameters in dqn
    batch_size = 32
    EPISODES = 100
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    for e in range(EPISODES):
        # reset initially at each episode
        env.reset()
        t = 0
Example #8
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config.json',
                        help='config file')
    parser.add_argument('--algo',
                        type=str,
                        default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--inference',
                        action="store_true",
                        help='inference or training')
    parser.add_argument('--ckpt', type=str, help='path to model checkpoint for inference')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help='number of training epochs')
    parser.add_argument(
        '--num_step',
        type=int,
        default=200,
        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq',
                        type=int,
                        default=1,
                        help='model saving frequency')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='batchsize for training')
    parser.add_argument('--phase_step',
                        type=int,
                        default=15,
                        help='seconds of one phase')

    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step

    assert "1x1" in config[
        'cityflow_config_file'], "please use 1x1 config file for cityflow"

    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    config["intersection_id"] = intersection_id

    phase_list = config['lane_phase_info'][config["intersection_id"]]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size

    logging.info(phase_list)

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)
    config["result_dir"] = result_dir

    # parameters for training and inference
    # batch_size = 32
    EPISODES = args.epoch
    learning_start = 300
    # update_model_freq = args.batch_size
    update_model_freq = 1
    update_target_model_freq = 10

    if not args.inference:
        # build cityflow environment
        cityflow_config["saveReplay"] = True
        json.dump(cityflow_config, open(config["cityflow_config_file"], 'w'))
        env = CityFlowEnv(
            lane_phase_info=config["lane_phase_info"],
            intersection_id=config["intersection_id"],  # for single agent
            num_step=args.num_step,
            cityflow_config_file=config["cityflow_config_file"])

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)

        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)

        # make dirs
        if not os.path.exists("model"):
            os.makedirs("model")
        if not os.path.exists("result"):
            os.makedirs("result")
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

        # training
        total_step = 0
        episode_rewards = []
        episode_scores = []
        with tqdm(total=EPISODES * args.num_step) as pbar:
            for i in range(EPISODES):
                # print("episode: {}".format(i))
                env.reset()
                state = env.get_state()

                episode_length = 0
                episode_reward = 0
                episode_score = 0
                while episode_length < args.num_step:

                    action = agent.choose_action_(state)  # index of action
                    action_phase = phase_list[action]  # actual action
                    # no yellow light
                    next_state, reward = env.step(action_phase)  # one step
                    # last_action_phase = action_phase
                    episode_length += 1
                    total_step += 1
                    episode_reward += reward
                    episode_score += env.get_score()

                    for _ in range(args.phase_step - 1):
                        next_state, reward_ = env.step(action_phase)
                        reward += reward_

                    reward /= args.phase_step

                    pbar.update(1)
                    # store to replay buffer
                    if episode_length > learning_start:
                        agent.remember(state, action_phase, reward, next_state)

                    state = next_state

                    # training
                    if episode_length > learning_start and total_step % update_model_freq == 0:
                        if len(agent.memory) > args.batch_size:
                            agent.replay()

                    # update target Q network
                    if episode_length > learning_start and total_step % update_target_model_freq == 0:
                        agent.update_target_network()

                    # logging
                    # logging.info("\repisode:{}/{}, total_step:{}, action:{}, reward:{}"
                    #             .format(i+1, EPISODES, total_step, action, reward))
                    pbar.set_description(
                        "total_step:{}, episode:{}, episode_step:{}, reward:{}"
                        .format(total_step, i + 1, episode_length, reward))

                # save episode rewards
                episode_rewards.append(
                    episode_reward /
                    args.num_step)  # record episode mean reward
                episode_scores.append(episode_score)
                print("score: {}, mean reward:{}".format(
                    episode_score, episode_reward / args.num_step))

                # save model
                if (i + 1) % args.save_freq == 0:
                    if args.algo != 'DuelDQN':
                        agent.model.save(model_dir +
                                         "/{}-{}.h5".format(args.algo, i + 1))
                    else:
                        agent.save(model_dir + "/{}-ckpt".format(args.algo),
                                   i + 1)

                    # save reward to file
                    df = pd.DataFrame({"rewards": episode_rewards})
                    df.to_csv(result_dir + '/rewards.csv', index=None)

                    df = pd.DataFrame({"rewards": episode_scores})
                    df.to_csv(result_dir + '/scores.csv', index=None)

                    # save figure
                    plot_data_lists([episode_rewards], ['episode reward'],
                                    figure_name=result_dir + '/rewards.pdf')
                    plot_data_lists([episode_scores], ['episode score'],
                                    figure_name=result_dir + '/scores.pdf')

    else:
        # inference
        cityflow_config["saveReplay"] = True
        json.dump(cityflow_config, open(config["cityflow_config_file"], 'w'))
        env = CityFlowEnv(
            lane_phase_info=config["lane_phase_info"],
            intersection_id=config["intersection_id"],  # for single agent
            num_step=args.num_step,
            cityflow_config_file=config["cityflow_config_file"])
        env.reset()

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)

        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)
        agent.load(args.ckpt)

        state = env.get_state()
        scores = []
        for i in range(args.num_step):
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]  # actual action
            next_state, reward = env.step(action_phase)  # one step

            for _ in range(args.phase_step - 1):
                next_state, reward_ = env.step(action_phase)
                reward += reward_

            reward /= args.phase_step

            score = env.get_score()
            scores.append(score)
            state = next_state

            # logging
            logging.info("step:{}/{}, action:{}, reward:{}, score:{}".format(
                i + 1, args.num_step, action, reward, score))

        inf_result_dir = "result/" + args.ckpt.split("/")[1]
        df = pd.DataFrame({"inf_scores": scores})
        df.to_csv(inf_result_dir + '/inf_scores.csv', index=None)
        plot_data_lists([scores], ['inference scores'],
                        figure_name=inf_result_dir + '/inf_scores.pdf')
Example #9
def main():
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=3000,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'], help='choose an algorithm')
    parser.add_argument('--inference', action="store_true", help='inference or training')
    parser.add_argument('--ckpt', type=str, help='path to model checkpoint for inference')
    parser.add_argument('--epoch', type=int, default=30, help='number of training epochs')
    parser.add_argument('--save_freq', type=int, default=100, help='model saving frequency')

    args = parser.parse_args()


    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    state_size = config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    #state_size = config["state_size"] = 25
    # the single dimension appended to the tail is for the current phase.
    # [vehicle_count for each start lane] + [current_phase]
    logging.info('state size:%s' % state_size)
    config["action_size"] = len(phase_list)
    phase_list = [1,2,3,4,5,6,7,8]
    # build cityflow environment
    env = CityFlowEnv(config)
    EPISODES = 1
    num_step = config['num_step']
    state_size = config['state_size']
    total_step = 0
    #num_step = 10
    with tqdm(total=EPISODES*args.num_step) as pbar:
        for i in range(1, EPISODES+1):
            logging.info('EPISODE >>:%s' % i)
            episode_length = 1
            env.reset()
            t=0
            state = env.get_state()
            state = np.array(list(state['start_lane_vehicle_count'].values()) + [
                state['current_phase']])  # a sample state definition
            # print ('state1:', state)
            state = np.reshape(state, [1, state_size])
            print('state2:', state)
            agent = QLAgent(starting_state=env.get_rl_state(),
                                 state_space=1,
                                 action_space=env.action_space,
                                 alpha=0.1,
                                 gamma=0.99,
                                 exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                                    min_epsilon=0.005,
                                                                    decay=1.0))

            last_action = phase_list[agent.act(state)]
            print('last action:', last_action)

            print('episode_length:{}, num_step:{}'.format(episode_length, num_step))
            while episode_length < num_step:
                #logging.info('current state:%s' % state)
                logging.info('EPISODE LENGTH >>%s' % episode_length)
                action = agent.act(state)  # index of action
                logging.info('new action:%s' % action)
                action_phase = phase_list[action]  # actual action
                logging.info('action phase:>>%s' % action_phase)
                next_state, reward = env.step(action_phase)  # one step
                logging.info('STATE>>:%s' % next_state)
                logging.info('ACTION PHASE:{}'.format(action_phase))
                logging.info('ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.current_phase_time))
                logging.info('NORM ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.get_elapsed_time()))
                #for n_s in next_state.iteritems():
                #    logging.info(n_s)
                logging.info('REWARD:%s' % reward)

                # last_action_phase = action_phase
                episode_length += 1
                total_step += 1

                pbar.update(1)
                # store to replay buffer
                # prepare state
                agent.learn(new_state=env.get_rl_state(), reward=reward)

                env._compute_step_info()

                state = next_state
                logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}"
                             .format(i, EPISODES, total_step, action, reward))
                pbar.set_description("total_step:{total_step}, episode:{i}, episode_step:{episode_length}, "
                                     "reward:{reward}")

    env.save_csv()
Example #10
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    config["state_size"] = len(
        config['lane_phase_info'][intersection_id]['start_lane']
    ) + 1  # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build learner
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    coord = tf.train.Coordinator()

    reward_clip = agent_config.reward_clip[1]
    lock = threading.Lock()
    agent = IMPALAAgent(sess=sess,
                        name='global',
                        unroll=agent_config.unroll,
                        state_shape=agent_config.state_shape,
                        output_size=agent_config.output_size,
                        activation=agent_config.activation,
                        final_activation=agent_config.final_activation,
                        hidden=agent_config.hidden,
                        coef=agent_config.entropy_coef,
                        reward_clip=reward_clip)

    # build agents
    n_threads = 16

    thread_list = []

    for i in range(n_threads):
        single_agent = async_agent.Agent(session=sess,
                                         coord=coord,
                                         name='thread_{}'.format(i),
                                         global_network=agent,
                                         reward_clip=reward_clip,
                                         lock=lock)

        thread_list.append(single_agent)

    init = tf.global_variables_initializer()
    sess.run(init)

    for t in thread_list:
        t.start()

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the dqn learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(
            list(state['start_lane_vehicle_count'].values()) +
            [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]  # actual action
            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(
                list(next_state['start_lane_vehicle_count'].values()) +
                [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)

            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()

            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info(
                "episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                    i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))
Example #11
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    config["state_size"] = len(
        config['lane_phase_info'][intersection_id]['start_lane']
    ) + 1  # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the dqn learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(
            list(state['start_lane_vehicle_count'].values()) +
            [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]  # actual action
            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(
                list(next_state['start_lane_vehicle_count'].values()) +
                [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)

            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()

            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info(
                "episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                    i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))
Example #12
args = parse_arguments()
roadnet = 'data/{}/roadnet.json'.format(args.scenario)

## configuration for both environment and agent
config = {
    'scenario': args.scenario,
    'data': 'data/{}'.format(args.scenario),
    'roadnet': roadnet,
    'flow': 'data/{}/flow.json'.format(args.scenario),
    #'replay_data_path': 'data/frontend/web',
    'num_step': args.num_step,
    'lane_phase_info': parse_roadnet(roadnet) # get lane and phase mapping by parsing the roadnet
}

env = CityFlowEnv(config)
agent = SOTLAgent(config)

# reset initially
t = 0
env.reset()
last_action = agent.choose_action(env.get_state())

while t < config['num_step']:
    state = env.get_state()
    action = agent.choose_action(state)
    if action == last_action:
        env.step(action)
    else:
        for _ in range(env.yellow_time):
            env.step(0)  # required yellow time