def run(self):
    self.env = CityFlowEnv(
        path_to_log=self.dic_path["PATH_TO_LOG"],
        path_to_work_directory=self.dic_path["PATH_TO_DATA"],
        dic_traffic_env_conf=self.dic_traffic_env_conf)
    while True:
        command, data = self.remote.recv()
        if command == 'step':
            observation, reward, done, info = self.env.step(data)
            if done:
                self.env.bulk_log()
                self.try_reset()
                # observation = self.try_reset()
            self.remote.send((observation, reward, done, self.task_id, info, self.done))
        elif command == 'reset':
            observation = self.try_reset()
            self.remote.send((observation, self.task_id))
        elif command == 'reset_task':
            self.env.unwrapped.reset_task(data)
            self.remote.send(True)
        elif command == 'close':
            self.remote.close()
            break
        elif command == 'get_spaces':
            self.remote.send((self.env.observation_space, self.env.action_space))
        elif command == 'bulk_log':
            self.env.bulk_log()
            self.remote.send(True)
        else:
            raise NotImplementedError()
def main():
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=2000)
    parser.add_argument('--ckpt', type=str)
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='batch size used by the agent')
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = "intersection_1_1"
    config["intersection_id"] = intersection_id
    # 1 is for the current phase: [vehicle_count for each start lane] + [current_phase]
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    logging.info(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)

    # inference
    agent.load(args.ckpt)
    env.reset()
    state = env.get_state()
    for i in range(args.num_step):
        action = agent.choose_action(state)  # index of action
        action_phase = phase_list[action]    # actual action
        next_state, reward = env.step(action_phase)  # one step
        state = next_state

        # logging
        logging.info("step:{}/{}, action:{}, reward:{}"
                     .format(i, args.num_step, action, reward))
def single_test_sample(self, policy, task, batch_id, params):
    policy.load_params(params)

    dic_traffic_env_conf = copy.deepcopy(self.dic_traffic_env_conf)
    dic_traffic_env_conf['TRAFFIC_FILE'] = task

    dic_path = copy.deepcopy(self.dic_path)
    dic_path["PATH_TO_LOG"] = os.path.join(
        dic_path['PATH_TO_WORK_DIRECTORY'], 'test_round',
        task, 'tasks_round_' + str(batch_id))
    if not os.path.exists(dic_path['PATH_TO_LOG']):
        os.makedirs(dic_path['PATH_TO_LOG'])

    dic_exp_conf = copy.deepcopy(self.dic_exp_conf)

    env = CityFlowEnv(path_to_log=dic_path["PATH_TO_LOG"],
                      path_to_work_directory=dic_path["PATH_TO_DATA"],
                      dic_traffic_env_conf=dic_traffic_env_conf)

    done = False
    state = env.reset()
    step_num = 0
    stop_cnt = 0
    while not done and step_num < int(
            dic_exp_conf["EPISODE_LEN"] / dic_traffic_env_conf["MIN_ACTION_TIME"]):
        action_list = []
        for one_state in state:
            # one list level for multi-state, the other for multi-intersection
            action = policy.choose_action([[one_state]], test=True)
            action_list.append(action[0])  # for multi-state

        next_state, reward, done, _ = env.step(action_list)
        state = next_state
        step_num += 1
        stop_cnt += 1
    env.bulk_log()
    write_summary(dic_path, task, self.dic_exp_conf["EPISODE_LEN"], batch_id,
                  self.dic_traffic_env_conf['FLOW_FILE'])
def play(self, round, task, if_gui=False):
    dic_traffic_env_conf = copy.deepcopy(self.dic_traffic_env_conf)
    dic_traffic_env_conf['TRAFFIC_FILE'] = task
    if if_gui:
        dic_traffic_env_conf['SAVEREPLAY'] = True

    env = CityFlowEnv(path_to_log=os.path.join(self.work_path, 'test_round'),
                      path_to_work_directory=self.data_path,
                      dic_traffic_env_conf=dic_traffic_env_conf)

    policy = config.DIC_AGENTS[self.dic_exp_conf['MODEL_NAME']](
        dic_agent_conf=self.dic_agent_conf,
        dic_traffic_env_conf=dic_traffic_env_conf,
        dic_path=None
    )
    params = pkl.load(open(os.path.join(self.model_path, 'params_%d.pkl' % round), 'rb'))
    policy.load_params(params)

    done = False
    state = env.reset()
    step_num = 0
    stop_cnt = 0
    while not done and step_num < int(
            self.dic_exp_conf["EPISODE_LEN"] / dic_traffic_env_conf["MIN_ACTION_TIME"]):
        action_list = []
        for one_state in state:
            # one list level for multi-state, the other for multi-intersection
            action = policy.choose_action([[one_state]], test=True)
            action_list.append(action[0])  # for multi-state

        next_state, reward, done, _ = env.step(action_list)
        state = next_state
        step_num += 1
        stop_cnt += 1
    env.bulk_log()
class EnvWorker(mp.Process):
    def __init__(self, remote, dic_path, dic_traffic_env_conf, queue, lock):
        super(EnvWorker, self).__init__()
        self.remote = remote
        self.dic_path = dic_path
        self.dic_traffic_env_conf = dic_traffic_env_conf
        self.queue = queue
        self.lock = lock
        self.task_id = None
        self.done = False

        self.env = CityFlowEnv(
            path_to_log=self.dic_path["PATH_TO_LOG"],
            path_to_work_directory=self.dic_path["PATH_TO_DATA"],
            dic_traffic_env_conf=self.dic_traffic_env_conf)

    def empty_step(self):
        observation = [{
            'cur_phase': [0],
            'lane_num_vehicle': [0, 0, 0, 0, 0, 0, 0, 0]
        }]
        reward, done = [0.0], True
        return observation, reward, done, []

    def try_reset(self):
        with self.lock:
            try:
                self.task_id = self.queue.get(True)
                self.done = (self.task_id is None)
            except queue.Empty:
                self.done = True

        if not self.done:
            if "test_round" not in self.dic_path['PATH_TO_LOG']:
                new_path_to_log = os.path.join(self.dic_path['PATH_TO_LOG'],
                                               'episode_%d' % (self.task_id))
            else:
                new_path_to_log = self.dic_path['PATH_TO_LOG']
            self.env.modify_path_to_log(new_path_to_log)
            if not os.path.exists(new_path_to_log):
                os.makedirs(new_path_to_log)
            state = self.env.reset()
            # observation = (np.zeros(self.env.observation_space.shape,
            #                         dtype=np.float32) if self.done else self.env.reset())
            return state
        else:
            return False

    def run(self):
        while True:
            command, data = self.remote.recv()
            if command == 'step':
                observation, reward, done, info = self.env.step(data)
                if done:
                    self.env.bulk_log()
                    self.try_reset()
                    # observation = self.try_reset()
                self.remote.send(
                    (observation, reward, done, self.task_id, info, self.done))
            elif command == 'reset':
                observation = self.try_reset()
                self.remote.send((observation, self.task_id))
            elif command == 'reset_task':
                self.env.unwrapped.reset_task(data)
                self.remote.send(True)
            elif command == 'close':
                self.remote.close()
                break
            elif command == 'get_spaces':
                self.remote.send(
                    (self.env.observation_space, self.env.action_space))
            elif command == 'bulk_log':
                self.env.bulk_log()
                self.remote.send(True)
            else:
                raise NotImplementedError()
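# A minimal parent-side sketch of the command protocol that EnvWorker.run()
# implements above. The helper name `rollout_one_episode` and the `policy`
# callable are hypothetical; only the command strings ('reset', 'step',
# 'bulk_log', 'close') and the reply tuples mirror the worker loop.
def rollout_one_episode(remote, policy):
    """Drive a single EnvWorker over the parent end of an mp.Pipe()."""
    remote.send(('reset', None))
    state, task_id = remote.recv()
    done = False
    while not done:
        action_list = policy(state)          # hypothetical policy callable
        remote.send(('step', action_list))
        state, reward, done, task_id, info, worker_done = remote.recv()
    remote.send(('bulk_log', None))
    remote.recv()                            # wait for the True acknowledgement
    remote.send(('close', None))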
    # 'replay_data_path': 'data/frontend/web',
    'num_step': args.num_step,
    'lane_phase_info': parse_roadnet(roadnet)  # get lane and phase mapping by parsing the roadnet
}

intersection_id = list(config['lane_phase_info'].keys())[0]
phase_list = config['lane_phase_info'][intersection_id]['phase']
config['state_size'] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
config['action_size'] = len(phase_list)

# add visible gpu if necessary
os.environ["CUDA_VISIBLE_DEVICES"] = ''

env = CityFlowEnv(config)
agent = DQNAgent(config)

# some parameters in dqn
batch_size = 32
EPISODES = 100
learning_start = 300
update_model_freq = 300
update_target_model_freq = 1500
num_step = config['num_step']
state_size = config['state_size']

for e in range(EPISODES):
    # reset initially at each episode
    env.reset()
    t = 0
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config', type=str, default='config/global_config.json',
                        help='config file')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--inference', action="store_true",
                        help='run inference instead of training')
    parser.add_argument('--ckpt', type=str,
                        help='checkpoint to load for inference')
    parser.add_argument('--epoch', type=int, default=10, help='number of training epochs')
    parser.add_argument('--num_step', type=int, default=200,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq', type=int, default=1, help='model saving frequency')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size for training')
    parser.add_argument('--phase_step', type=int, default=15, help='seconds of one phase')
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    assert "1x1" in config['cityflow_config_file'], "please use 1x1 config file for cityflow"
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    config["intersection_id"] = intersection_id
    phase_list = config['lane_phase_info'][config["intersection_id"]]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    logging.info(phase_list)

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)
    config["result_dir"] = result_dir

    # parameters for training and inference
    # batch_size = 32
    EPISODES = args.epoch
    learning_start = 300
    # update_model_freq = args.batch_size
    update_model_freq = 1
    update_target_model_freq = 10

    if not args.inference:
        # build cityflow environment
        cityflow_config["saveReplay"] = True
        json.dump(cityflow_config, open(config["cityflow_config_file"], 'w'))
        env = CityFlowEnv(
            lane_phase_info=config["lane_phase_info"],
            intersection_id=config["intersection_id"],  # for single agent
            num_step=args.num_step,
            cityflow_config_file=config["cityflow_config_file"])

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)
        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)

        # make dirs
        if not os.path.exists("model"):
            os.makedirs("model")
        if not os.path.exists("result"):
            os.makedirs("result")
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

        # training
        total_step = 0
        episode_rewards = []
        episode_scores = []
        with tqdm(total=EPISODES * args.num_step) as pbar:
            for i in range(EPISODES):
                # print("episode: {}".format(i))
                env.reset()
                state = env.get_state()

                episode_length = 0
                episode_reward = 0
                episode_score = 0
                while episode_length < args.num_step:
                    action = agent.choose_action_(state)  # index of action
                    action_phase = phase_list[action]     # actual action
                    # no yellow light
                    next_state, reward = env.step(action_phase)  # one step
                    # last_action_phase = action_phase
                    episode_length += 1
                    total_step += 1
                    episode_reward += reward
                    episode_score += env.get_score()

                    # hold the phase for the remaining steps and average the reward
                    for _ in range(args.phase_step - 1):
                        next_state, reward_ = env.step(action_phase)
                        reward += reward_
                    reward /= args.phase_step

                    pbar.update(1)

                    # store to replay buffer
                    if episode_length > learning_start:
                        agent.remember(state, action_phase, reward, next_state)
                    state = next_state

                    # training
                    if episode_length > learning_start and total_step % update_model_freq == 0:
                        if len(agent.memory) > args.batch_size:
                            agent.replay()

                    # update target Q network
                    if episode_length > learning_start and total_step % update_target_model_freq == 0:
                        agent.update_target_network()

                    # logging
                    # logging.info("\repisode:{}/{}, total_step:{}, action:{}, reward:{}"
                    #              .format(i+1, EPISODES, total_step, action, reward))
                    pbar.set_description(
                        "total_step:{}, episode:{}, episode_step:{}, reward:{}"
                        .format(total_step, i + 1, episode_length, reward))

                # save episode rewards
                episode_rewards.append(episode_reward / args.num_step)  # record episode mean reward
                episode_scores.append(episode_score)
                print("score: {}, mean reward:{}".format(
                    episode_score, episode_reward / args.num_step))

                # save model
                if (i + 1) % args.save_freq == 0:
                    if args.algo != 'DuelDQN':
                        agent.model.save(model_dir + "/{}-{}.h5".format(args.algo, i + 1))
                    else:
                        agent.save(model_dir + "/{}-ckpt".format(args.algo), i + 1)

                # save reward to file
                df = pd.DataFrame({"rewards": episode_rewards})
                df.to_csv(result_dir + '/rewards.csv', index=None)
                df = pd.DataFrame({"rewards": episode_scores})
                df.to_csv(result_dir + '/scores.csv', index=None)

                # save figure
                plot_data_lists([episode_rewards], ['episode reward'],
                                figure_name=result_dir + '/rewards.pdf')
                plot_data_lists([episode_scores], ['episode score'],
                                figure_name=result_dir + '/scores.pdf')

    else:
        # inference
        cityflow_config["saveReplay"] = True
        json.dump(cityflow_config, open(config["cityflow_config_file"], 'w'))
        env = CityFlowEnv(
            lane_phase_info=config["lane_phase_info"],
            intersection_id=config["intersection_id"],  # for single agent
            num_step=args.num_step,
            cityflow_config_file=config["cityflow_config_file"])
        env.reset()

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)
        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)
        agent.load(args.ckpt)

        state = env.get_state()
        scores = []
        for i in range(args.num_step):
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]    # actual action
            next_state, reward = env.step(action_phase)  # one step
            for _ in range(args.phase_step - 1):
                next_state, reward_ = env.step(action_phase)
                reward += reward_
            reward /= args.phase_step

            score = env.get_score()
            scores.append(score)
            state = next_state

            # logging
            logging.info("step:{}/{}, action:{}, reward:{}, score:{}".format(
                i + 1, args.num_step, action, reward, score))

        inf_result_dir = "result/" + args.ckpt.split("/")[1]
        df = pd.DataFrame({"inf_scores": scores})
        df.to_csv(inf_result_dir + '/inf_scores.csv', index=None)
        plot_data_lists([scores], ['inference scores'],
                        figure_name=inf_result_dir + '/inf_scores.pdf')
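# A small, self-contained sketch of the action-repeat pattern used in the
# training and inference loops above: hold one phase for `phase_step` env
# steps and average the per-step rewards. `hold_phase` is a hypothetical
# helper; it only assumes env.step(action) -> (next_state, reward) as in the
# single-intersection CityFlowEnv used here.
def hold_phase(env, action_phase, phase_step):
    total_reward = 0.0
    next_state = None
    for _ in range(phase_step):
        next_state, reward = env.step(action_phase)   # repeat the same phase
        total_reward += reward
    return next_state, total_reward / phase_step      # mean reward over the hold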
def main():
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=3000,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--inference', action="store_true",
                        help='run inference instead of training')
    parser.add_argument('--ckpt', type=str,
                        help='checkpoint to load for inference')
    parser.add_argument('--epoch', type=int, default=30, help='number of training epochs')
    parser.add_argument('--save_freq', type=int, default=100, help='model saving frequency')
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    state_size = config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    # state_size = config["state_size"] = 25
    # the single dimension appended to the tail is for the current phase:
    # [vehicle_count for each start lane] + [current_phase]
    logging.info('state size:%s' % state_size)
    config["action_size"] = len(phase_list)
    phase_list = [1, 2, 3, 4, 5, 6, 7, 8]

    # build cityflow environment
    env = CityFlowEnv(config)

    EPISODES = 1
    num_step = config['num_step']
    state_size = config['state_size']
    total_step = 0
    # num_step = 10

    with tqdm(total=EPISODES * args.num_step) as pbar:
        for i in range(1, EPISODES + 1):
            logging.info('EPISODE >>:%s' % i)
            episode_length = 1
            env.reset()
            t = 0
            state = env.get_state()
            state = np.array(list(state['start_lane_vehicle_count'].values())
                             + [state['current_phase']])  # a sample state definition
            # print('state1:', state)
            state = np.reshape(state, [1, state_size])
            print('state2:', state)

            agent = QLAgent(starting_state=env.get_rl_state(),
                            state_space=1,
                            action_space=env.action_space,
                            alpha=0.1,
                            gamma=0.99,
                            exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                               min_epsilon=0.005,
                                                               decay=1.0))
            last_action = phase_list[agent.act(state)]
            print('last action:', last_action)
            print('episode_length:{}, num_step:{}'.format(episode_length, num_step))

            while episode_length < num_step:
                # logging.info('current state:%s' % state)
                logging.info('EPISODE LENGTH >>%s' % episode_length)
                action = agent.act(state)  # index of action
                logging.info('new action:%s' % action)
                action_phase = phase_list[action]  # actual action
                logging.info('action phase:>>%s' % action_phase)
                next_state, reward = env.step(action_phase)  # one step
                logging.info('STATE>>:%s' % next_state)
                logging.info('ACTION PHASE:{}'.format(action_phase))
                logging.info('ELAPSED TIME ON PHASE {} is {}'.format(
                    env.current_phase, env.current_phase_time))
                logging.info('NORM ELAPSED TIME ON PHASE {} is {}'.format(
                    env.current_phase, env.get_elapsed_time()))
                # for n_s in next_state.iteritems():
                #     logging.info(n_s)
                logging.info('REWARD:%s' % reward)
                # last_action_phase = action_phase
                episode_length += 1
                total_step += 1
                pbar.update(1)

                # store to replay buffer
                # prepare state
                agent.learn(new_state=env.get_rl_state(), reward=reward)
                env._compute_step_info()
                state = next_state

                logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}"
                             .format(i, EPISODES, total_step, action, reward))
                pbar.set_description(
                    "total_step:{}, episode:{}, episode_step:{}, reward:{}"
                    .format(total_step, i, episode_length, reward))

    env.save_csv()
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    # 1 is for the current phase: [vehicle_count for each start lane] + [current_phase]
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build learner
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    coord = tf.train.Coordinator()
    reward_clip = agent_config.reward_clip[1]
    lock = threading.Lock()

    agent = IMPALAAgent(sess=sess,
                        name='global',
                        unroll=agent_config.unroll,
                        state_shape=agent_config.state_shape,
                        output_size=agent_config.output_size,
                        activation=agent_config.activation,
                        final_activation=agent_config.final_activation,
                        hidden=agent_config.hidden,
                        coef=agent_config.entropy_coef,
                        reward_clip=reward_clip)

    # build agents
    n_threads = 16
    thread_list = []
    for i in range(n_threads):
        single_agent = async_agent.Agent(session=sess,
                                         coord=coord,
                                         name='thread_{}'.format(i),
                                         global_network=agent,
                                         reward_clip=reward_clip,
                                         lock=lock)
        thread_list.append(single_agent)

    init = tf.global_variables_initializer()
    sess.run(init)

    for t in thread_list:
        t.start()

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the DQN learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(list(state['start_lane_vehicle_count'].values())
                         + [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]    # actual action
            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(list(next_state['start_lane_vehicle_count'].values())
                                  + [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)
            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()
            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    # 1 is for the current phase: [vehicle_count for each start lane] + [current_phase]
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the DQN learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(list(state['start_lane_vehicle_count'].values())
                         + [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]    # actual action
            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(list(next_state['start_lane_vehicle_count'].values())
                                  + [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)
            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()
            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))
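# A tiny sketch of the state-vector construction repeated in the training
# loops above: per-start-lane vehicle counts plus the current phase index,
# reshaped to (1, state_size) for the Keras-style agent. `to_state_vector`
# is a hypothetical helper; the dict keys match the env.get_state() output
# used in these scripts.
import numpy as np

def to_state_vector(obs, state_size):
    vec = np.array(list(obs['start_lane_vehicle_count'].values())
                   + [obs['current_phase']])
    return np.reshape(vec, [1, state_size])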
args = parse_arguments()
roadnet = 'data/{}/roadnet.json'.format(args.scenario)

## configuration for both environment and agent
config = {
    'scenario': args.scenario,
    'data': 'data/{}'.format(args.scenario),
    'roadnet': roadnet,
    'flow': 'data/{}/flow.json'.format(args.scenario),
    # 'replay_data_path': 'data/frontend/web',
    'num_step': args.num_step,
    'lane_phase_info': parse_roadnet(roadnet)  # get lane and phase mapping by parsing the roadnet
}

env = CityFlowEnv(config)
agent = SOTLAgent(config)

# reset initially
t = 0
env.reset()
last_action = agent.choose_action(env.get_state())

while t < config['num_step']:
    state = env.get_state()
    action = agent.choose_action(state)
    if action == last_action:
        env.step(action)
    else:
        for _ in range(env.yellow_time):
            env.step(0)  # required yellow time