    def run_one_episode_on_STL(self, episode, total_episodes):
        # load fixed route:
        simu_type = genorator.gen_route(episode, is_random=False)  # load fixed route.
        print('Run fixed_route on mode: ', self.get_simu_type_str(simu_type))
        self._epsilon = 0.0  # all actions are chosen by the model (no exploration).

        traci.start(self._sumoCmd)  # then, start SUMO

        # INIT some vars:
        self._steps = 0
        self._sum_intersection_queue = 0  # increases every step/second
        tot_neg_reward = 0  # total negative reward
        pre_wait_time = 0

        # INIT my vars:
        action = 0  # initial action
        old_action = 0
        state = self._get_state(self.I)

        # run 1 simulation (max_steps)
        while self._steps < self._max_steps:
            # reset current_wait_time:
            current_wait_time = 0

            # select action (select index of action, then edit action_time)
            action = self._choose_action(state)

            # ================================ Take action =====================================
            if self._steps != 0 and old_action != action:
                # just set traffic_light in SUMO
                self._set_yellow_phase(old_action)
                # ver 2: dynamic yellow.
                yellow_duration_new = self.cal_yellow_duration(old_action)
                current_wait_time = self._simulate(yellow_duration_new)
            self._set_green_phase(action)
            current_wait_time = self._simulate(self._green_duration)
            # ==================================================================================

            # get next_state and reward
            next_state = self._get_state(self.I)
            reward = pre_wait_time - current_wait_time
            if reward < 0:
                tot_neg_reward += reward

            # reassign:
            pre_wait_time = current_wait_time
            state = next_state
            old_action = action

            # print
            print('step: ', self._steps, '|', self._max_steps,
                  ' || action: ', self.get_action_name(action),
                  ' || negative reward: ', tot_neg_reward)

        # append data
        self._save_fixed_status(simu_type)

        # close gui.
        traci.close(wait=False)
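
    # --------------------------------------------------------------------------------------
    # Illustrative sketch only, not part of the original class: the dynamic-yellow branch
    # above calls self.cal_yellow_duration(old_action), which is defined elsewhere in this
    # class. The helper below shows one plausible way such a duration could be derived,
    # assuming a hypothetical self._green_lanes_of_action() lookup and default bounds of
    # 3-6 seconds; the real implementation may differ.
    def _cal_yellow_duration_sketch(self, old_action, min_yellow=3, max_yellow=6):
        # Scale the yellow time with the fastest approach traffic on the lanes that are
        # about to lose right of way: faster traffic needs a longer clearance interval.
        lanes = self._green_lanes_of_action(old_action)  # hypothetical lane-ID lookup
        max_speed = max((traci.lane.getLastStepMeanSpeed(lane) for lane in lanes), default=0.0)
        # Linear interpolation between the bounds, using an assumed 15 m/s reference speed.
        duration = min_yellow + (max_yellow - min_yellow) * min(max_speed / 15.0, 1.0)
        return int(round(duration))
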
    def run_one_episode(self, simu_type, total_episodes):
        traffic_code_mode = genorator.gen_route(simu_type, is_random=False)  # gen route file.
        print('Mode: ', self.get_simu_type_str(traffic_code_mode))

        traci.start(self._sumoCmd)  # then, start SUMO
        traci.gui.setZoom('View #0', 1500)
        traci.gui.setOffset('View #0', 595, 916)
        self.reset()

        # INIT some vars:
        self._steps = 0
        self._sum_intersection_queue = 0  # increases every step/second
        tot_neg_reward = 0  # total negative reward
        pre_wait_time = 0

        # INIT my vars:
        new_action = 0  # initial action
        old_action = 0
        state = self._get_state(self.I)

        # test
        actions_count = [0, 0]  # count how many times each action is picked.

        # run 1 simulation (1h30m)
        # while self._steps < self._max_steps:
        while traci.simulation.getMinExpectedNumber() > 0:
            current_wait_time = 0

            # select action (select index of action, then edit action_time)
            new_action = self._choose_action(state)

            # for writing log:
            actions_count[new_action] += 1
            action_name = self.get_action_name(new_action)

            # ================================ Take new_action =================================
            if self._steps != 0 and old_action != new_action:
                self._set_yellow_phase(old_action)
                current_wait_time = self._simulate(self._yellow_duration)
            # take action:
            self._set_green_phase(new_action)
            current_wait_time = self._simulate(self._green_duration)
            # ===================================================================================

            # get next_state and reward
            next_state = self._get_state(self.I)
            reward = pre_wait_time - current_wait_time
            self._neg_pos_reward_store.append(reward)
            if reward < 0:
                tot_neg_reward += reward

            # reassign:
            pre_wait_time = current_wait_time
            state = next_state
            old_action = new_action

            # print
            # print('step: ', self._steps, ' || action: ', action_name, ' || negative reward: ', tot_neg_reward)

        self._save_stats(traffic_code_mode, tot_neg_reward)  # mode LOW + total neg-REWARD
        print("Total negative reward: {}, Eps: {}".format(tot_neg_reward, self._epsilon))

        mode = self.get_simu_type_str(traffic_code_mode)
        log_path = constants.plot_path_trained_model + mode + '/'
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        log = open(log_path + 'tog_neg_reward.txt', 'a')  # open log file for appending.
        text = (mode + ' reward: ' + str(tot_neg_reward)
                + ' _sum_intersection_queue: ' + str(self._sum_intersection_queue) + '\n')
        log.write(text)
        log.close()

        # close gui.
        traci.close(wait=False)
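
    # --------------------------------------------------------------------------------------
    # Illustrative usage sketch, not part of the original class: evaluation of the trained
    # model is typically driven by a small loop over the traffic modes. The class name
    # `Simulation` and the traffic codes (0, 1, 2) below are assumptions for illustration;
    # the actual driver script may construct and call this differently.
    #
    #     sim = Simulation(...)              # hypothetical instance holding _sumoCmd, the model, etc.
    #     for simu_type in (0, 1, 2):        # e.g. LOW / HIGH / NS-heavy traffic codes
    #         sim.run_one_episode(simu_type, total_episodes=1)
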
    def run_one_episode_training(self, episode, total_episodes):
        # gen 3-way random route.
        simu_type = genorator.gen_route(episode, is_random=True)
        print('Training on random route with type: ', self.get_simu_type_str(simu_type))
        self._epsilon = 1.0 - (episode / constants.total_ep_for_epislon)  # setup epsilon

        traci.start(self._sumoCmd)  # then, start SUMO

        # INIT some vars:
        self._steps = 0
        self._sum_intersection_queue = 0  # increases every step/second
        tot_neg_reward = 0  # total negative reward
        pre_wait_time = 0

        # INIT my vars:
        action = 0  # initial action
        old_action = 0
        state = self._get_state(self.I)
        action_count = [0, 0]    # cal percent of actions
        good_bad_count = [0, 0]  # count good/bad actions

        # run 1 simulation (max_steps)
        while self._steps < self._max_steps:
            # reset current_wait_time:
            current_wait_time = 0

            # select action (select index of action, then edit action_time)
            action = self._choose_action(state)
            # just count numb of taken actions.
            action_count[action] += 1

            # ================================ Take action =====================================
            if self._steps != 0 and old_action != action:
                # just set traffic_light in SUMO
                self._set_yellow_phase(old_action)
                # ver 1: fixed yellow.
                # current_wait_time = self._simulate(self._yellow_duration)
                # ver 2: dynamic yellow.
                yellow_duration_new = self.cal_yellow_duration(old_action)
                current_wait_time = self._simulate(yellow_duration_new)
            self._set_green_phase(action)
            current_wait_time = self._simulate(self._green_duration)
            # ==================================================================================

            # get next_state and reward
            next_state = self._get_state(self.I)
            reward = pre_wait_time - current_wait_time
            if reward < 0:
                tot_neg_reward += reward
                good_bad_count[1] += 1
            else:
                good_bad_count[0] += 1

            # save tuple:
            self._memory.add_sample((state, action, reward, next_state))
            # training:
            self._replay()

            # reassign:
            pre_wait_time = current_wait_time
            state = next_state
            old_action = action

            # print
            eval_this_action = 'Good action' if (reward >= 0) else 'Bad action'
            print('step: ', self._steps, '|', self._max_steps,
                  ' || action: ', self.get_action_name(action), ': ', eval_this_action,
                  ' || negative reward: ', tot_neg_reward)
            print('percent of actions: ', np.array(action_count) / sum(action_count))

        print("Total negative reward: {}, Eps: {}".format(tot_neg_reward, self._epsilon))

        # append data
        self._save_stats(simu_type, tot_neg_reward)
        # save arrays to disk:
        self.save_all_arrays()

        # close gui.
        traci.close(wait=False)
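
    # --------------------------------------------------------------------------------------
    # Illustrative sketch only, not part of the original class: the training loop above
    # relies on self._choose_action(state), defined elsewhere in this class, with
    # self._epsilon annealed from 1.0 towards 0.0 over constants.total_ep_for_epislon
    # episodes. A standard epsilon-greedy selection, assuming hypothetical members
    # self._num_actions and self._predict_q_values(), would look roughly like this:
    def _choose_action_sketch(self, state):
        if np.random.rand() < self._epsilon:
            # explore: pick a random phase index
            return int(np.random.randint(0, self._num_actions))
        # exploit: pick the phase with the highest predicted Q-value
        q_values = self._predict_q_values(state)  # hypothetical helper returning a Q-value array
        return int(np.argmax(q_values))
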
    def run_fixed_duration(self, simu_type, durations_of_phases):
        traffic_code_mode = genorator.gen_route(simu_type, is_random=False)  # gen route file.
        print('Fixed route, mode: ', self.get_simu_type_str(simu_type))

        traci.start(self._sumoCmd)  # then, start SUMO

        # reset everything:
        self.reset()
        self._steps = 0
        self._sum_intersection_queue = 0
        tot_neg_reward = 0
        pre_wait_time = 0
        action = 0  # initial action

        # run 1 simulation (1h30m)
        while self._steps < self._max_steps:
            # reset current_wait_time:
            current_wait_time = 0

            # =============================== Perform action ===================================
            # 0: EW green   1: EW yellow
            # 2: NS green   3: NS yellow
            if action == 0:
                self._set_green_phase(action)
                current_wait_time = self._simulate(durations_of_phases[0])
            elif action == 1:
                self._set_yellow_phase(0)
                current_wait_time = self._simulate(self._yellow_duration)
            elif action == 2:
                self._set_green_phase(1)
                current_wait_time = self._simulate(durations_of_phases[1])
            elif action == 3:
                self._set_yellow_phase(1)
                current_wait_time = self._simulate(self._yellow_duration)
            # ===================================================================================

            reward = pre_wait_time - current_wait_time
            self._neg_pos_reward_store.append(reward)
            if reward < 0:
                tot_neg_reward += reward

            # reassign:
            pre_wait_time = current_wait_time

            # next action (phase):
            action = self.select_fixed_action(action)

            # print every step:
            # print('step: ', self._steps, ' || action: ', self.get_action_name(action), ' || negative reward: ', tot_neg_reward)

        self._save_stats(traffic_code_mode, tot_neg_reward)  # mode LOW + total neg-REWARD
        print("Total negative reward: {}, Total_waiting_time: {}, AWT: {}".format(
            tot_neg_reward, self._sum_intersection_queue, self._avg_waiting_time_per_veh))

        mode = self.get_simu_type_str(traffic_code_mode)
        log_path = constants.plot_path_fixed_sys + mode + '/'
        os.makedirs(os.path.dirname(log_path), exist_ok=True)
        log = open(log_path + 'tog_neg_reward.txt', 'a')  # open log file for appending.
        text = (mode + ' reward: ' + str(tot_neg_reward)
                + ' _sum_intersection_queue: ' + str(self._sum_intersection_queue)
                + ' || AWT: ' + str(self._avg_waiting_time_per_veh) + '\n')
        log.write(text)
        log.close()

        # close gui.
        traci.close(wait=False)
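
    # --------------------------------------------------------------------------------------
    # Illustrative sketch only, not part of the original class: select_fixed_action, defined
    # elsewhere in this class, advances the fixed-time controller through the phase cycle
    # documented above (0: EW green -> 1: EW yellow -> 2: NS green -> 3: NS yellow -> 0, ...).
    # Under the assumption of exactly these four phases, a minimal version is:
    def _select_fixed_action_sketch(self, action):
        # wrap around after the last phase so the cycle repeats for the whole episode
        return (action + 1) % 4
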