def take_action_pre_train(self, phase_time_now):
    """Pick an action from a fixed phase schedule and apply it.

    The action is 0 while ``self.current_phase_duration`` is below
    ``phase_time_now[<current phase>]`` and 1 otherwise.  The simulation is
    then stepped ``self.para_set.MIN_ACTION_TIME`` times through
    ``map_computor.run``; a non-zero action is only forwarded on the very
    first step, every later step is run with action 0.

    Args:
        phase_time_now: container indexed by the current phase number that
            yields the duration threshold for that phase.

    Returns:
        A ``(reward, action)`` tuple: the reward computed by
        ``self.cal_reward_from_list`` over the per-step reward details, and
        the action (0 or 1) that was chosen.
    """
    phase_at_start = self.get_current_phase()
    step_reward_details = []

    # Action 1 once the phase has lasted its scheduled time, otherwise 0.
    action = (
        0
        if self.current_phase_duration < phase_time_now[phase_at_start]
        else 1
    )

    for second in range(self.para_set.MIN_ACTION_TIME):
        phase_now = self.get_current_phase()
        # Only the first step of the window carries the action itself.
        step_action = 1 if (action == 1 and second == 0) else 0

        # One call per step (the original comment reads "run 1s SUMO").
        step_result = map_computor.run(
            action=step_action,
            current_phase=phase_now,
            current_phase_duration=self.current_phase_duration,
            vehicle_dict=self.dic_vehicles,
            rewards_info_dict=self.para_set.REWARDS_INFO_DICT,
            f_log_rewards=self.f_log_rewards,
            rewards_detail_dict_list=step_reward_details,
        )
        (
            self.current_phase,
            self.current_phase_duration,
            self.vehicle_dict,
        ) = step_result

    reward = self.cal_reward_from_list(step_reward_details)
    self.update_state()
    return reward, action
def take_action(self, action):
    """Apply ``action`` for ``MIN_ACTION_TIME`` steps and return the reward.

    If the current phase has lasted less than
    ``self.para_set.MIN_PHASE_TIME[<current phase>]`` the requested action is
    overridden with 0.  The simulation is then stepped
    ``self.para_set.MIN_ACTION_TIME`` times through ``map_computor.run``;
    an action of 1 is only forwarded on the first step, all remaining steps
    are run with action 0.

    Args:
        action: the requested action; compared against 1 to decide whether
            the first step forwards a 1.

    Returns:
        A ``(reward, action)`` tuple, where ``action`` is the value actually
        used (0 if the request was overridden) and ``reward`` comes from
        ``self.cal_reward_from_list`` over the per-step reward details.
    """
    phase_at_start = self.get_current_phase()
    step_reward_details = []

    # Too early in the current phase: ignore the request.
    too_early = (
        self.current_phase_duration
        < self.para_set.MIN_PHASE_TIME[phase_at_start]
    )
    if too_early:
        action = 0

    for second in range(self.para_set.MIN_ACTION_TIME):
        phase_now = self.get_current_phase()
        # Only the first step of the window carries the action itself.
        step_action = 1 if (action == 1 and second == 0) else 0

        # One call per step (the original comment reads "run 1s SUMO").
        step_result = map_computor.run(
            action=step_action,
            current_phase=phase_now,
            current_phase_duration=self.current_phase_duration,
            vehicle_dict=self.dic_vehicles,
            rewards_info_dict=self.para_set.REWARDS_INFO_DICT,
            f_log_rewards=self.f_log_rewards,
            rewards_detail_dict_list=step_reward_details,
        )
        (
            self.current_phase,
            self.current_phase_duration,
            self.vehicle_dict,
        ) = step_result

    reward = self.cal_reward_from_list(step_reward_details)
    self.update_state()
    return reward, action
def take_action(self, action, phase):
    """Apply ``action`` for ``MIN_ACTION_TIME`` steps and return the reward.

    ``action`` and ``phase`` are compared as ints; when they differ the call
    is treated as a change.  ``map_computor.run`` is then called
    ``self.ParaSet.MIN_ACTION_TIME`` times.  On the first step of a change
    ``action`` itself is passed to ``run``; on every other step the value of
    ``self.get_current_phase()`` is passed instead.

    Args:
        action: the requested action; must be convertible with ``int()``.
        phase: the value ``action`` is compared against to detect a change;
            must be convertible with ``int()``.

    Returns:
        A ``(reward, action)`` tuple: the reward from
        ``self.cal_reward_from_list`` over the per-step reward details, and
        the ``action`` argument unchanged.
    """
    current_phase_number = self.get_current_phase()
    rewards_detail_dict_list = []
    if int(action) == int(phase):
        changed = False
    else:
        changed = True
    # (original note, truncated) "If the current signal has not exceeded the
    # minimum duration, a" -- no minimum-duration check is present below.
    # Loop up to MIN_ACTION_TIME (original author's note: "up to 5").
    for i in range(self.ParaSet.MIN_ACTION_TIME):
        # During the action time, default to the current phase.
        action_in_second = self.get_current_phase()
        current_phase_number = self.get_current_phase()
        # TODO (original author): this part needs to be changed!
        # If the action is a change, apply the change on the first step.
        if changed and i == 0:
            action_in_second = action
        # Result of applying action_in_second in the current situation.
        self.current_phase, self.current_phase_duration, self.vehicle_dict = map_computor.run(
            action=action_in_second,
            current_phase=current_phase_number,
            current_phase_duration=self.current_phase_duration,
            vehicle_dict=self.dic_vehicles,
            rewards_info_dict=self.ParaSet.REWARDS_INFO_DICT,
            f_log_rewards=self.f_log_rewards,
            changed=changed,
            rewards_detail_dict_list=rewards_detail_dict_list
        )  # run 1s SUMO
        # NOTE(review): placed inside the loop so that only the first step
        # is run with changed=True -- the source formatting was lost, so
        # confirm this indentation against the original file.
        changed = False
    reward = self.cal_reward_from_list(rewards_detail_dict_list)
    self.update_state()
    return reward, action