Exemplo n.º 1
0
    def take_action_pre_train(self, phase_time_now):
        """Pick an action from a per-phase time table, apply it, and score it.

        The chosen action is 0 while ``self.current_phase_duration`` is below
        ``phase_time_now[<current phase>]`` and 1 otherwise.  The simulation
        is then stepped ``self.para_set.MIN_ACTION_TIME`` times through
        ``map_computor.run``; a 1 is forwarded only on the first step, every
        later step is given 0.

        Args:
            phase_time_now: Object indexable by the current phase number; the
                looked-up entry is the threshold compared against
                ``self.current_phase_duration``.

        Returns:
            A ``(reward, action)`` tuple, where ``reward`` is the result of
            ``self.cal_reward_from_list`` on the detail list handed to every
            ``map_computor.run`` call and ``action`` is the 0/1 value chosen
            above.
        """
        starting_phase = self.get_current_phase()
        collected_details = []

        threshold_not_reached = (
            self.current_phase_duration < phase_time_now[starting_phase]
        )
        action = 0 if threshold_not_reached else 1

        for step in range(self.para_set.MIN_ACTION_TIME):
            # The phase is re-read on every step because the previous step
            # may have changed it.
            phase_for_step = self.get_current_phase()
            step_action = 1 if (action == 1 and step == 0) else 0
            # One call per step (noted as "run 1s SUMO" by the original
            # author).  NOTE(review): presumably run() appends to
            # collected_details -- confirm in map_computor.
            step_outcome = map_computor.run(
                action=step_action,
                current_phase=phase_for_step,
                current_phase_duration=self.current_phase_duration,
                vehicle_dict=self.dic_vehicles,
                rewards_info_dict=self.para_set.REWARDS_INFO_DICT,
                f_log_rewards=self.f_log_rewards,
                rewards_detail_dict_list=collected_details,
            )
            (self.current_phase,
             self.current_phase_duration,
             self.vehicle_dict) = step_outcome

        reward = self.cal_reward_from_list(collected_details)
        self.update_state()
        return reward, action
Exemplo n.º 2
0
    def take_action(self, action):
        """Apply ``action`` for one action window and return its reward.

        If the current phase has lasted less than
        ``self.para_set.MIN_PHASE_TIME[<current phase>]`` the requested action
        is replaced by 0.  The simulation is then stepped
        ``self.para_set.MIN_ACTION_TIME`` times through ``map_computor.run``;
        a 1 is forwarded only on the first step, every later step is given 0.

        Args:
            action: Requested action; it is compared against 1 to decide
                whether the first step receives a 1.

        Returns:
            A ``(reward, action)`` tuple, where ``reward`` is the result of
            ``self.cal_reward_from_list`` on the detail list handed to every
            ``map_computor.run`` call and ``action`` is the action actually
            used (0 if the minimum-phase-time rule overrode the request).
        """
        phase_at_entry = self.get_current_phase()
        collected_details = []

        below_minimum = (
            self.current_phase_duration
            < self.para_set.MIN_PHASE_TIME[phase_at_entry]
        )
        if below_minimum:
            action = 0

        for step in range(self.para_set.MIN_ACTION_TIME):
            # The phase is re-read on every step because the previous step
            # may have changed it.
            phase_for_step = self.get_current_phase()
            step_action = 1 if (action == 1 and step == 0) else 0
            # One call per step (noted as "run 1s SUMO" by the original
            # author).  NOTE(review): presumably run() appends to
            # collected_details -- confirm in map_computor.
            step_outcome = map_computor.run(
                action=step_action,
                current_phase=phase_for_step,
                current_phase_duration=self.current_phase_duration,
                vehicle_dict=self.dic_vehicles,
                rewards_info_dict=self.para_set.REWARDS_INFO_DICT,
                f_log_rewards=self.f_log_rewards,
                rewards_detail_dict_list=collected_details,
            )
            (self.current_phase,
             self.current_phase_duration,
             self.vehicle_dict) = step_outcome

        reward = self.cal_reward_from_list(collected_details)
        self.update_state()
        return reward, action
Exemplo n.º 3
0
    def take_action(self, action, phase):
        """Apply ``action`` for one action window and return its reward.

        ``action`` counts as a change when ``int(action) != int(phase)``.
        The simulation is stepped ``self.ParaSet.MIN_ACTION_TIME`` times
        through ``map_computor.run``.  On the first step of a change the
        requested ``action`` is forwarded with ``changed=True``; on every
        other step the value of ``self.get_current_phase()`` is forwarded
        with ``changed=False``.

        Args:
            action: Requested action; must be convertible with ``int``.
            phase: Value the action is compared with; must be convertible
                with ``int``.

        Returns:
            A ``(reward, action)`` tuple, where ``reward`` is the result of
            ``self.cal_reward_from_list`` on the detail list handed to every
            ``map_computor.run`` call and ``action`` is the argument as
            given.
        """
        # The result is not used; the call is retained so the sequence of
        # get_current_phase() calls is unchanged.
        self.get_current_phase()
        collected_details = []
        changed = int(action) != int(phase)

        # Step through the whole action window (the original author's note
        # says MIN_ACTION_TIME is 5).
        for step in range(self.ParaSet.MIN_ACTION_TIME):
            keep_phase_action = self.get_current_phase()
            phase_for_step = self.get_current_phase()
            # TODO: the original author flagged this selection logic as
            # needing to be changed.
            # When the request is a change, only the first step carries it.
            if changed and step == 0:
                step_action = action
            else:
                step_action = keep_phase_action
            # Outcome of applying step_action to the current situation; one
            # call per step (noted as "run 1s SUMO" by the original author).
            step_outcome = map_computor.run(
                action=step_action,
                current_phase=phase_for_step,
                current_phase_duration=self.current_phase_duration,
                vehicle_dict=self.dic_vehicles,
                rewards_info_dict=self.ParaSet.REWARDS_INFO_DICT,
                f_log_rewards=self.f_log_rewards,
                changed=changed,
                rewards_detail_dict_list=collected_details,
            )
            (self.current_phase,
             self.current_phase_duration,
             self.vehicle_dict) = step_outcome
            # The change flag is only reported for the first step.
            changed = False

        reward = self.cal_reward_from_list(collected_details)
        self.update_state()
        return reward, action