Example #1
    def run_one_episode_on_STL(self, episode, total_episodes):
        # load the fixed route for this episode:
        simu_type = genorator.gen_route(episode, is_random = False)                                     # generate the fixed route file
        print('Run fixed_route on mode: ', self.get_simu_type_str(simu_type))
        self._epsilon = 0.0                                                                             # epsilon = 0: every action comes from the model
        traci.start(self._sumoCmd)                                                                      # then start SUMO
        
        # initialise episode statistics:
        self._steps = 0
        self._sum_intersection_queue = 0        # accumulated queue length, updated every simulation step (second)
        tot_neg_reward = 0                      # total negative reward
        pre_wait_time = 0                       # waiting time measured in the previous step

        # initialise control variables:
        action = 0                              # initial action
        old_action = 0
        state = self._get_state(self.I)

        # run one simulation episode (up to max_steps)
        while self._steps < self._max_steps:
            # reset current_wait_time:
            current_wait_time = 0

            # select the action index for this step
            action = self._choose_action(state)

            #  ================================ Take action ====================================================================
            if self._steps != 0 and old_action != action:
                # the chosen phase changed: insert a yellow phase for the old action first
                self._set_yellow_phase(old_action)

                # ver 2: dynamic yellow duration, computed from the previous phase
                yellow_duration_new = self.cal_yellow_duration(old_action)
                current_wait_time = self._simulate(yellow_duration_new)

            self._set_green_phase(action)
            current_wait_time = self._simulate(self._green_duration)
            #  =================================================================================================================

            # get next_state and reward
            next_state = self._get_state(self.I)

            reward = pre_wait_time - current_wait_time              # positive when the cumulative waiting time decreased
            if reward < 0:
                tot_neg_reward += reward

            # reassign:
            pre_wait_time = current_wait_time

            state = next_state
            old_action = action

            # progress log
            print('step: ', self._steps, '|', self._max_steps, ' || action: ', self.get_action_name(action), ' || negative reward: ', tot_neg_reward)

        # store this episode's statistics
        self._save_fixed_status(simu_type)

        # close the connection to SUMO
        traci.close(wait = False)
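
Note: the evaluation run above sets self._epsilon = 0.0 and then relies on self._choose_action(state), which is not part of this excerpt. A minimal sketch of an epsilon-greedy selector consistent with how it is called here, assuming a Keras-style Q-network in self._model and an action count in self._num_actions (both hypothetical names):

import numpy as np

def _choose_action(self, state):
    # Hypothetical epsilon-greedy selection: with probability epsilon pick a random
    # action (exploration), otherwise pick the action with the highest predicted
    # Q-value (exploitation). With self._epsilon = 0.0, as in the run above,
    # every action comes from the model.
    if np.random.rand() < self._epsilon:
        return np.random.randint(0, self._num_actions)
    q_values = self._model.predict(np.expand_dims(state, axis=0))
    return int(np.argmax(q_values[0]))
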
Example #2
	def run_one_episode(self, simu_type, total_episodes):
		
		traffic_code_mode = genorator.gen_route(simu_type, is_random = False)						# generate the route file
		print('Mode: ', self.get_simu_type_str(traffic_code_mode))
		traci.start(self._sumoCmd)														# then start SUMO

		traci.gui.setZoom('View #0',1500)
		traci.gui.setOffset('View #0',595,916)
		self.reset()
		# initialise episode statistics:
		self._steps = 0
		self._sum_intersection_queue = 0		# accumulated queue length, updated every simulation step (second)
		tot_neg_reward = 0						# total negative reward
		pre_wait_time = 0						# waiting time measured in the previous step

		# initialise control variables:
		new_action = 0							# initial action
		old_action = 0							# previous action (used in the phase-change check below)
		state = self._get_state(self.I)

		# count how many times each action is picked (for logging):
		actions_count = [0,0]
		# run one full simulation: keep stepping until no more vehicles are expected
		# while self._steps < self._max_steps:
		while (traci.simulation.getMinExpectedNumber() > 0):
			current_wait_time = 0

			# select the action index for this step
			new_action = self._choose_action(state)

			# for the action-count log:
			actions_count[new_action] += 1
			action_name = self.get_action_name(new_action)

			#  ================================ Take new_action ====================================================================
			if self._steps != 0 and old_action != new_action:
				# the chosen phase changed: insert a yellow phase for the old action first
				self._set_yellow_phase(old_action)
				current_wait_time = self._simulate(self._yellow_duration)
			# take the selected green phase:
			self._set_green_phase(new_action)
			current_wait_time = self._simulate(self._green_duration)
			#  ======================================================================================================================

			# get next_state and reward
			next_state = self._get_state(self.I)
			reward = pre_wait_time - current_wait_time
			self._neg_pos_reward_store.append(reward)

			if reward < 0:
				tot_neg_reward += reward

			# reassign:
			pre_wait_time = current_wait_time
			state = next_state
			old_action = new_action

			# print
			# print('step: ', self._steps, ' || action: ', action_name, ' || negative reward: ', tot_neg_reward)


		self._save_stats(traffic_code_mode, tot_neg_reward)		# save stats for this traffic mode together with the total negative reward

		print("Total negative reward: {}, Eps: {}".format(tot_neg_reward, self._epsilon))

		mode = self.get_simu_type_str(traffic_code_mode)
		log_path = constants.plot_path_trained_model + mode + '/'
		os.makedirs(os.path.dirname(log_path), exist_ok=True)
		log = open(log_path + 'tog_neg_reward.txt', 'a')  # append to the episode log file
		text = mode + ' reward: ' + str(tot_neg_reward) + ' _sum_intersection_queue: ' + str(self._sum_intersection_queue) + '\n'
		log.write(text)
		log.close()

		# close the connection to SUMO
		traci.close(wait = False)
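
Note: unlike the other runs, the loop above advances the simulation until traci.simulation.getMinExpectedNumber() reports that no more vehicles are expected. The waiting-time bookkeeping happens inside self._simulate(...), which is not shown; a plausible sketch, where _get_intersection_queue() and _get_waiting_time() are hypothetical helpers for the statistics it updates:

import traci

def _simulate(self, steps_todo):
    # Advance SUMO by 'steps_todo' simulation steps (seconds), updating the
    # episode statistics along the way, and return the latest waiting time.
    current_wait_time = 0
    while steps_todo > 0:
        traci.simulationStep()                                           # one simulation step
        self._steps += 1
        steps_todo -= 1
        self._sum_intersection_queue += self._get_intersection_queue()   # hypothetical helper
        current_wait_time = self._get_waiting_time()                     # hypothetical helper
    return current_wait_time
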
Example #3
    def run_one_episode_training(self, episode, total_episodes):
        # generate a random route for this training episode
        simu_type = genorator.gen_route(episode, is_random = True)

        print('Training on random route with type: ', self.get_simu_type_str(simu_type))
        self._epsilon = 1.0 - (episode / constants.total_ep_for_epislon)								# linearly decaying exploration rate

        traci.start(self._sumoCmd)														                # then start SUMO

        # initialise episode statistics:
        self._steps = 0
        self._sum_intersection_queue = 0        # accumulated queue length, updated every simulation step (second)
        tot_neg_reward = 0                      # total negative reward
        pre_wait_time = 0                       # waiting time measured in the previous step

        # initialise control variables:
        action = 0                              # initial action
        old_action = 0
        state = self._get_state(self.I)
        action_count = [0,0]                    # how many times each action was picked
        good_bad_count = [0,0]                  # counts of good (reward >= 0) and bad (reward < 0) actions

        # run one simulation episode (up to max_steps)
        while self._steps < self._max_steps:
            # reset current_wait_time:
            current_wait_time = 0

            # select the action index for this step
            action = self._choose_action(state)
            # count the chosen action
            action_count[action] += 1

            #  ================================ Take action ====================================================================
            if self._steps != 0 and old_action != action:
                # the chosen phase changed: insert a yellow phase for the old action first
                self._set_yellow_phase(old_action)

                # ver 1: fixed yellow duration.
                # current_wait_time = self._simulate(self._yellow_duration)

                # ver 2: dynamic yellow duration, computed from the previous phase.
                yellow_duration_new = self.cal_yellow_duration(old_action)
                current_wait_time = self._simulate(yellow_duration_new)

            self._set_green_phase(action)
            current_wait_time = self._simulate(self._green_duration)
            #  =================================================================================================================

            # get next_state and reward
            next_state = self._get_state(self.I)

            reward = pre_wait_time - current_wait_time
            if reward < 0:
                tot_neg_reward += reward
                good_bad_count[1] += 1
            else:
                good_bad_count[0] += 1
            
            # store the transition in replay memory:
            self._memory.add_sample((state, action, reward, next_state))

            # experience-replay training step:
            self._replay()

            # reassign:
            pre_wait_time = current_wait_time
            state = next_state
            old_action = action

            # print
            eval_this_action = 'Good action' if (reward>=0) else 'Bad Action'
            print('step: ', self._steps, '|',self._max_steps,' || action: ', self.get_action_name(action), ': ',eval_this_action,' || negative reward: ', tot_neg_reward)


        print('percent of actions: ', np.array(action_count)/sum(action_count))
        print("Total negative reward: {}, Eps: {}".format(tot_neg_reward, self._epsilon))
        
        # store this episode's statistics
        self._save_stats(simu_type, tot_neg_reward)

        # save the accumulated arrays to disk:
        self.save_all_arrays()

        # close the connection to SUMO
        traci.close(wait = False)
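
Note: the training loop above stores each (state, action, reward, next_state) transition with self._memory.add_sample(...) and then calls self._replay() to train on past experience. The memory object is not part of this excerpt; a minimal sketch of a fixed-size replay buffer that matches the add_sample call (the class name and get_samples method are assumptions):

import random

class Memory:
    # Hypothetical fixed-size FIFO replay buffer.
    def __init__(self, max_size):
        self._max_size = max_size
        self._samples = []

    def add_sample(self, sample):
        # sample = (state, action, reward, next_state)
        self._samples.append(sample)
        if len(self._samples) > self._max_size:
            self._samples.pop(0)                 # discard the oldest transition

    def get_samples(self, n):
        # draw up to n random transitions for one training batch
        return random.sample(self._samples, min(n, len(self._samples)))
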
Example #4
	def run_fixed_duration(self, simu_type, durations_of_phases):

		traffic_code_mode = genorator.gen_route(simu_type, is_random = False)								# generate the route file

		print('Fixed route, mode: ', self.get_simu_type_str(simu_type))
		traci.start(self._sumoCmd)														# then start SUMO

		# reset everything:
		self.reset()
		self._steps = 0
		self._sum_intersection_queue = 0
		tot_neg_reward = 0
		pre_wait_time = 0						# waiting time measured in the previous step
		action = 0								# initial action (phase index)

		# run one full simulation (about 1h30m of simulated time)
		while self._steps < self._max_steps:
			# reset current_wait_time:
			current_wait_time = 0
			#  ============================================================ Perform action ======================
			# phases: 0: EW green   1: EW yellow
			#         2: NS green   3: NS yellow
			if action == 0:
				self._set_green_phase(action)
				current_wait_time = self._simulate(durations_of_phases[0])
			elif action == 1:
				self._set_yellow_phase(0)
				current_wait_time = self._simulate(self._yellow_duration)			
			elif action == 2:
				self._set_green_phase(1)
				current_wait_time = self._simulate(durations_of_phases[1])
			elif action == 3:
				self._set_yellow_phase(1)
				current_wait_time = self._simulate(self._yellow_duration)
			#  =================================================================================================================
				
			reward = pre_wait_time - current_wait_time
			self._neg_pos_reward_store.append(reward)

			if reward < 0:
				tot_neg_reward += reward
				
			# reassign:
			pre_wait_time = current_wait_time

			# next action (phase):
			action = self.select_fixed_action(action)

			# print every step:
			# print('step: ', self._steps, ' || action: ',  self.get_action_name(action), ' || negative reward: ', tot_neg_reward)


		self._save_stats(traffic_code_mode, tot_neg_reward)		# save stats for this traffic mode together with the total negative reward

		print("Total negative reward: {}, Total_waiting_time: {}, AWT: {}".format(tot_neg_reward, self._sum_intersection_queue, self._avg_waiting_time_per_veh))
		
		mode = self.get_simu_type_str(traffic_code_mode)
		log_path = constants.plot_path_fixed_sys + mode + '/'
		os.makedirs(os.path.dirname(log_path), exist_ok=True)
		log = open(log_path + 'tog_neg_reward.txt', 'a')  # append to the episode log file
		text = mode + ' reward: ' + str(tot_neg_reward) + ' _sum_intersection_queue: ' + str(self._sum_intersection_queue) + ' || AWT: ' + str(self._avg_waiting_time_per_veh) + '\n'
		log.write(text)
		log.close()

		# close the connection to SUMO
		traci.close(wait = False)
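
Note: the fixed-duration controller above steps through its phases with self.select_fixed_action(action), which is not shown. Given the phase numbering in the comments (0: EW green, 1: EW yellow, 2: NS green, 3: NS yellow), a sketch of that cycling could be as simple as:

def select_fixed_action(self, action):
    # Move to the next phase in the fixed cycle:
    # 0 (EW green) -> 1 (EW yellow) -> 2 (NS green) -> 3 (NS yellow) -> back to 0
    return (action + 1) % 4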