def play():
    """Play the simulator with a trained model (no exploration, no training).

    Connects to the CARLA server, starts the traffic and weather controller
    threads, then repeatedly spawns an Agent and drives it with the greedy
    (argmax-Q) action until interrupted.  On exit the controller threads are
    signalled to terminate via their ``terminate`` flags.
    """
    client = carla.Client(settings.CONNECTION_IP, settings.CONNECTION_PORT)
    client.set_timeout(20.0)

    # Create controllers
    trafic_control = TraficControlThread(client)
    weather_control = WeatherControlThread(client)
    trafic_control.start()
    weather_control.start()
    logger.info("Controllers started")

    # Inference-only model handler loaded from the target weights.
    predicter = ModelHandler(settings.MODEL_NAME,
                             target_weights_path=MODEL_WEIGHTS,
                             train=False)
    agent = Agent(999999, client, False)

    try:
        while True:
            step = 1
            state = agent.spawn()
            while True:
                start_step_time = time.time()

                # Greedy policy: always take the highest-Q action.
                action = int(np.argmax(predicter.get_qs(state)))
                new_state, _, done = agent.step(action)
                state = new_state

                if done:
                    agent.clear_agent()
                    break

                # FPS throttling: prefer staying on the episode-wide schedule
                # (time_diff1); otherwise pace this single step (time_diff2).
                time_diff1 = agent.episode_start + step / settings.FPS_COMPENSATION - time.time()
                time_diff2 = start_step_time + 1 / settings.FPS_COMPENSATION - time.time()
                if time_diff1 > 0:
                    time.sleep(min(0.125, time_diff1))
                elif time_diff2 > 0:
                    time.sleep(min(0.125, time_diff2))

                # BUGFIX: step was never incremented, so the episode-wide
                # pacing term (time_diff1) was stuck at step 1 forever.
                # Trainer.run() increments it; play() now matches.
                step += 1
    except KeyboardInterrupt:
        logger.info("Exiting playing - Keyboard interrupt")
    except Exception:
        # BUGFIX: was a bare `except:` that also swallowed SystemExit and
        # discarded the traceback; log it so failures are diagnosable.
        logger.exception("Playing failed")
    finally:
        # Ask both controller threads to shut down.
        trafic_control.terminate = True
        weather_control.terminate = True
class Trainer(Thread):
    """Worker thread that runs episodes with its own Agent and pushes the
    collected experience into a shared replay memory.

    The thread loops until ``terminate`` is set, pausing while ``halt`` is
    set.  Each episode it follows an epsilon-greedy policy (greedy Q-values
    via ``get_qs_callbatch``, random otherwise) and, when the episode was
    long enough, hands the transition tuples to
    ``update_replay_memory_callback`` and records a weighted score.
    """

    def __init__(self, client, identifier, epsilon, get_qs_callbatch,
                 update_replay_memory_callback):
        """Create a daemon trainer thread.

        Args:
            client: CARLA client used to build this trainer's Agent.
            identifier: Numeric id used for logging and the Agent.
            epsilon: Exploration rate container; ``None`` disables exploration.
            get_qs_callbatch: Callable mapping a state to Q-values.
            update_replay_memory_callback: Callable receiving a deque of
                ``(state, action, reward, new_state, done)`` tuples.
        """
        super().__init__()
        self.daemon = True
        self.client = client
        self.terminate = False       # set externally to stop the thread
        self.fail_flag = False       # set when spawn/step fails
        self.halt = False            # set externally to pause training
        self.get_qs = get_qs_callbatch
        self.update_replay_memory = update_replay_memory_callback
        self.identifier = identifier
        self.agent = Agent(identifier, self.client, True)
        self.action = None           # last action taken (None between episodes)
        self.episode = 0
        self.epsilon = epsilon

        # Rolling windows for logging/statistics.
        self.scores_history = deque(maxlen=settings.LOG_EVERY)
        self.score_record = None
        self.steps_per_second = deque(maxlen=settings.LOG_EVERY)
        self.actions_statistic = deque(
            maxlen=int(settings.LOG_EVERY * settings.SECONDS_PER_EXPISODE *
                       settings.FPS_COMPENSATION))

    def get_action(self, action: int):
        """Return the fraction of recently-logged greedy steps that chose
        ``action`` (0 when nothing has been logged yet)."""
        num_of_logged_actions = len(self.actions_statistic)
        if num_of_logged_actions <= 0:
            return 0
        return self.actions_statistic.count(action) / num_of_logged_actions

    def get_steps_per_second(self):
        """Return the mean of the recent per-episode steps/second values."""
        if len(self.steps_per_second) > 0:
            return sum(self.steps_per_second) / len(self.steps_per_second)
        return 0

    def get_preview_data(self):
        """Return the agent's latest camera frame as BGR, or a black image of
        the configured preview dimensions when no frame is available."""
        if self.agent.prev_camera is not None and self.agent.initialized:
            return cv2.cvtColor(self.agent.prev_camera, cv2.COLOR_RGB2BGR)
        return np.zeros((settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[1],
                         settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[0],
                         settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[2]))

    def get_mean_score(self):
        """Return the mean of the recent weighted episode scores."""
        if len(self.scores_history) > 0:
            return sum(self.scores_history) / len(self.scores_history)
        return 0

    def get_episode(self):
        """Return the number of episodes committed to replay memory."""
        return self.episode

    def run(self) -> None:
        """Episode loop: spawn, step epsilon-greedily, then commit the
        episode to replay memory when it lasted long enough."""
        logger.info(f"Trainer {self.identifier} started")
        while not self.terminate:
            if self.halt:
                time.sleep(0.1)
                continue

            reward = None
            episode_reward = 0
            step = 1
            try:
                state = self.agent.spawn()
                self.fail_flag = False
            except Exception:
                # BUGFIX: was a bare `except:`; narrowed and logged so a
                # failed spawn is visible instead of silently ending the thread.
                logger.exception(f"Trainer {self.identifier} - Failed to spawn")
                self.fail_flag = True
                break

            episode_data_memory = deque()
            while not self.fail_flag:
                start_step_time = time.time()

                # Epsilon-greedy: greedy Q action (logged for statistics),
                # otherwise a uniformly random action.
                if self.epsilon is None or np.random.random() > self.epsilon:
                    self.action = int(np.argmax(self.get_qs(state)))
                    self.actions_statistic.append(self.action)
                else:
                    self.action = random.choice(list(settings.ACTIONS.keys()))

                try:
                    new_state, reward, done = self.agent.step(self.action)
                except Exception:
                    # BUGFIX: was a bare `except:` (would also catch
                    # SystemExit); behavior otherwise unchanged.
                    logger.error(
                        f"Trainer {self.identifier} - Failed to make step")
                    self.fail_flag = True
                    break

                episode_data_memory.append(
                    (state, self.action, reward, new_state, done))
                state = new_state
                episode_reward += reward

                if done:
                    self.agent.clear_agent()
                    self.action = None
                    break

                # FPS throttling: prefer the episode-wide schedule, fall back
                # to pacing this single step.
                time_diff1 = (self.agent.episode_start +
                              step / settings.FPS_COMPENSATION - time.time())
                time_diff2 = (start_step_time +
                              1 / settings.FPS_COMPENSATION - time.time())
                if time_diff1 > 0:
                    time.sleep(min(0.125, time_diff1))
                elif time_diff2 > 0:
                    time.sleep(min(0.125, time_diff2))

                step += 1

            # Episode never produced a reward (failed immediately) — skip stats.
            if not reward or not self.agent.episode_start:
                continue

            episode_time = time.time() - self.agent.episode_start
            if episode_time == 0:
                # BUGFIX: was `10 ^ -9`, which is bitwise XOR and evaluates
                # to -3 (a negative "duration"); the intent was a tiny
                # positive epsilon to avoid division by zero.
                episode_time = 1e-9
            average_steps_per_second = step / episode_time
            self.steps_per_second.append(average_steps_per_second)

            # Scale all but the terminal reward by the ratio of the target
            # FPS to the achieved FPS, so slow episodes aren't over-rewarded.
            reward_factor = settings.FPS_COMPENSATION / average_steps_per_second
            episode_reward_weighted = (
                (episode_reward - reward) * reward_factor +
                reward) * settings.EPISODE_REWARD_MULTIPLIER

            # Only episodes of meaningful length are committed.
            if episode_time > settings.MINIMUM_EPISODE_LENGTH:
                self.update_replay_memory(episode_data_memory)
                self.scores_history.append(episode_reward_weighted)
                self.episode += 1

            del episode_data_memory
        logger.info(f"Trainer {self.identifier} stopped")