class CustomUnityEnv(Wrapper):
    """Gym-style environment backed by a Unity game running in its own process.

    The game executable is launched with ``subprocess`` and driven through a
    ``UnityInterface`` connection on ``localhost:1234``. Each observation is
    the game frame converted to an 84x84x1 grayscale image, and the reward of
    a step is the change in the game's running total score.
    """

    # The Unity process is restarted every this many games (see ``reset``).
    GAMES_BETWEEN_RESTARTS = 50
    EXECUTABLE_PATH = "C:\\Users\\Alex Thiel\\Google Drive\\Project - 2018 - Deep Reinforcement Learning\\DRL_Playground\\Build\\RobotGame.exe"

    # Bounds were previously declared as [0, 0], which cannot describe an
    # image. Presumably frames are 8-bit (0-255) -- TODO confirm against
    # UnityInterface.get_state.
    observation_space = spaces.Box(0, 255, shape=(84, 84, 1))
    action_space = spaces.Discrete(n=NUM_ACTIONS)

    def __init__(self):
        """Kill any stale Unity process, launch a new one and connect to it.

        ``Wrapper.__init__`` is not called here: there is no wrapped
        environment, and ``env`` is provided by the property below instead.
        """
        self.steps_since_restart = 0
        self.total_steps_ever = 0
        self.latest_total_score = 0
        self.total_games = 0
        self.episode_rewards = []
        self._closed = False

        # Make sure there aren't any other unity processes running
        self._kill_unity()

        # Run server
        self._open_unity()
        self.server = UnityInterface("localhost", 1234)

    @property
    def env(self):
        """Return a stand-in object named ``Monitor`` exposing run statistics."""
        # This is for fooling the get_wrapper_by_name function
        parent = self

        class Monitor:
            def get_total_steps(self):
                return parent.total_steps_ever

            def get_episode_rewards(self):
                return parent.episode_rewards

        return Monitor()

    def step(self, action):
        """Send ``action`` to the game and read back the resulting state.

        :return: ``(observation, reward, done, info)``; ``info`` is unused and
            is always ``None``.
        """
        self.steps_since_restart += 1
        self.total_steps_ever += 1

        # Send a state and get a response
        with ContextTimer(post_print=False) as timer:
            self.server.send_state(action)
            is_over, image, new_score = self._get_state()

        # # Print FPS?
        # if self.total_steps_ever % 1000 == 0:
        #     print("FPS", 1 / (timer.elapsed + .00001))

        # The game reports a running total, so the reward is the difference
        # from the total seen on the previous step.
        reward = new_score - self.latest_total_score
        self.latest_total_score = new_score
        return image, reward, is_over, None

    def reset(self):
        """Reset the game and return the first observation of the new episode."""
        self.total_games += 1

        # Force a restart of the game in case of glitched-out robot
        if self.total_games % self.GAMES_BETWEEN_RESTARTS == 0:
            self.server.disconnect()
            self._kill_unity()
            self._open_unity()
            self.server.connect()

        # Record the previous episode's total score.
        # NOTE(review): on the very first reset this records the initial
        # score of 0, before any game has been played.
        self.episode_rewards.append(self.latest_total_score)

        # Reset the game
        self.server.send_reset()
        is_over, image, new_score = self._get_state()
        self.latest_total_score = new_score
        return image

    def close(self):
        """Release the connection and the Unity process once training is done.

        Safe to call more than once, and safe to call on an instance whose
        ``__init__`` did not get as far as creating ``self.server``.
        """
        if getattr(self, "_closed", False):
            return
        self._closed = True

        server = getattr(self, "server", None)
        try:
            if server is not None:
                server.disconnect()
        finally:
            # Always take the game down, even if disconnecting failed.
            self._kill_unity()

    def _get_state(self):
        """Fetch the game state and convert its frame to 84x84x1 grayscale.

        :return: ``(is_over, image_gray, new_score)``
        """
        is_over, image, new_score = self.server.get_state()
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image_gray = cv2.resize(image_gray, (84, 84))
        # Add a trailing channel axis so the shape matches observation_space.
        image_gray = np.expand_dims(image_gray, axis=2)
        return is_over, image_gray, new_score

    def _open_unity(self):
        """Launch the Unity executable without waiting for it to exit."""
        print("Running Unity process")
        subprocess.Popen([self.EXECUTABLE_PATH])

    def _kill_unity(self):
        """Force-kill every process named like the Unity executable (Windows)."""
        process_name = Path(self.EXECUTABLE_PATH).name
        print("Killing Unity process", process_name)
        try:
            # An argument list avoids shell parsing, so an executable name
            # containing spaces is still passed as a single argument.
            subprocess.call(["taskkill", "/f", "/im", process_name])
        except OSError as err:
            # Killing is best-effort: keep going if taskkill cannot be run.
            print("Could not run taskkill:", err)
class UnityALEWrapper(ABC):
    """Expose a Unity game through an ALE-style interface.

    The game runs as a separate process started from ``executable_path`` and
    is driven through a ``UnityInterface`` connection. Rewards are the change
    in the game's running total score between consecutive ``act`` calls.
    """

    def __init__(self, executable_path, action_set, screen_dim, host, port):
        """
        Start the Unity executable, connect to it and reset the game.

        :param executable_path: Path of the Unity executable to launch
        :param action_set: An array of integers to send to unity
        :param screen_dim: (width, height) of the screen
        :param host: Host passed to ``UnityInterface``
        :param port: Port passed to ``UnityInterface``
        """
        self.executable_path = executable_path
        self.action_set = action_set
        self.screen_dim = screen_dim

        # The process must be running before the interface is created.
        self.loadROM()
        self.server = UnityInterface(host, port)
        self.reset_game()

    def kill_process(self):
        """Ask Windows to terminate the process named like the executable."""
        process_name = Path(self.executable_path).name
        print("Killing Unity: ", "taskkill /IM " + process_name)
        # An argument list keeps a name containing spaces as one argument.
        subprocess.call(["taskkill", "/IM", process_name])

    def act(self, action):
        """
        Read the current game state, then send ``action`` to the game.

        :return: The reward, i.e. the change in total score since the
            previous call.
        """
        self.is_game_over, latest_color_frame, new_score = self.server.get_state()

        # Calculate reward and store the new score
        reward = new_score - self.latest_total_score
        self.latest_total_score = new_score

        # Keep the colour frame; it is converted on demand by getScreen*.
        self.latest_frame_bgr = latest_color_frame
        self.server.send_state(action)

        if reward != 0:
            print("Received reward of ", reward)
        return reward

    def getScreenGrayscale(self, screen_buffer):
        """Fill the screen buffer with the latest frame, in grayscale.

        :raises RuntimeError: if no frame has been received since the last
            reset.
        """
        gray = cv2.cvtColor(self._require_frame(), cv2.COLOR_BGR2GRAY)
        screen_buffer[...] = gray

    def getScreenColor(self, screen_buffer):
        """Fill the screen buffer with the latest colour frame.

        :raises RuntimeError: if no frame has been received since the last
            reset.
        """
        screen_buffer[...] = self._require_frame()

    def _require_frame(self):
        """Return the latest frame, or raise if ``act`` has not run yet."""
        frame = self.latest_frame_bgr
        if frame is None:
            raise RuntimeError(
                "No frame available: call act() after reset_game() "
                "before reading the screen"
            )
        return frame

    def reset_game(self):
        """Reset the game and clear the cached score, frame and game-over flag."""
        # The pending state is read (and discarded) before the reset is sent.
        self.server.get_state()
        self.server.send_reset()
        self.is_game_over = False
        self.latest_total_score = 0
        self.latest_frame_bgr = None

    def lives(self):
        """Return the number of lives, which is always 1."""
        return 1

    def game_over(self):
        """Return True or False: the game-over flag from the last state read."""
        return self.is_game_over

    def getScreenDims(self):
        """Return width, height of the screen."""
        return self.screen_dim

    def getMinimalActionSet(self):
        """Return the action set given at construction time."""
        return self.action_set

    # The following functions are only there to maintain compatibility
    def setInt(self, *args):
        pass

    def setBool(self, *args):
        pass

    def setFloat(self, *args):
        pass

    def loadROM(self, *args):
        """Launch the Unity executable; any arguments are ignored."""
        print("Starting Unity: ", self.executable_path)
        subprocess.Popen([self.executable_path])