예제 #1
0
class CustomUnityEnv(Wrapper):
    """Gym-style environment backed by an external Unity game process.

    The Unity executable is launched as a separate process and driven through
    ``self.server`` (a ``UnityInterface``). Each observation is an 84x84
    single-channel grayscale frame, and the reward of a step is the change in
    the game's running score since the previous step.

    NOTE(review): ``Wrapper.__init__`` is not called here -- presumably because
    there is no inner environment to wrap; ``env`` is instead exposed as a
    read-only property returning a Monitor stand-in.
    """

    # The Unity process is force-restarted every time the game counter hits a
    # multiple of this value (see ``reset``).
    GAMES_BETWEEN_RESTARTS = 50
    EXECUTABLE_PATH = "C:\\Users\\Alex Thiel\\Google Drive\\Project - 2018 - Deep Reinforcement Learning\\DRL_Playground\\Build\\RobotGame.exe"

    # Grayscale frames. The upper bound used to be 0, which declared every
    # pixel to be zero; 255 covers the 8-bit intensity range.
    # NOTE(review): assumes the server delivers 8-bit images -- confirm.
    observation_space = spaces.Box(0, 255, shape=(84, 84, 1))
    action_space = spaces.Discrete(n=NUM_ACTIONS)

    def __init__(self):
        """Start a fresh Unity process and create the interface to it."""
        self.steps_since_restart = 0
        self.total_steps_ever = 0
        self.latest_total_score = 0
        self.total_games = 0
        self.episode_rewards = []

        # Make sure there aren't any other unity processes running
        self._kill_unity()

        # Run server
        self._open_unity()
        self.server = UnityInterface("localhost", 1234)

    @property
    def env(self):
        """Return a minimal Monitor look-alike bound to this environment.

        This is for fooling the get_wrapper_by_name function: the returned
        object is an instance of a class literally named ``Monitor`` that
        reports this environment's step counter and episode rewards.
        """
        parent = self

        class Monitor:
            def get_total_steps(self):
                return parent.total_steps_ever

            def get_episode_rewards(self):
                return parent.episode_rewards

        return Monitor()

    def step(self, action):
        """Send ``action`` to Unity and return the resulting transition.

        :param action: The action forwarded to ``self.server.send_state``.
        :return: ``(observation, reward, done, info)`` where ``observation``
            is the processed frame from ``_get_state``, ``reward`` is the
            score delta since the previous state, ``done`` is the game-over
            flag, and ``info`` is an empty dict (unused).
        """
        self.steps_since_restart += 1
        self.total_steps_ever += 1

        # Send a state and get a response
        with ContextTimer(post_print=False):
            self.server.send_state(action)
            is_over, image, new_score = self._get_state()

        # The game reports a running total, so the reward is the difference
        # from the last total we saw.
        reward = new_score - self.latest_total_score
        self.latest_total_score = new_score

        # Gym consumers expect ``info`` to be a dict, so return an empty one
        # rather than None.
        return image, reward, is_over, {}

    def reset(self):
        """Start a new game and return its first observation.

        Every ``GAMES_BETWEEN_RESTARTS`` games the Unity process is killed and
        relaunched before the reset is sent. The score of the episode that
        just ended is appended to ``episode_rewards``.
        """
        self.total_games += 1

        # Force a restart of the game in case of glitched-out robot
        if self.total_games % self.GAMES_BETWEEN_RESTARTS == 0:
            self.server.disconnect()
            self._kill_unity()
            self._open_unity()
            self.server.connect()
            # A new process was just started, so the per-process step counter
            # starts over.
            self.steps_since_restart = 0

        # Record the previous episode's reward. The very first reset has no
        # finished episode behind it, so nothing is recorded then; otherwise
        # a spurious 0 would be reported as an episode result.
        if self.total_games > 1:
            self.episode_rewards.append(self.latest_total_score)

        # Reset the game
        self.server.send_reset()
        is_over, image, new_score = self._get_state()
        self.latest_total_score = new_score
        return image

    def close(self):
        """Called after all training is done.

        Currently a no-op: neither the server connection nor the Unity
        process is touched here.
        """
        pass

    def _get_state(self):
        """Fetch the latest state from the server and preprocess its frame.

        The frame returned by ``self.server.get_state()`` is converted from
        BGR to grayscale, resized to 84x84, and given a trailing channel axis
        so that it has shape ``(84, 84, 1)``.

        :return: ``(is_over, image_gray, new_score)``
        """
        is_over, image, new_score = self.server.get_state()
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image_gray = cv2.resize(image_gray, (84, 84))
        image_gray = np.expand_dims(image_gray, axis=2)
        return is_over, image_gray, new_score

    def _open_unity(self):
        """Launch the Unity executable without waiting for it to exit."""
        print("Running Unity process")
        subprocess.Popen([self.EXECUTABLE_PATH])

    def _kill_unity(self):
        """Force-kill every running process named like the Unity executable.

        Uses the Windows ``taskkill`` command. The arguments are passed as a
        list instead of a concatenated shell string, so an executable name
        containing spaces or shell metacharacters is handled correctly. The
        exit status is ignored (there may be no such process running).
        """
        process_name = Path(self.EXECUTABLE_PATH).name
        print("Killing Unity process", process_name)
        subprocess.call(["taskkill", "/f", "/im", process_name])
예제 #2
0
class UnityALEWrapper(ABC):
    """Adapter exposing a Unity game through ALE-like method names.

    The game runs as a separate process started from ``executable_path`` and
    is driven through ``self.server`` (a ``UnityInterface``).

    NOTE(review): the camelCase methods (``loadROM``, ``getScreenGrayscale``,
    ``getMinimalActionSet``, ...) presumably mirror the Arcade Learning
    Environment interface so existing agents can use this class -- confirm
    against the callers.
    """

    def __init__(self, executable_path, action_set, screen_dim, host, port):
        """
        Launch Unity, create the interface to it and reset the game.

        :param executable_path: Path of the Unity executable to launch
        :param action_set: An array of integers to send to unity
        :param screen_dim: (width, height) of the screen
        :param host: Host passed to ``UnityInterface``
        :param port: Port passed to ``UnityInterface``
        """
        self.executable_path = executable_path
        self.action_set = action_set
        self.screen_dim = screen_dim

        # Start the Unity process before creating the interface to it.
        self.loadROM()

        self.server = UnityInterface(host, port)
        self.reset_game()

    def kill_process(self):
        """Ask Windows to terminate processes named like the Unity executable.

        The ``taskkill`` arguments are passed as a list rather than one
        concatenated command string, so an executable name containing spaces
        is still treated as a single argument.
        """
        process_name = Path(self.executable_path).name
        print("Killing Unity: ", "taskkill /IM " + process_name)
        subprocess.call(["taskkill", "/IM", process_name])

    def act(self, action):
        """Read the current state, then send ``action``. Must return the reward.

        The reward is the difference between the score just received and the
        previously stored score. Note the ordering: the state is fetched
        *before* the action is sent.

        :param action: Value forwarded to ``self.server.send_state``
        :return: Score delta since the previous call
        """
        self.is_game_over, latest_color_frame, new_score = self.server.get_state()

        # Calculate reward and store the new score
        reward = new_score - self.latest_total_score
        self.latest_total_score = new_score

        # Keep the raw BGR frame; grayscale conversion happens lazily in
        # getScreenGrayscale.
        self.latest_frame_bgr = latest_color_frame
        self.server.send_state(action)

        if reward != 0:
            print("Received reward of ", reward)
        return reward

    def getScreenGrayscale(self, screen_buffer):
        """ Fill the screen buffer with a grayscale copy of the latest frame.

        ``latest_frame_bgr`` is None right after ``reset_game`` and is only
        populated by ``act``.
        """
        gray = cv2.cvtColor(self.latest_frame_bgr, cv2.COLOR_BGR2GRAY)
        screen_buffer[...] = gray

    def getScreenColor(self, screen_buffer):
        """ Fill the screen buffer with the latest BGR frame.

        ``latest_frame_bgr`` is None right after ``reset_game`` and is only
        populated by ``act``.
        """
        screen_buffer[...] = self.latest_frame_bgr

    def reset_game(self):
        """Request a game reset and clear the per-episode bookkeeping."""
        # The pending state is read and discarded before the reset is sent.
        self.server.get_state()
        self.server.send_reset()
        self.is_game_over = False
        self.latest_total_score = 0
        self.latest_frame_bgr = None

    def lives(self):
        """Return the number of lives, which is always 1."""
        return 1

    def game_over(self):
        """ Returns True or False """
        return self.is_game_over

    def getScreenDims(self):
        """ Return width, height of the screen"""
        return self.screen_dim

    def getMinimalActionSet(self):
        """Return the action set supplied at construction."""
        return self.action_set

    # The following functions are only there to maintain compatibility
    def setInt(self, *args):
        """Accept and ignore any arguments."""
        pass

    def setBool(self, *args):
        """Accept and ignore any arguments."""
        pass

    def setFloat(self, *args):
        """Accept and ignore any arguments."""
        pass

    def loadROM(self, *args):
        """Launch the Unity executable without waiting; ``args`` are ignored."""
        print("Starting Unity: ", self.executable_path)
        subprocess.Popen([self.executable_path])