Python DDQN.build_frame_stack示例

编程语言: Python

命名空间/包名称: serpent.machine_learning.reinforcement_learning.ddqn

类/类型: DDQN

方法/功能: build_frame_stack

hotexamples.com的示例: 10

Python DDQN.build_frame_stack - 共找到10个示例。以下是从开源项目中提取的、评价最高的 serpent.machine_learning.reinforcement_learning.ddqn.DDQN.build_frame_stack 真实 Python 示例。您可以对示例进行评分，帮助我们提高示例质量。

常用方法

显示隐藏

DDQN(14)

enter_run_mode(11)

enter_train_mode(11)

erode_epsilon(11)

append_to_replay_memory(10)

build_frame_stack(10)

calculate_target_error(1)

示例#1

显示文件

class SerpentPikaBallGameAgent(GameAgent):
    def __init__(self, **kwargs):
        """Wire up the PLAY handlers and initialise the agent's own state.

        All keyword arguments are passed straight through to the parent
        ``GameAgent`` constructor.
        """
        super().__init__(**kwargs)

        # Per-frame callback and one-time setup hook for the "PLAY" mode.
        play_mode = "PLAY"
        self.frame_handlers[play_mode] = self.handle_play
        self.frame_handler_setups[play_mode] = self.setup_play
        self.previous_game_frame = None

        # Colour bounds referenced by the disabled cv2.inRange detection in
        # handle_frame_process ("Y" pair for pikachu, "R" pair for the ball).
        y_low, y_high = [255, 255, 0], [255, 255, 10]
        r_low, r_high = [255, 0, 0], [255, 0, 10]
        self.lowerY = np.array(y_low, np.uint8)
        self.upperY = np.array(y_high, np.uint8)
        self.lowerR = np.array(r_low, np.uint8)
        self.upperR = np.array(r_high, np.uint8)

        # Start from the default game state.
        self.game_state = None
        self._reset_game_state()

    def setup_key(self):
        """Build the key tables used by the DDQN and by the status output.

        Sets ``input_mapping`` (action label -> keys to press),
        ``key_mapping`` (key -> short display name), ``action_space`` and
        ``game_inputs`` (every move/attack combination), then prints
        ``game_inputs``.
        """
        up = KeyboardKey.KEY_UP
        right = KeyboardKey.KEY_RIGHT
        down = KeyboardKey.KEY_DOWN
        left = KeyboardKey.KEY_LEFT
        hit = KeyboardKey.KEY_RETURN

        self.input_mapping = {
            "JUMP": [up],
            "RIGHT": [right],
            "LEFT": [left],
            "UP_HIT": [up, hit],
            "L_HIT": [left, hit],
            "DOWN_HIT": [down, hit],
            "NONE": [],
        }

        self.key_mapping = {
            up: "UP",
            right: "RIGHT",
            down: "DOWN",
            left: "LEFT",
            hit: "HIT",
        }

        # The action labels are exactly the input_mapping keys, in order.
        self.action_space = KeyboardMouseActionSpace(
            action=list(self.input_mapping))

        move_inputs = {
            "JUMP": [up],
            "RIGHT": [right],
            "LEFT": [left],
            "NO_MOVE": [],
        }
        attack_inputs = {"Power Hit": [hit], "NO_HIT": []}

        # Cartesian product of moves and attacks; the move label is padded to
        # ten characters so the combined labels line up.
        self.game_inputs = {
            f"{move_label.ljust(10)}{attack_label}": move_keys + attack_keys
            for (move_label, move_keys), (attack_label, attack_keys)
            in itertools.product(move_inputs.items(), attack_inputs.items())
        }
        print(self.game_inputs)

    def setup_play(self):
        """One-time PLAY setup: key tables, run bookkeeping and the DDQN.

        The ``model`` directory is scanned for saved weights and the file
        whose name carries the smallest decimal number below 1 (treated as an
        epsilon value) is selected.  When nothing qualifies, or the directory
        does not exist yet, the default file name is kept; the DDQN is then
        created without weights if that file is absent.  Finally RETURN is
        tapped to start the game.
        """
        #self.cid = 0
        self.trainID = 0
        self.setup_key()
        self.frame_process = False
        self.rewards = list()
        self.started_at = datetime.now()
        self.started_at_str = self.started_at.isoformat()

        latest_epsilon = 1
        model_file_path = 'fighting_movement_dqn_0_1_.h5'
        # A missing "model" directory simply means "no saved weights yet";
        # the code below already copes with a missing weights file.
        try:
            model_list = os.listdir('model')
        except (FileNotFoundError, NotADirectoryError):
            model_list = []
        for item in model_list:
            # Raw string: "\d" in a plain literal is an invalid escape.
            for epsilon in re.findall(r"\d+\.\d+", item):
                if latest_epsilon > float(epsilon):
                    latest_epsilon = float(epsilon)
                    model_file_path = item

        model_file_path = f'model/{model_file_path}'.replace('/', os.sep)
        print(">> LOAD MODEL: ", model_file_path)
        time.sleep(1)

        # Fewer observation steps are used when resuming from saved weights.
        has_weights = os.path.isfile(model_file_path)
        self.dqn_action = DDQN(
            model_file_path=model_file_path if has_weights else None,
            input_shape=(114, 162, 4),
            input_mapping=self.input_mapping,
            action_space=self.action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=100 if has_weights else 1000,
            batch_size=32,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=True)
        print('Starting Game')
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)

    def getDifference(self, game_frame, previous_game_frame):
        """Return the current grayscale frame minus the previous one.

        The result is whatever ``-`` yields for the two ``grayscale_frame``
        attributes.
        NOTE(review): if those are unsigned-integer arrays the subtraction
        wraps around instead of going negative — confirm the dtype.
        """
        current = game_frame.grayscale_frame
        previous = previous_game_frame.grayscale_frame
        return current - previous

    # old img process way
    def handle_frame_process(self, game_frame):
        # Currently a no-op: the former implementation is preserved inside the
        # string literal below, which is the only statement in the body (and,
        # being first, also ends up as this function's docstring).
        # ``game_frame`` is accepted but not used.
        #
        # The disabled code thresholded and blurred the grayscale frame, drew
        # boxes/circles around colour-matched contours (lowerY/upperY and
        # lowerR/upperR bounds) and stored the result plus a frame-difference
        # image in the visual debugger.
        '''
        if not self.frame_process:
            return
        if self.game_frame_buffer.previous_game_frame is not None:
            try:
                threshold = skimage.filters.threshold_otsu(game_frame.grayscale_frame)
            except ValueError:
                threshold = -1

            gray_frame = game_frame.grayscale_frame > threshold
            gray_frame = skimage.filters.gaussian(gray_frame)
            gray_cv_frame = cv2.cvtColor(np.array(gray_frame * 255, dtype='uint8'), cv2.COLOR_GRAY2RGB)

            cv_frame = cv2.cvtColor(np.asarray(game_frame.frame, dtype='uint8'), cv2.COLOR_BGR2RGB)
            cv_frame = cv2.cvtColor(cv_frame, cv2.COLOR_BGR2RGB)
            frame_threshed = cv2.inRange(cv_frame, self.lowerY, self.upperY)

            # find connected components (pikachu)
            _, cnts, hierarchy, = cv2.findContours(frame_threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

            if len(cnts) > 0:
                cnts = sorted(cnts, key = cv2.contourArea, reverse = True)
                for i in range(0, 2):
                    # Draw a rectangular frame around the detected object
                    x, y, w, h = cv2.boundingRect(cnts[i])
                    cv2.rectangle(gray_cv_frame, (x,y), (x+w,y+h), (0,255,0), 2)

            frame_threshed = cv2.inRange(cv_frame, self.lowerR, self.upperR)

            # find connected components (ball)
            _, cnts, hierarchy, = cv2.findContours(frame_threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

            if len(cnts) > 0:
                cnt = sorted(cnts, key = cv2.contourArea, reverse = True)[0]
                if len(cnt) > 70:
                    (x,y), radius = cv2.minEnclosingCircle(cnt)
                    cv2.circle(gray_cv_frame, (int(x),int(y)), int(radius), (255,0,0),2)

            self.visual_debugger.store_image_data(
                gray_cv_frame,
                gray_cv_frame.shape,
                "grayscale"
            )
            
            # send difference to debugger (optional)
            frame_difference = self.getDifference(game_frame, self.game_frame_buffer.previous_game_frame)
            bw_frame_difference = frame_difference > 100
            bw_frame_difference = skimage.filters.sobel(bw_frame_difference)
            self.visual_debugger.store_image_data(
                np.array(bw_frame_difference * 255, dtype='uint8'),
                bw_frame_difference.shape,
                "frame_diff"
            )
            '''

    def handle_play(self, game_frame):
        """Per-frame entry point for PLAY mode.

        Copies the values returned by ``readInfo()`` into ``game_state`` and
        onto the agent, then hands over to ``handle_fight`` when the probe
        pixel of the newest pipeline frame has the in-game value, or to
        ``handle_notInGame`` otherwise.
        """
        # Disabled: locate sprite position and existence
        #   logo_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_LOGO'], game_frame=game_frame)
        #   menu_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_MENU'], game_frame=game_frame)
        #   game_set_locator = sprite_locator.locate(sprite=self.game.sprites['SPRITE_GAME_SET'], game_frame=game_frame)

        # Read the game's memory values in the order readInfo() reports them.
        (com_x, com_y, ai_x, ai_y, ball_x, ball_y, com_sc, ai_sc, col_size,
         col_x, col_y) = readInfo()

        state = self.game_state
        state["com_x"], state["com_y"] = com_x, com_y
        self.ai_x, self.ai_y = ai_x, ai_y
        self.ball_x, self.ball_y = ball_x, ball_y
        self.com_sc, self.ai_sc = com_sc, ai_sc
        self.col_size = col_size
        state["col_x"], state["col_y"] = col_x, col_y

        # Push the newest sample to the front of each history deque.
        latest_samples = (
            ("ai_x", ai_x),
            ("ai_y", ai_y),
            ("ball_x", ball_x),
            ("ball_y", ball_y),
            ("ai_score", ai_sc),
            ("com_score", com_sc),
            ("col_size", col_size),
        )
        for key, value in latest_samples:
            state[key].appendleft(value)

        self.handle_frame_process(game_frame)

        # Disabled: sprite-based screen dispatch
        #   if(logo_locator):
        #       print('Entering Logo...')
        #       self.game_state["playing"] = False
        #       self.handle_menu()
        #   elif (menu_locator):
        #       print('Entering Menu...')
        #       self.game_state["playing"] = False
        #       self.handle_menu()
        #   elif (game_set_locator):
        #       print('Game Set!')
        #       self.handle_fight_end(game_frame)

        # judge is-in-game by read pixel value (tricky): the probe pixel must
        # equal this exact float (numerically 182/255).
        grabbed = FrameGrabber.get_frames([0], frame_type="PIPELINE")
        self.game_frame_img = grabbed.frames[0].frame
        if self.game_frame_img[100, 81] != 0.7137254901960784:
            self.handle_notInGame()
            return

        self.game_state["playing"] = True
        self.handle_fight(game_frame)

    def handle_notInGame(self):
        """Show the waiting animation and tap RETURN while outside a match."""
        serpent.utilities.clear_terminal()
        print('Currently not in game...please wait..')

        playAnimation(self.game_state["animeIndex"])

        # Advance the animation index, wrapping 0 -> 1 -> 2 -> 3 -> 0.
        shown = self.game_state["animeIndex"]
        if shown < 3:
            self.game_state["animeIndex"] = shown + 1
        else:
            self.game_state["animeIndex"] = 0

        #print(self.game_frame_img[95:105,80:83])

        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(0.3)

    def handle_menu(self):
        """Tap RETURN to confirm the menu, then pause for four seconds."""
        pause_seconds = 4
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(pause_seconds)

    def handle_fight(self, game_frame):
        """Run one in-match step of the DDQN loop.

        The very first call only clears ``first_run``.  While the DDQN has no
        frame stack yet, one is built from the newest pipeline frame and the
        step ends there.  Otherwise the four newest pipeline frames are
        either stored in replay memory together with the step reward (TRAIN
        mode) or used to refresh the frame stack (RUN mode), a status screen
        is printed, an action is picked and sent to the input controller, and
        the step counters advance.  When either score reaches 15 the match is
        treated as over and ``handle_fight_end`` is called.
        """
        # Automatic garbage collection stays off during play; it is run
        # manually in handle_fight_training.
        gc.disable()
        if self.dqn_action.first_run:
            self.dqn_action.first_run = False
            return

        if self.dqn_action.frame_stack is None:
            game_frame_buffer = FrameGrabber.get_frames(
                [0], frame_type="PIPELINE").frames[0]
            self.dqn_action.build_frame_stack(game_frame_buffer.frame)
        else:
            # saving frame pic to analyze
            #self.cid = self.cid + 1
            #game_frame_img = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
            #skimage.io.imsave(f"frame{self.cid}.png", game_frame_img.frame)
            game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                                        frame_type="PIPELINE")

            if self.dqn_action.mode == "TRAIN":
                reward = self._calculate_reward()

                # NOTE(review): _calculate_reward already stores the step
                # reward in game_state["reward"], so after this line the
                # entry holds twice the step reward — confirm intent.
                self.game_state["reward"] += reward

                # NOTE(review): the transition is flagged terminal only when
                # the AI reaches 15; the game-over check at the end of this
                # method also fires when the opponent reaches 15.
                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["ai_score"][0] == 15)

                # Every 1000 steps, save latest weights to disk
                if self.dqn_action.current_step % 1000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix=f"model/fighting_movement")

                # Every 10000 steps, save weights checkpoint to disk
                if self.dqn_action.current_step % 10000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix=f"model/fighting_movement",
                        is_checkpoint=True)
            elif self.dqn_action.mode == "RUN":
                self.dqn_action.update_frame_stack(game_frame_buffer)

            # ---- status screen -------------------------------------------
            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()
            print('')
            print(Fore.YELLOW)
            print(Style.BRIGHT)
            print(f"STARTED AT:{self.started_at_str}")
            print(
                f"RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} s"
            )

            print(Style.RESET_ALL)
            #print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_action.output_step_data()
            print(Style.RESET_ALL)
            print(Style.BRIGHT)
            print(f"CURRENT RUN: {self.game_state['current_run'] }")
            print("")
            print(
                f"CURRENT RUN   REWARD: {round(self.game_state['reward'], 4)}")
            print(f"CURRENT AI    SCORE: {self.game_state['ai_score'][0]}")
            print(f"CURRENT ENEMY SCORE: {self.game_state['com_score'][0]}")
            print("")
            print(
                f"PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(Style.RESET_ALL)

            # ---- choose and apply the next action -------------------------
            self.dqn_action.pick_action()
            self.dqn_action.generate_action()

            movement_keys = self.dqn_action.get_input_values()

            print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            #print(movement_keys)
            # Show the pressed keys by their short names from key_mapping.
            print("" + " + ".join(
                list(map(lambda k: self.key_mapping.get(k), movement_keys))))
            print(Style.RESET_ALL)
            print("")
            print(
                f"AI:        ({self.game_state['ai_x'][0]}, {self.game_state['ai_y'][0]})"
            )
            print(
                f"COM:       ({self.game_state['com_x']}, {self.game_state['com_y']})"
            )
            print(
                f"BALL:      ({self.game_state['ball_x'][0]}, {self.game_state['ball_y'][0]})"
            )
            print(
                f"Collision: ({self.game_state['col_x']}, {self.game_state['col_y']}, {self.game_state['col_size'][0]})"
            )
            print(f"Distance:   {self.game_state['distance'][0]}")

            self.input_controller.handle_keys(movement_keys)

            if self.dqn_action.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_action.erode_epsilon(factor=2)
            self.dqn_action.next_step()

            # NOTE: "current_run" is bumped on every step here, not only when
            # a match ends.
            self.game_state["current_run"] += 1

            if self.game_state['ai_score'][0] == 15 or self.game_state[
                    'com_score'][0] == 15:
                # Game over
                self.game_state["ai_score"].appendleft(0)
                self.game_state["com_score"].appendleft(0)
                self.handle_fight_end(game_frame)

    def handle_fight_end(self, game_frame):
        """Close out a finished match and move on to the training phase.

        Clears the ``playing`` flag, sends an empty key list to the input
        controller, bumps ``current_run`` and calls
        ``handle_fight_training``.
        """
        state = self.game_state
        state["playing"] = False
        self.input_controller.handle_keys([])
        state["current_run"] = state["current_run"] + 1
        self.handle_fight_training(game_frame)

    def handle_fight_training(self, game_frame):
        """Train between matches and choose the DDQN mode for the next one.

        Runs one manual garbage-collection pass, trains on 16 mini-batches
        when the DDQN is in TRAIN mode, resets the predicted-action counter,
        updates the target model every 100th run and enters run mode every
        20th run (train mode otherwise), then taps RETURN and waits two
        seconds.  ``game_frame`` is accepted but not used.
        """
        #self.input_controller.tap_key(KeyboardKey.KEY_ESCAPE)
        serpent.utilities.clear_terminal()
        # Collect once while nothing time-critical is happening, then switch
        # automatic collection off again.
        gc.enable()
        gc.collect()
        gc.disable()
        print("TRAIN MODE")
        self.input_controller.handle_keys([])

        # The banner's "- AI RUN" marker must follow the same rule that
        # selects run mode further down; it previously used an unrelated
        # "(current_run + 1) % 25" test and so flagged the wrong runs.
        current_run = self.game_state["current_run"]
        next_is_ai_run = current_run > 0 and current_run % 20 == 0

        if self.dqn_action.mode == "TRAIN":
            for i in range(16):
                serpent.utilities.clear_terminal()
                print("")
                print(Fore.GREEN)
                print(Style.BRIGHT)
                print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                print(
                    f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if next_is_ai_run else ''}"
                )
                print(Style.RESET_ALL)

                self.dqn_action.train_on_mini_batch()

        self.game_state["run_predicted_actions"] = 0

        if self.dqn_action.mode in ["TRAIN", "RUN"]:
            if self.game_state["current_run"] > 0 and self.game_state[
                    "current_run"] % 100 == 0:
                self.dqn_action.update_target_model()

            if self.game_state["current_run"] > 0 and self.game_state[
                    "current_run"] % 20 == 0:
                self.dqn_action.enter_run_mode()
            else:
                self.dqn_action.enter_train_mode()

        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(2)

    def _reset_game_state(self):
        self.game_state = {
            "reward": 0,
            "animeIndex": 0,
            "current_run": 1,
            "playing": False,
            "run_predicted_actions": 0,
            "ai_x": collections.deque(np.full((4, ), 0), maxlen=4),
            "ai_y": collections.deque(np.full((4, ), 0), maxlen=4),
            "ai_score": collections.deque(np.full((4, ), 0), maxlen=4),
            "ball_x": collections.deque(np.full((4, ), 0), maxlen=4),
            "ball_y": collections.deque(np.full((4, ), 0), maxlen=4),
            "com_score": collections.deque(np.full((4, ), 0), maxlen=4),
            "col_size": collections.deque(np.full((4, ), 6), maxlen=4),
            "com_x": 36,
            "com_y": 244,
            "col_x": 0,
            "col_y": 0,
            "distance": collections.deque(np.full((20, ), 100), maxlen=20),
        }

    def _calculate_reward(self):
        reward = 0
        distance = math.sqrt(
            abs(self.game_state["ai_x"][0] - self.game_state["ball_x"][0])**2 +
            abs(self.game_state["ai_y"][0] - self.game_state["ball_y"][0])**2)
        self.game_state["distance"].appendleft(int(distance))

        # collision with ball
        collision = self.game_state["distance"][0] < 80 and self.game_state[
            "distance"][1] < 80 and self.game_state["distance"][
                2] < 80 and self.game_state["distance"][0] > self.game_state[
                    "distance"][1] and self.game_state["distance"][
                        1] < self.game_state["distance"][2]
        if collision:
            reward += 0.25

        # power hit
        if self.game_state["col_size"][0] > 0 and self.game_state["distance"][
                0] < 90 and self.game_state["col_y"] != 272:
            reward += 0.5

        # AI gain score
        if self.game_state["ai_score"][0] > self.game_state["ai_score"][1]:
            reward += 1

        # Com gain score
        if self.game_state["com_score"][0] > self.game_state["com_score"][1]:
            reward += -1

        if reward > 1:
            reward = 1

        self.game_state["reward"] = reward
        return reward

示例#2

显示文件

class SerpentFortniteGameAgent(GameAgent):
    def __init__(self, **kwargs):
        """Register the PLAY handler/setup pair and create the game state.

        All keyword arguments are passed straight through to the parent
        ``GameAgent`` constructor.
        """
        super().__init__(**kwargs)

        play_mode = "PLAY"
        self.frame_handlers[play_mode] = self.handle_play
        self.frame_handler_setups[play_mode] = self.setup_play

        # Start from the default game state.
        self.game_state = None
        self._reset_game_state()

    def setup_play(self):
        """Load the object detector and create the direction DDQN.

        Also fills ``key_mapping`` (key name -> action label).  The DDQN is
        given the saved weights file only when that file exists.
        """
        self.detector = ObjectDetection()
        self.detector.setModelTypeAsTinyYOLOv3()
        self.detector.setModelPath("yolo.h5")
        self.detector.loadModel(detection_speed="flash")

        # Every action label is also the name of the KeyboardKey member it
        # presses, so the tables can be derived from one list.
        key_names = ("KEY_W", "KEY_A", "KEY_S", "KEY_D", "KEY_SPACE", "KEY_C",
                     "KEY_1", "KEY_2")

        input_mapping = {
            label: [getattr(KeyboardKey, label)] for label in key_names
        }
        self.key_mapping = {
            getattr(KeyboardKey, label).name: label for label in key_names
        }
        direction_action_space = KeyboardMouseActionSpace(
            direction_keys=list(key_names))

        direction_model_file_path = "datasets/Fortnite_direction_dqn_0_1_.h5".replace(
            "/", os.sep)
        if os.path.isfile(direction_model_file_path):
            saved_weights = direction_model_file_path
        else:
            saved_weights = None

        self.dqn_direction = DDQN(
            model_file_path=saved_weights,
            input_shape=(480, 640, 4),
            input_mapping=input_mapping,
            action_space=direction_action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=600,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1,
            final_epsilon=0.01,
        )

    def handle_play(self, game_frame):
        """Run one PLAY-mode step for the direction DDQN.

        Mirrors the buffered frames to the visual debugger, records the
        measured health and score, feeds frames to the DDQN (replay memory in
        TRAIN mode, frame stack in RUN mode) and prints a status screen.
        When the previous health sample is at or below zero the run is
        closed: records are updated, mini-batches are trained, the per-run
        state is reset and the next mode is chosen.  Otherwise an action is
        picked, every detected person is clicked on, and the chosen keys are
        sent to the input controller.

        Returns ``None`` in every case.
        """
        gc.disable()

        # NOTE(review): the loop variable rebinds ``game_frame``, so the
        # measurements below use the last buffered frame rather than the
        # argument whenever the buffer is non-empty — confirm this is wanted.
        for i, game_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(game_frame.frame,
                                                  game_frame.frame.shape,
                                                  str(i))

        if self.dqn_direction.first_run:
            # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
            # time.sleep(5)

            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

            self.dqn_direction.first_run = False

            return None

        actor_hp = self._measure_actor_hp(game_frame)
        run_score = self._measure_run_score(game_frame)

        self.game_state["health"].appendleft(actor_hp)
        self.game_state["score"].appendleft(run_score)

        if self.dqn_direction.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE").frames[0]

            self.dqn_direction.build_frame_stack(full_game_frame.frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE")

            if self.dqn_direction.mode == "TRAIN":
                reward_direction, reward_action = self._calculate_reward()

                self.game_state["run_reward_direction"] += reward_direction
                self.game_state["run_reward_action"] += reward_action

                # Compare the newest health sample, not the deque itself: a
                # deque never equals 0, so the flag used to be always False.
                # "<= 0" matches the run-end check further down.
                self.dqn_direction.append_to_replay_memory(
                    game_frame_buffer,
                    reward_direction,
                    terminal=self.game_state["health"][0] <= 0)

                # Every 2000 steps, save latest weights to disk
                if self.dqn_direction.current_step % 2000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix="datasets/Fortnite_direction")

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_direction.current_step % 20000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix="datasets/Fortnite_direction",
                        is_checkpoint=True)

            elif self.dqn_direction.mode == "RUN":
                self.dqn_direction.update_frame_stack(game_frame_buffer)

            # ---- status screen -------------------------------------------
            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            print(
                f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
            )
            print(
                "GAME: Fortnite   PLATFORM: EXE   AGENT: DDQN + Prioritized Experience Replay"
            )
            print("")

            self.dqn_direction.output_step_data()

            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(
                f"CURRENT RUN REWARD: {round(self.game_state['run_reward_direction'] + self.game_state['run_reward_action'], 2)}"
            )
            print(
                f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            print("")
            print(
                f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds"
            )

            print("")
            print(
                f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})"
            )
            print("")

            # Printed once; the original repeated this exact line twice.
            print(
                f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds"
            )

            # ---- end of run: previous health sample at or below zero ------
            if self.game_state["health"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                # One manual collection pass between runs.
                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state[
                            "record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_direction.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(
                        self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(
                        self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0
                self.input_controller.handle_keys([])

                if self.dqn_direction.mode == "TRAIN":
                    # One constant drives both the loop and the progress
                    # banner (which used to read "/2" for 8 batches).
                    mini_batch_count = 8
                    for i in range(mini_batch_count):
                        run_time = datetime.now() - self.started_at
                        serpent.utilities.clear_terminal()
                        print(
                            f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
                        )
                        print(
                            "GAME: Fortnite                 PLATFORM: EXE                AGENT: DDQN + Prioritized Experience Replay"
                        )
                        print("")

                        print(
                            f"TRAINING ON MINI-BATCHES: {i + 1}/{mini_batch_count}"
                        )
                        print(
                            f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}"
                        )

                        self.dqn_direction.train_on_mini_batch()

                # Reset the per-run bookkeeping for the next run.
                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_direction"] = 0
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8, ),
                                                                      3),
                                                              maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8, ), 0),
                                                             maxlen=8)

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 100 == 0:
                        if self.dqn_direction.type == "DDQN":
                            self.dqn_direction.update_target_model()
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 20 == 0:
                        self.dqn_direction.enter_run_mode()
                    else:
                        self.dqn_direction.enter_train_mode()

                # self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                # time.sleep(3)

                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

                return None

        # ---- choose and apply the next action -----------------------------
        self.dqn_direction.pick_action()
        self.dqn_direction.generate_action()

        keys = self.dqn_direction.get_input_values()

        print("")
        print(keys)
        img = pyautogui.screenshot(region=(0, 0, 1920, 1080))
        # convert image to numpy array
        im = np.array(img)
        custom = self.detector.CustomObjects(person=True)
        detections = self.detector.detectCustomObjectsFromImage(
            custom_objects=custom, input_type="array", input_image=im)
        for eachObject in detections:
            print(eachObject["box_points"])
            # Click the centre of the detection box.  Uses ctypes.windll,
            # which exists on Windows only.
            tuple_of_x_and_y = eachObject["box_points"]
            centerX = (tuple_of_x_and_y[0] + tuple_of_x_and_y[2]) / 2
            centerY = (tuple_of_x_and_y[1] + tuple_of_x_and_y[3]) / 2
            centerX = int(centerX)
            centerY = int(centerY)
            ctypes.windll.user32.SetCursorPos(centerX, centerY)
            ctypes.windll.user32.mouse_event(2, 0, 0, 0, 0)  # left down
            time.sleep(0.05)
            ctypes.windll.user32.mouse_event(4, 0, 0, 0, 0)  # left up
            # NOTE(review): nothing in this class resets shot_reward.
            self.shot_reward = 100000

        self.input_controller.handle_keys(keys)
        if self.dqn_direction.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_direction.erode_epsilon(factor=2)

        self.dqn_direction.next_step()

        self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8, ), 3), maxlen=8),
            "score": collections.deque(np.full((8, ), 0), maxlen=8),
            "run_reward_direction": 0,
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 0,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
        }

    def _measure_actor_hp(self, game_frame):
        """Estimate the actor's health from ``game_frame``.

        The base value is the number of distinct colours in the HP_AREA
        region minus 7 (0 when ``getcolors()`` yields nothing).  If any of
        the game's sprites is located in the frame the value becomes 1000000
        instead.  Every lookup result is printed.
        """
        hp_pixels = serpent.cv.extract_region_from_image(
            game_frame.frame, self.game.screen_regions["HP_AREA"])

        # TODO: remove in favor of sprite detection and location
        palette = Image.fromarray(hp_pixels).getcolors()
        actor_hp = len(palette) - 7 if palette else 0

        for sprite in self.game.sprites.values():
            query = Sprite("QUERY", image_data=sprite.image_data)
            location = SpriteLocator().locate(sprite=query,
                                              game_frame=game_frame)
            print(location)
            if location:
                actor_hp = 1000000

        return actor_hp

    def _measure_run_score(self, game_frame):
        """Read the run score from the SCORE_AREA region via OCR.

        Returns the score as a string of digits, or ``'0'`` when the region
        holds at most one colour or the text before the first ``:`` is not
        purely digits.  The value is also stored in
        ``game_state["current_run_score"]``.
        """
        region = serpent.cv.extract_region_from_image(
            game_frame.frame, self.game.screen_regions["SCORE_AREA"])
        score_grayscale = np.array(skimage.color.rgb2gray(region) * 255,
                                   dtype="uint8")
        palette = Image.fromarray(score_grayscale).getcolors()

        score = '0'
        # Only run OCR when the region is not a single flat colour.
        if palette and len(palette) > 1:
            ocr_text = serpent.ocr.perform_ocr(image=score_grayscale,
                                               scale=10,
                                               order=5,
                                               horizontal_closing=10,
                                               vertical_closing=5)
            score = ocr_text.partition(":")[0]

        if not score.isdigit():
            score = '0'

        self.game_state["current_run_score"] = score
        return score

    def _calculate_reward(self):
        reward = 0
        reward = self.shot_reward
        reward += self.game_state["health"][0] / 10.0

        return reward, reward

示例#3

显示文件

class SerpentCloneyGameAgent(GameAgent):

    def __init__(self, **kwargs):
        """Create the agent and register the "PLAY_DDQN" frame handler.

        Only the DDQN handler and its setup callback are registered here;
        the rule-based "PLAY" registrations are left disabled below.
        """
        super().__init__(**kwargs)

        self.analytics_client = None

        # Rule-based mode is currently switched off:
        # self.frame_handlers["PLAY"] = self.handle_play
        # self.frame_handler_setups["PLAY"] = self.setup_play

        self.frame_handler_setups["PLAY_DDQN"] = self.setup_play_ddqn
        self.frame_handlers["PLAY_DDQN"] = self.handle_play_ddqn

    def setup_play(self):
        """Prepare the models used by the rule-based "PLAY" handler.

        Loads the context classifier and the object detector from files
        under the configured plugins directory, then resets the game state.
        """
        self.plugin_path = offshoot.config["file_paths"]["plugins"]

        # Context classifier, stored under "context_classifier".
        classifier = CNNInceptionV3ContextClassifier(input_shape=(288, 512, 3))
        classifier.prepare_generators()
        classifier.load_classifier(
            f"{self.plugin_path}/SerpentCloneyGameAgentPlugin/files/ml_models/cloney_context_classifier.model"
        )
        self.machine_learning_models["context_classifier"] = classifier

        # Object detector (frozen graph plus label map, two classes).
        frozen_graph_path = f'{self.plugin_path}/SerpentCloneyGameAgentPlugin/files/ml_models/cloney_detection/frozen_inference_graph.pb'
        label_map_path = f'{self.plugin_path}/SerpentCloneyGameAgentPlugin/files/ml_models/cloney_detection/cloney-detection.pbtxt'
        self.object_detector = ObjectDetector(
            graph_fp=frozen_graph_path,
            labels_fp=label_map_path,
            num_classes=2,
            threshold=0.6,
        )

        # Start from a clean set of counters and positions.
        self._reset_game_state()

    # =============================
    # -----------DQN TODO ---------
    # =============================
    def setup_play_ddqn(self):
        """Reset the game state and build the movement DDQN.

        The network has a single mapped input ("UP" -> space bar). Saved
        weights are loaded only when the model file exists on disk;
        otherwise the DDQN is created without a model file.
        """
        self._reset_game_state()

        # Used when printing the chosen keys in handle_play_ddqn.
        self.key_mapping = {KeyboardKey.KEY_SPACE.name: "UP"}

        # NOTE(review): this file name differs from the "datasets/cloney_movement"
        # prefix used when saving weights - confirm which one is intended.
        weights_path = "datasets/cloney_direction_dqn_0_1_.hp5".replace("/", os.sep)
        if not os.path.isfile(weights_path):
            weights_path = None

        self.dqn_movement = DDQN(
            model_file_path=weights_path,
            input_shape=(100, 100, 4),
            input_mapping={"UP": [KeyboardKey.KEY_SPACE]},
            action_space=KeyboardMouseActionSpace(default_keys=[None, "UP"]),
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=1000,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=False,
        )

    def handle_play_ddqn(self, game_frame):
        """Run one DDQN step for ``game_frame``: observe, learn, then act.

        On the very first call the game is started with a key tap and the
        frame is skipped. On later calls the method:

        1. records the latest "alive" reading in ``game_state["alive"]``;
        2. builds the frame stack if it does not exist yet, otherwise grabs
           the latest frames and either appends them to replay memory
           ("TRAIN" mode) or refreshes the frame stack ("RUN" mode);
        3. prints a status report;
        4. when the previous "alive" reading is <= 0, closes the run
           (records, mini-batch training, mode switch, restart) and returns;
        5. otherwise picks an action and sends the keys to the game.

        Args:
            game_frame: the frame delivered by the frame handler loop.

        Returns:
            None in every case.
        """
        gc.disable()

        if self.dqn_movement.first_run:
            self.input_controller.tap_key(KeyboardKey.KEY_W)

            self.dqn_movement.first_run = False

            time.sleep(5)

            return None

        dragon_alive = self._measure_dragon_alive(game_frame)
        self.game_state["alive"].appendleft(dragon_alive)
        # Coin tracking (_measure_dragon_coins / game_state["coins"]) is
        # currently disabled.

        if self.dqn_movement.frame_stack is None:
            self.dqn_movement.build_frame_stack(game_frame.ssim_frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=game_frame.frame.shape,
                frame_type="MINI"
            )

            if self.dqn_movement.mode == "TRAIN":
                reward = self._calculate_reward()

                self.game_state["run_reward"] += reward

                self.dqn_movement.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    # Compare the latest reading; comparing the deque itself
                    # to 0 is always False, so no step was ever terminal.
                    terminal=self.game_state["alive"][0] == 0
                )

                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/cloney_movement"
                    )

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/cloney_movement",
                        is_checkpoint=True
                    )

            elif self.dqn_movement.mode == "RUN":
                # Use the frames grabbed above, the same buffer the TRAIN
                # branch feeds to the network.
                self.dqn_movement.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            print("\033c" + f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("")

            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()

            print("")
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(f"CURRENT RUN REWARD: {round(self.game_state['run_reward'], 2)}")
            print(f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}")
            print(f"CURRENT DRAGON ALIVE: {self.game_state['alive'][0]}")

            print("")
            print("")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")

            print("")
            print(f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})")
            print("")
            print(f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds")

            # The previous reading says the dragon is gone: close the run.
            if self.game_state["alive"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                # Garbage collection is disabled per frame; collect between runs.
                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state["record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                self.input_controller.release_key(KeyboardKey.KEY_SPACE)

                if self.dqn_movement.mode == "TRAIN":
                    for i in range(8):
                        serpent.utilities.clear_terminal()
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                        print(f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}")

                        self.dqn_movement.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                # Reset the key that is accumulated and displayed above, so
                # the per-run reward starts from zero again.
                self.game_state["run_reward"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["alive"] = collections.deque(np.full((8,), 4), maxlen=8)

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Refresh the target network every 100 runs.
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 100 == 0:
                        if self.dqn_movement.type == "DDQN":
                            self.dqn_movement.update_target_model()

                    # Every 20th run is played in run mode; all others train.
                    if self.game_state["current_run"] > 0 and self.game_state["current_run"] % 20 == 0:
                        self.dqn_movement.enter_run_mode()
                    else:
                        self.dqn_movement.enter_train_mode()

                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                time.sleep(5)

                return None

        self.dqn_movement.pick_action()
        self.dqn_movement.generate_action()

        keys = self.dqn_movement.get_input_values()
        print("")
        print(" + ".join(list(map(lambda k: self.key_mapping.get(k.name), keys))))

        self.input_controller.handle_keys(keys)

        if self.dqn_movement.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_movement.erode_epsilon(factor=2)

        self.dqn_movement.next_step()

        self.game_state["current_run_steps"] += 1

    def handle_play(self, game_frame):
        """Dispatch a frame to the handler for the classified game context.

        The context classifier is asked to label ``game_frame.frame``; a
        ``None`` result means the frame is ignored.

        - "GAME_WORLD_1": print the status display and run the in-game logic.
        - "GAME_OVER": print the status display, wait, and run the game-over
          handler unless one is already in progress.
        - "MAIN_MENU": click play, wait, and record the run start time.
        - "GAME_PAUSE": run the pause handler.

        Args:
            game_frame: the frame delivered by the frame handler loop.
        """
        context = self.machine_learning_models["context_classifier"].predict(game_frame.frame)

        if context is None:
            return

        if context == "GAME_WORLD_1":
            self.display_game_agent_state(context=context)
            self.handle_play_context_game_world(game_frame=game_frame)
            self.in_progress_game_over = False
        elif context == "GAME_OVER":
            self.display_game_agent_state(context=context)
            time.sleep(2)
            if self.in_progress_game_over is False:
                self.handle_play_context_game_over(game_frame=game_frame)
        elif context == "MAIN_MENU":
            self.input_controller.click_screen_region(screen_region="MAIN_MENU_PLAY")
            time.sleep(3.5)
            run_started_at = datetime.utcnow()
            # The attribute is kept for backward compatibility; the status
            # display and the game-over handler read the game_state entry.
            self.current_run_started_at = run_started_at
            self.game_state["current_run_started_at"] = run_started_at
        elif context == "GAME_PAUSE":
            self.handle_play_context_game_pause(game_frame)

    def handle_play_context_game_world(self, game_frame):
        """Rule-based in-game step: detect objects and tap KEY_S accordingly.

        When the object detector reports it is idle, a new prediction is run
        on ``game_frame.frame``. The bounding box of a "dragon" or "leaves"
        prediction is copied into ``self.positions`` (as used here, 'bb_o'
        indices 0 and 2 are y values and 1 and 3 are x values), then the
        dragon/leaf geometry decides between warning "HIGH" and "SAFE" and
        the key-tap timing.

        NOTE(review): both arms of the decision end in ``break``, so only the
        first prediction in the list is ever processed per call. Positions of
        the other object come from earlier calls (or the zeros set in
        _reset_game_state). Confirm whether the decision was meant to run
        after the loop instead.
        """
        # Only predict if object_detector is idle
        if self.object_detector.get_status() is False:
            self.object_predictions = self.object_detector.predict(frame=game_frame.frame)

            for prediction in self.object_predictions:
                if prediction['class'] == "dragon":
                    self.positions['dragon_pos_right_x'] = prediction['bb_o'][3]
                    self.positions['dragon_pos_left_x'] = prediction['bb_o'][1]
                    self.positions['dragon_pos_mid_y'] = (prediction['bb_o'][0] + prediction['bb_o'][2]) / 2
                    self.positions['dragon_pos_mid_x'] = (prediction['bb_o'][1] + prediction['bb_o'][3]) / 2
                    self.dragon_object = prediction
                elif prediction['class'] == "leaves":
                    self.positions['leaf_pos_mid_y'] = (prediction['bb_o'][0] + prediction['bb_o'][2]) / 2
                    self.positions['leaf_pos_top_y'] = prediction['bb_o'][0]
                    self.positions['leaf_pos_bottom_y'] = prediction['bb_o'][2]
                    self.positions['leaf_pos_right_x'] = prediction['bb_o'][3]
                    self.positions['leaf_pos_left_x']= prediction['bb_o'][1]
                    self.leaf_object = prediction

                # Danger: dragon centre lies within 50 px of the leaf's vertical
                # span and the dragon's right edge is within 100 px of the
                # leaf's left edge.
                if (self.positions['dragon_pos_mid_y'] > (self.positions['leaf_pos_top_y'] - 50) and self.positions['dragon_pos_mid_y'] < (self.positions['leaf_pos_bottom_y']) + 50) and (self.positions['dragon_pos_right_x'] + 100) > self.positions['leaf_pos_left_x']: # Same height
                    self.warning = "HIGH"
                    # NOTE(review): this test repeats the last clause of the
                    # enclosing condition, so it is always true here and the
                    # two elif branches below can never run.
                    if self.positions['dragon_pos_right_x'] + 100 > self.positions['leaf_pos_left_x']:
                        self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.025)
                        time.sleep(0.1)
                    elif self.positions['dragon_pos_mid_y'] - 50 < self.positions['leaf_pos_bottom_y']:
                        time.sleep(0.225)
                        self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.025)
                    elif self.positions['dragon_pos_mid_y'] + 50 > self.positions['leaf_pos_top_y']:
                        self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.025)
                        time.sleep(0.1)
                    break
                else:
                    # No danger: tap at the regular cadence.
                    self.warning = "SAFE"
                    self.input_controller.tap_key(KeyboardKey.KEY_S, duration=0.026)
                    time.sleep(0.23)
                    break

    def handle_play_context_game_over(self, game_frame):
        """Record the finished run, OCR the result screen and start a new run.

        Steps, in order:

        1. mark the game-over handling as in progress and wait for the
           result screen;
        2. store the duration and number of the finished run and update the
           duration record;
        3. OCR three fixed regions of the grayscale frame (coins, distance,
           time) and print the raw text;
        4. click the play button, tap KEY_S after a pause, and start the
           next run's counters.

        Args:
            game_frame: the frame showing the game-over screen.
        """
        self.in_progress_game_over = True

        time.sleep(4)

        state = self.game_state

        started_at = state['current_run_started_at']
        state['last_run_duration'] = (datetime.utcnow() - started_at).seconds if started_at else 0
        state['last_run'] = state['current_run']

        # The first finished run always becomes the record. The record run
        # number is stored in that case too; it used to stay at its initial
        # value until a later run beat the record.
        if state['record_duration'] is None or state['last_run_duration'] > state['record_duration']:
            state['record_duration'] = state['last_run_duration']
            state['record_run'] = state['last_run']

        # Process Image for OCR
        gray_frame = skimage.color.rgb2gray(game_frame.frame)
        frame_coins = gray_frame[190:300, 250:780]
        frame_distance = gray_frame[355:410, 550:760]
        frame_time = gray_frame[300:355, 550:760]

        text_coins = ocr.perform_ocr(image=frame_coins, scale=2, order=5, horizontal_closing=2, vertical_closing=3)
        text_distance = ocr.perform_ocr(image=frame_distance, scale=2, order=5, horizontal_closing=2, vertical_closing=3)
        text_time = ocr.perform_ocr(image=frame_time, scale=2, order=5, horizontal_closing=2, vertical_closing=3)
        print(text_coins)
        print(text_time)
        print(text_distance)

        # TODO: parse the OCR text into game_state (last_run_coins_collected,
        # last_run_distance, last_run_duration_actual) and update the matching
        # records; for now the text is only printed.

        # Click PLAY button to start a new run
        self.input_controller.click_screen_region(screen_region="GAME_OVER_PLAY")

        # Wait for "Ready, Set, Tap"
        time.sleep(3)
        self.input_controller.tap_key(KeyboardKey.KEY_S)
        time.sleep(0.2)

        state['current_run'] += 1
        state['current_run_started_at'] = datetime.utcnow()

    def handle_play_context_main_menu(self, game_frame):
        self.input_controller.click_screen_region(screen_region="MAIN_MENU_PLAY")

    def handle_play_context_game_pause(self, game_frame):
        """Wait one second, then click the "GAME_PAUSE" screen region.

        ``game_frame`` is accepted for handler symmetry but not used.
        """
        pause_region = "GAME_PAUSE"
        time.sleep(1)
        self.input_controller.click_screen_region(screen_region=pause_region)

    def display_game_agent_state(self, context):
        """Clear the terminal and print the agent's status report.

        Also refreshes ``game_state['current_run_duration']`` from the
        stored run start time before printing.

        Args:
            context: the context label shown under "Current Context".
        """
        state = self.game_state
        elapsed = datetime.utcnow() - state['current_run_started_at']
        state['current_run_duration'] = elapsed.seconds

        separator = "======================================================"

        print("\033c")
        print(separator)
        print("GAME: Cloney    PLATFORM: Steam    VERSION: v0.0.1")
        print(separator)

        print("")

        print(xtermcolor.colorize("OBJECT DETECTION", ansi=9))
        print(f"Detected:                   {len(self.object_predictions)} objects")

        # The danger line is printed only for the two known warning levels.
        danger_colors = {"HIGH": 1, "SAFE": 2}
        if self.warning in danger_colors:
            print(xtermcolor.colorize(f"Danger Level:               {self.warning}", ansi=danger_colors[self.warning]))

        print("")

        print(xtermcolor.colorize("GAME STATISTICS", ansi=9))
        print(f"Current Context:            {context}\n")
        print(f"Current Run:                #{state['current_run']}")
        print(f"Current Run Duration:       {state['current_run_duration']}s")
        print("")
        print(f"Last Run:                   #{state['last_run']}")
        print(f"Last Run Duration:          {state['last_run_duration']}s")
        print(f"Last Run Duration Actual:   {state['last_run_duration_actual']}")
        print(f"Last Run Distance:          {state['last_run_distance']}m")
        print(f"Last Run Coins Collected:   {state['last_run_coins_collected']}")
        print(f"Record Duration:            {state['record_duration']}s (Run #{state['record_run']})")

    def _reset_game_state(self):
        # Display Variables
        self.game_state = {
            "alive": collections.deque(np.full((8,), 4), maxlen=8),
            "coins": collections.deque(np.full((8,), 0), maxlen=8),
            "current_run": 1,
            "current_run_started_at": datetime.utcnow(),
            "current_run_duration": None,
            "current_run_steps": 0,
            "run_reward": 0,
            "run_future_rewards": 0,
            "run_predicted_actions": 0,
            "run_timestamp": datetime.utcnow(),
            "last_run": 0,
            "last_run_duration": 0,
            "last_run_duration_actual": None,
            "last_run_distance": 0.0,
            "last_run_coins_collected": 0,
            "record_duration": None,
            "record_duration_actual": 0,
            "record_run": 0,
            "record_distance": 0.0,
            "record_coins_collected": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "random_distance_travelled": 0.0
            }

        # Object Detection Variables
        self.object_predictions = []
        self.warning = ""
        self.dragon_object = []
        self.leaf_object = []
        self.positions = {
            'leaf_pos_mid_y': 0,
            'leaf_pos_right_x': 0,
            'leaf_pos_left_x': 0,
            'leaf_pos_top_y': 0,
            'leaf_pos_bottom_y': 0,
            'dragon_pos_right_x': 0,
            'dragon_pos_left_x': 0,
            'dragon_pos_mid_y': 0,
            'dragon_pos_mid_x': 0
        }

        # Other Variables
        self.pos_d = -1
        self.pos_t = -1
        self.in_progress_game_over = False
    def _measure_dragon_alive(self, game_frame):
        """Report whether any known sprite resembles the "DOLLAR_AREA" region.

        The region is cropped from ``game_frame.frame`` and compared, via
        structural similarity, against every image of every sprite in
        ``self.game.sprites``.

        Args:
            game_frame: the frame to inspect.

        Returns:
            1 when at least one sprite image has a similarity above 0,
            otherwise 0. An integer is always returned so that callers can
            compare readings numerically; returning None here made those
            comparisons raise TypeError.
        """
        dollar_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["DOLLAR_AREA"])

        dragon_alive = 0
        max_ssim = 0

        for sprite in self.game.sprites.values():
            # The last axis of image_data indexes the sprite's image variants.
            for i in range(sprite.image_data.shape[3]):
                ssim = skimage.measure.compare_ssim(dollar_area_frame, np.squeeze(sprite.image_data[..., :3, i]), multichannel=True)

                if ssim > max_ssim:
                    max_ssim = ssim
                    dragon_alive = 1

        return dragon_alive

    def _measure_dragon_coins(self, game_frame):
        """Measure how much of the "COINS_AREA" region has channel 2 above 150.

        Returns:
            The ``size`` of the array selected by that mask, i.e. the number
            of selected values rather than a parsed coin count.
        """
        region = self.game.screen_regions["COINS_AREA"]
        coins_area = serpent.cv.extract_region_from_image(game_frame.frame, region)

        bright_mask = coins_area[..., 2] > 150

        return coins_area[bright_mask].size

    def _calculate_reward(self):
        reward = 0

        reward += (-0.5 if self.game_state["alive"][0] < self.game_state["alive"][1] else 0.05)
        # reward += (0.5 if (self.game_state["coins"][0] - self.game_state["coins"][1]) >= 1 else -0.05)

        return reward

示例#4

显示文件

文件： serpent_Robo_game_agent.py 项目： m1ndgames/SerpentRoboGameAgentPlugin

class SerpentRoboGameAgent(GameAgent):
    def __init__(self, **kwargs):
        """Create the agent, register the "PLAY" handler and reset its state."""
        super().__init__(**kwargs)

        # Shared locator for the menu and fight sprites.
        self.sprite_locator = SpriteLocator()

        self.frame_handler_setups["PLAY"] = self.setup_play
        self.frame_handlers["PLAY"] = self.handle_play

        self.game_state = None
        self._reset_game_state()

    def setup_play(self):
        """Define the key mappings and build the two DDQNs used in fights.

        One network chooses movement keys and the other chooses attack keys.
        Both are given the complete input mapping and identical
        hyper-parameters; they differ only in action space and weights file.
        Saved weights are loaded only when the file exists.
        """
        input_mapping = {
            "W": [KeyboardKey.KEY_W],
            "A": [KeyboardKey.KEY_A],
            "S": [KeyboardKey.KEY_S],
            "D": [KeyboardKey.KEY_D],
            "WA": [KeyboardKey.KEY_W, KeyboardKey.KEY_A],
            "WD": [KeyboardKey.KEY_W, KeyboardKey.KEY_D],
            "SA": [KeyboardKey.KEY_S, KeyboardKey.KEY_A],
            "SD": [KeyboardKey.KEY_S, KeyboardKey.KEY_D],
            "J": [KeyboardKey.KEY_J],
            "K": [KeyboardKey.KEY_K],
            "L": [KeyboardKey.KEY_L],
            "U": [KeyboardKey.KEY_U],
            "I": [KeyboardKey.KEY_I],
            "O": [KeyboardKey.KEY_O],
            "JU": [KeyboardKey.KEY_J, KeyboardKey.KEY_U],
            "KI": [KeyboardKey.KEY_K, KeyboardKey.KEY_I],
            "LO": [KeyboardKey.KEY_L, KeyboardKey.KEY_O],
            "N": [KeyboardKey.KEY_N],
            "M": [KeyboardKey.KEY_M],
            "NONE": []
        }

        # Human-readable labels for each physical key.
        self.key_mapping = {
            KeyboardKey.KEY_W.name: "MOVE UP",
            KeyboardKey.KEY_A.name: "MOVE LEFT",
            KeyboardKey.KEY_S.name: "MOVE DOWN",
            KeyboardKey.KEY_D.name: "MOVE RIGHT",
            KeyboardKey.KEY_J.name: "LIGHT PUNCH",
            KeyboardKey.KEY_K.name: "MEDIUM PUNCH",
            KeyboardKey.KEY_L.name: "HARD PUNCH",
            KeyboardKey.KEY_U.name: "LIGHT KICK",
            KeyboardKey.KEY_I.name: "MEDIUM KICK",
            KeyboardKey.KEY_O.name: "HARD KICK",
            KeyboardKey.KEY_N.name: "START",
            KeyboardKey.KEY_M.name: "SELECT"
        }

        movement_action_space = KeyboardMouseActionSpace(
            directional_keys=["W", "A", "S", "D", "WA", "WD", "SA", "SD", "NONE"])

        fightinput_action_space = KeyboardMouseActionSpace(
            fightinput_keys=["J", "K", "L", "U", "I", "O", "JU", "KI", "LO", "NONE"])

        def build_dqn(relative_path, action_space):
            # Both networks share every setting except weights file and actions.
            weights_path = relative_path.replace("/", os.sep)
            return DDQN(
                model_file_path=weights_path if os.path.isfile(weights_path) else None,
                input_shape=(100, 100, 4),
                input_mapping=input_mapping,
                action_space=action_space,
                replay_memory_size=5000,
                max_steps=1000000,
                observe_steps=1000,
                batch_size=32,
                initial_epsilon=1,
                final_epsilon=0.01,
                override_epsilon=False)

        self.dqn_movement = build_dqn(
            "datasets/fighting_movement_dqn_0_1_.h5", movement_action_space)
        self.dqn_fightinput = build_dqn(
            "datasets/fighting_fightinput_dqn_0_1_.h5", fightinput_action_space)

        print("Debug: Game Started")

    def handle_play(self, game_frame):
        """Work out which screen is showing and dispatch to its handler.

        Every known marker sprite is searched for in ``game_frame``, both
        players' health readings are pushed onto the game-state histories,
        and the first matching marker (in priority order) selects the
        handler. Nothing happens when no marker is found.

        Args:
            game_frame: the frame delivered by the frame handler loop.
        """
        def locate(sprite_name):
            # Use the locator created in __init__; the bare name
            # ``sprite_locator`` is not defined inside this method.
            return self.sprite_locator.locate(
                sprite=self.game.sprites[sprite_name],
                game_frame=game_frame)

        title_locator = locate('SPRITE_TITLE_TEXT')
        menu_locator = locate('SPRITE_MAINMENU_TEXT')
        fightmenu_select_locator = locate('SPRITE_FIGHTMENU_SELECT')
        playerselect_locator = locate('SPRITE_PLAYERSELECT')
        # The back button used to be located twice per frame; once is enough.
        backbutton_locator = locate('SPRITE_BACKBUTTON')
        fightcheck_locator = locate('SPRITE_FIGHTCHECK')
        roundstart_locator = locate('SPRITE_ROUNDSTART')
        retrybutton_locator = locate('SPRITE_FIGHTMENU_RETRY')

        # NOTE(review): readhp is not defined in this part of the file;
        # presumably a module-level helper - confirm.
        (self.p1hp, self.p2hp) = readhp()
        self.game_state["health"].appendleft(self.p1hp)
        self.game_state["enemy_health"].appendleft(self.p2hp)

        # Priority order matters: fight-related markers win over menu markers.
        if roundstart_locator:
            self.game_state["fightstarted"] = True
        elif retrybutton_locator:
            self.handle_fight_end(game_frame)
        elif fightcheck_locator:
            self.handle_fight(game_frame)
        elif title_locator:
            self.handle_menu_title(game_frame)
        elif menu_locator:
            self.handle_menu_select(game_frame)
        elif playerselect_locator:
            self.handle_player_select(game_frame)
        elif backbutton_locator:
            self.handle_backbutton(game_frame)
        elif fightmenu_select_locator and self.game_state["current_run"] != 1:
            self.handle_fightmenu_select(game_frame)

    def handle_retry_button(self, game_frame):
        """Restart the fight, or change opponent on every 25th run.

        When ``game_state["current_run"]`` is a multiple of 25, KEY_S is
        tapped before KEY_J to pick the "change opponent" entry; otherwise
        only KEY_J is tapped. ``game_frame`` is unused.
        """
        change_opponent = self.game_state["current_run"] % 25 == 0

        print(Fore.RED + ('Changing Opponent' if change_opponent else 'Restarting Fight'))
        print(Style.RESET_ALL)
        time.sleep(1)

        if change_opponent:
            # Move the menu cursor down one entry first.
            self.input_controller.tap_key(KeyboardKey.KEY_S)
            time.sleep(0.5)

        self.input_controller.tap_key(KeyboardKey.KEY_J)
        time.sleep(1)

    def handle_backbutton(self, game_frame):
        """Announce and tap KEY_M, then wait a second; ``game_frame`` is unused."""
        print(Fore.RED + 'Pressing Select', Style.RESET_ALL, sep="\n")

        select_key = KeyboardKey.KEY_M
        self.input_controller.tap_key(select_key)
        time.sleep(1)

    def handle_menu_title(self, game_frame):
        """Announce and tap KEY_J on the title screen, then wait two seconds.

        ``game_frame`` is unused.
        """
        print(Fore.RED + 'Pressing Start', Style.RESET_ALL, sep="\n")

        confirm_key = KeyboardKey.KEY_J
        self.input_controller.tap_key(confirm_key)
        time.sleep(2)

    def handle_fightmenu_select(self, game_frame):
        """Tap KEY_J in the fight menu and wait two seconds; ``game_frame`` is unused."""
        confirm_key = KeyboardKey.KEY_J
        self.input_controller.tap_key(confirm_key)
        time.sleep(2)

    def handle_player_select(self, game_frame):
        """Step through the character-select screens with a fixed key script.

        The script is a sequence of phases; each phase prints a message and
        then taps its keys, pausing after every tap. A final KEY_J tap
        starts the game. ``game_frame`` is unused.
        """
        time.sleep(1)

        key = KeyboardKey
        # (message, ((key to tap, pause in seconds after the tap), ...))
        phases = (
            (Fore.RED + 'Choosing one Char',
             ((key.KEY_A, 0.3), (key.KEY_J, 0.5))),
            ("Choosing Robo",
             ((key.KEY_S, 0.3), (key.KEY_S, 0.3), (key.KEY_D, 0.3), (key.KEY_J, 0.5))),
            ("Choosing one CPU Char",
             ((key.KEY_A, 0.3), (key.KEY_J, 0.3))),
            ("Choosing Random CPU Char",
             ((key.KEY_J, 0.3),)),
            ("Starting Game", ()),
        )

        for message, presses in phases:
            print(message)
            for pressed_key, pause in presses:
                self.input_controller.tap_key(pressed_key)
                time.sleep(pause)

        print(Style.RESET_ALL)
        self.input_controller.tap_key(key.KEY_J)
        time.sleep(1)

    def handle_menu_select(self, game_frame):
        """Start single-player mode from the main menu, or move the cursor.

        When the single-player marker sprite is found in ``game_frame`` the
        key sequence J, S, S, J is tapped with one-second pauses. Otherwise
        KEY_S is tapped once to move the cursor.

        Args:
            game_frame: the frame showing the main menu.
        """
        # Use the locator created in __init__; the bare name
        # ``sprite_locator`` is not defined inside this method.
        menu_selector = self.sprite_locator.locate(
            sprite=self.game.sprites['SPRITE_MAINMENU_SINGLEPLAY'],
            game_frame=game_frame)

        if menu_selector:
            print(Fore.RED + 'Starting Singleplayer Mode')
            print(Style.RESET_ALL)
            for menu_key in (KeyboardKey.KEY_J, KeyboardKey.KEY_S,
                             KeyboardKey.KEY_S, KeyboardKey.KEY_J):
                self.input_controller.tap_key(menu_key)
                time.sleep(1)
        else:
            self.input_controller.tap_key(KeyboardKey.KEY_S)
            time.sleep(1)

    def handle_fight(self, game_frame):
        """Advance the movement and fight-input DDQN agents by one step.

        Returns immediately while ``game_state["fightstarted"]`` is falsy, or
        while the two newest health readings of either fighter are both zero.
        The first call with ``first_run`` set only clears that flag on both
        networks. The next call builds the frame stack (shared by both
        networks). Every later call:

        * in TRAIN mode, computes the rewards, appends the transition to both
          replay memories and saves weights every 2000 / 20000 steps;
        * in RUN mode, only updates both frame stacks;
        * prints the status screen, picks and generates an action on both
          networks, sends the combined keys and advances the step counters.

        ``game_frame`` is not read; frames come from ``FrameGrabber``.
        """
        gc.disable()

        if not self.game_state["fightstarted"]:
            return

        health = self.game_state["health"]
        enemy_health = self.game_state["enemy_health"]

        # Skip the frame while either fighter's two newest readings are zero.
        if ((health[0] == 0 and health[1] == 0)
                or (enemy_health[0] == 0 and enemy_health[1] == 0)):
            return

        if self.dqn_movement.first_run:
            self.dqn_movement.first_run = False
            self.dqn_fightinput.first_run = False
            return None

        frame_shape = (self.game.frame_height, self.game.frame_width)

        if self.dqn_movement.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=frame_shape,
                frame_type="PIPELINE",
                dtype="float64").frames[0]

            self.dqn_movement.build_frame_stack(pipeline_game_frame.frame)
            # The fight-input network reuses the stack built for movement.
            self.dqn_fightinput.frame_stack = self.dqn_movement.frame_stack
            return

        game_frame_buffer = FrameGrabber.get_frames(
            [0, 4, 8, 12],
            frame_shape=frame_shape,
            frame_type="PIPELINE",
            dtype="float64")

        if self.dqn_movement.mode == "TRAIN":
            reward_movement, reward_fightinput = self._calculate_reward()

            self.game_state["run_reward_movement"] += reward_movement
            self.game_state["run_reward_fightinput"] += reward_fightinput

            # "health" is a deque of recent readings. Comparing the deque
            # itself with 0 is always False, so the transition was never
            # flagged as terminal; test the newest reading instead.
            is_terminal = bool(health[0] == 0)

            self.dqn_movement.append_to_replay_memory(
                game_frame_buffer, reward_movement, terminal=is_terminal)

            self.dqn_fightinput.append_to_replay_memory(
                game_frame_buffer, reward_fightinput, terminal=is_terminal)

            # Every 2000 steps, save latest weights to disk
            if self.dqn_movement.current_step % 2000 == 0:
                self.dqn_movement.save_model_weights(
                    file_path_prefix="datasets/fighting_movement")

                self.dqn_fightinput.save_model_weights(
                    file_path_prefix="datasets/fighting_fightinput")

            # Every 20000 steps, save weights checkpoint to disk
            if self.dqn_movement.current_step % 20000 == 0:
                self.dqn_movement.save_model_weights(
                    file_path_prefix="datasets/fighting_movement",
                    is_checkpoint=True)

                self.dqn_fightinput.save_model_weights(
                    file_path_prefix="datasets/fighting_fightinput",
                    is_checkpoint=True)
        elif self.dqn_movement.mode == "RUN":
            self.dqn_movement.update_frame_stack(game_frame_buffer)
            self.dqn_fightinput.update_frame_stack(game_frame_buffer)

        # Status screen.
        run_time = datetime.now() - self.started_at
        serpent.utilities.clear_terminal()
        print("")
        print(Fore.YELLOW)
        print(Style.BRIGHT)
        print(
            f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
        )
        print(Style.RESET_ALL)
        print("")
        print(Fore.GREEN)
        print(Style.BRIGHT)
        print("MOVEMENT NEURAL NETWORK:\n")
        self.dqn_movement.output_step_data()
        print("")
        print("FIGHT NEURAL NETWORK:\n")
        self.dqn_fightinput.output_step_data()
        print(Style.RESET_ALL)
        print("")
        print(Style.BRIGHT)
        print(f"CURRENT RUN: {self.game_state['current_run']}")
        print(
            f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds"
        )
        print("")
        print(
            f"CURRENT RUN REWARD: {round(self.game_state['run_reward_movement'] + self.game_state['run_reward_fightinput'], 4)}"
        )
        print(
            f"COMBO MULTIPLICATOR: {self.game_state['multiplier_damage']}")
        print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
        print(
            f"CURRENT ENEMY HEALTH: {self.game_state['enemy_health'][0]}")
        print("")
        print(
            f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
        )
        print(Style.RESET_ALL)

        # Movement picks first; the fight-input network is then told which
        # action type (e.g. "PREDICTED") movement ended up with.
        self.dqn_movement.pick_action()
        self.dqn_movement.generate_action()

        self.dqn_fightinput.pick_action(
            action_type=self.dqn_movement.current_action_type)
        self.dqn_fightinput.generate_action()

        movement_keys = self.dqn_movement.get_input_values()
        fightinput_keys = self.dqn_fightinput.get_input_values()
        pressed_keys = movement_keys + fightinput_keys

        print("")
        print(Fore.GREEN)
        print(Style.BRIGHT)
        print(" + ".join(
            self.key_mapping.get(key.name) for key in pressed_keys))
        print(Style.RESET_ALL)

        self.input_controller.handle_keys(pressed_keys)

        if self.dqn_movement.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_movement.erode_epsilon(factor=2)
        self.dqn_fightinput.erode_epsilon(factor=2)

        self.dqn_movement.next_step()
        self.dqn_fightinput.next_step()

        self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8, ), 6), maxlen=8),
            "enemy_health": collections.deque(np.full((8, ), 654), maxlen=8),
            "current_run": 1,
            "current_run_steps": 0,
            "run_reward_movement": 0,
            "run_reward_fightinput": 0,
            "run_future_rewards": 0,
            "run_predicted_actions": 0,
            "run_timestamp": datetime.utcnow(),
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "record_enemy_hp": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "random_enemy_hp": None,
            "random_enemy_hps": list(),
            "fightstarted": None,
            "multiplier_damage": 0
        }

    def _calculate_reward(self):
        reward_movement = 0
        reward_fightinput = 0

        # getting hit by enemy
        if self.game_state["health"][0] < self.game_state["health"][1]:
            self.game_state["multiplier_damage"] = 0
            reward_movement += -0.10
            reward_fightinput += -0.10
        else:
            reward_movement += 0.05

        # hitting the enemy
        if self.game_state["enemy_health"][0] < self.game_state[
                "enemy_health"][1]:
            # combo multiplicator
            self.game_state["multiplier_damage"] += 0.20
            if self.game_state["multiplier_damage"] > 1:
                self.game_state["multiplier_damage"] = 1

            # check how much dmg the attack did and add 0.05 per class to reward
            if (self.game_state["enemy_health"][1] -
                    self.game_state["enemy_health"][0]) > 150:  # light
                reward_fightinput += 0.05
            if (self.game_state["enemy_health"][1] -
                    self.game_state["enemy_health"][0]) > 500:  # medium
                reward_fightinput += 0.05
            if (self.game_state["enemy_health"][1] -
                    self.game_state["enemy_health"][0]) > 750:  # hard
                reward_fightinput += 0.05

            # calculate reward
            reward_fightinput += (1 * self.game_state["multiplier_damage"])
        else:
            reward_fightinput += -0.05
            reward_movement += -0.01

        # enemy wasnt hit for 5 rounds
        if self.game_state["enemy_health"][0] == self.game_state[
                "enemy_health"][5]:
            self.game_state["multiplier_damage"] = 0

        # return rewards
        return reward_movement, reward_fightinput

    def handle_fight_end(self, game_frame):
        """Mark the fight as over, release keys, count the run, then train.

        Clears ``fightstarted``, sends an empty key list to the input
        controller, increments ``current_run`` and hands ``game_frame`` on to
        ``handle_fight_training``.
        """
        state = self.game_state
        state["fightstarted"] = None

        no_keys = []
        self.input_controller.handle_keys(no_keys)

        state["current_run"] = state["current_run"] + 1
        self.handle_fight_training(game_frame)

    def handle_fight_training(self, game_frame):
        """Wrap up a finished fight: records, training, state reset, retry.

        Steps, in order:

        1. store the run duration and force one garbage collection;
        2. in TRAIN/RUN mode update the time-alive and enemy-HP records,
           otherwise extend the random-play statistics;
        3. release all keys and, in TRAIN mode, train both networks on 16
           mini-batches;
        4. reset the per-run counters and both health histories;
        5. in TRAIN/RUN mode refresh the target models on every 100th run and
           choose RUN mode on every 20th run (TRAIN mode otherwise);
        6. pass ``game_frame`` to ``handle_retry_button``.
        """
        serpent.utilities.clear_terminal()
        timestamp = datetime.utcnow()
        timestamp_delta = timestamp - self.game_state["run_timestamp"]
        self.game_state["last_run_duration"] = timestamp_delta.seconds

        # One explicit collection here; automatic collection is switched off
        # again right afterwards.
        gc.enable()
        gc.collect()
        gc.disable()

        if self.dqn_movement.mode in ["TRAIN", "RUN"]:
            # Check for Records
            if self.game_state["last_run_duration"] > self.game_state[
                    "record_time_alive"].get("value", 0):
                self.game_state["record_time_alive"] = {
                    "value": self.game_state["last_run_duration"],
                    "run": self.game_state["current_run"],
                    "predicted": self.dqn_movement.mode == "RUN",
                    "enemy_hp": self.game_state["enemy_health"][0]
                }

            if self.game_state["enemy_health"][0] < self.game_state[
                    "record_enemy_hp"].get("value", 1000):
                self.game_state["record_enemy_hp"] = {
                    "value": self.game_state["enemy_health"][0],
                    "run": self.game_state["current_run"],
                    "predicted": self.dqn_movement.mode == "RUN",
                    "time_alive": self.game_state["last_run_duration"]
                }
        else:
            self.game_state["random_time_alives"].append(
                self.game_state["last_run_duration"])
            self.game_state["random_enemy_hps"].append(
                self.game_state["enemy_health"][0])

            self.game_state["random_time_alive"] = np.mean(
                self.game_state["random_time_alives"])
            self.game_state["random_enemy_hp"] = np.mean(
                self.game_state["random_enemy_hps"])

        self.game_state["current_run_steps"] = 0

        self.input_controller.handle_keys([])

        if self.dqn_movement.mode == "TRAIN":
            # The upcoming run is played in RUN mode exactly when the mode
            # switch at the end of this method sees current_run % 20 == 0.
            # The banner previously tested (current_run + 1) % 25, which
            # flagged runs that were not AI runs; use the same test here.
            current_run = self.game_state["current_run"]
            next_run_is_ai = current_run > 0 and current_run % 20 == 0
            ai_suffix = '- AI RUN' if next_run_is_ai else ''

            for i in range(16):
                serpent.utilities.clear_terminal()
                print("")
                print(Fore.GREEN)
                print(Style.BRIGHT)
                print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                print(
                    f"NEXT RUN: {self.game_state['current_run'] + 1} {ai_suffix}"
                )
                print(Style.RESET_ALL)

                self.dqn_movement.train_on_mini_batch()
                self.dqn_fightinput.train_on_mini_batch()

        # Fresh per-run bookkeeping for the next fight.
        self.game_state["run_timestamp"] = datetime.utcnow()
        self.game_state["run_reward_movement"] = 0
        self.game_state["run_reward_fightinput"] = 0
        self.game_state["run_predicted_actions"] = 0
        self.game_state["health"] = collections.deque(np.full((8, ), 6),
                                                      maxlen=8)
        self.game_state["enemy_health"] = collections.deque(np.full((8, ),
                                                                    654),
                                                            maxlen=8)

        if self.dqn_movement.mode in ["TRAIN", "RUN"]:
            if self.game_state["current_run"] > 0 and self.game_state[
                    "current_run"] % 100 == 0:
                self.dqn_movement.update_target_model()
                self.dqn_fightinput.update_target_model()

            if self.game_state["current_run"] > 0 and self.game_state[
                    "current_run"] % 20 == 0:
                self.dqn_movement.enter_run_mode()
                self.dqn_fightinput.enter_run_mode()
            else:
                self.dqn_movement.enter_train_mode()
                self.dqn_fightinput.enter_train_mode()

        self.handle_retry_button(game_frame)

示例#5

显示文件

class SerpentGeometryDashGameAgent(GameAgent):
    """Serpent game agent that drives Geometry Dash with a single DDQN.

    The action space has two entries, ``None`` and ``"SPACE"``. Health and
    score are both derived from an Otsu threshold over the ``SCORE_AREA``
    screen region, and the reward is the newest score.
    """

    def __init__(self, **kwargs):
        """Register the PLAY frame handler and its setup; initialise state."""
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play

        self.frame_handler_setups["PLAY"] = self.setup_play

        self.game_state = None
        self._reset_game_state()

    def setup_play(self):
        """Create ``self.key_mapping`` and the ``self.dqn_action`` DDQN.

        Weights are loaded from ``datasets/GeometryDash_action_dqn_0_1_.h5``
        only when that file exists; otherwise no model file is passed.
        """
        input_mapping = {"SPACE": [KeyboardKey.KEY_SPACE]}

        # Keyed by key *name*, because handle_play looks keys up via k.name.
        self.key_mapping = {KeyboardKey.KEY_SPACE.name: "SPACE"}

        action_space = KeyboardMouseActionSpace(action_keys=[None, "SPACE"])

        action_model_file_path = "datasets/GeometryDash_action_dqn_0_1_.h5".replace(
            "/", os.sep)

        self.dqn_action = DDQN(
            model_file_path=action_model_file_path
            if os.path.isfile(action_model_file_path) else None,
            input_shape=(self.game.frame_height, self.game.frame_width, 4),
            input_mapping=input_mapping,
            action_space=action_space,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=10000,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=0.25,
            final_epsilon=0.01,
            override_epsilon=False)

    def handle_play(self, game_frame):
        """Process one PLAY frame: measure, learn, report and act.

        The first call only taps SPACE, clears ``first_run`` and waits five
        seconds. Later calls record health and score from ``game_frame``,
        build or update the frame stack, store the transition (TRAIN mode),
        print the status screen and, when the previous health reading is
        zero or less, finish the run (records, mini-batch training, state
        reset, mode selection, restart) and return. Otherwise an action is
        picked and its keys are sent.
        """
        gc.disable()

        # Use a dedicated loop name here. Reusing ``game_frame`` would replace
        # the handler's argument with the last buffered frame, and the health
        # and score below would then be measured on that frame instead.
        for i, buffered_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(
                buffered_frame.grayscale_frame,
                buffered_frame.grayscale_frame.shape, str(i))

        if self.dqn_action.first_run:
            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

            self.dqn_action.first_run = False

            time.sleep(5)

            return None

        actor_hp = self._measure_actor_hp(game_frame)
        run_score = self._measure_run_score(game_frame)

        self.game_state["health"].appendleft(actor_hp)
        self.game_state["score"].appendleft(run_score)

        if self.dqn_action.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE").frames[0]

            self.dqn_action.build_frame_stack(full_game_frame.frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE")

            if self.dqn_action.mode == "TRAIN":
                reward_action = self._calculate_reward()

                self.game_state["run_reward_action"] = max(
                    self.game_state["run_reward_action"], reward_action)

                # "health" is a deque of readings. Comparing the deque itself
                # with 0 is always False, so no transition was ever marked
                # terminal; test the newest reading instead.
                is_terminal = bool(self.game_state["health"][0] == 0)

                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer, reward_action, terminal=is_terminal)

                # Every 2000 steps, save latest weights to disk
                if self.dqn_action.current_step % 2000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix="datasets/GeometryDash_action")

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_action.current_step % 20000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix="datasets/GeometryDash_action",
                        is_checkpoint=True)

            elif self.dqn_action.mode == "RUN":
                self.dqn_action.update_frame_stack(game_frame_buffer)

            self._print_session_header()

            self.dqn_action.output_step_data()

            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(
                f"CURRENT RUN REWARD: {round(self.game_state['run_reward_action'], 2)}"
            )
            print(
                f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            print("")
            print(
                f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds"
            )

            self._print_record_summary()

            # The run ends once the *previous* health reading is zero or less.
            if self.game_state["health"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                # One explicit collection between runs; automatic collection
                # is switched off again right afterwards.
                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_action.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state[
                            "record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_action.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(
                        self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(
                        self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                self.input_controller.handle_keys([])

                if self.dqn_action.mode == "TRAIN":
                    for i in range(8):
                        self._print_session_header()

                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/8")
                        print(
                            f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}"
                        )

                        print(f"LAST RUN: {self.game_state['current_run']}")
                        print(
                            f"LAST RUN REWARD: {round(self.game_state['run_reward_action'], 2)}"
                        )
                        print(
                            f"LAST RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
                        )
                        print(f"LAST SCORE: {self.game_state['score'][0]}")

                        self._print_record_summary()

                        self.dqn_action.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8, ),
                                                                      3),
                                                              maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8, ), 0),
                                                             maxlen=8)

                if self.dqn_action.mode in ["TRAIN", "RUN"]:
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 100 == 0:
                        if self.dqn_action.type == "DDQN":
                            self.dqn_action.update_target_model()

                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 20 == 0:
                        self.dqn_action.enter_run_mode()
                    else:
                        self.dqn_action.enter_train_mode()

                # Tap SPACE and give the game a second before the next frame.
                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)
                time.sleep(1)

                return None

        self.dqn_action.pick_action()
        self.dqn_action.generate_action()

        keys = self.dqn_action.get_input_values()
        print("")

        print("PRESSING: ", end='')
        print(" + ".join(self.key_mapping.get(k.name) for k in keys))

        self.input_controller.handle_keys(keys)

        if self.dqn_action.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_action.erode_epsilon(factor=2)

        self.dqn_action.next_step()

        self.game_state["current_run_steps"] += 1

    def _print_session_header(self):
        """Clear the terminal and print the session run time and title line."""
        run_time = datetime.now() - self.started_at

        serpent.utilities.clear_terminal()

        print(
            f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
        )
        print(
            "GAME: GeometryDash                 PLATFORM: Steam                AGENT: DDQN + Prioritized Experience Replay"
        )
        print("")

    def _print_record_summary(self):
        """Print the time-alive record and the random-play average."""
        print("")
        print(
            f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})"
        )
        print("")

        print(
            f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds"
        )

    def _reset_game_state(self):
        """Replace ``self.game_state`` with its initial values.

        ``health`` and ``score`` are 8-slot deques pre-filled with 3 and 0.
        """
        self.game_state = {
            "health": collections.deque(np.full((8, ), 3), maxlen=8),
            "score": collections.deque(np.full((8, ), 0), maxlen=8),
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 0,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
        }

    def _measure_actor_hp(self, game_frame):
        """Return 1 when the Otsu threshold of the region exceeds 90, else 0.

        A ``ValueError`` from the threshold computation is treated as a
        threshold of 0.
        """
        # NOTE(review): this reads the "SCORE_AREA" region, the same region
        # used for the score - confirm that is the intended health source.
        hp_area_grayscale = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame, self.game.screen_regions["SCORE_AREA"])

        try:
            threshold = skimage.filters.threshold_otsu(hp_area_grayscale)
        except ValueError:
            threshold = 0

        return (1 if threshold > 90 else 0)

    def _measure_run_score(self, game_frame):
        """Return the count of above-threshold pixels in ``SCORE_AREA`` as a string.

        The value is also stored in ``game_state["current_run_score"]``. A
        ``ValueError`` from the threshold computation is treated as a
        threshold of 0.
        """
        score_grayscale = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame, self.game.screen_regions["SCORE_AREA"])

        try:
            threshold = skimage.filters.threshold_otsu(score_grayscale)
        except ValueError:
            threshold = 0

        bw_score_bar = score_grayscale > threshold

        # Kept as a string; _calculate_reward converts it back with int().
        score = str(bw_score_bar[bw_score_bar > 0].size)

        self.game_state["current_run_score"] = score

        return score

    def _calculate_reward(self):
        """Return the newest score reading converted to ``int``."""
        reward = int(self.game_state["score"][0])

        return reward

示例#6

显示文件

文件： serpent_SlayTheSpire_game_agent.py 项目： tdar31/SerpentSLSGameAgentPlugin

class SerpentSlayTheSpireGameAgent(GameAgent):

    def __init__(self, **kwargs):
        """Initialise the agent, its state, and the PLAY frame handlers.

        Also sets the module-level ``prevContext`` to the string ``"None"``
        and prints a start-up banner on a cleared terminal.
        """
        global prevContext

        super().__init__(**kwargs)

        prevContext = "None"

        self.game_state = None
        self._reset_game_state()

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        serpent.utilities.clear_terminal()
        for banner_line in ("------------------------------------",
                            "Starting up . . . "):
            print(banner_line)
        
    def setup_play(self):
        """Load the context classifier and build the card-selection DDQN.

        The classifier is stored under
        ``machine_learning_models["context_classifier"]``. The module-level
        ``input_mapping`` is set to the keys 1-5 and E, and saved weights are
        passed to the DDQN only if the model file exists on disk.
        """
        global input_mapping

        for banner_line in ("------------------------------------",
                            "Loading Image Classifer . . . ",
                            "------------------------------------"):
            print(banner_line)

        plugin_path = offshoot.config["file_paths"]["plugins"]
        context_classifier_path = f"{plugin_path}/SerpentSlayTheSpireGameAgentPlugin/files/ml_models/context_classifier.model"

        # input_shape is (rows, cols, channels) of the captured context frames
        classifier = CNNInceptionV3ContextClassifier(input_shape=(384, 512, 3))
        classifier.prepare_generators()
        classifier.load_classifier(context_classifier_path)
        self.machine_learning_models["context_classifier"] = classifier

        # Each card slot maps to the keyboard key named after it.
        input_mapping = {
            slot: [getattr(KeyboardKey, f"KEY_{slot}")]
            for slot in (1, 2, 3, 4, 5, "E")
        }

        action_space = KeyboardMouseActionSpace(
            card_inputs=list(input_mapping))

        card_selection_model_file_path = "datasets/tdar31_slaythespire_dqn_0.9981189999999986_.h5".replace("/", os.sep)
        if os.path.isfile(card_selection_model_file_path):
            saved_model_path = card_selection_model_file_path
        else:
            saved_model_path = None

        # DDQN setup
        self.dqn_card_selection = DDQN(
            model_file_path=saved_model_path,
            input_shape=(90, 160, 4),
            input_mapping=input_mapping,
            action_space=action_space,
            replay_memory_size=1000,
            max_steps=1000,
            observe_steps=100,
            batch_size=64,
            model_learning_rate=1e-4,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=True
        )

    def find_index(self):
        """Print the placeholder text ``TEST`` and return ``None``."""
        print("TEST")
        return None

    def handle_play(self, game_frame):
        """Classify the frame and forward it to the matching context handler.

        A context that differs from the module-level ``prevContext`` is only
        recorded (followed by a one-second pause); a handler runs once the
        same context is seen again. Contexts outside the known list are
        ignored. Always returns ``None``.
        """
        global prevContext

        classifier = self.machine_learning_models["context_classifier"]
        context = classifier.predict(game_frame.frame)
        print(context)

        if context != prevContext:
            prevContext = context
            time.sleep(1)
            print("context doesn't match prevContext")
            return None

        # Only the first three contexts are used so far. The classifier is
        # also trained on the others, so they stay routed for future work.
        routed_contexts = (
            "DEATH_MENU",
            "BATTLE_STAGE",
            "REWARD_STAGE",
            "START_RUN",
            "MAP_MENU",
            "MERCHANT_MENU",
            "MERCHANT_PRE_MENU",
            "REST_STAGE",
            "SMITH_DECK_LIST",
        )

        for known_context in routed_contexts:
            if context == known_context:
                # The handler is looked up only when its context occurs.
                handler = getattr(self, "handle_" + known_context)
                handler(game_frame, context)
                break

    def _reset_game_state(self):
        self.game_state = {
            "current_run": 0,
            "current_run_steps": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
            "masterCardList": ["Strike_G", "Strike_G", "Strike_G", "Strike_G", "Defend_G", "Defend_G", "Defend_G", "Defend_G", "Poisoned Stab", "Neutralize", "Dodge and Roll"],
            "player_energy_available": [3],
            "player_energy_total": [3],
            "player_health": [70],
            "final_cultist_attack": [0],
            "poison_check": [False],
            "run_reward_selection": 0,
            "run_predicted_selection": 0,
        }
       
    def handle_DEATH_MENU(self, game_frame, context):
        """Click through six fixed screen positions, then call ``fight_setup``.

        Sets the module-level ``prevContext`` to ``"DEATH_STAGE"``. Neither
        ``game_frame`` nor ``context`` is read.
        """
        global prevContext

        print("INSIDE DEATH_MENU function")
        prevContext = "DEATH_STAGE"

        time.sleep(1)

        # (x, y, pause in seconds after the click), absolute coordinates
        click_script = (
            (639, 622, 1),
            (644, 637, 2),
            (104, 440, 1),
            (344, 376, 1),
            (634, 579, 1),
            (1207, 593, 1),
        )

        for x_pos, y_pos, pause in click_script:
            self.input_controller.move(x=x_pos, y=y_pos, duration=0.25, absolute=True)
            self.input_controller.click(button=MouseButton.LEFT, duration=0.25)

            time.sleep(pause)

        time.sleep(1)

        self.fight_setup()

    # Remove relic and reset deck
    def fight_setup(self):
        """Type three console commands, count the run, and pick the DDQN mode.

        Types ``~``, sends each command followed by TAB and ENTER, then types
        ``~`` again. Afterwards ``current_run`` is incremented; in TRAIN/RUN
        mode the target model is refreshed on every 100th run (DDQN type
        only) and RUN mode is entered on every 20th run, TRAIN mode
        otherwise. Ends by calling ``populating_deck``.
        """
        controller = self.input_controller

        controller.type_string("~", 0.05)

        for command in ("relic remove r", "deck remove a", "fight cu"):
            controller.type_string(command, 0.05)
            controller.tap_key(KeyboardKey.KEY_TAB)
            controller.tap_key(KeyboardKey.KEY_ENTER)

            time.sleep(1)

        controller.type_string("~", 0.05)

        time.sleep(1)

        self.game_state["current_run"] += 1

        selector = self.dqn_card_selection
        print("self.dqn_card_selection.mode: --- ", selector.mode)

        if selector.mode in ["TRAIN", "RUN"]:
            print("if self.dqn_card_selection.mode in ['TRAIN', 'RUN']:")
            print("----------------------------------------------------")
            time.sleep(2)

            run_number = self.game_state["current_run"]
            counted = run_number > 0

            if counted and run_number % 100 == 0 and selector.type == "DDQN":
                selector.update_target_model()

            if counted and run_number % 20 == 0:
                selector.enter_run_mode()
            else:
                selector.enter_train_mode()

        # The battle has started in game, but the context classifier has not
        # yet triggered the battle-stage handler.
        self.populating_deck()

    def populating_deck(self):
        """Type one ``hand add <card>`` command per card, then tap E.

        Types ``~`` before the commands, sends every entry of
        ``game_state["masterCardList"]`` followed by ENTER with a one-second
        pause, types ``~`` again, taps E and waits 1.5 seconds.
        """
        cards = self.game_state["masterCardList"]
        controller = self.input_controller

        time.sleep(1)

        controller.type_string("~", 0.05)

        time.sleep(1)

        for card_name in cards:
            print(card_name)

            controller.type_string("hand add " + card_name, 0.05)
            controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(1)

        # Closes console
        controller.type_string("~", 0.05)

        # Ends turn
        controller.tap_key(KeyboardKey.KEY_E)

        time.sleep(1.5)

    def handle_BATTLE_STAGE(self, game_frame, context):
        """OCR the player's energy and health, store them, then capture the enemy action.

        Both readings are validated *before* anything is written to
        ``self.game_state``.  If either one cannot be parsed the frame is
        abandoned (``None`` is returned) so that a later frame can be tried,
        and the energy/health histories never receive a value that
        ``calculate_reward`` cannot convert with ``int()``.

        Args:
            game_frame: frame object whose ``.frame`` image is cropped and OCR'd.
            context: unused here.

        Returns:
            None in every case.
        """
        print("INSIDE BATTLE_STAGE function")
        global prevContext
        prevContext = "BATTLE_STAGE"

        # Player Energy
        energy_region = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["PLAYER_ENERGY"])
        energy_grayscale = np.array(skimage.color.rgb2gray(energy_region) * 255, dtype="uint8")

        player_energy = serpent.ocr.perform_ocr(image=energy_grayscale, scale=15, order=5, horizontal_closing=2, vertical_closing=1)

        print("player_energy")
        print(player_energy)

        # The swirling animation behind the numbers means the OCR is not 100%
        # reliable.  The '/' is by far the most consistently returned character
        # and the energy values revolve around it, so when it is missing we
        # give up on this frame and wait for a fresh one.
        if '/' not in player_energy:
            print(player_energy)
            print("Failed to capture energy successfully // captured energy value doesn't have '/'")
            return None

        print("player_energy INSIDE IF STATEMENT")

        ## Examples of values returned by OCR when grabbing energy
        # 3/3 <- correct
        # '3/3
        # "3 /3
        # 53 /3
        # 27/ 3
        # "3/3'
        # 3/3.
        # .3 /3
        # *3/3

        # Keep only digits and '/'; everything else is OCR noise.
        energy_chars = [char for char in player_energy if char == "/" or char.isdigit()]

        # Final check: we need "<digit>/<digit>" in the first three positions.
        # Technically this fails if the player has 10 or more energy, but that
        # happens too rarely to be handled here.
        if (
            len(energy_chars) < 3
            or energy_chars[1] != "/"
            or not energy_chars[0].isdigit()
            or not energy_chars[2].isdigit()
        ):
            print("Failed to capture energy successfully // len(finalArr) < 3) or finalArr[1] != '/'")
            return None

        # Capture available and total player energy
        player_energy_available = energy_chars[0]
        player_energy_total = energy_chars[2]

        print("------------------------------------")
        print(player_energy_available, "/", player_energy_total)
        print("------------------------------------")

        time.sleep(1)

        # Player Health
        health_region = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["PLAYER_HEALTH"])
        health_grayscale = np.array(skimage.color.rgb2gray(health_region) * 255, dtype="uint8")

        health_text = serpent.ocr.perform_ocr(image=health_grayscale, scale=15, order=5, horizontal_closing=2, vertical_closing=1)

        # Collect the digits that appear before the first '/'.
        health_digits = []
        for char in health_text:
            if char == "/":
                break
            if char.isdigit():
                health_digits.append(char)

        player_health = ''.join(health_digits)

        print("player_health", player_health)

        # An empty reading would later crash int() in calculate_reward, so skip
        # the frame instead of recording it.
        if not player_health:
            print("Failed to capture health successfully // no digits found before '/'")
            return None

        # Only now, with both readings valid, record them (newest first).
        self.game_state["player_energy_available"].insert(0, player_energy_available)
        self.game_state["player_energy_total"].insert(0, player_energy_total)
        self.game_state["player_health"].insert(0, player_health)

        time.sleep(.5)

        self.enemy_action_capture(game_frame)

    def enemy_action_capture(self, game_frame):
        """Locate the enemy "attack for" sprite in the frame and OCR the attack value.

        The mouse is first right-clicked and moved over two fixed screen
        positions.  When the sprite is found, the located region is widened,
        OCR'd, and the digits are stored (newest first) in
        ``game_state["final_cultist_attack"]`` before ``poison_check`` is
        called.  When it is not found a failure message is printed and nothing
        is stored.
        """
        # Unselect anything just in case
        self.input_controller.click(button=MouseButton.RIGHT, duration=0.25)
        time.sleep(.5)

        # Home hover
        self.input_controller.move(x=636, y=375, duration=0.25, absolute=True)
        time.sleep(1)

        # Enemy hover
        self.input_controller.move(x=959, y=410, duration=0.25, absolute=True)

        time.sleep(.75)

        sprite_pixels = skimage.io.imread("plugins/SerpentSlayTheSpireGamePlugin/files/data/sprites/sprite_Attack_for_0.png")[..., np.newaxis]
        attack_sprite = Sprite("attack_for_cultist", image_data=sprite_pixels)

        # Allows for dynamic capture of enemy attack
        located_region = SpriteLocator().locate(sprite=attack_sprite, game_frame=game_frame)

        print("attack_for_cultist_location: ", located_region)

        if located_region is None:
            return print("Failed to capture enemy attack")

        # Tuples are immutable, so widen the region through a temporary list.
        # NOTE(review): offsets +45 / +15 on items 1 and 3 presumably extend
        # the box to include the number after "for" -- confirm the coordinate order.
        widened = list(located_region)
        widened[1] += 45
        widened[3] += 15
        located_region = tuple(widened)

        print("Updated - attack_for_cultist_location: ", located_region)
        time.sleep(1)

        attack_pixels = serpent.cv.extract_region_from_image(game_frame.frame, located_region)
        attack_grayscale = np.array(skimage.color.rgb2gray(attack_pixels) * 255, dtype="uint8")

        attack_text = serpent.ocr.perform_ocr(image=attack_grayscale, scale=15, order=5, horizontal_closing=2, vertical_closing=1)

        # Workaround for a tesseract limitation: by default it does not capture
        # single characters, so the captured region includes the word "for"
        # plus the attack value (e.g. "for 6") and only the digits are kept.
        attack_digits = []
        for char in attack_text:
            if char.isdigit():
                attack_digits.append(char)
                print("final_cultist_attack", attack_digits)

        attack_value = ''.join(attack_digits)

        print("final_cultist_attack: ", attack_value)
        print("------------------------------------")

        self.game_state["final_cultist_attack"].insert(0, attack_value)

        self.poison_check(game_frame)


    def poison_check(self, game_frame):
        """Record whether the poison sprite is present in the frame, then run ``ddqn_setup``.

        ``True`` or ``False`` is inserted at the front of
        ``game_state["poison_check"]`` depending on whether the sprite locator
        returns a location.
        """
        sprite_pixels = skimage.io.imread("plugins/SerpentSlayTheSpireGamePlugin/files/data/sprites/sprite_poison_check_0.png")[..., np.newaxis]
        poison_sprite = Sprite("poison_check", image_data=sprite_pixels)

        located_region = SpriteLocator().locate(sprite=poison_sprite, game_frame=game_frame)

        print("poison_check_location: ", located_region)

        sprite_found = located_region is not None

        self.game_state["poison_check"].insert(0, sprite_found)
        print("POISON_CHECK == TRUE" if sprite_found else "POISON_CHECK == FALSE")

        self.ddqn_setup(game_frame)

    def ddqn_setup(self, game_frame):
        """Advance the card-selection DDQN by one step and act on its choice.

        On the very first call only the ``first_run`` flag is cleared.  On the
        next call the frame stack is built.  On later calls, in TRAIN mode the
        reward is computed and appended to replay memory (weights are saved
        every 100 steps, with a checkpoint every 5000); in RUN mode the frame
        stack is updated.  A status report is then printed.  Finally an action
        is picked and the first of its input values is passed to
        ``ddqn_action_output``.

        Args:
            game_frame: not used by this method; frames are fetched through
                ``FrameGrabber`` instead.

        Returns:
            None.
        """
        gc.disable()

        agent = self.dqn_card_selection

        if agent.first_run:
            agent.first_run = False
            print("---------------first_run---------------")

            return None

        timestamp_now = datetime.utcnow()
        runtime_total = timestamp_now - self.game_state["run_timestamp"]

        time.sleep(1)
        print("self.dqn_card_selection.mode", agent.mode)

        if agent.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            ).frames[0]
            print("self.dqn_card_selection.frame_stack is None")

            agent.build_frame_stack(full_game_frame.frame)

        else:
            print("ELSE -- self.dqn_card_selection.frame_stack is None // game_frame_buffer")
            print("INSIDE ELSE self.dqn_card_selection.mode", agent.mode)

            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE"
            )

            if agent.mode == "TRAIN":
                print("self.dqn_card_selection.mode == TRAIN", agent.mode)
                time.sleep(2)

                # calculates reward then appends it to replay memory
                reward_selection = self.calculate_reward()

                self.game_state["run_reward_selection"] += reward_selection

                # game_state["player_health"] is a history of OCR'd strings
                # (newest first).  Comparing the whole history to 0 is always
                # False, so look at the latest reading instead.
                latest_health = str(self.game_state["player_health"][0])
                is_terminal = latest_health.isdigit() and int(latest_health) == 0

                agent.append_to_replay_memory(
                    game_frame_buffer,
                    reward_selection,
                    terminal=is_terminal
                )

                # Every multiple of 500 is also a multiple of 100, so a single
                # periodic save covers both of the original save points.
                if agent.current_step % 100 == 0:
                    agent.save_model_weights(
                        file_path_prefix="datasets/tdar31_slaythespire_selection"
                    )

                if agent.current_step % 5000 == 0:
                    agent.save_model_weights(
                        file_path_prefix="datasets/tdar31_slaythespire_selection",
                        is_checkpoint=True
                    )

            elif agent.mode == "RUN":
                agent.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            print(f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds")
            print("")

            print("Selection NN:\n")
            agent.output_step_data()

            print("")
            print(f"RUN: {self.game_state['current_run']}")
            print(f"RUN REWARD: {round(self.game_state['run_reward_selection'], 2)}")
            print(f"RUN PREDICTED ACTIONS: {self.game_state['run_predicted_selection']}")
            print(f"PLAYER HEALTH: {self.game_state['player_health'][0]}")
            print(f"PLAYER ENERGY AVAILABLE: {self.game_state['player_energy_available'][0]}")
            print(f"PLAYER ENERGY TOTAL: {self.game_state['player_energy_total'][0]}")
            print(f"POISON CHECK: {self.game_state['poison_check'][0]}")
            print(f"RUN TIME: {runtime_total.seconds} seconds")
            print(f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds")

        agent.pick_action()
        agent.generate_action()
        card_selection_keys = agent.get_input_values()

        print("card_selection_keys", card_selection_keys)

        ddqnInputSelection = card_selection_keys[0]

        print("ddqnInputSelection", ddqnInputSelection)

        print("self.dqn_card_selection.current_action_type", agent.current_action_type)

        if agent.current_action_type == "PREDICTED":
            self.game_state["run_predicted_selection"] += 1

        agent.erode_epsilon(factor=2)

        agent.next_step()

        self.ddqn_action_output(ddqnInputSelection)

    def ddqn_action_output(self, ddqnInputSelection):
        """Send the chosen key to the game and, for a card, click it onto the enemy.

        The key is always tapped.  If it is the end-turn key (E) nothing else
        happens; otherwise the mouse is moved over two fixed positions and
        left-clicked to play the selected card.

        Args:
            ddqnInputSelection: the key to tap.  Either a ``KeyboardKey``
                member or, for backward compatibility, the string
                ``"KeyboardKey.KEY_E"`` is recognised as "end turn".
        """
        print(ddqnInputSelection)

        # Unselects anything just in case
        self.input_controller.click(button=MouseButton.RIGHT, duration=0.25)

        # The original compared only against the string "KeyboardKey.KEY_E",
        # which an enum member never equals, so end-turn was treated as a card.
        is_end_turn = ddqnInputSelection in (KeyboardKey.KEY_E, "KeyboardKey.KEY_E")

        if is_end_turn:
            print("ddqnInputSelection = E // End turn")

        # Tapping the key either ends the turn or selects the chosen card.
        self.input_controller.tap_key(ddqnInputSelection)
        time.sleep(.5)

        if is_end_turn:
            return

        # Same two positions as the "home" and "enemy" hovers in
        # enemy_action_capture; the click lands on the second one.
        for hover_x, hover_y in ((636, 375), (959, 410)):
            self.input_controller.move(x=hover_x, y=hover_y, duration=0.25, absolute=True)
            time.sleep(1)

        self.input_controller.click(button=MouseButton.LEFT, duration=0.25)
        time.sleep(.5)

    def calculate_reward(self):
        """Compute the reward for the latest turn from the recorded game state.

        Reads the two newest entries of ``game_state["player_health"]`` and the
        newest entries of ``player_energy_available``, ``player_energy_total``
        and ``poison_check`` (all stored newest first; numeric values may be
        digit strings).

        Scoring:
            * 1 to 9 damage taken this turn: -5
            * 10 or more damage taken this turn: -10
            * no energy spent (available == total): -3
            * some energy spent (available < total): +10
            * enemy poisoned: +5

        Returns:
            int: the summed reward.

        Raises:
            ValueError: if a health or energy entry is not an integer string.
            IndexError: if fewer than two health readings are recorded.
        """
        health_history = self.game_state["player_health"]
        damage_taken = int(health_history[1]) - int(health_history[0])

        # Compare against the total of the *same* turn.  The original read
        # index 1 for the bonus but index 0 for the penalty.
        energy_available = int(self.game_state["player_energy_available"][0])
        energy_total = int(self.game_state["player_energy_total"][0])

        reward = 0

        # Zero or negative "damage" (no change / healing) is not penalised;
        # the original `<= 9` test charged -5 in that case too.
        if 1 <= damage_taken <= 9:
            reward -= 5
        elif damage_taken >= 10:
            reward -= 10

        if energy_available == energy_total:
            # Slight penalty if no card was played, i.e. no energy was used.
            reward -= 3
        elif energy_available < energy_total:
            # Energy was used to play a card, regardless of what it did.
            reward += 10

        if self.game_state["poison_check"][0]:
            reward += 5

        return reward

    def handle_REWARD_STAGE(self, game_frame, context):
        """Click through three fixed screen positions on the reward screen.

        Neither ``game_frame`` nor ``context`` is used; the method only records
        the context name in the module-level ``prevContext`` and drives the mouse.
        """
        print("INSIDE REWARD_STAGE function")
        global prevContext
        prevContext = "REWARD_STAGE"

        time.sleep(.5)

        # (x, y) targets, clicked in this order.
        click_targets = ((1249, 26), (952, 146), (563, 456))

        for target_x, target_y in click_targets:
            self.input_controller.move(x=target_x, y=target_y, duration=0.25, absolute=True)
            time.sleep(.75)
            self.input_controller.click(button=MouseButton.LEFT, duration=0.25)

        time.sleep(.5)

    # def handle_MERCHANT_PRE_MENU(self, game_frame, context):
    #     print("INSIDE MERCHANT_PRE_MENU function")
    #     global prevContext
    #     prevContext = "MERCHANT_PRE_MENU"
    #     time.sleep(1) 

    # def handle_MERCHANT_MENU(self, game_frame, context):
    #     print("INSIDE MERCHANT_MENU function")
    #     global prevContext
    #     prevContext = "MERCHANT_MENU"

    # def handle_MAP_MENU(self, game_frame, context):
    #     print("INSIDE MAP_MENU function")
    #     global prevContext
    #     prevContext = "MAP_MENU"
    #     time.sleep(1)

    # def handle_REST_STAGE(self, game_frame, context):
    #     print("INSIDE REST_STAGE function")
    #     global prevContext
    #     prevContext = "REST_STAGE"
    #     time.sleep(1)

    # def handle_SMITH_DECK_LIST(self, game_frame, context):
    #     print("INSIDE SMITH_DECK_LIST function")
    #     global prevContext
    #     prevContext = "SMITH_DECK_LIST"
    #     time.sleep(1)

示例#7

显示文件

文件： serpent_SpaceInvaders_game_agent.py 项目： itsonlym3/SerpentSpaceInvadersGameAgentPlugin

class SerpentSpaceInvadersGameAgent(GameAgent):
    def __init__(self, **kwargs):
        """Register the PLAY handlers and create a fresh game state.

        All keyword arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)

        # Handler and one-time setup used for the "PLAY" frame-handler key.
        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play

        self.analytics_client = None

        # Declare the attribute, then let the helper fill it in.
        self.game_state = None
        self._reset_game_state()

        # NOTE(review): `init` is imported elsewhere in the file; presumably
        # colorama's terminal initialisation -- confirm.
        init()

    def setup_play(self):
        """Define the key mappings and build the direction and action DDQN agents.

        Sets ``self.key_mapping`` (key name -> button label),
        ``self.dqn_direction`` and ``self.dqn_action``.  Each agent loads its
        model file only when that file already exists on disk.
        """
        # ALL Key/Input Mappings for SNES Emulator
        input_mapping = {
            "UP": [KeyboardKey.KEY_W],
            "LEFT": [KeyboardKey.KEY_A],
            "DOWN": [KeyboardKey.KEY_S],
            "RIGHT": [KeyboardKey.KEY_D],
            "START": [KeyboardKey.KEY_ENTER],
            "SELECT": [KeyboardKey.KEY_BACKSPACE],
            "B": [KeyboardKey.KEY_Z],
            "A": [KeyboardKey.KEY_V],
            "Y": [KeyboardKey.KEY_X],
            "X": [KeyboardKey.KEY_C],
            "L": [KeyboardKey.KEY_B],
            "R": [KeyboardKey.KEY_N],
        }

        # Reverse lookup: every button above is bound to exactly one key, so
        # the key-name -> button table is the inverse of input_mapping.
        self.key_mapping = {
            bound_keys[0].name: button
            for button, bound_keys in input_mapping.items()
        }

        # Game Specific Inputs
        direction_action_space = KeyboardMouseActionSpace(
            direction_keys=[None, "LEFT", "RIGHT"])

        action_space = KeyboardMouseActionSpace(
            action_keys=[None, "B", "A", "Y", "X"])

        # Hyper-parameters shared by both networks.
        shared_options = dict(
            input_shape=(100, 100, 4),
            input_mapping=input_mapping,
            replay_memory_size=40000,
            max_steps=3000000,
            observe_steps=5000,
            batch_size=32,
            model_learning_rate=1e-4,
            initial_epsilon=1.0,
            final_epsilon=0.1,
            override_epsilon=False,
        )

        direction_model_file_path = "datasets/spaceinvaders_direction_dqn_0_1_.h5".replace(
            "/", os.sep)
        if not os.path.isfile(direction_model_file_path):
            direction_model_file_path = None

        self.dqn_direction = DDQN(
            model_file_path=direction_model_file_path,
            action_space=direction_action_space,
            **shared_options)

        action_model_file_path = "datasets/spaceinvaders_action_dqn_0_1_.h5".replace(
            "/", os.sep)
        if not os.path.isfile(action_model_file_path):
            action_model_file_path = None

        self.dqn_action = DDQN(
            model_file_path=action_model_file_path,
            action_space=action_space,
            **shared_options)

    def handle_play(self, game_frame):
        """Process one PLAY frame: measure state, train or run the DDQNs, press keys.

        Flow:
            1. On the first call, tap through the start menus and return.
            2. Measure health and score and push them onto the state deques.
            3. Build the frame stack if there is none yet.  Otherwise, in TRAIN
               mode append the reward to both replay memories (saving weights
               periodically), and in RUN mode update the frame stacks.
            4. Print a status report.
            5. If the health reading from two frames ago is <= 0, close out the
               run (records, mini-batch training, state reset, mode switch,
               restart key presses) and return.
            6. Otherwise pick actions from both networks and send the keys.

        Args:
            game_frame: the frame passed to the health and score measurements.

        Returns:
            None.
        """
        gc.disable()

        if self.dqn_direction.first_run:
            # Tap through the start screens before the first run.
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(0.5)
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(0.5)
            self.input_controller.tap_key(KeyboardKey.KEY_S)
            time.sleep(0.5)
            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(0.5)

            self.input_controller.tap_key(KeyboardKey.KEY_N)  # 1 Credit

            self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
            time.sleep(5)

            self.dqn_direction.first_run = False
            self.dqn_action.first_run = False

            return None

        vessel_hp = self._measure_hp(game_frame)
        vessel_score = self._measure_score(game_frame)
        # vessel_credits = self._measure_credits(game_frame)

        # Newest reading goes to the left, so index 0 is always "now".
        self.game_state["health"].appendleft(vessel_hp)
        self.game_state["score"].appendleft(vessel_score)
        # self.game_state["credits"].appendleft(vessel_credits)

        if self.dqn_direction.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE").frames[0]

            # Both networks start from the same frame stack.
            self.dqn_direction.build_frame_stack(pipeline_game_frame.frame)
            self.dqn_action.frame_stack = self.dqn_direction.frame_stack
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE")

            if self.dqn_direction.mode == "TRAIN":
                reward_direction, reward_action = self._calculate_reward()

                self.game_state["run_reward_direction"] += reward_direction
                self.game_state["run_reward_action"] += reward_action

                # game_state["health"] is a deque; comparing the deque itself
                # to 0 is always False, so test the newest reading instead.
                is_terminal = self.game_state["health"][0] <= 0

                self.dqn_direction.append_to_replay_memory(
                    game_frame_buffer,
                    reward_direction,
                    terminal=is_terminal)

                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward_action,
                    terminal=is_terminal)

                # Every 2000 steps, save latest weights to disk
                if self.dqn_direction.current_step % 2000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix=f"datasets/spaceinvaders_direction")

                    self.dqn_action.save_model_weights(
                        file_path_prefix=f"datasets/spaceinvaders_action")

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_direction.current_step % 20000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix=f"datasets/spaceinvaders_direction",
                        is_checkpoint=True)

                    self.dqn_action.save_model_weights(
                        file_path_prefix=f"datasets/spaceinvaders_action",
                        is_checkpoint=True)
            elif self.dqn_direction.mode == "RUN":
                self.dqn_direction.update_frame_stack(game_frame_buffer)
                self.dqn_action.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            # Status report ("\033[..m" sequences are ANSI colour codes).
            print(
                "\033[31m" +
                f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
                + "\033[37m")
            print("GAME: Space Invaders   PLATFORM: SNES   AGENT: DDQN + PER")
            print("")

            print("\033[32m" + "DIRECTION NEURAL NETWORK INFO:\n" + "\033[37m")
            self.dqn_direction.output_step_data()

            print("")
            print("\033[32m" + "ACTION NEURAL NETWORK INFO:\n" + "\033[37m")
            self.dqn_action.output_step_data()

            print("")
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(
                f"CURRENT RUN REWARD: {round(self.game_state['run_reward_direction'] + self.game_state['run_reward_action'], 2)}"
            )
            print(
                f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            # print(f"CURRENT CREDITS: {self.game_state['credits'][0]}")
            print("")
            print(
                f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds"
            )

            print("")
            print(
                f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})"
            )
            print("")

            print(
                f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds"
            )

            # The run is treated as over once the reading from two frames ago
            # is depleted.
            if self.game_state["health"][2] <= 0:
                serpent.utilities.clear_terminal()
                print("ENTERING THE HEALTH <= 0 PART")
                timestamp = datetime.utcnow()

                # Collect garbage once between runs; GC stays disabled otherwise.
                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state[
                            "record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_direction.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(
                        self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(
                        self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                # Release all keys before training / restarting.
                self.input_controller.handle_keys([])

                if self.dqn_direction.mode == "TRAIN":
                    for i in range(16):
                        serpent.utilities.clear_terminal()
                        print(
                            "\033[31m" +
                            f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
                            + "\033[37m")
                        print(
                            "\033[32m" +
                            "GAME: Space Invaders   PLATFORM: Steam   AGENT: DDQN + PER"
                            + "\033[37m")
                        print("")
                        print("TRAINING ON MINI-BATCHES:" + "\033[32m" +
                              f"{i + 1}/16" + "\033[37m")
                        print(
                            f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}"
                        )

                        self.dqn_direction.train_on_mini_batch()
                        self.dqn_action.train_on_mini_batch()

                # Reset the per-run state for the next run.
                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_direction"] = 0
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8, ),
                                                                      3),
                                                              maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8, ), 0),
                                                             maxlen=8)

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Every 100th run: refresh the target models (DDQN only).
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 100 == 0:
                        if self.dqn_direction.type == "DDQN":
                            self.dqn_direction.update_target_model()
                            self.dqn_action.update_target_model()

                    # Every 20th run is played in run mode, the rest in train mode.
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 20 == 0:
                        self.dqn_direction.enter_run_mode()
                        self.dqn_action.enter_run_mode()
                    else:
                        self.dqn_direction.enter_train_mode()
                        self.dqn_action.enter_train_mode()

                # Key presses that start the next game.
                time.sleep(1)
                self.input_controller.tap_key(KeyboardKey.KEY_N)
                time.sleep(1)
                self.input_controller.tap_key(KeyboardKey.KEY_ENTER)
                time.sleep(6)

                return None

        self.dqn_direction.pick_action()
        self.dqn_direction.generate_action()

        # The action network uses the same action type as the direction network.
        self.dqn_action.pick_action(
            action_type=self.dqn_direction.current_action_type)
        self.dqn_action.generate_action()

        keys = self.dqn_direction.get_input_values(
        ) + self.dqn_action.get_input_values()
        print("")

        print("PRESSING: ", end='')
        print(" + ".join(
            list(map(lambda k: self.key_mapping.get(k.name), keys))))

        self.input_controller.handle_keys(keys)

        if self.dqn_direction.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_direction.erode_epsilon(factor=2)
        self.dqn_action.erode_epsilon(factor=2)

        self.dqn_direction.next_step()
        self.dqn_action.next_step()

        self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8, ), 3), maxlen=8),
            "score": collections.deque(np.full((8, ), 0), maxlen=8),
            "run_reward_direction": 0,
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 3,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
        }

    def _measure_score(self, game_frame):
        """OCR the score region of the frame and return the score as a string.

        A reading is accepted only when it is exactly four digits and not
        ``'0000'``; its leading zeros are then removed.  Any other reading
        yields ``'0'``.  The result is also stored in
        ``game_state["current_run_score"]``.
        """
        score_pixels = serpent.cv.extract_region_from_image(
            game_frame.frame, self.game.screen_regions["GAME_CURRENT_SCORE"])

        # Scale the grayscale image to 0-255 bytes before handing it to OCR.
        score_grayscale = np.array(
            skimage.color.rgb2gray(score_pixels) * 255, dtype="uint8")

        ocr_text = serpent.ocr.perform_ocr(
            image=score_grayscale,
            scale=10,
            order=1,
            horizontal_closing=1,
            vertical_closing=1,
        )

        reading_is_valid = (
            len(ocr_text) == 4 and ocr_text.isdigit() and ocr_text != '0000'
        )

        if reading_is_valid:
            score = ocr_text.lstrip('0')
        else:
            score = '0'

        self.game_state["current_run_score"] = score

        return score

    def _measure_hp(self, game_frame):
        """Return the health value of the sprite that best matches the health region.

        Every variant of every sprite in ``self.game.sprites`` is compared with
        the cropped health region using SSIM.  The last character of the
        best-matching sprite's name, converted to ``int``, is returned (0 when
        no comparison scores above 0).
        """
        hp_pixels = serpent.cv.extract_region_from_image(
            game_frame.frame, self.game.screen_regions["GAME_CURRENT_HEALTH"])

        best_hp = 0
        best_similarity = 0

        for sprite_name, sprite in self.game.sprites.items():
            variant_count = sprite.image_data.shape[3]

            for variant in range(variant_count):
                candidate = np.squeeze(sprite.image_data[..., variant])

                similarity = skimage.measure.compare_ssim(
                    hp_pixels, candidate, multichannel=True)

                # Strictly greater: on a tie the earlier match is kept.
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_hp = int(sprite_name[-1])

        return best_hp

    # def _measure_credits(self, game_frame):
    #     # OCR or Sprites if inconsistent (see TiamatX health)
    #     credits_area_frame = serpent.cv.extract_region_from_image(game_frame.frame, self.game.screen_regions["GAME_CURRENT_CREDITS"])
    #
    #     credits_grayscale = np.array(skimage.color.rgb2gray(credits_area_frame) * 255, dtype="uint8")
    #
    #     credits = serpent.ocr.perform_ocr(image=credits_grayscale, scale=10, order=5, horizontal_closing=10, vertical_closing=5)
    #
    #     count = 0
    #
    #     if len(credits) == 2 and credits.isdigit():
    #         for char in credits:
    #             if char == '0':
    #                 count = count + 1
    #             else:
    #                 break
    #         credits = credits[count:]
    #     else:
    #         credits = '50'
    #
    #     self.game_state["current_run_credits"] = credits
    #
    #     return credits

    def _calculate_reward(self):
        reward = 0

        # reward += (-1.0 if self.game_state["credits"][0] < self.game_state["credits"][1] else 0.1)
        reward += (-0.5 if
                   self.game_state["health"][0] < self.game_state["health"][1]
                   else 0.05)
        reward += (0.75 if
                   (int(self.game_state["score"][0]) -
                    int(self.game_state["score"][1])) >= 10 else -0.075)

        return reward, reward

示例#8

显示文件

文件： serpent_BindingOfIsaacAfterbirth_game_agent.py 项目： m1ndgames/SerpentBindingOfIsaacAfterbirthGameAgentPlugin

class SerpentBindingOfIsaacAfterbirthGameAgent(GameAgent):
    def __init__(self, **kwargs):
        """Set up the base agent, register the PLAY hooks and seed the game state.

        Args:
            **kwargs: Forwarded unchanged to the parent class initialiser.
        """
        super().__init__(**kwargs)

        # Both hooks are registered under the "PLAY" key: one-off setup and
        # the per-frame handler.
        self.frame_handler_setups["PLAY"] = self.setup_play
        self.frame_handlers["PLAY"] = self.handle_play

        self.game_state = None
        self._reset_game_state()

    @property
    def bosses(self):
        return {"MONSTRO": "1010"}

    def setup_play(self):
        """Build the key mappings, both DDQN agents and the boss console command.

        Side effects:
            * ``self.key_mapping``: key name -> label printed by ``handle_play``.
            * ``self.dqn_movement`` / ``self.dqn_projectile``: two DDQN agents
              that differ only in weights file and action space.
            * The clipboard receives the ``goto s.boss.<id>`` command for
              MONSTRO (on Linux the ``xsel`` clipboard backend is selected
              first).
        """
        keys = KeyboardKey

        # Label -> keys to press; shared by both networks.
        input_mapping = {
            "W": [keys.KEY_W],
            "A": [keys.KEY_A],
            "S": [keys.KEY_S],
            "D": [keys.KEY_D],
            "WA": [keys.KEY_W, keys.KEY_A],
            "WD": [keys.KEY_W, keys.KEY_D],
            "SA": [keys.KEY_S, keys.KEY_A],
            "SD": [keys.KEY_S, keys.KEY_D],
            "UP": [keys.KEY_UP],
            "LEFT": [keys.KEY_LEFT],
            "DOWN": [keys.KEY_DOWN],
            "RIGHT": [keys.KEY_RIGHT]
        }

        self.key_mapping = {
            keys.KEY_W.name: "MOVE UP",
            keys.KEY_A.name: "MOVE LEFT",
            keys.KEY_S.name: "MOVE DOWN",
            keys.KEY_D.name: "MOVE RIGHT",
            keys.KEY_UP.name: "SHOOT UP",
            keys.KEY_LEFT.name: "SHOOT LEFT",
            keys.KEY_DOWN.name: "SHOOT DOWN",
            keys.KEY_RIGHT.name: "SHOOT RIGHT",
        }

        movement_labels = [None, "W", "A", "S", "D", "WA", "WD", "SA", "SD"]
        projectile_labels = [None, "UP", "LEFT", "DOWN", "RIGHT"]

        movement_action_space = KeyboardMouseActionSpace(
            directional_keys=movement_labels)
        projectile_action_space = KeyboardMouseActionSpace(
            projectile_keys=projectile_labels)

        # Hyper-parameters common to the movement and projectile networks.
        shared_dqn_settings = dict(
            input_shape=(100, 100, 4),
            input_mapping=input_mapping,
            replay_memory_size=5000,
            max_steps=1000000,
            observe_steps=1000,
            batch_size=32,
            initial_epsilon=1,
            final_epsilon=0.01,
            override_epsilon=False)

        def build_dqn(weights_path, action_space):
            # Saved weights are only loaded when the file actually exists.
            saved_weights = weights_path if os.path.isfile(
                weights_path) else None
            return DDQN(model_file_path=saved_weights,
                        action_space=action_space,
                        **shared_dqn_settings)

        movement_model_file_path = os.sep.join(
            ("datasets", "binding_of_isaac_movement_dqn_0_1_.h5"))
        self.dqn_movement = build_dqn(movement_model_file_path,
                                      movement_action_space)

        projectile_model_file_path = os.sep.join(
            ("datasets", "binding_of_isaac_projectile_dqn_0_1_.h5"))
        self.dqn_projectile = build_dqn(projectile_model_file_path,
                                        projectile_action_space)

        if sys.platform in ("linux", "linux2"):
            pyperclip.set_clipboard("xsel")

        boss_id = self.bosses["MONSTRO"]
        pyperclip.copy(f"goto s.boss.{boss_id}")

    def handle_play(self, game_frame):
        """Process one PLAY frame: observe, (optionally) learn, report and act.

        Flow:
            1. On the very first call, jump to the boss and return.
            2. Read hearts and boss HP from ``game_frame`` and push them onto
               the ``health`` / ``boss_health`` histories. If no hearts are
               found, the run is restarted and the method returns.
            3. Build the shared frame stack on first use; afterwards feed the
               latest pipeline frames to both networks (replay memory in
               TRAIN mode, frame stack in RUN mode), print the statistics and
               handle end-of-run bookkeeping.
            4. Pick and send one movement and one projectile action.

        Args:
            game_frame: The current frame; its ``frame`` attribute is used for
                heart detection and it is passed to ``_get_boss_health``.

        Returns:
            None in every branch.
        """
        gc.disable()

        if self.dqn_movement.first_run:
            self._goto_boss()

            self.dqn_movement.first_run = False
            self.dqn_projectile.first_run = False

            return None

        hearts = frame_to_hearts(game_frame.frame, self.game)

        # Check for Curse of Unknown
        if not len(hearts):
            self.input_controller.tap_key(KeyboardKey.KEY_R, duration=1.5)
            self._goto_boss()

            return None

        self.game_state["health"].appendleft(24 - hearts.count(None))
        self.game_state["boss_health"].appendleft(
            self._get_boss_health(game_frame))

        if self.dqn_movement.frame_stack is None:
            pipeline_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64").frames[0]

            self.dqn_movement.build_frame_stack(pipeline_game_frame.frame)
            # Both networks observe the same frames, so the stack is shared.
            self.dqn_projectile.frame_stack = self.dqn_movement.frame_stack
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE",
                dtype="float64")

            if self.dqn_movement.mode == "TRAIN":
                reward_movement, reward_projectile = self._calculate_reward()

                self.game_state["run_reward_movement"] += reward_movement
                self.game_state["run_reward_projectile"] += reward_projectile

                # The transition is terminal when the newest health reading is
                # exhausted. "health" is a deque, so comparing the deque
                # itself to 0 would always be False.
                is_terminal = bool(self.game_state["health"][0] <= 0)

                self.dqn_movement.append_to_replay_memory(
                    game_frame_buffer,
                    reward_movement,
                    terminal=is_terminal)

                self.dqn_projectile.append_to_replay_memory(
                    game_frame_buffer,
                    reward_projectile,
                    terminal=is_terminal)

                # Every 2000 steps, save latest weights to disk
                if self.dqn_movement.current_step % 2000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_movement")

                    self.dqn_projectile.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_projectile"
                    )

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_movement.current_step % 20000 == 0:
                    self.dqn_movement.save_model_weights(
                        file_path_prefix="datasets/binding_of_isaac_movement",
                        is_checkpoint=True)

                    self.dqn_projectile.save_model_weights(
                        file_path_prefix=
                        "datasets/binding_of_isaac_projectile",
                        is_checkpoint=True)
            elif self.dqn_movement.mode == "RUN":
                self.dqn_movement.update_frame_stack(game_frame_buffer)
                self.dqn_projectile.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            print(
                f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
            )
            print("")

            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_movement.output_step_data()

            print("")
            print("PROJECTILE NEURAL NETWORK:\n")
            self.dqn_projectile.output_step_data()

            print("")
            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(
                f"CURRENT RUN REWARD: {round(self.game_state['run_reward_movement'] + self.game_state['run_reward_projectile'], 2)}"
            )
            print(
                f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT BOSS HEALTH: {self.game_state['boss_health'][0]}")
            print("")
            print(
                f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds"
            )

            print("")
            print(
                f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'}, Boss HP {self.game_state['record_time_alive'].get('boss_hp')})"
            )
            print(
                f"RECORD BOSS HP: {self.game_state['record_boss_hp'].get('value')} (Run {self.game_state['record_boss_hp'].get('run')}, {'Predicted' if self.game_state['record_boss_hp'].get('predicted') else 'Training'}, Time Alive {self.game_state['record_boss_hp'].get('time_alive')} seconds)"
            )
            print("")

            print(
                f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds"
            )
            print(
                f"RANDOM AVERAGE BOSS HP: {self.game_state['random_boss_hp']}")

            is_boss_dead = self._is_boss_dead(
                self.game_frame_buffer.previous_game_frame)

            # The run ends when the previous health reading was exhausted or
            # the boss skull HUD element changed.
            if self.game_state["health"][1] <= 0 or is_boss_dead:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                # Garbage collection is disabled per frame; collect once here,
                # between runs.
                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state[
                            "record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN",
                            "boss_hp": self.game_state["boss_health"][0]
                        }

                    if self.game_state["boss_health"][0] < self.game_state[
                            "record_boss_hp"].get("value", 1000):
                        self.game_state["record_boss_hp"] = {
                            "value": self.game_state["boss_health"][0],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_movement.mode == "RUN",
                            "time_alive": self.game_state["last_run_duration"]
                        }
                else:
                    self.game_state["random_time_alives"].append(
                        self.game_state["last_run_duration"])
                    self.game_state["random_boss_hps"].append(
                        self.game_state["boss_health"][0])

                    self.game_state["random_time_alive"] = np.mean(
                        self.game_state["random_time_alives"])
                    self.game_state["random_boss_hp"] = np.mean(
                        self.game_state["random_boss_hps"])

                self.game_state["current_run_steps"] = 0

                # Release all keys, then hold R to restart.
                self.input_controller.handle_keys([])
                self.input_controller.tap_key(KeyboardKey.KEY_R, duration=1.5)

                if self.dqn_movement.mode == "TRAIN":
                    for i in range(16):
                        serpent.utilities.clear_terminal()
                        print(f"TRAINING ON MINI-BATCHES: {i + 1}/16")
                        print(
                            f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}"
                        )

                        self.dqn_movement.train_on_mini_batch()
                        self.dqn_projectile.train_on_mini_batch()

                self.game_state["boss_skull_image"] = None

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_movement"] = 0
                self.game_state["run_reward_projectile"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8, ),
                                                                      6),
                                                              maxlen=8)
                self.game_state["boss_health"] = collections.deque(np.full(
                    (8, ), 654),
                                                                   maxlen=8)

                if self.dqn_movement.mode in ["TRAIN", "RUN"]:
                    # Target networks are refreshed every 100th run.
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 100 == 0:
                        self.dqn_movement.update_target_model()
                        self.dqn_projectile.update_target_model()

                    # Every 20th run is played in RUN mode; all others train.
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 20 == 0:
                        self.dqn_movement.enter_run_mode()
                        self.dqn_projectile.enter_run_mode()
                    else:
                        self.dqn_movement.enter_train_mode()
                        self.dqn_projectile.enter_train_mode()

                self._goto_boss()

                return None

        self.dqn_movement.pick_action()
        self.dqn_movement.generate_action()

        # The projectile network reuses the action type chosen for movement.
        self.dqn_projectile.pick_action(
            action_type=self.dqn_movement.current_action_type)
        self.dqn_projectile.generate_action()

        movement_keys = self.dqn_movement.get_input_values()
        projectile_keys = self.dqn_projectile.get_input_values()

        print("")
        print(" + ".join(
            list(
                map(lambda k: self.key_mapping.get(k.name),
                    movement_keys + projectile_keys))))

        self.input_controller.handle_keys(movement_keys + projectile_keys)

        if self.dqn_movement.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_movement.erode_epsilon(factor=2)
        self.dqn_projectile.erode_epsilon(factor=2)

        self.dqn_movement.next_step()
        self.dqn_projectile.next_step()

        self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8, ), 6), maxlen=8),
            "boss_health": collections.deque(np.full((8, ), 654), maxlen=8),
            "boss_skull_image": None,
            "current_run": 1,
            "current_run_steps": 0,
            "run_reward_movement": 0,
            "run_reward_projectile": 0,
            "run_future_rewards": 0,
            "run_predicted_actions": 0,
            "run_timestamp": datetime.utcnow(),
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "record_boss_hp": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "random_boss_hp": None,
            "random_boss_hps": list()
        }

    def _goto_boss(self):
        """Send the fixed key sequence that jumps to the boss.

        Taps SPACE, then GRAVE (presumably opening the in-game console --
        confirm), pastes the clipboard with CTRL+V (``setup_play`` stores the
        ``goto s.boss.<id>`` command there) and confirms with four ENTER taps,
        pausing between steps.
        """
        controller = self.input_controller
        enter = KeyboardKey.KEY_ENTER

        controller.tap_key(KeyboardKey.KEY_SPACE)
        time.sleep(1)
        controller.tap_key(KeyboardKey.KEY_GRAVE)
        time.sleep(0.5)

        controller.tap_keys([KeyboardKey.KEY_LEFT_CTRL, KeyboardKey.KEY_V])

        # First ENTER has no pause after it; the remaining three do.
        controller.tap_key(enter)
        for pause in (0.5, 0.5, 0.2):
            controller.tap_key(enter)
            time.sleep(pause)

    def _get_boss_health(self, game_frame):
        """Estimate boss health as the number of bright pixels in the HP bar.

        The ``HUD_BOSS_HP`` region of the grayscale frame is binarised with an
        Otsu threshold (falling back to a threshold of 1 when Otsu raises
        ``ValueError``), and the pixels above the threshold are counted.

        Args:
            game_frame: Frame providing ``grayscale_frame``.

        Returns:
            int: Count of pixels above the threshold.
        """
        health_bar = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame,
            self.game.screen_regions["HUD_BOSS_HP"])

        try:
            cutoff = skimage.filters.threshold_otsu(health_bar)
        except ValueError:
            cutoff = 1

        lit_pixels = health_bar > cutoff
        remaining = lit_pixels[lit_pixels > 0]

        return remaining.size

    def _is_boss_dead(self, game_frame):
        """Detect boss death from a change in the ``HUD_BOSS_SKULL`` region.

        The skull region of ``game_frame`` is compared (SSIM) with the region
        stored from the previous call; a similarity below 0.5 is reported as
        death. The stored image is always replaced by the current one.

        Args:
            game_frame: Frame providing ``grayscale_frame``.

        Returns:
            bool: True when the similarity is below 0.5.
        """
        current_skull = serpent.cv.extract_region_from_image(
            game_frame.grayscale_frame,
            self.game.screen_regions["HUD_BOSS_SKULL"])

        # With no stored image yet, the frame is compared against itself.
        if self.game_state["boss_skull_image"] is None:
            self.game_state["boss_skull_image"] = current_skull

        similarity = skimage.measure.compare_ssim(
            current_skull, self.game_state["boss_skull_image"])
        boss_is_dead = bool(similarity < 0.5)

        self.game_state["boss_skull_image"] = current_skull

        return boss_is_dead

    def _calculate_reward(self):
        reward_movement = 0
        reward_projectile = 0

        reward_movement += (-1 if self.game_state["health"][0] <
                            self.game_state["health"][1] else 0.05)
        reward_projectile += (1 if self.game_state["boss_health"][0] <
                              self.game_state["boss_health"][3] else -0.05)

        return reward_movement, reward_projectile

示例#9

显示文件

文件： serpent_VVVVVV_game_agent.py 项目： ddugovic/SerpentVVVVVVGameAgentPlugin

class SerpentVVVVVVGameAgent(GameAgent):
    def __init__(self, **kwargs):
        """Set up the base agent, register the PLAY hooks and seed the game state.

        Args:
            **kwargs: Forwarded unchanged to the parent class initialiser.
        """
        super().__init__(**kwargs)

        # One-off setup hook and per-frame handler, both under the "PLAY" key.
        self.frame_handler_setups["PLAY"] = self.setup_play
        self.frame_handlers["PLAY"] = self.handle_play

        self.game_state = None
        self._reset_game_state()

    def setup_play(self):
        """Create the key mappings and the direction DDQN agent.

        Sets ``self.key_mapping`` (key name -> label printed by
        ``handle_play``) and ``self.dqn_direction``. Saved weights are loaded
        only if the weights file exists.
        """
        left_key = KeyboardKey.KEY_LEFT
        right_key = KeyboardKey.KEY_RIGHT

        # Only LEFT and RIGHT are mapped; UP and DOWN are disabled upstream.
        input_mapping = {"LEFT": [left_key], "RIGHT": [right_key]}

        self.key_mapping = {left_key.name: "LEFT", right_key.name: "RIGHT"}

        direction_labels = [None, "LEFT", "RIGHT"]
        direction_action_space = KeyboardMouseActionSpace(
            direction_keys=direction_labels)

        direction_model_file_path = os.sep.join(
            ("datasets", "vvvvvv_direction_dqn_0_1_.h5"))

        if os.path.isfile(direction_model_file_path):
            saved_weights = direction_model_file_path
        else:
            saved_weights = None

        self.dqn_direction = DDQN(model_file_path=saved_weights,
                                  input_shape=(480, 640, 4),
                                  input_mapping=input_mapping,
                                  action_space=direction_action_space,
                                  replay_memory_size=5000,
                                  max_steps=1000000,
                                  observe_steps=600,
                                  batch_size=32,
                                  model_learning_rate=1e-4,
                                  initial_epsilon=1,
                                  final_epsilon=0.01,
                                  override_epsilon=False)

    def handle_play(self, game_frame):
        """Process one PLAY frame: observe, (optionally) learn, report and act.

        Flow:
            1. Push every buffered frame to the visual debugger.
            2. On the very first call, tap SPACE, wait and return.
            3. Measure HP and score from ``game_frame`` and push them onto the
               ``health`` / ``score`` histories.
            4. Build the frame stack on first use; afterwards feed the latest
               pipeline frames to the network (replay memory in TRAIN mode,
               frame stack in RUN mode), print the statistics and handle
               end-of-run bookkeeping.
            5. Pick and send one direction action.

        Args:
            game_frame: The current frame, passed to ``_measure_actor_hp`` and
                ``_measure_run_score``.

        Returns:
            None in every branch.
        """
        gc.disable()

        # Use a distinct loop name so the ``game_frame`` argument is not
        # overwritten by the last buffered frame.
        for i, buffered_frame in enumerate(self.game_frame_buffer.frames):
            self.visual_debugger.store_image_data(buffered_frame.frame,
                                                  buffered_frame.frame.shape,
                                                  str(i))

        if self.dqn_direction.first_run:
            self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

            self.dqn_direction.first_run = False

            time.sleep(5)

            return None

        actor_hp = self._measure_actor_hp(game_frame)
        run_score = self._measure_run_score(game_frame)

        self.game_state["health"].appendleft(actor_hp)
        self.game_state["score"].appendleft(run_score)

        if self.dqn_direction.frame_stack is None:
            full_game_frame = FrameGrabber.get_frames(
                [0],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE").frames[0]

            self.dqn_direction.build_frame_stack(full_game_frame.frame)
        else:
            game_frame_buffer = FrameGrabber.get_frames(
                [0, 4, 8, 12],
                frame_shape=(self.game.frame_height, self.game.frame_width),
                frame_type="PIPELINE")

            if self.dqn_direction.mode == "TRAIN":
                reward_direction, reward_action = self._calculate_reward()

                self.game_state["run_reward_direction"] += reward_direction
                self.game_state["run_reward_action"] += reward_action

                # The transition is terminal when the newest health reading is
                # exhausted. "health" is a deque, so comparing the deque
                # itself to 0 would always be False.
                is_terminal = bool(self.game_state["health"][0] <= 0)

                self.dqn_direction.append_to_replay_memory(
                    game_frame_buffer,
                    reward_direction,
                    terminal=is_terminal)

                # Every 2000 steps, save latest weights to disk
                if self.dqn_direction.current_step % 2000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix="datasets/vvvvvv_direction")

                # Every 20000 steps, save weights checkpoint to disk
                if self.dqn_direction.current_step % 20000 == 0:
                    self.dqn_direction.save_model_weights(
                        file_path_prefix="datasets/vvvvvv_direction",
                        is_checkpoint=True)

            elif self.dqn_direction.mode == "RUN":
                self.dqn_direction.update_frame_stack(game_frame_buffer)

            run_time = datetime.now() - self.started_at

            serpent.utilities.clear_terminal()

            print(
                f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
            )
            print(
                "GAME: VVVVVV                 PLATFORM: Steam                AGENT: DDQN + Prioritized Experience Replay"
            )
            print("")

            self.dqn_direction.output_step_data()

            print(f"CURRENT RUN: {self.game_state['current_run']}")
            print(
                f"CURRENT RUN REWARD: {round(self.game_state['run_reward_direction'] + self.game_state['run_reward_action'], 2)}"
            )
            print(
                f"CURRENT RUN PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(f"CURRENT HEALTH: {self.game_state['health'][0]}")
            print(f"CURRENT SCORE: {self.game_state['score'][0]}")
            print("")
            print(
                f"LAST RUN DURATION: {self.game_state['last_run_duration']} seconds"
            )

            print("")
            print(
                f"RECORD TIME ALIVE: {self.game_state['record_time_alive'].get('value')} seconds (Run {self.game_state['record_time_alive'].get('run')}, {'Predicted' if self.game_state['record_time_alive'].get('predicted') else 'Training'})"
            )
            print("")

            print(
                f"RANDOM AVERAGE TIME ALIVE: {self.game_state['random_time_alive']} seconds"
            )

            # The run ends once the previous health reading was exhausted.
            if self.game_state["health"][1] <= 0:
                serpent.utilities.clear_terminal()
                timestamp = datetime.utcnow()

                # Garbage collection is disabled per frame; collect once here,
                # between runs.
                gc.enable()
                gc.collect()
                gc.disable()

                timestamp_delta = timestamp - self.game_state["run_timestamp"]
                self.game_state["last_run_duration"] = timestamp_delta.seconds

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # Check for Records
                    if self.game_state["last_run_duration"] > self.game_state[
                            "record_time_alive"].get("value", 0):
                        self.game_state["record_time_alive"] = {
                            "value": self.game_state["last_run_duration"],
                            "run": self.game_state["current_run"],
                            "predicted": self.dqn_direction.mode == "RUN"
                        }
                else:
                    self.game_state["random_time_alives"].append(
                        self.game_state["last_run_duration"])
                    self.game_state["random_time_alive"] = np.mean(
                        self.game_state["random_time_alives"])

                self.game_state["current_run_steps"] = 0

                # Release all keys before training / restarting.
                self.input_controller.handle_keys([])

                if self.dqn_direction.mode == "TRAIN":
                    # Single source of truth for the loop bound and the
                    # progress message (which used to print "/2").
                    mini_batch_count = 8

                    for i in range(mini_batch_count):
                        run_time = datetime.now() - self.started_at
                        serpent.utilities.clear_terminal()
                        print(
                            f"SESSION RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} seconds"
                        )
                        print(
                            "GAME: VVVVVV                 PLATFORM: Steam                AGENT: DDQN + Prioritized Experience Replay"
                        )
                        print("")

                        print(
                            f"TRAINING ON MINI-BATCHES: {i + 1}/{mini_batch_count}"
                        )
                        print(
                            f"NEXT RUN: {self.game_state['current_run'] + 1} {'- AI RUN' if (self.game_state['current_run'] + 1) % 20 == 0 else ''}"
                        )

                        self.dqn_direction.train_on_mini_batch()

                self.game_state["run_timestamp"] = datetime.utcnow()
                self.game_state["current_run"] += 1
                self.game_state["run_reward_direction"] = 0
                self.game_state["run_reward_action"] = 0
                self.game_state["run_predicted_actions"] = 0
                self.game_state["health"] = collections.deque(np.full((8, ),
                                                                      3),
                                                              maxlen=8)
                self.game_state["score"] = collections.deque(np.full((8, ), 0),
                                                             maxlen=8)

                if self.dqn_direction.mode in ["TRAIN", "RUN"]:
                    # The target network is refreshed every 100th run.
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 100 == 0:
                        if self.dqn_direction.type == "DDQN":
                            self.dqn_direction.update_target_model()

                    # Every 20th run is played in RUN mode; all others train.
                    if self.game_state["current_run"] > 0 and self.game_state[
                            "current_run"] % 20 == 0:
                        self.dqn_direction.enter_run_mode()
                    else:
                        self.dqn_direction.enter_train_mode()

                self.input_controller.tap_key(KeyboardKey.KEY_SPACE)

                return None

        self.dqn_direction.pick_action()
        self.dqn_direction.generate_action()

        keys = self.dqn_direction.get_input_values()
        print("")

        print("PRESSING: ", end='')
        print(" + ".join(
            list(map(lambda k: self.key_mapping.get(k.name), keys))))

        self.input_controller.handle_keys(keys)

        if self.dqn_direction.current_action_type == "PREDICTED":
            self.game_state["run_predicted_actions"] += 1

        self.dqn_direction.erode_epsilon(factor=2)

        self.dqn_direction.next_step()

        self.game_state["current_run_steps"] += 1

    def _reset_game_state(self):
        self.game_state = {
            "health": collections.deque(np.full((8, ), 3), maxlen=8),
            "score": collections.deque(np.full((8, ), 0), maxlen=8),
            "run_reward_direction": 0,
            "run_reward_action": 0,
            "current_run": 1,
            "current_run_steps": 0,
            "current_run_health": 0,
            "current_run_score": 0,
            "run_predicted_actions": 0,
            "last_run_duration": 0,
            "record_time_alive": dict(),
            "random_time_alive": None,
            "random_time_alives": list(),
            "run_timestamp": datetime.utcnow(),
        }

    def _measure_actor_hp(self, game_frame):
        """Estimate the actor's HP from the ``HP_AREA`` region and sprite lookups.

        The base estimate is the number of distinct colours in the HP region
        minus 7 (0 when the colour list is unavailable). Every game sprite is
        then identified and located in ``game_frame``; both results are
        printed, and any successful location overrides the estimate with
        1000000.

        Args:
            game_frame: Frame providing ``frame``; also searched for sprites.

        Returns:
            int: The HP estimate.
        """
        hp_pixels = serpent.cv.extract_region_from_image(
            game_frame.frame, self.game.screen_regions["HP_AREA"])
        hp_image = Image.fromarray(hp_pixels)

        # TODO: remove in favor of sprite detection and location
        palette = hp_image.getcolors()
        actor_hp = len(palette) - 7 if palette else 0

        for _name, known_sprite in self.game.sprites.items():
            identify_query = Sprite("QUERY", image_data=known_sprite.image_data)
            # Will be "UNKNOWN" if no match
            identified_as = self.sprite_identifier.identify(
                identify_query, mode="CONSTELLATION_OF_PIXELS")
            print(identified_as)

            locate_query = Sprite("QUERY", image_data=known_sprite.image_data)
            found_at = SpriteLocator().locate(sprite=locate_query,
                                              game_frame=game_frame)
            print(found_at)

            if found_at:
                actor_hp = 1000000

        return actor_hp

    def _measure_run_score(self, game_frame):
        """Read the current score from the ``SCORE_AREA`` region via OCR.

        The region is converted to 8-bit grayscale. OCR only runs when the
        image contains more than one distinct colour; the text before the
        first ``":"`` is kept. Anything that is not purely digits is replaced
        by ``'0'``. The result is also stored in
        ``self.game_state["current_run_score"]``.

        Args:
            game_frame: Frame providing ``frame``.

        Returns:
            str: The score as a digit string (``'0'`` when unreadable).
        """
        score_area_frame = serpent.cv.extract_region_from_image(
            game_frame.frame, self.game.screen_regions["SCORE_AREA"])

        score_grayscale = np.array(skimage.color.rgb2gray(score_area_frame) *
                                   255,
                                   dtype="uint8")
        score_image = Image.fromarray(score_grayscale)

        score = '0'

        # A single-colour region holds no text, so OCR is skipped.
        image_colors = score_image.getcolors()
        if image_colors and len(image_colors) > 1:
            score = serpent.ocr.perform_ocr(image=score_grayscale,
                                            scale=10,
                                            order=5,
                                            horizontal_closing=10,
                                            vertical_closing=5)
            score = score.split(":")[0]

        if not score.isdigit():
            score = '0'

        self.game_state["current_run_score"] = score

        return score

    def _calculate_reward(self):
        reward = 0

        reward += self.game_state["health"][0] / 10.0
        reward += (0.5 if (int(self.game_state["score"][0]) -
                           int(self.game_state["score"][1])) >= 0 else -0.25)

        return reward, reward

示例#10

显示文件

文件： serpent_Pika2_game_agent.py 项目： pionxzh/AI-PikaBall

class SerpentPika2GameAgent(GameAgent):
    def __init__(self, **kwargs):
        """Register the PLAY handlers and initialise the agent's state.

        Args:
            **kwargs: Forwarded unchanged to the parent class initialiser.
        """
        super().__init__(**kwargs)

        self.frame_handlers["PLAY"] = self.handle_play
        self.frame_handler_setups["PLAY"] = self.setup_play
        self.previous_game_frame = None

        # Lower/upper 3-channel colour bounds stored as uint8 arrays.
        # NOTE(review): "Y"/"R" presumably stand for yellow/red; confirm
        # against the code that consumes these attributes.
        colour_bounds = (
            ("lowerY", [255, 255, 0]),
            ("upperY", [255, 255, 10]),
            ("lowerR", [255, 0, 0]),
            ("upperR", [255, 0, 10]),
        )
        for attribute, channels in colour_bounds:
            setattr(self, attribute, np.array(channels, np.uint8))

        self.game_state = None
        self._reset_game_state()

    def setup_key(self):
        """Define the key bindings and the two action spaces.

        Sets four attributes:
            input_mapping: action label -> list of keys to hold.
            key_mapping: key -> short label used when printing pressed keys.
            action_space: hit / no-hit choices.
            move_action_space: movement choices.
        """
        up = KeyboardKey.KEY_UP
        right = KeyboardKey.KEY_RIGHT
        down = KeyboardKey.KEY_DOWN
        left = KeyboardKey.KEY_LEFT
        hit = KeyboardKey.KEY_RETURN

        self.input_mapping = {
            "JUMP": [up],
            "RIGHT": [right],
            "LEFT": [left],
            "LEFT JUMP": [left, up],
            "RIGHT JUMP": [right, up],
            "HIT": [hit],
            "None": [],
        }

        labelled_keys = (
            (up, "UP"),
            (right, "RIGHT"),
            (down, "DOWN"),
            (left, "LEFT"),
            (hit, "HIT"),
        )
        self.key_mapping = dict(labelled_keys)

        hit_choices = ['None', 'HIT']
        move_choices = ['None', 'JUMP', 'RIGHT', 'LEFT']
        self.action_space = KeyboardMouseActionSpace(action=hit_choices)
        self.move_action_space = KeyboardMouseActionSpace(action=move_choices)

    def setup_play(self):
        """Prepare a PLAY session: bindings, statistics files and both DDQNs.

        Steps, in order:
          * reset bookkeeping attributes and call ``setup_key()``;
          * load ``score.npy`` and ``collision.npy`` if present, otherwise
            start from empty arrays;
          * for each of ``model/action`` and ``model/move`` pick the weights
            file whose name carries the smallest decimal number (treated as
            the epsilon value) and build a ``DDQN`` from it;
          * tap RETURN to start the game.

        Raises:
            OSError: if ``model/action`` or ``model/move`` cannot be listed.
        """
        # Raw string: "\d" in a plain literal is an invalid escape sequence.
        epsilon_pattern = re.compile(r"\d+\.\d+")

        def load_history(path):
            """Return the array saved at *path*, or an empty one."""
            if os.path.isfile(path):
                return np.load(path)
            return np.zeros(shape=(0, ))

        def latest_model_path(directory, default_name):
            """Return the path of the lowest-epsilon weights file."""
            lowest_epsilon = 1
            chosen = default_name
            for file_name in os.listdir(directory):
                for found in epsilon_pattern.findall(file_name):
                    epsilon = float(found)
                    if lowest_epsilon > epsilon:
                        lowest_epsilon = epsilon
                        chosen = file_name
            return f'{directory}/{chosen}'.replace('/', os.sep)

        def build_agent(model_path, action_space):
            """Create a DDQN, resuming from *model_path* when it exists."""
            has_weights = os.path.isfile(model_path)
            return DDQN(
                model_file_path=model_path if has_weights else None,
                input_shape=(100, 100, 4),
                input_mapping=self.input_mapping,
                action_space=action_space,
                replay_memory_size=5000,
                max_steps=2000000,
                # Shorter observation phase when resuming from saved weights.
                observe_steps=100 if has_weights else 2000,
                batch_size=32,
                initial_epsilon=1,
                final_epsilon=0.01,
                override_epsilon=True)

        self.trainID = 0
        self.setup_key()
        self.frame_process = False
        self.rewards = list()
        self.started_at = datetime.now()
        self.started_at_str = self.started_at.isoformat()

        self.save_point_path = 'score.npy'
        self.score_record = load_history(self.save_point_path)

        self.collision_count_path = 'collision.npy'
        self.reward_sum = 0
        self.collision_counter = load_history(self.collision_count_path)

        self.dqn_action = build_agent(
            latest_model_path('model/action', 'dqn_action_0_1_.h5'),
            self.action_space)

        self.dqn_move = build_agent(
            latest_model_path('model/move', 'dqn_move_0_1_.h5'),
            self.move_action_space)

        print('Starting Game')
        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)

    def getDifference(self, game_frame, previous_game_frame):
        """Return the grayscale difference between two frames.

        Args:
            game_frame: Object exposing ``grayscale_frame`` (the minuend).
            previous_game_frame: Object exposing ``grayscale_frame`` (the
                subtrahend).

        Returns:
            ``game_frame.grayscale_frame - previous_game_frame.grayscale_frame``.
        """
        current = game_frame.grayscale_frame
        previous = previous_game_frame.grayscale_frame
        return current - previous

    def handle_play(self, game_frame):
        """Per-frame PLAY handler: refresh tracked values, then dispatch.

        ``readInfo()`` supplies eleven values (opponent position, AI position,
        ball position, both scores, collision size and collision position).
        They are stored on the agent / in ``game_state`` and the AI, ball,
        score and collision-size values are pushed onto their history deques.
        A single pixel of the newest pipeline frame then decides whether a
        match is running.

        Args:
            game_frame: Current frame, forwarded to ``handle_fight``.
        """
        (com_x, com_y, ai_x, ai_y, ball_x, ball_y, com_score, ai_score,
         col_size, col_x, col_y) = readInfo()

        state = self.game_state
        state["com_x"], state["com_y"] = com_x, com_y
        self.ai_x, self.ai_y = ai_x, ai_y
        self.ball_x, self.ball_y = ball_x, ball_y
        self.com_sc, self.ai_sc = com_score, ai_score
        self.col_size = col_size
        state["col_x"], state["col_y"] = col_x, col_y

        # Newest sample goes to index 0 of each history deque.
        samples = (
            ("ai_x", ai_x),
            ("ai_y", ai_y),
            ("ball_x", ball_x),
            ("ball_y", ball_y),
            ("ai_score", ai_score),
            ("com_score", com_score),
            ("col_size", col_size),
        )
        for key, value in samples:
            state[key].appendleft(value)

        # In-game detection relies on one pixel having an exact value.
        # NOTE(review): the constant looks like 92/255; confirm the frame is
        # a normalised grayscale image.
        in_game_pixel = 0.3607843137254902
        newest = FrameGrabber.get_frames([0], frame_type="PIPELINE").frames[0]
        self.game_frame_img = newest.frame
        if self.game_frame_img[91, 49] != in_game_pixel:
            self.handle_notInGame()
            return

        state["playing"] = True
        self.handle_fight(game_frame)

    def handle_notInGame(self):
        """Handle a frame captured while no match is running.

        Clears the terminal, prints a waiting message, plays the animation
        frame selected by ``game_state["animeIndex"]``, advances that index
        (values below 3 are incremented, anything else wraps to 0), taps
        RETURN and pauses for 0.3 seconds.
        """
        serpent.utilities.clear_terminal()
        print('Currently not in game...please wait..')

        playAnimation(self.game_state["animeIndex"])

        shown_index = self.game_state["animeIndex"]
        if shown_index < 3:
            following_index = shown_index + 1
        else:
            following_index = 0
        self.game_state["animeIndex"] = following_index

        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(0.3)

    def handle_fight(self, game_frame):
        """Process one in-match frame: learn, report, act and detect game over.

        On the very first call the ``first_run`` flags of both networks are
        cleared and nothing else happens. On the next call (no frame stack
        yet) the frame stack is built from the newest pipeline frame and
        shared between both networks. On every later call the method:

          1. grabs pipeline frames 0-3;
          2. in TRAIN mode computes the reward, stores the transition in both
             replay memories and periodically saves weights; in RUN mode only
             updates the frame stacks;
          3. prints a status report;
          4. lets both networks pick an action and sends the combined keys;
          5. erodes epsilon, advances both step counters and increments
             ``game_state["current_run"]``;
          6. when either score equals 15, records the AI score and hands over
             to ``handle_fight_end``.

        Args:
            game_frame: Current frame; only forwarded to ``handle_fight_end``.
        """
        # Garbage collection is switched off on every in-match frame.
        gc.disable()
        if self.dqn_action.first_run:
            self.dqn_action.first_run = False
            self.dqn_move.first_run = False
            return

        if self.dqn_action.frame_stack is None:
            # Bootstrap: build the stack once and share it with the move net.
            game_frame_buffer = FrameGrabber.get_frames(
                [0], frame_type="PIPELINE").frames[0]
            self.dqn_action.build_frame_stack(game_frame_buffer.frame)
            self.dqn_move.frame_stack = self.dqn_action.frame_stack
        else:
            game_frame_buffer = FrameGrabber.get_frames([0, 1, 2, 3],
                                                        frame_type="PIPELINE")

            if self.dqn_action.mode == "TRAIN":
                reward = self._calculate_reward()

                self.game_state["reward"] = reward

                # NOTE(review): the terminal flag is set only when the AI
                # reaches 15 points, not when the opponent does; confirm
                # this is intended.
                self.dqn_action.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["ai_score"][0] == 15)

                self.dqn_move.append_to_replay_memory(
                    game_frame_buffer,
                    reward,
                    terminal=self.game_state["ai_score"][0] == 15)

                # Every 1000 steps, save latest weights to disk
                if self.dqn_action.current_step % 1000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix=f"model/action/dqn_action")
                    self.dqn_move.save_model_weights(
                        file_path_prefix=f"model/move/dqn_move")

                # Every 10000 steps, save weights checkpoint to disk
                if self.dqn_action.current_step % 10000 == 0:
                    self.dqn_action.save_model_weights(
                        file_path_prefix=f"model/action/dqn_action",
                        is_checkpoint=True)
                    self.dqn_move.save_model_weights(
                        file_path_prefix=f"model/move/dqn_move",
                        is_checkpoint=True)
            elif self.dqn_action.mode == "RUN":
                self.dqn_action.update_frame_stack(game_frame_buffer)
                self.dqn_move.update_frame_stack(game_frame_buffer)

            # ---- Status report -------------------------------------------
            run_time = datetime.now() - self.started_at
            serpent.utilities.clear_terminal()
            print('')
            print(Fore.YELLOW)
            print(Style.BRIGHT)
            print(f"STARTED AT:{self.started_at_str}")
            print(
                f"RUN TIME: {run_time.days} days, {run_time.seconds // 3600} hours, {(run_time.seconds // 60) % 60} minutes, {run_time.seconds % 60} s"
            )

            print(Style.RESET_ALL)
            print(Fore.GREEN)
            print(Style.BRIGHT)
            print("MOVEMENT NEURAL NETWORK:\n")
            self.dqn_move.output_step_data()
            print("")
            print("ACTION NEURAL NETWORK:\n")
            self.dqn_action.output_step_data()
            print(Style.RESET_ALL)
            print(Style.BRIGHT)
            print(f"CURRENT RUN: {self.game_state['current_run'] }")
            print("")
            print(
                f"CURRENT RUN   REWARD: {round(self.game_state['reward'], 4)}")
            print(f"CURRENT AI    SCORE: {self.game_state['ai_score'][0]}")
            print(f"CURRENT ENEMY SCORE: {self.game_state['com_score'][0]}")
            print("")
            print(
                f"PREDICTED ACTIONS: {self.game_state['run_predicted_actions']}"
            )
            print(Style.RESET_ALL)

            # ---- Action selection ----------------------------------------
            self.dqn_action.pick_action()
            self.dqn_action.generate_action()

            # The move network reuses the action type chosen by the action
            # network for this step.
            self.dqn_move.pick_action(
                action_type=self.dqn_action.current_action_type)
            self.dqn_move.generate_action()

            # Keys from both networks are pressed together.
            movement_keys = self.dqn_move.get_input_values(
            ) + self.dqn_action.get_input_values()

            print("")
            print(Fore.GREEN)
            print(Style.BRIGHT)
            print(" + ".join(
                list(
                    map(lambda k: self.key_mapping.get(k).ljust(5),
                        movement_keys))))
            print(Style.RESET_ALL)
            print("")
            print(
                f"AI:        ({self.game_state['ai_x'][0]}, {self.game_state['ai_y'][0]})"
            )
            print(
                f"COM:       ({self.game_state['com_x']}, {self.game_state['com_y']})"
            )
            print(
                f"BALL:      ({self.game_state['ball_x'][0]}, {self.game_state['ball_y'][0]})"
            )
            print(
                f"Collision: ({self.game_state['col_x']}, {self.game_state['col_y']}, {self.game_state['col_size'][0]})"
            )
            print(f"Distance:   {self.game_state['distance'][0]}")

            self.input_controller.handle_keys(movement_keys)

            if self.dqn_action.current_action_type == "PREDICTED":
                self.game_state["run_predicted_actions"] += 1

            self.dqn_action.erode_epsilon(factor=2)
            self.dqn_move.erode_epsilon(factor=2)

            self.dqn_action.next_step()
            self.dqn_move.next_step()

            # Incremented on every processed frame, not once per match.
            self.game_state["current_run"] += 1

            if self.game_state['ai_score'][0] == 15 or self.game_state[
                    'com_score'][0] == 15:
                # Game over
                # Pushing 0 moves the final scores to index 1, which is why
                # the value recorded below is ai_score[1].
                self.game_state["ai_score"].appendleft(0)
                self.game_state["com_score"].appendleft(0)
                self.score_record = np.append(self.score_record,
                                              self.game_state['ai_score'][1])
                np.save(self.save_point_path, self.score_record)
                self.handle_fight_end(game_frame)

    def handle_fight_end(self, game_frame):
        self.game_state["playing"] = False
        self.input_controller.handle_keys([])
        self.game_state["current_run"] += 1
        self.handle_fight_training(game_frame)

    def handle_fight_training(self, game_frame):
        """Train both networks after a match and choose the next mode.

        Runs one explicit garbage-collection pass (leaving the collector
        disabled afterwards), releases all keys and, in TRAIN mode, trains
        both networks on 16 mini-batches while printing progress. It then
        resets the predicted-action counter, refreshes the target models when
        the run counter is a positive multiple of 100, and enters RUN mode
        when the counter is a positive multiple of 20 (TRAIN mode otherwise).
        Finally RETURN is tapped and the method sleeps for two seconds.

        The "- AI RUN" suffix of the progress line uses the same predicate
        as the actual mode switch, so the announcement always matches what
        happens next.

        Args:
            game_frame: Unused; kept for interface compatibility.
        """
        mini_batches = 16

        serpent.utilities.clear_terminal()
        gc.enable()
        gc.collect()
        gc.disable()
        print("TRAIN MODE")
        self.input_controller.handle_keys([])

        current_run = self.game_state["current_run"]
        # Single source of truth for both the console label and the switch.
        next_is_ai_run = current_run > 0 and current_run % 20 == 0

        if self.dqn_action.mode == "TRAIN":
            ai_run_suffix = '- AI RUN' if next_is_ai_run else ''
            for i in range(mini_batches):
                serpent.utilities.clear_terminal()
                print("")
                print(Fore.GREEN)
                print(Style.BRIGHT)
                print(f"TRAINING ON MINI-BATCHES: {i + 1}/{mini_batches}")
                print(f"NEXT RUN: {current_run + 1} {ai_run_suffix}")
                print(Style.RESET_ALL)

                self.dqn_action.train_on_mini_batch()
                self.dqn_move.train_on_mini_batch()

        self.game_state["run_predicted_actions"] = 0

        if self.dqn_action.mode in ["TRAIN", "RUN"]:
            if current_run > 0 and current_run % 100 == 0:
                self.dqn_action.update_target_model()
                self.dqn_move.update_target_model()

            if next_is_ai_run:
                self.dqn_action.enter_run_mode()
                self.dqn_move.enter_run_mode()
            else:
                self.dqn_action.enter_train_mode()
                self.dqn_move.enter_train_mode()

        self.input_controller.tap_key(KeyboardKey.KEY_RETURN)
        time.sleep(2)

    def _reset_game_state(self):
        self.game_state = {
            "reward": 0,
            "animeIndex": 0,
            "current_run": 1,
            "playing": False,
            "run_predicted_actions": 0,
            "ai_x": collections.deque(np.full((4, ), 0), maxlen=4),
            "ai_y": collections.deque(np.full((4, ), 0), maxlen=4),
            "ai_score": collections.deque(np.full((4, ), 0), maxlen=4),
            "ball_x": collections.deque(np.full((4, ), 0), maxlen=4),
            "ball_y": collections.deque(np.full((4, ), 0), maxlen=4),
            "com_score": collections.deque(np.full((4, ), 0), maxlen=4),
            "col_size": collections.deque(np.full((4, ), 6), maxlen=4),
            "com_x": 36,
            "com_y": 244,
            "col_x": 0,
            "col_y": 0,
            "distance": collections.deque(np.full((20, ), 100), maxlen=20),
        }

    def _calculate_reward(self):
        reward = 0
        distance = math.sqrt(
            abs(self.game_state["ai_x"][0] - self.game_state["ball_x"][0])**2 +
            abs(self.game_state["ai_y"][0] - self.game_state["ball_y"][0])**2)
        self.game_state["distance"].appendleft(int(distance))

        # to make ai move lesser
        #if self.game_state["ai_x"][0] == self.game_state["ai_x"][1]:
        #    reward += 0.1

        # collision with ball
        collision = self.game_state["distance"][0] < 80 and self.game_state[
            "distance"][1] < 80 and self.game_state["distance"][
                2] < 80 and self.game_state["distance"][0] > self.game_state[
                    "distance"][1] and self.game_state["distance"][
                        1] < self.game_state["distance"][2]
        if collision:
            reward += 0.25

        # power hit
        if self.game_state["col_size"][0] > 0 and self.game_state["distance"][
                0] < 90 and self.game_state["col_y"] != 272:
            reward += 0.5

        # AI gain score
        if self.game_state["ai_score"][0] > self.game_state["ai_score"][1]:
            reward += 1
            self.collision_counter = np.append(self.collision_counter,
                                               self.reward_sum)
            np.save(self.collision_count_path, self.collision_counter)
            self.reward_sum = 0

        # Com gain score
        if self.game_state["com_score"][0] > self.game_state["com_score"][1]:
            reward += -1
            self.collision_counter = np.append(self.collision_counter,
                                               self.reward_sum)
            np.save(self.collision_count_path, self.collision_counter)
            self.reward_sum = 0

        if reward > 1:
            reward = 1

        self.game_state["reward"] = reward
        self.reward_sum += reward
        return reward