示例#1
0
    def _test_swipe(self, before, left, right, up, down):
        """Check one starting board against its four expected swipe results.

        Every argument is a board given as a string. ``before`` is the
        starting position; ``left``, ``right``, ``up`` and ``down`` are the
        boards expected after applying the matching ``Action`` to it.
        An ``AssertionError`` naming the action and showing both rendered
        boards is raised on the first mismatch.
        """
        cases = (
            (Action.LEFT, left),
            (Action.RIGHT, right),
            (Action.UP, up),
            (Action.DOWN, down),
        )
        for a, expected_str in cases:
            # The starting board is parsed anew for every direction, so each
            # action is applied to its own board object.
            start = board_from_string(before)
            actual = Board.apply_action_on_board(start, a)
            expected = board_from_string(expected_str)

            # Built before the comparison, so both boards are rendered on
            # every iteration, not only on failure.
            failure = f"Action {a}: Got {actual.render()}, expected {expected.render()}"
            assert actual == expected, failure
示例#2
0
文件: environment.py 项目: kvalv/2048
    def step(self, action: int):
        """Advance the game by one agent action.

        The discrete ``action`` is converted to ``Action(1 + action)`` (the
        enum is 1-indexed) and applied to the current board.

        Reward:
            * ``0`` when the board resulting from the move is game over;
            * otherwise the natural logarithm of ``1 + score gained``, with
              ``-0.1`` added when the move left the board unchanged (a
              no-op), clipped to ``[-11, 10]``.

        Besides game over, the episode is flagged as done once more than 100
        steps were taken and the number of steps is more than five times
        ``self.t + 1``; ``self.t`` is incremented here only by moves that
        change the board.

        Returns:
            ``(board, reward, done, info)``. ``info["step"]`` is ``self.t`` as
            it was before this step; ``info["no-op"]`` is present whenever
            the resulting board is not game over.
        """
        self._step_counter += 1

        # The +1 keeps the ratio defined when ``self.t`` is 0 (a division by
        # zero did happen without it).
        done = bool(
            self._step_counter > 100 and self._step_counter / (self.t + 1) > 5
        )

        move = Action(1 + action)  # discrete -> enum (which is 1-indexed)
        next_board = Board.apply_action_on_board(self.board, move)
        info = {"step": self.t}

        if Board.is_game_over(next_board):
            # Terminal position: no reward, no tile spawn, no "no-op" entry.
            self.board = next_board
            return self.board, 0, True, info

        if next_board != self.board:
            # The move changed the board: spawn a tile and count the move.
            next_board = Board.spawn_random_tile(next_board)
            self.t += 1
            penalty = 0
            info["no-op"] = False
        else:
            # An unchanged board means the action was invalid; no-op moves
            # are penalized.
            penalty = -0.1
            info["no-op"] = True

        gained = next_board.score - self.board.score

        # TODO: move the clipping to a wrapper.
        reward = np.clip(np.log(1 + gained) + penalty, -11, 10)

        self.board = next_board
        return self.board, reward, done, info