Example #1
0
def test_game_over_checking_works_as_expected():
    """Boards that still have a legal move are not over; stuck boards are."""
    playable = ["1 1 1 1", "0 0 0 0", "0 1 0 0", "3 2 1 3 4 5 6 7 8"]
    stuck = ["1 2 3 4", "1 2 3 4 5 6 7 8 9", "4 3 7 2"]

    # A board with any merge or empty cell available must not be game over.
    assert all(not Board.is_game_over(board_from_string(s)) for s in playable)
    # A board with no merges and no empty cells must be game over.
    assert all(Board.is_game_over(board_from_string(s)) for s in stuck)
Example #2
0
File: environment.py  Project: kvalv/2048
    def step(self, action: int):
        """A single step in the game.

        The reward is the natural logarithm of the gain in 2048 score,
        with a small penalty added for no-op moves.
        """
        self._step_counter += 1

        # End the episode if the agent wastes too many steps per effective
        # move. The +1 on self.t avoids ZeroDivisionError (it has happened).
        done = self._step_counter > 100 and self._step_counter / (self.t + 1) > 5

        move = Action(1 + action)  # discrete action space is 0-based, the enum is 1-indexed
        next_board = Board.apply_action_on_board(self.board, move)
        info = {"step": self.t}

        if Board.is_game_over(next_board):
            # Terminal state: no reward, just report the final board.
            self.board = next_board
            return self.board, 0, True, info

        # An action is invalid (a no-op) if it leaves the board unchanged.
        if next_board != self.board:
            next_board = Board.spawn_random_tile(next_board)
            penalty = 0
            self.t += 1
            info["no-op"] = False
        else:
            # We penalize the agent for doing no-op moves!!! >:(
            penalty = -0.1
            info["no-op"] = True

        gained = next_board.score - self.board.score

        reward = np.log(1 + gained) + penalty
        reward = np.clip(reward, -11, 10)  # TODO: move to a wrapper.

        self.board = next_board
        return self.board, reward, done, info