def test_game_over_checking_works_as_expected():
    not_game_over_strings = ["1 1 1 1", "0 0 0 0", "0 1 0 0", "3 2 1 3 4 5 6 7 8"]
    for s in not_game_over_strings:
        board = board_from_string(s)
        assert not Board.is_game_over(board)

    game_over_strings = ["1 2 3 4", "1 2 3 4 5 6 7 8 9", "4 3 7 2"]
    for s in game_over_strings:
        board = board_from_string(s)
        assert Board.is_game_over(board)
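# The same checks could also be written as a parametrized test, so each board
# string is reported as its own test case. A minimal sketch, assuming pytest is
# the test runner; `board_from_string` and `Board` are the helpers used above,
# and the expected values simply mirror the loop-based test (a "0" tile
# presumably encodes an empty cell).
import pytest


@pytest.mark.parametrize(
    "board_string, expected_game_over",
    [
        ("1 1 1 1", False),            # mergeable tiles -> moves remain
        ("0 0 0 0", False),            # empty cells -> moves remain
        ("0 1 0 0", False),
        ("3 2 1 3 4 5 6 7 8", False),
        ("1 2 3 4", True),             # full board, no adjacent equal tiles
        ("1 2 3 4 5 6 7 8 9", True),
        ("4 3 7 2", True),
    ],
)
def test_is_game_over_parametrized(board_string, expected_game_over):
    board = board_from_string(board_string)
    assert Board.is_game_over(board) == expected_game_over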
def step(self, action: int):
    """A single step in the game.

    Reward: the natural logarithm of the increase in 2048 score,
    with a small penalty added for no-op moves.
    """
    self._step_counter += 1
    done = False
    if self._step_counter > 100 and self._step_counter / (self.t + 1) > 5:
        # Add 1 to avoid ZeroDivisionError for `self.t`. Yes, it happened.
        done = True

    action = Action(1 + action)  # discrete -> enum (which is 1-indexed)
    modified_board = Board.apply_action_on_board(self.board, action)
    info = {"step": self.t}

    if Board.is_game_over(modified_board):
        done = True
        reward = 0
    else:
        # An action is invalid if it doesn't change the board.
        valid_action = modified_board != self.board
        if not valid_action:
            # We penalize the agent for doing no-op moves!!! >:(
            penalty = -0.1
            info["no-op"] = True
        else:
            modified_board = Board.spawn_random_tile(modified_board)
            penalty = 0
            self.t += 1
            info["no-op"] = False

        diff = modified_board.score - self.board.score
        reward = np.log(1 + diff) + penalty
        reward = np.clip(reward, -11, 10)  # TODO: move to a wrapper.

    self.board = modified_board
    return self.board, reward, done, info
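# Usage sketch: a random-agent rollout against this environment. `Game2048Env`
# and its `reset()` method are assumed names for the Gym-style class that owns
# `step()`; they are not defined in this snippet. For scale: a single merge of
# two 1024 tiles gives diff == 2048, so the reward is log(1 + 2048) ~= 7.6
# before clipping.
if __name__ == "__main__":
    import numpy as np

    env = Game2048Env()  # hypothetical constructor for the enclosing env class
    board = env.reset()  # assumed to exist, as in the classic Gym API
    rng = np.random.default_rng(0)
    episode_return, done = 0.0, False

    while not done:
        action = int(rng.integers(4))  # 0..3, mapped to Action(1)..Action(4) inside step()
        board, reward, done, info = env.step(action)
        episode_return += reward  # log of score gains, minus 0.1 per no-op move

    print(f"episode return: {episode_return:.2f}")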