def _test_swipe(self, before, left, right, up, down):
    """Assert that swiping ``before`` in each direction gives the expected board.

    Every argument is a string that is parsed with ``board_from_string``:
    ``before`` is the starting position, and ``left``, ``right``, ``up`` and
    ``down`` are the boards expected after applying the matching ``Action``.
    """
    cases = (
        (Action.LEFT, left),
        (Action.RIGHT, right),
        (Action.UP, up),
        (Action.DOWN, down),
    )
    for a, wanted in cases:
        # The starting board is re-parsed for every action, so each swipe
        # is applied to its own freshly built board.
        start = board_from_string(before)
        actual = Board.apply_action_on_board(start, a)
        expected = board_from_string(wanted)
        err_msg = f"Action {a}: Got {actual.render()}, expected {expected.render()}"
        assert actual == expected, err_msg
def step(self, action: int):
    """Advance the game by one action.

    The discrete ``action`` is converted to an ``Action`` enum member
    (``Action(1 + action)``, since the enum is 1-indexed) and applied to the
    current board.

    Reward: ``0`` when the resulting board is game over. Otherwise it is
    ``log(1 + score gained)`` plus a penalty of ``-0.1`` if the action left
    the board unchanged (a no-op), clipped to the range ``[-11, 10]``.

    Returns:
        A tuple ``(board, reward, done, info)``. ``info["step"]`` holds the
        value of ``self.t`` from before this step was applied. When the game
        is not over, ``info["no-op"]`` tells whether the action left the
        board unchanged.
    """
    self._step_counter += 1

    # Stop the episode when steps vastly outnumber the moves that actually
    # changed the board. The `+ 1` avoids a ZeroDivisionError while `self.t`
    # is still 0 (this did happen).
    done = bool(
        self._step_counter > 100 and self._step_counter / (self.t + 1) > 5
    )

    move = Action(1 + action)  # discrete -> enum (which is 1-indexed)
    next_board = Board.apply_action_on_board(self.board, move)
    info = {"step": self.t}

    if Board.is_game_over(next_board):
        self.board = next_board
        return self.board, 0, True, info

    # An action that does not change the board is a no-op (invalid move).
    noop = not (next_board != self.board)
    if noop:
        # No-op moves are penalized and do not spawn a new tile.
        penalty = -0.1
    else:
        next_board = Board.spawn_random_tile(next_board)
        penalty = 0
        self.t += 1
    info["no-op"] = noop

    gained = next_board.score - self.board.score
    # TODO: move the clipping to a wrapper.
    reward = np.clip(np.log(1 + gained) + penalty, -11, 10)

    self.board = next_board
    return self.board, reward, done, info