def play(strategy, verbose=False, allow_unavailable_action=True):
    """Play one game from a fresh ``Game`` until it is over, driven by ``strategy``.

    Args:
        strategy: Callable invoked as ``strategy(state, actions)`` once per
            loop iteration; its return value is the action to attempt.
        verbose: If true, print the score and board before every move, the
            action name and reward after every executed move, and a final
            summary once the game ends.
        allow_unavailable_action: If true, the strategy is always offered
            ``range(4)``; otherwise it is offered only
            ``game.available_actions()``.

    Returns:
        A ``(score, experiences)`` tuple: the final ``game.score()`` and the
        list of ``Experience`` instances, one per strategy call, in order.

    Note:
        An unavailable choice is recorded (reward 0, state unchanged) without
        advancing the game, so the loop only progresses once the strategy
        picks an available action.
    """
    game = Game()
    current_state = game.state().copy()
    finished = game.game_over()
    history = []

    while not finished:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        if allow_unavailable_action:
            candidates = range(4)
        else:
            candidates = game.available_actions()
        action = strategy(current_state, candidates)

        if not game.is_action_available(action):
            # Illegal move: log it as a no-op transition and ask again.
            history.append(
                Experience(current_state, action, 0, current_state, False,
                           True, game.available_actions()))
            continue

        previous_state = current_state
        reward = game.do_action(action)
        # Copy so the stored experience is not affected by later moves.
        current_state = game.state().copy()
        finished = game.game_over()

        if verbose:
            print("Action:", ACTION_NAMES[action])
            print("Reward:", reward)

        history.append(
            Experience(previous_state, action, reward, current_state,
                       finished, False, game.available_actions()))

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), history
def play(strategy, verbose=False, allow_unavailable_action=True):
    """Run a single game to completion, querying ``strategy`` for each move.

    Args:
        strategy: Function called with the current state and a collection of
            actions; it returns the action to try next.
        verbose: When true, game states, actions, rewards and scores are
            printed as the game proceeds.
        allow_unavailable_action: When true, ``range(4)`` is passed to the
            strategy; when false, only ``game.available_actions()`` is passed.

    Returns:
        ``(score, experiences)`` where ``score`` is ``game.score()`` at the
        end of the game and ``experiences`` is the list of ``Experience``
        instances collected along the way (one per strategy call).

    Note:
        Choosing an unavailable action appends an experience with reward 0
        and identical before/after state, and leaves the game untouched; the
        strategy is simply asked again on the next iteration.
    """
    game = Game()
    board = game.state().copy()
    done = game.game_over()
    transitions = []
    record = transitions.append

    while not done:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        offered = range(4)
        if not allow_unavailable_action:
            offered = game.available_actions()
        chosen = strategy(board, offered)

        if game.is_action_available(chosen):
            board_before = board
            gained = game.do_action(chosen)
            # Snapshot the board so later moves cannot alter this record.
            board = game.state().copy()
            done = game.game_over()

            if verbose:
                print("Action:", ACTION_NAMES[chosen])
                print("Reward:", gained)

            legal_next = game.available_actions()
            record(Experience(board_before, chosen, gained, board, done,
                              False, legal_next))
        else:
            # Unavailable action: nothing changes, but the attempt is logged.
            legal_now = game.available_actions()
            record(Experience(board, chosen, 0, board, False, True,
                              legal_now))

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), transitions
def test_init(choice):
    """Check that a new ``Game`` draws two starting tiles and has score 0.

    ``choice`` is a mock whose scripted return values replace the random
    draws made while constructing the game (presumably a patched
    ``numpy.random.choice`` -- confirm against whatever supplies it).
    """
    # Values handed out, in order, by successive calls to the mock.
    scripted_draws = [
        0,  # First position
        1,  # First tile
        1,  # Second position
        2,  # Second tile
    ]
    choice.side_effect = scripted_draws

    game = Game()

    # Each tile placement is a position draw followed by a tile-value draw.
    tile_draw = call([1, 2], p=[0.9, 0.1])
    expected_calls = [call(16), tile_draw, call(15), tile_draw]
    choice.assert_has_calls(expected_calls)

    # Assert correct number of 0s, 1s and 2s
    game.print_state()
    value_counts = np.bincount(game.state().flatten())
    assert (value_counts == [14, 1, 1]).all()
    assert game.score() == 0
def test_do_action(choice):
    """Check that action 3 (DOWN) on a 4x4 board gives the expected state and score.

    ``choice`` is a mock whose scripted return values stand in for the draws
    made when the game adds a tile after the move.
    """
    choice.side_effect = [
        0,  # First position
        1,  # First tile
    ]

    board_before = np.array([
        [1, 2, 3, 3],
        [5, 6, 7, 8],
        [5, 2, 7, 0],
        [1, 0, 3, 0],
    ])
    board_after = np.array([
        [1, 0, 0, 0],
        [1, 2, 3, 0],
        [6, 6, 8, 3],
        [1, 2, 3, 8],
    ])
    # Two merges happen: 5+5 -> 6 and 7+7 -> 8.
    expected_score = 2 ** 6 + 2 ** 8

    game = Game(state=board_before)
    game.do_action(3)  # DOWN

    game.print_state()
    matches = game.state() == board_after
    assert matches.all()
    assert game.score() == expected_score
def test_do_action(choice):
    """Check that action 3 (DOWN) on a 3x3x3 board gives the expected state and score.

    The board is three identical 3x3 layers; the expected result is the same
    transformed layer repeated three times. ``choice`` is a mock with two
    scripted return values.
    """
    choice.side_effect = [
        0,  # First position
        1,  # First tile
    ]

    layer_before = [
        [1, 2, 3],
        [5, 6, 7],
        [5, 2, 7],
    ]
    layer_after = [
        [0, 2, 0],
        [1, 6, 3],
        [6, 2, 8],
    ]
    board_before = np.array([layer_before, layer_before, layer_before])
    board_after = np.array([layer_after, layer_after, layer_after])
    # Score is (2 ** 6 + 2 ** 8)*3
    expected_score = (2 ** 6 + 2 ** 8) * 3

    game = Game(state=board_before)
    game.do_action(3)  # DOWN

    game.print_state()
    matches = game.state() == board_after
    assert matches.all()
    assert game.score() == expected_score