def play(strategy, verbose=False, allow_unavailable_action=True):
    """Run one complete game, letting `strategy` pick every move.

    Args:
      strategy: Callable invoked as `strategy(state, actions)`; the value it
        returns is used as the next action.
      verbose: When true, prints the score, the board, each performed action
        and its reward, and a final "Game over." message.
      allow_unavailable_action: When true, the strategy is offered `range(4)`
        on every turn; otherwise it is offered `game.available_actions()`.

    Returns:
      A `(score, experiences)` tuple: the game's final score and the list of
      Experience instances recorded during the game, one per strategy call.
    """
    game = Game()
    current = game.state().copy()
    finished = game.game_over()
    history = []

    while not finished:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        previous = current
        if allow_unavailable_action:
            offered = range(4)
        else:
            offered = game.available_actions()
        action = strategy(previous, offered)

        if not game.is_action_available(action):
            # Rejected move: the board is untouched, so the same state is
            # recorded as both "before" and "after", with a zero reward and
            # the sixth Experience field set to True.
            history.append(
                Experience(current, action, 0, current, False, True,
                           game.available_actions()))
            continue

        reward = game.do_action(action)
        current = game.state().copy()
        finished = game.game_over()

        if verbose:
            print("Action:", ACTION_NAMES[action])
            print("Reward:", reward)

        history.append(
            Experience(previous, action, reward, current, finished, False,
                       game.available_actions()))

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), history
def play(strategy, verbose=False, allow_unavailable_action=True):
    """Play a single game to completion with the supplied strategy.

    Args:
      strategy: Function called with the current state and a collection of
        actions; it must return the action to attempt.
      verbose: If true, prints game states, actions and scores.
      allow_unavailable_action: If true, `range(4)` is passed to the strategy
        as the action collection; if false, only `game.available_actions()`
        is passed.

    Returns:
      `(score, experiences)`, where `score` is the final score and
      `experiences` is the list of Experience instances collected, in the
      order the strategy was consulted.
    """
    game = Game()
    board = game.state().copy()
    done = game.game_over()
    collected = []

    while not done:
        if verbose:
            print("Score:", game.score())
            game.print_state()

        board_before = board
        choices = (range(4) if allow_unavailable_action
                   else game.available_actions())
        chosen = strategy(board_before, choices)

        if game.is_action_available(chosen):
            gained = game.do_action(chosen)
            board = game.state().copy()
            done = game.game_over()

            if verbose:
                print("Action:", ACTION_NAMES[chosen])
                print("Reward:", gained)

            # The available actions are queried after the move was applied.
            record = Experience(board_before, chosen, gained, board, done,
                                False, game.available_actions())
        else:
            # Nothing changed on the board: zero reward, identical before and
            # after states, and the sixth field set to True for this attempt.
            record = Experience(board, chosen, 0, board, False, True,
                                game.available_actions())
        collected.append(record)

    if verbose:
        print("Score:", game.score())
        game.print_state()
        print("Game over.")

    return game.score(), collected
def test_available_actions():
    """available_actions() on a 3x3x3 board whose every row is [1, 2, 0]."""
    row = [1, 2, 0]
    layer = [row, row, row]
    board = np.array([layer, layer, layer])

    game = Game(state=board)
    found = game.available_actions()

    # All actions except left (action 0) are available.
    assert found == [1, 2, 3, 4, 5]
def test_available_actions_none_available():
    """A 3x3x3 board with no zero cells: expects no actions and game over."""
    layers = [
        [[1, 2, 3], [5, 6, 7], [1, 2, 3]],
        [[8, 9, 10], [11, 12, 13], [8, 9, 10]],
        [[14, 15, 16], [17, 18, 19], [14, 15, 16]],
    ]
    game = Game(state=np.array(layers))
    found = game.available_actions()

    # No action is expected to be available on this board, so the game must
    # also report itself as over.
    assert found == []
    assert game.game_over()
def test_available_actions():
    """available_actions() on a 4x4 board whose every row is [1, 2, 3, 0]."""
    board = np.array([[1, 2, 3, 0]] * 4)

    game = Game(state=board)
    found = game.available_actions()

    # All actions except left (action 0) are available.
    assert found == [1, 2, 3]
def test_available_actions_none_available():
    """A 4x4 board with no zero cells: expects no actions and game over."""
    rows = [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [1, 2, 3, 4],
        [5, 6, 7, 8],
    ]
    game = Game(state=np.array(rows))
    found = game.available_actions()

    # No action is expected to be available on this board, so the game must
    # also report itself as over.
    assert found == []
    assert game.game_over()