Example #1
    def test_game_end(self):
        game = gm.GameState()
        features = fe.state2feature(game)
        start_player = np.sum(features[0, 15, :])
        player = RandomPolicy()

        # Play the game until the end
        while (not game.is_end_of_game()):
            (card, move) = player.get_action(game)
            game.play_round(card, *move)
        features = fe.state2feature(game)

        # At the end 9 cards should fill up the board
        self.assertTrue((np.sum(features[0, 4:13, :],
                                axis=1) == [1, 1, 1, 1, 1, 1, 1, 1, 1]).all())
        # At the end the winner should own more cards
        if game.get_winner() == gm.LEFT_PLAYER:
            self.assertTrue(np.sum(features[0, 14, :]) > gm.START_HANDS)
        elif game.get_winner() == gm.RIGHT_PLAYER:
            self.assertTrue(np.sum(features[0, 14, :]) < gm.START_HANDS)
        else:
            self.assertTrue(np.sum(features[0, 14, :]) == gm.START_HANDS)
        # After the 9 rounds the current-player plane should be flipped relative to the start (all 1s if it started at 0, all 0s if it started at 10)
        self.assertTrue(
            np.sum(features[0, 15, :]) == features.shape[2] - start_player)
    def test_get_action(self):
        player = NNPolicy()
        input = np.zeros((1, fe.get_feature_dim(player.features),
                          2 * gm.START_HANDS))

        game = gm.GameState()
        while not game.is_end_of_game():
            (card, move) = player.get_action(game)
            self.assertTrue(card.position == (-1, -1)
                            and card.owner == game.current_player)
            self.assertTrue(
                game.board[Helper.tuple2idx(game.board_size, *move)] is None)
            game.play_round(card, *move)
Example #3
    def test_run_single_game(self):
        game = gm.GameState()
        (states, cards, moves) = su.simulate_single_game(self.target, game)
        self.assertTrue(
            np.array(states).shape == (gm.BOARD_SIZE ** 2,
                                       fe.get_feature_dim(),
                                       gm.START_HANDS * 2))
        self.assertTrue(
            np.array(cards).shape == (gm.BOARD_SIZE ** 2, gm.START_HANDS * 2))
        self.assertTrue(
            np.array(moves).shape == (gm.BOARD_SIZE ** 2, gm.BOARD_SIZE ** 2))
        sum_cards = np.sum(np.array(cards), 0)
        sum_moves = np.sum(np.array(moves), 0)
        self.assertTrue(np.all(sum_cards <= 1))
        self.assertTrue(np.all(sum_moves == 1))
def simulate_games(player, opponent, metadata):
    """
    Args:
        player: a policy for the player side
        opponent: another policy for the opponent side
        metadata: a dictionary containing the metadata for this training process
        
    Returns:
        states: a list with n elements, where n is the number of games in each batch (game_batch in metadata). Each element is another list
                with m elements, where m is the number of moves made in this game by the player (we only train on the player's actions, not the
                opponent's). Each element of the inner list is the game feature, an ndarray of size 1 x DIM x 10, where DIM is the dimension of all
                selected features for each card.
        card_actions: similar to states, a list of lists, one per game. Each element of the inner list is a one-hot vector representing the action
                of picking a card (a 1 x n ndarray where n = 2 * START_HANDS; the single 1 marks the card to pick).
        move_actions: similar to states, a list of lists, one per game. Each element of the inner list is a one-hot vector representing the action
                of picking a move (a 1 x n ndarray where n = BOARD_SIZE ** 2; the single 1 marks the board position where the picked card is placed).
        rewards: a list with one number per game: the value returned by get_winner(), i.e. 1 if the player won, 0 for a tie, and -1 for a loss.
        
    """
    
    states = [[] for _ in range(metadata["game_batch"])] # Features from the game state; with the default feature set, a 16 x 10 array
    card_actions = [[] for _ in range(metadata["game_batch"])] # One-hot vector over the 10 cards, from left to right
    move_actions = [[] for _ in range(metadata["game_batch"])] # One-hot vector over the 9 possible moves
    rewards = [0 for _ in range(metadata["game_batch"])] # The player has either won (1), tied (0), or lost (-1)
    
    # The learner is always the left player, and the opponent picked from the pool is always the right player
    # The game starts with either the left or the right player, chosen at random (50/50)
    card_pool = gm.GameState.load_cards_from_file(metadata["card_path"], metadata["card_file"])
    
    for i in range(metadata["game_batch"]):
        default_cards = random.sample(card_pool, gm.START_HANDS)
        left_cards = [card.clone() for card in default_cards]
        right_cards = [card.clone() for card in default_cards]
            
        new_game = gm.GameState(left_cards = left_cards, right_cards = right_cards)
        
        while not new_game.is_end_of_game():
            if new_game.current_player == gm.LEFT_PLAYER:
                # Record all the moves made by the learner
                (card, move) = player.get_action(new_game)
                states[i].append(fe.state2feature(new_game))
                (card_vector, move_vector) = player.action_to_vector(new_game, card, move)
                card_actions[i].append(np.expand_dims(card_vector, axis=0))
                move_actions[i].append(np.expand_dims(move_vector, axis=0))
            else:
                (card, move) = opponent.get_action(new_game)
            new_game.play_round(card, *move)
        
        rewards[i] = new_game.get_winner() # winner code (1 win, 0 tie, -1 loss); a tie can later be treated like a loss since we only want to win
        
    return (states, card_actions, move_actions, rewards)
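A minimal usage sketch for simulate_games (illustrative only; the policy classes and the concrete metadata values below are assumptions, mirroring the classes and defaults used elsewhere in this section):

# Illustrative sketch: run a small self-play batch and check its structure.
# Assumes NNPolicy, RandomPolicy and the gm module are importable as in the test snippets above.
metadata = {"game_batch": 4,            # number of games per batch
            "card_path": "test_cards",  # same card files compare_policy uses by default
            "card_file": "cards.csv"}

player = NNPolicy()
opponent = RandomPolicy()
(states, card_actions, move_actions, rewards) = simulate_games(player, opponent, metadata)

assert len(states) == metadata["game_batch"]
for i in range(metadata["game_batch"]):
    # every learner move in game i has one feature tensor, one card one-hot and one move one-hot
    assert len(states[i]) == len(card_actions[i]) == len(move_actions[i])
    # the whole game shares a single reward: the winner code from get_winner()
    assert rewards[i] in (gm.LEFT_PLAYER, gm.NO_ONE, gm.RIGHT_PLAYER)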
def state_action_generator(target_policy, metadata):
    """
    Args:
        target_policy: the policy that the NNPolicy should learn to imitate; the manually crafted policy is used here.
        metadata: a dictionary containing the metadata for this training process
        
    Yields:
        states: an ndarray with shape (n, dim, 10), where n is batch_size * 9 (each game takes 9 steps), dim is the dimension of all selected features
                for each card, and 10 covers every card in both hands.
        cards: an ndarray with shape (n, 10), where n is batch_size * 9. The second dimension is a one-hot vector specifying which card to pick.
        moves: an ndarray with shape (n, 9), where n is batch_size * 9. The second dimension is a one-hot vector specifying which position on the
                board to play the card.
    """

    left_card_file = gm.GameState.load_cards_from_file(metadata["card_path"],
                                                       metadata["card_file"])
    right_card_file = gm.GameState.load_cards_from_file(
        metadata["card_path"], metadata["card_file"])

    while True:
        all_states = []
        all_cards = []
        all_moves = []
        for idx in range(metadata["batch_size"]):
            left_cards = random.sample(left_card_file, gm.START_HANDS)
            right_cards = random.sample(right_card_file, gm.START_HANDS)
            new_game = gm.GameState(left_cards=left_cards,
                                    right_cards=right_cards)

            (states, cards,
             moves) = simulate_single_game(target_policy, new_game)
            all_states.append(states)
            all_cards.append(cards)
            all_moves.append(moves)
        np_states = np.array(all_states)  # shape: batch_size x steps_per_game x feature_dim x 10; the first two dims are merged below
        np_cards = np.array(all_cards)    # shape: batch_size x steps_per_game x 10
        np_moves = np.array(all_moves)    # shape: batch_size x steps_per_game x 9
        yield (np_states.reshape((-1,) + np_states.shape[2:]),
               {"card_output": np_cards.reshape((-1,) + np_cards.shape[2:]),
                "move_output": np_moves.reshape((-1,) + np_moves.shape[2:])})
        del all_states
        del all_cards
        del all_moves
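The (inputs, targets) pairs yielded above follow the dictionary format Keras expects for a multi-output model, so the generator can be fed directly to training. A minimal sketch, assuming a tf.keras model whose two softmax heads are named card_output and move_output (the architecture and training settings below are illustrations, not the repo's actual model):

import tensorflow as tf

# Illustrative architecture only; the real network in the repo may differ.
inputs = tf.keras.Input(shape=(fe.get_feature_dim(), 2 * gm.START_HANDS))
x = tf.keras.layers.Flatten()(inputs)
x = tf.keras.layers.Dense(128, activation="relu")(x)
card_output = tf.keras.layers.Dense(2 * gm.START_HANDS, activation="softmax",
                                    name="card_output")(x)   # which card to pick
move_output = tf.keras.layers.Dense(gm.BOARD_SIZE ** 2, activation="softmax",
                                    name="move_output")(x)   # where to place it
model = tf.keras.Model(inputs, [card_output, move_output])
model.compile(optimizer="adam",
              loss={"card_output": "categorical_crossentropy",
                    "move_output": "categorical_crossentropy"})

metadata = {"batch_size": 16, "card_path": "test_cards", "card_file": "cards.csv"}
model.fit(state_action_generator(BaselinePolicy(), metadata),
          steps_per_epoch=50, epochs=5)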
    def test_action_to_vector(self):
        player = BaselinePolicy()
        cards = []
        moves = []
        game = gm.GameState()
        while not game.is_end_of_game():
            (card, move) = player.get_action(game)
            (card_vector, move_vector) = player.action_to_vector(game, card, move)
            self.assertTrue(sum(card_vector) == 1 and sum(move_vector) == 1)
            cards.append(card_vector)
            moves.append(move_vector)
            game.play_round(card, *move)
        sum_cards = np.sum(np.array(cards), 0)
        sum_moves = np.sum(np.array(moves), 0)
        self.assertTrue(np.all(sum_cards <= 1))
        self.assertTrue(np.all(sum_moves == 1))
Example #7
    def test_game_start(self):
        game = gm.GameState()
        self.assertTrue(fe.get_feature_dim() == 16)
        features = fe.state2feature(game)
        self.assertTrue(features.shape[0] == 1)
        self.assertTrue(features.shape[1] == 16)
        self.assertTrue(features.shape[2] == 2 * gm.START_HANDS)

        # At the beginning all the cards are in the hands
        self.assertTrue((np.sum(features[0, 4:14, :],
                                axis=1) == [0, 0, 0, 0, 0, 0, 0, 0, 0,
                                            10]).all())
        # At the beginning all the cards are equally owned by both players
        self.assertTrue(np.sum(features[0, 14, :]) == gm.START_HANDS)
        # The current player should either be 1 for all the cards, or 0 for all the cards
        start_player = np.sum(features[0, 15, :])
        self.assertTrue(start_player == features.shape[2] or start_player == 0)
def compare_policy(player,
                   opponent,
                   num_games,
                   card_file_path="test_cards",
                   card_file_name="cards.csv"):
    default_left_cards = gm.GameState.load_cards_from_file(
        card_file_path, card_file_name)
    default_right_cards = gm.GameState.load_cards_from_file(
        card_file_path, card_file_name)

    winner = []
    for i in range(num_games):
        left_cards = random.sample(default_left_cards, 5)
        right_cards = random.sample(default_right_cards, 5)

        for card in left_cards + right_cards:
            card.reset()

        game = gm.GameState(left_cards=left_cards, right_cards=right_cards)
        while not game.is_end_of_game():
            # The player is always on the left and the opponent on the right; the starting player is chosen at random.
            if game.current_player == gm.LEFT_PLAYER:
                (card, move) = player.get_action(game)
            else:
                (card, move) = opponent.get_action(game)
            game.play_round(card, *move)

        winner.append(game.get_winner())
        """
        if i%10 == 0 and i > 0:
            won_games = sum(1 for _ in filter(lambda x: x == gm.LEFT_PLAYER, winner))
            tie_games = sum(1 for _ in filter(lambda x: x== gm.NO_ONE, winner))
            lost_games = sum(1 for _ in filter(lambda x: x== gm.RIGHT_PLAYER, winner))
            print("This is the {}th game, current win rate: {}, tie rate: {}, lose rate: {}".format(i, round(won_games / i, 2), \
                                                                          round(tie_games / i, 2), round(lost_games / i, 2)), end='\r')
        """

    won_games = sum(1 for _ in filter(lambda x: x == gm.LEFT_PLAYER, winner))
    tie_games = sum(1 for _ in filter(lambda x: x == gm.NO_ONE, winner))
    lost_games = sum(1 for _ in filter(lambda x: x == gm.RIGHT_PLAYER, winner))
    print(
        "Evaluation done. Player won {} games, tied {} games, and lost {} games"
        .format(won_games, tie_games, lost_games))
    return round(won_games / num_games, 2)
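A quick usage sketch for compare_policy (illustrative; the policy classes are the ones used in the test snippets above):

# Illustrative: estimate how often a candidate policy beats the random baseline
# over 100 games using the default test_cards/cards.csv card pool.
win_rate = compare_policy(NNPolicy(), RandomPolicy(), num_games=100)
print("Estimated win rate: {}".format(win_rate))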