def num_state_action_comb(num_samples=100000):
    """ Estimate how many state-action combinations there are
        to evaluate the feasibility of a table-based q-learning approach.

        The estimate is obtained by randomly sampling "already played"
        card vectors and counting the possible board states and opponent
        actions for each sample.

        Args:
            num_samples: Number of random states to sample (default 100000,
                matching the original hard-coded sample count).

        Returns:
            Tuple (max_combinations, min_combinations) over all samples.
    """

    empty = np.zeros(NUM_CARD_VALUES, dtype=np.int8)
    total_comb = []

    # Number of possible "already played" vectors: each card value can
    # occur 0..AVAILABLE_CARDS[i] times. Accumulate with Python ints
    # (not numpy scalars) so the product cannot silently overflow int64.
    comb_ap = 1
    for available in AVAILABLE_CARDS:
        comb_ap *= int(available) + 1

    for _ in tqdm.tqdm(range(num_samples)):
        # Already played can be anything
        ap = np.zeros(NUM_CARD_VALUES, dtype=np.int8)
        for i in range(NUM_CARD_VALUES):
            ap[i] = np.random.randint(0, AVAILABLE_CARDS[i] + 1)

        # Board must be a move that is already included in already played
        comb_b = possible_next_moves(ap, empty).shape[0]

        # Actions from other agents must be within the remaining cards
        remaining = AVAILABLE_CARDS - ap
        hands = random_initial_cards(cards_to_use=remaining)
        comb_actions = 1
        for hand in hands:
            # Finished hands (all zeros) contribute exactly one "action"
            if not np.all(hand == 0):
                comb_actions *= possible_next_moves(hand, empty).shape[0]

        # Force a Python int so huge products keep arbitrary precision
        total_comb.append(int(comb_ap) * comb_b * comb_actions)

    # Observed once: 68,319,447,356,160,000
    max_comb = max(total_comb)
    print(max_comb)
    # Observed once:    138,538,758,758,400
    min_comb = min(total_comb)
    print(min_comb)
    return max_comb, min_comb
# ----- Example #2 -----
    def test_possible_next_moves_empty_board(self):
        """ Tests possible moves on empty board. """

        hand = np.array([0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2])
        board = np.zeros(NUM_CARD_VALUES)

        # All legal opening moves: each held rank played as single or
        # pair, optionally combined with 1 or 2 jokers (last column).
        expected = np.array(
            [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1],
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2],
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
             [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 2],
             [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0],
             [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1],
             [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2],
             [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
             [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
             [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2],
             [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
             [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
             [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
             [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
             [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
             [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]])

        # With a non-empty hand and empty board, passing is never the
        # only option.
        self.assertFalse(only_passing_possible(hand, board))
        self.assertTrue(
            np.all(possible_next_moves(hand, board) == expected))
# ----- Example #3 -----
    def do_step(
            # Board state
            self,
            already_played,
            board,
            agents_finished,
            # Possible states before the next move of this agent
            list_next_possible_states=lambda ap, b: ([], []),
            # Other parameters
            always_use_best=False,
            print_luck=False):
        """
            Performs a (partial) step in the game.

            Args:
                already_played: Card-count vector of all cards played so far.
                board: Card-count vector of the move currently on the board.
                agents_finished: Passed through to process_next_board_state;
                    presumably the number of agents already finished —
                    TODO confirm against caller.
                list_next_possible_states: Callable (already_played, board)
                    -> pair of lists; defaults to a no-op returning ([], []).
                always_use_best: If True, forwarded to the decision step to
                    disable exploratory/random choices.
                print_luck: If True, forwarded to the decision step
                    (diagnostic output flag).

            Returns (Player finished, 
                Already played cards, New board, 
                Best decision made randomly)
        """

        # Prepares the step to do
        self.prepare_step()

        # If player has already finished, pass
        if has_finished(self.hand):
            return True, already_played, board, False

        # Possible actions; Pass if no possible play
        # NOTE: a single all-zero row is the "pass" action, so exactly one
        # all-zero action means passing is the only legal move.
        possible_actions = possible_next_moves(self.hand, board)
        if len(possible_actions) == 1 and \
                np.all(possible_actions[0] == 0):
            return False, already_played, board, False

        # Decide action to take (delegated to the agent's policy)
        (possible_qvalues, action_index, action_taken,
         random_choice, best_decision_made_randomly) = \
            self.decide_action_to_take(
                already_played, board, always_use_best,
                print_luck, possible_actions)

        # Compute next state: an all-zero action is a pass, which leaves
        # the board unchanged; otherwise the action becomes the new board.
        next_hand = self.hand - action_taken
        next_board = board if np.all(action_taken == 0) else action_taken
        next_already_played = already_played + action_taken

        # Process next state (e.g. learning update) before committing the
        # new hand to self — must happen while self.hand is still the old one.
        self.process_next_board_state(
            already_played, board, list_next_possible_states,
            next_already_played, next_board, next_hand, possible_qvalues,
            action_index, action_taken, random_choice, agents_finished,
            always_use_best)

        # Return next state
        self.hand = next_hand
        return (has_finished(self.hand), next_already_played, next_board,
                best_decision_made_randomly)
# ----- Example #4 -----
    def test_possible_next_moves_empty_board_lots_of_cards(self):
        """ Tests possible moves on empty board with lots of cards. """

        hand = AVAILABLE_CARDS
        board = np.zeros(NUM_CARD_VALUES)

        # Expected action list is too large to inline; load the
        # reference matrix from the checked-in CSV fixture.
        expected = pd.read_csv("./egd/game/tests/test-data/actions.csv",
                               header=None).values

        self.assertFalse(only_passing_possible(hand, board))
        self.assertTrue(
            np.all(possible_next_moves(hand, board) == expected))
# ----- Example #5 -----
    def test_possible_next_moves_non_empty_board(self):
        """ Tests possible next moves. """

        # Table-driven cases: (hand, board, expected actions). The first
        # expected row of each case is the all-zero "pass" action.
        cases = [
            (np.array([0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2]),
             np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0]),
             np.array(
                 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
                  [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])),
            (np.array([1, 2, 3, 1, 0, 0, 0, 3, 0, 4, 0, 0, 2]),
             np.array([0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1]),
             np.array(
                 [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1],
                  [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2],
                  [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2],
                  [0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                  [0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                  [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
                  [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
                  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]])),
        ]

        for hand, board, expected in cases:
            self.assertFalse(only_passing_possible(hand, board))
            self.assertTrue(
                np.all(possible_next_moves(hand, board) == expected))
# ----- Example #6 -----
    def test_possible_next_moves_bug_jokers_only(self):
        """ Tests that a historic bug does not occur again. """

        hand = np.array([0, 1, 0, 0, 0, 1, 0, 0, 2, 4, 2, 4, 1])
        board = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

        # Pass plus every single card that beats the single on the board.
        expected = np.array([
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ])

        self.assertFalse(only_passing_possible(hand, board))
        self.assertTrue(
            np.all(possible_next_moves(hand, board) == expected))