Exemplo n.º 1
0
        pot_array_stage3 = convert_pot_to_numpy(total_pot_size)
        state_array_player1_stage3 = np.stack(
            (flop1_array, flop2_array, flop3_array, turn_array, river_array,
             Card6_array_player1, Card7_array_player1, all_card_array_player1,
             pot_array_stage3))

        total_pot_size += pot_size
        player1_bet += player1_new_bet
        player2_bet += player2_new_bet

        if winner == "Player 1":
            print("Player 1 Wins!!", total_pot_size)
            print("Player 1 Gain!!", int(total_pot_size - player1_bet))

        elif winner == "Player 2":
            print("Player 2 Wins!!", total_pot_size)
            print("Player 2 Gain!!", int(total_pot_size - player2_bet))

        else:
            final_score_player1 = evaluator.get_rank_class(
                evaluator._seven(player1_complete_hand))
            final_score_player2 = evaluator.get_rank_class(
                evaluator._seven(player2_complete_hand))
            if final_score_player1 < final_score_player2:
                print("Player 1 Wins!!", total_pot_size)
                print("Player 1 Gain!!", int(total_pot_size - player1_bet))

            else:
                print("Player 2 Wins!!", total_pot_size)
                print("Player 2 Gain!!", int(total_pot_size - player2_bet))
Exemplo n.º 2
0
# Slot of each Player 1 action inside a 3-element reward / count vector.
_ACTION_SLOT = {"Fold": 0, "Check/Call": 1, "Bet": 2}
# Actions after which Player 1 is still in the hand.
_STAYING_ACTIONS = ("Check/Call", "Bet")
# Column of the non-numeric rank characters; digits map to int(char) - 2.
_RANK_COLUMN = {"T": 8, "J": 9, "Q": 10, "K": 11, "A": 12}
# Row of each suit character; any other character falls into row 3.
_SUIT_ROW = {"s": 0, "c": 1, "h": 2}


def _encode_pot(total_pot):
    """Encode the pot as a 4x13 plane with one cell per 25-unit chip.

    The first 13 chips fill row 1 and the following chips fill row 2.
    """
    pot_plane = np.zeros((4, 13))
    number_of_chips = int(total_pot / 25)
    if number_of_chips > 13:
        pot_plane[1] = 1
        # NOTE(review): more than 26 chips indexes past column 12 and raises
        # IndexError; confirm the players' bet sizes keep the pot below that.
        for column in range(number_of_chips - 13):
            pot_plane[2][column] = 1
    else:
        for column in range(number_of_chips):
            pot_plane[1][column] = 1
    return pot_plane


def _encode_card(card_str, all_cards):
    """Return a one-hot 4x13 plane for ``card_str`` and mark it in ``all_cards``.

    ``card_str`` is a two-character string: rank character followed by suit
    character.  ``all_cards`` is updated in place.
    """
    rank_char, suit_char = card_str[0], card_str[1]
    column = _RANK_COLUMN.get(rank_char)
    if column is None:
        column = int(rank_char) - 2
    row = _SUIT_ROW.get(suit_char, 3)

    card_plane = np.zeros((4, 13))
    card_plane[row][column] = 1
    all_cards[row][column] = 1
    return card_plane


def _state_key(board, hole_cards, total_pot):
    """Build the pickled 9x4x13 state array used as a reward-table key.

    Planes, in order: five board slots (missing streets are all-zero), the
    two hole cards, the union of all visible cards, and the pot encoding.
    """
    all_cards = np.zeros((4, 13))
    planes = [_encode_card(Card.int_to_str(card), all_cards) for card in board]
    planes.extend(np.zeros((4, 13)) for _ in range(5 - len(board)))
    planes.extend(
        _encode_card(Card.int_to_str(card), all_cards) for card in hole_cards)
    planes.append(all_cards)
    planes.append(_encode_pot(total_pot))
    return pickle.dumps(np.stack(planes))


def _follow_up_actions(player_action):
    """Return the actions open to the player responding to ``player_action``."""
    if player_action == "Check/Call":
        return ["Fold", "Bet"]
    if player_action == "Bet":
        return ["Fold", "Check/Call"]
    return ["Fold", "Bet", "Check/Call"]


def _betting_round(player_1, player_2, player1_rank, player2_rank,
                   all_possible_actions):
    """Play one betting round and report its outcome.

    Returns a 5-tuple ``(winner, player1_bet, player2_bet, pot, action_1)``
    where ``winner`` is ``"Player1"`` / ``"Player2"`` when the other player
    folded and ``""`` when the hand continues, and ``action_1`` is the last
    action taken by Player 1 in this round.
    """
    action_1, player1_new_bet = player_1.make_bets(
        player1_rank, all_possible_actions, 0)
    if action_1 == "Fold":
        return "Player2", 0, 0, 0, action_1

    if action_1 == "Bet":
        action_2, _ = player_2.make_bets(
            player2_rank, _follow_up_actions(action_1), player1_new_bet)
        if action_2 == "Fold":
            return "Player1", player1_new_bet, 0, player1_new_bet, action_1
        return ("", player1_new_bet, player1_new_bet, 2 * player1_new_bet,
                action_1)

    # Player 1 checked: Player 2 acts with the full action set.
    action_2, player2_new_bet = player_2.make_bets(
        player2_rank, all_possible_actions, 0)
    if action_2 == "Fold":
        return "Player1", 0, 0, 0, action_1
    if action_2 == "Check/Call":
        return "", 0, 0, 0, action_1

    # Player 2 bet: Player 1 must respond, and that response is reported.
    action_1, _ = player_1.make_bets(
        player1_rank, _follow_up_actions(action_2), player2_new_bet)
    if action_1 == "Fold":
        return "Player2", 0, player2_new_bet, player2_new_bet, action_1
    return "", player2_new_bet, player2_new_bet, 2 * player2_new_bet, action_1


class _Stage:
    """Bookkeeping for one street: state key, Player 1 action, pending totals."""

    def __init__(self, key, action):
        self.key = key
        self.action = action
        # Used only while ``key`` is not yet present in the shared tables.
        self.rewards = np.zeros((3))
        self.counts = np.zeros((3))


def _record(reward_table, reward_count, stage, amount, actions):
    """Add ``amount`` for ``stage``'s action if that action is in ``actions``.

    Known keys are updated in the shared tables; new keys accumulate in the
    stage's own vectors until they are inserted at the end of the hand.
    """
    if stage.action not in actions:
        return
    slot = _ACTION_SLOT[stage.action]
    if stage.key in reward_table:
        reward_table[stage.key][slot] += amount
        reward_count[stage.key][slot] += 1
    else:
        stage.rewards[slot] += amount
        stage.counts[slot] += 1


def _play_hand(deck, evaluator, player_1, player_2, flop, player1_hand,
               player2_hand, reward_table, reward_count):
    """Play one hand (flop, turn, river) and fold its result into the tables."""
    all_possible_actions = ["Fold", "Bet", "Check/Call"]
    total_pot_size = 0
    player1_bet = 0

    board = list(flop)
    player1_cards = flop + player1_hand
    player2_cards = flop + player2_hand

    stages = []
    winner = ""
    hand_rankers = (evaluator._five, evaluator._six, evaluator._seven)
    for street, rank_hand in enumerate(hand_rankers):
        if street > 0:
            # Turn, then river: one new community card per street.
            new_card = deck.draw(1)
            board = board + [new_card]
            player1_cards = player1_cards + [new_card]
            player2_cards = player2_cards + [new_card]

        player1_rank = evaluator.class_to_string(
            evaluator.get_rank_class(rank_hand(player1_cards)))
        player2_rank = evaluator.class_to_string(
            evaluator.get_rank_class(rank_hand(player2_cards)))

        winner, player1_new_bet, _, pot_added, action = _betting_round(
            player_1, player_2, player1_rank, player2_rank,
            all_possible_actions)
        player1_bet += player1_new_bet
        total_pot_size += pot_added

        # The state is keyed on the pot *after* this street's betting.
        stages.append(
            _Stage(_state_key(board, player1_hand, total_pot_size), action))
        if winner in ("Player1", "Player2"):
            break

    if winner == "Player2":
        # Player 1 folded on the last street played.
        loss = -1 * int(player1_bet)
        _record(reward_table, reward_count, stages[-1], loss, ("Fold",))
        # Every earlier street is charged the same loss.  The previous code
        # credited a flop "Bet" with a positive gain when Player 1 folded on
        # the turn, which contradicted the river branch and the flop
        # "Check/Call" case.
        for stage in stages[:-1]:
            _record(reward_table, reward_count, stage, loss, _STAYING_ACTIONS)
    else:
        if winner == "Player1":
            player1_wins = True
        else:
            # Showdown after the river betting round.
            # NOTE(review): this compares rank *classes*, so hands in the same
            # class are awarded to Player 2; confirm that is intended.
            final_score_player1 = evaluator.get_rank_class(
                evaluator._seven(player1_cards))
            final_score_player2 = evaluator.get_rank_class(
                evaluator._seven(player2_cards))
            player1_wins = final_score_player1 < final_score_player2

        if player1_wins:
            amount = int(total_pot_size - player1_bet)
        else:
            amount = -1 * int(player1_bet)
        for stage in stages:
            _record(reward_table, reward_count, stage, amount,
                    _STAYING_ACTIONS)

    # Register unseen states, latest street first (the order the tables were
    # populated in before this refactor).
    for stage in reversed(stages):
        if stage.key not in reward_table:
            reward_table[stage.key] = stage.rewards
            reward_count[stage.key] = stage.counts


def run(rounds):
    """Simulate ``rounds`` heads-up hands and return average outcomes per state.

    Each hand deals a flop and two hole cards per player, then plays up to
    three betting rounds between two fresh ``Player`` instances.  For every
    street reached, the state seen by Player 1 is pickled into a key and the
    hand's final result for Player 1 is accumulated under the action taken.

    Args:
        rounds: number of hands to simulate.

    Returns:
        dict mapping pickled state arrays to 3-element arrays holding the
        mean result of ``[Fold, Check/Call, Bet]``; slots that were never
        played stay at 0.
    """
    reward_table = dict()
    reward_count = dict()
    round_number = 0
    print("running heuristic:")
    print_progress(round_number,
                   rounds,
                   prefix='Progress:',
                   suffix='Complete',
                   bar_length=40)
    while round_number < rounds:
        deck = Deck()
        player_1 = Player()
        player_2 = Player()
        flop = deck.draw(3)
        player1_hand = deck.draw(2)
        player2_hand = deck.draw(2)
        evaluator = Evaluator()

        _play_hand(deck, evaluator, player_1, player_2, flop, player1_hand,
                   player2_hand, reward_table, reward_count)

        round_number += 1
        print_progress(round_number,
                       rounds,
                       prefix='Progress:',
                       suffix='Complete',
                       bar_length=40)

    # Turn accumulated totals into means, leaving never-played slots alone.
    for key, totals in reward_table.items():
        counts = reward_count[key]
        played = counts != 0
        totals[played] = totals[played] / counts[played]

    return reward_table
Exemplo n.º 3
0
def run(rounds, model):
    reward_table = dict()
    reward_count = dict()
    print("running self-play")
    round_number = 0
    print_progress(round_number,
                   rounds,
                   prefix='Progress:',
                   suffix='Complete',
                   bar_length=40)
    while round_number < rounds:
        deck = Deck()
        evaluator = Evaluator()
        flop = deck.draw(3)
        # for card in flop:
        #     print(Card.int_to_pretty_str(card))
        player1_hand = deck.draw(2)
        # for card in player1_hand:
        #     print(Card.int_to_pretty_str(card))
        player2_hand = deck.draw(3)
        turn = deck.draw(1)
        river = deck.draw(1)
        player1_complete_hand = flop + [turn] + [river] + player1_hand
        player2_complete_hand = flop + [turn] + [river] + player2_hand

        def get_action_position2(y_stage, other_player_action):
            """Choose the best-scoring action for a player responding to a move.

            Args:
                y_stage: Predicted scores in the order (Fold, Check/Call, Bet).
                    Either a flat sequence of scores or a batched prediction
                    whose first row holds the scores (the layout that
                    ``get_action`` reads via ``y_stage[0][i]``).
                other_player_action: The opponent's preceding action. When it
                    is "Bet", index 2 ("Bet") is excluded from the choice.

            Returns:
                "Fold", "Check/Call" or "Bet". Ties go to the lowest index
                because only a strictly greater score replaces the current
                best.
            """
            scores = np.asarray(y_stage)
            if scores.ndim > 1:
                # A batched (1, n) prediction: use its first row. Iterating
                # the batch axis instead would never compare any scores and
                # would always yield "Fold".
                scores = scores.reshape(-1, scores.shape[-1])[0]

            # Always bind the excluded index; leaving it unset for non-"Bet"
            # inputs used to raise UnboundLocalError in the loop below.
            out_of_bounds_index = 2 if other_player_action == "Bet" else None

            index = 0
            max_gain_loss = scores[0]
            for i, score in enumerate(scores[1:], start=1):
                if i != out_of_bounds_index and max_gain_loss < score:
                    index = i
                    max_gain_loss = score

            if index == 0:
                return "Fold"
            if index == 1:
                return "Check/Call"
            return "Bet"

        def get_action(y_stage):
            """Translate a batched prediction into the highest-scoring action.

            Only the first row of ``y_stage`` is inspected. Its entries are
            read in the order (Fold, Check/Call, Bet); any winning position
            past the second maps to "Bet". Ties go to the lowest position
            because only a strictly greater score replaces the current best.
            """
            row = y_stage[0]
            best_position = 0
            best_score = row[0]
            for position, score in enumerate(row):
                # Position 0 never beats itself, so scanning from the start
                # is equivalent to scanning from position 1.
                if best_score < score:
                    best_position = position
                    best_score = score

            if best_position >= 2:
                return "Bet"
            return ("Fold", "Check/Call")[best_position]

        def convert_pot_to_numpy(total_pot):
            """Encode a pot size as a 4x13 array of chip markers.

            The pot is converted to a chip count of ``int(total_pot / 25)``.
            The first 13 chips set cells of row 1 from left to right and the
            next 13 set cells of row 2. Rows 0 and 3 are never written.

            The encoding holds at most 26 chips. Larger pots saturate (both
            rows fully set) rather than indexing past the end of row 2, which
            previously raised IndexError for pots above 650.

            Args:
                total_pot: Pot size in the same units as the bets.

            Returns:
                A new ``(4, 13)`` float array of zeros and ones.
            """
            chips_per_row = 13
            max_chips = 2 * chips_per_row

            pot_array = np.zeros((4, chips_per_row))
            # Clamp to [0, max_chips]: a negative count encodes as an empty
            # pot (as before), and an oversized one saturates.
            number_of_chips = max(0, min(int(total_pot / 25), max_chips))

            if number_of_chips > chips_per_row:
                pot_array[1] = 1
                pot_array[2, :number_of_chips - chips_per_row] = 1
            else:
                pot_array[1, :number_of_chips] = 1
            return pot_array

        def convert_to_numpy_array(Card_str, all_card_array_player1,
                                   all_card_array_player2):
            """One-hot encode a card and mark it in both players' card arrays.

            ``Card_str[0]`` is the rank character and ``Card_str[1]`` the suit
            character. Ranks "T", "J", "Q", "K", "A" map to columns 8-12; any
            other rank is parsed as a digit and placed at ``digit - 2``.
            Suits "s", "c", "h" map to rows 0-2; any other suit character
            lands in row 3.

            Returns:
                A tuple ``(card_array, all_card_array_player1,
                all_card_array_player2)`` where ``card_array`` is a new
                ``(4, 13)`` array with a single 1. The two passed-in arrays
                are updated in place and returned as-is.
            """
            face_columns = {"T": 8, "J": 9, "Q": 10, "K": 11, "A": 12}
            suit_rows = {"s": 0, "c": 1, "h": 2}

            rank_char = Card_str[0]
            if rank_char in face_columns:
                column = face_columns[rank_char]
            else:
                column = int(rank_char) - 2
            row = suit_rows.get(Card_str[1], 3)

            card_array = np.zeros((4, 13))
            card_array[row][column] = 1
            for seen_cards in (all_card_array_player1, all_card_array_player2):
                seen_cards[row][column] = 1
            return card_array, all_card_array_player1, all_card_array_player2

        def convert_to_numpy_array_playercards(Card_str, all_card_array):
            """One-hot encode a hole card and mark it in one player's card array.

            ``Card_str[0]`` is the rank character and ``Card_str[1]`` the suit
            character. Ranks "T", "J", "Q", "K", "A" map to columns 8-12; any
            other rank is parsed as a digit and placed at ``digit - 2``.
            Suits "s", "c", "h" map to rows 0-2; any other suit character
            lands in row 3.

            Returns:
                A tuple ``(card_array, all_card_array)`` where ``card_array``
                is a new ``(4, 13)`` array with a single 1 and
                ``all_card_array`` is the passed-in array, updated in place.
            """
            face_columns = {"T": 8, "J": 9, "Q": 10, "K": 11, "A": 12}
            suit_rows = {"s": 0, "c": 1, "h": 2}

            rank_char = Card_str[0]
            if rank_char in face_columns:
                column = face_columns[rank_char]
            else:
                column = int(rank_char) - 2
            row = suit_rows.get(Card_str[1], 3)

            card_array = np.zeros((4, 13))
            card_array[row][column] = 1
            all_card_array[row][column] = 1

            return card_array, all_card_array

        def betting(model1_player, model2_player, flop1_array, flop2_array,
                    flop3_array, turn_array, river_array, Card1_array_player1,
                    Card2_array_player1, Card1_array_player2,
                    Card2_array_player2, all_card_array_player1,
                    all_card_array_player2, stage_initial_potsize):
            """Play one betting round between the two models.

            Player 1 acts first. A fold ends the round immediately. After a
            check, player 2 may fold, check, or bet 100, in which case
            player 1 answers with a fold or a call. After a player 1 bet of
            100, player 2 answers with a fold or a call.

            Each decision is made by stacking the five board arrays, the
            acting player's two hole-card arrays, that player's combined card
            array and the current pot encoding into nine planes, zero-padding
            them to 17x17 and feeding the batch of one to ``Model.predict``.

            Returns:
                A 6-tuple ``(winner, pot, player1_bet, player2_bet,
                player1_action, player2_action)``. ``winner`` is "Player 1"
                or "Player 2" when someone folded and "" otherwise;
                ``player2_action`` is "" when player 1 folded before player 2
                acted.
            """
            bet_size = 100
            target_shape = [9, 17, 17]
            board_planes = (flop1_array, flop2_array, flop3_array, turn_array,
                            river_array)

            def predict_scores(net, hole_a, hole_b, seen_cards, pot_plane):
                # Assemble the nine planes, add a batch axis, then centre the
                # planes inside the target size (the odd leftover cell goes
                # on the trailing side).
                batch = np.expand_dims(
                    np.stack(board_planes +
                             (hole_a, hole_b, seen_cards, pot_plane)), 0)
                spare_rows = target_shape[1] - batch.shape[2]
                spare_cols = target_shape[2] - batch.shape[3]
                rows_before = spare_rows // 2
                cols_before = spare_cols // 2
                padding = ((0, 0), (0, 0),
                           (rows_before, rows_before + (spare_rows % 2)),
                           (cols_before, cols_before + (spare_cols % 2)))
                padded = np.pad(batch, padding, mode='constant')
                return Model.predict(net, x=padded)

            pot_plane = convert_pot_to_numpy(stage_initial_potsize)
            action_player1 = get_action(
                predict_scores(model1_player, Card1_array_player1,
                               Card2_array_player1, all_card_array_player1,
                               pot_plane))

            if action_player1 == "Fold":
                return "Player 2", stage_initial_potsize, 0, 0, action_player1, ""

            if action_player1 != "Check/Call":
                # Player 1 bet: player 2 sees the enlarged pot and may only
                # fold or call.
                stage_initial_potsize += bet_size
                pot_plane = convert_pot_to_numpy(stage_initial_potsize)
                action_player2 = get_action_position2(
                    predict_scores(model2_player, Card1_array_player2,
                                   Card2_array_player2,
                                   all_card_array_player2, pot_plane), "Bet")
                if action_player2 == "Fold":
                    return ("Player 1", stage_initial_potsize, bet_size, 0,
                            action_player1, action_player2)
                return ("", stage_initial_potsize + bet_size, bet_size,
                        bet_size, action_player1, action_player2)

            # Player 1 checked: player 2 decides against the unchanged pot.
            action_player2 = get_action(
                predict_scores(model2_player, Card1_array_player2,
                               Card2_array_player2, all_card_array_player2,
                               pot_plane))
            if action_player2 == "Fold":
                return ("Player 1", stage_initial_potsize, 0, 0,
                        action_player1, action_player2)
            if action_player2 == "Check/Call":
                return ("", stage_initial_potsize, 0, 0, action_player1,
                        action_player2)

            # Player 2 bet after the check: player 1 may only fold or call.
            stage_initial_potsize += bet_size
            pot_plane = convert_pot_to_numpy(stage_initial_potsize)
            action_player1 = get_action_position2(
                predict_scores(model1_player, Card1_array_player1,
                               Card2_array_player1, all_card_array_player1,
                               pot_plane), "Bet")
            if action_player1 == "Fold":
                return ("Player 2", stage_initial_potsize, 0, bet_size,
                        action_player1, action_player2)
            return ("", stage_initial_potsize + bet_size, bet_size, bet_size,
                    action_player1, action_player2)

        model_player1 = model
        model_player2 = model

        player1_bet = 0
        player2_bet = 0
        total_pot_size = 0
        all_card_array_player1 = np.zeros((4, 13))
        all_card_array_player2 = np.zeros((4, 13))
        flop1_array, all_card_array_player1, all_card_array_player2 = convert_to_numpy_array(
            Card.int_to_str(flop[0]), all_card_array_player1,
            all_card_array_player2)
        flop2_array, all_card_array_player1, all_card_array_player2 = convert_to_numpy_array(
            Card.int_to_str(flop[1]), all_card_array_player1,
            all_card_array_player2)
        flop3_array, all_card_array_player1, all_card_array_player2 = convert_to_numpy_array(
            Card.int_to_str(flop[2]), all_card_array_player1,
            all_card_array_player2)
        turn_array = np.zeros((4, 13))
        river_array = np.zeros((4, 13))
        Card6_array_player1, all_card_array_player1 = convert_to_numpy_array_playercards(
            Card.int_to_str(player1_hand[0]), all_card_array_player1)
        Card7_array_player1, all_card_array_player1 = convert_to_numpy_array_playercards(
            Card.int_to_str(player1_hand[1]), all_card_array_player1)

        Card6_array_player2, all_card_array_player2 = convert_to_numpy_array_playercards(
            Card.int_to_str(player2_hand[0]), all_card_array_player2)
        Card7_array_player2, all_card_array_player2 = convert_to_numpy_array_playercards(
            Card.int_to_str(player2_hand[1]), all_card_array_player2)
        pot_array_stage1 = convert_pot_to_numpy(total_pot_size)
        state_array_player1_stage1 = np.stack(
            (flop1_array, flop2_array, flop3_array, turn_array, river_array,
             Card6_array_player1, Card7_array_player1, all_card_array_player1,
             pot_array_stage1))

        hash_key_stage1 = pickle.dumps(state_array_player1_stage1)
        gain_loss_stage1 = np.zeros((3))
        gain_loss_stage1_count = np.zeros((3))
        winner, pot_size, player1_new_bet, player2_new_bet, player1_action_stage1, player2_action = betting(
            model_player1, model_player2, flop1_array, flop2_array,
            flop3_array, turn_array, river_array, Card6_array_player1,
            Card7_array_player1, Card6_array_player2, Card7_array_player2,
            all_card_array_player1, all_card_array_player2, total_pot_size)
        total_pot_size += pot_size
        player1_bet += player1_new_bet
        player2_bet += player2_new_bet

        if winner == "Player 1":
            # print("Player 1 Wins!!",total_pot_size)
            # print("Player 1 Gain!!",int(total_pot_size-player1_bet))
            if player1_action_stage1 == "Bet":
                if hash_key_stage1 in reward_table.keys():
                    reward_table[hash_key_stage1][2] += int(total_pot_size -
                                                            player1_bet)
                    reward_count[hash_key_stage1][2] += 1
                else:
                    gain_loss_stage1[2] += int(total_pot_size - player1_bet)
                    gain_loss_stage1_count[2] += 1
            elif player1_action_stage1 == "Check/Call":
                if hash_key_stage1 in reward_table.keys():
                    reward_table[hash_key_stage1][1] += int(total_pot_size -
                                                            player1_bet)
                    reward_count[hash_key_stage1][1] += 1
                else:
                    gain_loss_stage1[1] += int(total_pot_size - player1_bet)
                    gain_loss_stage1_count[1] += 1

        elif winner == "Player 2":
            # print("Player 2 Wins!!",total_pot_size)
            # print("Player 2 Gain!!",int(total_pot_size-player2_bet))
            if player1_action_stage1 == "Fold":
                if hash_key_stage1 in reward_table.keys():
                    reward_table[hash_key_stage1][0] = -1 * int(player1_bet)
                    reward_count[hash_key_stage1][0] += 1
                else:
                    gain_loss_stage1[0] = -1 * int(player1_bet)
                    gain_loss_stage1_count[0] = 1

        else:
            turn_array, all_card_array_player1, all_card_array_player2 = convert_to_numpy_array(
                Card.int_to_str(turn), all_card_array_player1,
                all_card_array_player2)
            pot_array_stage2 = convert_pot_to_numpy(total_pot_size)
            state_array_player1_stage2 = np.stack(
                (flop1_array, flop2_array, flop3_array, turn_array,
                 river_array, Card6_array_player1, Card7_array_player1,
                 all_card_array_player1, pot_array_stage2))

            hash_key_stage2 = pickle.dumps(state_array_player1_stage2)
            gain_loss_stage2 = np.zeros((3))
            gain_loss_stage2_count = np.zeros((3))
            winner, pot_size, player1_new_bet, player2_new_bet, player1_action_stage2, player2_action = betting(
                model_player1, model_player2, flop1_array, flop2_array,
                flop3_array, turn_array, river_array, Card6_array_player1,
                Card7_array_player1, Card6_array_player2, Card7_array_player2,
                all_card_array_player1, all_card_array_player2, total_pot_size)

            total_pot_size += pot_size
            player1_bet += player1_new_bet
            player2_bet += player2_new_bet

            if winner == "Player 1":
                # print("Player 1 Wins!!", total_pot_size)
                # print("Player 1 Gain!!", int(total_pot_size - player1_bet))
                if player1_action_stage2 == "Bet":
                    if hash_key_stage2 in reward_table.keys():
                        reward_table[hash_key_stage2][2] += int(
                            total_pot_size - player1_bet)
                        reward_count[hash_key_stage2][2] += 1
                    else:
                        gain_loss_stage2[2] = int(total_pot_size - player1_bet)
                        gain_loss_stage2_count[2] = 1
                elif player1_action_stage2 == "Check/Call":
                    if hash_key_stage2 in reward_table.keys():
                        reward_table[hash_key_stage2][1] += int(
                            total_pot_size - player1_bet)
                        reward_count[hash_key_stage2][1] += 1
                    else:
                        gain_loss_stage2[1] = int(total_pot_size - player1_bet)
                        gain_loss_stage2_count[1] = 1
                if player1_action_stage1 == "Bet":
                    if hash_key_stage1 in reward_table.keys():
                        reward_table[hash_key_stage1][2] += int(
                            total_pot_size - player1_bet)
                        reward_count[hash_key_stage1][2] += 1
                    else:
                        gain_loss_stage1[2] = int(total_pot_size - player1_bet)
                        gain_loss_stage1_count[2] = 1
                elif player1_action_stage1 == "Check/Call":
                    if hash_key_stage1 in reward_table.keys():
                        reward_table[hash_key_stage1][1] += int(
                            total_pot_size - player1_bet)
                        reward_count[hash_key_stage1][1] += 1
                    else:
                        gain_loss_stage1[1] = int(total_pot_size - player1_bet)
                        gain_loss_stage1_count[1] = 1

            elif winner == "Player 2":
                # print("Player 2 Wins!!", total_pot_size)
                # print("Player 2 Gain!!", int(total_pot_size - player2_bet))
                if player1_action_stage2 == "Fold":
                    if hash_key_stage2 in reward_table.keys():
                        reward_table[hash_key_stage2][0] = -1 * int(
                            player1_bet)
                        reward_count[hash_key_stage2][0] += 1
                    else:
                        gain_loss_stage2[0] = -1 * int(player1_bet)
                        gain_loss_stage2_count[0] += 1
                if player1_action_stage1 == "Bet":
                    if hash_key_stage1 in reward_table.keys():
                        reward_table[hash_key_stage1][2] += -1 * int(
                            player1_bet)
                        reward_count[hash_key_stage1][2] += 1
                    else:
                        gain_loss_stage1[2] = -1 * int(player1_bet)
                        gain_loss_stage1_count[2] = 1
                elif player1_action_stage1 == "Check/Call":
                    if hash_key_stage1 in reward_table.keys():
                        reward_table[hash_key_stage1][1] += -1 * int(
                            player1_bet)
                        reward_count[hash_key_stage1][1] += 1
                    else:
                        gain_loss_stage1[1] = -1 * int(player1_bet)
                        gain_loss_stage1_count[1] = 1

            else:
                river_array, all_card_array_player1, all_card_array_player2 = convert_to_numpy_array(
                    Card.int_to_str(river), all_card_array_player1,
                    all_card_array_player2)
                winner, pot_size, player1_new_bet, player2_new_bet, player1_action_stage3, player2_action = betting(
                    model_player1, model_player2, flop1_array, flop2_array,
                    flop3_array, turn_array, river_array, Card6_array_player1,
                    Card7_array_player1, Card6_array_player2,
                    Card7_array_player2, all_card_array_player1,
                    all_card_array_player2, total_pot_size)
                pot_array_stage3 = convert_pot_to_numpy(total_pot_size)
                state_array_player1_stage3 = np.stack(
                    (flop1_array, flop2_array, flop3_array, turn_array,
                     river_array, Card6_array_player1, Card7_array_player1,
                     all_card_array_player1, pot_array_stage3))

                hash_key_stage3 = pickle.dumps(state_array_player1_stage3)
                gain_loss_stage3 = np.zeros((3))
                gain_loss_stage3_count = np.zeros((3))

                total_pot_size += pot_size
                player1_bet += player1_new_bet
                player2_bet += player2_new_bet

                if winner == "Player 1":
                    # print("Player 1 Wins!!", total_pot_size)
                    # print("Player 1 Gain!!", int(total_pot_size - player1_bet))
                    if player1_action_stage3 == "Bet":
                        if hash_key_stage3 in reward_table.keys():
                            reward_table[hash_key_stage3][2] += int(
                                total_pot_size - player1_bet)
                            reward_count[hash_key_stage3][2] += 1
                        else:
                            gain_loss_stage3[2] = int(total_pot_size -
                                                      player1_bet)
                            gain_loss_stage3_count[2] = 1
                    elif player1_action_stage3 == "Check/Call":
                        if hash_key_stage3 in reward_table.keys():
                            reward_table[hash_key_stage3][1] += int(
                                total_pot_size - player1_bet)
                            reward_count[hash_key_stage3][1] += 1
                        else:
                            gain_loss_stage3[1] = int(total_pot_size -
                                                      player1_bet)
                            gain_loss_stage3_count[1] = 1
                    if player1_action_stage1 == "Bet":
                        if hash_key_stage1 in reward_table.keys():
                            reward_table[hash_key_stage1][2] += int(
                                total_pot_size - player1_bet)
                            reward_count[hash_key_stage1][2] += 1
                        else:
                            gain_loss_stage1[2] = int(total_pot_size -
                                                      player1_bet)
                            gain_loss_stage1_count[2] = 1
                    elif player1_action_stage1 == "Check/Call":
                        if hash_key_stage1 in reward_table.keys():
                            reward_table[hash_key_stage1][1] += int(
                                total_pot_size - player1_bet)
                            reward_count[hash_key_stage1][1] += 1
                        else:
                            gain_loss_stage1[1] = int(total_pot_size -
                                                      player1_bet)
                            gain_loss_stage1_count[1] = 1
                    if player1_action_stage2 == "Bet":
                        if hash_key_stage2 in reward_table.keys():
                            reward_table[hash_key_stage2][2] += int(
                                total_pot_size - player1_bet)
                            reward_count[hash_key_stage2][2] += 1
                        else:
                            gain_loss_stage2[2] = int(total_pot_size -
                                                      player1_bet)
                            gain_loss_stage2_count[2] = 1
                    elif player1_action_stage2 == "Check/Call":
                        if hash_key_stage2 in reward_table.keys():
                            reward_table[hash_key_stage2][1] += int(
                                total_pot_size - player1_bet)
                            reward_count[hash_key_stage2][1] += 1
                        else:
                            gain_loss_stage2[1] = int(total_pot_size -
                                                      player1_bet)
                            gain_loss_stage2_count[1] = 1

                elif winner == "Player 2":
                    # print("Player 2 Wins!!", total_pot_size)
                    # print("Player 2 Gain!!", int(total_pot_size - player2_bet))
                    if player1_action_stage3 == "Fold":
                        if hash_key_stage3 in reward_table.keys():
                            reward_table[hash_key_stage3][0] = -1 * int(
                                player1_bet)
                            reward_count[hash_key_stage3][0] += 1
                        else:
                            gain_loss_stage3[0] = -1 * int(player1_bet)
                            gain_loss_stage3_count[0] += 1

                    if player1_action_stage1 == "Bet":
                        if hash_key_stage1 in reward_table.keys():
                            reward_table[hash_key_stage1][2] += -1 * int(
                                player1_bet)
                            reward_count[hash_key_stage1][2] += 1
                        else:
                            gain_loss_stage1[2] = -1 * int(player1_bet)
                            gain_loss_stage1_count[2] = 1
                    elif player1_action_stage1 == "Check/Call":
                        if hash_key_stage1 in reward_table.keys():
                            reward_table[hash_key_stage1][1] += -1 * int(
                                player1_bet)
                            reward_count[hash_key_stage1][1] += 1
                        else:
                            gain_loss_stage1[1] = -1 * int(player1_bet)
                            gain_loss_stage1_count[1] = 1

                    if player1_action_stage2 == "Bet":
                        if hash_key_stage2 in reward_table.keys():
                            reward_table[hash_key_stage2][2] += -1 * int(
                                player1_bet)
                            reward_count[hash_key_stage2][2] += 1
                        else:
                            gain_loss_stage2[2] = -1 * int(player1_bet)
                            gain_loss_stage2_count[2] = 1
                    elif player1_action_stage2 == "Check/Call":
                        if hash_key_stage2 in reward_table.keys():
                            reward_table[hash_key_stage2][1] += -1 * int(
                                player1_bet)
                            reward_count[hash_key_stage2][1] += 1
                        else:
                            gain_loss_stage2[1] = -1 * int(player1_bet)
                            gain_loss_stage2_count[1] = 1

                else:
                    final_score_player1 = evaluator.get_rank_class(
                        evaluator._seven(player1_complete_hand))
                    final_score_player2 = evaluator.get_rank_class(
                        evaluator._seven(player2_complete_hand))
                    if final_score_player1 < final_score_player2:
                        # print("Player 1 Wins!!",total_pot_size)
                        # print("Player 1 Gain!!",int(total_pot_size - player1_bet))
                        if player1_action_stage3 == "Bet":
                            if hash_key_stage3 in reward_table.keys():
                                reward_table[hash_key_stage3][2] += int(
                                    total_pot_size - player1_bet)
                                reward_count[hash_key_stage3][2] += 1
                            else:
                                gain_loss_stage3[2] = int(total_pot_size -
                                                          player1_bet)
                                gain_loss_stage3_count[2] = 1
                        elif player1_action_stage3 == "Check/Call":
                            if hash_key_stage3 in reward_table.keys():
                                reward_table[hash_key_stage3][1] += int(
                                    total_pot_size - player1_bet)
                                reward_count[hash_key_stage3][1] += 1
                            else:
                                gain_loss_stage3[1] = int(total_pot_size -
                                                          player1_bet)
                                gain_loss_stage3_count[1] = 1
                        if player1_action_stage1 == "Bet":
                            if hash_key_stage1 in reward_table.keys():
                                reward_table[hash_key_stage1][2] += int(
                                    total_pot_size - player1_bet)
                                reward_count[hash_key_stage1][2] += 1
                            else:
                                gain_loss_stage1[2] = int(total_pot_size -
                                                          player1_bet)
                                gain_loss_stage1_count[2] = 1
                        elif player1_action_stage1 == "Check/Call":
                            if hash_key_stage1 in reward_table.keys():
                                reward_table[hash_key_stage1][1] += int(
                                    total_pot_size - player1_bet)
                                reward_count[hash_key_stage1][1] += 1
                            else:
                                gain_loss_stage1[1] = int(total_pot_size -
                                                          player1_bet)
                                gain_loss_stage1_count[1] = 1
                        if player1_action_stage2 == "Bet":
                            if hash_key_stage2 in reward_table.keys():
                                reward_table[hash_key_stage2][2] += int(
                                    total_pot_size - player1_bet)
                                reward_count[hash_key_stage2][2] += 1
                            else:
                                gain_loss_stage2[2] = int(total_pot_size -
                                                          player1_bet)
                                gain_loss_stage2_count[2] = 1
                        elif player1_action_stage2 == "Check/Call":
                            if hash_key_stage2 in reward_table.keys():
                                reward_table[hash_key_stage2][1] += int(
                                    total_pot_size - player1_bet)
                                reward_count[hash_key_stage2][1] += 1
                            else:
                                gain_loss_stage2[1] = int(total_pot_size -
                                                          player1_bet)
                                gain_loss_stage2_count[1] = 1

                    else:
                        # print("Player 2 Wins!!",total_pot_size)
                        # print("Player 2 Gain!!",int(total_pot_size-player2_bet))
                        # print("Player 2 Wins!!", total_pot_size)
                        # print("Player 2 Gain!!", int(total_pot_size - player2_bet))
                        if player1_action_stage3 == "Fold":
                            if hash_key_stage3 in reward_table.keys():
                                reward_table[hash_key_stage3][0] = -1 * int(
                                    player1_bet)
                                reward_count[hash_key_stage3][0] += 1
                            else:
                                gain_loss_stage3[0] = -1 * int(player1_bet)
                                gain_loss_stage3_count[0] += 1

                        if player1_action_stage1 == "Bet":
                            if hash_key_stage1 in reward_table.keys():
                                reward_table[hash_key_stage1][2] += -1 * int(
                                    player1_bet)
                                reward_count[hash_key_stage1][2] += 1
                            else:
                                gain_loss_stage1[2] = -1 * int(player1_bet)
                                gain_loss_stage1_count[2] = 1
                        elif player1_action_stage1 == "Check/Call":
                            if hash_key_stage1 in reward_table.keys():
                                reward_table[hash_key_stage1][1] += -1 * int(
                                    player1_bet)
                                reward_count[hash_key_stage1][1] += 1
                            else:
                                gain_loss_stage1[1] = -1 * int(player1_bet)
                                gain_loss_stage1_count[1] = 1

                        if player1_action_stage2 == "Bet":
                            if hash_key_stage2 in reward_table.keys():
                                reward_table[hash_key_stage2][2] += -1 * int(
                                    player1_bet)
                                reward_count[hash_key_stage2][2] += 1
                            else:
                                gain_loss_stage2[2] = -1 * int(player1_bet)
                                gain_loss_stage2_count[2] = 1
                        elif player1_action_stage2 == "Check/Call":
                            if hash_key_stage2 in reward_table.keys():
                                reward_table[hash_key_stage2][1] += -1 * int(
                                    player1_bet)
                                reward_count[hash_key_stage2][1] += 1
                            else:
                                gain_loss_stage2[1] = -1 * int(player1_bet)
                                gain_loss_stage2_count[1] = 1

                if hash_key_stage3 not in reward_table.keys():
                    reward_table[hash_key_stage3] = gain_loss_stage3
                    reward_count[hash_key_stage3] = gain_loss_stage3_count

            if hash_key_stage2 not in reward_table.keys():
                reward_table[hash_key_stage2] = gain_loss_stage2
                reward_count[hash_key_stage2] = gain_loss_stage2_count

        if hash_key_stage1 not in reward_table.keys():
            reward_table[hash_key_stage1] = gain_loss_stage1
            reward_count[hash_key_stage1] = gain_loss_stage1_count
        round_number += 1
        print_progress(round_number,
                       rounds,
                       prefix='Progress:',
                       suffix='Complete',
                       bar_length=40)

    for key in reward_table.keys():
        count_fold = reward_count[key][0]
        count_check = reward_count[key][1]
        count_bet = reward_count[key][2]
        if count_fold != 0:
            reward_table[key][0] = reward_table[key][0] / count_fold
        if count_check != 0:
            reward_table[key][1] = reward_table[key][1] / count_check
        if count_bet != 0:
            reward_table[key][2] = reward_table[key][2] / count_bet
    return reward_table