def dealer_stats_for_given_hand(games: int = 1000000,
                                decks: int = 6) -> tuple:
    """Simulate dealer hands and tally the final result per starting card.

    For each simulated game the deck is restarted, one card is dealt as the
    dealer's starting card, and the hand is completed with ``dealer_hand``.
    The final count (or ``'F'`` when it exceeds 21) is tallied under the
    starting card.

    Args:
        games: Number of dealer hands to simulate.
        decks: Value passed to the ``BjDeck`` constructor.

    Returns:
        A ``(raw_count, total)`` pair of DataFrames. Columns are starting
        cards ``'1'``..``'10'`` and rows are the results ``'17'``..``'21'``
        and ``'F'``. ``total`` is ``raw_count`` divided by its column sums.
    """
    outcomes = ('17', '18', '19', '20', '21', 'F')
    dealer_stats = {
        str(card): dict.fromkeys(outcomes, 0) for card in range(1, 11)
    }

    deck = BjDeck(decks)
    for _ in range(games):
        deck.start_game()
        start_hand = deck.deal()
        final_cards = dealer_hand([start_hand], deck)
        points = count_hand(final_cards)
        outcome = 'F' if points > 21 else str(points)
        # The increment is a float, so tallied cells hold float values.
        dealer_stats[str(start_hand)][outcome] += 1.0

    raw_count = pd.DataFrame(dealer_stats)
    raw_count.index.name = 'end count'
    raw_count.columns.name = 'start hand'
    # Normalise every column by its own sum.
    total = raw_count / raw_count.sum(axis=0)
    return raw_count, total
def init_game(number_of_players: int, deck: BjDeck) -> tuple:
    """Deal the opening cards for a round.

    Cards are drawn from ``deck.deal()`` in this order: one card for every
    player, one card for the dealer, then a second card for every player.

    Args:
        number_of_players: How many player hands to create.
        deck: Object whose ``deal()`` method returns the next card.

    Returns:
        ``(players, dealer)`` where ``players`` is a list of two-card lists
        and ``dealer`` is a one-card list.
    """
    players = [[deck.deal()] for _ in range(number_of_players)]
    dealer = [deck.deal()]
    for seat in players:
        seat.append(deck.deal())
    return players, dealer
def test_game():
    """Play one fixed hand (10, 10 against a dealer 3) with the web policy.

    The result of ``play_hand`` is discarded; this only exercises the call.
    NOTE(review): another ``test_game`` is defined further down in this file,
    which presumably rebinds the name at module level - confirm which one is
    meant to be kept.
    """
    dealer_card = 3
    deck = BjDeck(6)
    play_hand([10, 10], dealer_card, deck, 50, fixed_policy,
              dealer_card=dealer_card, policy=WEB_POLICY)
def main():
    """Play 10000 six-bet games with the web policy and report the money won.

    Every game prints its index, the final player hands, the dealer hand, the
    per-bet returns and the running money total. After the last game the
    final total and the average per game are printed.

    A game whose result contains ``None`` is reported as "bad input" and
    skipped. Previously execution fell through to ``len(None)`` and raised
    ``TypeError``. Skipped games still count in the divisor of the average.
    """
    deck = BjDeck(6)
    start_money = 0
    end_money = start_money
    games = 10000
    policy_params = {'policy': WEB_POLICY}

    for game_no in range(games):
        print(game_no)
        deck.start_game()
        # A fresh bet list is built for every game, as in the original call.
        final_hand, d_hand, return_money = play_game(
            deck, [100, 100, 100, 100, 100, 100], fixed_policy,
            **policy_params)

        if final_hand is None or return_money is None:
            print("bad input")
            # Nothing can be tallied or printed for this game.
            continue
        if len(final_hand) < 3 or len(return_money) < 3:
            print("bad input sum")

        for hand in final_hand:
            print(hand.cards)
        print("dealer", d_hand)
        print(return_money)
        end_money += sum(return_money)
        print("money", end_money)

    print(end_money)
    print("average win/lost per game: ", (end_money - start_money) / games)
def play_split(hand: list, bet: int, dealer: int, deck: BjDeck, policy_func,
               **policy_params):
    """Play out the hands that result from splitting ``hand``.

    ``hand`` is consumed: cards are popped from its end, each is paired with a
    freshly dealt card, and the policy chooses the action for the new pair.

    Args:
        hand: Cards being split; emptied by this function.
        bet: Bet forwarded to every resulting hand.
        dealer: Dealer's card, forwarded to the hand helpers.
        deck: Object whose ``deal()`` method returns the next card.
        policy_func: Policy passed to ``policy_selector`` and ``play_hit``.
        **policy_params: Extra keyword arguments for the policy.

    Returns:
        List of the finished hand objects.
    """
    finished = []

    # ace means split and one card only.
    if hand[0] == 1:
        for _ in range(2):
            ace_hand = OneHand([hand.pop(), deck.deal()], bet, dealer, 'Y')
            ace_hand.actions = [{
                'hand': [1, 1],
                'dealer': dealer,
                'action': 'Y'
            }]
            finished.append(ace_hand)
        return finished

    while hand:
        cards = [hand.pop(), deck.deal()]
        action = policy_selector(policy_func, cards=cards, **policy_params)

        if action == 'Y':
            # Split again: both cards go back on the stack of cards to play.
            hand.extend((cards.pop(), cards.pop()))
            continue

        if action == 'S':
            played = OneHand(cards, bet, dealer, action)
        elif action == 'D':
            played = play_double(cards, bet, deck, dealer)
        elif action == 'H':
            played = play_hit(cards, bet, dealer, deck, policy_func,
                              **policy_params)
        else:
            # Unknown action: report it and drop this pair of cards.
            print("action not understood ", action)
            continue

        add_split_action(played, dealer)
        finished.append(played)

    return finished
def player_win_rates_for_start_hands(games: int = 1000000,
                                     decks: int = 6) -> tuple:
    """Simulate games and count win/lost/equal per (player hand, dealer card).

    Each game deals player, dealer, player (in that order), plays the player
    hand with ``fixed_policy`` / ``WEB_POLICY`` at a bet of 1, completes the
    dealer hand and classifies every value returned by ``rewards`` by its sign.

    Args:
        games: Number of games to simulate.
        decks: Value passed to the ``BjDeck`` constructor.

    Returns:
        ``(raw_count, total)``: ``raw_count`` is a DataFrame with one column
        per ``'<hand index>_<dealer card>'`` key, and ``total`` is
        ``raw_count.apply(get_percents)``.
    """
    player_stats = {}
    deck = BjDeck(decks)
    policy_params = {'policy': WEB_POLICY}

    for _ in range(games):
        deck.start_game()
        first_card = deck.deal()
        dealer = [deck.deal()]
        dealer_index = str(dealer[0])
        hand = [first_card, deck.deal()]
        # The index is taken from the two starting cards, before play_hand runs.
        hand_index = convert_hand_to_index(hand)

        policy_params['dealer_card'] = dealer[0]
        curr_hand = play_hand(hand, dealer[0], deck, 1, fixed_policy,
                              **policy_params)
        d_cards = dealer_hand(dealer, deck)
        results = rewards(curr_hand, d_cards)

        index = f'{hand_index}_{dealer_index}'
        for result in results:
            if result > 0:
                end_game = 'win'
            else:
                end_game = 'lost' if result < 0 else 'equal'

            entry = player_stats.get(index)
            if entry is None:
                entry = player_stats[index] = {
                    'win': 0,
                    'lost': 0,
                    'equal': 0,
                    'total': 0,
                    'player_hand': hand_index,
                    'dealer': dealer_index
                }
            entry[end_game] += 1
            entry['total'] += 1

    raw_count = pd.DataFrame(player_stats)
    raw_count.index.name = 'Results'
    raw_count.columns.name = 'Player hand_Dealer hand'
    total = raw_count.apply(get_percents)
    return raw_count, total
def player_stats(games: int = 1000000, decks: int = 6) -> tuple:
    """Simulate player hands and collect statistics per starting hand.

    Each game deals player, dealer, player (in that order), plays the hand
    with ``fixed_policy`` / ``WEB_POLICY`` at a bet of 1 and passes every
    resulting hand to ``fill_stats`` together with the starting-hand index.

    Args:
        games: Number of games to simulate.
        decks: Value passed to the ``BjDeck`` constructor.

    Returns:
        ``(raw_count, total)``: a DataFrame built from the collected stats and
        the same frame divided by its column sums.
    """
    p_stats = {}
    deck = BjDeck(decks)

    for _ in range(games):
        deck.start_game()
        first_card = deck.deal()
        dealer_card = deck.deal()
        start_cards = [first_card, deck.deal()]
        hand_index = convert_hand_to_index(start_cards)

        played_hands = play_hand(start_cards, dealer_card, deck, 1,
                                 fixed_policy, dealer_card=dealer_card,
                                 policy=WEB_POLICY)
        for played in played_hands:
            fill_stats(played, hand_index, p_stats)

    raw_count = pd.DataFrame(p_stats)
    raw_count.index.name = 'end count'
    raw_count.columns.name = 'start hand'
    column_sums = raw_count.sum(axis=0)
    return raw_count, raw_count / column_sums
def _average_reward(deck, markov, policy_func, policy_params, n_games):
    """Play ``n_games`` one-bet games, record each in ``markov``, return the mean reward."""
    money = 0
    for _ in range(n_games):
        deck.start_game()
        final_hand, _d_hand, game_rewards = play_game(
            deck, [1], policy_func, **policy_params)
        money += game_rewards[0]
        markov.add_game(final_hand[0], game_rewards[0])
    return money / n_games


def test_states():
    """Train the MDP action probabilities until the average reward stops rising.

    A first batch of 10000 games is played with ``random_policy``. Then, while
    the latest average reward is higher than the previous one, the
    probabilities are updated via ``markov.update_probs()`` and a batch of
    100000 games is played with ``probability_based_policy``. Each batch
    average is printed, followed by the final action probabilities.

    The average is the batch reward divided by the number of games played.
    The earlier code divided by the leaked loop variable, which is one less
    than the number of games.
    """
    markov = MDP(HAND_OPTIONS)
    policy_params = {'actions': markov.actions,
                     'actions_prob': markov.actions_probs,
                     'threshold': 90}
    deck = BjDeck(6)

    curr_win = _average_reward(deck, markov, random_policy, policy_params,
                               10000)
    print("average win/lost per game: ", curr_win)

    past_win = -1000
    while curr_win > past_win:
        past_win = curr_win
        markov.update_probs()
        # Rebuilt after the update so the policy reads the current attributes.
        policy_params = {'actions': markov.actions,
                         'actions_prob': markov.actions_probs,
                         'threshold': 90}
        curr_win = _average_reward(deck, markov, probability_based_policy,
                                   policy_params, 100000)
        print("average win/lost per game: ", curr_win)

    print(markov.actions_probs)
def test_game():
    """Play a single one-bet game (bet 100) with the web policy and print it.

    ``play_game`` must return exactly three values; they are printed in the
    order returned.
    """
    deck = BjDeck(6)
    outcome = play_game(deck, [100], fixed_policy, policy=WEB_POLICY)
    final_hand, d_hand, return_money = outcome
    print(final_hand, d_hand, return_money)
def dealer_hand(cards: list, deck: BjDeck) -> list:
    """Draw cards for the dealer until the hand counts at least 17.

    Args:
        cards: The dealer's current cards; extended in place.
        deck: Object whose ``deal()`` method returns the next card.

    Returns:
        The same ``cards`` list, after drawing.
    """
    # The count is re-evaluated after every drawn card.
    while count_hand(cards) < 17:
        cards.append(deck.deal())
    return cards
def hit(self, deck: BjDeck):
    """Take one card from ``deck`` into this hand and refresh ``self.count``."""
    drawn = deck.deal()
    self.cards.append(drawn)
    self.count = count_hand(self.cards)