def dealer_stats_for_given_hand(games: int = 1000000, decks: int = 6) -> tuple:
    """Simulate dealer hands and tabulate final counts per starting card.

    For each of ``games`` rounds a single card is dealt to the dealer, the
    dealer's hand is played out with ``dealer_hand`` and the resulting count
    (17-21, or ``'F'`` when above 21) is tallied under the starting card.

    Args:
        games: Number of dealer hands to simulate.
        decks: Number of decks passed to ``BjDeck``.

    Returns:
        A ``(raw_count, total)`` pair of DataFrames: the raw tallies (rows are
        end counts, columns are start hands) and the same table divided by
        each column's sum.
    """
    end_labels = ('17', '18', '19', '20', '21', 'F')
    tallies = {
        str(card): dict.fromkeys(end_labels, 0) for card in range(1, 11)
    }

    shoe = BjDeck(decks)
    for _ in range(games):
        shoe.start_game()
        first_card = shoe.deal()
        final_cards = dealer_hand([first_card], shoe)
        final_count = count_hand(final_cards)
        outcome = 'F' if final_count > 21 else str(final_count)
        # Tallies are accumulated as floats, exactly as before.
        tallies[str(first_card)][outcome] += 1.0

    raw_count = pd.DataFrame(tallies)
    raw_count.index.name = 'end count'
    raw_count.columns.name = 'start hand'
    column_sums = raw_count.sum(axis=0)
    return raw_count, raw_count / column_sums
def player_win_rates_for_start_hands(games: int = 1000000, decks: int = 6) -> tuple:
    """Simulate player hands under ``WEB_POLICY`` and tally win/lost/equal.

    Each round deals player, dealer, player (in that order), plays the player
    hand with ``fixed_policy``, plays out the dealer and scores the outcome
    with ``rewards``. Every reward returned for the round is tallied under the
    key ``"<hand index>_<dealer card>"``.

    Args:
        games: Number of rounds to simulate.
        decks: Number of decks passed to ``BjDeck``.

    Returns:
        A ``(raw_count, total)`` pair of DataFrames: the raw tallies (one
        column per start-hand/dealer-card key) and the result of applying
        ``get_percents`` to each column.
    """
    tallies = {}
    shoe = BjDeck(decks)
    policy_params = {'policy': WEB_POLICY}

    for _ in range(games):
        shoe.start_game()
        # Keep the original deal order: player, dealer, player.
        player_first = shoe.deal()
        dealer = [shoe.deal()]
        hand = [player_first, shoe.deal()]

        up_card = dealer[0]
        dealer_index = str(up_card)
        hand_index = convert_hand_to_index(hand)
        policy_params['dealer_card'] = up_card

        played = play_hand(hand, up_card, shoe, 1, fixed_policy,
                           **policy_params)
        d_cards = dealer_hand(dealer, shoe)
        outcomes = rewards(played, d_cards)

        key = f'{hand_index}_{dealer_index}'
        for outcome in outcomes:
            label = 'win' if outcome > 0 else 'lost' if outcome < 0 else 'equal'
            if key not in tallies:
                # First sighting of this start position: create a zeroed row.
                tallies[key] = {
                    'win': 0,
                    'lost': 0,
                    'equal': 0,
                    'total': 0,
                    'player_hand': hand_index,
                    'dealer': dealer_index,
                }
            row = tallies[key]
            row[label] += 1
            row['total'] += 1

    raw_count = pd.DataFrame(tallies)
    raw_count.index.name = 'Results'
    raw_count.columns.name = 'Player hand_Dealer hand'
    return raw_count, raw_count.apply(get_percents)
def player_stats(games: int = 1000000, decks: int = 6) -> tuple:
    """Simulate player hands under ``WEB_POLICY`` and tabulate final hands.

    Each round deals player, dealer, player (in that order), plays the player
    hand with ``fixed_policy`` and hands every resulting hand to
    ``fill_stats`` together with the index of the starting hand.

    Args:
        games: Number of rounds to simulate.
        decks: Number of decks passed to ``BjDeck``.

    Returns:
        A ``(raw_count, total)`` pair of DataFrames: the table built from the
        accumulated stats and the same table divided by each column's sum.
    """
    tallies = {}
    shoe = BjDeck(decks)

    for _ in range(games):
        shoe.start_game()
        # Keep the original deal order: player, dealer, player.
        player_first = shoe.deal()
        up_card = shoe.deal()
        start_cards = [player_first, shoe.deal()]
        hand_index = convert_hand_to_index(start_cards)

        finished_hands = play_hand(
            start_cards, up_card, shoe, 1, fixed_policy,
            dealer_card=up_card, policy=WEB_POLICY,
        )
        for finished in finished_hands:
            fill_stats(finished, hand_index, tallies)

    raw_count = pd.DataFrame(tallies)
    raw_count.index.name = 'end count'
    raw_count.columns.name = 'start hand'
    column_sums = raw_count.sum(axis=0)
    return raw_count, raw_count / column_sums
def main():
    """Play 10000 six-hand games with ``fixed_policy`` and print the results.

    Every game places six bets of 100, prints the final player hands, the
    dealer hand and the returned money, and adds the returned money to a
    running balance. After all games the final balance and the average
    win/loss per game are printed.
    """
    deck = BjDeck(6)
    start_money = 0
    end_money = start_money
    games = 10000
    policy_params = {'policy': WEB_POLICY}

    for i in range(games):
        print(i)
        deck.start_game()
        # A fresh bet list per game, in case play_game modifies it.
        final_hand, d_hand, return_money = play_game(
            deck, [100] * 6, fixed_policy, **policy_params)

        if final_hand is None or return_money is None:
            print("bad input")
            # Nothing to report for this game; falling through would raise
            # TypeError on len(None) / iteration below.
            continue
        if len(final_hand) < 3 or len(return_money) < 3:
            print("bad input sum")

        for hand in final_hand:
            print(hand.cards)
        print("dealer", d_hand)
        print(return_money)
        end_money += sum(return_money)
        print("money", end_money)

    print(end_money)
    print("average win/lost per game: ", (end_money - start_money) / games)
def _average_reward(deck, markov, n_games, policy, policy_params):
    """Play ``n_games`` single-bet games and return the mean first-hand reward.

    Every game is also recorded in ``markov`` via ``add_game``.
    """
    total_reward = 0
    for _ in range(n_games):
        deck.start_game()
        final_hand, _d_hand, game_rewards = play_game(
            deck, [1], policy, **policy_params)
        total_reward += game_rewards[0]
        markov.add_game(final_hand[0], game_rewards[0])
    # Divide by the number of games actually played, not by the last loop
    # index (which is n_games - 1).
    return total_reward / n_games


def test_states():
    """Iteratively refine the MDP action probabilities while results improve.

    First plays 10000 games with ``random_policy`` to seed the MDP. Then it
    repeatedly updates the probabilities and plays 100000 games with
    ``probability_based_policy``, stopping as soon as the average reward per
    game no longer increases. The final action probabilities are printed.
    """
    markov = MDP(HAND_OPTIONS)
    policy_params = {
        'actions': markov.actions,
        'actions_prob': markov.actions_probs,
        'threshold': 90,
    }
    deck = BjDeck(6)

    curr_win = _average_reward(deck, markov, 10000, random_policy,
                               policy_params)
    print("average win/lost per game: ", curr_win)

    past_win = -1000
    while curr_win > past_win:
        past_win = curr_win
        markov.update_probs()
        # Rebuild the parameters after each update so the policy reads the
        # current attributes of the MDP.
        policy_params = {
            'actions': markov.actions,
            'actions_prob': markov.actions_probs,
            'threshold': 90,
        }
        curr_win = _average_reward(deck, markov, 100000,
                                   probability_based_policy, policy_params)
        print("average win/lost per game: ", curr_win)

    print(markov.actions_probs)