示例#1
0
 def __init__(self, explore_policy='constant epsilon', eps=0.05, natural=False,
              eps_decay=0.9999, show_every=1000000, evaluate_iter=1000, temp_decay=0.9999, init_temp=20):
     """Initialise the Q-table, the Blackjack environment and the exploration schedule.

     Args:
         explore_policy: Name of the exploration strategy. One of
             ``'constant_epsilon'``, ``'decay_epsilon'`` or
             ``'boltzmann_exploration'``. Spaces are treated as underscores,
             so the default ``'constant epsilon'`` selects the constant
             epsilon-greedy strategy.
         eps: Epsilon stored for ``'constant_epsilon'``; the other
             strategies start from ``eps = 1``.
         natural: Forwarded to ``blackjack.BlackjackEnv(natural=...)``.
         eps_decay: Stored as ``self.eps_decay`` for ``'decay_epsilon'``;
             the other strategies store ``1``.
         show_every: Stored as ``self.show_every``.
         evaluate_iter: Stored as ``self.evaluate_iter``.
         temp_decay: Stored as ``self.temp_decay`` for
             ``'boltzmann_exploration'``; the other strategies store ``1``.
         init_temp: Initial value of ``self.temp`` for every strategy.

     Raises:
         ValueError: If ``explore_policy`` names no known strategy. Without
             this check the instance would be left without
             ``explore_policy``/``eps``/``temp`` attributes.
     """
     self.Q = self.initializeQ()
     self.winrates = []
     self.natural = natural
     self.show_every = show_every
     self.evaluate_iter = evaluate_iter
     self.env = blackjack.BlackjackEnv(natural=self.natural)
     self.n_sub_optimals = []
     # Presumably lower bounds for the decayed epsilon / temperature;
     # they are only stored here -- confirm against the training loop.
     self.min_eps = 0.001
     self.min_temp = 0.1

     # The default argument is spelled with a space while the branches below
     # use underscores; normalise so both spellings select the same strategy.
     policy_name = str(explore_policy).replace(' ', '_')

     # Every strategy starts from the same temperature.
     self.temp = init_temp
     if policy_name == 'constant_epsilon':
         self.explore_policy = self.e_greedy
         self.eps = eps
         self.eps_decay = 1
         self.temp_decay = 1
     elif policy_name == 'decay_epsilon':
         self.explore_policy = self.e_greedy
         self.eps = 1
         self.eps_decay = eps_decay
         self.temp_decay = 1
     elif policy_name == 'boltzmann_exploration':
         self.explore_policy = self.boltzmann_exploration
         self.eps = 1
         self.eps_decay = 1
         self.temp_decay = temp_decay
     else:
         raise ValueError(
             "Unknown explore_policy {!r}; expected 'constant_epsilon', "
             "'decay_epsilon' or 'boltzmann_exploration'".format(explore_policy))
示例#2
0
    def __init__(self):
        """Build the online/target networks, the replay buffer and the environment."""
        # Online network (per the original note: trained every step) and the
        # target network (per the original note: the one predicted against).
        self.model = self.create_model()
        self.target_model = self.create_model()

        # Start the target network from exactly the online network's weights.
        initial_weights = self.model.get_weights()
        self.target_model.set_weights(initial_weights)

        # Bookkeeping values.
        self.eps = 1
        self.ep_rewards = []
        self.target_update_counter = 0

        # Bounded buffer: once REPLAY_MEMORY_SIZE items are stored, the oldest
        # entries are discarded as new ones are appended.
        self.replay_memory = deque(maxlen=self.REPLAY_MEMORY_SIZE)
        # self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{self.MODEL_NAME}-{int(time.time())}")

        self.env = blackjack.BlackjackEnv(natural=self.NATURAL)
示例#3
0
    def evaluate_model(self, n_episodes):
        """Play ``n_episodes`` games with the current best policy and print a report.

        The policy is derived from ``self.get_q_table()`` via
        ``self.get_best_policy``. Prints the win rate (rewards 1 and 1.5
        count as wins) and the number of sub-optimal actions reported by
        ``visualization.compare2Optimal``. Nothing is returned.
        """
        greedy_policy = self.get_best_policy(self.get_q_table())
        env = blackjack.BlackjackEnv(natural=self.NATURAL)
        # Tally of final rewards; only these four values are expected.
        outcome_counts = {-1: 0, 0: 0, 1: 0, 1.5: 0}

        for _ in range(n_episodes):
            observation = env.reset()
            finished = False
            while not finished:
                # When the first state component is below 12 the policy is
                # bypassed and action 1 is always taken (presumably "hit" --
                # confirm against the environment's action encoding).
                move = 1 if observation[0] < 12 else greedy_policy[observation]
                observation, reward, finished, _info = env.step(move)
            # Only the reward of the final step of the episode is recorded.
            outcome_counts[reward] += 1

        wins = outcome_counts[1] + outcome_counts[1.5]
        winrate = wins / n_episodes * 100
        print('\nWin Rate: {:.2f} % ({} games)'.format(winrate, n_episodes))
        n_sub_optimal = visualization.compare2Optimal(greedy_policy)
        print('Suboptimal Actions: {}/200\n'.format(n_sub_optimal))
示例#4
0
    def evaluate_policy(self):
        """Play ``self.evaluate_iter`` games with the best policy and record the win rate.

        Prints the win rate (rewards 1 and 1.5 count as wins) and the number
        of sub-optimal actions reported by ``visualization.compare2Optimal``,
        then appends the win rate to ``self.winrates``. Nothing is returned.
        """
        n_games = self.evaluate_iter
        best_policy = self.get_best_policy()
        env = blackjack.BlackjackEnv(natural=self.natural)
        # Tally of final rewards; only these four values are expected.
        outcome_counts = {-1: 0, 0: 0, 1: 0, 1.5: 0}

        for _ in range(n_games):
            observation = env.reset()
            finished = False
            while not finished:
                # When the first state component is below 12 the policy is
                # bypassed and action 1 is always taken (presumably "hit" --
                # confirm against the environment's action encoding).
                move = 1 if observation[0] < 12 else best_policy[observation]
                observation, reward, finished, _info = env.step(move)
            # Only the reward of the final step of the episode is recorded.
            outcome_counts[reward] += 1

        wins = outcome_counts[1] + outcome_counts[1.5]
        winrate = wins / self.evaluate_iter * 100
        print('\nWin Rate: {:.2f} % ({} games)'.format(winrate,
                                                       self.evaluate_iter))
        n_sub_optimal = visualization.compare2Optimal(best_policy)
        print('Suboptimal Actions: {}/200\n'.format(n_sub_optimal))
        self.winrates.append(winrate)
示例#5
0
"""
    Gagan Heer A00933997
    Decision Making: Rule Based
    Please look over the README.md file if there is any trouble using this file
"""
import time
import gym
import blackjack as bj
import random

# Module-level Blackjack environment, built with the environment's defaults.
env = bj.BlackjackEnv()
# Action codes (0 = stand, 1 = hit).
STAND = 0
HIT = 1
# NOTE(review): presumably the number of games to simulate; its use is not
# visible in this excerpt -- confirm whether 1001 (rather than 1000) is intended.
numGames = 1001


# 1 = hit, 0 = stand
def rule_based_action(playerTotal, dealerCard):
    """Pick HIT or STAND from fixed rules on the player's total and the dealer's card.

    Args:
        playerTotal: The player's current hand total.
        dealerCard: The dealer's card value; 1 is handled together with
            values >= 7 (presumably 1 encodes an ace -- confirm).

    NOTE(review): no ``return`` statement is visible in this excerpt, so as
    shown the chosen action is computed but not handed back; presumably
    ``nextAction`` should be returned -- confirm against the full file.
    """
    nextAction = None
    # Totals of 11 or less always hit.
    if playerTotal <= 11:
        nextAction = HIT
    # Totals of 17 or more always stand.
    elif playerTotal >= 17:
        nextAction = STAND
    # From here on playerTotal is between 12 and 16.
    elif dealerCard >= 7 or dealerCard == 1:
        nextAction = HIT
    # The dealer-card test is already implied by the previous branch failing
    # (for integer card values), so this effectively checks playerTotal >= 13.
    elif (dealerCard <= 6 and dealerCard != 1) and playerTotal >= 13:
        nextAction = STAND
    # Only playerTotal == 12 reaches the last two branches (for integer totals):
    # hit against a dealer 2 or 3, otherwise stand.
    elif dealerCard == 2 or dealerCard == 3:
        nextAction = HIT
    else:
        nextAction = STAND
            if done:
                if reward >= 1:
                    win += 1

                elif reward == 0:
                    tie += 1

                elif reward == -1:
                    loss += 1

                break

    # percentage of winning games
    return 100 * win / n_test, 100 * tie / n_test, 100 * loss/n_test



# TEST
# Runs the same experiment twice with differently configured environments and
# prints win/tie/loss percentages for each run.
# NOTE(review): `main` and `nb_deck` are not defined in the visible excerpt;
# `main` presumably uses the module-level `env` rebound below -- confirm.

# Results
print("WITH REPLACEMENT")
# Presumably a very large deck count is used to approximate drawing with
# replacement -- confirm the meaning of BlackjackEnv's first argument.
env = bj.BlackjackEnv(1000000)
results = main(algo='random', nb_games = 50000, txt = 'replacement.csv', txt_to_read = 'replacement.csv', n_test = 10000)
print("Wins:", results[0], "% || Ties:", results[1], "%  ||  Losses:", results[2], "%")
# "Espérance" is French for "expectation": prints results[0] minus results[2]
# (labelled wins and losses above).
print("Espérance : " + str(results[0] - results[2]))
print("WITHOUT REPLACEMENT")
env = bj.BlackjackEnv(nb_deck)
results = main(algo='random', nb_games = 50000, txt = 'no_replacement.csv', txt_to_read = 'no_replacement.csv', n_test = 10000)
print("Wins:", results[0], "% || Ties:", results[1], "%  ||  Losses:", results[2], "%")
# Same expectation figure (results[0] minus results[2]) for the second run.
print("Espérance : " + str(results[0] - results[2]))