# Training script: a Q-learning player ("X") plays N_episodes games against a
# random player ("O"); the Q table held by the Game object is then pickled.
import copy
import pickle
import tkinter as tk

import numpy as np

from Q_Learning_Tic_Tac_Toe import Game, QPlayer, RandomPlayer  # Classes used for Tic Tac Toe

root = tk.Tk()

# Hyper-parameters handed to the learning player.
epsilon = 2e-2
lr = 10e-3

player1 = QPlayer(mark="X", epsilon=epsilon, learningRate=lr)  # TODO: add parameters
player2 = RandomPlayer(mark="O")

# A larger board was previously tried here via the extra keyword arguments
# board_size=5, streak_size=4 (left disabled).
game = Game(root, player1, player2)

N_episodes = 1000
for episodes in range(N_episodes):
    game.play()
    game.reset()

Q = game.Q

# NOTE(review): the "epsilon_09" part of the file name does not match the
# epsilon actually used above (0.02). The name is kept unchanged because other
# scripts may load the file by this exact name -- confirm and rename if wanted.
filename = "Q_epsilon_09_Nepisodes_{}.p".format(N_episodes)

# Use a context manager so the file is flushed and closed deterministically.
with open(filename, "wb") as q_file:
    pickle.dump(Q, q_file)
# Evaluation script: load a previously trained Q table, let two Q players play
# against each other, and write one outcome per game into column 1 of the
# active worksheet (1 = "X" won, -1 = "O" won, 0 = no winner).
# `wb` (the workbook), `pickle` and `tk` are expected to be defined/imported
# earlier in the file.
ws1 = wb.active
ws1.title = "Training Results"
i = 1

from Q_Learning_Tic_Tac_Toe import Game, HumanPlayer, QPlayer, Board

board = Board()

# SECURITY: pickle can execute arbitrary code while loading; only load Q tables
# that were produced by a trusted training run.
with open("Q_epsilon_09_Nepisodes_20000.p", "rb") as q_file:
    Q = pickle.load(q_file)

root = tk.Tk()
player1 = QPlayer(mark="X", epsilon=0.01)
player2 = QPlayer(mark="O", epsilon=0.01)
game = Game(root, player1, player2, Q=Q)

N_episodes = 20000
for episodes in range(N_episodes):
    game.play()

    # Record the outcome BEFORE resetting: reset() presumably replaces the
    # finished board and the current player, which would make every game look
    # like a draw if the result were read afterwards (confirm against
    # Game.reset).
    # Assumes current_player is still the player who made the final move once
    # play() returns -- TODO confirm against Game.
    if game.board.winner() is None:
        ws1.cell(row=i + 1, column=1, value=0)
    elif game.current_player.mark == "X":
        ws1.cell(row=i + 1, column=1, value=1)
    elif game.current_player.mark == "O":
        ws1.cell(row=i + 1, column=1, value=-1)

    game.reset()
    i += 1  # results start in row 2; row 1 is left free

# NOTE(review): the workbook is not saved anywhere in this section; make sure
# it is saved after the loop, otherwise the recorded results are lost.
# Training script: two Q-learning players ("X" and "O") play N_episodes games
# against each other with a progress bar; the Q table held by the Game object
# is then pickled.
import copy
import pickle
import tkinter as tk

import numpy as np
from tqdm import tqdm

from Q_Learning_Tic_Tac_Toe import Game, QPlayer  # Classes used for Tic Tac Toe

root = tk.Tk()

# Both players share the same epsilon; it is also encoded ("09") in the name
# of the output file below.
epsilon = 0.9
player1 = QPlayer(mark="X", epsilon=epsilon)
player2 = QPlayer(mark="O", epsilon=epsilon)
game = Game(root, player1, player2)

N_episodes = 200000
for episodes in tqdm(range(N_episodes)):
    game.play()
    game.reset(log=False)

Q = game.Q
filename = "Q_epsilon_09_Nepisodes_{}.p".format(N_episodes)

# Use a context manager so the file is flushed and closed deterministically.
with open(filename, "wb") as q_file:
    pickle.dump(Q, q_file)