def train(debug, iterations, table):
    """Train two agents against each other by self-play on one shared table.

    Each pass through the loop either lets the player whose turn it is make
    one move, or -- when the game is over -- hands out rewards and resets the
    board and both agents' move histories. Passes that only reward and reset
    count towards ``iterations`` as well.

    Args:
        debug: When truthy, it is forwarded to the first agent's ``iterate``
            as ``print_q`` and, after every move, the position hash and the
            board are printed followed by a 0.1 s pause.
        iterations: Number of loop passes to run. The table is saved once
            the counter reaches 0. A negative value is never decremented,
            so training then runs until interrupted (Ctrl-C).
        table: Table object handed to both agents; this function calls its
            ``save_q_table()`` and reads the length of its ``table``
            attribute for progress output.

    Raises:
        SystemExit: After saving the table, when training is interrupted by
            ``KeyboardInterrupt``.
    """
    game = TicTacToe()
    ai1 = Agent(True, table)
    ai2 = Agent(True, table)

    try:
        while iterations != 0:
            if iterations > 0:
                iterations -= 1
                # Progress report every 100_000 passes (and on the last one).
                if iterations % 100_000 == 0:
                    print(iterations)
                    print("q_table len", len(table.table))

            if game.is_board_full() or game.get_winner():
                # Look the winner up once instead of once per branch.
                winner = game.get_winner()
                if winner is Player.ONE:
                    ai1.reward(1)
                    ai2.reward(0)
                elif winner is Player.TWO:
                    ai1.reward(0)
                    ai2.reward(1)
                else:
                    # Draw. NOTE(review): the two agents get different draw
                    # rewards (0.1 vs 0.5); kept exactly as in the original
                    # -- confirm this asymmetry is intended.
                    ai1.reward(0.1)
                    ai2.reward(0.5)
                game.reset()
                ai1.reset_history()
                ai2.reset_history()
                continue

            # Only the first agent receives the debug flag.
            if game.get_player() == Player.ONE:
                ai1.iterate(game, print_q=debug)
            else:
                ai2.iterate(game)

            if debug:
                print()
                print(game.get_hash())
                game.print_board()
                time.sleep(0.1)

        # Counter reached zero: persist what was learned.
        table.save_q_table()
    except KeyboardInterrupt:
        # Persist progress before leaving. SystemExit is raised directly
        # because the bare ``exit()`` helper only exists when the ``site``
        # module has been loaded.
        table.save_q_table()
        raise SystemExit
table, )) processes.append(p) p.start() for process in processes: process.join() else: game = TicTacToe() ai1 = Agent(True, table) table.load_q_table() while True: print() game.print_board() if game.is_board_full() or game.get_winner(): if game.get_winner(): print(game.get_winner(), "has won!") else: print("It's a tie!") restart = input("Do you want to restart? (Y/n) ") if restart == "n": break else: game.reset() continue if game.get_player() is Player.TWO: field = input( str(game.get_player(human=True)) + " it's your turn! Input a field between 1-9: ")