class OthelloGame:
    """Console Othello match between a human (stdin) and an AI player.

    Args:
        net: Network object handed straight to ``AiPlayer``.
        ai_side: Side value the AI plays; the game loop starts with side -1.
        Tau: Passed through to ``AiPlayer`` unchanged.
        mcts_times: Passed through to ``AiPlayer`` unchanged.
    """

    def __init__(self, net, ai_side, Tau=0, mcts_times=100):
        self.ai_player = AiPlayer(net, ai_side, Tau, mcts_times)
        self.game = Othello()
        self.ai_side = ai_side

    def playgame(self):
        """Run one game to completion, alternating sides, then print the score.

        A side with no legal move is skipped. Human moves are read from stdin
        as two whitespace-separated integers and re-prompted until legal.

        Raises:
            EOFError: If stdin is closed while waiting for the human move.
        """
        side = -1
        while not self.game.game_over():
            self.game.print_board(side)
            print('score: ', self.game.getScore())
            # The board cannot change between this check and the human prompt,
            # so the legal moves are computed once per turn.
            legal_moves = self.game.possible_moves(side)
            if len(legal_moves) != 0:
                if side == self.ai_side:
                    self.ai_player.get_move(self.game)
                else:
                    while True:
                        try:
                            x, y = input("输入落子位置:").split()
                            print(x, y)
                            x, y = int(x), int(y)
                        except ValueError as e:
                            # Wrong token count or non-integer input: ask again.
                            # Only ValueError is caught so that EOFError (closed
                            # stdin) ends the game instead of looping forever,
                            # and real errors from the game engine surface.
                            print("输入错误, 重试", e)
                            continue
                        if (x, y) in legal_moves:
                            self.game.play_move(x, y, side)
                            break
            else:
                print("No where todo")
            side = -side
        print(self.game.getScore())
def run_games(config):
    """Play self-play games with the latest saved model and store the results.

    Each outer iteration reloads the last ``*.h5`` file (in sorted order) found
    under ``config.data.model_location`` if it changed, plays
    ``config.nb_game_in_file`` games in which the same ``AIPlayer`` moves for
    both sides, and hands the player's buffer to ``save_games``.

    Args:
        config: Configuration object; this function reads ``iterations``,
            ``data.model_location``, ``buffer_size``, ``nb_game_in_file`` and
            the ``game.simulation_num_per_move`` / ``game.tau_1`` /
            ``game.tau_2`` / ``game.tau_swap`` settings.
    """
    game = Othello()
    model = ""
    # The loop stops only when the counter reaches exactly 0, so a negative
    # ``config.iterations`` never terminates.
    x = config.iterations
    while(x != 0):
        x -= 1
        models = sorted(glob.glob(config.data.model_location+"*.h5"))
        if model == "":
            # First pass: build the player from the last model in sorted order.
            model = models[-1]
            print("Loading new model: %s" % util.getPlayerName(model))
            ai = AIPlayer(config.buffer_size, config.game.simulation_num_per_move, model=model)
        elif models[-1] != model:
            # A different model file is now last: swap it into the existing player.
            model = models[-1]
            print("Loading new model: %s" % util.getPlayerName(model))
            ai.load(model)
        start=time()
        for j in range(config.nb_game_in_file):
            util.print_progress_bar(j, config.nb_game_in_file, start=start)
            side = -1
            turn = 1
            while not game.game_over():
                # tau_1 is used until the turn counter passes tau_swap, then tau_2.
                ai.tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    ai.tau = config.game.tau_2
                t = ai.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1
            ai.update_buffer(game.get_winner())
            game.reset_board()
        #print("Average Game Time: ", (time()-start)/(config.nb_game_in_file))
        util.print_progress_bar(config.nb_game_in_file, config.nb_game_in_file, start=start)
        save_games(config, ai.buffer)
    # NOTE(review): inside this function ``t`` is the local bound to the last
    # ``ai.pick_move`` result (indexed as t[0], t[1] above), not a thread —
    # presumably this was meant to join a worker started by save_games; confirm.
    # NOTE(review): nesting of this call was reconstructed from collapsed
    # source; verify whether it belongs inside the outer loop.
    t.join()
class AppLogic(threading.Thread):
    """Worker thread that draws an 8x8 Othello board and runs human-vs-AI play.

    The thread starts itself from ``__init__``. ``run`` builds a 600x600 canvas
    (75-pixel cells), creates the AI player and then polls for clicks recorded
    by ``click`` until the game ends or the window is closed.

    Args:
        tk_root: The Tk root window the canvas is packed into.
    """

    def __init__(self, tk_root):
        self.root = tk_root
        threading.Thread.__init__(self)
        self.turn = 0
        # ``update`` is the hand-off flag from the click handler to ``run``;
        # ``x``/``y`` hold the board cell of the most recent click.
        self.update = False
        self.x = -1
        self.y = -1
        self.start()

    def run(self):
        """Build the GUI, then alternate AI and human moves until game over."""
        self.game_gui = Canvas(self.root, width=600, height=600, background='green')
        self.game_gui.bind("<Button-1>", self.click)
        self.game_gui.focus_set()
        self.game_gui.bind("<Key>", self.key)
        self.game_gui.pack()
        for i in range(1, 8):
            self.game_gui.create_line(0, i*75, 600, i*75)
            self.game_gui.create_line(i*75, 0, i*75, 600)
        # One oval per cell, initially painted in the background colour so it
        # is invisible until draw_board recolours it.
        self.pieces = []
        for i in range(8):
            self.pieces.append([])
            for j in range(8):
                self.pieces[i].append(self.game_gui.create_oval(i*75+5, j*75+5, (i+1)*75-5, (j+1)*75-5, fill="green", outline="green"))
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
        self.root.resizable(0,0)
        self.running = True
        config = EvaluateConfig()
        tf_util.update_memory(config.gpu_mem_fraction)
        AIPlayer.create_if_nonexistant(config)
        self.game = Othello()
        # The human is assigned a side at random; side -1 always moves first.
        if(random() > 0.5):
            self.human = 1
        else:
            self.human = -1
        ai = create_player(config.model_1, config)
        #print("You are playing against", config.model_1)
        #print("Playing games with %d simulations per move" % config.game.simulation_num_per_move)
        self.side = -1
        self.draw_board()
        self.value = ai.evaluate(self.game, self.side)
        while self.running and not self.game.game_over():
            if self.side != self.human:
                # AI turn.
                self.value = ai.evaluate(self.game, self.side)
                self.root.title("Othello (Thinking of Move) Current Value: %0.2f (1 white wins, -1 black wins)" % self.value)
                self.root.config(cursor="wait")
                t = ai.pick_move(self.game, self.side)
                if not self.running:
                    # The window was closed while the AI was thinking; the
                    # canvas no longer exists, so do not try to redraw it.
                    break
                self.game.play_move(t[0], t[1], self.side)
                self.draw_board()
                self.side *= -1
                self.value = ai.evaluate(self.game, self.side)
            else:
                # Human turn: pass automatically when there is no legal move.
                if len(self.game.possible_moves(self.side)) == 0:
                    self.side *= -1
                    continue
                if self.side == -1:
                    color = "black"
                else:
                    color = "white"
                self.root.title("Othello (Play as %s) Current Value: %0.2f (1 white wins, -1 black wins)" % (color, self.value))
                self.root.config(cursor="")
                if self.update:
                    self.update = False
                    if (self.x, self.y) in self.game.possible_moves(self.side):
                        self.game.play_move(self.x, self.y, self.side)
                        self.draw_board()
                        self.side *= -1
                # Short sleep so waiting for a click does not spin the CPU.
                time.sleep(0.01)
        if not self.running:
            # Closed mid-game: the root window has been destroyed, so there is
            # nothing left to update.
            return
        # Clear the "wait" cursor left behind when the AI made the last move.
        self.root.config(cursor="")
        if self.human == self.game.get_winner():
            self.root.title("Othello (You Win!)")
        elif self.game.get_winner() == 0:
            self.root.title("Othello (Its a draw!)")
        else:
            self.root.title("Othello (You Lose!)")

    def key(self, event):
        """Swap which side the human controls when 'z' is pressed."""
        if event.char == "z":
            self.human *= -1

    def click(self, event):
        """Record the clicked cell for ``run`` to consume on the human's turn."""
        self.game_gui.focus_set()
        if self.human == self.side and not self.update:
            # Write the coordinates before raising the flag so the polling
            # thread never pairs a fresh flag with stale coordinates. Repeat
            # clicks on the same cell are accepted: a square rejected earlier
            # may have become legal since.
            self.x = event.x//75
            self.y = event.y//75
            self.update = True

    def on_closing(self):
        """Stop the game loop and destroy the window."""
        self.running = False
        self.root.destroy()

    def draw_board(self):
        """Recolour every occupied cell: 1 is white, -1 is black."""
        for i in range(8):
            for j in range(8):
                if self.game.board[i, j] == 1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="white")
                if self.game.board[i, j] == -1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="black")
def run_games(config):
    """Pit two configured players against each other and report the results.

    Every iteration plays ``config.game_num`` games with the players swapping
    colours each game, prints the win/tie/loss percentages from player 1's
    point of view and a rolling average of the win percentage. The loop
    repeats only while ``config.repeat_with_new_model`` is set and at least
    one player is configured as "newest"; in that case each iteration first
    waits (polling once a minute) until a new model has been loaded.

    Args:
        config: Evaluation configuration (models, game count, tau settings,
            rolling-average window, model location).
    """
    game = Othello()
    model_1 = ""
    model_2 = ""
    p1, new_1 = create_player(config.model_1, model_1, config)
    p2, new_2 = create_player(config.model_2, model_2, config)
    iteration = len(glob.glob(config.data.model_location+"*.h5"))
    avg_wins = []
    while True:
        iteration += 1

        # Reload until every "newest" player has actually picked up a model
        # different from the one used in the previous iteration.
        while True:
            new_1 = load_player(p1, config.model_1, model_1, config)
            new_2 = load_player(p2, config.model_2, model_2, config)
            stale_1 = config.model_1 == "newest" and new_1 == model_1
            if not (stale_1 or (config.model_2 == "newest" and new_2 == model_2)):
                break
            sleep(60)
        model_1, model_2 = new_1, new_2

        wins = losses = ties = 0
        print("Iteration %04d"%iteration)
        print("Playing %d games with %d simulations per move" % (config.game_num, config.game.simulation_num_per_move))
        start = time()
        for game_idx in range(config.game_num):
            util.print_progress_bar(game_idx, config.game_num, start=start)
            # Player 1 takes side -1 in even games and side 1 in odd games.
            p1_side = -1 if game_idx % 2 == 0 else 1
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_2 if config.game.tau_swap < turn else config.game.tau_1
                if config.model_1 != "random":
                    p1.tau = tau
                if config.model_2 != "random":
                    p2.tau = tau
                mover = p1 if side == p1_side else p2
                move = mover.pick_move(game, side)
                game.play_move(move[0], move[1], side)
                side = -side
                turn += 1

            winner = game.get_winner()
            if winner == 0:
                ties += 1
            elif winner == p1_side:
                wins += 1
            else:
                losses += 1
            game.reset_board()

        util.print_progress_bar(config.game_num, config.game_num, start=start)
        print("%s vs %s: (%0.2f%% wins|%0.2f%% ties|%0.2f%% losses) of %d games" % (config.model_1, config.model_2, 100*wins/config.game_num, 100*ties/config.game_num, 100*losses/config.game_num, config.game_num))
        avg_wins.append(100*wins/config.game_num)
        if len(avg_wins) > config.rolling_avg_amount:
            avg_wins = avg_wins[-config.rolling_avg_amount:]
        print("Average Win Percent: %0.2f%%" % (sum(avg_wins)/float(len(avg_wins))))

        if not config.repeat_with_new_model or not (config.model_1 == "newest" or config.model_2 == "newest"):
            break
def run_games(config):
    """Evaluate two configured players against each other, logging every game.

    Every iteration plays ``config.game_num`` games with the players swapping
    colours each game. Each finished game is recorded through
    ``savePerformance`` as a (win, tie, loss) triple from player 1's point of
    view, and the iteration ends with the percentages and a rolling average
    of the win percentage. The loop repeats only while
    ``config.repeat_with_new_model`` is set and at least one player is
    configured as "newest"; each iteration then waits (polling once a minute)
    for a new model to be loaded.

    Args:
        config: Evaluation configuration (models, game count, tau settings,
            rolling-average window, model location).
    """
    game = Othello()
    model_1 = ""
    model_2 = ""
    p1, new_1 = create_player(config.model_1, model_1, config)
    p2, new_2 = create_player(config.model_2, model_2, config)
    # The iteration counter continues from the number of saved models only
    # when a "newest" player is involved; otherwise it starts from zero.
    if config.model_1 == "newest" or config.model_2 == "newest":
        iteration = len(glob.glob(config.data.model_location+"*.h5"))-1
    else:
        iteration = 0
    avg_wins = []
    while True:
        iteration += 1

        # Reload until every "newest" player has picked up a model different
        # from the one used in the previous iteration.
        while True:
            new_1 = load_player(p1, config.model_1, model_1, config)
            new_2 = load_player(p2, config.model_2, model_2, config)
            stale_1 = config.model_1 == "newest" and new_1 == model_1
            if not (stale_1 or (config.model_2 == "newest" and new_2 == model_2)):
                break
            sleep(60)
        model_1, model_2 = new_1, new_2

        wins = losses = ties = 0
        print("Iteration %04d"%iteration)
        print("Playing games between %s and %s" % (config.model_1, config.model_2))
        print("Playing %d games with %d simulations per move" % (config.game_num, config.game.simulation_num_per_move))
        start = time()
        for game_idx in range(config.game_num):
            util.print_progress_bar(game_idx, config.game_num, start=start)
            # Player 1 takes side -1 in even games and side 1 in odd games.
            p1_side = -1 if game_idx % 2 == 0 else 1
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_2 if config.game.tau_swap < turn else config.game.tau_1
                if config.model_1 != "random":
                    p1.tau = tau
                if config.model_2 != "random":
                    p2.tau = tau
                mover = p1 if side == p1_side else p2
                move = mover.pick_move(game, side)
                game.play_move(move[0], move[1], side)
                side = -side
                turn += 1

            winner = game.get_winner()
            if winner == 0:
                ties += 1
                savePerformance(config, model_1, model_2, 0, 1, 0)
            elif winner == p1_side:
                wins += 1
                savePerformance(config, model_1, model_2, 1, 0, 0)
            else:
                losses += 1
                savePerformance(config, model_1, model_2, 0, 0, 1)
            game.reset_board()

        util.print_progress_bar(config.game_num, config.game_num, start=start)
        print("%s vs %s: (%0.2f%% wins|%0.2f%% ties|%0.2f%% losses) of %d games" % (config.model_1, config.model_2, 100*wins/config.game_num, 100*ties/config.game_num, 100*losses/config.game_num, config.game_num))
        avg_wins.append(100*wins/config.game_num)
        if len(avg_wins) > config.rolling_avg_amount:
            avg_wins = avg_wins[-config.rolling_avg_amount:]
        print("Average Win Percent: %0.2f%%" % (sum(avg_wins)/float(len(avg_wins))))

        if not config.repeat_with_new_model or not (config.model_1 == "newest" or config.model_2 == "newest"):
            break
def calc_ranking(config):
    """Rank saved models with a king-of-the-hill tournament and print results.

    Players are every ``config.model_skip``-th ``*.h5`` file (sorted by name)
    under ``config.data.model_location``, plus the last file. In each round
    the current king (from ``getKingIndex``, or a rotating fallback while
    there is none) plays every other player once with colours chosen at
    random. Results accumulate in a pairwise win matrix, which is passed to
    ``choix.ilsr_pairwise_dense`` to produce the printed ratings.

    Args:
        config: Ranking configuration (model location, ``model_skip``,
            ``game_num_per_model``, ``print_king`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location + "*.h5"))
    # Always keep the last checkpoint even when it is off the skip stride.
    # The previous test compared against len(models), which enumerate() never
    # reaches, so the last model was silently dropped.
    last_model = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_model
    ]
    # wtl[p] holds (wins, ties, losses) for player p.
    wtl = np.zeros((len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()
    king_index = len(players) - 1
    king = AIPlayer(0, config.game.simulation_num_per_move, train=False,
                    model=players[king_index], tau=config.game.tau_1)
    challenger = AIPlayer(0, config.game.simulation_num_per_move, train=False,
                          model=players[0], tau=config.game.tau_1)
    total_games = (config.game_num_per_model * (len(players))) // 2
    played_games = 0
    start = time()
    print("Playing king of the hill with %d players and %d games per player" %
          (len(players), config.game_num_per_model))
    if config.game_num_per_model < len(players):
        print(
            "We suggest that you increase games per player to be greater than players"
        )

    # Each round is at most len(players) - 1 games (king vs. everyone else).
    for i in range(math.ceil(total_games / (len(players) - 1))):
        AIPlayer.clear()
        king_index = getKingIndex(win_matrix)
        if king_index == -1:
            # No king yet: rotate through the players, starting from the last.
            king_index = (len(players) - 1) - i % len(players)
            msg = "No King Yet"
        else:
            msg = "King is " + os.path.basename(
                players[king_index]).split(".")[0]
        king.load(players[king_index])
        if config.print_king:
            print(msg.ljust(90))
        for j in range(len(players)):
            util.print_progress_bar(played_games, total_games, start=start)
            if j == king_index:
                continue
            challenger.load(players[j])
            # p1 always plays side -1 (moves first); pick who that is at random.
            if random.random() < 0.5:
                king_side = -1
                p1 = king
                p2 = challenger
            else:
                king_side = 1
                p1 = challenger
                p2 = king
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1
            winner = game.get_winner()
            if winner == king_side:
                win_matrix[king_index, j] += 1
                wtl[king_index, 0] += 1
                wtl[j, 2] += 1
            elif winner == -1 * king_side:
                win_matrix[j, king_index] += 1
                wtl[king_index, 2] += 1
                wtl[j, 0] += 1
            else:
                # A tie counts as half a win for each side in the win matrix.
                win_matrix[king_index, j] += 0.5
                win_matrix[j, king_index] += 0.5
                wtl[king_index, 1] += 1
                wtl[j, 1] += 1
            game.reset_board()
            played_games += 1
            if played_games == total_games:
                break
    util.print_progress_bar(total_games, total_games, start=start)

    try:
        params = choix.ilsr_pairwise_dense(win_matrix)
        print("\nRankings:")
        for i, player in enumerate(np.argsort(params)[::-1]):
            print(
                "%d. %s (expected %d) with %0.2f rating and results of %d-%d-%d"
                % (i + 1, os.path.basename(players[player]).split(".")[0],
                   len(players) - player, params[player], wtl[player, 0],
                   wtl[player, 1], wtl[player, 2]))
        print(
            "\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)"
        )
    except Exception:
        # Best effort: the rating fit can fail on sparse results; fall back
        # to printing the raw data below.
        print("\n Not Enough data to calculate rankings")
        print("\nWin Matrix:")
        print(win_matrix)
        print("\nResults:")
        for player in range(win_matrix.shape[0]):
            print("%s results of %d-%d-%d" %
                  (os.path.basename(players[player]).split(".")[0],
                   wtl[player, 0], wtl[player, 1], wtl[player, 2]))
class AppLogic(threading.Thread):
    """Worker thread that draws an 8x8 Othello board and runs human-vs-AI play.

    The thread starts itself from ``__init__``. ``run`` builds a 600x600 canvas
    (75-pixel cells), creates the AI player and then polls for clicks recorded
    by ``click`` until the game ends or the window is closed.

    Args:
        tk_root: The Tk root window the canvas is packed into.
    """

    def __init__(self, tk_root):
        self.root = tk_root
        threading.Thread.__init__(self)
        self.turn = 0
        # ``update`` is the hand-off flag from the click handler to ``run``;
        # ``x``/``y`` hold the board cell of the most recent click.
        self.update = False
        self.x = -1
        self.y = -1
        self.start()

    def run(self):
        """Build the GUI, then alternate AI and human moves until game over."""
        self.game_gui = Canvas(self.root, width=600, height=600, background='green')
        self.game_gui.bind("<Button-1>", self.click)
        self.game_gui.focus_set()
        self.game_gui.bind("<Key>", self.key)
        self.game_gui.pack()
        for i in range(1, 8):
            self.game_gui.create_line(0, i*75, 600, i*75)
            self.game_gui.create_line(i*75, 0, i*75, 600)
        # One oval per cell, initially painted in the background colour so it
        # is invisible until draw_board recolours it.
        self.pieces = []
        for i in range(8):
            self.pieces.append([])
            for j in range(8):
                self.pieces[i].append(self.game_gui.create_oval(i*75+5, j*75+5, (i+1)*75-5, (j+1)*75-5, fill="green", outline="green"))
        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
        self.root.resizable(0,0)
        self.running = True
        config = EvaluateConfig()
        tf_util.update_memory(config.gpu_mem_fraction)
        AIPlayer.create_if_nonexistant(config)
        self.game = Othello()
        # The human is assigned a side at random; side -1 always moves first.
        if(random() > 0.5):
            self.human = 1
        else:
            self.human = -1
        ai = create_player(config.model_1, config)
        #print("You are playing against", config.model_1)
        #print("Playing games with %d simulations per move" % config.game.simulation_num_per_move)
        self.side = -1
        self.draw_board()
        self.value = ai.evaluate(self.game, self.side)
        while self.running and not self.game.game_over():
            if self.side != self.human:
                # AI turn.
                self.value = ai.evaluate(self.game, self.side)
                self.root.title("Othello (Thinking of Move) Current Value: %0.2f (1 white wins, -1 black wins)" % self.value)
                self.root.config(cursor="wait")
                t = ai.pick_move(self.game, self.side)
                if not self.running:
                    # The window was closed while the AI was thinking; the
                    # canvas no longer exists, so do not try to redraw it.
                    break
                self.game.play_move(t[0], t[1], self.side)
                self.draw_board()
                self.side *= -1
                self.value = ai.evaluate(self.game, self.side)
            else:
                # Human turn: pass automatically when there is no legal move.
                if len(self.game.possible_moves(self.side)) == 0:
                    self.side *= -1
                    continue
                if self.side == -1:
                    color = "black"
                else:
                    color = "white"
                self.root.title("Othello (Play as %s) Current Value: %0.2f (1 white wins, -1 black wins)" % (color, self.value))
                self.root.config(cursor="")
                if self.update:
                    self.update = False
                    if (self.x, self.y) in self.game.possible_moves(self.side):
                        self.game.play_move(self.x, self.y, self.side)
                        self.draw_board()
                        self.side *= -1
                # Short sleep so waiting for a click does not spin the CPU.
                time.sleep(0.01)
        if not self.running:
            # Closed mid-game: the root window has been destroyed, so there is
            # nothing left to update.
            return
        self.root.config(cursor="")
        if self.human == self.game.get_winner():
            self.root.title("Othello (You Win!)")
        elif self.game.get_winner() == 0:
            self.root.title("Othello (Its a draw!)")
        else:
            self.root.title("Othello (You Lose!)")

    def key(self, event):
        """Swap which side the human controls when 'z' is pressed."""
        if event.char == "z":
            self.human *= -1

    def click(self, event):
        """Record the clicked cell for ``run`` to consume on the human's turn."""
        self.game_gui.focus_set()
        if self.human == self.side and not self.update:
            # Write the coordinates before raising the flag so the polling
            # thread never pairs a fresh flag with stale coordinates. Repeat
            # clicks on the same cell are accepted: a square rejected earlier
            # may have become legal since.
            self.x = event.x//75
            self.y = event.y//75
            self.update = True

    def on_closing(self):
        """Stop the game loop and destroy the window."""
        self.running = False
        self.root.destroy()

    def draw_board(self):
        """Recolour every occupied cell: 1 is white, -1 is black."""
        for i in range(8):
            for j in range(8):
                if self.game.board[i, j] == 1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="white")
                if self.game.board[i, j] == -1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="black")
def calc_ranking(config):
    """Rank saved models with a randomised round robin and print the ratings.

    Players are every ``config.model_skip``-th ``*.h5`` file (sorted by name)
    under ``config.data.model_location``, plus the last file. For
    ``config.game_num_per_model // 2`` rounds every player, in random order,
    plays side -1 against the next opponent from its own shuffled opponent
    list. Results accumulate in a pairwise win matrix that is passed to
    ``choix.ilsr_pairwise_dense``.

    Args:
        config: Ranking configuration (model location, ``model_skip``,
            ``game_num_per_model`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location + "*.h5"))
    # Always keep the last checkpoint even when it is off the skip stride.
    # The previous test compared against len(models), which enumerate() never
    # reaches, so the last model was silently dropped.
    last_model = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_model
    ]
    # wtl[p] holds (wins, ties, losses) for player p.
    wtl = np.zeros((len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()

    # Give every player a random order in which to meet its opponents.
    order = []
    for i in range(len(players)):
        nums = [x for x in range(len(players))]
        nums.remove(i)
        random.shuffle(nums)
        order.append(nums)

    p1 = AIPlayer(1, config.game.simulation_num_per_move, model=players[0])
    p2 = AIPlayer(1, config.game.simulation_num_per_move,
                  model=players[order[0][0]])
    start = time()
    print(
        "Playing random round robin with %d players and %d games per player"
        % (len(players), config.game_num_per_model))
    rounds = config.game_num_per_model // 2
    for i in range(rounds):
        util.print_progress_bar(i, rounds, start=start)
        ordering = list(range(len(players)))
        random.shuffle(ordering)
        for j in ordering:
            AIPlayer.clear()
            x = i
            if x >= len(order[j]):
                # Wrap around the opponent list, reshuffling at each restart.
                x %= len(order[j])
                if x == 0:
                    random.shuffle(order[j])
            opponent = order[j][x]
            p1.load(players[j])
            p2.load(players[opponent])
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1
            # Player j always holds side -1 here, so winner -1 is a win for j.
            winner = game.get_winner()
            if winner == -1:
                win_matrix[j, opponent] += 1
                wtl[j, 0] += 1
                wtl[opponent, 2] += 1
            elif winner == 1:
                win_matrix[opponent, j] += 1
                wtl[j, 2] += 1
                wtl[opponent, 0] += 1
            else:
                # A tie counts as half a win for each side in the win matrix.
                win_matrix[j, opponent] += 0.5
                win_matrix[opponent, j] += 0.5
                wtl[j, 1] += 1
                wtl[opponent, 1] += 1
            game.reset_board()
    util.print_progress_bar(rounds, rounds, start=start)

    params = choix.ilsr_pairwise_dense(win_matrix)
    print("\nRankings:")
    for i, player in enumerate(np.argsort(params)[::-1]):
        print(
            "%d. %s (expected %d) with %0.2f rating and results of %d-%d-%d"
            % (i + 1, os.path.basename(players[player]),
               len(players) - player, params[player], wtl[player, 0],
               wtl[player, 1], wtl[player, 2]))
    print(
        "\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)"
    )
def calc_ranking(config):
    """Rank saved models by pairing each with its least-played opponent.

    Players are every ``config.model_skip``-th ``*.h5`` file (sorted by name)
    under ``config.data.model_location``, plus the last file. In each pass
    every player j meets the opponent returned by ``getLeastPlayed`` with
    colours chosen at random. Per-pass win/tie/loss counts are written out
    with ``saveWTL`` and the cumulative win matrix is passed to
    ``choix.ilsr_pairwise_dense`` for the final printed ratings.

    Args:
        config: Ranking configuration (model location, ``model_skip``,
            ``game_num_per_model``, ``print_best`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location+"*.h5"))
    # Always keep the last checkpoint even when it is off the skip stride.
    # The previous test compared against len(models), which enumerate() never
    # reaches, so the last model was silently dropped.
    last_model = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_model
    ]
    # wtl[a, b] holds (wins, ties, losses) of a against b for the current pass.
    wtl = np.zeros((len(players), len(players), 3))
    win_matrix = np.zeros((len(players),len(players)))
    game = Othello()
    challenger1 = AIPlayer(0, config.game.simulation_num_per_move, train=False, model=players[-1], tau=config.game.tau_1)
    challenger2 = AIPlayer(0, config.game.simulation_num_per_move, train=False, model=players[0], tau=config.game.tau_1)
    total_games = (config.game_num_per_model * (len(players)))//2
    played_games = 0
    finished = False
    start = time()
    print("Ranking with %d players and %d games per player" % (len(players), config.game_num_per_model))
    if config.game_num_per_model < len(players):
        print("We suggest that you increase games per player to be greater than players")

    # Keep making passes over the players until the game budget is used up.
    while True:
        ranks = getRankings(win_matrix)
        if len(ranks) == 0:
            msg = "No Clear Best Yet"
        else:
            msg = "Current Best is "+util.getPlayerName(players[ranks[-1]])
        if config.print_best:
            print(msg.ljust(90))
        for j in range(len(players)):
            util.print_progress_bar(played_games, total_games, start=start)
            challenger1_index = getLeastPlayed(win_matrix, j)
            AIPlayer.clear()
            challenger1.load(players[challenger1_index])
            challenger2.load(players[j])
            # p1 always plays side -1 (moves first); pick who that is at random.
            if random.random() < 0.5:
                challenger1_side = -1
                p1 = challenger1
                p2 = challenger2
            else:
                challenger1_side = 1
                p1 = challenger2
                p2 = challenger1
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1
            winner = game.get_winner()
            if winner == challenger1_side:
                win_matrix[challenger1_index,j] += 1
                wtl[challenger1_index, j,0] += 1
            elif winner == -1*challenger1_side:
                win_matrix[j, challenger1_index] += 1
                wtl[challenger1_index, j,2] += 1
            else:
                # A tie counts as half a win for each side in the win matrix.
                win_matrix[challenger1_index,j] += 0.5
                win_matrix[j, challenger1_index] += 0.5
                wtl[challenger1_index, j, 1] += 1
            game.reset_board()
            played_games += 1
            if played_games >= total_games:
                finished = True
                break
        # Persist this pass's counts, then start the next pass from zero.
        saveWTL(config, players, wtl)
        wtl = np.zeros((len(players), len(players), 3))
        if finished:
            break
    util.print_progress_bar(total_games, total_games, start=start)
    print("\n",[util.getPlayerName(player) for player in players])
    print("\nWin Matrix(row beat column):")
    print(win_matrix)
    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            params = choix.ilsr_pairwise_dense(win_matrix)
        print("\nRankings:")
        for i, player in enumerate(np.argsort(params)[::-1]):
            print("%d. %s (expected %d) with %0.2f rating"% (i+1, util.getPlayerName(players[player]), len(players)-player, params[player]))
        print("\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)")
    except Exception:
        # Best effort: the rating fit can fail on sparse results.
        print("\nNot Enough data to calculate rankings")
def calc_ranking(config):
    """Rank saved models by pairing each with its least-played opponent.

    Players are every ``config.model_skip``-th ``*.h5`` file (sorted by name)
    under ``config.data.model_location``, plus the last file. In each pass
    every player j meets the opponent returned by ``getLeastPlayed`` with
    colours chosen at random. Per-pass win/tie/loss counts are written out
    with ``saveWTL`` and the cumulative win matrix is passed to
    ``choix.ilsr_pairwise_dense`` for the final printed ratings.

    Args:
        config: Ranking configuration (model location, ``model_skip``,
            ``game_num_per_model``, ``print_best`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location + "*.h5"))
    # Always keep the last checkpoint even when it is off the skip stride.
    # The previous test compared against len(models), which enumerate() never
    # reaches, so the last model was silently dropped.
    last_model = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_model
    ]
    # wtl[a, b] holds (wins, ties, losses) of a against b for the current pass.
    wtl = np.zeros((len(players), len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()
    challenger1 = AIPlayer(0,
                           config.game.simulation_num_per_move,
                           train=False,
                           model=players[-1],
                           tau=config.game.tau_1)
    challenger2 = AIPlayer(0,
                           config.game.simulation_num_per_move,
                           train=False,
                           model=players[0],
                           tau=config.game.tau_1)
    total_games = (config.game_num_per_model * (len(players))) // 2
    played_games = 0
    finished = False
    start = time()
    print("Ranking with %d players and %d games per player" %
          (len(players), config.game_num_per_model))
    if config.game_num_per_model < len(players):
        print(
            "We suggest that you increase games per player to be greater than players"
        )

    # Keep making passes over the players until the game budget is used up.
    while True:
        ranks = getRankings(win_matrix)
        if len(ranks) == 0:
            msg = "No Clear Best Yet"
        else:
            msg = "Current Best is " + util.getPlayerName(players[ranks[-1]])
        if config.print_best:
            print(msg.ljust(90))
        for j in range(len(players)):
            util.print_progress_bar(played_games, total_games, start=start)
            challenger1_index = getLeastPlayed(win_matrix, j)
            AIPlayer.clear()
            challenger1.load(players[challenger1_index])
            challenger2.load(players[j])
            # p1 always plays side -1 (moves first); pick who that is at random.
            if random.random() < 0.5:
                challenger1_side = -1
                p1 = challenger1
                p2 = challenger2
            else:
                challenger1_side = 1
                p1 = challenger2
                p2 = challenger1
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1
            winner = game.get_winner()
            if winner == challenger1_side:
                win_matrix[challenger1_index, j] += 1
                wtl[challenger1_index, j, 0] += 1
            elif winner == -1 * challenger1_side:
                win_matrix[j, challenger1_index] += 1
                wtl[challenger1_index, j, 2] += 1
            else:
                # A tie counts as half a win for each side in the win matrix.
                win_matrix[challenger1_index, j] += 0.5
                win_matrix[j, challenger1_index] += 0.5
                wtl[challenger1_index, j, 1] += 1
            game.reset_board()
            played_games += 1
            if played_games >= total_games:
                finished = True
                break
        # Persist this pass's counts, then start the next pass from zero.
        saveWTL(config, players, wtl)
        wtl = np.zeros((len(players), len(players), 3))
        if finished:
            break
    util.print_progress_bar(total_games, total_games, start=start)
    print("\n", [util.getPlayerName(player) for player in players])
    print("\nWin Matrix(row beat column):")
    print(win_matrix)
    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            params = choix.ilsr_pairwise_dense(win_matrix)
        print("\nRankings:")
        for i, player in enumerate(np.argsort(params)[::-1]):
            print("%d. %s (expected %d) with %0.2f rating" %
                  (i + 1, util.getPlayerName(
                      players[player]), len(players) - player, params[player]))
        print(
            "\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)"
        )
    except Exception:
        # Best effort: the rating fit can fail on sparse results.
        print("\nNot Enough data to calculate rankings")
parser.add_argument("--iter", type=int, default=100) parser.add_argument("--start_iter", type=int) parser.add_argument("--log_dir", type=str, default="./logDQN.txt") args = parser.parse_args() log = open(args.log_dir,'a+',encoding='utf8') dqn = DQN() if args.start_iter: dqn.load("./model",args.start_iter) else: args.start_iter = -1 side = -1 for i_episode in range(args.start_iter+1,args.iter): game = Othello() while not game.game_over(): s = convert_board_to_feature(game.board, side) a = dqn.choose_action(s, side) game.play_move(a[0],a[1],side) s_p = convert_board_to_feature(game.board, side) # MCTS subGame = game.copy() subSide = side rp1 = RandomPlayer(side) rp2 = RandomPlayer(-side) while not subGame.game_over(): subGame.play_move(*rp2.pick_move(subGame),-side) subGame.play_move(*rp1.pick_move(subGame),side) winner = subGame.get_winner() # 注意,这个实现不区分执子方,只要记录s和a对应的r和s_p即可 if winner == side:
def self_play(i, net):
    """Play one self-play game with MCTS and take one training step on it.

    The game's (board, search probabilities, side) records are relabelled
    with the outcome, augmented through ``expand_func``, assembled into a
    batch of up to ``batch_size`` samples and used for a single optimizer
    step on ``net``.

    Args:
        i: Game index; only used in the progress message printed at the end.
        net: Callable network returning ``(probs, value)`` for a batch of
            inputs and exposing an ``optimizer`` attribute.

    Returns:
        Tuple ``(inputs, rollout_prob, labels)``: the batch that was trained
        on. ``inputs`` is reshaped to (-1, 1, 8, 8) and ``rollout_prob`` to
        (-1, 64) below.
    """
    st = time.time()
    net.optimizer.zero_grad()
    batch_size = 128
    state_data = []
    game = Othello()
    mctsTest = MCTS(net, 1000)
    mctsTest.virtualLoss(game)
    side = -1
    Tau = 1
    while not game.game_over():
        # Multiply the board by -side in place before searching: a no-op for
        # side -1, a sign flip for side 1.
        # presumably this presents the position from the mover's point of
        # view — TODO confirm against MCTS.search.
        game.board *= -side
        probs = mctsTest.search(game, Tau)
        # Tau *= 0.9
        state_data.append([game.board.copy(), probs, side])
        if np.sum(probs) > 0:
            # Sample an action index from probs by counting how many
            # cumulative sums a uniform random draw exceeds.
            action = np.sum(np.random.rand() > np.cumsum(probs))
            # action = np.argmax(probs)
            # Undo the sign flip before playing the real move.
            game.board *= -side
            game.play_move(*convert_mv_ind_to_tuple(action), side)
        else:
            # NOTE(review): this branch does not undo the ``board *= -side``
            # above and always passes side -1 — looks like the no-move/pass
            # case; confirm the board is left in the intended orientation
            # when side == 1.
            game.play_move(-1, -1, -1)
        side = -side
    winner = game.get_winner()
    # Turn the stored side into the training label: side * (-winner).
    for state, _ in enumerate(state_data):
        state_data[state][2] *= -winner
    expand_data = []
    for s in state_data:
        # Two distinct transforms, drawn from indices 0..6, per recorded state.
        for func_index in np.random.permutation(7)[:2]:
            expand_data.append(expand_func(s[0], s[1], s[2], func_index))
    np.random.shuffle(expand_data)
    # Every original state is kept; shuffled augmented samples fill the rest
    # of the batch up to batch_size.
    # NOTE(review): rows are [board, probs, label] with mixed shapes, so this
    # relies on NumPy building an object array — verify on the NumPy version
    # in use.
    batch_data = np.concatenate(
        [state_data, expand_data[:batch_size - len(state_data)]], axis=0)
    inputs = np.concatenate(batch_data[:, 0]).reshape(-1, 8, 8)[:, np.newaxis, :, :]
    rollout_prob = np.concatenate(batch_data[:, 1]).reshape(-1, 64)
    labels = batch_data[:, 2]
    my_probs, my_value = net(inputs)
    loss = loss_fn(my_value, labels, my_probs, rollout_prob)
    net.optimizer.zero_grad()  # clear gradients for next train
    loss.backward(retain_graph=True)
    net.optimizer.step()
    ed = time.time()
    print("%6d game, time=%4.4fs, loss = %5.5f" % (i, ed - st, float(loss)))
    return inputs, rollout_prob, labels