Exemplo n.º 1
0
class OthelloGame:
    """Console Othello match between a human at the keyboard and an AI player."""

    def __init__(self, net, ai_side, Tau=0, mcts_times=100):
        """Create the AI player and a fresh game.

        Args:
            net: Forwarded unchanged to ``AiPlayer``.
            ai_side: Side value (compared against -1 / 1 in ``playgame``)
                that the AI controls.
            Tau: Forwarded unchanged to ``AiPlayer``.
            mcts_times: Forwarded unchanged to ``AiPlayer``.
        """
        self.ai_player = AiPlayer(net, ai_side, Tau, mcts_times)
        self.game = Othello()
        self.ai_side = ai_side

    def _prompt_human_move(self, side):
        """Keep asking on stdin for "x y" until a legal move has been played."""
        placed = False
        while not placed:
            try:
                raw_x, raw_y = input("输入落子位置:").split()
                print(raw_x, raw_y)
                move = (int(raw_x), int(raw_y))
                # An illegal square is ignored without a message; the
                # prompt is simply shown again.
                if move in self.game.possible_moves(side):
                    self.game.play_move(move[0], move[1], side)
                    placed = True
            except Exception as err:
                print("输入错误, 重试", err)

    def playgame(self):
        """Alternate turns, starting with side -1, until the game is over.

        Each turn prints the board and the score. A side without legal moves
        passes. The final score is printed once the game has ended.
        """
        board = self.game
        side = -1
        while not board.game_over():
            board.print_board(side)
            print('score: ', board.getScore())
            if len(board.possible_moves(side)) == 0:
                print("No where todo")
            elif side == self.ai_side:
                # The return value is not used here; presumably get_move
                # applies the chosen move to the game itself -- TODO confirm.
                self.ai_player.get_move(board)
            else:
                self._prompt_human_move(side)
            side = -side
        print(board.getScore())
def run_games(config):
    """Generate self-play games with the newest model and save them.

    For ``config.iterations`` rounds the newest ``*.h5`` file (last in sorted
    filename order) under ``config.data.model_location`` is loaded, then
    ``config.nb_game_in_file`` games are played by a single ``AIPlayer``
    against itself and the player's buffer is handed to ``save_games``.

    A negative ``config.iterations`` never counts down to zero, so the outer
    loop then runs indefinitely.

    Args:
        config: Configuration object; the attributes read here are
            ``iterations``, ``data.model_location``, ``buffer_size``,
            ``nb_game_in_file`` and ``game.simulation_num_per_move`` /
            ``game.tau_1`` / ``game.tau_2`` / ``game.tau_swap``.

    Raises:
        IndexError: If no ``*.h5`` model file is found.
    """
    game = Othello()
    model = ""
    x = config.iterations
    while x != 0:
        x -= 1
        models = sorted(glob.glob(config.data.model_location+"*.h5"))
        if model == "":
            # First round: build the player from the newest model.
            model = models[-1]
            print("Loading new model: %s" % util.getPlayerName(model))
            ai = AIPlayer(config.buffer_size, config.game.simulation_num_per_move, model=model)
        elif models[-1] != model:
            # A newer model appeared since the last round: reload in place.
            model = models[-1]
            print("Loading new model: %s" % util.getPlayerName(model))
            ai.load(model)

        start = time()
        for j in range(config.nb_game_in_file):
            util.print_progress_bar(j, config.nb_game_in_file, start=start)
            side = -1
            turn = 1
            while not game.game_over():
                # tau_1 is used up to and including turn tau_swap, tau_2 afterwards.
                ai.tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    ai.tau = config.game.tau_2
                move = ai.pick_move(game, side)
                game.play_move(move[0], move[1], side)
                side *= -1
                turn += 1
            ai.update_buffer(game.get_winner())
            game.reset_board()
        util.print_progress_bar(config.nb_game_in_file, config.nb_game_in_file, start=start)
        save_games(config, ai.buffer)
    # The original ended with ``t.join()``, where ``t`` was the last move
    # returned by pick_move (not a thread); that call could only raise, so it
    # has been removed.
Exemplo n.º 3
0
class AppLogic(threading.Thread):
    """Worker thread that runs a human-vs-AI Othello game on a Tk canvas.

    The thread starts itself from ``__init__``. ``run`` builds the board
    widgets and then polls until the game is over or the window is closed.
    The Tk callbacks (``click``, ``key``) hand user input to that loop through
    the ``update`` / ``x`` / ``y`` / ``human`` attributes.

    NOTE(review): widgets are created and updated from this worker thread
    rather than from the thread running the Tk main loop; confirm this is
    safe on the targeted Tk builds.
    """

    def __init__(self, tk_root):
        """Remember the Tk root, reset the click hand-off state and start the thread."""
        self.root = tk_root
        threading.Thread.__init__(self)
        self.turn = 0
        # True while a click is waiting to be consumed by the game loop.
        self.update = False
        # Board cell of the most recent accepted click (-1 = none yet).
        self.x = -1
        self.y = -1
        self.start()

    def run(self):
        """Build the board, then play until the game ends or the window closes."""
        self.game_gui = Canvas(self.root, width=600, height=600, background='green')
        self.game_gui.bind("<Button-1>", self.click)
        self.game_gui.focus_set()
        self.game_gui.bind("<Key>", self.key)
        self.game_gui.pack()
        # Seven lines in each direction split the 600px canvas into 8x8 cells of 75px.
        for i in range(1, 8):
            self.game_gui.create_line(0, i*75, 600, i*75)
            self.game_gui.create_line(i*75, 0, i*75, 600)

        # One oval per cell, drawn in the background colour until a piece is placed.
        self.pieces = []
        for i in range(8):
            self.pieces.append([])
            for j in range(8):
                self.pieces[i].append(self.game_gui.create_oval(i*75+5, j*75+5, (i+1)*75-5, (j+1)*75-5, fill="green", outline="green"))

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
        self.root.resizable(0,0)
        self.running = True
        config = EvaluateConfig()
        tf_util.update_memory(config.gpu_mem_fraction)
        AIPlayer.create_if_nonexistant(config)
        self.game = Othello()
        # The human gets a random colour: 1 or -1 with equal probability.
        if(random() > 0.5):
            self.human = 1
        else:
            self.human = -1

        ai = create_player(config.model_1, config)
        self.side = -1
        self.draw_board()
        self.value = ai.evaluate(self.game, self.side)
        while self.running and not self.game.game_over():
            if self.side != self.human:
                # AI turn: show the evaluation, pick and play a move.
                self.value = ai.evaluate(self.game, self.side)
                self.root.title("Othello (Thinking of Move) Current Value: %0.2f (1 white wins, -1 black wins)" % self.value)
                self.root.config(cursor="wait")
                t = ai.pick_move(self.game, self.side)
                self.game.play_move(t[0], t[1], self.side)
                self.draw_board()
                self.side *= -1
                self.value = ai.evaluate(self.game, self.side)
            else:
                # Human turn: pass when there is no legal move.
                if len(self.game.possible_moves(self.side)) == 0:
                    self.side *= -1
                    continue
                if self.side == -1:
                    color = "black"
                else:
                    color = "white"
                self.root.title("Othello (Play as %s) Current Value: %0.2f (1 white wins, -1 black wins)" % (color, self.value))
                self.root.config(cursor="")
                if self.update:
                    # Consume the pending click; an illegal square is ignored.
                    self.update = False
                    if (self.x, self.y) in self.game.possible_moves(self.side):
                        self.game.play_move(self.x, self.y, self.side)
                        self.draw_board()
                        self.side *= -1
            time.sleep(0.01)

        if not self.running:
            # The window was closed mid-game: the root has been destroyed and
            # there is no finished game to report, so do not touch it.
            return
        # Clear the "wait" cursor, which is still set if the AI moved last.
        self.root.config(cursor="")
        winner = self.game.get_winner()
        if self.human == winner:
            self.root.title("Othello (You Win!)")
        elif winner == 0:
            self.root.title("Othello (Its a draw!)")
        else:
            self.root.title("Othello (You Lose!)")

    def key(self, event):
        """Swap the colour controlled by the human when "z" is pressed."""
        if event.char == "z":
            self.human *= -1

    def click(self, event):
        """Queue a click for the game loop when it is the human's turn.

        The pixel position is converted to a board cell (75px per cell). A
        click on the same cell as the previously accepted click is ignored, as
        is any click while an earlier one is still pending.
        """
        self.game_gui.focus_set()
        if self.human == self.side and not self.update:
            if self.x != event.x//75 or self.y != event.y//75:
                self.update = True
                self.x = event.x//75
                self.y = event.y//75

    def on_closing(self):
        """Ask the game loop to stop and destroy the window."""
        self.running = False
        self.root.destroy()

    def draw_board(self):
        """Recolour the ovals of every occupied cell (1 = white, -1 = black)."""
        for i in range(8):
            for j in range(8):
                if self.game.board[i, j] == 1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="white")
                if self.game.board[i, j] == -1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="black")
Exemplo n.º 4
0
def run_games(config):
    """Play evaluation matches between two configured players and print win rates.

    Each iteration (re)loads both players, waits while a player configured as
    "newest" has not changed since the previous iteration, then plays
    ``config.game_num`` games with the colours swapped every game. Results
    are counted from player 1's point of view and a rolling average of its
    win percentage is printed. The loop repeats only when
    ``config.repeat_with_new_model`` is set and at least one player is
    "newest".

    Args:
        config: Evaluation configuration (``model_1``, ``model_2``,
            ``game_num``, ``rolling_avg_amount``, ``repeat_with_new_model``,
            ``data.model_location`` and the ``game`` settings).
    """
    game = Othello()
    model_1 = ""
    model_2 = ""
    p1, new_1 = create_player(config.model_1, model_1, config)
    p2, new_2 = create_player(config.model_2, model_2, config)
    i = len(glob.glob(config.data.model_location+"*.h5"))
    avg_wins = []
    while True:
        i += 1
        # Reload until every "newest" player differs from the previous round,
        # sleeping one minute between attempts.
        while True:
            new_1 = load_player(p1, config.model_1, model_1, config)
            new_2 = load_player(p2, config.model_2, model_2, config)
            stale_1 = config.model_1 == "newest" and new_1 == model_1
            stale_2 = config.model_2 == "newest" and new_2 == model_2
            if not (stale_1 or stale_2):
                break
            sleep(60)
        model_1, model_2 = new_1, new_2

        wins = losses = ties = 0
        print("Iteration %04d"%i)
        print("Playing %d games with %d simulations per move" % (config.game_num, config.game.simulation_num_per_move))
        start = time()
        for j in range(config.game_num):
            util.print_progress_bar(j, config.game_num, start=start)
            # Player 1 takes side -1 on even games and side 1 on odd games.
            p1_moves_first = j % 2 == 0
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_2 if config.game.tau_swap < turn else config.game.tau_1
                if config.model_1 != "random":
                    p1.tau = tau
                if config.model_2 != "random":
                    p2.tau = tau
                mover = p1 if (side == -1) == p1_moves_first else p2
                t = mover.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side = -side
                turn += 1

            winner = game.get_winner()
            p1_side = -1 if p1_moves_first else 1
            if winner == 0:
                ties += 1
            elif winner == p1_side:
                wins += 1
            else:
                losses += 1
            game.reset_board()

        util.print_progress_bar(config.game_num, config.game_num, start=start)
        win_pct = 100*wins/config.game_num
        tie_pct = 100*ties/config.game_num
        loss_pct = 100*losses/config.game_num
        print("%s vs %s: (%0.2f%% wins|%0.2f%% ties|%0.2f%% losses) of %d games" % (
            config.model_1, config.model_2, win_pct, tie_pct, loss_pct, config.game_num))

        # Keep only the most recent ``rolling_avg_amount`` percentages.
        avg_wins.append(win_pct)
        if len(avg_wins) > config.rolling_avg_amount:
            avg_wins = avg_wins[-config.rolling_avg_amount:]
        print("Average Win Percent: %0.2f%%" % (sum(avg_wins)/float(len(avg_wins))))

        tracks_newest = config.model_1 == "newest" or config.model_2 == "newest"
        if not (config.repeat_with_new_model and tracks_newest):
            break
Exemplo n.º 5
0
def run_games(config):
    """Play evaluation matches between two players, recording every result.

    Each iteration (re)loads both players, waits while a player configured as
    "newest" has not changed since the previous iteration, then plays
    ``config.game_num`` games with the colours swapped every game. Every
    game result is passed to ``savePerformance`` as a (win, tie, loss)
    triple from player 1's point of view, and a rolling average of its win
    percentage is printed. The loop repeats only when
    ``config.repeat_with_new_model`` is set and at least one player is
    "newest".

    Args:
        config: Evaluation configuration (``model_1``, ``model_2``,
            ``game_num``, ``rolling_avg_amount``, ``repeat_with_new_model``,
            ``data.model_location`` and the ``game`` settings).
    """
    game = Othello()
    model_1 = ""
    model_2 = ""
    p1, new_1 = create_player(config.model_1, model_1, config)
    p2, new_2 = create_player(config.model_2, model_2, config)
    # The iteration counter starts from the model count only when a player
    # follows the newest model; otherwise it starts at zero.
    i = 0
    if config.model_1 == "newest" or config.model_2 == "newest":
        i = len(glob.glob(config.data.model_location+"*.h5"))-1
    avg_wins = []
    while True:
        i += 1
        # Reload until every "newest" player differs from the previous round,
        # sleeping one minute between attempts.
        while True:
            new_1 = load_player(p1, config.model_1, model_1, config)
            new_2 = load_player(p2, config.model_2, model_2, config)
            stale_1 = config.model_1 == "newest" and new_1 == model_1
            stale_2 = config.model_2 == "newest" and new_2 == model_2
            if not (stale_1 or stale_2):
                break
            sleep(60)
        model_1, model_2 = new_1, new_2

        wins = losses = ties = 0
        print("Iteration %04d"%i)
        print("Playing games between %s and %s" % (config.model_1, config.model_2))
        print("Playing %d games with %d simulations per move" % (config.game_num, config.game.simulation_num_per_move))
        start = time()
        for j in range(config.game_num):
            util.print_progress_bar(j, config.game_num, start=start)
            # Player 1 takes side -1 on even games and side 1 on odd games.
            p1_moves_first = j % 2 == 0
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_2 if config.game.tau_swap < turn else config.game.tau_1
                if config.model_1 != "random":
                    p1.tau = tau
                if config.model_2 != "random":
                    p2.tau = tau
                mover = p1 if (side == -1) == p1_moves_first else p2
                t = mover.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side = -side
                turn += 1

            winner = game.get_winner()
            p1_side = -1 if p1_moves_first else 1
            if winner == 0:
                ties += 1
                savePerformance(config, model_1, model_2, 0, 1, 0)
            elif winner == p1_side:
                wins += 1
                savePerformance(config, model_1, model_2, 1, 0, 0)
            else:
                losses += 1
                savePerformance(config, model_1, model_2, 0, 0, 1)
            game.reset_board()

        util.print_progress_bar(config.game_num, config.game_num, start=start)
        win_pct = 100*wins/config.game_num
        tie_pct = 100*ties/config.game_num
        loss_pct = 100*losses/config.game_num
        print("%s vs %s: (%0.2f%% wins|%0.2f%% ties|%0.2f%% losses) of %d games" % (
            config.model_1, config.model_2, win_pct, tie_pct, loss_pct, config.game_num))

        # Keep only the most recent ``rolling_avg_amount`` percentages.
        avg_wins.append(win_pct)
        if len(avg_wins) > config.rolling_avg_amount:
            avg_wins = avg_wins[-config.rolling_avg_amount:]
        print("Average Win Percent: %0.2f%%" % (sum(avg_wins)/float(len(avg_wins))))

        tracks_newest = config.model_1 == "newest" or config.model_2 == "newest"
        if not (config.repeat_with_new_model and tracks_newest):
            break
Exemplo n.º 6
0
def calc_ranking(config):
    """Rank saved models with a "king of the hill" tournament.

    Every ``config.model_skip``-th ``*.h5`` file under
    ``config.data.model_location`` (in sorted filename order), plus the last
    one, takes part. In each round the current king (as returned by
    ``getKingIndex``, or a rotating fallback while there is none) plays one
    game against every other player with a random colour assignment. Wins
    are accumulated in a pairwise matrix that is finally passed to
    ``choix.ilsr_pairwise_dense``; if that fails, the raw win matrix and
    win-tie-loss records are printed instead.

    Args:
        config: Ranking configuration (``model_skip``, ``game_num_per_model``,
            ``print_king``, ``data.model_location`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location + "*.h5"))
    # Always keep the last model in addition to the strided ones. The
    # previous test compared the index against len(models), a value
    # enumerate() never yields, so the last model was dropped unless it
    # happened to fall on the stride.
    last_index = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_index
    ]

    # wtl[p] = (wins, ties, losses) of player p; win_matrix[a, b] = score of a over b.
    wtl = np.zeros((len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()

    king_index = len(players) - 1
    king = AIPlayer(0,
                    config.game.simulation_num_per_move,
                    train=False,
                    model=players[king_index],
                    tau=config.game.tau_1)
    challenger = AIPlayer(0,
                          config.game.simulation_num_per_move,
                          train=False,
                          model=players[0],
                          tau=config.game.tau_1)
    total_games = (config.game_num_per_model * (len(players))) // 2
    played_games = 0
    start = time()
    print("Playing king of the hill with %d players and %d games per player" %
          (len(players), config.game_num_per_model))
    if config.game_num_per_model < len(players):
        print(
            "We suggest that you increase games per player to be greater than players"
        )
    # Each round plays len(players) - 1 games, hence the number of rounds.
    for i in range(math.ceil(total_games / (len(players) - 1))):
        AIPlayer.clear()
        king_index = getKingIndex(win_matrix)
        if king_index == -1:
            # No king yet: rotate through the players, starting from the last.
            king_index = (len(players) - 1) - i % len(players)
            msg = "No King Yet"
        else:
            msg = "King is " + os.path.basename(
                players[king_index]).split(".")[0]
        king.load(players[king_index])
        if config.print_king:
            print(msg.ljust(90))
        for j in range(len(players)):
            util.print_progress_bar(played_games, total_games, start=start)

            if j == king_index:
                continue

            challenger.load(players[j])

            # Side -1 always moves first; pick at random who gets it.
            if random.random() < 0.5:
                king_side = -1
                p1 = king
                p2 = challenger
            else:
                king_side = 1
                p1 = challenger
                p2 = king
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1

            winner = game.get_winner()
            if winner == king_side:
                win_matrix[king_index, j] += 1
                wtl[king_index, 0] += 1
                wtl[j, 2] += 1
            elif winner == -1 * king_side:
                win_matrix[j, king_index] += 1
                wtl[king_index, 2] += 1
                wtl[j, 0] += 1
            else:
                # A tie counts as half a win for each side.
                win_matrix[king_index, j] += 0.5
                win_matrix[j, king_index] += 0.5
                wtl[king_index, 1] += 1
                wtl[j, 1] += 1
            game.reset_board()
            played_games += 1
            if played_games == total_games:
                break
    util.print_progress_bar(total_games, total_games, start=start)
    try:
        params = choix.ilsr_pairwise_dense(win_matrix)
        print("\nRankings:")
        for i, player in enumerate(np.argsort(params)[::-1]):
            print(
                "%d. %s (expected %d) with %0.2f rating and results of %d-%d-%d"
                % (i + 1, os.path.basename(players[player]).split(".")[0],
                   len(players) - player, params[player], wtl[player, 0],
                   wtl[player, 1], wtl[player, 2]))
        print(
            "\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)"
        )
    except Exception:
        # Rating estimation failed; fall back to printing the raw results.
        print("\n Not Enough data to calculate rankings")
        print("\nWin Matrix:")
        print(win_matrix)
        print("\nResults:")
        for player in range(win_matrix.shape[0]):
            print("%s results of %d-%d-%d" %
                  (os.path.basename(players[player]).split(".")[0],
                   wtl[player, 0], wtl[player, 1], wtl[player, 2]))
Exemplo n.º 7
0
class AppLogic(threading.Thread):
    """Worker thread that runs a human-vs-AI Othello game on a Tk canvas.

    The thread starts itself from ``__init__``. ``run`` builds the board
    widgets and then polls until the game is over or the window is closed.
    The Tk callbacks (``click``, ``key``) hand user input to that loop through
    the ``update`` / ``x`` / ``y`` / ``human`` attributes.

    NOTE(review): widgets are created and updated from this worker thread
    rather than from the thread running the Tk main loop; confirm this is
    safe on the targeted Tk builds.
    """

    def __init__(self, tk_root):
        """Remember the Tk root, reset the click hand-off state and start the thread."""
        self.root = tk_root
        threading.Thread.__init__(self)
        self.turn = 0
        # True while a click is waiting to be consumed by the game loop.
        self.update = False
        # Board cell of the most recent accepted click (-1 = none yet).
        self.x = -1
        self.y = -1
        self.start()

    def run(self):
        """Build the board, then play until the game ends or the window closes."""
        self.game_gui = Canvas(self.root, width=600, height=600, background='green')
        self.game_gui.bind("<Button-1>", self.click)
        self.game_gui.focus_set()
        self.game_gui.bind("<Key>", self.key)
        self.game_gui.pack()
        # Seven lines in each direction split the 600px canvas into 8x8 cells of 75px.
        for i in range(1, 8):
            self.game_gui.create_line(0, i*75, 600, i*75)
            self.game_gui.create_line(i*75, 0, i*75, 600)

        # One oval per cell, drawn in the background colour until a piece is placed.
        self.pieces = []
        for i in range(8):
            self.pieces.append([])
            for j in range(8):
                self.pieces[i].append(self.game_gui.create_oval(i*75+5, j*75+5, (i+1)*75-5, (j+1)*75-5, fill="green", outline="green"))

        self.root.protocol("WM_DELETE_WINDOW", self.on_closing)
        self.root.resizable(0,0)
        self.running = True
        config = EvaluateConfig()
        tf_util.update_memory(config.gpu_mem_fraction)
        AIPlayer.create_if_nonexistant(config)
        self.game = Othello()
        # The human gets a random colour: 1 or -1 with equal probability.
        if(random() > 0.5):
            self.human = 1
        else:
            self.human = -1

        ai = create_player(config.model_1, config)
        self.side = -1
        self.draw_board()
        self.value = ai.evaluate(self.game, self.side)
        while self.running and not self.game.game_over():
            if self.side != self.human:
                # AI turn: show the evaluation, pick and play a move.
                self.value = ai.evaluate(self.game, self.side)
                self.root.title("Othello (Thinking of Move) Current Value: %0.2f (1 white wins, -1 black wins)" % self.value)
                self.root.config(cursor="wait")
                t = ai.pick_move(self.game, self.side)
                self.game.play_move(t[0], t[1], self.side)
                self.draw_board()
                self.side *= -1
                self.value = ai.evaluate(self.game, self.side)
            else:
                # Human turn: pass when there is no legal move.
                if len(self.game.possible_moves(self.side)) == 0:
                    self.side *= -1
                    continue
                if self.side == -1:
                    color = "black"
                else:
                    color = "white"
                self.root.title("Othello (Play as %s) Current Value: %0.2f (1 white wins, -1 black wins)" % (color, self.value))
                self.root.config(cursor="")
                if self.update:
                    # Consume the pending click; an illegal square is ignored.
                    self.update = False
                    if (self.x, self.y) in self.game.possible_moves(self.side):
                        self.game.play_move(self.x, self.y, self.side)
                        self.draw_board()
                        self.side *= -1
            time.sleep(0.01)

        if not self.running:
            # The window was closed mid-game: the root has been destroyed and
            # there is no finished game to report, so do not touch it.
            return
        # Clear the "wait" cursor, which is still set if the AI moved last.
        self.root.config(cursor="")
        winner = self.game.get_winner()
        if self.human == winner:
            self.root.title("Othello (You Win!)")
        elif winner == 0:
            self.root.title("Othello (Its a draw!)")
        else:
            self.root.title("Othello (You Lose!)")

    def key(self, event):
        """Swap the colour controlled by the human when "z" is pressed."""
        if event.char == "z":
            self.human *= -1

    def click(self, event):
        """Queue a click for the game loop when it is the human's turn.

        The pixel position is converted to a board cell (75px per cell). A
        click on the same cell as the previously accepted click is ignored, as
        is any click while an earlier one is still pending.
        """
        self.game_gui.focus_set()
        if self.human == self.side and not self.update:
            if self.x != event.x//75 or self.y != event.y//75:
                self.update = True
                self.x = event.x//75
                self.y = event.y//75

    def on_closing(self):
        """Ask the game loop to stop and destroy the window."""
        self.running = False
        self.root.destroy()

    def draw_board(self):
        """Recolour the ovals of every occupied cell (1 = white, -1 = black)."""
        for i in range(8):
            for j in range(8):
                if self.game.board[i, j] == 1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="white")
                if self.game.board[i, j] == -1:
                    self.game_gui.itemconfig(self.pieces[i][j], fill="black")
Exemplo n.º 8
0
def calc_ranking(config):
    """Rank saved models with a randomised round-robin tournament.

    Every ``config.model_skip``-th ``*.h5`` file under
    ``config.data.model_location`` (in sorted filename order), plus the last
    one, takes part. Each player gets its own shuffled list of opponents; in
    round ``i`` every player plays side -1 against the ``i``-th opponent of
    its list (wrapping around and reshuffling when the list is exhausted).
    The pairwise win matrix is passed to ``choix.ilsr_pairwise_dense`` and
    the resulting ranking is printed.

    Args:
        config: Ranking configuration (``model_skip``, ``game_num_per_model``,
            ``data.model_location`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location + "*.h5"))
    # Always keep the last model in addition to the strided ones. The
    # previous test compared the index against len(models), a value
    # enumerate() never yields, so the last model was dropped unless it
    # happened to fall on the stride.
    last_index = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_index
    ]

    # wtl[p] = (wins, ties, losses) of player p; win_matrix[a, b] = score of a over b.
    wtl = np.zeros((len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()

    # Give every player a random order in which to meet its opponents.
    order = []
    for i in range(len(players)):
        nums = [x for x in range(len(players)) if x != i]
        random.shuffle(nums)
        order.append(nums)

    p1 = AIPlayer(1, config.game.simulation_num_per_move, model=players[0])
    p2 = AIPlayer(1,
                  config.game.simulation_num_per_move,
                  model=players[order[0][0]])

    rounds = config.game_num_per_model // 2
    start = time()
    print(
        "Playing random round robin with %d players and %d games per player" %
        (len(players), config.game_num_per_model))
    for i in range(rounds):
        util.print_progress_bar(i, rounds, start=start)
        ordering = list(range(len(players)))
        random.shuffle(ordering)
        for j in ordering:
            AIPlayer.clear()
            x = i
            if x >= len(order[j]):
                # Opponent list exhausted: wrap around, reshuffling at each
                # new pass through the list.
                x %= len(order[j])
                if x == 0:
                    random.shuffle(order[j])
            opponent = order[j][x]

            p1.load(players[j])
            p2.load(players[opponent])

            # Player j always takes side -1 and therefore moves first.
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1

            winner = game.get_winner()
            if winner == -1:
                win_matrix[j, opponent] += 1
                wtl[j, 0] += 1
                wtl[opponent, 2] += 1
            elif winner == 1:
                win_matrix[opponent, j] += 1
                wtl[j, 2] += 1
                wtl[opponent, 0] += 1
            else:
                # A tie counts as half a win for each side.
                win_matrix[j, opponent] += 0.5
                win_matrix[opponent, j] += 0.5
                wtl[j, 1] += 1
                wtl[opponent, 1] += 1
            game.reset_board()
    util.print_progress_bar(rounds, rounds, start=start)
    params = choix.ilsr_pairwise_dense(win_matrix)
    print("\nRankings:")
    for i, player in enumerate(np.argsort(params)[::-1]):
        print(
            "%d. %s (expected %d) with %0.2f rating and results of %d-%d-%d" %
            (i + 1, os.path.basename(players[player]), len(players) - player,
             params[player], wtl[player, 0], wtl[player, 1], wtl[player, 2]))
    print(
        "\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)"
    )
Exemplo n.º 9
0
def calc_ranking(config):
    """Rank saved models by repeatedly pairing each one with an opponent.

    Every ``config.model_skip``-th ``*.h5`` file under
    ``config.data.model_location`` (in sorted filename order), plus the last
    one, takes part. In each pass every player ``j`` plays one game, with a
    random colour assignment, against the index returned by
    ``getLeastPlayed(win_matrix, j)``. After each pass the per-pass
    win-tie-loss tensor is handed to ``saveWTL`` and reset. Once
    ``total_games`` games have been played, the win matrix is printed and
    ``choix.ilsr_pairwise_dense`` is used to print a ranking.

    Args:
        config: Ranking configuration (``model_skip``, ``game_num_per_model``,
            ``print_best``, ``data.model_location`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location+"*.h5"))
    # Always keep the last model in addition to the strided ones. The
    # previous test compared the index against len(models), a value
    # enumerate() never yields, so the last model was dropped unless it
    # happened to fall on the stride.
    last_index = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_index
    ]

    # wtl[a, b] = (wins, ties, losses) of a against b, recorded only with a
    # as challenger1; win_matrix[a, b] = score of a over b.
    wtl = np.zeros((len(players), len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()

    challenger1 = AIPlayer(0, config.game.simulation_num_per_move, train=False, model=players[-1], tau=config.game.tau_1)
    challenger2 = AIPlayer(0, config.game.simulation_num_per_move, train=False, model=players[0], tau=config.game.tau_1)
    total_games = (config.game_num_per_model * (len(players)))//2
    played_games = 0
    finished = False
    start = time()
    print("Ranking with %d players and %d games per player" % (len(players), config.game_num_per_model))
    if config.game_num_per_model < len(players):
        print("We suggest that you increase games per player to be greater than players")

    while not finished:
        ranks = getRankings(win_matrix)

        if len(ranks) == 0:
            msg = "No Clear Best Yet"
        else:
            msg = "Current Best is "+util.getPlayerName(players[ranks[-1]])
        if config.print_best:
            print(msg.ljust(90))
        for j in range(len(players)):
            util.print_progress_bar(played_games, total_games, start=start)

            challenger1_index = getLeastPlayed(win_matrix, j)

            AIPlayer.clear()
            challenger1.load(players[challenger1_index])
            challenger2.load(players[j])

            # Side -1 always moves first; pick at random who gets it.
            if random.random() < 0.5:
                challenger1_side = -1
                p1 = challenger1
                p2 = challenger2
            else:
                challenger1_side = 1
                p1 = challenger2
                p2 = challenger1
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1

            winner = game.get_winner()
            if winner == challenger1_side:
                win_matrix[challenger1_index, j] += 1
                wtl[challenger1_index, j, 0] += 1
            elif winner == -1*challenger1_side:
                win_matrix[j, challenger1_index] += 1
                wtl[challenger1_index, j, 2] += 1
            else:
                # A tie counts as half a win for each side.
                win_matrix[challenger1_index, j] += 0.5
                win_matrix[j, challenger1_index] += 0.5
                wtl[challenger1_index, j, 1] += 1
            game.reset_board()
            played_games += 1
            if played_games >= total_games:
                finished = True
                break
        # Persist this pass's results, then start the next pass from zero.
        saveWTL(config, players, wtl)
        wtl = np.zeros((len(players), len(players), 3))
    util.print_progress_bar(total_games, total_games, start=start)

    print("\n",[util.getPlayerName(player) for player in players])
    print("\nWin Matrix(row beat column):")
    print(win_matrix)
    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            params = choix.ilsr_pairwise_dense(win_matrix)
        print("\nRankings:")
        for i, player in enumerate(np.argsort(params)[::-1]):
            print("%d. %s (expected %d) with %0.2f rating"%
                  (i+1, util.getPlayerName(players[player]), len(players)-player, params[player]))
        print("\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)")
    except Exception:
        print("\nNot Enough data to calculate rankings")
def calc_ranking(config):
    """Rank saved models by repeatedly pairing each one with an opponent.

    Every ``config.model_skip``-th ``*.h5`` file under
    ``config.data.model_location`` (in sorted filename order), plus the last
    one, takes part. In each pass every player ``j`` plays one game, with a
    random colour assignment, against the index returned by
    ``getLeastPlayed(win_matrix, j)``. After each pass the per-pass
    win-tie-loss tensor is handed to ``saveWTL`` and reset. Once
    ``total_games`` games have been played, the win matrix is printed and
    ``choix.ilsr_pairwise_dense`` is used to print a ranking.

    Args:
        config: Ranking configuration (``model_skip``, ``game_num_per_model``,
            ``print_best``, ``data.model_location`` and the ``game`` settings).
    """
    models = sorted(glob.glob(config.data.model_location + "*.h5"))
    # Always keep the last model in addition to the strided ones. The
    # previous test compared the index against len(models), a value
    # enumerate() never yields, so the last model was dropped unless it
    # happened to fall on the stride.
    last_index = len(models) - 1
    players = [
        model for i, model in enumerate(models)
        if i % config.model_skip == 0 or i == last_index
    ]

    # wtl[a, b] = (wins, ties, losses) of a against b, recorded only with a
    # as challenger1; win_matrix[a, b] = score of a over b.
    wtl = np.zeros((len(players), len(players), 3))
    win_matrix = np.zeros((len(players), len(players)))
    game = Othello()

    challenger1 = AIPlayer(0,
                           config.game.simulation_num_per_move,
                           train=False,
                           model=players[-1],
                           tau=config.game.tau_1)
    challenger2 = AIPlayer(0,
                           config.game.simulation_num_per_move,
                           train=False,
                           model=players[0],
                           tau=config.game.tau_1)
    total_games = (config.game_num_per_model * (len(players))) // 2
    played_games = 0
    finished = False
    start = time()
    print("Ranking with %d players and %d games per player" %
          (len(players), config.game_num_per_model))
    if config.game_num_per_model < len(players):
        print(
            "We suggest that you increase games per player to be greater than players"
        )

    while not finished:
        ranks = getRankings(win_matrix)

        if len(ranks) == 0:
            msg = "No Clear Best Yet"
        else:
            msg = "Current Best is " + util.getPlayerName(players[ranks[-1]])
        if config.print_best:
            print(msg.ljust(90))
        for j in range(len(players)):
            util.print_progress_bar(played_games, total_games, start=start)

            challenger1_index = getLeastPlayed(win_matrix, j)

            AIPlayer.clear()
            challenger1.load(players[challenger1_index])
            challenger2.load(players[j])

            # Side -1 always moves first; pick at random who gets it.
            if random.random() < 0.5:
                challenger1_side = -1
                p1 = challenger1
                p2 = challenger2
            else:
                challenger1_side = 1
                p1 = challenger2
                p2 = challenger1
            side = -1
            turn = 1
            while not game.game_over():
                tau = config.game.tau_1
                if config.game.tau_swap < turn:
                    tau = config.game.tau_2
                p1.tau = tau
                p2.tau = tau
                if side == -1:
                    t = p1.pick_move(game, side)
                else:
                    t = p2.pick_move(game, side)
                game.play_move(t[0], t[1], side)
                side *= -1
                turn += 1

            winner = game.get_winner()
            if winner == challenger1_side:
                win_matrix[challenger1_index, j] += 1
                wtl[challenger1_index, j, 0] += 1
            elif winner == -1 * challenger1_side:
                win_matrix[j, challenger1_index] += 1
                wtl[challenger1_index, j, 2] += 1
            else:
                # A tie counts as half a win for each side.
                win_matrix[challenger1_index, j] += 0.5
                win_matrix[j, challenger1_index] += 0.5
                wtl[challenger1_index, j, 1] += 1
            game.reset_board()
            played_games += 1
            if played_games >= total_games:
                finished = True
                break
        # Persist this pass's results, then start the next pass from zero.
        saveWTL(config, players, wtl)
        wtl = np.zeros((len(players), len(players), 3))
    util.print_progress_bar(total_games, total_games, start=start)

    print("\n", [util.getPlayerName(player) for player in players])
    print("\nWin Matrix(row beat column):")
    print(win_matrix)
    try:
        with np.errstate(divide='ignore', invalid='ignore'):
            params = choix.ilsr_pairwise_dense(win_matrix)
        print("\nRankings:")
        for i, player in enumerate(np.argsort(params)[::-1]):
            print("%d. %s (expected %d) with %0.2f rating" %
                  (i + 1, util.getPlayerName(
                      players[player]), len(players) - player, params[player]))
        print(
            "\n(Rating Diff, Winrate) -> (0.5, 62%), (1, 73%), (2, 88%), (3, 95%), (5, 99%)"
        )
    except Exception:
        print("\nNot Enough data to calculate rankings")
Exemplo n.º 11
0
# Command-line options of the DQN training script (the parser itself is
# created above this excerpt).
parser.add_argument("--iter", type=int, default=100)
parser.add_argument("--start_iter", type=int)
parser.add_argument("--log_dir", type=str, default="./logDQN.txt")
args = parser.parse_args()

# Log file opened in append mode; no matching close is visible in this excerpt.
log = open(args.log_dir,'a+',encoding='utf8')

dqn = DQN()
# Resume from a saved iteration when --start_iter is given. The check is a
# truthiness test, so "--start_iter 0" is handled like an omitted flag and
# training starts from scratch at episode 0.
if args.start_iter:
    dqn.load("./model",args.start_iter)
else:
    args.start_iter = -1
side = -1
for i_episode in range(args.start_iter+1,args.iter):
    game = Othello()
    while not game.game_over():
        # State before the move, the chosen action, and the state after it.
        s = convert_board_to_feature(game.board, side)
        a = dqn.choose_action(s, side)
        game.play_move(a[0],a[1],side)
        s_p = convert_board_to_feature(game.board, side)
        # Labelled "MCTS", but the visible code plays out a single game on a
        # copy of the board using two RandomPlayer objects (presumably
        # uniformly random movers -- TODO confirm) and uses its winner.
        subGame = game.copy()
        # NOTE(review): subSide is not used in the visible part of the script.
        subSide = side
        rp1 = RandomPlayer(side)
        rp2 = RandomPlayer(-side)
        while not subGame.game_over():
            # The opponent (-side) moves first because `side` has just played.
            # NOTE(review): game_over() is not re-checked between these two
            # moves -- confirm play_move/pick_move tolerate a finished game.
            subGame.play_move(*rp2.pick_move(subGame),-side)
            subGame.play_move(*rp1.pick_move(subGame),side)
        winner = subGame.get_winner()
        # Note: this implementation does not distinguish which side is to
        # move; it only records the r and s_p that correspond to s and a.
        if winner == side:
Exemplo n.º 12
0
def self_play(i, net):
    """Play one MCTS self-play game and run a single training step on it.

    Each turn the board is temporarily multiplied by ``-side`` (a no-op for
    ``side == -1``, a negation for ``side == 1``), searched with MCTS, and
    the resulting ``[board, probs, side]`` record is stored. After the game
    the third field of every record is multiplied by ``-winner``
    (NOTE(review): the sign convention depends on ``Othello.get_winner`` —
    confirm). Two randomly chosen ``expand_func`` variants (indices drawn
    from ``range(7)``) are generated per record, shuffled, and used to pad
    the batch up to 128 samples before one optimizer step is taken.

    Args:
        i: Game index, used only in the progress message.
        net: Model called as ``net(inputs)`` returning ``(probs, value)``;
            must expose ``net.optimizer``.

    Returns:
        Tuple ``(inputs, rollout_prob, labels)``: ``inputs`` has shape
        ``(N, 1, 8, 8)``, ``rollout_prob`` has shape ``(N, 64)`` and
        ``labels`` is a length-``N`` object array of per-sample targets.
    """
    start_time = time.time()

    batch_size = 128
    state_data = []
    game = Othello()
    mctsTest = MCTS(net, 1000)
    mctsTest.virtualLoss(game)
    side = -1
    tau = 1
    while not game.game_over():
        # Present the position to the search in side-normalised form.
        game.board *= -side
        probs = mctsTest.search(game, tau)
        state_data.append([game.board.copy(), probs, side])
        # Always undo the flip before touching the game again. Previously
        # this only happened when a move was played, so a pass by side == 1
        # left the real board negated for the rest of the game.
        game.board *= -side

        if np.sum(probs) > 0:
            # Inverse-CDF sampling. Scaling the draw by the total mass keeps
            # the index inside the array even if probs sums to slightly less
            # than 1 because of rounding.
            cdf = np.cumsum(probs)
            action = np.sum(np.random.rand() * cdf[-1] > cdf)
            game.play_move(*convert_mv_ind_to_tuple(action), side)
        else:
            # No move has probability mass: play the (-1, -1) move.
            # NOTE(review): side is hard-coded to -1 here, presumably because
            # a pass is side-independent — confirm against Othello.play_move.
            game.play_move(-1, -1, -1)

        side = -side

    winner = game.get_winner()
    for record in state_data:
        record[2] *= -winner

    # Augmentation: two distinct transforms per recorded position.
    expand_data = []
    for board, probs, label in state_data:
        for func_index in np.random.permutation(7)[:2]:
            expand_data.append(expand_func(board, probs, label, func_index))
    np.random.shuffle(expand_data)

    # Pad the real positions with augmented ones up to batch_size. The bound
    # is clamped so an unusually long game cannot produce a negative slice.
    extra = max(batch_size - len(state_data), 0)
    batch = state_data + expand_data[:extra]

    # Assemble each column separately instead of building one ragged object
    # array from mixed (8x8 board, 64 probs, scalar) records, which newer
    # NumPy releases reject.
    inputs = np.concatenate([rec[0] for rec in batch]).reshape(
        -1, 8, 8)[:, np.newaxis, :, :]
    rollout_prob = np.concatenate([rec[1] for rec in batch]).reshape(-1, 64)
    # dtype=object matches what slicing the old mixed-type batch returned.
    labels = np.array([rec[2] for rec in batch], dtype=object)

    my_probs, my_value = net(inputs)
    loss = loss_fn(my_value, labels, my_probs, rollout_prob)
    net.optimizer.zero_grad()  # clear stale gradients before backprop
    loss.backward(retain_graph=True)
    net.optimizer.step()

    elapsed = time.time() - start_time
    print("%6d game, time=%4.4fs, loss = %5.5f" % (i, elapsed, float(loss)))
    return inputs, rollout_prob, labels