Exemplo n.º 1
0
def load_player(model_path):
  """Build an MCTSPlayer around the network stored at `model_path`.

  The loading message is printed and is also passed to `timer` as the label
  of the block that constructs the network. The network's `name` attribute
  is set to the basename of `model_path`.

  Args:
    model_path: path handed to `dual_net.DualNetwork`.

  Returns:
    An MCTSPlayer created with verbosity=2.
  """
  banner = "Loading weights from %s ... " % model_path
  print(banner)
  with timer(banner):
      net = dual_net.DualNetwork(model_path)
      net.name = os.path.basename(model_path)
  return MCTSPlayer(net, verbosity=2)
Exemplo n.º 2
0
def predict_move(filename, network, tries_per_move=1, readouts=1000):
  """Rate how often `network` reproduces the moves recorded in an SGF file.

  The replay of `filename` is parsed once and memoised in REPLAY_CACHE.
  Every replay entry that has a next move is evaluated `tries_per_move`
  times with `predict_position`; the fraction of tries flagged correct is
  that move's rating. The running mean rating is printed after each move.

  Args:
    filename: path of the SGF file to replay.
    network: network handed to the MCTSPlayer.
    tries_per_move: number of predictions per position. Must be at least 1;
      0 raises ZeroDivisionError as soon as a position is rated.
    readouts: forwarded to `predict_position`.

  Returns:
    A list with one rating (correct tries / tries_per_move) per replay
    entry that has a next move.
  """
  if filename not in REPLAY_CACHE:
    with open(filename) as f:
      text = f.read()
    # Only the text is needed for parsing, so the file is already closed.
    REPLAY_CACHE[filename] = list(sgf_wrapper.replay_sgf(text))
  replay = REPLAY_CACHE[filename]

  player = MCTSPlayer(
      network, verbosity=0, two_player_mode=True, num_parallel=4)

  move_ratings = []
  for position_w_context in replay:
    if position_w_context.next_move is None:
      continue

    num_correct = 0
    for _ in range(tries_per_move):
      # Only the correctness flag of the returned triple is used here.
      _, _, is_correct = predict_position(
          position_w_context, player, readouts=readouts)
      if is_correct:
        num_correct += 1
    move_ratings.append(num_correct * 1.0 / tries_per_move)
    print('RATING: ', sum(move_ratings) / len(move_ratings))
  return move_ratings
Exemplo n.º 3
0
def play(network, readouts, resign_threshold, verbosity=0):
    """Play one self-play game and return the player that played it.

    Args:
        network: network used by the MCTSPlayer and for the initial
            evaluation of the first selected leaf.
        readouts: number of additional readouts to run before every move.
        resign_threshold: resign threshold handed to the MCTSPlayer.
        verbosity: >= 1 prints timing on moves where position.n % 10 == 9,
            >= 2 prints it on every move, >= 3 also prints the position,
            the root description and the move played.

    Returns:
        The MCTSPlayer after the game loop has ended. When the game ends
        without a resignation, `player.result` holds the position's result.
    """
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -0.9999

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Sets is_done to be True if player.should resign.
        if player.should_resign():  # TODO: make this less side-effecty.
            break
        move = player.pick_move()
        player.play_move(move)
        if player.is_done():
            # TODO: actually handle the result instead of ferrying it around as a property.
            player.result = player.position.result()
            break

        if (verbosity >= 2) or (verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {}".format(player.root.Q))
            dur = time.time() - start
            # Seconds per 100 readouts: time per readout multiplied by 100
            # (the previous "/ 100.0" under-reported it by a factor of 10**4).
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(coords.unflatten_coords(player.root.fmove)))

        # TODO: break when i >= 2 * go.N * go.N (where is this being done now??...)

    return player
def predict_move(filename, network):
    """Predict the first recorded move of an SGF game with MCTS.

    The SGF text and the parsed replay are printed. For the first replay
    entry that has a next move, the player is initialised at that entry's
    position, 361 * 10 additional readouts are run, the picked move is
    played, and the resulting position plus both moves are printed.

    Args:
        filename: path of the SGF file to replay.
        network: network handed to the MCTSPlayer.

    Returns:
        A tuple (move, next_move, is_match) for the first replay entry whose
        next_move is not None.

    Raises:
        ValueError: if no replay entry has a next move. (The previous code
            reached a division by zero in that case.)
    """
    with open(filename) as f:
        text = f.read()
    print(text)
    replay = list(sgf_wrapper.replay_sgf(text))
    print(replay)

    player = MCTSPlayer(network,
                        verbosity=0,
                        two_player_mode=True,
                        num_parallel=4)

    readouts = 361 * 10
    for position_w_context in replay:
        expected = position_w_context.next_move
        if expected is None:
            continue
        player.initialize_game(position_w_context.position)

        # Run `readouts` additional readouts on top of whatever the root has.
        current_readouts = player.root.N
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        move = player.pick_move()
        player.play_move(move)
        print(player.root.position)
        print(move, expected)
        # Only the first entry with a next move is evaluated.
        return move, expected, move == expected

    raise ValueError("no position with a next move in %r" % (filename,))
Exemplo n.º 5
0
def play(network, readouts, resign_threshold, verbosity=0):
    """Run a single self-play game to completion.

    Resignation is disabled (threshold set to -1.0) in a random 5% of games.
    Before every move the root gets noise injected and `readouts` additional
    tree-search readouts are performed.

    Args:
        network: network used by the MCTSPlayer and for the initial
            evaluation of the first selected leaf.
        readouts: number of additional readouts to run before every move.
        resign_threshold: resign threshold handed to the MCTSPlayer.
        verbosity: >= 1 prints timing on moves where position.n % 10 == 9,
            >= 2 prints it on every move plus a summary on stderr at the
            end, >= 3 also prints positions, root description and moves.

    Returns:
        The MCTSPlayer, with its result set through `set_result`.
    """
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Evaluate one leaf up front so that the noise injected below actually
    # affects the first move of the game.
    seed_leaf = player.root.select_leaf()
    priors, value = network.run(seed_leaf.position)
    seed_leaf.incorporate_results(priors, value, seed_leaf)

    while True:
        tick = time.time()
        player.root.inject_noise()
        # "X additional readouts", rather than "up to X readouts".
        target = player.root.N + readouts
        while player.root.N < target:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break

        chosen = player.pick_move()
        player.play_move(chosen)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        report = verbosity >= 2
        if not report and verbosity >= 1:
            report = player.root.position.n % 10 == 9
        if report:
            print("Q: {:.5f}".format(player.root.Q))
            elapsed = time.time() - tick
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, elapsed / readouts * 100.0,
                elapsed), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(coords.unflatten_coords(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q), file=sys.stderr)
        print(player.root.position,
              player.root.position.score(), file=sys.stderr)

    return player
Exemplo n.º 6
0
def play_match(black_net, white_net, games, sgf_dir, verbosity):
    """Play evaluation games between two networks, one game after another.

    One black and one white MCTSPlayer are created once and re-initialised
    for every game. The number of readouts per move comes from
    `flags.FLAGS.num_readouts`. Every finished game is written to `sgf_dir`
    as an SGF file.

    Args:
        black_net: network playing black; `save_file` provides its name.
        white_net: network playing white; `save_file` provides its name.
        games: number of games to play.
        sgf_dir: directory to write the sgf results.
        verbosity: 1 prints timing when the move count modulo 10 is 9,
            > 1 prints it on every move, >= 3 also prints the position
            after each search.
    """
    readouts = flags.FLAGS.num_readouts  # Flag defined in strategies.py

    black = MCTSPlayer(
        black_net, verbosity=verbosity, two_player_mode=True)
    white = MCTSPlayer(
        white_net, verbosity=verbosity, two_player_mode=True)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    for game_idx in range(games):
        ply = 0  # Moves played so far in the current game

        black.initialize_game()
        white.initialize_game()

        while True:
            tick = time.time()
            # Black searches on even plies, white on odd ones.
            if ply % 2:
                active, inactive = white, black
            else:
                active, inactive = black, white

            target = active.root.N + readouts
            while active.root.N < target:
                active.tree_search()

            if verbosity >= 3:
                print(active.root.position)

            # A resignation is recorded on both players before the
            # end-of-game check below.
            if active.should_resign():
                active.set_result(-1 *
                                  active.root.position.to_play, was_resign=True)
                inactive.set_result(
                    active.root.position.to_play, was_resign=True)

            if active.is_done():
                fname = "{:d}-{:s}-vs-{:s}-{:d}.sgf".format(
                    int(time.time()), white_name, black_name, game_idx)
                with gfile.GFile(os.path.join(sgf_dir, fname), 'w') as _file:
                    _file.write(
                        sgf_wrapper.make_sgf(active.position.recent,
                                             active.result_string,
                                             black_name=black_name,
                                             white_name=white_name))
                print("Finished game", game_idx, active.result_string)
                break

            # Both players must see the move to keep their trees in sync.
            chosen = active.pick_move()
            active.play_move(chosen)
            inactive.play_move(chosen)

            elapsed = time.time() - tick
            ply += 1

            if verbosity > 1 or (verbosity == 1 and ply % 10 == 9):
                per_hundred = (elapsed / readouts) * 100.0
                print(active.root.position)
                print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                    ply, readouts, per_hundred, elapsed))
Exemplo n.º 7
0
def play_match(black_net, white_net, games, readouts, sgf_dir, verbosity):
    """Play a series of evaluation games between two networks.

    A single black player and a single white player are created once and
    re-initialised for every game; the games run sequentially. Each finished
    game that has a result string is written to `sgf_dir` as an SGF file.

    Args:
        black_net: network playing black; `save_file` provides its name.
        white_net: network playing white; `save_file` provides its name.
        games: number of games to play.
        readouts: number of additional readouts to perform before each move.
        sgf_dir: directory to write the sgf results.
        verbosity: 1 prints timing when the move count modulo 10 is 9,
            > 1 prints it on every move, >= 3 also prints the position
            after each search.

    Returns:
        A list holding the first character of the result string of every
        recorded game; games without a result string are dropped.
    """
    black = MCTSPlayer(black_net,
                       verbosity=verbosity,
                       two_player_mode=True,
                       num_parallel=SIMULTANEOUS_LEAVES)
    white = MCTSPlayer(white_net,
                       verbosity=verbosity,
                       two_player_mode=True,
                       num_parallel=SIMULTANEOUS_LEAVES)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    winners = []
    for game_idx in range(games):
        ply = 0  # Moves played so far in the current game

        black.initialize_game()
        white.initialize_game()

        while True:
            tick = time.time()
            # Black searches on even plies, white on odd ones.
            if ply % 2:
                active, inactive = white, black
            else:
                active, inactive = black, white

            target = active.root.N + readouts
            while active.root.N < target:
                active.tree_search()

            if verbosity >= 3:
                print(active.root.position)

            # A resignation is recorded on both players before the
            # end-of-game check below.
            if active.should_resign():
                active.set_result(-1 * active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = "{:d}-{:s}-vs-{:s}-{:d}.sgf".format(
                    int(time.time()), white_name, black_name, game_idx)
                if active.result_string is None:
                    # Rare corner case without a winner: the game is dropped.
                    break
                winners.append(active.result_string[0])
                with open(os.path.join(sgf_dir, fname), 'w') as _file:
                    _file.write(
                        sgf_wrapper.make_sgf(active.position.recent,
                                             active.result_string,
                                             black_name=black_name,
                                             white_name=white_name))
                print("Finished game", game_idx, active.result_string)
                break

            # Both players must see the move to keep their trees in sync.
            chosen = active.pick_move()
            active.play_move(chosen)
            inactive.play_move(chosen)

            elapsed = time.time() - tick
            ply += 1

            if verbosity > 1 or (verbosity == 1 and ply % 10 == 9):
                per_hundred = (elapsed / readouts) * 100.0
                print(active.root.position)
                print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                      (ply, readouts, per_hundred, elapsed))
    return winners
def play(board_size,
         network,
         readouts,
         resign_threshold,
         simultaneous_leaves,
         verbosity=0):
    """Run a single self-play game to completion.

    Resignation is disabled (threshold set to -1.0) in a random 5% of games.
    Before every move the root gets noise injected and `readouts` additional
    tree-search readouts are performed.

    Args:
        board_size: the go board size, handed to the MCTSPlayer.
        network: network used by the MCTSPlayer and for the initial
            evaluation of the first selected leaf.
        readouts: number of additional readouts to run before every move.
        resign_threshold: the threshold to resign at in the match.
        simultaneous_leaves: passed to the MCTSPlayer as `num_parallel`.
        verbosity: >= 1 prints timing on moves where position.n % 10 == 9,
            >= 2 prints it on every move plus a summary on stderr at the
            end, >= 3 also prints positions, root description and moves.

    Returns:
        The MCTSPlayer, with its result set through `set_result`.
    """
    player = MCTSPlayer(board_size,
                        network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=simultaneous_leaves)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Evaluate one leaf up front so that the noise injected below actually
    # affects the first move of the game.
    seed_leaf = player.root.select_leaf()
    priors, value = network.run(seed_leaf.position)
    seed_leaf.incorporate_results(priors, value, seed_leaf)

    while True:
        tick = time.time()
        player.root.inject_noise()
        # "X additional readouts", rather than "up to X readouts".
        target = player.root.N + readouts
        while player.root.N < target:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break

        chosen = player.pick_move()
        player.play_move(chosen)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        report = verbosity >= 2
        if not report and verbosity >= 1:
            report = player.root.position.n % 10 == 9
        if report:
            print("Q: {:.5f}".format(player.root.Q))
            elapsed = time.time() - tick
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (player.root.position.n, readouts,
                   elapsed / readouts * 100.0, elapsed))
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position,
              player.root.position.score(),
              file=sys.stderr)

    return player
Exemplo n.º 9
0
def play_match(black_net, white_net, games, readouts, verbosity):
    """Play `games` games between two networks in lockstep.

    One (black, white) player pair is created per game. On every iteration
    of the main loop the side to move runs `readouts` batched readouts
    (one leaf per unfinished game, evaluated together through `run_many`),
    then each unfinished game plays one move. Pairs are moved to the result
    list once either of their players reports `is_done()`.

    Args:
        black_net: network used by the black players.
        white_net: network used by the white players.
        games: number of games (player pairs) to play.
        readouts: number of batched readouts before each move.
        verbosity: 1 prints timing when the move count modulo 10 is 9,
            > 1 prints it on every move, >= 3 also prints the root position
            of the first unfinished game's black player.

    Returns:
        A list of (black_player, white_player) tuples for the finished
        games, in the order in which they finished.
    """
    black_players = [
        MCTSPlayer(black_net, verbosity=verbosity, two_player_mode=True)
        for _ in range(games)
    ]
    white_players = [
        MCTSPlayer(white_net, verbosity=verbosity, two_player_mode=True)
        for _ in range(games)
    ]
    player_pairs = list(zip(black_players, white_players))

    done_pairs = []
    global_n = 0  # Number of moves played so far in every game

    for black, white in player_pairs:
        black.initialize_game()
        white.initialize_game()

    while player_pairs:
        start = time.time()
        side = global_n % 2  # 0: black to move, 1: white to move
        active_net = (black_net, white_net)[side]

        for _ in range(readouts):
            leaves = [pair[side].root.select_leaf() for pair in player_pairs]
            probs, vals = active_net.run_many(
                [leaf.position for leaf in leaves])

            # Plain loop: this is executed for its side effects only.
            for pair, leaf, prob, val in zip(player_pairs, leaves, probs,
                                             vals):
                leaf.incorporate_results(prob, val, up_to=pair[side].root)

        # print some stats on the search
        if verbosity >= 3:
            print(player_pairs[0][0].root.position)

        for pair in player_pairs:
            active, inactive = pair[side], pair[1 - side]
            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                continue
            move = active.pick_move()
            active.play_move(move)
            inactive.play_move(move)

        dur = time.time() - start
        global_n += 1
        if (verbosity > 1) or (verbosity == 1 and global_n % 10 == 9):
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (global_n, readouts * len(player_pairs), dur /
                   (readouts * len(player_pairs) / 100.0), dur))

        # Split finished from unfinished games in a single pass so that
        # is_done() is queried once per player instead of twice.
        # NOTE(review): assumes is_done() is a pure query - confirm.
        still_playing = []
        for pair in player_pairs:
            if pair[0].is_done() or pair[1].is_done():
                done_pairs.append(pair)
            else:
                still_playing.append(pair)
        player_pairs = still_playing

    return done_pairs
Exemplo n.º 10
0
def play_match(params, black_net, white_net, games, readouts, sgf_dir,
               verbosity):
    """Play evaluation games between two networks and name the winner.

    A single black and a single white player are created once and
    re-initialised for every game; the games run sequentially. Every
    finished game is written to `sgf_dir` as an SGF file. Black is declared
    the winner when its win count exceeds white's by more than
    `params.eval_win_rate * games`.

    Args:
        params: An object of hyperparameters; `board_size`,
            `simultaneous_leaves` and `eval_win_rate` are read from it.
        black_net: network playing black; `save_file` provides its name.
        white_net: network playing white; `save_file` provides its name.
        games: Number of games to play.
        readouts: Number of additional readouts to perform before each move.
        sgf_dir: Directory to write the sgf results.
        verbosity: 1 prints timing when the move count modulo 10 is 9,
            > 1 prints it on every move, >= 3 also prints the position
            after each search.

    Returns:
        `go.BLACK_NAME` if black won by the required margin, otherwise
        `go.WHITE_NAME`.
    """
    black = MCTSPlayer(params.board_size,
                       black_net,
                       verbosity=verbosity,
                       two_player_mode=True,
                       num_parallel=params.simultaneous_leaves)
    white = MCTSPlayer(params.board_size,
                       white_net,
                       verbosity=verbosity,
                       two_player_mode=True,
                       num_parallel=params.simultaneous_leaves)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    black_win_counts = 0
    white_win_counts = 0

    for game_idx in range(games):
        ply = 0  # Moves played so far in the current game

        black.initialize_game()
        white.initialize_game()

        while True:
            tick = time.time()
            # Black searches on even plies, white on odd ones.
            if ply % 2:
                active, inactive = white, black
            else:
                active, inactive = black, white

            target = active.root.N + readouts
            while active.root.N < target:
                active.tree_search()

            if verbosity >= 3:
                print(active.root.position)

            # A resignation is recorded on both players before the
            # end-of-game check below.
            if active.should_resign():
                active.set_result(-active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = '{:d}-{:s}-vs-{:s}-{:d}.sgf'.format(
                    int(time.time()), white_name, black_name, game_idx)
                with open(os.path.join(sgf_dir, fname), 'w') as f:
                    f.write(
                        sgf_wrapper.make_sgf(params.board_size,
                                             active.position.recent,
                                             active.result_string,
                                             black_name=black_name,
                                             white_name=white_name))
                outcome = active.result_string
                print('Finished game', game_idx, outcome)
                # Only results starting with 'B' or 'W' are counted.
                if outcome is not None:
                    if outcome[0] == 'B':
                        black_win_counts += 1
                    elif outcome[0] == 'W':
                        white_win_counts += 1
                break

            # Both players must see the move to keep their trees in sync.
            chosen = active.pick_move()
            active.play_move(chosen)
            inactive.play_move(chosen)

            elapsed = time.time() - tick
            ply += 1

            if verbosity > 1 or (verbosity == 1 and ply % 10 == 9):
                per_hundred = (elapsed / readouts) * 100.0
                print(active.root.position)
                print('{:d}: {:d} readouts, {:.3f} s/100. ({:.2f} sec)'.format(
                    ply, readouts, per_hundred, elapsed))

    margin = black_win_counts - white_win_counts
    if margin > params.eval_win_rate * games:
        return go.BLACK_NAME
    return go.WHITE_NAME
Exemplo n.º 11
0
def play_match(black_net, white_net, games, readouts, verbosity):
    """Play `games` games between two networks in lockstep.

    One (black, white) player pair is created per game and all games advance
    one move per iteration of the main loop.

    Args:
        black_net: network used by the black players.
        white_net: network used by the white players.
        games: number of games (player pairs) to play.
        readouts: number of batched readouts to perform before each move.
        verbosity: 1 prints timing when the move count modulo 10 is 9,
            > 1 prints it on every move, >= 3 also prints the root position
            of the first unfinished game's black player.

    Returns:
        A list of (black_player, white_player) tuples for the finished
        games, in the order in which they finished.
    """

    def _finished(pair):
        # A game is over as soon as either of its players says so.
        return pair[0].is_done() or pair[1].is_done()

    # One black and one white player per game, paired up by index.
    black_players = [
        MCTSPlayer(black_net, verbosity=verbosity, two_player_mode=True)
        for _ in range(games)
    ]
    white_players = [
        MCTSPlayer(white_net, verbosity=verbosity, two_player_mode=True)
        for _ in range(games)
    ]
    player_pairs = list(zip(black_players, white_players))

    done_pairs = []

    # The number of moves that have been played
    num_moves = 0

    for black, white in player_pairs:
        black.initialize_game()
        white.initialize_game()

    # Each iteration of the while loop plays one move in every open game:
    #   - run the MCTS readouts for the side to move, batched across games
    #   - play a move in every game
    #   - retire the pairs whose game has finished
    while player_pairs:
        tick = time.time()
        side = num_moves % 2  # 0: black to move, 1: white to move
        nets = (black_net, white_net)

        for _ in range(readouts):
            leaves = [pair[side].root.select_leaf() for pair in player_pairs]
            probs, vals = nets[side].run_many(
                [leaf.position for leaf in leaves])

            for pair, leaf, prob, val in zip(player_pairs, leaves, probs,
                                             vals):
                leaf.incorporate_results(prob, val, up_to=pair[side].root)

        # print some stats on the search
        if verbosity >= 3:
            print(player_pairs[0][0].root.position)

        for pair in player_pairs:
            active, inactive = pair[side], pair[1 - side]
            # Hopeless games are not played any further.
            if active.should_resign():
                continue
            chosen = active.pick_move()
            active.play_move(chosen)
            inactive.play_move(chosen)

        elapsed = time.time() - tick
        num_moves += 1
        if verbosity > 1 or (verbosity == 1 and num_moves % 10 == 9):
            rdcnt = readouts * len(player_pairs)
            timeper = elapsed / (rdcnt / 100.0)
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (num_moves, rdcnt, timeper, elapsed))

        done_pairs.extend([p for p in player_pairs if _finished(p)])
        player_pairs = [p for p in player_pairs if not _finished(p)]

    return done_pairs