Exemplo n.º 1
0
def play(network, readouts, resign_threshold, verbosity=0):
    """Play out one self-play match and return the player that played it.

    Args:
        network: model handed to MCTSPlayer; its ``run(position)`` method is
            also called here once to evaluate the first selected leaf.
        readouts: number of additional tree-search readouts to accumulate at
            the root before each move is picked.
        resign_threshold: resign threshold handed to MCTSPlayer.
        verbosity: 0 prints nothing. At >= 1 a progress line is printed
            whenever ``position.n % 10 == 9``; at >= 2 it is printed after
            every move; at >= 3 the position, the root description and the
            move just played are printed as well.

    Returns:
        The MCTSPlayer once the loop has stopped. When the game ended because
        ``player.is_done()`` became true, ``player.result`` has been set from
        ``player.position.result()``; when ``player.should_resign()`` was
        true the loop simply stops.
    """
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games.
    # NOTE(review): -0.9999 only disables resigning if should_resign() can
    # never observe a value below it -- confirm against MCTSPlayer.
    if random.random() < 0.05:
        player.resign_threshold = -0.9999

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # We want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Sets is_done to be True if player.should resign.
        if player.should_resign():  # TODO: make this less side-effecty.
            break
        move = player.pick_move()
        player.play_move(move)
        if player.is_done():
            # TODO: actually handle the result instead of ferrying it around as a property.
            player.result = player.position.result()
            break

        if (verbosity >= 2) or (verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {}".format(player.root.Q))
            dur = time.time() - start
            # The label is "s/100", i.e. seconds per 100 readouts:
            # (seconds per readout) * 100.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(coords.unflatten_coords(player.root.fmove)))

        # TODO: break when i >= 2 * go.N * go.N (where is this being done now??...)

    return player
def predict_move(filename, network):
    """Predict the move for the first SGF position that records a next move.

    The file is read and replayed with ``sgf_wrapper.replay_sgf``. Positions
    whose ``next_move`` is None are skipped. For the first remaining position
    a fixed number of tree-search readouts is run, the player's pick is
    compared with the recorded move, and the function returns immediately.

    Args:
        filename: path of the SGF file to read.
        network: model handed to MCTSPlayer.

    Returns:
        A tuple ``(move, next_move, is_match)``: the move picked by the
        player, the move recorded in the SGF, and whether the two are equal.

    Raises:
        ValueError: if no replayed position has a next move. This path used
            to fall through to ``correct * 1.0 / tried`` with ``tried == 0``
            and always crashed with ZeroDivisionError.
    """
    with open(filename) as f:
        text = f.read()
    print(text)
    replay = list(sgf_wrapper.replay_sgf(text))
    print(replay)

    player = MCTSPlayer(network,
                        verbosity=0,
                        two_player_mode=True,
                        num_parallel=4)

    readouts = 361 * 10
    for position_w_context in replay:
        expected = position_w_context.next_move
        if expected is None:
            continue
        player.initialize_game(position_w_context.position)

        # Run `readouts` additional readouts on top of whatever the root
        # already has.
        current_readouts = player.root.N
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        move = player.pick_move()
        is_match = move == expected
        player.play_move(move)
        print(player.root.position)
        print(move, expected)
        # Only the first position with a recorded next move is evaluated.
        return move, expected, is_match

    raise ValueError(
        "no position with a next move found in {!r}".format(filename))
Exemplo n.º 3
0
def predict_move(filename, network):
  """Predict the move for the first SGF position that records a next move.

  The file is read and replayed with ``sgf_wrapper.replay_sgf``. Positions
  whose ``next_move`` is None are skipped. For the first remaining position a
  fixed number of tree-search readouts is run, the player's pick is compared
  with the recorded move, and the function returns immediately.

  Args:
    filename: path of the SGF file to read.
    network: model handed to MCTSPlayer.

  Returns:
    A tuple ``(move, next_move, is_match)``: the move picked by the player,
    the move recorded in the SGF, and whether the two are equal.

  Raises:
    ValueError: if no replayed position has a next move. This path used to
      fall through to ``correct * 1.0 / tried`` with ``tried == 0`` and
      always crashed with ZeroDivisionError.
  """
  with open(filename) as f:
    text = f.read()
  print(text)
  replay = list(sgf_wrapper.replay_sgf(text))
  print(replay)

  player = MCTSPlayer(
      network, verbosity=0, two_player_mode=True, num_parallel=4)

  readouts = 361 * 10
  for position_w_context in replay:
    expected = position_w_context.next_move
    if expected is None:
      continue
    player.initialize_game(position_w_context.position)

    # Run `readouts` additional readouts on top of whatever the root
    # already has.
    current_readouts = player.root.N
    while player.root.N < current_readouts + readouts:
      player.tree_search()

    move = player.pick_move()
    is_match = move == expected
    player.play_move(move)
    print(player.root.position)
    print(move, expected)
    # Only the first position with a recorded next move is evaluated.
    return move, expected, is_match

  raise ValueError(
      "no position with a next move found in {!r}".format(filename))
Exemplo n.º 4
0
def play(network, readouts, resign_threshold, verbosity=0):
    """Play out one self-play match and return the player that played it.

    Args:
        network: model handed to MCTSPlayer; its ``run(position)`` method is
            also called here once to evaluate the first selected leaf.
        readouts: number of additional tree-search readouts to accumulate at
            the root before each move is picked.
        resign_threshold: resign threshold handed to MCTSPlayer.
        verbosity: 0 prints nothing. At >= 1 a progress line is printed
            whenever ``position.n % 10 == 9``; at >= 2 it is printed after
            every move and a final summary goes to stderr; at >= 3 the
            position, the root description and the move just played are
            printed as well.

    Returns:
        The MCTSPlayer after ``set_result`` has been called on it, either
        for a resignation or for a finished game.
    """
    player = MCTSPlayer(network, resign_threshold=resign_threshold,
                        verbosity=verbosity, num_parallel=SIMULTANEOUS_LEAVES)

    # In 5% of games the threshold is lowered to -1.0 to disable resigning.
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Evaluate one leaf up front so that the noise injected below actually
    # affects the first move of the game.
    opening_leaf = player.root.select_leaf()
    priors, value = network.run(opening_leaf.position)
    opening_leaf.incorporate_results(priors, value, opening_leaf)

    while True:
        move_started = time.time()
        player.root.inject_noise()
        # Search for `readouts` *additional* readouts, not "up to" that many.
        readout_target = player.root.N + readouts
        while player.root.N < readout_target:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            # The side to move resigns, so the result goes to the opponent.
            loser = player.root.position.to_play
            player.set_result(-1 * loser, was_resign=True)
            break

        player.play_move(player.pick_move())
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        report_progress = verbosity >= 2 or (
            verbosity >= 1 and player.root.position.n % 10 == 9)
        if report_progress:
            print("Q: {:.5f}".format(player.root.Q))
            elapsed = time.time() - move_started
            stats = (player.root.position.n, readouts,
                     elapsed / readouts * 100.0, elapsed)
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % stats,
                  flush=True)
        if verbosity >= 3:
            played = coords.unflatten_coords(player.root.fmove)
            print("Played >>", coords.to_human_coord(played))

    if verbosity >= 2:
        summary = "%s: %.3f" % (player.result_string, player.root.Q)
        print(summary, file=sys.stderr)
        final_position = player.root.position
        print(final_position, final_position.score(), file=sys.stderr)

    return player
Exemplo n.º 5
0
def play(board_size, network, readouts, resign_threshold, simultaneous_leaves,
         verbosity=0):
  """Plays out one self-play match and returns the player that played it.

  Args:
    board_size: the go board size, handed to MCTSPlayer
    network: the model handed to MCTSPlayer; its run(position) method is also
      called here once to evaluate the first selected leaf
    readouts: the number of additional MCTS readouts per move
    resign_threshold: the resign threshold handed to MCTSPlayer
    simultaneous_leaves: passed to MCTSPlayer as num_parallel
    verbosity: 0 prints nothing; >= 1 prints a progress line whenever
      position.n % 10 == 9; >= 2 prints it after every move and a final
      summary to stderr; >= 3 also prints the position, the root description
      and the move just played

  Returns:
    The MCTSPlayer after set_result has been called on it, either for a
    resignation or for a finished game.
  """
  player = MCTSPlayer(
      board_size,
      network,
      resign_threshold=resign_threshold,
      verbosity=verbosity,
      num_parallel=simultaneous_leaves)

  # In 5% of games the threshold is lowered to -1.0 to disable resigning.
  if random.random() < 0.05:
    player.resign_threshold = -1.0

  player.initialize_game()

  # Evaluate one leaf up front so that the noise injected below actually
  # affects the first move of the game.
  opening_leaf = player.root.select_leaf()
  priors, value = network.run(opening_leaf.position)
  opening_leaf.incorporate_results(priors, value, opening_leaf)

  while True:
    move_started = time.time()
    player.root.inject_noise()
    # Search for `readouts` *additional* readouts, not "up to" that many.
    readout_target = player.root.N + readouts
    while player.root.N < readout_target:
      player.tree_search()

    if verbosity >= 3:
      print(player.root.position)
      print(player.root.describe())

    if player.should_resign():
      # The side to move resigns, so the result goes to the opponent.
      loser = player.root.position.to_play
      player.set_result(-1 * loser, was_resign=True)
      break

    player.play_move(player.pick_move())
    if player.root.is_done():
      player.set_result(player.root.position.result(), was_resign=False)
      break

    report_progress = verbosity >= 2 or (
        verbosity >= 1 and player.root.position.n % 10 == 9)
    if report_progress:
      print("Q: {:.5f}".format(player.root.Q))
      elapsed = time.time() - move_started
      stats = (player.root.position.n, readouts,
               elapsed / readouts * 100.0, elapsed)
      print("%d: %d readouts, %.3f s/100. (%.2f sec)" % stats)
    if verbosity >= 3:
      played = coords.from_flat(player.root.fmove)
      print("Played >>", coords.to_kgs(played))

  if verbosity >= 2:
    summary = "%s: %.3f" % (player.result_string, player.root.Q)
    print(summary, file=sys.stderr)
    final_position = player.root.position
    print(final_position, final_position.score(), file=sys.stderr)

  return player
def play(board_size,
         network,
         readouts,
         resign_threshold,
         simultaneous_leaves,
         verbosity=0):
    """Plays out one self-play match and returns the player that played it.

    Args:
        board_size: the go board size, handed to MCTSPlayer
        network: the model handed to MCTSPlayer; its run(position) method is
            also called here once to evaluate the first selected leaf
        readouts: the number of additional MCTS readouts per move
        resign_threshold: the resign threshold handed to MCTSPlayer
        simultaneous_leaves: passed to MCTSPlayer as num_parallel
        verbosity: 0 prints nothing; >= 1 prints a progress line whenever
            position.n % 10 == 9; >= 2 prints it after every move and a
            final summary to stderr; >= 3 also prints the position, the root
            description and the move just played

    Returns:
        The MCTSPlayer after set_result has been called on it, either for a
        resignation or for a finished game.
    """
    mcts = MCTSPlayer(board_size, network,
                      resign_threshold=resign_threshold,
                      verbosity=verbosity,
                      num_parallel=simultaneous_leaves)

    # In 5% of games the threshold is lowered to -1.0 to disable resigning.
    if random.random() < 0.05:
        mcts.resign_threshold = -1.0

    mcts.initialize_game()

    # Evaluate one leaf up front so that the noise injected below actually
    # affects the first move of the game.
    seed_leaf = mcts.root.select_leaf()
    policy, estimate = network.run(seed_leaf.position)
    seed_leaf.incorporate_results(policy, estimate, seed_leaf)

    chatty = verbosity >= 3

    while True:
        tick = time.time()
        mcts.root.inject_noise()
        # Search for `readouts` *additional* readouts, not "up to" that many.
        goal = mcts.root.N + readouts
        while mcts.root.N < goal:
            mcts.tree_search()

        if chatty:
            print(mcts.root.position)
            print(mcts.root.describe())

        if mcts.should_resign():
            # The side to move resigns, so the result goes to the opponent.
            mcts.set_result(-1 * mcts.root.position.to_play, was_resign=True)
            break

        chosen = mcts.pick_move()
        mcts.play_move(chosen)
        if mcts.root.is_done():
            mcts.set_result(mcts.root.position.result(), was_resign=False)
            break

        if verbosity >= 2 or (verbosity >= 1
                              and mcts.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(mcts.root.Q))
            took = time.time() - tick
            line = "%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                mcts.root.position.n, readouts, took / readouts * 100.0, took)
            print(line)
        if chatty:
            print("Played >>", coords.to_kgs(coords.from_flat(mcts.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (mcts.result_string, mcts.root.Q), file=sys.stderr)
        end_position = mcts.root.position
        print(end_position, end_position.score(), file=sys.stderr)

    return mcts