Пример #1
0
 def test_flatten(self):
     """Check flatten_coords / unflatten_coords on fixed points and round trips.

     The expected values are consistent with flat = 9 * row + col.
     """
     # (row, col) -> expected flat index
     for point, flat in (((0, 0), 0), ((0, 3), 3), ((3, 0), 27)):
         self.assertEqual(coords.flatten_coords(point), flat)

     # flat index -> expected (row, col)
     for flat, point in ((27, (3, 0)), (10, (1, 1)), (80, (8, 8))):
         self.assertEqual(coords.unflatten_coords(flat), point)

     # Composing the two conversions in either order must be the identity.
     flat_round_trip = coords.flatten_coords(coords.unflatten_coords(10))
     self.assertEqual(flat_round_trip, 10)
     point_round_trip = coords.unflatten_coords(
         coords.flatten_coords((5, 4)))
     self.assertEqual(point_round_trip, (5, 4))
Пример #2
0
 def test_proper_move_transform(self):
     """Check that apply_p and apply_f permute moves consistently.

     A flat move vector of length N*N + 1 (362 for a 19x19 board) and an
     N x N board labelled with each point's flat index are pushed through
     every symmetry; the two results must agree under
     coords.unflatten_coords.
     """
     board_points = go.N ** 2
     move_array = np.arange(board_points + 1)
     # Label every board point with its own flat index.
     coord_array = np.zeros([go.N, go.N])
     for flat in range(board_points):
         coord_array[coords.unflatten_coords(flat)] = flat

     for sym in symmetries.SYMMETRIES:
         with self.subTest(symmetry=sym):
             moved = apply_p(sym, move_array)
             board = apply_f(sym, coord_array)
             # The final entry of the move vector has no board point
             # (presumably the pass move), so it is left out.
             for new_flat, old_flat in enumerate(moved[:-1]):
                 on_board = board[coords.unflatten_coords(new_flat)]
                 self.assertEqual(old_flat, on_board)
Пример #3
0
 def test_proper_move_transform(self):
     """Verify move-vector symmetries match board symmetries.

     The flat move vector has N*N + 1 entries (362 = 19*19 + 1 on a full
     board). Its reinterpretation during symmetry application must be
     consistent with coords.unflatten_coords.
     """
     size = go.N
     move_array = np.arange(size**2 + 1)
     coord_array = np.zeros([size, size])
     # Each board cell holds the flat index that maps to it.
     for index in range(size**2):
         cell = coords.unflatten_coords(index)
         coord_array[cell] = index

     for symmetry in symmetries.SYMMETRIES:
         with self.subTest(symmetry=symmetry):
             transformed_moves = apply_p(symmetry, move_array)
             transformed_board = apply_f(symmetry, coord_array)
             # Skip the trailing entry, which has no cell on the board.
             on_board_moves = transformed_moves[:-1]
             for new_index, old_index in enumerate(on_board_moves):
                 cell = coords.unflatten_coords(new_index)
                 self.assertEqual(old_index, transformed_board[cell])
Пример #4
0
 def describe(self):
     """Return a multi-line text report of this node's search statistics.

     The report contains, in order: this node's Q, the string returned by
     ``most_visited_path()``, a column header, and one row for each of the
     15 top-ranked moves. Moves are ranked by ``child_N``, with ties broken
     by ``child_action_score``, both descending.

     Returns:
         str: the formatted report.
     """
     # Read each per-move array once instead of once per key.
     # NOTE(review): if any of these are computed properties this also
     # avoids recomputing them for every move -- confirm against the class.
     child_n = self.child_N
     action_score = self.child_action_score
     sort_order = sorted(range(go.N * go.N + 1),
                         key=lambda i: (child_n[i], action_score[i]),
                         reverse=True)

     # With no visits at all the total is 0; divide by 1 instead so the
     # visit shares come out as 0 rather than NaN.
     total_n = sum(child_n)
     soft_n = child_n / (total_n if total_n else 1)
     prior = self.child_prior
     p_delta = soft_n - prior
     p_rel = p_delta / prior

     # Dump out some statistics
     output = []
     output.append("{q:.4f}\n".format(q=self.Q))
     output.append(self.most_visited_path())
     output.append(
         "move:  action      Q      U      P    P-Dir    N  soft-N  p-delta  p-rel\n"
     )
     child_q = self.child_Q
     child_u = self.child_U
     original_prior = self.original_prior
     row = "{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, {:4d} {:.4f} {: .5f} {: .2f}"
     # Only the first 15 rows are shown, so only those are formatted.
     output.append("\n".join([
         row.format(coords.to_human_coord(coords.unflatten_coords(key)),
                    action_score[key], child_q[key],
                    child_u[key],
                    prior[key], original_prior[key],
                    int(child_n[key]), soft_n[key], p_delta[key],
                    p_rel[key]) for key in sort_order[:15]
     ]))
     return ''.join(output)
Пример #5
0
 def heatmap(self, sort_order, node, prop):
     """Return up to 20 "<move> <value>" lines for the visited children.

     Args:
         sort_order: iterable of flat move indices, in display order.
         node: object with a ``child_N`` indexable and an instance
             attribute named ``prop`` that is indexable by the same keys.
         prop: name of the instance attribute to print, looked up in
             ``node.__dict__``.

     Returns:
         str: newline-joined lines, one per key whose ``child_N`` entry is
         positive, truncated to the first 20.
     """
     lines = []
     for key in sort_order:
         # Unvisited children are left out of the listing.
         if not node.child_N[key] > 0:
             continue
         label = coords.to_human_coord(coords.unflatten_coords(key))
         lines.append("{!s:6} {}".format(label,
                                         node.__dict__.get(prop)[key]))
     return "\n".join(lines[:20])
Пример #6
0
def play(network, readouts, resign_threshold, verbosity=0):
    '''Play out one self-play game and return the player that played it.

    Args:
        network: object whose ``run(position)`` returns a ``(prob, val)``
            pair; it is also handed to ``MCTSPlayer``.
        readouts: number of *additional* readouts accumulated at the root
            before each move is picked.
        resign_threshold: forwarded to ``MCTSPlayer``. In about 5% of games
            it is replaced with -0.9999 (intended to disable resignation).
        verbosity: 0 prints nothing. >= 1 prints Q and timing whenever
            ``position.n % 10 == 9``; >= 2 prints them after every move;
            >= 3 additionally prints the position, the root statistics and
            the move just played.

    Returns:
        The ``MCTSPlayer`` once the game loop has ended, either because
        ``should_resign()`` was true or because ``is_done()`` was true
        (in which case ``player.result`` has been set).
    '''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -0.9999

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # NOTE(review): should_resign() reportedly also marks the game as
        # done as a side effect -- confirm against MCTSPlayer.
        if player.should_resign():  # TODO: make this less side-effecty.
            break
        move = player.pick_move()
        player.play_move(move)
        if player.is_done():
            # TODO: actually handle the result instead of ferrying it around as a property.
            player.result = player.position.result()
            break

        if (verbosity >= 2) or (verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {}".format(player.root.Q))
            dur = time.time() - start
            # Seconds per 100 readouts: time per readout multiplied by 100.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(coords.unflatten_coords(player.root.fmove)))

        # TODO: break when i >= 2 * go.N * go.N (where is this being done now??...)

    return player
Пример #7
0
 def maybe_add_child(self, fcoord):
     """Return the child node for ``fcoord``, creating it on first use.

     A missing child is built by playing the move that ``fcoord``
     unflattens to on this node's position, then stored in
     ``self.children`` under ``fcoord``.
     """
     if fcoord in self.children:
         return self.children[fcoord]

     next_position = self.position.play_move(
         coords.unflatten_coords(fcoord))
     child = MCTSNode(next_position, fmove=fcoord, parent=self)
     self.children[fcoord] = child
     return self.children[fcoord]
Пример #8
0
    def test_upperleft(self):
        """Every coordinate notation must agree on the point (0, 0)."""
        point = (0, 0)

        # Each parser turns its own spelling of the corner into (0, 0).
        decoders = (
            (coords.parse_sgf_coords, 'aa'),
            (coords.unflatten_coords, 0),
            (coords.parse_kgs_coords, 'A9'),
            (coords.parse_pygtp_coords, (1, 9)),
        )
        for decode, spelling in decoders:
            self.assertEqual(decode(spelling), point)

        # Each formatter turns (0, 0) back into the same spelling.
        encoders = (
            (coords.unparse_sgf_coords, 'aa'),
            (coords.flatten_coords, 0),
            (coords.to_human_coord, 'A9'),
            (coords.unparse_pygtp_coords, (1, 9)),
        )
        for encode, spelling in encoders:
            self.assertEqual(encode(point), spelling)
Пример #9
0
    def test_topleft(self):
        """Every coordinate notation must agree on the point (0, 8).

        NOTE(review): (0, 8) / 'J9' is the opposite end of row 0 from the
        (0, 0) / 'A9' corner, so the method name may be a misnomer.
        """
        point = (0, 8)

        # Each parser turns its own spelling of the point into (0, 8).
        decoders = (
            (coords.parse_sgf_coords, 'ia'),
            (coords.unflatten_coords, 8),
            (coords.parse_kgs_coords, 'J9'),
            (coords.parse_pygtp_coords, (9, 9)),
        )
        for decode, spelling in decoders:
            self.assertEqual(decode(spelling), point)

        # Each formatter turns (0, 8) back into the same spelling.
        encoders = (
            (coords.unparse_sgf_coords, 'ia'),
            (coords.flatten_coords, 8),
            (coords.to_human_coord, 'J9'),
            (coords.unparse_pygtp_coords, (9, 9)),
        )
        for encode, spelling in encoders:
            self.assertEqual(encode(point), spelling)
Пример #10
0
    def test_pass(self):
        """Every coordinate notation must represent the pass move as None."""
        # Each parser turns its own spelling of "pass" into None.
        decoders = (
            (coords.parse_sgf_coords, ''),
            (coords.unflatten_coords, 81),
            (coords.parse_kgs_coords, 'pass'),
            (coords.parse_pygtp_coords, (0, 0)),
        )
        for decode, spelling in decoders:
            self.assertEqual(decode(spelling), None)

        # Each formatter turns None back into the same spelling.
        encoders = (
            (coords.unparse_sgf_coords, ''),
            (coords.flatten_coords, 81),
            (coords.to_human_coord, 'pass'),
            (coords.unparse_pygtp_coords, (0, 0)),
        )
        for encode, spelling in encoders:
            self.assertEqual(encode(None), spelling)
Пример #11
0
 def mvp_gg(self):
     """Return the most visited path as space-separated human coordinates.

     Starting at this node, repeatedly steps to the child with the highest
     visit count, stopping at a node with no children or whose best child
     has been visited at most once.
     NOTE(review): the original docstring describes go-gui VAR format
     ('b r3 w c17...'), but no colour tokens are emitted here.
     """
     moves = []
     current = self
     while True:
         if not current.children:
             break
         if not max(current.child_N) > 1:
             break
         current = current.children[np.argmax(current.child_N)]
         human = coords.to_human_coord(
             coords.unflatten_coords(current.fmove))
         moves.append("%s" % human)
     return ' '.join(moves)
Пример #12
0
    def test_legal_moves(self):
        """Check move legality on a fixed board, for both colour assignments.

        Known illegal and legal moves are checked one by one, and the bulk
        result of all_legal_moves() must agree with is_move_legal() for every
        flat index. The same checks are repeated with the stones' colours
        negated and the other side to play.
        """
        board = test_utils.load_board('''
            .O.O.XOX.
            O..OOOOOX
            ......O.O
            OO.....OX
            XO.....X.
            .O.......
            OX.....OO
            XX...OOOX
            .....O.X.
        ''')
        illegal_moves = parse_kgs_coords_set('A9 E9 J9')
        legal_moves = parse_kgs_coords_set('A4 G1 J1 H7') | {None}

        def check_position(position):
            for move in illegal_moves:
                with self.subTest(type='illegal', move=move):
                    self.assertFalse(position.is_move_legal(move))
            for move in legal_moves:
                with self.subTest(type='legal', move=move):
                    self.assertTrue(position.is_move_legal(move))
            # check that the bulk legal test agrees with move-by-move
            # illegal test.
            for flat, bulk_legal in enumerate(position.all_legal_moves()):
                move = unflatten_coords(flat)
                with self.subTest(type='bulk', move=move):
                    self.assertEqual(bulk_legal,
                                     position.is_move_legal(move))

        check_position(Position(board=board, to_play=BLACK))
        # flip the colors and check that everything is still (il)legal
        check_position(Position(board=-board, to_play=WHITE))
Пример #13
0
    def test_legal_moves(self):
        """Verify is_move_legal and all_legal_moves on a hand-built board.

        Runs once with the board as given and BLACK to play, then once with
        the board negated and WHITE to play; the expected legality of every
        move is the same in both runs.
        """
        board = test_utils.load_board('''
            .O.O.XOX.
            O..OOOOOX
            ......O.O
            OO.....OX
            XO.....X.
            .O.......
            OX.....OO
            XX...OOOX
            .....O.X.
        ''')
        illegal_moves = parse_kgs_coords_set('A9 E9 J9')
        legal_moves = parse_kgs_coords_set('A4 G1 J1 H7') | {None}
        # (subTest label, moves to try, assertion applied to is_move_legal)
        expectations = (
            ('illegal', illegal_moves, self.assertFalse),
            ('legal', legal_moves, self.assertTrue),
        )

        # Second pass flips the colors; everything must stay (il)legal.
        for stones, to_play in ((board, BLACK), (-board, WHITE)):
            position = Position(board=stones, to_play=to_play)
            for label, moves, check in expectations:
                for move in moves:
                    with self.subTest(type=label, move=move):
                        check(position.is_move_legal(move))
            # The bulk legality result must agree with the move-by-move
            # check at every flat index.
            bulk_legality = position.all_legal_moves()
            for index, bulk_legal in enumerate(bulk_legality):
                coord = unflatten_coords(index)
                with self.subTest(type='bulk', move=coord):
                    self.assertEqual(
                        bulk_legal, position.is_move_legal(coord))
Пример #14
0
 def most_visited_path(self):
     """Return a one-line description of the most visited line of play.

     Starting at this node, repeatedly follows the child with the highest
     visit count, emitting "<move> (<N>) ==> " for each step. If the most
     visited move has no child node, "GAME END " is emitted and the walk
     stops. The string ends with "Q: <value>" for the last node reached.

     Returns:
         str: the path description, terminated by a newline.
     """
     node = self
     output = []
     while node.children:
         next_kid = np.argmax(node.child_N)
         child = node.children.get(next_kid)
         if child is None:
             # Leave `node` on the last existing node so the Q lookup
             # below does not dereference None. The trailing space keeps
             # this marker apart from the "Q:" suffix.
             output.append("GAME END ")
             break
         node = child
         output.append("%s (%d) ==> " % (coords.to_human_coord(
             coords.unflatten_coords(node.fmove)), node.N))
     output.append("Q: {:.5f}\n".format(node.Q))
     return ''.join(output)
Пример #15
0
    def pick_move(self):
        '''Picks a move to play, based on MCTS readout statistics.

        Highest N is most robust indicator. In the early stage of the game
        (position.n <= temp_threshold), pick a move at random, weighted by
        visit count; later on, pick the absolute max.

        Returns:
            The result of coords.unflatten_coords for the chosen flat index.
        '''
        root = self.root
        if root.position.n > self.temp_threshold:
            fcoord = np.argmax(root.child_N)
        else:
            cdf = root.child_N.cumsum()
            cdf /= cdf[-1]
            selection = random.random()
            # side='right' returns the first index whose cumulative weight
            # is strictly greater than the draw. The default side='left'
            # would return index 0 for a draw of exactly 0.0 even when that
            # move has no visits, tripping the assertion below.
            fcoord = cdf.searchsorted(selection, side='right')
            assert root.child_N[fcoord] != 0
        return coords.unflatten_coords(fcoord)
Пример #16
0
    def pick_move(self):
        '''Picks a move to play, based on MCTS readout statistics.

        Highest N is most robust indicator. While position.n is at most
        temp_threshold, the move is sampled with probability proportional to
        its visit count; after that the most visited move is always taken.

        Returns:
            The result of coords.unflatten_coords for the chosen flat index.
        '''
        visit_counts = self.root.child_N
        if self.root.position.n > self.temp_threshold:
            fcoord = np.argmax(visit_counts)
        else:
            cdf = visit_counts.cumsum()
            cdf /= cdf[-1]
            selection = random.random()
            # random.random() can return exactly 0.0. With the default
            # side='left' that draw maps to index 0 regardless of its visit
            # count; side='right' only ever lands on an index whose
            # cumulative weight exceeds the draw, i.e. a visited move.
            fcoord = cdf.searchsorted(selection, side='right')
            assert visit_counts[fcoord] != 0
        return coords.unflatten_coords(fcoord)
Пример #17
0
 def heatmap(self, sort_order, node, prop):
     """List one instance attribute of ``node`` for its visited children.

     Walks ``sort_order`` and, for each key with a positive ``child_N``
     entry, formats the human-readable move next to
     ``node.__dict__[prop][key]``. Only the first 20 such lines are kept.

     Returns:
         str: the kept lines joined with newlines.
     """
     row = "{!s:6} {}"
     rows = []
     for key in sort_order:
         if node.child_N[key] > 0:
             move = coords.to_human_coord(coords.unflatten_coords(key))
             rows.append(row.format(move, node.__dict__.get(prop)[key]))
     first_rows = rows[:20]
     return "\n".join(first_rows)
Пример #18
0
def eval_player(player, positions, moves, results):
  """Score a player's policy and value outputs against recorded games.

  Args:
    player: passed through to batch_run_many.
    positions: passed through to batch_run_many.
    moves: indexable sequence of recorded moves, one per position.
    results: recorded outcomes, subtracted elementwise from the values.

  Returns:
    A pair (top_move_agree, square_err). top_move_agree holds, for each
    recorded move, whether it equals the highest-probability policy move.
    square_err is (values - results) squared, divided by 4 (presumably to
    normalise an error on a [-1, 1] scale -- confirm).
  """
  probs, values = batch_run_many(player, positions)
  best_flat = np.argmax(probs, axis=1)
  policy_moves = [coords.unflatten_coords(flat) for flat in best_flat]
  top_move_agree = [
      move == policy_moves[idx] for idx, move in enumerate(moves)
  ]
  residual = values - results
  square_err = residual**2/4
  return top_move_agree, square_err
Пример #19
0
def play(network, readouts, resign_threshold, verbosity=0):
    '''Play out one self-play game and return the player that played it.

    Args:
        network: object whose ``run(position)`` returns a ``(prob, val)``
            pair; it is also handed to ``MCTSPlayer``.
        readouts: number of *additional* readouts accumulated at the root
            before each move is picked.
        resign_threshold: forwarded to ``MCTSPlayer``. In about 5% of games
            it is replaced with -1.0 (intended to disable resignation).
        verbosity: 0 prints nothing. >= 1 prints Q and timing whenever
            ``position.n % 10 == 9``; >= 2 prints them after every move and
            a final summary to stderr; >= 3 additionally prints the
            position, the root statistics and the move just played.

    Returns:
        The ``MCTSPlayer`` after ``set_result`` has been called on it,
        either for a resignation or for a finished game.
    '''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            # The side to move resigns, so the result goes to the opponent
            # (the negated to_play value).
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # Seconds per 100 readouts, followed by the total for this move.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(coords.unflatten_coords(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q), file=sys.stderr)
        print(player.root.position,
              player.root.position.score(), file=sys.stderr)

    return player