def play(network): """Plays out a self-play match, returning a MCTSPlayer object containing: - the final position - the n x 362 tensor of floats representing the mcts search probabilities - the n-ary tensor of floats representing the original value-net estimate where n is the number of moves in the game """ readouts = FLAGS.num_readouts # defined in strategies.py # Disable resign in 5% of games if random.random() < FLAGS.resign_disable_pct: resign_threshold = -1.0 else: resign_threshold = None player = MCTSPlayer(network, resign_threshold=resign_threshold) player.initialize_game() # Must run this once at the start to expand the root node. first_node = player.root.select_leaf() prob, val = network.run(first_node.position) first_node.incorporate_results(prob, val, first_node) while True: start = time.time() player.root.inject_noise() current_readouts = player.root.N # we want to do "X additional readouts", rather than "up to X readouts". while player.root.N < current_readouts + readouts: player.tree_search() if FLAGS.verbose >= 3: print(player.root.position) print(player.root.describe()) if player.should_resign(): player.set_result(-1 * player.root.position.to_play, was_resign=True) break move = player.pick_move() player.play_move(move) if player.root.is_done(): player.set_result(player.root.position.result(), was_resign=False) break if (FLAGS.verbose >= 2) or (FLAGS.verbose >= 1 and player.root.position.n % 10 == 9): print("Q: {:.5f}".format(player.root.Q)) dur = time.time() - start print("%d: %d readouts, %.3f s/100. (%.2f sec)" % ( player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True) if FLAGS.verbose >= 3: print("Played >>", coords.to_gtp(coords.from_flat(player.root.fmove))) if FLAGS.verbose >= 2: utils.dbg("%s: %.3f" % (player.result_string, player.root.Q)) utils.dbg(player.root.position, player.root.position.score()) return player
def test_extract_data_normal_end(self): player = MCTSPlayer(DummyNet()) player.initialize_game() player.tree_search() player.play_move(None) player.tree_search() player.play_move(None) self.assertTrue(player.root.is_done()) player.set_result(player.root.position.result(), was_resign=False) data = list(player.extract_data()) self.assertEqual(2, len(data)) position, _, result = data[0] # White wins by komi self.assertEqual(go.WHITE, result) self.assertEqual("W+{}".format(player.root.position.komi), player.result_string)
def test_extract_data_resign_end(self): player = MCTSPlayer(DummyNet()) player.initialize_game() player.tree_search() player.play_move((0, 0)) player.tree_search() player.play_move(None) player.tree_search() # Black is winning on the board self.assertEqual(go.BLACK, player.root.position.result()) # But if Black resigns player.set_result(go.WHITE, was_resign=True) data = list(player.extract_data()) position, _, result = data[0] # Result should say White is the winner self.assertEqual(go.WHITE, result) self.assertEqual("W+R", player.result_string)