Exemplo n.º 1
0
def run_game(b1_role, mcts_role, b1_N, mcts_N, model, rand=0):
    """Play one game between a B1 searcher and a plain MCTS searcher.

    Both sides advance their own tree with the same joint move, for at
    most 1000 steps or until the B1 node reports ``terminal``. A text
    board of 6 rows by 8 columns is printed before each step and updated
    from every applied move whose GDL text contains ``drop``.

    Args:
        b1_role: roles controlled by the B1 side.
        mcts_role: roles controlled by the MCTS side.
        b1_N: forwarded to ``choose_move`` for the B1 side
            (presumably a search budget -- confirm against ``choose_move``).
        mcts_N: forwarded to ``choose_move`` for the MCTS side.
        model: passed to ``B1Node`` as ``model``.
        rand: ``choose_move`` receives ``step < rand`` as its last argument.

    Returns:
        A pair ``(b1_total, mcts_total)`` with the final scores summed over
        each side's roles.
    """
    print(f'b1 ({" ".join(b1_role)}), N={b1_N}')
    print(f'mcts ({" ".join(mcts_role)}), N={mcts_N}')
    b1_node = B1Node(propnet, data, model=model)
    mcts_node = MCTSNode(propnet, data)
    grid = [list('.' * 8) for _ in range(6)]

    def draw():
        # Pieces fill from index 0 upward, so print the rows reversed.
        print(*(''.join(cells) for cells in grid[::-1]), sep='\n')

    for step in range(1000):
        draw()
        legal = b1_node.propnet.legal_moves_dict(b1_node.data)
        early = step < rand
        b1_moves = choose_move(b1_node, b1_role, b1_N, legal, early)
        mcts_moves = choose_move(mcts_node, mcts_role, mcts_N, legal, early)

        # Merge both sides' choices, then order them by the propnet's roles.
        chosen = dict(zip(b1_role, b1_moves))
        chosen.update(zip(mcts_role, mcts_moves))
        joint = tuple(chosen[role] for role in propnet.roles)

        b1_node = b1_node.get_or_make_child(joint)
        mcts_node = mcts_node.get_or_make_child(joint)

        print('Moves were:')
        for move in propnet.legal:
            if move.id not in joint or move.move_gdl.strip() == 'noop':
                continue
            print(move.move_role, move.move_gdl)
            if 'drop' not in move.move_gdl:
                continue
            # Third token of the GDL text is the 1-based column.
            column = int(move.move_gdl.split()[2]) - 1
            for cells in grid:
                if cells[column] == '.':
                    cells[column] = move.move_role[0]
                    break

        if b1_node.terminal:
            draw()
            break

    print('Results:', b1_node.scores)
    b1_total = sum(b1_node.scores[role] for role in b1_role)
    mcts_total = sum(b1_node.scores[role] for role in mcts_role)
    return (b1_total, mcts_total)
Exemplo n.º 2
0
 def test_add_child_idempotency(self):
     # Adding the same move twice should hand back an equal child and
     # leave the parent's children untouched.
     move = 17
     parent = MCTSNode(go.Position())
     first = parent.maybe_add_child(move)
     children_before = copy.copy(parent.children)
     second = parent.maybe_add_child(move)
     self.assertEqual(first, second)
     self.assertEqual(children_before, parent.children)
Exemplo n.º 3
0
 def test_add_child_idempotency(self):
     # A repeated maybe_add_child(17) must not create a different child
     # nor alter root.children.
     tree = MCTSNode(go.Position())
     original_child = tree.maybe_add_child(17)
     snapshot = copy.copy(tree.children)
     repeated_child = tree.maybe_add_child(17)
     self.assertEqual(original_child, repeated_child)
     self.assertEqual(snapshot, tree.children)
Exemplo n.º 4
0
def play(agentBlack, agentWhite, vizualize=False):
    """Play a single game between two agents and log the search policies.

    The agents alternate, black first. Each agent keeps its own tree,
    except that when both arguments are the same object the two tree slots
    start out as one shared node. Both tree slots are advanced with the
    chosen action after every move.

    Args:
        agentBlack: agent moving on even plies.
        agentWhite: agent moving on odd plies.
        vizualize: when true, ``print_board`` is called after each move.

    Returns:
        ``(experience, z)`` where ``experience`` is a list of
        ``[state, mcts_policy]`` pairs (the state *before* the move) and
        ``z`` is ``endResult`` of the final state.
    """
    state = game.State.init()
    agents = [agentBlack, agentWhite]
    if agentBlack is agentWhite:
        # Self-play: both slots refer to the very same node object.
        trees = [MCTSNode(state)] * 2
    else:
        trees = [MCTSNode(state), MCTSNode(state)]

    experience = []
    ply = 0
    while True:
        side = ply % 2
        mover, tree = agents[side], trees[side]
        action = mover.get_action(tree)
        mcts_policy = mover.get_mcts_policy(tree)
        trees[0] = trees[0].next(action)
        trees[1] = trees[1].next(action)

        experience.append([state, mcts_policy])
        state = game.transition(state, action)
        if vizualize:
            print_board(state)
        finished = state.isEnd
        ply += 1
        if finished:
            break
    return experience, state.endResult
Exemplo n.º 5
0
 def test_add_child(self):
     # A newly added child is registered under its move and linked back
     # to its parent.
     move = 17
     size = utils_test.BOARD_SIZE
     parent = MCTSNode(size, go.Position(size))
     added = parent.maybe_add_child(move)
     self.assertIn(move, parent.children)
     self.assertEqual(added.parent, parent)
     self.assertEqual(added.fmove, move)
Exemplo n.º 6
0
 def initialize_game(self, position=None):
     """Root a new search tree at ``position`` and reset per-game records.

     A fresh ``go.Position()`` is used when ``position`` is None.
     """
     start = go.Position() if position is None else position
     self.root = MCTSNode(start)
     self.result = 0
     # Fresh lists so nothing carries over from an earlier game.
     self.comments, self.searches_pi, self.qs = [], [], []
Exemplo n.º 7
0
    def test_select_leaf(self):
        # With D9 given a much larger prior than every other move, the
        # second selection should land on the D9 child.
        priors = np.array([.02] * (go.N * go.N + 1))
        d9 = kgs_to_flat('D9')
        priors[d9] = 0.4
        tree = MCTSNode(SEND_TWO_RETURN_ONE)
        tree.select_leaf().incorporate_results(priors, 0, tree)

        self.assertEqual(tree.position.to_play, go.WHITE)
        self.assertEqual(tree.select_leaf(), tree.children[d9])
 def initialize_game(self, position=None):
   """Root a new search tree at ``position`` and reset per-game records.

   A fresh ``go.Position(self.board_size)`` is used when ``position`` is
   None.
   """
   size = self.board_size
   start = go.Position(size) if position is None else position
   self.root = MCTSNode(size, start)
   self.result, self.result_string = 0, None
   # Fresh lists so nothing carries over from an earlier game.
   self.comments, self.searches_pi, self.qs = [], [], []
Exemplo n.º 9
0
    def test_select_leaf(self):
        # With D9 given a much larger prior than every other move, the
        # second selection should land on the D9 child.
        d9 = coords.to_flat(coords.from_kgs('D9'))
        num_moves = go.N * go.N + 1
        priors = np.array([.02] * num_moves)
        priors[d9] = 0.4
        tree = MCTSNode(SEND_TWO_RETURN_ONE)
        first_leaf = tree.select_leaf()
        first_leaf.incorporate_results(priors, 0, tree)

        self.assertEqual(tree.position.to_play, go.WHITE)
        self.assertEqual(tree.select_leaf(), tree.children[d9])
Exemplo n.º 10
0
 def play(self, endtime):
     """Run 500 simulations from a fresh root and return the chosen move.

     The move id with the highest entry in ``root.move_counts[self.role]``
     wins; on ties the first one encountered is kept.

     NOTE(review): ``endtime`` is accepted but never consulted here.

     Returns:
         The ``move_gdl`` of the chosen move.
     """
     tree = MCTSNode(self.propnet)
     for _ in range(500):
         simulation(tree)
     tree.print_node()
     top_count, top_id = -1, None
     for move_id, count in tree.move_counts[self.role].items():
         if count > top_count:
             top_count, top_id = count, move_id
     chosen = self.propnet.id_to_move[top_id].move_gdl
     print('Made move', chosen)
     return chosen
Exemplo n.º 11
0
    def test_select_leaf(self):
        # With D9 given a much larger prior than every other move, the
        # second selection should land on the D9 child.
        size = utils_test.BOARD_SIZE
        d9 = coords.to_flat(size, coords.from_kgs(size, 'D9'))
        priors = np.array([.02] * (size * size + 1))
        priors[d9] = 0.4
        tree = MCTSNode(size, SEND_TWO_RETURN_ONE)
        tree.select_leaf().incorporate_results(priors, 0, tree)

        self.assertEqual(tree.position.to_play, go.WHITE)
        self.assertEqual(tree.select_leaf(), tree.children[d9])
Exemplo n.º 12
0
 def test_do_not_explore_past_finish(self):
     # After two consecutive passes the node must refuse new results and
     # select_leaf must stay on that node.
     priors = np.array([0.02] * (go.N * go.N + 1), dtype=np.float32)
     tree = MCTSNode(go.Position())
     tree.select_leaf().incorporate_results(priors, 0, tree)
     pass_move = coords.to_flat(None)
     pass_one = tree.maybe_add_child(pass_move)
     pass_one.incorporate_results(priors, 0, tree)
     pass_two = pass_one.maybe_add_child(pass_move)
     with self.assertRaises(AssertionError):
         pass_two.incorporate_results(priors, 0, tree)
     # Exploration should simply stop at the end position.
     selected = pass_two.select_leaf()
     self.assertEqual(selected, pass_two)
Exemplo n.º 13
0
 def test_do_not_explore_past_finish(self):
     # After two consecutive passes the node must refuse new results and
     # select_leaf must stay on that node.
     num_moves = go.N * go.N + 1
     priors = np.array([0.02] * num_moves, dtype=np.float32)
     tree = MCTSNode(go.Position())
     tree.select_leaf().incorporate_results(priors, 0, tree)
     pass_move = coords.flatten_coords(None)
     pass_one = tree.maybe_add_child(pass_move)
     pass_one.incorporate_results(priors, 0, tree)
     pass_two = pass_one.maybe_add_child(pass_move)
     with self.assertRaises(AssertionError):
         pass_two.incorporate_results(priors, 0, tree)
     # Exploration should simply stop at the end position.
     self.assertEqual(pass_two.select_leaf(), pass_two)
Exemplo n.º 14
0
 def test_dont_pick_unexpanded_child(self):
     favourite = 17
     priors = np.array([0.001] * (go.N * go.N + 1))
     # One move is made overwhelmingly likely so the search walks the same
     # path twice, even with a virtual loss applied.
     priors[favourite] = 0.999
     tree = MCTSNode(go.Position())
     tree.incorporate_results(priors, 0, tree)
     first_pick = tree.select_leaf()
     self.assertEqual(first_pick.fmove, favourite)
     first_pick.add_virtual_loss(up_to=tree)
     # The child has not been evaluated and incorporated yet, so selecting
     # again should return that same node.
     second_pick = tree.select_leaf()
     self.assertIs(first_pick, second_pick)
Exemplo n.º 15
0
 def test_action_flipping(self):
     np.random.seed(1)
     num_moves = go.N * go.N + 1
     # Near-uniform priors with a little seeded jitter.
     priors = np.array([.02] * num_moves)
     priors = priors + np.random.random([num_moves]) * 0.001
     black_tree = MCTSNode(go.Position())
     white_tree = MCTSNode(go.Position(to_play=go.WHITE))
     black_tree.select_leaf().incorporate_results(priors, 0, black_tree)
     white_tree.select_leaf().incorporate_results(priors, 0, white_tree)
     # With nothing but priors to go on, the side to move must not affect
     # which move is selected.
     black_pick = black_tree.select_leaf()
     white_pick = white_tree.select_leaf()
     self.assertEqual(black_pick.fmove, white_pick.fmove)
     self.assertEqualNPArray(black_tree.child_action_score,
                             white_tree.child_action_score)
Exemplo n.º 16
0
 def initialize_game(self, position=None):
     """Root a new search tree at ``position`` and reset per-game records.

     A fresh ``go.Position()`` is used when ``position`` is None.
     """
     start = go.Position() if position is None else position
     self.root = MCTSNode(start)
     self.result, self.result_string = 0, None
     # Fresh lists so nothing carries over from an earlier game.
     self.comments, self.searches_pi, self.qs = [], [], []
Exemplo n.º 17
0
def play_game(model_first, model_second):
    """Play one game between two models and collect per-move statistics.

    Each model gets its own ``MCTSNode`` tree and config. The node to move
    and the waiting node swap roles after every move, and the active config
    swaps with them. The game runs for at most ``board_dim ** 2`` moves of
    the first config, stopping early once the node to move is terminal.

    Args:
        model_first: model that moves first; must expose ``config`` and
            ``fit_batch``.
        model_second: the opposing model, same interface.

    Returns:
        A dict of numpy arrays keyed by ``state``, ``curr_p``, ``curr_v``,
        ``curr_W``, ``curr_N``, ``next_p``, ``next_v``, ``move`` and
        ``time``. ``state``, ``curr_v`` and ``next_v`` carry one extra
        trailing entry: the final state and ``-1`` for each value list.
    """
    config_first, config_second = model_first.config, model_second.config

    def make_evaluator(model):
        def eval_fn(state):
            # Evaluate a single state as a batch of one, without gradients.
            with torch.no_grad():
                v, p = model.fit_batch((np.array([state]), ), train=False)
                return v, p[0]

        return eval_fn

    eval_first = make_evaluator(model_first)
    eval_second = make_evaluator(model_second)

    set_config(config_first)
    start_state = get_start_state()
    mover = MCTSNode(start_state, evaluator=eval_first)
    waiter = MCTSNode(start_state, evaluator=eval_second)
    config, next_config = config_first, config_second

    records = []
    for _ in RangeProgress(0, config_first.board_dim**2, desc='Moves'):
        set_config(config)

        began = time()
        if config.eval_mcts_iterations == 0:
            # No search requested: score moves by the node's ``p``.
            score = mover.p
        else:
            for _ in RangeProgress(0, config.eval_mcts_iterations,
                                   desc='MCTS'):
                mover.select()
            score = mover.N
        move = np.unravel_index(score.argmax(), score.shape)

        records.append({
            'state': mover.state,
            'curr_p': mover.p,
            'curr_v': mover.value,
            'curr_W': mover.W,
            'curr_N': mover.N,
            'next_p': waiter.p,
            'next_v': waiter.value,
            'move': move,
            'time': time() - began,
        })

        # Advance both trees by the move and hand the turn over.
        waiter, mover = mover.step(move), waiter.step(move)
        config, next_config = next_config, config
        if mover.terminal:
            break

    merged = {key: [rec[key] for rec in records] for key in records[0]}
    merged['state'].append(mover.state)
    merged['curr_v'].append(-1)
    merged['next_v'].append(-1)
    return {key: np.array(values) for key, values in merged.items()}
Exemplo n.º 18
0
  def test_backup_incorporate_results(self):
    # Checks the N and Q bookkeeping as values are backed up to the root.
    size = utils_test.BOARD_SIZE
    priors = np.array([.02] * (size * size + 1))
    tree = MCTSNode(size, SEND_TWO_RETURN_ONE)
    tree.select_leaf().incorporate_results(priors, 0, tree)

    child = tree.select_leaf()
    child.incorporate_results(priors, -1, tree)  # white wins!

    # Two visits at the root: one for itself, one through this child.
    self.assertEqual(tree.N, 2)
    # Root Q: prior 0 plus visits valued 0 and -1 -> mean of 0, 0, -1.
    self.assertAlmostEqual(tree.Q, -1/3)
    # The child has been visited once.
    self.assertEqual(tree.child_N[child.fmove], 1)
    self.assertEqual(child.N, 1)
    # The child started from its parent's Q (0), so its Q is the mean
    # of 0 and -1.
    self.assertAlmostEqual(tree.child_Q[child.fmove], -0.5)
    self.assertAlmostEqual(child.Q, -0.5)

    # The next select_leaf() is assumed to descend one level further:
    #   root
    #     \
    #     child
    #       \
    #       grandchild
    # This holds here because root is W to play and child was a W win.
    self.assertEqual(tree.position.to_play, go.WHITE)
    grandchild = tree.select_leaf()
    grandchild.incorporate_results(priors, -0.2, tree)  # another white semi-win
    self.assertEqual(tree.N, 3)
    # mean of 0, 0, -1, -0.2
    self.assertAlmostEqual(tree.Q, -0.3)

    self.assertEqual(child.N, 2)
    self.assertEqual(grandchild.N, 1)
    # mean of 0, -1, -0.2
    self.assertAlmostEqual(child.Q, tree.child_Q[child.fmove])
    self.assertAlmostEqual(child.Q, -0.4)
    # mean of -1, -0.2
    self.assertAlmostEqual(child.child_Q[grandchild.fmove], -0.6)
    self.assertAlmostEqual(grandchild.Q, -0.6)
Exemplo n.º 19
0
 def test_action_flipping(self):
     np.random.seed(1)
     num_moves = go.N * go.N + 1
     # Near-uniform priors with a little seeded jitter.
     jitter = np.random.random([num_moves]) * 0.001
     priors = np.array([.02] * num_moves) + jitter
     black_tree = MCTSNode(go.Position())
     white_tree = MCTSNode(go.Position(to_play=go.WHITE))
     black_tree.select_leaf().incorporate_results(priors, 0, black_tree)
     white_tree.select_leaf().incorporate_results(priors, 0, white_tree)
     # With nothing but priors to go on, the side to move must not affect
     # which move is selected.
     black_pick = black_tree.select_leaf()
     white_pick = white_tree.select_leaf()
     self.assertEqual(black_pick.fmove, white_pick.fmove)
     self.assertEqualNPArray(
         black_tree.child_action_score, white_tree.child_action_score)
Exemplo n.º 20
0
    def test_never_select_illegal_moves(self):
        illegal_move = 1
        priors = np.array([0.02] * (go.N * go.N + 1))
        # Pretend the network mistakenly puts a high weight on an illegal move.
        priors[illegal_move] = 0.99
        tree = MCTSNode(SEND_TWO_RETURN_ONE)
        tree.incorporate_results(priors, 0, tree)
        # Pretend the root was visited many times, which inflates the
        # action score of unvisited moves.
        tree.N = 100000
        tree.child_N[tree.position.all_legal_moves()] = 10000
        # Selection must not raise, and must not return the illegal move.
        picked = tree.select_leaf()
        self.assertNotEqual(picked.fmove, illegal_move)

        # Injecting noise must not change that either.
        for _ in range(10):
            tree.inject_noise()
            picked = tree.select_leaf()
            self.assertNotEqual(picked.fmove, illegal_move)
Exemplo n.º 21
0
    def test_never_select_illegal_moves(self):
        bad_move = 1
        num_moves = go.N * go.N + 1
        priors = np.array([0.02] * num_moves)
        # Suppose the network wrongly gives an illegal move a high weight.
        priors[bad_move] = 0.99
        tree = MCTSNode(SEND_TWO_RETURN_ONE)
        tree.incorporate_results(priors, 0, tree)
        # Suppose the root has many visits, boosting the action score of
        # unvisited moves.
        tree.N = 100000
        legal = tree.position.all_legal_moves()
        tree.child_N[legal] = 10000
        # This must not throw, and the leaf must not be the illegal move.
        leaf = tree.select_leaf()
        self.assertNotEqual(leaf.fmove, bad_move)

        # Noise injection must not make the illegal move selectable.
        for _ in range(10):
            tree.inject_noise()
            leaf = tree.select_leaf()
            self.assertNotEqual(leaf.fmove, bad_move)
Exemplo n.º 22
0
 def test_dont_pick_unexpanded_child(self):
     likely_move = 17
     num_moves = go.N * go.N + 1
     priors = np.array([0.001] * num_moves)
     # Make a single move dominant so the search goes down that path twice
     # despite the virtual loss.
     priors[likely_move] = 0.999
     tree = MCTSNode(go.Position())
     tree.incorporate_results(priors, 0, tree)
     leaf_a = tree.select_leaf()
     self.assertEqual(leaf_a.fmove, likely_move)
     leaf_a.add_virtual_loss(up_to=tree)
     # That child is still awaiting evaluation and incorporation, so a
     # second selection should return the identical node.
     leaf_b = tree.select_leaf()
     self.assertIs(leaf_a, leaf_b)
Exemplo n.º 23
0
    def test_backup_incorporate_results(self):
        # Checks the N and Q bookkeeping as values are backed up to the root.
        num_moves = utils_test.BOARD_SIZE * utils_test.BOARD_SIZE + 1
        priors = np.array([.02] * num_moves)
        tree = MCTSNode(utils_test.BOARD_SIZE, SEND_TWO_RETURN_ONE)
        tree.select_leaf().incorporate_results(priors, 0, tree)

        child = tree.select_leaf()
        child.incorporate_results(priors, -1, tree)  # white wins!

        # Two visits at the root: one for itself, one through this child.
        self.assertEqual(tree.N, 2)
        # Root Q: prior 0 plus visits valued 0 and -1 -> mean of 0, 0, -1.
        self.assertAlmostEqual(tree.Q, -1 / 3)
        # The child has been visited once.
        self.assertEqual(tree.child_N[child.fmove], 1)
        self.assertEqual(child.N, 1)
        # The child started from its parent's Q (0), so its Q is the mean
        # of 0 and -1.
        self.assertAlmostEqual(tree.child_Q[child.fmove], -0.5)
        self.assertAlmostEqual(child.Q, -0.5)

        # The next select_leaf() is assumed to descend one level further:
        #   root
        #     \
        #     child
        #       \
        #       grandchild
        # This holds here because root is W to play and child was a W win.
        self.assertEqual(tree.position.to_play, go.WHITE)
        grandchild = tree.select_leaf()
        grandchild.incorporate_results(priors, -0.2, tree)  # another white semi-win
        self.assertEqual(tree.N, 3)
        # mean of 0, 0, -1, -0.2
        self.assertAlmostEqual(tree.Q, -0.3)

        self.assertEqual(child.N, 2)
        self.assertEqual(grandchild.N, 1)
        # mean of 0, -1, -0.2
        self.assertAlmostEqual(child.Q, tree.child_Q[child.fmove])
        self.assertAlmostEqual(child.Q, -0.4)
        # mean of -1, -0.2
        self.assertAlmostEqual(child.child_Q[grandchild.fmove], -0.6)
        self.assertAlmostEqual(grandchild.Q, -0.6)
Exemplo n.º 24
0
    def select_move(self, game_state):
        """Choose a move by scoring each legal move with random rollouts.

        One ``MCTSNode`` is created per legal move. For ``self.num_rounds``
        rounds, ``self.select_child`` picks a node and
        ``self.simulate_random_game`` scores its move; the score and rollout
        count are accumulated on the node. The move(s) with the highest
        average score among nodes that had at least one rollout are the
        candidates.

        Diagnostics are printed when ``self.DEBUG`` or ``self.DUMP`` is set.

        Args:
            game_state: current state; must provide ``legal_moves``,
                ``world_tick`` and (for the DUMP output) ``world``.

        Returns:
            A uniformly random choice among the best-scoring moves, or
            ``None`` when no node received a rollout.
        """
        possible_moves = game_state.legal_moves(self)
        nodes = []
        cur_round = 0

        if self.DEBUG:
            print(
                '******************************************************************************************'
            )
            print(
                'MCTSAgent {0} thinking about move on world tick {1}.'.format(
                    self.name, game_state.world_tick))
            print('...There are {0} moves that can be explored in {1} rounds.'.
                  format(len(possible_moves), self.num_rounds))

        # Init all possible moves that can be explored
        for possible_move in possible_moves:
            nodes.append(MCTSNode(possible_move))

        for cur_round in range(1, self.num_rounds + 1):
            simulation_node = self.select_child(nodes)
            # NOTE(review): the literal 50 is presumably a rollout depth or
            # step limit -- confirm against simulate_random_game.
            agent_score = self.simulate_random_game(game_state,
                                                    simulation_node.move, 50)
            if self.DEBUG:
                print('......Explored move {0} which scored {1}.'.format(
                    simulation_node.move, agent_score))
            simulation_node.num_rollouts += 1
            simulation_node.total_score += agent_score

        # Now we need to pick from the best moves (in the case of a tie, we just choose a random one)
        best_score = -99999999
        best_moves = []

        for node in nodes:
            if node.num_rollouts > 0:  # Only choose a move if we've done a simulation on it.
                if (not best_moves) or node.avg_score() > best_score:
                    best_moves = [node.move]
                    best_score = node.avg_score()
                elif node.avg_score(
                ) == best_score:  # This is as good as our previous best move
                    best_moves.append(node.move)

        if self.DUMP:
            print(
                '******************************************************************************************'
            )
            unique_rollouts = 0
            print('................. DUMP...................')
            print(
                'MCTSAgent {0} thinking about move on world tick {1}.'.format(
                    self.name, game_state.world_tick))
            # Note: avg_score() is called here for every node, including
            # those that never had a rollout.
            for node in nodes:
                print(
                    '...Action {0} was explored {1} times with avg score of {2}'
                    .format(node.move, node.num_rollouts, node.avg_score()))
                if node.num_rollouts > 0:
                    unique_rollouts += 1
            print('.........................................')
            print('')
            print(
                'MCTSAgent {0} finished thinking about move on world tick {1}.'
                .format(self.name, game_state.world_tick))
            print(
                '   -> Found {0} possible moves within {1} unique rollouts, having a score of {2}'
                .format(len(best_moves), unique_rollouts, best_score))
            if len(best_moves) == 1:
                print('   -> Best move was {0} for a score of {1}.'.format(
                    best_moves[0], best_score))
                print('   -> Ships remaining: {0}'.format(
                    game_state.world.get_ship_count()))
            else:
                print('   -> Multiple best moves: {0}'.format(len(best_moves)))
                print('   -> Ships remaining: {0}'.format(
                    game_state.world.get_ship_count()))
            print(
                '******************************************************************************************'
            )
            print('')

        # Nothing was simulated, so there is nothing to recommend.
        if len(best_moves) == 0:
            return None

        # For variety, randomly select among all equally good moves.
        return random.choice(best_moves)
Exemplo n.º 25
0
 def test_add_child(self):
   # A newly added child is registered under its move and linked back to
   # its parent.
   move = 17
   size = utils_test.BOARD_SIZE
   parent = MCTSNode(size, go.Position(size))
   added = parent.maybe_add_child(move)
   self.assertIn(move, parent.children)
   self.assertEqual(added.parent, parent)
   self.assertEqual(added.fmove, move)
Exemplo n.º 26
0
from collections import deque

from atari.pytorch.NNet import NNetWrapper
from atari.AtariGame import AtariGame
from mcts import MCTSNode
from GameSession import GameSession

# Build the game, its initial view, the network wrapper and the search root.
game_i = AtariGame()
view = game_i.getInitBoard()
nnet_checkpoint = NNetWrapper(game_i)
mtsc_root = MCTSNode(view, action_size=game_i.getActionSize())

# Restore previously saved network state before running any episodes.
nnet_checkpoint.load_checkpoint()

# Run 1500 episodes, printing the session's max_score after each one.
gameSession = GameSession(nnet_checkpoint, game_i, mtsc_root)
for x in range(1500):
    gameSession.execute_episode()
    print(gameSession.max_score)

# Accumulate the examples returned by 50 play_game() calls.
train_examples = []
for y in range(50):
    train_examples += gameSession.play_game()

#gameSession.play_game(render=True)

# Train on the collected examples, then persist the updated network.
nnet_checkpoint.train(train_examples)

nnet_checkpoint.save_checkpoint()
Exemplo n.º 27
0
class MCTSPlayerMixin:
    """Mixin that drives an MCTS tree using a policy/value network.

    If `simulations_per_move` is nonzero, `suggest_move` performs that many
    reads before playing. Otherwise, it uses `seconds_per_move` of wall time.
    """

    def __init__(self, network, seconds_per_move=5, simulations_per_move=0,
                 resign_threshold=-0.90, verbosity=0, two_player_mode=False,
                 num_parallel=8):
        """Store the search settings and reset all per-game state.

        Args:
            network: object providing `run_many` (and `save_file`, used by
                `to_sgf`).
            seconds_per_move: wall-time budget used when
                `simulations_per_move` is 0.
            simulations_per_move: number of additional root visits to search
                per move; 0 selects the time budget instead.
            resign_threshold: stored as `-abs(resign_threshold)`, so the
                sign of the argument does not matter.
            verbosity: higher values print more diagnostics to stderr.
            two_player_mode: when true, `temp_threshold` is -1 and
                `play_move` does not record `searches_pi`.
            num_parallel: default number of leaves gathered per
                `tree_search` call.
        """
        self.network = network
        self.seconds_per_move = seconds_per_move
        self.simulations_per_move = simulations_per_move
        self.verbosity = verbosity
        self.two_player_mode = two_player_mode
        if two_player_mode:
            self.temp_threshold = -1
        else:
            self.temp_threshold = TEMPERATURE_CUTOFF
        self.num_parallel = num_parallel
        self.qs = []
        self.comments = []
        self.searches_pi = []
        self.root = None
        self.result = 0
        self.result_string = None
        self.resign_threshold = -abs(resign_threshold)
        super().__init__()

    def initialize_game(self, position=None):
        """Root a new tree at `position` (default: a fresh `go.Position()`)
        and clear the per-game records."""
        if position is None:
            position = go.Position()
        self.root = MCTSNode(position)
        self.result = 0
        self.result_string = None
        self.comments = []
        self.searches_pi = []
        self.qs = []

    def suggest_move(self, position):
        ''' Used for playing a single game.
        For parallel play, use initialize_move, select_leaf,
        incorporate_results, and pick_move
        '''
        start = time.time()

        if self.simulations_per_move == 0:
            # Time-budgeted search.
            while time.time() - start < self.seconds_per_move:
                self.tree_search()
        else:
            # Visit-budgeted search, counted relative to the visits the
            # root already has.
            current_readouts = self.root.N
            while self.root.N < current_readouts + self.simulations_per_move:
                self.tree_search()
            if self.verbosity > 0:
                print("%d: Searched %d times in %s seconds\n\n" % (
                    position.n, self.simulations_per_move, time.time() - start), file=sys.stderr)

        # print some stats on anything with probability > 1%
        if self.verbosity > 2:
            print(self.root.describe(), file=sys.stderr)
            print('\n\n', file=sys.stderr)
        if self.verbosity > 3:
            print(self.root.position, file=sys.stderr)

        return self.pick_move()

    def play_move(self, c):
        '''
        Notable side effects:
          - finalizes the probability distribution according to
          this roots visit counts into the class' running tally, `searches_pi`
          - Makes the node associated with this move the root, for future
            `inject_noise` calls.

        Returns True on success. On `go.IllegalMove` the records appended
        for this move are rolled back and False is returned.
        '''
        if not self.two_player_mode:
            self.searches_pi.append(
                self.root.children_as_pi(self.root.position.n < self.temp_threshold))
        self.qs.append(self.root.Q)  # Save our resulting Q.
        self.comments.append(self.root.describe())
        try:
            self.root = self.root.maybe_add_child(coords.to_flat(c))
        except go.IllegalMove:
            print("Illegal move")
            # Undo the bookkeeping done above for the rejected move.
            if not self.two_player_mode:
                self.searches_pi.pop()
            self.qs.pop()
            self.comments.pop()
            return False
        self.position = self.root.position  # for showboard
        # Drop the old root's children now that the root has moved on.
        del self.root.parent.children
        return True  # GTP requires positive result.

    def pick_move(self):
        '''Picks a move to play, based on MCTS readout statistics.

        Highest N is most robust indicator. In the early stage of the game, pick
        a move weighted by visit count; later on, pick the absolute max.'''
        if self.root.position.n > self.temp_threshold:
            fcoord = np.argmax(self.root.child_N)
        else:
            # Sample a move with probability proportional to its visits.
            cdf = self.root.child_N.cumsum()
            cdf /= cdf[-1]
            selection = random.random()
            fcoord = cdf.searchsorted(selection)
            assert self.root.child_N[fcoord] != 0
        return coords.from_flat(fcoord)

    def tree_search(self, num_parallel=None):
        """Gather up to `num_parallel` leaves, evaluate them in one network
        call and back the results up to the root.

        Finished-game leaves are scored directly (+1 / -1 from the position
        score) and are not sent to the network. The `failsafe` counter caps
        the number of selection attempts at twice `num_parallel`.
        """
        if num_parallel is None:
            num_parallel = self.num_parallel
        leaves = []
        failsafe = 0
        while len(leaves) < num_parallel and failsafe < num_parallel * 2:
            failsafe += 1
            leaf = self.root.select_leaf()
            if self.verbosity >= 4:
                print(self.show_path_to_root(leaf))
            # if game is over, override the value estimate with the true score
            if leaf.is_done():
                value = 1 if leaf.position.score() > 0 else -1
                leaf.backup_value(value, up_to=self.root)
                continue
            leaf.add_virtual_loss(up_to=self.root)
            leaves.append(leaf)
        if leaves:
            move_probs, values = self.network.run_many(
                [leaf.position for leaf in leaves])
            for leaf, move_prob, value in zip(leaves, move_probs, values):
                leaf.revert_virtual_loss(up_to=self.root)
                leaf.incorporate_results(move_prob, value, up_to=self.root)

    def show_path_to_root(self, node):
        """Describe the moves leading from the current root to `node`.

        Returns None when the node's position has no recent moves, otherwise
        a space-separated string such as "b-D4 w-Q16", with a suffix when
        the depth cutoff or the end of the game has been reached.
        """
        pos = node.position
        diff = node.position.n - self.root.position.n
        if len(pos.recent) == 0:
            return

        def fmt(move): return "{}-{}".format('b' if move.color == 1 else 'w',
                                             coords.to_kgs(move.move))
        # `recent[-0:]` is the *whole* history, so when `node` is the root
        # itself the path must be made empty explicitly.
        steps = pos.recent[-diff:] if diff else ()
        path = " ".join(fmt(move) for move in steps)
        if node.position.n >= MAX_DEPTH:
            path += " (depth cutoff reached) %0.1f" % node.position.score()
        elif node.position.is_game_over():
            path += " (game over) %0.1f" % node.position.score()
        return path

    def should_resign(self):
        '''Returns true if the player resigned.  No further moves should be played'''
        return self.root.Q_perspective < self.resign_threshold

    def set_result(self, winner, was_resign):
        """Record the winner and the matching result string ("B+R"/"W+R"
        for resignations, otherwise the root position's result string)."""
        self.result = winner
        if was_resign:
            string = "B+R" if winner == go.BLACK else "W+R"
        else:
            string = self.root.position.result_string()
        self.result_string = string

    def _sgf_comments(self, use_comments=True):
        """Build the comment list for an SGF export.

        The resign threshold is prepended to the first comment. A new list
        is always returned, so `self.comments` is left untouched and
        repeated exports do not stack the header.
        """
        if not use_comments:
            return []
        comments = list(self.comments) or ['No comments.']
        comments[0] = ("Resign Threshold: %0.3f\n" %
                       self.resign_threshold) + comments[0]
        return comments

    def to_sgf(self, use_comments=True):
        """Serialize the game so far via `sgf_wrapper.make_sgf`.

        Both player names are the basename of the network's save file, or
        "Unknown" when that is empty. Requires `set_result` to have been
        called first.
        """
        assert self.result_string is not None
        pos = self.root.position
        comments = self._sgf_comments(use_comments)
        return sgf_wrapper.make_sgf(pos.recent, self.result_string,
                                    white_name=os.path.basename(
                                        self.network.save_file) or "Unknown",
                                    black_name=os.path.basename(
                                        self.network.save_file) or "Unknown",
                                    comments=comments)

    def is_done(self):
        """True once a result has been set or the root reports it is done."""
        return self.result != 0 or self.root.is_done()

    def extract_data(self):
        """Yield `(position, pi, result)` for every recorded move.

        Requires one `searches_pi` entry per move played and a non-zero
        result.
        """
        assert len(self.searches_pi) == self.root.position.n
        assert self.result != 0
        for pwc, pi in zip(go.replay_position(self.root.position, self.result),
                           self.searches_pi):
            yield pwc.position, pi, pwc.result

    def chat(self, msg_type, sender, text):
        """Answer a chat message; only `text` is inspected.

        Keywords are matched case-insensitively as substrings, in the order
        'winrate', 'nextplay', 'fortune', 'help'.
        """
        default_response = "Supported commands are 'winrate', 'nextplay', 'fortune', and 'help'."
        if self.root is None or self.root.position.n == 0:
            return "I'm not playing right now.  " + default_response

        lowered = text.lower()
        if 'winrate' in lowered:
            # Map |Q| from [0, 1] onto a 50-100% figure for the leading side.
            wr = (abs(self.root.Q) + 1.0) / 2.0
            color = "Black" if self.root.Q > 0 else "White"
            return "{:s} {:.2f}%".format(color, wr * 100.0)
        elif 'nextplay' in lowered:
            return "I'm thinking... " + self.root.most_visited_path()
        elif 'fortune' in lowered:
            return "You're feeling lucky!"
        elif 'help' in lowered:
            return "I can't help much with go -- try ladders!  Otherwise: " + default_response
        else:
            return default_response
Exemplo n.º 28
0
class MCTSPlayerMixin:
    """Plays a game by running Monte Carlo tree search from a root ``MCTSNode``.

    The mixin owns the search-tree root, the per-move bookkeeping gathered
    while a game is played (``searches_pi``, ``qs``, ``comments``) and the
    final result.  Leaf positions are evaluated through ``network.run_many``.
    """

    # If 'simulations_per_move' is nonzero, it will perform that many reads before playing.
    # Otherwise, it uses 'seconds_per_move' of wall time.
    def __init__(self, network, seconds_per_move=5, simulations_per_move=0,
                 resign_threshold=-0.90, verbosity=0, two_player_mode=False,
                 num_parallel=8):
        """Store the search configuration; no game is started yet.

        Args:
            network: evaluator offering ``run_many(positions)`` that returns
                ``(move_probs, values)``; its ``name`` is used by `to_sgf`.
            seconds_per_move: wall-time budget per move, used only when
                ``simulations_per_move`` is 0.
            simulations_per_move: number of additional root visits to reach
                before picking a move; 0 selects the time-based budget.
            resign_threshold: stored as ``-abs(resign_threshold)``, so either
                sign may be passed.
            verbosity: higher values print more diagnostics to stderr.
            two_player_mode: when true the sampling threshold is -1 (moves
                are always the most-visited child) and ``searches_pi`` is
                not recorded by `play_move`.
            num_parallel: default number of leaves per `tree_search` batch.
        """
        self.network = network
        self.seconds_per_move = seconds_per_move
        self.simulations_per_move = simulations_per_move
        self.verbosity = verbosity
        self.two_player_mode = two_player_mode
        if two_player_mode:
            self.temp_threshold = -1
        else:
            self.temp_threshold = TEMPERATURE_CUTOFF
        self.num_parallel = num_parallel
        self.qs = []
        self.comments = []
        self.searches_pi = []
        self.root = None
        self.result = 0
        self.result_string = None
        self.resign_threshold = -abs(resign_threshold)
        super().__init__()

    def initialize_game(self, position=None):
        """Start a new game from ``position`` (a fresh ``go.Position()`` if None).

        Replaces the root and clears the result and all per-move records.
        """
        if position is None:
            position = go.Position()
        self.root = MCTSNode(position)
        self.result = 0
        self.result_string = None
        self.comments = []
        self.searches_pi = []
        self.qs = []

    def suggest_move(self, position):
        ''' Used for playing a single game.
        For parallel play, use initialize_move, select_leaf,
        incorporate_results, and pick_move

        ``position`` is only read for its move number in the verbose
        report; the search itself always runs from ``self.root``.
        '''
        start = time.time()

        if self.simulations_per_move == 0:
            while time.time() - start < self.seconds_per_move:
                self.tree_search()
        else:
            current_readouts = self.root.N
            while self.root.N < current_readouts + self.simulations_per_move:
                self.tree_search()
            if self.verbosity > 0:
                print("%d: Searched %d times in %s seconds\n\n" % (
                    position.n, self.simulations_per_move, time.time() - start), file=sys.stderr)

        # print some stats on anything with probability > 1%
        if self.verbosity > 2:
            print(self.root.describe(), file=sys.stderr)
            print('\n\n', file=sys.stderr)
        if self.verbosity > 3:
            print(self.root.position, file=sys.stderr)

        return self.pick_move()

    def play_move(self, c):
        '''
        Notable side effects:
          - finalizes the probability distribution according to
          this root's visit counts into the class' running tally, `searches_pi`
          - Makes the node associated with this move the root, for future
            `inject_noise` calls.
          - Drops the previous root's `children`, so the rest of the old
            tree is no longer referenced from it.
        '''
        if not self.two_player_mode:
            self.searches_pi.append(
                self.root.children_as_pi(self.root.position.n < self.temp_threshold))
        self.qs.append(self.root.Q)  # Save our resulting Q.
        self.comments.append(self.root.describe())
        self.root = self.root.maybe_add_child(coords.to_flat(c))
        self.position = self.root.position  # for showboard
        del self.root.parent.children
        return True  # GTP requires positive result.

    def pick_move(self):
        '''Picks a move to play, based on MCTS readout statistics.

        Highest N is most robust indicator. In the early stage of the game, pick
        a move weighted by visit count; later on, pick the absolute max.'''
        # ">=" keeps the "early game" boundary identical to the one play_move
        # uses (n < temp_threshold) when it records searches_pi.
        if self.root.position.n >= self.temp_threshold:
            fcoord = np.argmax(self.root.child_N)
        else:
            # Sample a child with probability proportional to its visit count.
            cdf = self.root.child_N.cumsum()
            cdf /= cdf[-1]
            selection = random.random()
            fcoord = cdf.searchsorted(selection)
            assert self.root.child_N[fcoord] != 0
        return coords.from_flat(fcoord)

    def tree_search(self, num_parallel=None):
        """Run one batched round of search from the root.

        Up to ``num_parallel`` leaves (default ``self.num_parallel``) are
        selected, with at most twice that many selection attempts.  Finished
        games are backed up immediately with +1/-1 from the position's score;
        the remaining leaves receive a virtual loss, are evaluated together
        by ``network.run_many`` and then have the results incorporated.
        """
        if num_parallel is None:
            num_parallel = self.num_parallel
        leaves = []
        failsafe = 0
        while len(leaves) < num_parallel and failsafe < num_parallel * 2:
            failsafe += 1
            leaf = self.root.select_leaf()
            if self.verbosity >= 4:
                # Diagnostics go to stderr like every other report here, so
                # stdout stays clean for whatever protocol is driving us.
                print(self.show_path_to_root(leaf), file=sys.stderr)
            # if game is over, override the value estimate with the true score
            if leaf.is_done():
                value = 1 if leaf.position.score() > 0 else -1
                leaf.backup_value(value, up_to=self.root)
                continue
            leaf.add_virtual_loss(up_to=self.root)
            leaves.append(leaf)
        if leaves:
            move_probs, values = self.network.run_many(
                [leaf.position for leaf in leaves])
            for leaf, move_prob, value in zip(leaves, move_probs, values):
                leaf.revert_virtual_loss(up_to=self.root)
                leaf.incorporate_results(move_prob, value, up_to=self.root)

    def show_path_to_root(self, node):
        """Describe the moves leading from the current root to ``node``.

        Returns:
            A space-separated string of ``<colour>-<kgs coord>`` entries,
            followed by a note and the score when the depth cutoff is reached
            or the game is over.  Returns None when the node's position has
            no recorded moves at all.
        """
        pos = node.position
        diff = node.position.n - self.root.position.n
        if len(pos.recent) == 0:
            return

        def fmt(move):
            # NOTE(review): colour value 1 is rendered as black -- presumably
            # go.BLACK; confirm against the go module.
            return "{}-{}".format('b' if move.color == 1 else 'w',
                                  coords.to_kgs(move.move))

        # recent[-0:] would be the whole history, so a node that *is* the
        # root must be special-cased to yield an empty path.
        played = pos.recent[-diff:] if diff > 0 else ()
        path = " ".join(fmt(move) for move in played)
        if node.position.n >= MAX_DEPTH:
            path += " (depth cutoff reached) %0.1f" % node.position.score()
        elif node.position.is_game_over():
            path += " (game over) %0.1f" % node.position.score()
        return path

    def should_resign(self):
        '''Returns true if the player resigned.  No further moves should be played'''
        return self.root.Q_perspective < self.resign_threshold

    def set_result(self, winner, was_resign):
        """Record the winner and the matching result string.

        A resignation yields "B+R" or "W+R"; otherwise the string comes from
        the root position's ``result_string()``.
        """
        self.result = winner
        if was_resign:
            string = "B+R" if winner == go.BLACK else "W+R"
        else:
            string = self.root.position.result_string()
        self.result_string = string

    def _sgf_comments(self, use_comments):
        """Return a fresh comment list for `to_sgf`, leaving self.comments untouched."""
        if not use_comments:
            return []
        # Copy first: prepending into self.comments itself would add another
        # "Resign Threshold" header each time to_sgf is called.
        comments = list(self.comments or ['No comments.'])
        comments[0] = ("Resign Threshold: %0.3f\n" %
                       self.resign_threshold) + comments[0]
        return comments

    def to_sgf(self, use_comments=True):
        """Serialize the game through ``sgf_wrapper.make_sgf``.

        Args:
            use_comments: when true, include the recorded comments (or a
                'No comments.' placeholder) with the resign threshold
                prepended to the first one; when false, include none.

        Raises:
            AssertionError: if ``result_string`` has not been set.
        """
        assert self.result_string is not None
        pos = self.root.position
        return sgf_wrapper.make_sgf(pos.recent, self.result_string,
                                    white_name=self.network.name or "Unknown",
                                    black_name=self.network.name or "Unknown",
                                    comments=self._sgf_comments(use_comments))

    def extract_data(self):
        """Yield ``(position, pi, result)`` triples for the recorded game.

        Each step from ``go.replay_position`` is paired, in order, with the
        matching entry of ``searches_pi``.

        Raises:
            AssertionError: on first iteration, if the number of recorded
                distributions differs from ``root.position.n`` or no result
                has been set.
        """
        assert len(self.searches_pi) == self.root.position.n
        assert self.result != 0
        for pwc, pi in zip(go.replay_position(self.root.position, self.result),
                           self.searches_pi):
            yield pwc.position, pi, pwc.result

    def chat(self, msg_type, sender, text):
        """Reply to a chat message based on the keyword found in ``text``.

        ``msg_type`` and ``sender`` are accepted but unused.  Keywords are
        matched case-insensitively in the order 'winrate', 'nextplay',
        'fortune', 'help'; anything else gets the usage string.
        """
        default_response = "Supported commands are 'winrate', 'nextplay', 'fortune', and 'help'."
        if self.root is None or self.root.position.n == 0:
            return "I'm not playing right now.  " + default_response

        if 'winrate' in text.lower():
            # Map |Q| from [0, 1] onto a 50%..100% win rate for the leader.
            wr = (abs(self.root.Q) + 1.0) / 2.0
            color = "Black" if self.root.Q > 0 else "White"
            return "{:s} {:.2f}%".format(color, wr * 100.0)
        elif 'nextplay' in text.lower():
            return "I'm thinking... " + self.root.most_visited_path()
        elif 'fortune' in text.lower():
            return "You're feeling lucky!"
        elif 'help' in text.lower():
            return "I can't help much with go -- try ladders!  Otherwise: " + default_response
        else:
            return default_response
Exemplo n.º 29
0
#!/usr/bin/env python3.7

import sys

sys.path.insert(1, '/Users/Cameron/Desktop/transfer_ggp')

from model import Model

from mcts import MCTSNode, simulation
from propnet.propnet import load_propnet
import time

# Wall-clock reference point; the figure reported at the end therefore also
# covers loading the propnet.
start = time.time()

# Other games previously tried here: 'connect4match1', 'tictactoe1'.
data, propnet = load_propnet('connectFour')

root = MCTSNode(propnet, data)
# Run 400 simulations from the root, then dump the root node.
for _ in range(400):
    simulation(root)
root.print_node()

elapsed = time.time() - start
print('Took', elapsed, 'seconds')
Exemplo n.º 30
0
 def set_mcts(self, state):
     """Point ``self.head`` at a new ``MCTSNode`` built from ``state``.

     The node is created with the ``evaluator`` visible from the enclosing
     scope.
     """
     fresh_head = MCTSNode(state, evaluator=evaluator)
     self.head = fresh_head
Exemplo n.º 31
0
 def test_add_child(self):
     """A child added for move 17 is registered on the root and linked back to it."""
     move = 17
     parent = MCTSNode(go.Position())
     added = parent.maybe_add_child(move)
     self.assertIn(move, parent.children)
     self.assertEqual(added.parent, parent)
     self.assertEqual(added.fmove, move)
Exemplo n.º 32
0
 def test_add_child(self):
     """Check that ``maybe_add_child(17)`` stores the child and sets its parent and move."""
     flat_move = 17
     tree_root = MCTSNode(go.Position())
     new_child = tree_root.maybe_add_child(flat_move)
     self.assertIn(flat_move, tree_root.children)
     self.assertEqual(new_child.parent, tree_root)
     self.assertEqual(new_child.fmove, flat_move)
Exemplo n.º 33
0
def _root():
    """Build an ``MCTSNode`` wrapping a ``TicTacToeState`` with nine empty cells."""
    empty_cells = [None for _ in range(9)]
    return MCTSNode(TicTacToeState(empty_cells, True, None, None))