Exemplo n.º 1
0
def playMCTSgame():

    ttt = tictactoe.TicTacToeGameState()
    root = mcts.MCTSNode(ttt)
    node = root
    while ttt.winner is None:
        move = mcts.mcts(node, 10000)
        ttt.executeMove(move)
        node = mcts.MCTSNode(ttt)
        print ttt
Exemplo n.º 2
0
def playMCTSgame():

    cf = connectfour.ConnectFourGameState()
    root = mcts.MCTSNode(cf)
    node = root

    while cf.winner is None:
        move = mcts.mcts(node, 20000)
        cf.executeMove(move)
        node = mcts.MCTSNode(cf)
        print cf
        print "Move:", move

    print "Winner is player", cf.winner
Exemplo n.º 3
0
 def test_action_flipping(self):
     """Identical priors should yield the same move for either colour."""
     # Seed the RNG so the jittered priors are reproducible.
     np.random.seed(1)
     priors = np.array([.02] * (go.N * go.N + 1))
     priors = priors + np.random.random([go.N * go.N + 1]) * 0.001
     b_root = mcts.MCTSNode(go.Position())
     w_root = mcts.MCTSNode(go.Position(to_play=go.WHITE))
     for node in (b_root, w_root):
         node.select_leaf().incorporate_results(priors, 0, node)
     # No matter who is to play, when we know nothing else, the priors
     # should be respected, and the same move should be picked
     b_leaf = b_root.select_leaf()
     w_leaf = w_root.select_leaf()
     self.assertEqual(b_leaf.fmove, w_leaf.fmove)
     self.assertEqualNPArray(b_root.child_action_score,
                             w_root.child_action_score)
Exemplo n.º 4
0
 def test_add_child_idempotency(self):
     """Requesting the same child twice must not create a duplicate."""
     node = mcts.MCTSNode(go.Position())
     first = node.maybe_add_child(17)
     snapshot = copy.copy(node.children)
     # A second request hands back the same child and leaves the
     # child table untouched.
     self.assertEqual(first, node.maybe_add_child(17))
     self.assertEqual(snapshot, node.children)
Exemplo n.º 5
0
 def initialize_game(self, position=None):
     """Reset all per-game bookkeeping, rooted at `position`.

     A fresh go.Position() is used when no position is given.
     """
     self.root = mcts.MCTSNode(go.Position() if position is None else position)
     self.result = 0
     self.result_string = None
     self.comments = []
     self.searches_pi = []
Exemplo n.º 6
0
    def test_select_leaf(self):
        """The child holding the boosted prior is the one selected."""
        flat = coords.to_flat(coords.from_kgs('D9'))
        priors = np.array([.02] * (go.N * go.N + 1))
        # Make one move stand out from the uniform background.
        priors[flat] = 0.4
        root = mcts.MCTSNode(SEND_TWO_RETURN_ONE)
        root.select_leaf().incorporate_results(priors, 0, root)

        self.assertEqual(root.position.to_play, go.WHITE)
        self.assertEqual(root.children[flat], root.select_leaf())
Exemplo n.º 7
0
def playMCTSgame():

    q = quoridor.QuoridorGameState()
    root = mcts.MCTSNode(q)
    node = root

    while q.winner is None:
        start = time.clock()
        move = mcts.mcts(node, 10000)
        end = time.clock()
        print "Move time: ", str(end - start)
        q.executeMove(move)
        node = mcts.MCTSNode(q)

        print "Player", str(3 % q.currentPlayer), "Move:", move
        print q

    print "Winner is player", q.winner
Exemplo n.º 8
0
 def test_do_not_explore_past_finish(self):
     """Search must not descend beyond a finished game."""
     priors = np.array([0.02] * (go.N * go.N + 1), dtype=np.float32)
     root = mcts.MCTSNode(go.Position())
     root.select_leaf().incorporate_results(priors, 0, root)
     pass1 = root.maybe_add_child(coords.to_flat(None))
     pass1.incorporate_results(priors, 0, root)
     pass2 = pass1.maybe_add_child(coords.to_flat(None))
     # Two consecutive passes end the game; incorporating results into
     # the terminal node must be rejected.
     with self.assertRaises(AssertionError):
         pass2.incorporate_results(priors, 0, root)
     # should just stop exploring at the end position.
     self.assertEqual(pass2, pass2.select_leaf())
Exemplo n.º 9
0
    def test_normalize_policy(self):
        """Raw network outputs summing past 1.0 get renormalized."""
        raw = np.array([2.0] * (go.N * go.N + 1))

        root = mcts.MCTSNode(TEST_POSITION)
        root.incorporate_results(raw, 0, root)
        root.N = 0

        # Policy sums to 1.0, only legal moves have non-zero values.
        self.assertAlmostEqual(1.0, sum(root.child_prior))
        self.assertEqual(6, np.count_nonzero(root.child_prior))
        self.assertEqual(0, sum(root.child_prior * root.illegal_moves))
Exemplo n.º 10
0
 def initialize_game(self, board=None):
     """Reset per-game state, then expand the root once with the network."""
     if board is None:
         board = hex.Hex()
     self.board = board
     self.root = mcts.MCTSNode(board, cpuct=self.cpuct)
     self.result = 0
     self.result_string = None
     self.comments = []
     self.searches_pi = []
     self.qs = []
     # Prime the tree: evaluate the root's first leaf with the network
     # and back results up only to that leaf itself.
     leaf = self.root.select_leaf()
     policy, value = self.net.run(leaf.board)
     leaf.incorporate_results(policy, value, leaf)
Exemplo n.º 11
0
 def test_dont_pick_unexpanded_child(self):
     """Selection sticks to a pending leaf until its eval arrives."""
     priors = np.array([0.001] * (go.N * go.N + 1))
     # make one move really likely so that tree search goes down that path twice
     # even with a virtual loss
     priors[17] = 0.999
     root = mcts.MCTSNode(go.Position())
     root.incorporate_results(priors, 0, root)
     first = root.select_leaf()
     self.assertEqual(17, first.fmove)
     first.add_virtual_loss(up_to=root)
     # the second select_leaf pick should return the same thing, since the child
     # hasn't yet been sent to neural net for eval + result incorporation
     second = root.select_leaf()
     self.assertIs(first, second)
Exemplo n.º 12
0
    def test_upper_bound_confidence(self):
        """Verify the PUCT exploration term child_U as the tree grows.

        Walks the tree through root expansion, first-child expansion, a
        virtual loss, and a second-child expansion, checking child_U
        against the reference formula at each step.
        """
        probs = np.array([.02] * (go.N * go.N + 1))
        root = mcts.MCTSNode(go.Position())
        leaf = root.select_leaf()
        # An unexpanded root selects itself.
        self.assertEqual(root, leaf)
        leaf.incorporate_results(probs, 0.5, root)

        # 0.02 are normalized to 1/82
        self.assertAlmostEqual(root.child_prior[0], 1 / 82)
        self.assertAlmostEqual(root.child_prior[1], 1 / 82)
        # Reference exploration coefficient: 2.0 * (log((1 + n + c_base)
        # / c_base) + c_init) times the uniform prior 1/82; child_U is
        # this times sqrt(parent N) / (1 + child N).
        puct_policy = lambda n: 2.0 * (math.log(
            (1.0 + n + FLAGS.c_puct_base) / FLAGS.c_puct_base) + FLAGS.
                                       c_puct_init) * 1 / 82
        self.assertEqual(root.N, 1)
        self.assertAlmostEqual(root.child_U[0],
                               puct_policy(root.N) * math.sqrt(1) / (1 + 0))

        leaf = root.select_leaf()
        self.assertNotEqual(root, leaf)

        # With the first child expanded.
        self.assertEqual(root.N, 1)
        self.assertAlmostEqual(root.child_U[0],
                               puct_policy(root.N) * math.sqrt(1) / (1 + 0))
        self.assertAlmostEqual(root.child_U[1],
                               puct_policy(root.N) * math.sqrt(1) / (1 + 0))

        # A virtual loss steers the next selection away from `leaf`.
        leaf.add_virtual_loss(up_to=root)
        leaf2 = root.select_leaf()

        self.assertNotIn(leaf2, (root, leaf))

        leaf.revert_virtual_loss(up_to=root)
        leaf.incorporate_results(probs, 0.3, root)
        leaf2.incorporate_results(probs, 0.3, root)

        # With the 2nd child expanded.
        self.assertEqual(root.N, 3)
        self.assertAlmostEqual(root.child_U[0],
                               puct_policy(root.N) * math.sqrt(2) / (1 + 1))
        self.assertAlmostEqual(root.child_U[1],
                               puct_policy(root.N) * math.sqrt(2) / (1 + 1))
        self.assertAlmostEqual(root.child_U[2],
                               puct_policy(root.N) * math.sqrt(2) / (1 + 0))
Exemplo n.º 13
0
    def test_backup_incorporate_results(self):
        """Check that N counts and Q averages back up to the root correctly.

        Incorporates a sequence of leaf values (0, -1, -0.2) and verifies
        the visit counts and running Q averages at every affected node.
        """
        probs = np.array([.02] * (go.N * go.N + 1))
        root = mcts.MCTSNode(SEND_TWO_RETURN_ONE)
        root.select_leaf().incorporate_results(probs, torch.tensor(0), root)

        leaf = root.select_leaf()
        leaf.incorporate_results(probs, torch.tensor(-1), root)  # white wins!

        # Root was visited twice: first at the root, then at this child.
        self.assertEqual(root.N, 2)
        # Root has 0 as a prior and two visits with value 0, -1
        self.assertAlmostEqual(-1 / 3, root.Q)  # average of 0, 0, -1
        # Leaf should have one visit
        self.assertEqual(1, root.child_N[leaf.fmove])
        self.assertEqual(1, leaf.N)
        # And that leaf's value had its parent's Q (0) as a prior, so the Q
        # should now be the average of 0, -1
        self.assertAlmostEqual(-0.5, root.child_Q[leaf.fmove])
        self.assertAlmostEqual(-0.5, leaf.Q)

        # We're assuming that select_leaf() returns a leaf like:
        #   root
        #     \
        #     leaf
        #       \
        #       leaf2
        # which happens in this test because root is W to play and leaf was a W win.
        self.assertEqual(go.WHITE, root.position.to_play)
        leaf2 = root.select_leaf()
        leaf2.incorporate_results(probs, torch.tensor(-0.2,
                                                      dtype=torch.float32),
                                  root)  # another white semi-win
        self.assertEqual(3, root.N)
        # average of 0, 0, -1, -0.2
        self.assertAlmostEqual(-0.3, root.Q)

        self.assertEqual(2, leaf.N)
        self.assertEqual(1, leaf2.N)
        # average of 0, -1, -0.2
        self.assertAlmostEqual(root.child_Q[leaf.fmove], leaf.Q)
        self.assertAlmostEqual(-0.4, leaf.Q)
        # average of -1, -0.2
        self.assertAlmostEqual(-0.6, leaf.child_Q[leaf2.fmove])
        self.assertAlmostEqual(-0.6, leaf2.Q)
Exemplo n.º 14
0
    def test_inject_noise_only_legal_moves(self):
        """Dirichlet noise must never leak probability onto illegal moves."""
        raw = np.array([0.02] * (go.N * go.N + 1))
        root = mcts.MCTSNode(TEST_POSITION)
        root.incorporate_results(raw, 0, root)
        root.N = 0

        uniform = 1 / sum(root.illegal_moves == 0)
        expected = uniform * (1 - root.illegal_moves)

        self.assertTrue((root.child_prior == expected).all())

        root.inject_noise()

        # 0.75/0.25 derived from default dirichlet_noise_weight.
        self.assertTrue((0.75 * expected <= root.child_prior).all())
        self.assertTrue((0.75 * expected + 0.25 >= root.child_prior).all())
        # Policy sums to 1.0, only legal moves have non-zero values.
        self.assertAlmostEqual(1.0, sum(root.child_prior))
        self.assertEqual(0, sum(root.child_prior * root.illegal_moves))
Exemplo n.º 15
0
    def test_never_select_illegal_moves(self):
        """Selection avoids illegal moves even under extreme priors."""
        priors = np.array([0.02] * (go.N * go.N + 1))
        # let's say the NN were to accidentally put a high weight on an illegal move
        priors[1] = 0.99
        root = mcts.MCTSNode(SEND_TWO_RETURN_ONE)
        root.incorporate_results(priors, 0, root)
        # and let's say the root were visited a lot of times, which pumps up the
        # action score for unvisited moves...
        root.N = 100000
        root.child_N[root.position.all_legal_moves()] = 10000
        # this should not throw an error, and the returned leaf should not
        # be the illegal move
        self.assertNotEqual(1, root.select_leaf().fmove)

        # and even after injecting noise, we should still not select an illegal move
        for _ in range(10):
            root.inject_noise()
            self.assertNotEqual(1, root.select_leaf().fmove)
Exemplo n.º 16
0
def playAgainstMCTS():
    ttt = tictactoe.TicTacToeGameState()
    print ttt

    while True:

        move = int(input("Make your move: "))
        ttt.executeMove(move)

        if ttt.winner is not None:
            break

        node = mcts.MCTSNode(ttt)
        computerMove = mcts.mcts(node, 10000)
        ttt.executeMove(computerMove)
        print ttt

        if ttt.winner is not None:
            break

    print ttt
    print "Player", ttt.winner, "wins"
Exemplo n.º 17
0
def playAgainstMCTS():
    cf = connectfour.ConnectFourGameState()
    print cf

    while True:

        move = int(input("Make your move: "))
        cf.executeMove(move)

        if cf.winner is not None:
            break

        node = mcts.MCTSNode(cf)
        computerMove = mcts.mcts(node, 10000)
        cf.executeMove(computerMove)
        print cf

        if cf.winner is not None:
            break

    print cf
    print "Player", cf.winner, "wins"
Exemplo n.º 18
0
 def initialize_game(self, status=None):
     """Reset the search tree and result for a new game.

     Args:
         status: starting position; a fresh go.GoStatus() is used when None.
     """
     # `is None` is the correct identity test; the original `== None`
     # dispatches to __eq__ and can misfire on objects with custom equality.
     if status is None:
         status = go.GoStatus()
     self.root = mcts.MCTSNode(status)
     self.result = 0
Exemplo n.º 19
0
import mcts

# Smoke check: build a bare node (no game state) and show its repr.
node = mcts.MCTSNode(None, action_size=3)
print(node)

Exemplo n.º 20
0
 def test_add_child(self):
     """A new child is registered under its move and wired back to its parent."""
     parent = mcts.MCTSNode(go.Position())
     kid = parent.maybe_add_child(17)
     self.assertIn(17, parent.children)
     self.assertEqual(parent, kid.parent)
     self.assertEqual(17, kid.fmove)
Exemplo n.º 21
0
 def initialize_game(self, position=None):
     if position is None:
         position = deepxor.Position()
     self.root = mcts.MCTSNode(position)