Exemplo n.º 1
0
 def test_dont_pick_unexpanded_child(self):
     """Check that two consecutive leaf selections return the same node.

     No results are incorporated into the selected leaf between the two
     selections, so the second pick must be the very same object.
     """
     favored_move = 17
     priors = np.full(go.N * go.N + 1, 0.001)
     # Weight a single move so heavily that the search follows it on both
     # selections, even after a virtual loss has been applied.
     priors[favored_move] = 0.999

     root = MCTSNode(go.Position())
     root.incorporate_results(priors, 0, root)

     first_pick = root.select_leaf()
     self.assertEqual(first_pick.fmove, favored_move)
     first_pick.add_virtual_loss(up_to=root)

     # The child has not yet been evaluated and had its results
     # incorporated, so selecting again must yield the identical leaf.
     second_pick = root.select_leaf()
     self.assertIs(first_pick, second_pick)
Exemplo n.º 2
0
 def test_dont_pick_unexpanded_child(self):
     """Check that two consecutive leaf selections return the same node.

     No results are incorporated into the selected leaf between the two
     selections, so the second pick must be the very same object.
     """
     favored_move = 17
     priors = np.full(go.N * go.N + 1, 0.001)
     # Weight a single move so heavily that the search follows it on both
     # selections, even after a virtual loss has been applied.
     priors[favored_move] = 0.999

     root = MCTSNode(go.Position())
     root.incorporate_results(priors, 0, root)

     first_pick = root.select_leaf()
     self.assertEqual(first_pick.fmove, favored_move)
     first_pick.add_virtual_loss(up_to=root)

     # The child has not yet been evaluated and had its results
     # incorporated, so selecting again must yield the identical leaf.
     second_pick = root.select_leaf()
     self.assertIs(first_pick, second_pick)
Exemplo n.º 3
0
    def test_never_select_illegal_moves(self):
        """Check that leaf selection never returns the illegal move.

        The move the test treats as illegal is given a very large prior, and
        the visit counts are inflated; selection must still avoid it, both
        before and after noise is injected at the root.
        """
        illegal_move = 1
        noise_rounds = 10

        priors = np.full(go.N * go.N + 1, 0.02)
        # Simulate the network mistakenly favoring an illegal move.
        priors[illegal_move] = 0.99

        root = MCTSNode(SEND_TWO_RETURN_ONE)
        root.incorporate_results(priors, 0, root)

        # Pretend the root has been visited many times, which inflates the
        # action score of unvisited moves.
        root.N = 100000
        # NOTE(review): child_N is indexed with whatever all_legal_moves()
        # returns; confirm that value selects the intended entries.
        root.child_N[root.position.all_legal_moves()] = 10000

        # Selection must not raise, and must not land on the illegal move.
        picked = root.select_leaf()
        self.assertNotEqual(picked.fmove, illegal_move)

        # The same must hold after each round of injected noise.
        for _ in range(noise_rounds):
            root.inject_noise()
            picked = root.select_leaf()
            self.assertNotEqual(picked.fmove, illegal_move)
Exemplo n.º 4
0
    def test_never_select_illegal_moves(self):
        """Check that leaf selection never returns the illegal move.

        The move the test treats as illegal is given a very large prior, and
        the visit counts are inflated; selection must still avoid it, both
        before and after noise is injected at the root.
        """
        illegal_move = 1
        noise_rounds = 10

        priors = np.full(go.N * go.N + 1, 0.02)
        # Simulate the network mistakenly favoring an illegal move.
        priors[illegal_move] = 0.99

        root = MCTSNode(SEND_TWO_RETURN_ONE)
        root.incorporate_results(priors, 0, root)

        # Pretend the root has been visited many times, which inflates the
        # action score of unvisited moves.
        root.N = 100000
        # NOTE(review): child_N is indexed with whatever all_legal_moves()
        # returns; confirm that value selects the intended entries.
        root.child_N[root.position.all_legal_moves()] = 10000

        # Selection must not raise, and must not land on the illegal move.
        picked = root.select_leaf()
        self.assertNotEqual(picked.fmove, illegal_move)

        # The same must hold after each round of injected noise.
        for _ in range(noise_rounds):
            root.inject_noise()
            picked = root.select_leaf()
            self.assertNotEqual(picked.fmove, illegal_move)