Example #1
def modelPredict(state, currentPlayer):
    # Randomly mirror the state (left-right) as a cheap data augmentation
    mirrored = np.random.random() > 0.5
    if (mirrored):
        state = Utils.createMirrorState(state)

    # state = state.toNumpyState()
    convState = Utils.state2ConvState(state, currentPlayer)
    evaluation, policy = PREDICT_MODEL.predict(np.array([convState]))
    evaluation = evaluation[0][0]
    policy = policy[0]

    # If the input was mirrored, flip the policy back to the original column order
    if (mirrored):
        return evaluation, policy[::-1]
    return evaluation, policy
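The random mirroring above exploits the left-right symmetry of the board: the network is queried on a randomly flipped copy of the state, and the returned column policy is flipped back before it is used. Below is a minimal sketch of that idea, assuming a 6x7 NumPy board and a length-7 column policy; mirror_state and mirror_policy are hypothetical stand-ins for Utils.createMirrorState and the policy[::-1] flip.

import numpy as np

def mirror_state(board):
    # Flip a 6x7 board left-right (column 0 <-> column 6)
    return np.fliplr(board)

def mirror_policy(policy):
    # A per-column policy is mirrored by reversing its order
    return policy[::-1]

board = np.zeros((6, 7), dtype=np.int8)
board[5, 0] = 1  # a stone in the leftmost column
policy = np.array([0.70, 0.10, 0.05, 0.05, 0.05, 0.03, 0.02])

mBoard, mPolicy = mirror_state(board), mirror_policy(policy)
# Mirroring twice recovers the original orientation, which is what
# modelPredict relies on when it returns policy[::-1]
assert np.array_equal(mirror_state(mBoard), board)
assert np.array_equal(mirror_policy(mPolicy), policy)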
Example #2
    def makeMove(self, state):
        global CURRENT_ROOT
        if (CURRENT_ROOT is not None):
            self.addDirichletNoiseToRoot(CURRENT_ROOT)

        root = SequentialMCTS.performSearch(self.gameFuncs, state,
                                            self.simulationsPerMove,
                                            self.playerID, CURRENT_ROOT)

        # Sanity checks on the returned search tree
        assert (Utils.compareState(root.state, state))
        assert (root.currentPlayer == self.playerID)
        assert (root.children[0].currentPlayer == -self.playerID)
        moves = GetPossibleActions.getPossibleActions(state)
        for c in root.children:
            assert (c.action in moves)

        # Create Training Labels from current Tree
        inState, policyLabel = LabelGenerator.generateLabels(root)
        self.addToLocalGameBuffer(inState, policyLabel)

        # Play greedily (argmax) once CURRENT_ROUND reaches POLICY_THRESHOLD;
        # before that, sample the move from the visit-count policy for exploration
        if (FullGame.CURRENT_ROUND >= Hyperparameters.POLICY_THRESHOLD):
            selectedMove = np.argmax(policyLabel)
        else:
            selectedMove = np.random.choice(np.arange(0, len(policyLabel)),
                                            p=policyLabel)

        setNewCurrentRoot(root, selectedMove)
        return selectedMove
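The end of makeMove switches between two move-selection rules: once FullGame.CURRENT_ROUND reaches Hyperparameters.POLICY_THRESHOLD the most-visited move is played deterministically, otherwise a move is sampled from the visit-count policy. Here is a minimal sketch of that rule in isolation, assuming policyLabel is already a normalized distribution over the 7 columns; the names below are illustrative, not part of the project.

import numpy as np

def selectMove(policyLabel, currentRound, policyThreshold):
    # Early rounds: sample proportionally to visit counts (exploration).
    # Later rounds: play the most-visited move (exploitation).
    if currentRound >= policyThreshold:
        return int(np.argmax(policyLabel))
    return int(np.random.choice(np.arange(len(policyLabel)), p=policyLabel))

policyLabel = np.array([0.05, 0.10, 0.50, 0.20, 0.10, 0.03, 0.02])
print(selectMove(policyLabel, currentRound=3, policyThreshold=10))   # sampled move
print(selectMove(policyLabel, currentRound=12, policyThreshold=10))  # always column 2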
Example #3
    def testMirrorState(self):
        AMOUNT_OF_TESTS_PER_CASE = 10
        for case in testCasesRawEvaluate.TEST_CASES:
            board = np.array(case[0])
            for p in [-1, 1]:
                convState = Utils.state2ConvState(board, p)
                convStates = [convState for i in range(AMOUNT_OF_TESTS_PER_CASE)]
                randomPolices = [np.random.random(7) for i in range(AMOUNT_OF_TESTS_PER_CASE)]

                # Mirroring twice must reproduce the original states and policies
                mirrorStates, mirrorPolices = Utils.createMirroredStateAndPolicy(convStates, randomPolices)
                reMirrorStates, reMirrorPolices = Utils.createMirroredStateAndPolicy(mirrorStates, mirrorPolices)

                for i in range(len(randomPolices)):
                    assert np.array_equal(randomPolices[i], reMirrorPolices[i])

                for m in reMirrorStates:
                    assert np.array_equal(convState, m)
Example #4
def getBestActionFromNode(node, game):
    if (node.visits == 1):
        legalMoves = game.getPossibleActions(node.state, node.currentPlayer)
        return np.argmax(
            Utils.createNormalizedLegalPolicy(node.policyEstimatedFromThisNode,
                                              legalMoves))
    action = max(node.children, key=lambda x: x.visits).action

    return action
Example #5
def simulatePostEvaluate(game, node, path, evalScore, policy):
    Funcs._expandNode(game, node, policy)

    # Only extract the policy for legal moves; since it comes from a softmax
    # layer, the legal policy should not sum to zero.
    legalMoves = game.getPossibleActions(node.state, 1338)
    if (len(legalMoves) != 7):
        legalPolicy = Utils.createNormalizedLegalPolicy(policy, legalMoves)
        node.policyEstimatedFromThisNode = legalPolicy / np.sum(
            legalPolicy)  # Set & Normalize
    else:
        node.policyEstimatedFromThisNode = policy

    node.score = evalScore
    Funcs._backprop(path, evalScore)
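Both this example and Example #4 rely on Utils.createNormalizedLegalPolicy, which is not part of this listing. The following is a plausible sketch of the masking-and-renormalizing step it performs, under the assumption of a 7-column action space; the real helper may differ in detail.

import numpy as np

def normalizedLegalPolicy(policy, legalMoves, numActions=7):
    # Zero out illegal columns, then renormalize so the probabilities sum to 1
    mask = np.zeros(numActions)
    mask[list(legalMoves)] = 1.0
    legal = policy * mask
    # A softmax policy is strictly positive, so the sum over legal moves is non-zero
    return legal / np.sum(legal)

policy = np.array([0.10, 0.05, 0.40, 0.15, 0.10, 0.15, 0.05])
print(normalizedLegalPolicy(policy, legalMoves=[0, 2, 3]))  # mass only on columns 0, 2, 3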
Example #6
def simulateMCTS(game, root, iteration):
    node = root
    path = []

    while (node.expanded and node.terminal == False):
        node.visits += 1
        path.append(node)
        # node = node.children[np.argmax(
        # [MainFunctions._UCB2(c, iteration, node.currentPlayer) for c in node.children])]
        # Descend to the child with the highest PUCT score
        node = node.children[np.argmax(MainFunctions._PUCT(node))]

    if (node.terminal):
        # Should we perhaps reconsider this? In theory we could skip adding visits and score
        # to terminal nodes and only return the original score, for clarity and to save computation.
        path.append(node)
        MainFunctions._backprop(path, node.score / node.visits)
        node.visits += 1
        return

    # We have not yet been to this node and should evaluate
    node.visits += 1
    node.terminal, terminalScore = game.evaluateTerminal(node.state)

    if (node.terminal == False):
        evalScore, policy = game.evaluateState(node.state, node.currentPlayer)

        # Only set policy to legal moves
        legalMoves = game.getPossibleActions(node.state, 1)
        '''
        legalMovesOld = gameOld.getPossibleActions(node.state.toNumpyState(), 1)
        for i in range(len(legalMoves)):
            assert(legalMoves[i] == legalMovesOld[i])
        '''

        if (len(legalMoves) != 7):
            legalPolicy = Utils.createNormalizedLegalPolicy(policy, legalMoves)
            node.policyEstimatedFromThisNode = legalPolicy / np.sum(
                legalPolicy)  # Set & Normalize
        else:
            node.policyEstimatedFromThisNode = policy

        MainFunctions._expandNode(game, node, policy)
    else:
        evalScore = terminalScore

    node.score = evalScore
    MainFunctions._backprop(path, evalScore)
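MainFunctions._PUCT itself is not shown in this listing; the selection step above only needs it to return one score per child. Below is a sketch of the standard AlphaZero-style PUCT score, assuming each child exposes its visit count, accumulated value and prior probability; C_PUCT is a hypothetical exploration constant.

import numpy as np

C_PUCT = 1.5  # hypothetical exploration constant

def puctScores(parentVisits, childVisits, childValues, priors):
    # Q: mean value of each child; U: exploration bonus scaled by the prior
    q = np.where(childVisits > 0, childValues / np.maximum(childVisits, 1), 0.0)
    u = C_PUCT * priors * np.sqrt(parentVisits) / (1.0 + childVisits)
    return q + u

# Three children of a node visited 10 times
scores = puctScores(parentVisits=10,
                    childVisits=np.array([4, 5, 0]),
                    childValues=np.array([2.0, 1.0, 0.0]),
                    priors=np.array([0.2, 0.3, 0.5]))
print(np.argmax(scores))  # index of the child to descend into (here: 2)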
Example #7
def computePredictionTable(model):
    t1 = time.time()
    if (len(MemoryBuffers.STATES_VISITED) == 0):
        return {}

    distinctStates = _countDistinctStates(MemoryBuffers.STATES_VISITED)
    distinctStates = [distinctStates[k][0]
                      for k in distinctStates.keys()]  # List-form
    distinctStates.extend([d.mirror()
                           for d in distinctStates])  # Add mirrored states

    convStates = [Utils.bitBoard2ConvState(d) for d in distinctStates]
    predictions = model.predict(np.array(convStates))
    preComputeTable = {}
    for i in range(len(distinctStates)):
        preComputeTable.update(
            {distinctStates[i]: (predictions[0][i][0], predictions[1][i])})

    print("Pre computation complete: {} states,  {}".format(
        len(distinctStates),
        time.time() - t1))
    return preComputeTable
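The table maps each distinct bitboard (and its mirror) to a (value, policy) pair so that repeated positions can skip a fresh network call. A hypothetical consumer of such a table is sketched below; the lookup-by-state-object and the fallback to a live prediction are assumptions, not code from the project.

def predictWithTable(table, state, modelPredict):
    # Use the precomputed (value, policy) pair when available,
    # otherwise fall back to a fresh network evaluation
    hit = table.get(state)
    if hit is not None:
        return hit
    return modelPredict(state)

# Toy usage with strings standing in for hashable bitboard objects
table = {"stateA": (0.3, [1.0 / 7] * 7)}
fallback = lambda s: (0.0, [1.0 / 7] * 7)
print(predictWithTable(table, "stateA", fallback))  # table hit
print(predictWithTable(table, "stateB", fallback))  # fallback prediction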
Example #8
def generateLabels(root):
    # validateTree(root)
    # return Utils.state2ConvState(root.state, root.currentPlayer), _createPolicyLabel(root)
    return Utils.bitBoard2ConvState(root.state), _createPolicyLabel(root)
Example #9
def generateQLabels(root):
    # return Utils.state2ConvState(root.state, root.currentPlayer), _createValueLabels(root), _createPolicyLabel(root)
    return (Utils.bitBoard2ConvState(root.state),
            _createValueLabels(root),
            _createPolicyLabel(root))
Example #10
    def toConvState(self, currentPlayer):
        return Utils.state2ConvState(self.toNumpyState(), currentPlayer)
Example #11
def simulateAndAssert(bitBoard, action, numpyState):
    player = Utils.getCurrentPlayerFromState(numpyState)
    simBoard = bitMaps.simulateAction(bitBoard, player, action)
    numpySimBoard = SimulateAction.simulateAction(numpyState, player, action)
    assert np.array_equal(simBoard.toNumpyState(), numpySimBoard)
    return simBoard