Example #1
import numpy as np

import nn2048helper

# getScore (see Example #2) and the depthsPolicy table are module-level
# names defined elsewhere in the same file.
def expectiDepthMax(board, Net, gamma, currDepth, finalDepth):
    """Expectimax search: maximize over legal moves, average over tile spawns."""
    maxScoreSoFar = 0  # merge rewards are non-negative, so 0 is a safe floor
    movesToDo = nn2048helper.checkMoves(board)
    chosenMove = None
    for candMove in movesToDo:
        candBoard, candRewToNet, candReward = nn2048helper.makeMove(
            board, candMove)
        if currDepth >= finalDepth:
            # Leaf: evaluate the after-move board with the value network.
            candBoardTensor = nn2048helper.makeTensor(candBoard)
            candScore = getScore(candBoard, Net, candBoardTensor)
        else:
            # Chance node: expand every possible 2/4 spawn, weighted by its
            # probability (candNextBoard[1]).
            candScore = 0
            candNextBoards = nn2048helper.addAllPossibleNums(candBoard)
            for candNextBoard in candNextBoards:
                nonZeroCt = np.count_nonzero(candNextBoard[0])
                # depthsPolicy maps the empty-cell count to a search depth.
                candScore += gamma * expectiDepthMax(
                    candNextBoard[0], Net, gamma, currDepth + 1,
                    depthsPolicy[16 - nonZeroCt])[0] * candNextBoard[1]
            # Each empty cell yields two spawn boards (a 2 and a 4), so the
            # cell count is len(candNextBoards) / 2; this averages over cells.
            candScore *= 2 / len(candNextBoards)
        if candRewToNet + gamma * candScore > maxScoreSoFar:
            maxScoreSoFar = candRewToNet + gamma * candScore
            chosenMove = candMove
    return maxScoreSoFar, chosenMove
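
The snippet assumes a module-level depthsPolicy table mapping the number of empty cells (16 - nonZeroCt) to a search depth; its contents are not shown in these examples. A minimal sketch, with depth values chosen purely for illustration (one plausible choice is to search crowded boards deeper):

# Hypothetical: 17 entries indexed by empty-cell count 0..16.
depthsPolicy = [3] * 4 + [2] * 5 + [1] * 8

# Root call, given a board, a trained Net, and a discount gamma:
# emptyCt = 16 - np.count_nonzero(board)
# score, move = expectiDepthMax(board, Net, gamma, 0, depthsPolicy[emptyCt])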
Example #2
import nn2048helper

def getScore(board, Net=None, boardTensor=None):
    """Network value of a board; boards with no legal moves score 0."""
    if len(nn2048helper.checkMoves(board)) == 0:
        return 0
    if Net is None:
        return 0
    # Encode the board on demand if a pre-built tensor was not supplied.
    if boardTensor is None:
        boardTensor = nn2048helper.makeTensor(board)
    # The network expects a batch of 18-channel 4x4 planes.
    return Net.forward(boardTensor.view(-1, 18, 4, 4)).data.tolist()[0][0]
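
The view(-1, 18, 4, 4) reshape implies that nn2048helper.makeTensor encodes the 4x4 board as 18 planes, presumably one one-hot plane per tile exponent. A minimal PyTorch sketch of a value network compatible with that input shape; the layers and sizes here are illustrative assumptions, not the author's architecture:

import torch.nn as nn

class ValueNet(nn.Module):
    """Illustrative value head for (N, 18, 4, 4) board encodings."""
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(18, 64, kernel_size=2), nn.ReLU(),  # -> (N, 64, 3, 3)
            nn.Conv2d(64, 64, kernel_size=2), nn.ReLU(),  # -> (N, 64, 2, 2)
        )
        self.head = nn.Sequential(
            nn.Flatten(),              # -> (N, 256)
            nn.Linear(64 * 2 * 2, 1),  # scalar value per board
        )

    def forward(self, x):
        return self.head(self.conv(x))

With any network of this shape, the forward pass returns an (N, 1) tensor, which is why getScore indexes the result with .tolist()[0][0].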
Example #3
import random

import nn2048helper

def makeMoveUnderPolicy(board, Net, gamma, eps):
    """Epsilon-greedy move selection under the current value network."""
    movesToDo = nn2048helper.checkMoves(board)
    if len(movesToDo) == 0:
        return None, 0, 0  # terminal board: no legal moves
    if random.random() <= eps:
        # Explore: play a uniformly random legal move.
        move = random.choice(movesToDo)
        newBoard, rewToNet, reward = nn2048helper.makeMove(board, move)
        return newBoard, rewToNet, reward
    # Exploit: pick the move maximizing reward + gamma * predicted value.
    maxScoreSoFar = float('-inf')
    for candMove in movesToDo:
        candBoard, candRewToNet, candReward = nn2048helper.makeMove(board, candMove)
        candBoardTensor = nn2048helper.makeTensor(candBoard)
        candScore = getScore(candBoard, Net, candBoardTensor)
        if candRewToNet + gamma * candScore > maxScoreSoFar:
            maxScoreSoFar = candRewToNet + gamma * candScore
            chosenBoard = candBoard.copy()
            chosenRewToNet = candRewToNet
            chosenReward = candReward
    return chosenBoard, chosenRewToNet, chosenReward
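
This is standard epsilon-greedy control: with probability eps a uniformly random legal move is played to keep exploring, and otherwise a one-step lookahead picks the move maximizing reward + gamma * V(afterBoard) under the current network. Note that the returned board is the state after the move but before a new tile spawns; the caller adds the tile with nn2048helper.addNum (see run in Example #4).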
Example #4
import random

import numpy as np

import nn2048helper

# newGain computes a fresh value estimate for a board; like getScore and
# makeMoveUnderPolicy, it is defined elsewhere in the same module.
def run(self):
    """Play one episode, collecting (state tensor, blended value target) pairs."""
    overallScore = 0
    # Early episodes start from a fresh board; later ones usually start from
    # a random mid-game board to diversify the training data.
    if self.epNum <= 50:
        board = nn2048helper.initBoard()
    else:
        if random.random() <= 0.3:
            board = nn2048helper.initBoard()
        else:
            board = nn2048helper.randomBoard(random.randint(2, 12))
    board, rewToNet, reward = makeMoveUnderPolicy(board, self.Net, self.gamma, self.eps)
    overallScore += reward
    currMax = 0
    while board is not None:  # makeMoveUnderPolicy returns None when the game ends
        currMax = max(currMax, np.max(board))  # track the largest tile seen
        boardNewGain = newGain(board, self.Net, self.gamma)
        boardTensor = nn2048helper.makeTensor(board)
        currScore = getScore(board, self.Net, boardTensor)
        # Soft update target: blend the current prediction with the freshly
        # computed gain, weighted by alpha.
        self.data.append([boardTensor, currScore * (1 - self.alpha) + self.alpha * boardNewGain])
        board = nn2048helper.addNum(board)  # spawn a random tile
        board, rewToNet, reward = makeMoveUnderPolicy(board, self.Net, self.gamma, self.eps)
        overallScore += reward
    print('Score: ', overallScore)
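
The episode only collects (boardTensor, target) training pairs in self.data; the step that fits the network to them is not shown in these examples. A minimal sketch of how such pairs could be regressed with a mean-squared-error loss, assuming boardTensor is an 18x4x4 float tensor; the optimizer choice and single-batch pass are assumptions, not the author's training code:

import torch
import torch.nn as nn

def fitValueNet(Net, data, lr=1e-3):
    """One MSE regression pass over collected (boardTensor, target) pairs."""
    optimizer = torch.optim.Adam(Net.parameters(), lr=lr)
    # Stack the episode's states and their blended value targets.
    states = torch.stack([pair[0] for pair in data]).view(-1, 18, 4, 4)
    targets = torch.tensor([pair[1] for pair in data]).view(-1, 1)
    optimizer.zero_grad()
    loss = nn.MSELoss()(Net(states), targets)
    loss.backward()
    optimizer.step()
    return loss.item()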