Example No. 1
    def play(self, board):
        # display(board)
        valid = self.game.getValidMoves(board, 1)

        print('hp: possible moves: ', end="")
        for i in range(len(valid)):
            if valid[i]:
                print(i, end=' ')
        print('')

        while True:
            # Python 3.x; on Python 2.x use raw_input() instead
            a = input()

            try:
                # keep only the last whitespace-separated token
                x = 0
                for y in a.split(' '):
                    x = int(y)
            except ValueError:
                print('hp: Invalid')
                continue
            # -1 maps to an out-of-range sentinel, so the bounds check
            # below rejects it and re-prompts
            a = x if x != -1 else 2 * self.game.n
            if 0 <= a <= self.game.n and valid[a]:
                break
            print('hp: Invalid')

        select = a
        b = Board(6)
        b.pieces = np.copy(board)
        b.check_board(select, prefix="hp: ")

        return a
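
All of these players expose the same single-method interface, so they can be
swapped freely in a game loop. As a point of reference, a minimal Arena-style
driver over the same Game API might look like the sketch below (the function
name play_game and the two-player setup are illustrative, not part of the
original code):

    def play_game(game, player1, player2):
        # Map +1 -> player1, -1 -> player2, as in the examples above.
        players = {1: player1, -1: player2}
        board = game.getInitBoard()
        cur_player = 1
        while game.getGameEnded(board, cur_player) == 0:
            # Each player sees the board from its own (canonical) perspective.
            canonical = game.getCanonicalForm(board, cur_player)
            action = players[cur_player].play(canonical)
            board, cur_player = game.getNextState(board, cur_player, action)
        return game.getGameEnded(board, 1)  # result from player1's perspective
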
Example No. 2
    def play(self, board):
        valid = self.game.getValidMoves(board, 1)
        # print('op: possible moves: ', end="")
        # for i in range(len(valid)):
        #     if valid[i]:
        #         print(i, end=' ')
        # print('')

        b = Board(6)
        b.pieces = np.copy(board)
        scores = b.oracle_eval_board(prefix="op: ")
        # sentinels: columns scored 127 are skipped below (treated as
        # unplayable), and -127 is lower than any real score
        best = -127
        best_i = 6
        next_best = best
        next_best_i = best_i
        for i in range(6):
            if scores[i] != 127:
                # pick best move, or choose between best ones;
                # allow for mistakes if so desired
                if scores[i] > best or (scores[i] == best and random() < 0.5):
                    next_best = best
                    next_best_i = best_i
                    best = scores[i]
                    best_i = i
        select = best_i
        if random() < self.mistake_fraction:
            # take other choice, if not absurd:
            if next_best != -127 and (best - next_best) <= self.mistake_max:
                select = next_best_i
                #print('op: select suboptimal move')
        #print('op: select ' + str(select))

        return select
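
The best/next-best bookkeeping above is easy to misread. Here is a
self-contained sketch of the same mistake-injection idea (the function name
and signature are illustrative; it assumes, as the loop above does, that
scores uses 127 to mark unplayable columns):

    import random

    def pick_with_mistakes(scores, mistake_fraction, mistake_max, invalid=127):
        # Rank playable columns best-first; a random key breaks ties.
        ranked = sorted((i for i in range(len(scores)) if scores[i] != invalid),
                        key=lambda i: (-scores[i], random.random()))
        best = ranked[0]
        # Occasionally take the runner-up, but only if it is not much worse.
        if (len(ranked) > 1 and random.random() < mistake_fraction
                and scores[best] - scores[ranked[1]] <= mistake_max):
            return ranked[1]
        return best
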
Example No. 3
    def play(self, board):
        '''
        :param board: the current configuration of the board
        :return: the best action; if several actions tie for the best
                 score, one of them is returned uniformly at random
        '''
        valids = self.game.getValidMoves(board, 1)
        candidates = []
        for a in range(self.game.getActionSize()):
            if valids[a] == 0:
                continue
            nextBoard, _ = self.game.getNextState(board, 1, a)
            score = self.game.getScore(nextBoard, 1)
            candidates += [(-score, a)]
        candidates.sort()
        best_actions = []
        best_score = candidates[0][0]
        for i in range(len(candidates)):
            if candidates[i][0] == best_score:
                best_actions.append(candidates[i][1])
        select = random.choice(best_actions)

        b = Board(6)
        b.pieces = np.copy(board)
        b.check_board(select, prefix="gp: ")

        return select
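
Because candidates stores (-score, action) pairs, sorting it ascending puts
the highest-scoring actions first, and candidates[0][0] is the best negated
score. The same selection can be written more compactly, as in this sketch
(same behavior, illustrative helper name):

    import random

    def greedy_action(candidates):
        # candidates holds (-score, action) pairs, so the minimum negated
        # score corresponds to the maximum score.
        best_neg_score = min(neg for neg, _ in candidates)
        best_actions = [a for neg, a in candidates if neg == best_neg_score]
        return random.choice(best_actions)
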
Example No. 4
    def play(self, board):
        args1 = dotdict({'numMCTSSims': args.mcts, 'cpuct': args.cpuct})
        # build a fresh MCTS search tree for this move
        mcts1 = MCTS(self.game, self.n1, args1)
        actions = mcts1.getActionProb(board, temp=1)
        # play the most-visited action deterministically
        select = np.argmax(actions)
        #print('board: ', end="")
        #print(board)
        #print('action p-values: ' + str(actions))

        b = Board(6)
        b.pieces = np.copy(board)
        b.check_board(select, prefix="nn: ")

        return select
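
Note that temp=1 followed by np.argmax still plays the most-visited move
deterministically; sampling from the returned distribution instead would add
exploration. A sketch of both options over the same getActionProb call
(the helper name is illustrative):

    import numpy as np

    def select_action(mcts, board, deterministic=True):
        # getActionProb returns visit counts normalized into a distribution.
        pi = mcts.getActionProb(board, temp=1)
        if deterministic:
            return int(np.argmax(pi))                 # strongest move
        return int(np.random.choice(len(pi), p=pi))   # exploratory move
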
Example No. 5
    def play(self, board):
        '''
        :param board: the configuration of the board
        :return: the action from the (action, score) tuple, where the action
                 is the best one found by alpha-beta minimax search
        '''

        score = self.minimax((board, -1), self.depth, 1, -infinity, +infinity)
        print("mp: minmax at depth " + str(self.depth))
        select = score[0]
        print("mp: select " + str(select))

        b = Board(6)
        b.pieces = np.copy(board)
        b.check_board(select, prefix="mp: ")

        return select
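
The minimax helper itself is not shown in this example. For reference, a
generic alpha-beta search over the same Game API might look like the sketch
below; it is an assumption-laden stand-in, not the original implementation
(in particular, the original is called with a (board, -1) state tuple and
returns an (action, score) pair):

    from math import inf as infinity

    def alphabeta(game, board, depth, player, alpha, beta):
        # Returns (best_action, score), scored from player 1's point of view.
        if depth == 0 or game.getGameEnded(board, player) != 0:
            return None, game.getScore(board, 1)
        valids = game.getValidMoves(board, player)
        best_action = None
        if player == 1:  # maximizing player
            value = -infinity
            for a in range(game.getActionSize()):
                if not valids[a]:
                    continue
                next_board, next_player = game.getNextState(board, player, a)
                _, s = alphabeta(game, next_board, depth - 1, next_player,
                                 alpha, beta)
                if s > value:
                    value, best_action = s, a
                alpha = max(alpha, value)
                if alpha >= beta:
                    break  # beta cutoff
        else:            # minimizing player
            value = infinity
            for a in range(game.getActionSize()):
                if not valids[a]:
                    continue
                next_board, next_player = game.getNextState(board, player, a)
                _, s = alphabeta(game, next_board, depth - 1, next_player,
                                 alpha, beta)
                if s < value:
                    value, best_action = s, a
                beta = min(beta, value)
                if alpha >= beta:
                    break  # alpha cutoff
        return best_action, value
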
Example No. 6
    def play(self, board):
        select = np.random.randint(self.game.getActionSize())
        valid = self.game.getValidMoves(board, 1)
        print('rp: possible moves: ', end="")
        for i in range(len(valid)):
            if valid[i]:
                print(i, end=' ')
        print('')

        # rejection-sample until a valid column is drawn
        while valid[select] != 1:
            select = np.random.randint(self.game.getActionSize())
        print('rp: select ' + str(select))

        b = Board(6)
        b.pieces = np.copy(board)
        b.check_board(select, prefix="rp: ")
        return select
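
The rejection loop above may need several draws when few columns are valid.
An equivalent one-shot alternative (a sketch; the helper name is illustrative)
samples directly from the indices of the valid moves:

    import numpy as np

    def random_valid_action(game, board):
        valid = game.getValidMoves(board, 1)
        # np.flatnonzero gives the indices of the valid (non-zero) entries.
        return int(np.random.choice(np.flatnonzero(valid)))
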
Example No. 7
    def executeEpisode(self):
        """
        This function executes one episode of self-play, starting with player 1.
        As the game is played, each turn is added as a training example to
        trainExamples. The game is played until it ends; the outcome is then
        used to assign a value to each example in trainExamples.

        It uses temp=1 while episodeStep < tempThreshold, and temp=0
        thereafter.

        Returns:
            trainExamples: a list of examples of the form (canonicalBoard,pi,v)
                           pi is the MCTS informed policy vector, v is +1 if
                           the player eventually won the game, else -1.
        """
        trainExamples = []
        board = self.game.getInitBoard()
        self.curPlayer = 1
        episodeStep = 0

        moves = 0
        max_moves = self.mcts.MAX_TREE_DEPTH
        while True:
            episodeStep += 1
            canonicalBoard = self.game.getCanonicalForm(board, self.curPlayer)
            # TODO: look carefully into good settings for tempThreshold. Game dependent!
            # Don't want to be stuck in a loop at lower levels.
            temp = int(episodeStep < self.args.tempThreshold)

            pi = self.mcts.getActionProb(canonicalBoard, temp=temp)
            sym = self.game.getSymmetries(canonicalBoard, pi)
            for b, p in sym:
                trainExamples.append([b.tolist(), self.curPlayer, p, None])

            action = np.random.choice(len(pi), p=pi)
            board, self.curPlayer = self.game.getNextState(
                board, self.curPlayer, action)

            r = self.game.getGameEnded(board, self.curPlayer)
            moves += 1
            if moves > max_moves:
                # cap episode length: score over-long games as a near-draw
                r = 1e-4

            if r != 0:
                # also add the final move that finished the game
                canonicalBoard = self.game.getCanonicalForm(
                    board, self.curPlayer)
                b = Board(6)
                b.pieces = np.copy(canonicalBoard)
                # also add a (fake but legal) pi for the final move, without
                # running MCTS for it; don't shadow the move counter above
                legal_moves = b.get_legal_moves(self.curPlayer)
                if len(legal_moves) > 0:
                    # print('board: ' + str(b.pieces) + ' canonicalBoard: ' + str(canonicalBoard) + ' legal moves: ' + str(legal_moves))
                    # uniform distribution over the legal moves
                    pi = [0, 0, 0, 0, 0, 0, 0]
                    for i in legal_moves:
                        pi[i] = 1 / len(legal_moves)
                else:
                    # no legal move left: put all mass on the pass action
                    pi = [0, 0, 0, 0, 0, 0, 1]
                trainExamples.append(
                    [canonicalBoard.tolist(), self.curPlayer, pi, None])
                return [(x[0], x[2], r * ((-1)**(x[1] != self.curPlayer)))
                        for x in trainExamples]
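
The closing list comprehension flips the final result r into each example's
own perspective: examples recorded for the player to move at the end
(x[1] == self.curPlayer) keep r, while the opponent's examples get -r. A tiny
worked check with illustrative values:

    # Suppose the game ended with r = -1 from curPlayer's perspective,
    # i.e. curPlayer lost.
    r, curPlayer = -1, 1
    assert r * ((-1) ** (1 != curPlayer)) == -1   # curPlayer's example: loss
    assert r * ((-1) ** (-1 != curPlayer)) == 1   # opponent's example: win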