def act(self, state):
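        # decay epsilon each call, but never let it fall below the exploration floor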
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)

        # return a simulated or random move
        if np.random.random() < self.epsilon:
            # run a MCTS move if at min epsilon
            if self.epsilon == self.epsilon_min:
                # create a new node based on the board
                test_node = Node(self.board, 1, 9, None)

                # expand the tree while I have time
                time_start = time.time()
                expand_node(test_node, time_start, self.max_time,
                            self.max_sims)

                action = test_node.get_best_move()
            # otherwise return a random move
            else:
                action = random.getrandbits(2)
                while not self.board.is_valid_move(action):
                    action = (action + 1) % 4

        else:
            output_array = self.model.predict(state)[0]
            action = np.argmax(output_array)
            # if the action is invalid choose the next best
            while not self.board.is_valid_move(action):
                output_array[action] = -np.inf  # mask out the invalid action
                action = np.argmax(output_array)

        return action
Example #2
    def mcts(self,
             board,
             player,
             iteration_count=100,
             mixing_hyperparameter=0.5):
        root = Node((-1, -1), 1, player, board)
        for it in range(0, iteration_count):
            history = set()
            leaf = root.traverse_to_leaf(history)
            leaf.expand(self)
            leaf_values = self.evaluate_turn_value(leaf.board, leaf.player)
            player_values = {1: leaf_values[1], -1: leaf_values[2]}
            rollout_result = leaf.play_rollout(self)
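            # back up along the visited path: each node receives a blend of the
            # value estimate for its player and the sign-corrected rollout result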
            for node in history:
                node.visit_count = node.visit_count + 1
                node.value_sum = node.value_sum + (
                    player_values[node.player] * mixing_hyperparameter +
                    rollout_result * node.player * (1 - mixing_hyperparameter))

        move = None
        max_visit_count = 0
        for child_node in root.children:
            if child_node.visit_count > max_visit_count:
                max_visit_count = child_node.visit_count
                move = child_node.move
        return move
Example #3
    def get_NextLegalCommandNode(self, bruteForce=False):
        # return only a Node whose command is legal
        argmaxOfSoftmax = self.currentNode.get_argmaxOfSoftmax()
        array4096 = self.currentNode.get_array4096()
        color = self.currentNode.get_Color()
        numOfLegalMoves = self.board_stack.get_ChessBoard().legal_moves.count()
        numOfChild = self.currentNode.get_LengthOfChild()
        finalIndex = self.currentNode.get_FinalChildIndex()

        # decide how many candidates to scan before stopping: all 4096 indices
        # under brute force, otherwise only the remaining unexpanded legal moves
        if bruteForce:
            repeatNum = 4096
        else:
            repeatNum = numOfLegalMoves - numOfChild

        for i in range(repeatNum):
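            # walk the policy's ranked indices, resuming just past the last expanded child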
            index = argmaxOfSoftmax[(finalIndex + 1 + i) % 4096]
            command = self.ohe.indexToMove4096(index)
            tmpCommand = chess.Move.from_uci(command)

            if self.thresholdOfPolicyNetwork > array4096[index] and not bruteForce:
                # below the policy network's confidence threshold: stop expanding
                break
            if (tmpCommand in self.board_stack.get_ChessBoard().legal_moves) and not (
                    self.currentNode.is_SameCommandInChild(command)):
                return index, Node.Node(self.currentNode, command, array4096[index], color)
            else:
                # retry the same move as a queen promotion
                tmpCommand = chess.Move.from_uci(command + "q")
                if (tmpCommand in self.board_stack.get_ChessBoard().legal_moves) and not (
                        self.currentNode.is_SameCommandInChild(command + "q")):
                    command = command + "q"
                    return index, Node.Node(self.currentNode, command, array4096[index], color)
        # no more children can be created
        return None, None
Example #4
 def build_mcts(self, state):
     """"""
     lg.logger_player.info("BUILDING MCTS")
     if self.mcts is None:
         if self.turn == 1:
             self.mcts = self.load_history(state)
             if self.color == -1:
                 self.mcts.new_root(Node(state))
                 self.mcts.swap_values()
         if self.mcts is None:  # may still be None if state does not exist in history
             self.mcts = MCTS(self.color, Node(state), self.c_puct)
         win_action = None
     else:
         win_action = self.mcts.new_root(Node(state))
     return win_action
Example #5
    def generate_game(self, model: Polvalnet_fc):
        np.random.seed()
        triplets = []
        step_game = 0
        temperature = 1
        game_over = False
        moves = 0
        env = oz_env()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
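        # self-play loop: run MCTS for each move and record (board, pi) pairs;
        # after move 50 the temperature drops so move selection becomes greedy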
        while not game_over:
            moves += 1
            step_game += 1
            if step_game == 50:
                temperature = 10e-6

            start = time.time()
            pi, successor, root_node = MCTS(temp=temperature,
                                            network=model,
                                            root=root_node)
            #print("Calculated next move in {}ms".format(time.time() - start))
            feature = root_node.env.board
            triplets.append([feature, pi])
            #print('')
            #print(root_node.env.board)
            #print("Running on {} ".format(mp.current_process()))
            root_node = successor
            game_over = root_node.env.is_game_over()

        z = root_node.env.who_won()
        for i in range(len(triplets) - step_game, len(triplets)):
            triplets[i].append(z)

        return triplets
Example #6
    def get_BestQuNode_Before(self):
        # Qu is the value Q(s,a) + u(s,a); return the node that maximizes it
        # build a candidate Node from the next-ranked policy index after the existing children
        argmaxOfSoftmax = self.currentNode.get_argmaxOfSoftmax()
        array4096 = self.currentNode.get_array4096()
        index = argmaxOfSoftmax[self.currentNode.get_LengthOfChild()]
        command = self.ohe.indexToMove4096(index)
        color = not self.currentNode.get_Color()

        newNode = Node.Node(self.currentNode, command, array4096[index], color)

        childList = self.currentNode.get_Child()
        if len(childList) == 0:  # no children yet
            return newNode
        maxQuNode = newNode
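        # the tentative new node starts as the best; the loop below keeps
        # whichever node has the highest Q(s,a) + u(s,a)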

        for node in childList:
            if node.get_Qu() > maxQuNode.get_Qu():
                maxQuNode = node

        #### is_child alone cannot prevent duplicate node creation; needs fixing
        if self.currentNode.is_child(maxQuNode):
            # an existing child won, so the freshly created newNode is unused;
            # discard it
            del newNode

        return maxQuNode
Example #7
    def decide_move(self, board, verbose=False, total_moves=None):
        """
        Given current board, return a move to play.
        :type board: Class Board
        :rtype: A list of 2 tuples, specifying the move's FROM and TO.
        """
        if verbose:
            board.visualise(cur_player=self.player_num)
            print('Facing the board above, Ai Version {} is thinking.'.format(self.model.version))

        node = Node(board, self.player_num)

        # Play deterministically when moves reach a certain number
        if total_moves is not None and total_moves > TOTAL_MOVES_TILL_TAU0:
            if self.tree_tau != DET_TREE_TAU:
                print('Player {}: changing tree tau from {} to {}'.format(self.player_num, self.tree_tau, DET_TREE_TAU))
            self.tree_tau = DET_TREE_TAU

        tree = MCTS(node, self.model, tree_tau=self.tree_tau)
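        # pi is the visit-count policy; sampled_edge carries the chosen move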
        pi, sampled_edge = tree.search()

        if verbose:
            human_fromPos = board_utils.np_index_to_human_coord(sampled_edge.fromPos)
            human_toPos = board_utils.np_index_to_human_coord(sampled_edge.toPos)
            print('Ai Version {} moved from {} to {}\n'.format(
                self.model.version, human_fromPos, human_toPos))

        return sampled_edge.fromPos, sampled_edge.toPos
Example #8
 def getColumn(self, board):
     # t0 = time.time()
     depth = 0 if self.plays_first else 1
     node = Node(board=board, depth=depth)
     Agent().train_mcts_ntimes(node, 10, verbose=True)
     best_move = argmax([child.wins for child in node.children])
     print("#" * 50)
     # warning(time.time() - t0)
     return best_move
Example #9
def main():
    network = load_model(args.newnetwork)
    score_net = 0
    score_random = 0

    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6
        env = oz_env()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False
     #   print(root_node.env.board[71])
        while not game_over:
            start = time.time()
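            # board[71] appears to encode the side to move: -1 means it is the
            # network's turn (play via MCTS), otherwise play a random move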
            if root_node.env.board[71] == -1:
                #print("am here")
                pi, successor, root_node = MCTS(temp=temperature, network= network, root=root_node)
                root_node = successor
            else:
                if root_node.children is None:
                    root_node.children = [None] * len(root_node.legal_moves)

                move = np.random.randint(0, len(root_node.legal_moves))
                if root_node.children[move] is None:
                    next_env = deepcopy(root_node.env)
                    next_env.step(root_node.legal_moves[move])

                    root_node.children[move] = Node(next_env, temperature, parent=root_node, child_id=move)
                root_node = root_node.children[move]
            moves = moves + 1

            game_over = root_node.env.is_game_over()
            z = root_node.env.who_won()

        if z <= -1:
            score_net += 1
        else:
            score_random += 1

        print("Game {} complete. Net: {} Random: {}".format(game, score_net, score_random))

    print("New network score total wins: {} Average Score: {}".format(score_net, score_net / args.numgames))
    print("Random play score total wins: {} Average Score: {}".format(score_random, score_random / args.numgames))
Example #10
def selfPlay():
    env = QE()
    env.reset()
    tree = Tree(Node(env))
    while tree.rootNode.state.winner is None:
        print('Here')
        for i in range(10):
            tree.search()

        pi = tree.play()
    print(tree.rootNode.state.winner)
Example #11
 def action(self, board):
     """
     Performs an action
     """
     states = board.all_possible_next_states(board.Player_turn())
     nodes = [Node(i) for i in states]
     values = []
     converted_state = self.convert_nodes_to_input(nodes)
     for state in converted_state:
         values.append(self.nn.forward(state).item())
     if board.Player_turn() == "A":  # Player_turn is a method and must be called
         return states[np.argmax(values)]
     else:
         return states[np.argmin(values)]
Example #12
    def evaluateLeaf(self, leaf, value):
        if value == 0:
            value, probs, allowedActions = self.getPredictions(leaf.state)
            probs = probs[allowedActions]

            for idx, action in enumerate(allowedActions):
                newState = leaf.state.takeAction(action)
                if newState.toString() not in self.mcts.tree:
                    node = Node(newState)
                    self.mcts.addNode(node)
                else:
                    node = self.mcts.tree[newState.toString()]
                
                newEdge = Edge(leaf, node, probs[idx], action)
                leaf.edges.append(newEdge)
        return value
Example #13
def run(numThreads, numSimulations, modelName):
    #model = load_model('./models/' + modelName + '.h5', custom_objects={'softmax_cross_entropy_with_logits': softmax_cross_entropy_with_logits})

    env = QE()
    env.reset()
    tree = Tree(Node(env))

    gameStates = []
    players = []
    pis = []
    envs = []

    testBool = True
    #while tree.rootNode.state.winner == None:
    while testBool:
        testBool = False
        gameStates.append(tree.rootNode.state.gameState)
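        # record the player to move as +1 for playerA and -1 for playerB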
        players.append(
            1 *
            (tree.rootNode.state.playerA == tree.rootNode.state.currPlayer) +
            -1 *
            (tree.rootNode.state.playerB == tree.rootNode.state.currPlayer))
        envs.append(tree.rootNode.state)

        pi = tree.play()
        pis.append(pi)

    winner = tree.rootNode.state.winner
    for i in range(len(players)):
        gameState = gameStates[i]
        pi = pis[i]
        player = players[i]
        env = envs[i]
        if winner == 0:
            value = 0
        elif winner == player:
            value = 1
        elif winner == -player:
            value = -1
        else:
            raise Exception("Unrecognized Winner")
        savedState = SavedState(gameState, pi, value, env)
        savedPath = "./positions/" + modelName + "-" + datetime.now().strftime(
            "%d-%b-%Y-%H-%M-%S-%f")
        writeSavedState(savedState, savedPath)

    return winner
Example #14
def make_random_move(root):
    '''
    Independent of MCTS: instead, sample a random move from the current
    board's valid moves.
    '''
    random.seed()

    cur_state = root.state
    player = root.currPlayer

    valid_actions = cur_state.get_valid_moves(
        player)  # dict, key: checker pos, value: possible dest from pos

    random_start = random.choice(list(valid_actions.keys()))
    while len(valid_actions[random_start]) == 0:
        random_start = random.choice(list(valid_actions.keys()))
    random_end = random.choice(valid_actions[random_start])

    next_state = copy.deepcopy(cur_state)
    next_state.place(player, random_start, random_end)
    new_player = PLAYER_ONE + PLAYER_TWO - player

    return Node(next_state, new_player)
Example #15
    def run_tournament(self, candidate, candidate_alpha_scores,
                       incumbent_alpha_scores, _):
        moves = 0
        temperature = 10e-6

        p = np.random.binomial(1, 0.5) == 1
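        # a fair coin flip decides which policy plays white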
        white, black = (self.current_policy,
                        candidate) if p else (candidate, self.current_policy)
        env = oz_env()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False

        while not game_over:
            if root_node.env.white_to_move:
                player = white
            else:
                player = black

            pi, successor, root_node = MCTS(temp=temperature,
                                            network=player,
                                            root=root_node)
            root_node = successor
            moves += 1
            game_over = root_node.env.is_game_over()
        z = root_node.env.who_won()

        # from white perspective

        if white == candidate:
            candidate_alpha_scores.append(+z)
            incumbent_alpha_scores.append(-z)
        else:
            candidate_alpha_scores.append(-z)
            incumbent_alpha_scores.append(+z)
        # report the winner from the candidate's score rather than its color
        print("Candidate won!" if candidate_alpha_scores[-1] > 0 else "Incumbent won!")
Example #16
 def buildMCTS(self, state):
     self.root = Node(state)
     self.mcts = MCTS(self.root)  # reuse the same node so self.root is the tree's root
Example #17
 def set_RootNode(self):
     self.root_Node = Node.Node(None, None, self.board_stack.get_Color())  # create the root node
     self.currentNode = self.root_Node  # the freshly created root becomes the current node
Example #18
from QuorridorEnvironment import QuorridorEnvironment as QE
from MCTS import Tree, Node
import Thread
import time
start = time.time()
env = QE()
env.reset()
rootNode = Node(env)
Thread.search(8, 32, rootNode)
end = time.time()
print('Total Time: ' + str(end - start))
Example #19
 def initialize_mcts(self):
     self.state.PlayerA.place_workers(self.state)
     self.state.PlayerB.place_workers(self.state)
     root = Node(self.state)
     self.mcts = MCTS(root, self.nn, self.args)
Example #20
def selfplay(model1, model2=None, randomised=False):
    '''
    Generate an agent self-play given two models
    TODO: if `randomised`, randomise starting board state
    '''
    if model2 is None:
        model2 = model1

    player_progresses = [0, 0]
    player_turn = 0
    num_useless_moves = 0
    play_history = []
    tree_tau = TREE_TAU

    board = Board(randomised=randomised)
    root = Node(board, PLAYER_ONE)  # initial game state
    use_model1 = True

    while True:
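        # models alternate each ply; the first INITIAL_RANDOM_MOVES plies are random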
        model = model1 if use_model1 else model2

        if len(root.state.hist_moves) < INITIAL_RANDOM_MOVES:
            root = make_random_move(root)
        else:
            # Use Current model to make a move
            root = make_move(root, model, tree_tau, play_history)

        assert root.isLeaf()

        hist_moves = root.state.hist_moves
        cur_player_hist_moves = [
            hist_moves[i] for i in range(len(hist_moves) - 1, -1, -2)
        ]
        history_dests = set([move[1] for move in cur_player_hist_moves])

        # If limited destinations exist in the past moves, then there is some kind of repetition
        if len(cur_player_hist_moves) * 2 >= TOTAL_HIST_MOVES and len(
                history_dests) <= UNIQUE_DEST_LIMIT:
            print('Repetition detected: stopping and discarding game')
            return None, None

        # Evaluate player progress for stopping
        progress_evaluated = root.state.player_progress(player_turn + 1)
        if progress_evaluated > player_progresses[player_turn]:
            num_useless_moves = int(num_useless_moves * (NUM_CHECKERS - 1) /
                                    NUM_CHECKERS)
            player_progresses[player_turn] = progress_evaluated
        else:
            num_useless_moves += 1

        # Change player
        player_turn = 1 - player_turn
        use_model1 = not use_model1

        # Change TREE_TAU to very small if game has certain progress so actions are deterministic
        if len(play_history) + INITIAL_RANDOM_MOVES > TOTAL_MOVES_TILL_TAU0:
            if tree_tau == TREE_TAU:
                print(
                    'selfplay: Changing tree_tau to {} as total number of moves is now {}'
                    .format(DET_TREE_TAU, len(play_history)))
            tree_tau = DET_TREE_TAU

        if root.state.check_win():
            print('END GAME REACHED')
            break

        # Stop (and discard) the game if it's nonsense
        if num_useless_moves >= PROGRESS_MOVE_LIMIT:
            print(
                'Game stopped by reaching progress move limit; Game Discarded')
            return None, None

    if randomised:
        # Discard the first `BOARD_HIST_MOVES` as the history is not enough
        return play_history[BOARD_HIST_MOVES:], utils.get_p1_winloss_reward(
            root.state)
    else:
        return play_history, utils.get_p1_winloss_reward(root.state)
Example #21
    def get_move(self, game_state, det, sims):
        # if only one move is available, this one is chosen
        allowed_actions = valid_actions(game_state.array)
        if len(allowed_actions) == 1:
            return allowed_actions[0], None

        # the given game state is set as root of the tree
        if self.mcts is None or game_state.id not in self.mcts.tree:
            self.mcts = MCTS(Node(game_state))
        else:
            self.mcts.root = self.mcts.tree[game_state.id]

        # simulate a number of games starting from the current game state to fill the Monte Carlo Search Tree
        for i in range(sims):
            leaf, chosen_path, new_game_state = self.mcts.simulate_game()

            # checking if the game finished after the simulation or if the end of the tree was reached
            if new_game_state is None or check_for_winner(
                    new_game_state.array) is None:
                # if the game is not finished the model is used to evaluate the game state
                value, probabilities, allowed_actions = self.get_predictions(
                    leaf.game_state)
                # the model also provides a probability distribution of the best move to take in this situation
                probabilities = probabilities[allowed_actions]

                # new edges and nodes are created at the leaf to expand the tree
                for idx, action in enumerate(allowed_actions):
                    new_game_state = leaf.game_state.take_action(action)
                    if new_game_state.id not in self.mcts.tree:
                        node = Node(new_game_state)
                        self.mcts.add_node(node)
                    else:
                        node = self.mcts.tree[new_game_state.id]
                    new_edge = Edge(leaf, node, probabilities[idx], action)
                    leaf.edges.append((action, new_edge))
            else:
                # if the game is finished, the model is not needed because the value of the game state is the result
                value = -1
                if check_for_winner(new_game_state.array) == 0:
                    value = 0

            # after the value of the game state is calculated, the chosen path of the Search Tree is updated
            self.mcts.back_propagation(leaf, chosen_path, value)

        # q gathers mean action-values and n gathers visit counts for the 42 cells
        q = np.zeros(42, dtype=np.float32)
        n = np.zeros(42, dtype=np.int64)
        # choosing the best move after the simulations
        for action, edge in self.mcts.root.edges:
            q[action] = edge.Q
            n[action] = edge.N
        n = n / (np.sum(n) * 1.0)
        # the values are normalized into a scale of 0 to 1
        allowed_actions = valid_actions(game_state.array)
        normalized = np.zeros(42, dtype=np.float64)
        for index in allowed_actions:
            normalized[index] = (q[index] - min(q)) / (max(q) - min(q))
        normalized = normalized / np.sum(normalized)
        # the selection can rarely lead to an error because of a prior rounding error
        try:
            # either the best move is chosen or a random one depending on whether the deterministic flag is set.
            if det:
                # one of the moves with the highest value is chosen
                actions = np.argwhere(normalized == max(normalized))
                action = random.choice(actions)[0]
            else:
                # semi-randomly selecting a move - the higher the value the more likely it is chosen
                normalized[allowed_actions[-1]] = normalized[
                    allowed_actions[-1]] + (1 - np.sum(normalized))
                action_idx = np.random.multinomial(1, normalized)
                action = np.where(action_idx == 1)[0][0]
        except (ValueError, IndexError):
            # if the error occurs, simply a random allowed move is chosen instead
            action = random.choice(allowed_actions)
        return action, n
Example #22
def main():
    # n, num_games, verbose, starting_player, max_rollouts = setup_game()
    n, num_games, verbose, starting_player, max_rollouts = 5, 200, False, 1, 0.5
    results = []
    game_num = 1
    viewer = None

    run_tournament = True
    with_training = True
    num_games_tournament = 25
    if run_tournament:
        save_path = "short_topp"
    else:
        save_path = "long_topp"

    ##### CONFIG #####

    buffer_size = 40
    train_interval = 40
    saving_interval = 10
    moves_done = 0
    epochs = 300

    ##################

    buffer = ReplayBuffer(vfrac=0.1, tfrac=0.1, size=buffer_size)
    anet = init_anet(n, buffer)

    if with_training:
        anet.save_to_file(save_path + "/model_step_{0}.h5".format(0))
    game = Hex(n, starting_player)
    ROOT_NODE = Node(game=game)
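    # note: the same root node object is reused across games, so any search
    # statistics stored on it carry over between games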
    while with_training and num_games >= game_num:
        game = Hex(n, starting_player)
        next_root = ROOT_NODE
        # viewer = Board(game)
        print("Game number {}".format(game_num))
        while game.get_moves():
            mc = MonteCarlo(game, max_rollouts, next_root)
            mc.run(lambda _input: ANET.predict(_input, model=anet.model))
            case = mc.get_training_case()
            buffer.push(case)
            next_root = mc.get_best_move()
            game.do_move(next_root.move)
            moves_done += 1

            if viewer:
                viewer.do_move(next_root.move, game.player)
            if moves_done % train_interval == 0:
                buffer.update()
                anet.train_model(epochs)
                anet.run_against_random(num_games=50, game_num=game_num)
        if saving_interval > 0 and game_num % saving_interval == 0:
            anet.save_to_file(save_path +
                              "/model_step_{0}.h5".format(game_num))
            buffer.size += 20
            # train_interval += 5
            # anet.optimizer.lr /= 2
        if game.get_result(game.player) == 1:
            results.append(game.player)
        game_num += 1

    if viewer:
        viewer.persist()

    if run_tournament:
        tournament = Tournament(num_games_tournament)
        tournament.run_tournament(save_path)

    else:
        anet.save_to_file("best_topp/model_2.h5")
Example #23
def AI_vs_AI():
    flag = True
    while flag:
        playerLetter = random.choice(('X', 'O'))
        computerLetter = 'O' if playerLetter == 'X' else 'X'
        turn = whoGoesFirst()
        theBoard = [' '] * 10

        mcts = MCTS(2, playrandom, get_possible_next_states)

        first_letter = playerLetter if turn == 'player' else computerLetter

        for player in mcts.player_list:
            if player.nr == 0:
                player.id = first_letter
            else:
                player.id = playerLetter if first_letter == computerLetter else computerLetter

        mcts.root = Node(State(True, theBoard), mcts.player_list[0])

        gameIsPlaying = True

        while gameIsPlaying:
            if turn == 'player':
                print('\n')

                drawBoard(theBoard)
                print('\n')
                mcts.root = mcts.find_next_move()
                # choosen_next_state = mcts.find_next_move(tree, tree.root.state.infolist[0])

                # make the move that was chosen by the MCTS algorithm
                for i, entry in enumerate(theBoard):
                    if entry != mcts.root.state.board[i]:
                        makeMove(theBoard, playerLetter, i)
                        break

                if isWinner(theBoard, playerLetter):
                    drawBoard(theBoard)
                    print(playerLetter, ' won the game!')
                    gameIsPlaying = False
                else:
                    if isBoardFull(theBoard):
                        drawBoard(theBoard)
                        print('The game is a tie!')
                        break
                    else:
                        turn = 'computer'
                        input()

            else:
                print('\n')
                #print('\n')
                drawBoard(theBoard)
                print('\n')

                mcts.root = mcts.find_next_move()
                # choosen_next_state = mcts.find_next_move(tree, tree.root.state.infolist[0])

                # make the move that was chosen by the MCTS algorithm
                for i, entry in enumerate(theBoard):
                    if entry != mcts.root.state.board[i]:
                        makeMove(theBoard, computerLetter, i)
                        break

                if isWinner(theBoard, computerLetter):
                    drawBoard(theBoard)
                    print(computerLetter, ' won the game!')
                    gameIsPlaying = False
                else:
                    if isBoardFull(theBoard):
                        drawBoard(theBoard)
                        print('The game is a tie!')
                        break
                    else:
                        turn = 'player'
                        input()

        cont = input('another game?\n')
        if cont not in ['y', 'yes', 'ye']:
            flag = False
Example #24
def normal_game():
    print("\nWelcome to MonteCarlo-TicTacToe")
    playerLetter, computerLetter = inputPlayerLetter()
    turn = whoGoesFirst()
    theBoard = [' '] * 10

    mcts = MCTS(2, playrandom, get_possible_next_states)

    first_letter = playerLetter if turn == 'player' else computerLetter

    for player in mcts.player_list:
        if player.nr == 0:
            player.id = first_letter
        else:
            player.id = playerLetter if first_letter == computerLetter else computerLetter

    mcts.root = Node(State(True, theBoard), mcts.player_list[0])

    gameIsPlaying = True

    while gameIsPlaying:
        if turn == 'player':
            print('\n')
            drawBoard(theBoard)
            move = getPlayerMove(theBoard)
            makeMove(theBoard, playerLetter, move)

            status = True  # if it were not, the game would end in the evaluation below anyway

            next_state = State(status, theBoard)
            mcts.update_root(next_state)

            if isWinner(theBoard, playerLetter):
                drawBoard(theBoard)
                print('You have won the game!')
                gameIsPlaying = False
            else:
                if isBoardFull(theBoard):
                    drawBoard(theBoard)
                    print('The game is a tie!')
                    break
                else:
                    turn = 'computer'

        else:
            print('\n')
            print('\n')
            drawBoard(theBoard)

            mcts.root = mcts.find_next_move()
            #choosen_next_state = mcts.find_next_move(tree, tree.root.state.infolist[0])

            # make the move that was chosen by the MCTS algorithm
            for i, entry in enumerate(theBoard):
                if entry != mcts.root.state.board[i]:
                    makeMove(theBoard, computerLetter, i)
                    break

            if isWinner(theBoard, computerLetter):
                drawBoard(theBoard)
                print('The computer has beaten you!')
                gameIsPlaying = False
            else:
                if isBoardFull(theBoard):
                    drawBoard(theBoard)
                    print('The game is a tie!')
                    break
                else:
                    turn = 'player'
Example #25
def main():
    #print("check-2")
    network = load_model(args.newnetwork)
    #print("am here")
    score_net = 0
    score_random = 0

    for game in range(args.numgames):
        moves = 0
        temperature = 10e-6
        white = 1
        black = None

        env = oz_env()
        env.reset()
        root_node = Node(env, Config.EXPLORE_FACTOR)
        game_over = False

        while not game_over:
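            # board[71] appears to flag the side to move: 1 means white,
            # i.e. the network's turn; otherwise the random player moves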
            if root_node.env.board[71] == 1:
                player = white
            else:
                player = black
            #print(root_node.env.board)
            start = time.time()
            if player == white:
                #print(root_node.env.board)
                print("am here-1\n")
                pi, successor, root_node = MCTS(temp=temperature,
                                                network=network,
                                                root=root_node)
                #print(root_node.env.board)
                print("MCTS completed move {} in: {}".format(
                    moves,
                    time.time() - start))
                root_node = successor
            else:
                print("am here-2\n")
                if root_node.children is None:
                    root_node.children = [None] * len(root_node.legal_moves)

                move = np.random.randint(0, len(root_node.legal_moves))
                if root_node.children[move] is None:
                    #next_env = root_node.env.deepcopy()
                    next_env = deepcopy(root_node.env)
                    #next_env = root_node.env
                    next_env.step(root_node.legal_moves[move])

                    root_node.children[move] = Node(next_env,
                                                    temperature,
                                                    parent=root_node,
                                                    child_id=move)
                root_node = root_node.children[move]
            print(root_node.env.board)
            moves = moves + 1

            game_over = root_node.env.is_game_over()
            z = root_node.env.who_won()

        # from white perspective

        #if white == player:
        if z >= 1:
            score_net += 1
        else:
            score_random += 1
        #else:
        #    if z <= -1:
        #        score_net += 1
        #    else:
        #        score_random += 1

        print("Game {} complete. Net: {} Random: {}".format(
            game, score_net, score_random))

    print("New network score total wins: {} Average Score: {}".format(
        score_net, score_net / args.numgames))
    print("Random play score total wins: {} Average Score: {}".format(
        score_random, score_random / args.numgames))