Example #1
    def duel(self, opponent, first=1):
        '''Play a full game against an opponent AI.'''

        if first == -1:
            e0, e1 = opponent, self.estimator
        else:
            e0, e1 = self.estimator, opponent

        s0 = MCTS(e0, maxiter=self.mcts_iters)
        s1 = MCTS(e1, maxiter=self.mcts_iters)

        while not s0.state.over:

            a = State.domain[np.argmax(s0.search())]

            s0.apply(a)
            s1.apply(a)

            if s0.state.over:
                break

            a = State.domain[np.argmax(s1.search())]

            s1.apply(a)
            s0.apply(a)

        return s0.state.winner
Example #2
    def learn(self):
        for i in range(self.config.num_iters):
            self_play = SelfPlay(self.game, self.model)
            examples = self_play.generate_play_data()
            for _ in range(self.config.num_episodes):
                examples += self_play.generate_play_data()
            examples = self.examples_to_array(examples)
            examples = self.shuffle_examples(examples)

            # Step 1. Keep a copy of the current model
            self.model.save_checkpoint(filename='temp.pth.tar')
            self.prev_model.load_checkpoint(filename='temp.pth.tar')

            # Step 2. Train the model
            prev_mcts = MCTS(self.game, self.prev_model, self.config.c_puct, self.config.num_sims)
            self.model.train(examples)
            new_mcts = MCTS(self.game, self.model, self.config.c_puct, self.config.num_sims)

            # Step 3. Evaluate the model
            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(self.game, new_mcts, prev_mcts)
            # Player 1 is the optimized player
            player1_win, player2_win, draw = arena.play_matches(self.config.arena_games)
            print('NEW MODEL/PREV MODEL WINS : %d / %d ; DRAWS : %d' % (player1_win, player2_win, draw))

            if ((player1_win * 1.0) / self.config.arena_games) > self.config.arena_threshold:
                print('ACCEPTING NEW MODEL')
                self.model.save_checkpoint(filename=self.getCheckpointFile(i))
                self.model.save_checkpoint(filename='best.pth.tar')
            else:
                print('REJECTING NEW MODEL')
                self.model.load_checkpoint(filename='temp.pth.tar')
Example #3
def main(args):
    if args.player1 == "human":
        agent1 = Human(1, surface)
    elif args.player1 == "minimax":
        agent1 = Minimax(1, args.minimax_depth[0], args.variant)
    elif args.player1 == "mcts":
        agent1 = MCTS(1, args.mcts_depth[0], args.mcts_rollouts[0],
                      args.variant, args.heuristic_rollouts[0],
                      args.input_file[0] if args.input_file else None,
                      args.output_file[0] if args.output_file else None,
                      args.ucb_const[0])

    if args.player2 == "human":
        agent2 = Human(-1, surface)
    elif args.player2 == "minimax":
        agent2 = Minimax(-1, args.minimax_depth[1], args.variant)
    elif args.player2 == "mcts":
        agent2 = MCTS(-1, args.mcts_depth[1], args.mcts_rollouts[1],
                      args.variant, args.heuristic_rollouts[1],
                      args.input_file[1] if len(args.input_file) == 2 else None,
                      args.output_file[1] if len(args.output_file) == 2 else None,
                      args.ucb_const[1])

    for i in range(args.num_games):
        play_game(agent1, agent2, surface, args.variant, args.wait_between)
        if type(agent1) == MCTS:
            agent1.reset(1)
        if type(agent2) == MCTS:
            agent2.reset(-1)
        if args.alternate_sides:
            agent1.switch_sides()
            agent2.switch_sides()
            agent1, agent2 = agent2, agent1
        if type(agent1) == MCTS:
            agent1.store_root()
        if type(agent2) == MCTS:
            agent2.store_root()
Example #4
    def learn(self):
        for i in range(1, self.args.n_epochs + 1):
            # bookkeeping
            print('------EPOCH ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.max_queue)

                for eps in range(self.args.n_episodes):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.n_trainexamples:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
Example #5
    def __compareToCurrentBest(self, trainedNets, numberOfGames=20, searchesPerMove=50):
        print("Evaluating network")
        previousNets = self.__loadNNets(self.CURRENT_BEST_NNET)
        wins, losses = 0, 0

        for game in tqdm(range(numberOfGames)):
            isTrainedBlack = bool(getrandbits(1))
            isBlacksMove = False
            env = Environment()

            while not env.isGameFinished():
                if isBlacksMove != isTrainedBlack:
                    mcts = MCTS(env, previousNets, self.device)
                else:
                    mcts = MCTS(env, trainedNets, self.device)

                env.saveCheckpoint()
                for search in range(searchesPerMove):
                    mcts.search()
                    env.loadCheckpoint()

                nextMove = mcts.getBestMove()
                env.move(*nextMove)

                isBlacksMove = not isBlacksMove

            isBlackWinner = not env.isBlackTurn
            if isBlackWinner != isTrainedBlack:
                losses += 1
            else:
                wins += 1

        return wins, losses
Example #6
def play_game(agent0, agent1, mcts_iter):
    board = Board()

    steps = 0
    # agents = (agent0, agent1)
    agents = ((agent0, MCTS(agent0, n_iter=mcts_iter)),
              (agent1, MCTS(agent1, n_iter=mcts_iter)))
    curr_agent_idx = random.choice([0, 1])
    samples_buffer = []
    while True:
        steps += 1

        # MCTS
        agent, mcts = agents[curr_agent_idx]
        try:
            root_node, mcts_p, action_p, value = mcts.search(
                board, curr_agent_idx)
            # root_node, mcts_p, action_p, value = mcts(board, agent, curr_agent_idx, n_iter=mcts_iter)
        except TerminalStateException:
            break

        state, valid_positions, valid_positions_mask = root_node.state
        if steps <= 20:
            action_idx = np.random.choice(len(mcts_p), p=mcts_p)
        else:
            action_idx = np.argmax(mcts_p)

        # /MCTS

        # No mcts
        # agent = agents[curr_agent_idx]
        # state, valid_positions, valid_positions_mask = get_state(board, curr_agent_idx)

        # if len(valid_positions) == 0:
        #     break

        # action_p, value = agent(tf.convert_to_tensor([state], dtype=tf.float32))
        # action_p = action_p[0].numpy() * valid_positions_mask.reshape((-1,))
        # value = value[0].numpy()
        # action_idx = np.random.choice(len(action_p), p=action_p / np.sum(action_p))
        # /No mcts

        if curr_agent_idx == 0:
            samples_buffer.append(
                [state, action_p[action_idx], action_idx, value])

        position_key = (int(action_idx / board.size),
                        int(action_idx % board.size))
        board.apply_position(curr_agent_idx, valid_positions[position_key])

        curr_agent_idx = 1 - curr_agent_idx

    reward = 0
    player0_score, player1_score = board.scores()
    if player0_score < player1_score:
        reward = -1
    elif player1_score < player0_score:
        reward = 1

    return samples_buffer, reward, steps
Example #7
 def __init__(self, game, neural_net_mister_x, neural_net_detectives, args):
     self.game = game
     self.nnet = neural_net_mister_x
     # self.pnet = self.nnet.__class__(self.game)  # the competitor network
     self.pnet = neural_net_detectives  # the competitor network
     self.args = args
     self.mcts_mister_x = MCTS(self.game, self.nnet, self.args)
     self.mcts_detectives = MCTS(self.game, self.pnet, self.args)
     self.train_examples_history = []  # history of examples from args.numItersForTrainExamplesHistory latest iterations
     self.skipFirstSelfPlay = False  # can be overridden in loadTrainExamples()
Example #8
    def __init__(self, black):
        self.game = Game()
        self.black = load_model('Agz224.h5')
        self.black_graph = tf.get_default_graph()
        self.white = load_model('Agz224.h5')
        self.white_graph = tf.get_default_graph()

        # parse args and create models
        if (black.strip() == 'white'):
            self.black = MCTS(name='MCTS',
                              black_model=(self.black, self.black_graph),
                              white_model=(self.white, self.white_graph),
                              black_playout=(self.black, self.black_graph),
                              white_playout=(self.white, self.white_graph),
                              timeout=2.75,
                              high=14,
                              gamma=0.99,
                              verbose=0,
                              min_prob=0.8,
                              param1=0.2,
                              param2=0.65)
            self.white = 'Human'

        elif (black.strip() == 'black'):
            self.white = MCTS(name='MCTS',
                              black_model=(self.black, self.black_graph),
                              white_model=(self.white, self.white_graph),
                              black_playout=(self.black, self.black_graph),
                              white_playout=(self.white, self.white_graph),
                              timeout=2.75,
                              high=14,
                              gamma=0.99,
                              verbose=0,
                              min_prob=0.8,
                              param1=0.2,
                              param2=0.65)
            self.black = 'Human'

        # init gui
        window = tkinter.Tk()
        self.board_frame = BoardFrame(window)
        self.board_canvas = BoardCanvas(self.board_frame.board_label_frame,
                                        height=600,
                                        width=500)

        # bind left mouse button click event
        self.board_canvas.bind('<Button-1>', self.click_event)

        self.board_frame.pack()
        self.board_canvas.pack()

        window.mainloop()
Example #9
    def test_mcts(self):
        # set up
        rings = 19
        marbles = {'w': 10, 'g': 10, 'b': 10}
        win_con = [{'w': 2}, {'w': 1, 'g': 1, 'b': 1}]
        t = 3
        game = Game(rings, marbles, win_con, t)
        nnet = DumbNN(game)

        # take some actions
        #(('PUT', 'w', (4, 4)), ('REM', (4, 3)))
        game.get_next_state((0, 24, 23), 'PUT')
        #(('PUT', 'b', (3, 4)), ('REM', (4, 2)))
        game.get_next_state((2, 19, 22), 'PUT')
        #(('PUT', 'g', (2, 3)), ('REM', (1, 3)))
        game.get_next_state((1, 13, 8), 'PUT')
        #(('PUT', 'b', (1, 1)), ('REM', (3, 1)))
        game.get_next_state((1, 6, 16), 'PUT')
        #(('PUT', 'b', (2, 1)), ('REM', (0, 2)))
        game.get_next_state((2, 11, 2), 'PUT')
        #(('PUT', 'w', (3, 3)), ('REM', (0, 0)))
        game.get_next_state((0, 18, 0), 'PUT')

        # do MCTS
        board_state, player_value = game.get_current_state()
        print(board_state[0] + board_state[1] + board_state[2]*2 + board_state[3]*3)
        ai = MCTS(game, nnet, 1, 50)
        ai.reset(player_value)
        ai.get_action_prob(board_state, temp=0)
Example #10
    def _act(self, obs, action_space):
        state = self._create_sim_state(obs)

        env_state = _EnvState(state, self._character.agent_id, self._sim_env, self._net)

        selected_actions = None
        selected_actions_prs = None
        if self._is_self_play and self._step_count <= self._num_exploration_steps:
            temp = 1.0
        else:
            temp = 1e-3
        searcher = MCTS(env_state, temp=temp, iteration_limit=self._iteration_limit, is_self_play=self._is_self_play)
        for i, (actions, action_prs) in enumerate(searcher.search()):
            if i == self._character.agent_id:
                self._training_states_self += self._get_training_states(i)
                self._action_prs_self.append(action_prs)

                selected_actions = actions
                selected_actions_prs = action_prs
            else:
                self._training_states_other += self._get_training_states(i)
                self._action_prs_other.append(action_prs)

        np.random.seed(int.from_bytes(os.urandom(4), byteorder='little'))
        action = np.random.choice(selected_actions, p=selected_actions_prs)

        return action
Example #11
 def mcts_refresh_game(self):
     with torch.no_grad():
         self.nn.eval()
         self.time_steps = []
         for i in range(self.game_size):
             nn_thread_edge_queue = queue.Queue(maxsize=self.max_queue_size)
             # def gpu_thread_worker(nn, queue, eval_batch_size, is_cuda):
             gpu_thread = threading.Thread(
                 target=gpu_thread_worker,
                 args=(self.nn, nn_thread_edge_queue, self.eval_batch_size,
                       self.is_cuda))
             gpu_thread.start()
             mcts = MCTS(nn_thread_edge_queue, self.nn, self.is_cuda,
                         self.max_game_length, self.simulations_per_play,
                         self.debug)
             mcts.play_until_terminal()
             nn_thread_edge_queue.put(None)
             # print("Terminal sentinel is put on queue")
             nn_thread_edge_queue.join()
             if self.debug:
                 print("Queue has joined")
             gpu_thread.join()
             if self.debug:
                 print("Thread has joined")
             self.time_steps += mcts.time_steps
             print("Successful generation of one game")
             print("Queue empty:", nn_thread_edge_queue.empty())
Example #12
def main():
    env_name = "Taxi-v3"
    state_units = 16
    hid_units = 8
    dirichlet_alpha = 0.25
    exploration_fraction = 0.25
    pb_c_base = 19652
    pb_c_init = 1.25
    discount = 0.99
    num_simulations = 100
    filename = "model_last.pth"

    device = get_device(True)

    env = gym.make(env_name)
    env = RecordEpisodeStatistics(env)
    env = TaxiObservationWrapper(env)

    network = Network(env.observation_space.nvec.sum(), env.action_space.n,
                      state_units, hid_units)
    mcts = MCTS(dirichlet_alpha, exploration_fraction, pb_c_base, pb_c_init,
                discount, num_simulations)
    agent = Agent(network, mcts)
    trainer = Trainer()

    if os.path.exists(filename):
        agent.load_model(filename, device)
        # print(network.state_dict())

    trainer.validate(env, agent, network)
Example #13
    def play_episode(self):
        obs = self.env.reset()
        env_state = self.env.get_state()

        done = False
        t = 0
        total_reward = 0.0

        mcts = MCTS(self.config)

        root_node = Node(state=env_state,
                         done=False,
                         obs=obs,
                         reward=0,
                         action=None,
                         parent=RootParentNode(env=self.env_creator()),
                         mcts=mcts,
                         depth=0)

        while not done:
            t += 1
            # compute action choice
            action, root_node = mcts.compute_action(root_node)
            # remove the old part of the tree that we won't use anymore
            root_node.parent = RootParentNode(env=self.env_creator())

            # take action
            obs, reward, done, info = self.env.step(action)
            if self.config["render"]:
                self.env.render()
            total_reward += reward
        self.env.close()
        return t, total_reward
Example #14
 def test_select_expand(self):
     env = gym.make('MiniGrid-Empty-5x5-v0')
     mcts_obj = MCTS(env)
     self.assertEqual(mcts_obj.root_node.children, [])
     path = mcts_obj.select_expand()
     self.assertEqual(path, [0])
     self.assertEqual(len(mcts_obj.root_node.children), 7)
Example #15
    def policyIteration(self, start_round, rounds, episodes, iterations, dup):
        for i in range(start_round, start_round + rounds + 1):

            net = self.nnet
            self.mcts = MCTS(net, iterations)
            mcts = self.mcts
            print("ROUND")
            print(i)
            path = "Models/checkpoint" + "_" + str(i) + "_" + str(episodes) + "_" + str(mcts.iterations) + "_" + str(dup) + ".pth"
            print("model " + path + " saved")
            torch.save(net.state_dict(), path)
            state_dict = torch.load(path)
            net.load_state_dict(state_dict)
            
            if i >= rounds:
                return self.nnet
            
            for e in range(episodes):
                print(e)
                self.data += self.executeEpisode()       # collect examples from this game
                print(len(self.data))
            
            if dup:
                duplicate =  [(encode_reverse(x[0]), x[1], x[2]) for x in self.data]
                self.data += duplicate
            
            datasets = np.array(self.data)
            optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.8, 0.999))
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50,100,150,200,250,300,400], gamma=0.77)
            train(net, datasets, optimizer, scheduler, 0, 0, 0)
            self.nnet = net
            self.data = []
        
        return self.nnet
Example #16
def fight(net1, net2):
    numGame = 10
    win_net1 = 0
    win_net2 = 0
    mcts = MCTS()

    for color in [BLACK, WHITE]:
        for e in range(int(numGame / 2)):
            print ('[FIGHTING] game number ', e)
            board = game.GameBoard()
            board.play(randint(0, 360))  # start from a random position
            while not board.gameEnd():

                if board.player_turn == color:
                    moves = mcts.pi(board, net1)
                else:
                    moves = mcts.pi(board, net2)
                a = moves.index(max(moves))
                board.play(a)
            print ('end, winner = ', "White" if board.reward == -1 else "Black")
            board.display_board()

            if board.player_turn == color:  # the new network lost
                win_net2 += 1
            else:
                win_net1 += 1

    print('duel results: ', win_net1, ' / ', win_net2)

    return win_net1 / numGame
Example #17
    def play_with_agents(self, agt1, agt2):
        player_turn = 1
        player1_wins = 0

        for i in range(self.G):
            print("[{}>{}]".format("-" * i, "." * (self.G - i - 1)), end="\r")
            sm = StateManager(5)
            agent = MCTS(exploration_rate=1, anet=agt1)
            game = sm.create_game()
            tree = Tree(game, chanceOfRandom=0.0)

            state = tree.root
            while not sm.is_finished():

                if player_turn == 1:
                    agent.anet = agt1
                    best_child = agent.uct_search(tree, state,
                                                  num_search_games)
                else:
                    agent.anet = agt2
                    best_child = agent.uct_search(tree, state,
                                                  num_search_games)

                game.execute_move(best_child.move)
                state = best_child
                player_turn = 2 if player_turn == 1 else 1  # alternate between the two agents

            if sm.get_winner() == 1:
                player1_wins += 1

        print("{} won {}/{} against {}.".format(agt1.name, player1_wins,
                                                self.G, agt2.name))
        print(np.reshape(sm.game.board, (boardsize, boardsize)))
Example #18
File: mctpolicy.py Project: skanin/MCTS
 def __init__(self, model, name, player, cfg, timeout):
     self.cfg = cfg
     self.mct = MCTS(None, player, None, self.cfg['training']['c'])
     self.name = name
     self.player = player
     self.random_move_prob = 1
     self.timeout = timeout
Example #19
def execute_episode(network, replay_buffer, experiment):
    examples = []
    board = Game(player_turn=1)
    mcts = MCTS(board.clone(), network)
    temp = 1.0
    i = 0
    while not board.over():
        i += 1
        if i >= experiment.get_parameter('temp_decrese_moves'):
            temp = 10e-3  # lower the temperature for near-greedy move selection
        # perform mcts search
        for _ in range(experiment.get_parameter('mcts_rollouts')):
            mcts.search(mcts.root, board.clone())

        # choose the action
        N_total = np.sum(np.array(list(mcts.root.N.values()))**(1 / temp))
        pi = np.zeros(6)
        for a in mcts.root.actions:
            pi[a] = mcts.root.N[a]**(1 / temp) / N_total
        action = np.random.choice(np.arange(6), p=pi)
        # add the move to the replay buffer
        replay_buffer.add(board.board(), action, pi, mcts.root.v_mult,
                          board.valid_moves())
        print("Board {}, action {}, MCTS probabilities {}".format(
            board.board(), action, pi))
        board.move(action)
        if board.over():
            replay_buffer.finish_episode(board.winner())
            return board.winner()
        mcts.root = mcts.root.children[action]
Example #20
    def __init__(self,
                 model,
                 optimizer,
                 dataset_max_size,
                 resignation_threshold,
                 vis,
                 asycio_data_generation=False):
        self.best_mcts = MCTS(StateNode(None, init_game()),
                              config.cpuct)  # best player to generate data
        self.dataset = GameDataset(dataset_max_size)
        self.resignation_threshold = resignation_threshold  # not used for now
        self.model = model
        # Initialize visdom
        self.vis = vis
        self.iter_plot = create_vis_plot(self.vis, 'Iteration', 'Loss',
                                         "Avg Loss")
        self.len_plot = create_vis_plot(self.vis, 'Iteration', 'Length',
                                        "Avg Self-Play Length")

        self.logger = build_logger("pipeline", config.file2write)
        self.checkpoints_directory = "../checkpoints/2901"
        if not os.path.exists(self.checkpoints_directory):
            os.makedirs(self.checkpoints_directory)

        self.optimizer = optimizer
        self.asycio_data_generation = asycio_data_generation  # Python >= 3.4

        self.epoch_index = 0
        self.play_index = 0
        self.model_index = 0
Example #21
    def maximum_similarity_model(model,
                                 clusters,
                                 scaler,
                                 MAX_CLUSTERS,
                                 NOISE_PARAM,
                                 similarity_mean,
                                 similarity_std,
                                 env=None):
        sim = similarity[model]
        node = run_mcts(clusters, similarity, scaler, MAX_CLUSTERS,
                        NOISE_PARAM, similarity_mean, similarity_std,
                        action_count)(idx=0,
                                      cluster=1,
                                      similarity=sim[0],
                                      terminal=False)
        mcts = MCTS(env=env)

        while True:
            for i in range(25):
                mcts.do_rollout(node)
            node, score = mcts.choose(node)
            if node.terminal:
                break
        idxs = np.where((similarity == node.similarity))
        idxs = np.where((clusters[idxs[0]] == node.cluster))[0]
        state_selected = idxs[0]
        return state_selected, score
Example #22
    def test_mcts2(self):
        # set up
        rings = 19
        marbles = {'w': 10, 'g': 10, 'b': 10}
        win_con = [{'w': 2}, {'g': 2}, {'b': 2}, {'w': 1, 'g': 1, 'b': 1}]
        t = 3
        game = Game(rings, marbles, win_con, t)
        nnet = DumbNN(game)

        # take some actions
        #Human:   PUT g B1 B4
        game.get_next_state((1, 16, 1), 'PUT')
        #AI:      PUT b D3 C5
        game.get_next_state((2, 13, 2), 'PUT')
        #Human:   PUT b E1 C4
        game.get_next_state((2, 24, 7), 'PUT')
        #AI:      PUT w B2 D1
        game.get_next_state((0, 11, 23), 'PUT')
        #Human:   CAP g B1 w B3
        game.get_next_state((3, 3, 1), 'CAP')
        #AI:      PUT g A3 D4
        #game.get_next_state((1, 0, 8), 'PUT')
        #Human:   CAP g A3 g C3
        #game.get_next_state((5, 0, 0), 'CAP')
        #Human:   CAP g C3 b E3

        # do MCTS
        board_state, player_value = game.get_current_state()
        print(board_state[0] + board_state[1] + board_state[2]*2 + board_state[3]*3)
        print(board_state[-1])
        ai = MCTS(game, nnet, 1, 6)
        ai.reset(player_value)
        ai.get_action_prob(board_state, temp=0)
Example #23
    def _AI_player(self):
        '''The interface for the AI player.
        Parameters required and updated: board status, which side to play.
        Return: the next Gomoku piece coordinate (x, y).

        Gomoku board status: 0 means no piece, 1 means a black piece and -1 means a white piece.
        '''

        self.human = False

        if self.is_start == False:
            return

        # AI_program

        AI = MCTS()
        AI = Alpha(model_file=self.model_file, use_gpu=False)
        [x, y] = AI.play(self.row, self.column, self.board)

        self._draw_piece(x, y, self.is_black)
        self.board[x][y] = self._ternary_op(1, -1, self.is_black)

        self.last_x, self.last_y = x, y
        self._gomoku_who_win()

        self.is_black = not self.is_black
        self.l_info.config(
            text=self._ternary_op('黑方行棋', '白方行棋', self.is_black))
        self.human = True
Example #24
def test_update_tree():
    mcts = MCTS()
    root = GameNode({}, 1)
    prior = 0.5
    c1 = GameNode(root, prior)
    root.children[1] = c1
    root.player = 1
    c1.player = 2

    c2 = GameNode(c1, 0.2)
    c1.children[1] = c2
    c2.player = 1

    c3 = GameNode(c2, 0.8)
    c2.children[1] = c3
    c3.player = 2

    c4 = GameNode(c3, 0.3)
    c3.children[1] = c4
    c4.player = 1
    root = mcts.update_tree(c3, 1.4, 1)
    print("update 1")
    mcts.print_tree(root)
    print("update 2")
    root = mcts.update_tree(c4, 1.5, 1)

    mcts.print_tree(root)
Example #25
 def update_model_weights(self, weights):
     self.model.set_weights(weights)
     self.searches = [
         MCTS(self.game, self.model, self.mcts_args)
         for _ in range(len(self.game.players))
     ]
     printl(f'{self.name}: Updated model weights')
Example #26
def rna_folding(rna_data, policy, stochastically=True, render=False):
    np.random.seed(int.from_bytes(os.urandom(4), byteorder='little'))

    rna = RNA(rna_data['seq'], rna_data['pairs'])
    mcts = MCTS(policy, 2000, False, 10)
    min_energy = rna.energy()
    pred_energy, _, _ = mcts.evaluate_state(rna)
    if render: print(rna)

    while rna.action_space and pred_energy > 1:
        action, action_probs = mcts.get_action(rna, stochastically=stochastically, show_node=render)
        rna.move(action)
        mcts.update_with_action(action)

        energy = rna.energy()
        if energy < min_energy: min_energy = energy
        pred_energy, _, _ = mcts.evaluate_state(rna)

        if render:
            print("[*] RNA pair position: %s" % (action,))
            print("[*] RNA secondary structure: %s" % ''.join(rna.sec))
            print("[*] Predicted energy: %.2f" % pred_energy)
            print("[*] Current energy: %.2f" % energy)
            print("[*] Min energy: %.2f\n" % min_energy)
            print(rna)

    final_energy = rna.energy()
    rna_data['pred_sec'] = ''.join(rna.sec)
    rna_data['pred_pairs'] = rna.find_pairs
    return rna_data
Example #27
def play_game():
    game = Gomoku(game_board_width)
    policy = policy_network(input_dim=game.nn_input.shape,
                            output_dim=game.w**2)
    policy.load(model_file)
    mcts_player = MCTS(policy, mcts_playout_itermax_play)

    starting_player = random.choice([1, 2])
    game.reset(starting_player)
    mcts_player.set_rootnode(starting_player)
    while not game.is_end:
        print(game)
        # print(game.nn_input)

        if game.current_player == 1:  # Player X
            action, _ = mcts_player.get_move(game)
        else:  # Player O
            action = human_play()

        game.move(action)
        mcts_player.update_with_move(action, game)

        print("[*] Player %s move: %s\n" %
              (['X', 'O'][game.player_just_moved - 1], action))

    print(game)
    if game.winner > 0:
        print("[*] Player %s win" % ['X', 'O'][game.winner - 1])
    else:
        print("[*] Player draw")
Example #28
    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.moves_made = []

        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.nb_rows = nb_rows
        self.nb_cols = nb_cols
        rows = []
        for ri in range(nb_rows + 1):
            columns = []
            for ci in range(nb_cols + 1):
                columns.append({"v": 0, "h": 0})
            rows.append(columns)
        self.cells = rows
        free_lines = []
        for ri in range(len(self.cells)):
            row = self.cells[ri]
            for ci in range(len(row)):
                cell = row[ci]
                if ri < (len(self.cells) - 1) and cell["v"] == 0:
                    free_lines.append((ri, ci, "v"))
                if ci < (len(row) - 1) and cell["h"] == 0:
                    free_lines.append((ri, ci, "h"))
        self.mcts = MCTS(self.cells, free_lines, player, timelimit)
Example #29
    def compare_nns(self):
        wins = []
        for i in range(self.simulation_length):
            self.game = ConnectFourGame(MCTS(self.current_nn),
                                        MCTS(self.new_nn))
            player = self.game.run_game()
            if player != 'draw':
                wins.append(player.name)

        print(Counter(wins))
        if Counter(wins)['b'] > self.simulation_length * self.win_per / self.simulation_length:
            self.current_nn = AlphaZeroNN()
            self.current_nn.copy(self.new_nn)
            self.current_nn.save()
Example #30
    def run_batch(self):
        """
    Runs G games of the specified type (Nim or Ledge). All parameters are fixed
    for all runs. Summarizes the results of the batch run in a print-sentence. 
    Creates a new instance of the game and for each move, asks the agent for an
    action. This action is applied and chancges the state of the board. When a
    final state is reached, the results are given to the agent for backpropagation
    and a new game instance is made.
    Returns a list of round winners
    """
        agent = MCTS(exploration_rate=self.c)
        win_stats = []

        game = self.create_game()
        tree = Tree(game)

        for i in range(self.G):
            state = tree.root

            while (not game.is_terminal_state()):
                best_child = agent.uct_search(tree, state, self.M)
                game.move(best_child.move)
                state = best_child

            win_stats.append(game.get_active_player())
            game = self.create_game()
            tree = Tree(game)

        self.summarize_batch(win_stats)
        return win_stats