def test_jouer_coup(self):
    """Check the error paths of ``Quoridor.jouer_coup``.

    Cases exercised:
        - QuoridorError when the player number is invalid
        - QuoridorError when the game is already over

    NOTE(review): the automatically selected move itself is not asserted
    in this test.
    """
    finished_players = [
        {"nom": "joueur1", "murs": 7, "pos": (5, 9)},
        {"nom": "joueur2", "murs": 3, "pos": (6, 9)},
    ]
    finished_walls = {
        "horizontaux": [(4, 4), (2, 6), (3, 8), (5, 8), (7, 8)],
        "verticaux": [(6, 2), (4, 4), (2, 5), (7, 5), (7, 7)],
    }

    # An invalid player number must be rejected on a brand-new game.
    fresh_game = Quoridor(["joueur1", "joueur2"])
    with self.assertRaisesRegex(QuoridorError, "joueur invalide!"):
        fresh_game.jouer_coup(5)

    # Asking for a move once the game is already over must be rejected too.
    finished_game = Quoridor(finished_players, finished_walls)
    with self.assertRaisesRegex(QuoridorError,
                                "La partie est déjà terminée!"):
        finished_game.jouer_coup(1)
示例#2
0
 def __init__(self, init_model=None):
     """Create the game, the training settings and the self-play player.

     Args:
         init_model: optional model file forwarded to ``PolicyValueNet``;
             when falsy the network is built with its default arguments.
     """
     # Board parameters
     self.game = Quoridor()

     # Training parameters: learning rate
     self.learn_rate = 2e-3
     self.lr_multiplier = 1.0  # adaptively rescales the learning rate
     self.kl_targ = 0.02

     # Training parameters: search
     self.temp = 1.0
     self.n_playout = 400
     self.c_puct = 5
     self.pure_mcts_playout_num = 1000

     # Training parameters: buffer and batching
     self.buffer_size = 10000
     self.batch_size = 128  # original note: "set to 1 while testing"
     self.data_buffer = deque(maxlen=self.buffer_size)
     self.play_batch_size = 1
     self.epochs = 5

     # Training parameters: schedule and bookkeeping
     self.check_freq = 50
     self.game_batch_num = 1500
     self.best_win_ratio = 0.0

     # The model file is only passed along when one was given.
     net_kwargs = {"model_file": init_model} if init_model else {}
     self.policy_value_net = PolicyValueNet(**net_kwargs)

     # Computer player used for self-play
     self.mcts_player = MCTSPlayer(
         self.policy_value_net.policy_value_fn,
         c_puct=self.c_puct,
         n_playout=self.n_playout,
         is_selfplay=1,
     )
示例#3
0
    def __init__(self, init_model=None):
        """Create the game, the training settings and the self-play player.

        Args:
            init_model: optional model file forwarded to ``PolicyValueNet``;
                when falsy the network is built with its default arguments.
        """
        self.game = Quoridor()

        # Learning-rate settings
        self.learn_rate = 2e-3
        self.lr_multiplier = 1.0
        self.kl_targ = 0.02

        # Search settings
        self.temp = 1.0
        self.n_playout = 200
        self.c_puct = 5
        self.pure_mcts_playout_num = 1000

        # Sample buffer
        self.buffer_size = 10000
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1

        # Schedule and bookkeeping
        self.check_freq = 10
        self.game_batch_num = 1000
        self.best_win_ratio = 0.0

        # Placeholders until the first update has run
        self.old_probs = 0
        self.new_probs = 0
        self.first_trained = False

        # The model file is only passed along when one was given.
        net_kwargs = {"model_file": init_model} if init_model else {}
        self.policy_value_net = PolicyValueNet(**net_kwargs)

        self.mcts_player = MCTSPlayer(
            self.policy_value_net.policy_value_fn,
            c_puct=self.c_puct,
            n_playout=self.n_playout,
            is_selfplay=1,
        )
示例#4
0
文件: mcts.py 项目: wrongu/QuoridorV2
 def __init__(self, game_state:Quoridor, policy_output, value_output):
     """Create the search-tree node for ``game_state``.

     Args:
         game_state: state this node stands for; read for its legal moves,
             its current player and its hash key.
         policy_output: stored unchanged as ``_policy``.
         value_output: stored unchanged as ``_value``.
     """
     plane_shape = (3, 9, 9)
     # Per-action statistics for actions taken *from this state*, kept as
     # tensors over all possible actions and initialised to zero; they are
     # meant to be masked later with the set of legal actions.
     self._counts = torch.zeros(plane_shape)
     self._total_reward = torch.zeros(plane_shape)
     self._policy, self._value = policy_output, value_output
     legal_moves = game_state.all_legal_moves()
     self._legal_mask = encode_actions_to_planes(legal_moves, game_state.current_player)
     self._player = game_state.current_player
     self._key = game_state.hash_key()
     self._children = {}
     self.__flagged = False
示例#5
0
 def setUp(self):
     """Build the ``Quoridor`` fixture used by the tests: two players, ten walls."""
     joueurs = [
         {"nom": "idul", "murs": 7, "pos": [5, 5]},
         {"nom": "automate", "murs": 3, "pos": [8, 6]},
     ]
     murs = {
         "horizontaux": [[4, 4], [2, 6], [3, 8], [5, 8], [7, 8]],
         "verticaux": [[6, 2], [4, 4], [2, 6], [7, 5], [7, 7]],
     }
     self.partie = Quoridor(joueurs, murs=murs)
示例#6
0
def mode_automatique(idul):
    """Run a game in automatic mode for the player ``idul``.

    A game is started through ``initialiser_partie``. Then, in an endless
    loop, the local ``Quoridor`` picks a move for player 1, the move is sent
    with ``jouer_coup`` and the local game is rebuilt from the returned
    state. The loop has no exit condition of its own.
    """
    identifiant, état = initialiser_partie(idul)
    print(afficher_damier_ascii(état))
    noms = [état['joueurs'][0]['nom'], état['joueurs'][1]['nom']]
    jeu = Quoridor(noms)
    état = jeu.état_partie()
    while True:
        coup, pos = jeu.jouer_coup(1)
        print(jeu)
        état = jouer_coup(identifiant, coup, tuple(pos))
        # NOTE(review): the return value is printed at the top of this
        # function but discarded here -- confirm which one is intended.
        afficher_damier_ascii(état)
        # Rebuild the local game from the state that was just received.
        jeu = Quoridor(état['joueurs'], état['murs'])
示例#7
0
    def test_init_with_state(self):
        """A game built from a list of player dicts and a wall dict exposes the same values."""

        def joueurs():
            # A fresh literal on every call, so that the constructor input
            # and the expected value never share objects.
            return [
                {"nom": "idul", "murs": 7, "pos": [5, 5]},
                {"nom": "automate", "murs": 3, "pos": [8, 6]},
            ]

        def murs():
            return {
                "horizontaux": [[4, 4], [2, 6], [3, 8], [5, 8], [7, 8]],
                "verticaux": [[6, 2], [4, 4], [2, 6], [7, 5], [7, 7]],
            }

        partie = Quoridor(joueurs(), murs=murs())

        self.assertEqual(partie.joueurs, joueurs())
        self.assertEqual(partie.murs, murs())
示例#8
0
文件: main.py 项目: aru1998/projet-2
def main():
    """Main loop: list the games, or play one game until it has a winner.

    With ``args.lister`` set, only the id of every game returned by
    ``api.lister_parties`` is printed. Otherwise a game is started and, on
    each turn, a game object is rebuilt from the latest state, displayed,
    and a move is played through ``jouer_coup``.
    """
    args = analyser_commande()

    if args.lister:
        for partie in api.lister_parties(args.idul):
            print(partie["id"])
        return

    id_partie, partie = api.débuter_partie(args.idul)

    while True:
        # Graphical and text modes share the same constructor arguments.
        classe = QuoridorX if args.mode_graphique else Quoridor
        q = classe(partie["joueurs"], partie["murs"])

        gagnant = q.partie_terminée()
        if gagnant:
            break

        if args.mode_graphique:
            q.afficher()
        else:
            print("", q, sep="\n")

        partie = jouer_coup(args, q, id_partie)

    if args.mode_graphique:
        turtle.mainloop()
    else:
        print("", q, "", f'{gagnant} a gagné la partie!', "", sep="\n")
示例#9
0
 def __init__(self, init_model=None):
     """Create the board, the game, the training settings and the self-play player.

     Args:
         init_model: optional model file; when truthy, training starts from
             that policy-value net (built with ``use_gpu=True``), otherwise
             from a new net built with the default arguments.
     """
     # Parameters of the board and the game.
     # NOTE(review): these three values are not passed to Quoridor() or
     # Game() in this method -- confirm whether they are still used.
     self.board_width = 6
     self.board_height = 6
     self.n_in_row = 4
     self.board = Quoridor()
     self.game = Game(self.board)

     # Training parameters: learning rate
     self.learn_rate = 2e-3
     self.lr_multiplier = 1.0  # adjusted adaptively from the KL divergence
     self.kl_targ = 0.02

     # Training parameters: search
     self.temp = 1.0  # temperature
     self.n_playout = 1000  # simulations per move
     self.c_puct = 5
     # Simulations for the pure MCTS player, the opponent used to evaluate
     # the trained policy.
     self.pure_mcts_playout_num = 1000

     # Training parameters: buffer and batching
     self.buffer_size = 10000
     self.batch_size = 1000  # mini-batch size
     self.data_buffer = deque(maxlen=self.buffer_size)
     self.play_batch_size = 64
     self.epochs = 5  # train steps per update

     # Training parameters: schedule and bookkeeping
     self.check_freq = 50
     self.game_batch_num = 2000
     self.best_win_ratio = 0.0

     # Model file and GPU flag are only passed along when a model was given.
     net_kwargs = {"model_file": init_model, "use_gpu": True} if init_model else {}
     self.policy_value_net = PolicyValueNet(**net_kwargs)

     self.mcts_player = MCTSPlayer(
         self.policy_value_net.policy_value_fn,
         c_puct=self.c_puct,
         n_playout=self.n_playout,
         is_selfplay=1,
     )
 def test_partie_terminée(self):
     """Check the return value of ``Quoridor.partie_terminée``.

     Cases exercised, in order:
         - ``False`` for a freshly created game
         - "joueur1" for a state where player 1 stands on (5, 9)
         - "joueur2" for a state where player 2 stands on (5, 1)
     """
     scenarios = [
         # (constructor arguments, expected result)
         ((['joueur1', 'joueur2'],), False),
         (
             (
                 [
                     {"nom": "joueur1", "murs": 7, "pos": (5, 9)},
                     {"nom": "joueur2", "murs": 3, "pos": (6, 9)},
                 ],
                 {
                     "horizontaux": [(4, 4), (2, 6), (3, 8), (5, 8), (7, 8)],
                     "verticaux": [(6, 2), (4, 4), (2, 5), (7, 5), (7, 7)],
                 },
             ),
             "joueur1",
         ),
         (
             (
                 [
                     {"nom": "joueur1", "murs": 7, "pos": (6, 1)},
                     {"nom": "joueur2", "murs": 3, "pos": (5, 1)},
                 ],
                 {
                     "horizontaux": [[4, 4], [2, 6], [3, 8], [5, 8], [7, 8]],
                     "verticaux": [[6, 2], [4, 4], [2, 5], [7, 5], [7, 7]],
                 },
             ),
             "joueur2",
         ),
     ]
     for arguments, attendu in scenarios:
         jeu = Quoridor(*arguments)
         self.assertEqual(jeu.partie_terminée(), attendu)
示例#11
0
文件: mcts.py 项目: wrongu/QuoridorV2
    def _single_search(self, game:Quoridor, c_puct, verbose=False) -> float:
        """Recursively run a single MCTS thread out from the given state using exploration parameter 'c_puct'.

        The current state is looked up in ``self._node_lookup`` by its hash key, so it must already have a
        node there. One action is sampled from the node's upper-confidence values, applied inside
        ``game.temp_move``, and the resulting value is backed up on the node.

        Args:
            game: game object to search from; the action is applied through its ``temp_move`` context manager.
            c_puct: exploration parameter forwarded to ``node.upper_conf``.
            verbose: when true, print a trace of the search.

        Returns:
            The value backed up on this node for the chosen action: +1 or -1 when the action ended the game,
            otherwise the negated value of the state the action led to.
        """
        node = self._node_lookup[game.hash_key()]
        # temperature=0.0 is passed so the sampler makes its zero-temperature choice.
        action = sample_action(node.upper_conf(c_puct), node._player, temperature=0.0)
        if verbose:
            print("\tsingle_search starting @", node, "\n\t\ttaking", action, end="")
        with game.temp_move(action):
            winner = game.get_winner()
            if winner is not None:
                if verbose:
                    print("--> winner is", winner)
                # Case 1: 'action' ended the game. Return +1 if a win from the perspective of whoever played the move
                backup_val = +1 if winner == node._player else -1
            elif game.hash_key() not in self._node_lookup:
                # Case 2: 'action' resulted in a state we've never seen before. Create a new node and return
                pol, val = self.pol_val_fun(game)
                new_node = TreeNode(game, pol, val)
                self._node_lookup[game.hash_key()] = new_node
                node.add_child(action, new_node)
                if verbose:
                    print("--> leaf <{}> with value".format(str(new_node)), val)
                # "val" is from the perspective of "new_node" but we're evaluating "node". Flip sign for minmax.
                backup_val = -val
            else:
                # Case 3: we've seen this state before. But it's possible we're reaching it from a different history.
                # Ensure the parent/child relationship exists then recurse, flipping the sign of the child node's value.
                if verbose:
                    print("--> recursing to node", self._node_lookup[game.hash_key()])
                node.add_child(action, self._node_lookup[game.hash_key()])
                backup_val = -self._single_search(game, c_puct, verbose=verbose)

        # Apply backup
        node.backup(action, backup_val)
        return backup_val
def create_board(self_loc, oppo_loc, self_walls, oppo_walls, walls):
    """Return a new ``Quoridor`` whose internal state is overwritten with the given values.

    Args:
        self_loc: value stored in ``_self_loc``.
        oppo_loc: value stored in ``_oppo_loc``.
        self_walls: value stored under key ``1`` of ``wall_remaining``.
        oppo_walls: value stored under key ``-1`` of ``wall_remaining``.
        walls: sequence copied element-wise into the existing ``_walls`` container.
    """
    board = Quoridor()
    board._self_loc = self_loc
    board._oppo_loc = oppo_loc
    for side, remaining in ((1, self_walls), (-1, oppo_walls)):
        board.wall_remaining[side] = remaining
    # Slice assignment fills the existing container in place instead of rebinding it.
    board._walls[:] = walls[:]
    return board
示例#13
0
 def pos_mur_erroné():
     """Build a ``Quoridor`` whose first player is given position (10, 1), with one wall of each kind."""
     joueurs = [{'nom': 'Simon', 'murs': 10, 'pos': (10, 1)}, 'Robot']
     murs = {'horizontaux': [(3, 5)], 'verticaux': [(4, 4)]}
     return Quoridor(joueurs, murs=murs)
示例#14
0
class TestQuoridorMethods(unittest.TestCase):
    """Tests of ``Quoridor`` methods, run against one fixed game state."""

    def setUp(self):
        """Build the game shared by every test: two players and ten walls."""
        joueurs = [
            {"nom": "idul", "murs": 7, "pos": [5, 5]},
            {"nom": "automate", "murs": 3, "pos": [8, 6]},
        ]
        murs = {
            "horizontaux": [[4, 4], [2, 6], [3, 8], [5, 8], [7, 8]],
            "verticaux": [[6, 2], [4, 4], [2, 6], [7, 5], [7, 7]],
        }
        self.partie = Quoridor(joueurs, murs=murs)

    def test_état_partie(self):
        """``état_partie`` returns the players and walls the game was built with."""
        attendu = {
            "joueurs": [
                {"nom": "idul", "murs": 7, "pos": [5, 5]},
                {"nom": "automate", "murs": 3, "pos": [8, 6]},
            ],
            "murs": {
                "horizontaux": [[4, 4], [2, 6], [3, 8], [5, 8], [7, 8]],
                "verticaux": [[6, 2], [4, 4], [2, 6], [7, 5], [7, 7]],
            },
        }
        état = self.partie.état_partie()
        self.assertEqual(état, attendu)

    def test_placer_mur_exceptions(self):
        """``placer_mur`` raises ``QuoridorError`` when called with player number 0."""
        with self.assertRaises(QuoridorError):
            self.partie.placer_mur(joueur=0,
                                   position=(4, 5),
                                   orientation='horizontal')
示例#15
0
    def __init__(self, init_model=None, first_player=1):
        """Create the game, the two players and the zeroed result counters.

        Args:
            init_model: model file forwarded to ``PolicyValueNet``.
            first_player: stored as ``self.first``.
        """
        self.game = Quoridor()
        self.temp = 1.0
        self.c_puct = 5
        self.play_batch_size = 1
        self.alpha_playout = 100
        self.pure_playout = 100
        self.first = first_player

        # Player driven by the policy-value network.
        alpha_net = PolicyValueNet(model_file=init_model)
        self.alpha_player = A_Player(alpha_net.policy_value_fn,
                                     c_puct=5,
                                     n_playout=self.alpha_playout,
                                     is_selfplay=0)
        # Player built without a network.
        self.pure_player = B_Player(c_puct=5, n_playout=self.pure_playout)

        # Result counters, all starting at zero.
        for counter in ("alpha_win_total", "alpha_win_first",
                        "alpha_draw_total", "alpha_draw_first"):
            setattr(self, counter, 0)
示例#16
0
def automatique(idul):
    """Play a game automatically for ``idul`` until it is over.

    On each turn the local ``Quoridor`` plays for player 1. The move is
    deduced by comparing the state before and after, forwarded through
    ``jouer_coup``, and the local game is rebuilt from the returned state.
    """
    identifiant, etat = débuter_partie(idul)
    partie = Quoridor(etat["joueurs"], etat['murs'])
    print(partie)
    while not partie.partie_terminée():
        avant = copy.deepcopy(partie.état_partie())
        partie.jouer_coup(1)
        apres = copy.deepcopy(partie.état_partie())
        print(partie)
        if avant["joueurs"][0]["pos"] != apres["joueurs"][0]["pos"]:
            # The pawn of player 1 changed position: send a move.
            etat = jouer_coup(identifiant, "D", apres["joueurs"][0]["pos"])
        else:
            # Otherwise find the wall list whose length changed and send
            # its last entry as the wall position.
            for code, cle in (("MH", "horizontaux"), ("MV", "verticaux")):
                if len(apres["murs"][cle]) != len(avant["murs"][cle]):
                    etat = jouer_coup(identifiant, code,
                                      apres["murs"][cle][-1])
                    break
        partie = Quoridor(etat["joueurs"], etat['murs'])
示例#17
0
def test_playout():
    """Smoke test: run ``MCTS.get_move_probs`` on a new game and print both results."""
    from quoridor import Quoridor
    from policy_value_net import PolicyValueNet

    search_settings = {"c_puct": 5, "n_playout": 400}
    net = PolicyValueNet(model_file=None, use_gpu=True)
    searcher = MCTS(net.policy_value_fn, **search_settings)
    board = Quoridor()
    moves, move_probs = searcher.get_move_probs(board)
    for result in (moves, move_probs):
        print(result)
示例#18
0
    def test_init_with_strings(self):
        """A game built from two names has 10 walls per player, start positions (5, 1) and (5, 9), and no walls placed."""
        partie = Quoridor(['Simon', 'Robot'])

        joueurs_attendus = [
            {'nom': nom, 'murs': 10, 'pos': pos}
            for nom, pos in (('Simon', (5, 1)), ('Robot', (5, 9)))
        ]
        self.assertEqual(partie.joueurs, joueurs_attendus)

        murs_attendus = {'horizontaux': [], 'verticaux': []}
        self.assertEqual(partie.murs, murs_attendus)
 def test_état_partie(self):
     """Check ``Quoridor.état_partie``.

     Case exercised:
         - a game created from two names reports the expected new-game state
     """
     attendu = {
         "joueurs": [
             {"nom": "joueur1", "murs": 10, "pos": (5, 1)},
             {"nom": "joueur2", "murs": 10, "pos": (5, 9)},
         ],
         "murs": {"horizontaux": [], "verticaux": []},
     }
     # The state reported by a new game must equal the expected dictionary.
     partie = Quoridor(["joueur1", "joueur2"])
     self.assertEqual(partie.état_partie(), attendu)
示例#20
0
def test_mcts_player():
    """Smoke test: ask an ``MCTSPlayer`` for two consecutive actions and print the second."""
    from quoridor import Quoridor
    from policy_value_net import PolicyValueNet

    net = PolicyValueNet(model_file=None)
    player = MCTSPlayer(net.policy_value_fn,
                        c_puct=5,
                        n_playout=400,
                        is_selfplay=1)
    board = Quoridor()

    # First action: apply it, then call alter() before asking again.
    first_action = player.get_action(board)
    board.take_action(first_action)
    board.alter()

    # Second action, requested from the updated board.
    action = player.get_action(board)

    print('take action : ', action)
示例#21
0
class TrainPipeline(object):
    """Self-play training loop for a policy-value network playing Quoridor."""

    def __init__(self, init_model=None):
        """Create the game, the training settings and the self-play player.

        Args:
            init_model: optional model file forwarded to ``PolicyValueNet``;
                when falsy the network is built with its default arguments.
        """
        # Board parameters
        self.game = Quoridor()
        # Training parameters
        self.learn_rate = 2e-3
        self.lr_multiplier = 1.0  # adaptively rescales the learning rate
        self.temp = 1.0
        self.n_playout = 400
        self.c_puct = 5
        self.buffer_size = 10000
        self.batch_size = 128  # original note: "set to 1 while testing"
        self.data_buffer = deque(maxlen=self.buffer_size)
        self.play_batch_size = 1
        self.epochs = 5
        self.kl_targ = 0.02
        self.check_freq = 50
        self.game_batch_num = 1500
        self.best_win_ratio = 0.0
        self.pure_mcts_playout_num = 1000
        # Length of the most recent self-play game. Defined here so run()
        # can always report it, even if no game has been collected yet.
        self.episode_len = 0
        if init_model:
            self.policy_value_net = PolicyValueNet(model_file=init_model)
        else:
            self.policy_value_net = PolicyValueNet()
        # Computer player used for self-play
        self.mcts_player = MCTSPlayer(self.policy_value_net.policy_value_fn,
                                      c_puct=self.c_puct,
                                      n_playout=self.n_playout,
                                      is_selfplay=1)

    # NOTE: a symmetry-based data-augmentation step (get_equi_data) was
    # drafted for this pipeline but is disabled; collect_selfplay_data
    # stores the self-play samples as they are.

    def collect_selfplay_data(self, n_games=1):
        """Play ``n_games`` self-play games and append their samples to the buffer.

        ``self.episode_len`` is set to the number of samples of the last
        game played.

        Args:
            n_games: number of self-play games to run.
        """
        for _ in range(n_games):
            _winner, play_data = self.game.start_self_play(
                self.mcts_player, temp=self.temp)
            play_data = list(play_data)
            self.episode_len = len(play_data)
            self.data_buffer.extend(play_data)

    def policy_update(self):
        """Train the policy-value network on one random mini-batch.

        Runs up to ``self.epochs`` training steps on ``self.batch_size``
        samples drawn from the buffer, stops early when the KL divergence
        from the pre-update policy exceeds four times ``self.kl_targ``,
        then adapts ``self.lr_multiplier`` from the last KL value.

        Returns:
            The ``(loss, entropy)`` pair of the last training step executed.

        Raises:
            ValueError: from ``random.sample`` when the buffer holds fewer
                than ``self.batch_size`` samples.
        """
        mini_batch = random.sample(self.data_buffer, self.batch_size)
        # Each sample is indexed as (state, mcts_probs, winner).
        state_batch = [data[0] for data in mini_batch]
        mcts_probs_batch = [data[1] for data in mini_batch]
        winner_batch = [data[2] for data in mini_batch]
        # The pre-update outputs are kept to measure the KL divergence
        # between the old and new policies, which drives the learning-rate
        # annealing below.
        old_probs, old_v = self.policy_value_net.policy_value(state_batch)
        for _ in range(self.epochs):
            loss, entropy = self.policy_value_net.train_step(
                state_batch, mcts_probs_batch, winner_batch,
                self.learn_rate * self.lr_multiplier)
            new_probs, new_v = self.policy_value_net.policy_value(
                state_batch)
            # The 1e-10 terms keep log() away from zero.
            kl = np.mean(
                np.sum(old_probs *
                       (np.log(old_probs + 1e-10) - np.log(new_probs + 1e-10)),
                       axis=1))
            if kl > self.kl_targ * 4:  # policy moved too far: stop early
                break
        # Adapt the learning-rate multiplier from the KL divergence.
        if kl > self.kl_targ * 2 and self.lr_multiplier > 0.1:
            self.lr_multiplier /= 1.5
        elif kl < self.kl_targ / 2 and self.lr_multiplier < 10:
            self.lr_multiplier *= 1.5

        winners = np.array(winner_batch)
        winners_var = np.var(winners)
        explained_var_old = 1 - np.var(winners - old_v.flatten()) / winners_var
        explained_var_new = 1 - np.var(winners - new_v.flatten()) / winners_var
        print(
            "kl:{:.5f},lr_multiplier:{:.3f},loss:{},entropy:{},explained_var_old:{:.3f},explained_var_new:{:.3f}"
            .format(kl, self.lr_multiplier, loss, entropy, explained_var_old,
                    explained_var_new))
        return loss, entropy

    def run(self):
        """Run the training loop for ``self.game_batch_num`` batches.

        Each batch collects self-play data and, once the buffer holds more
        than ``self.batch_size`` samples, performs one policy update and
        appends the loss to ``loss.txt``. The model is saved as
        ``current_policy`` every ``self.check_freq`` batches. A
        ``KeyboardInterrupt`` ends the loop quietly.
        """
        try:
            for i in range(self.game_batch_num):
                self.collect_selfplay_data(self.play_batch_size)
                print("batch i:{}, episode_len:{}".format(
                    i + 1, self.episode_len))
                if len(self.data_buffer) > self.batch_size:
                    loss, _entropy = self.policy_update()
                    print("LOSS:", loss)
                    # Keep a running log of the loss, one value per line.
                    with open('loss.txt', 'a') as f:
                        f.write(str(loss) + '\n')
                if (i + 1) % self.check_freq == 0:
                    print("current self-play batch: {}".format(i + 1))
                    # win_ratio = self.policy_evaluate()
                    self.policy_value_net.save_model('current_policy')
        except KeyboardInterrupt:
            print('\n\rquit')
示例#22
0
                                                               k][4 * x - 1::]

    # On afficher le damier au complet et à l'endroit
    dam.reverse()
    NOMA = et['joueurs'][0]['nom']
    NOMB = et['joueurs'][1]['nom']
    return f'Légende: 1={NOMA}, 2={NOMB}\n   -----------------------------------\n' + ''.join(
        dam
    ) + '--|-----------------------------------\n  | 1   2   3   4   5   6   7   8   9'


c = analyser_commande()

#Mode manuel
if not (c.a or c.x or c.ax):
    jeu = Quoridor((c.idul, 'robot'))
    ID, etat = debuter_partie(c.idul)

    while True:
        TYPE_COUP = input('Quel type de coup voulez-vous jouer? (D/MH/MV) ')
        POSI = input(
            'À quelle position (x,y) voulez-vous jouer ce coup ? ').replace(
                ' ', '')
        x, y = int(POSI[1]), int(POSI[3])

        if TYPE_COUP.lower() == 'd':
            jeu.déplacer_jeton(1, (x, y))
            print(jeu)
            time.sleep(0.6)
            reponse = jouer_coup(ID, 'D', (x, y))
        elif TYPE_COUP.lower() == 'mh':
示例#23
0
        else:
            return encode_loc(flip_y_perspective(row, current_player, True),
                              col) + "v"

    if temperature < 1e-6:
        # Do max operation instead of unstable low-temperature manipulations
        idx = torch.argmax(policy_planes)
    else:
        idx = torch.multinomial(policy_planes.flatten()**temperature,
                                num_samples=1)
    return _idx_to_action(idx.item())


if __name__ == '__main__':
    # mini test
    q = Quoridor()

    legal_moves = q.all_legal_moves(partial_check=False)
    print("INITIAL STATE LEGAL MOVES ({} of them):".format(len(legal_moves)))
    print(legal_moves)

    for mv in legal_moves:
        planes = encode_actions_to_planes(mv, q.current_player)
        print("=========== {} ============".format(mv))
        print(planes)
        mv2 = sample_action(planes, 0)
        print(mv2)
        assert mv2 == mv, "Failed to encode/decode {}".format(mv)

    # Test that just sampling random moves leads to some illegal moves getting selected (this is expected)
    random_actions, masked_random_actions = [''] * 100, [''] * 100
示例#24
0
class TrainPipeline(object):
    def __init__(self, init_model=None):
        """Create the game, the training settings, the network and the agent.

        Args:
            init_model: optional value forwarded to ``PolicyValueNet`` as
                ``model_file``. When falsy, the network is built with no
                arguments.
        """
        # Game instance driven during self-play.
        self.game = Quoridor()

        # Optimiser settings: training uses learn_rate * lr_multiplier.
        self.learn_rate = 2e-3
        self.lr_multiplier = 1.0
        self.kl_targ = 0.02

        # Self-play / search settings.
        self.temp = 1.0
        self.n_playout = 200
        self.c_puct = 5
        self.play_batch_size = 1
        self.pure_mcts_playout_num = 1000

        # Replay buffer; the deque discards its oldest entries past maxlen.
        self.buffer_size = 10000
        self.data_buffer = deque(maxlen=self.buffer_size)

        # Outer-loop settings and bookkeeping.
        self.check_freq = 10
        self.game_batch_num = 1000
        self.best_win_ratio = 0.0

        # Policy outputs remembered across epochs for the KL check; the
        # integer placeholders are overwritten once training has happened.
        self.old_probs = 0
        self.new_probs = 0
        self.first_trained = False

        net_kwargs = {'model_file': init_model} if init_model else {}
        self.policy_value_net = PolicyValueNet(**net_kwargs)

        self.mcts_player = MCTSPlayer(
            self.policy_value_net.policy_value_fn,
            c_puct=self.c_puct,
            n_playout=self.n_playout,
            is_selfplay=1,
        )

    def get_equi_data(self, play_data):

        extend_data = []
        for i, (state, mcts_prob, winner) in enumerate(play_data):
            wall_state = state[:3,:BOARD_SIZE - 1,:BOARD_SIZE - 1]
            dist_state1 = np.reshape(state[(6 + (WALL_NUM + 1) * 2), :BOARD_SIZE, :BOARD_SIZE], (1, BOARD_SIZE, BOARD_SIZE))
            dist_state2 = np.reshape(state[(7 + (WALL_NUM + 1) * 2), :BOARD_SIZE, :BOARD_SIZE], (1, BOARD_SIZE, BOARD_SIZE))

            # horizontally flipped game
            flipped_wall_state = []

            for i in range(3):
                wall_padded = np.fliplr(wall_state[i])
                wall_padded = np.pad(wall_padded, (0,1), mode='constant', constant_values=0)
                flipped_wall_state.append(wall_padded)

            flipped_wall_state = np.array(flipped_wall_state)

            player_position = state[3:5, :,:]

            flipped_player_position = []
            for i in range(2):
                flipped_player_position.append(np.fliplr(player_position[i]))

            flipped_player_position = np.array(flipped_player_position)

            h_equi_state = np.vstack([flipped_wall_state, flipped_player_position, state[5:, :,:]])

            h_equi_mcts_prob = np.copy(mcts_prob)

            h_equi_mcts_prob[11] = mcts_prob[10]  # SE to SW
            h_equi_mcts_prob[10] = mcts_prob[11]  # SW to SE
            h_equi_mcts_prob[9] = mcts_prob[8]    # NE to NW
            h_equi_mcts_prob[8] = mcts_prob[9]    # NW to NE
            h_equi_mcts_prob[7] = mcts_prob[6]    # EE to WW
            h_equi_mcts_prob[6] = mcts_prob[7]    # WW to EE
            h_equi_mcts_prob[3] = mcts_prob[2]    # E to W
            h_equi_mcts_prob[2] = mcts_prob[3]    # W to E

            h_wall_actions = h_equi_mcts_prob[12:12 + (BOARD_SIZE-1) ** 2].reshape(BOARD_SIZE-1, BOARD_SIZE-1)
            v_wall_actions = h_equi_mcts_prob[12 + (BOARD_SIZE-1) ** 2:].reshape(BOARD_SIZE-1, BOARD_SIZE -1)

            flipped_h_wall_actions = np.fliplr(h_wall_actions)
            flipped_v_wall_actions = np.fliplr(v_wall_actions)

            h_equi_mcts_prob[12:] = np.hstack([flipped_h_wall_actions.flatten(), flipped_v_wall_actions.flatten()])

            # Vertically flipped game
            flipped_wall_state = []

            for i in range(3):
                wall_padded = np.flipud(wall_state[i])
                wall_padded = np.pad(wall_padded, (0,1), mode='constant', constant_values=0)
                flipped_wall_state.append(wall_padded)

            flipped_wall_state = np.array(flipped_wall_state)


            flipped_player_position = []
            for i in range(2):
                flipped_player_position.append(np.flipud(player_position[1-i]))

            flipped_player_position = np.array(flipped_player_position)

            cur_player = (np.ones((BOARD_SIZE, BOARD_SIZE)) - state[5 + 2* (WALL_NUM+1),:,:]).reshape(-1,BOARD_SIZE, BOARD_SIZE)

            v_equi_state = np.vstack([flipped_wall_state, flipped_player_position, state[5+(WALL_NUM+1):5 + 2*(WALL_NUM+1), :,:], state[5:5+(WALL_NUM+1),:,:], cur_player, dist_state2, dist_state1])
            # v_equi_state = np.vstack([flipped_wall_state, flipped_player_position, state[5:(5 + (WALL_NUM+1) * 2), :, :], cur_player, state[:(6 + (WALL_NUM + 1) * 2), :, :]])


            v_equi_mcts_prob = np.copy(mcts_prob)

            v_equi_mcts_prob[11] = mcts_prob[9]  # SE to NE
            v_equi_mcts_prob[10] = mcts_prob[8]  # SW to NW
            v_equi_mcts_prob[9] = mcts_prob[11]  # NE to SE
            v_equi_mcts_prob[8] = mcts_prob[10]  # NW to SW
            v_equi_mcts_prob[5] = mcts_prob[4]   # NN to SS
            v_equi_mcts_prob[4] = mcts_prob[5]   # SS to NN
            v_equi_mcts_prob[1] = mcts_prob[0]   # N to S
            v_equi_mcts_prob[0] = mcts_prob[1]   # S to N

            h_wall_actions = v_equi_mcts_prob[12:12 + (BOARD_SIZE-1) ** 2].reshape(BOARD_SIZE-1, BOARD_SIZE-1)
            v_wall_actions = v_equi_mcts_prob[12 + (BOARD_SIZE-1) ** 2:].reshape(BOARD_SIZE-1, BOARD_SIZE -1)

            flipped_h_wall_actions = np.flipud(h_wall_actions)
            flipped_v_wall_actions = np.flipud(v_wall_actions)

            v_equi_mcts_prob[12:] = np.hstack([flipped_h_wall_actions.flatten(), flipped_v_wall_actions.flatten()])

            ## Horizontally-vertically flipped game

            wall_state = state[:3,:BOARD_SIZE - 1,:BOARD_SIZE - 1]
            flipped_wall_state = []

            for i in range(3):
                wall_padded = np.fliplr(np.flipud(wall_state[i]))
                wall_padded = np.pad(wall_padded, (0,1), mode='constant', constant_values=0)
                flipped_wall_state.append(wall_padded)

            flipped_wall_state = np.array(flipped_wall_state)



            flipped_player_position = []
            for i in range(2):
                flipped_player_position.append(np.fliplr(np.flipud(player_position[1-i])))

            flipped_player_position = np.array(flipped_player_position)

            cur_player = (np.ones((BOARD_SIZE, BOARD_SIZE)) - state[5 + 2*(WALL_NUM+1),:,:]).reshape(-1,BOARD_SIZE, BOARD_SIZE)

            hv_equi_state = np.vstack([flipped_wall_state, flipped_player_position, state[5 + (WALL_NUM+1):5 + 2*(WALL_NUM+1), :,:], state[5:5+(WALL_NUM+1),:,:], cur_player, dist_state2, dist_state1])
            # hv_equi_state = np.vstack([flipped_wall_state, flipped_player_position, state[5:(5 + (WALL_NUM+1) * 2), :, :], cur_player, state[(6 + (WALL_NUM + 1) * 2):, :, :]])

            hv_equi_mcts_prob = np.copy(mcts_prob)

            hv_equi_mcts_prob[11] = mcts_prob[8]  # SE to NW
            hv_equi_mcts_prob[10] = mcts_prob[9]  # SW to NE
            hv_equi_mcts_prob[9] = mcts_prob[10]  # NE to SW
            hv_equi_mcts_prob[8] = mcts_prob[11]  # NW to SE
            hv_equi_mcts_prob[7] = mcts_prob[6]   # EE to WW
            hv_equi_mcts_prob[6] = mcts_prob[7]   # WW to EE
            hv_equi_mcts_prob[5] = mcts_prob[4]   # NN to SS
            hv_equi_mcts_prob[4] = mcts_prob[5]   # SS to NN
            hv_equi_mcts_prob[3] = mcts_prob[2]   # E to W
            hv_equi_mcts_prob[2] = mcts_prob[3]   # W to E
            hv_equi_mcts_prob[1] = mcts_prob[0]   # N to S
            hv_equi_mcts_prob[0] = mcts_prob[1]   # S to N

            h_wall_actions = hv_equi_mcts_prob[12:12 + (BOARD_SIZE-1) ** 2].reshape(BOARD_SIZE-1, BOARD_SIZE-1)
            v_wall_actions = hv_equi_mcts_prob[12 + (BOARD_SIZE-1) ** 2:].reshape(BOARD_SIZE-1, BOARD_SIZE -1)

            flipped_h_wall_actions = np.fliplr(np.flipud(h_wall_actions))
            flipped_v_wall_actions = np.fliplr(np.flipud(v_wall_actions))

            hv_equi_mcts_prob[12:] = np.hstack([flipped_h_wall_actions.flatten(), flipped_v_wall_actions.flatten()])

            ###########

            extend_data.append((state, mcts_prob, winner))
            extend_data.append((h_equi_state, h_equi_mcts_prob, winner))
            extend_data.append((v_equi_state, v_equi_mcts_prob, winner * -1))
            extend_data.append((hv_equi_state, hv_equi_mcts_prob, winner * -1))

        return extend_data

    def collect_selfplay_data(self, n_games=1):
        for i in range(n_games):
            winner, play_data = self.game.start_self_play(self.mcts_player, temp=self.temp)
            play_data = list(play_data)[:]
            self.episode_len = len(play_data)

            play_data = self.get_equi_data(play_data)

            self.data_buffer.extend(play_data)
            print("{}th game finished. Current episode length: {}, Length of data buffer: {}".format(i, self.episode_len, len(self.data_buffer)))

    def policy_update(self):
        """Train on the replay buffer for up to NUM_EPOCHS and return mean losses.

        Before every epoch (once at least one mini-batch has been trained) the
        KL divergence between the policy before and after the most recent
        optimisation step, both evaluated on that step's mini-batch, is
        computed. Training stops early when it exceeds ``4 * kl_targ``;
        otherwise ``lr_multiplier`` is divided by 1.5 when KL is above
        ``2 * kl_targ`` (down to 0.1) or multiplied by 1.5 when KL is below
        ``kl_targ / 2`` (up to 10).

        Per-step losses and the multiplier are logged through the module-level
        ``writer`` under the module-level ``iter_count`` step counter.

        Returns:
            ``(valloss_mean, polloss_mean, entropy_mean)`` averaged over the
            optimisation steps that actually ran; all zeros if none ran.
        """
        global iter_count

        dataloader = DataLoader(self.data_buffer, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

        valloss_acc = 0
        polloss_acc = 0
        entropy_acc = 0
        steps = 0

        for _ in range(NUM_EPOCHS):
            if self.first_trained:
                kl = np.mean(np.sum(self.old_probs * (np.log(self.old_probs + 1e-10) - np.log(self.new_probs + 1e-10)), axis=1))
                if kl > self.kl_targ * 4:
                    break

                if kl > self.kl_targ * 2 and self.lr_multiplier > 0.1:
                    self.lr_multiplier /= 1.5
                elif kl < self.kl_targ / 2 and self.lr_multiplier < 10:
                    self.lr_multiplier *= 1.5

            for state, mcts_prob, winner in dataloader:
                # Snapshot the policy on this mini-batch *before* the step.
                # The earlier code aliased old_probs to new_probs right before
                # the KL term, which made KL identically zero: the early stop
                # never fired and lr_multiplier only ever grew.
                # NOTE(review): this costs one extra forward pass per step.
                self.old_probs, _ = self.policy_value_net.policy_value(state)
                valloss, polloss, entropy = self.policy_value_net.train_step(state, mcts_prob, winner, self.learn_rate * self.lr_multiplier)
                self.new_probs, _ = self.policy_value_net.policy_value(state)

                writer.add_scalar("Val Loss/train", valloss.item(), iter_count)
                writer.add_scalar("Policy Loss/train", polloss.item(), iter_count)
                writer.add_scalar("Entropy/train", entropy.item(), iter_count)
                writer.add_scalar("LR Multiplier", self.lr_multiplier, iter_count)

                iter_count += 1
                steps += 1

                valloss_acc += valloss.item()
                polloss_acc += polloss.item()
                entropy_acc += entropy.item()

                # Only flag the KL check as usable once real probabilities
                # have replaced the integer placeholders set in __init__.
                self.first_trained = True

        if steps == 0:
            # Empty buffer or early stop before the first step of this call.
            return 0.0, 0.0, 0.0

        # Average over the steps that ran, so an early stop does not deflate
        # the reported losses.
        return valloss_acc / steps, polloss_acc / steps, entropy_acc / steps

    def run(self):
        try:
            self.collect_selfplay_data(3)
            count = 0
            for i in range(self.game_batch_num):
                self.collect_selfplay_data(self.play_batch_size)    # collect_s
                print("batch i:{}, episode_len:{}".format(i + 1, self.episode_len))
                if len(self.data_buffer) > BATCH_SIZE:
                    valloss, polloss, entropy = self.policy_update()
                    print("VALUE LOSS: %0.3f " % valloss, "POLICY LOSS: %0.3f " % polloss, "ENTROPY: %0.3f" % entropy)

                    #writer.add_scalar("Val Loss/train", valloss.item(), i)
                    #writer.add_scalar("Policy Loss/train", polloss.item(), i)
                    #writer.add_scalar("Entory/train", entropy, i)

                if (i + 1) % self.check_freq == 0:
                    count += 1
                    print("current self-play batch: {}".format(i + 1))
                    # win_ratio = self.policy_evaluate()
                    # Add generation to filename
                    self.policy_value_net.save_model('model_7x7_' + str(count) + '_' + str("%0.3f_" % (valloss+polloss) + str(time.strftime('%Y-%m-%d', time.localtime(time.time())))))
        except KeyboardInterrupt:
            print('\n\rquit')
 def test__init__(self):
     """Test Quoridor.__init__.

     Cases covered:
         - creation of a new game
         - creation of a game from an existing state
         - QuoridorError when 'joueurs' is not iterable
         - QuoridorError when 'joueurs' does not hold exactly 2 players
         - QuoridorError when a player's wall count is above 10 or negative
         - QuoridorError when a player's position is invalid
         - QuoridorError when 'murs' is given but is not a dictionary
         - QuoridorError when placed plus remaining walls do not total 20
         - QuoridorError when a wall position is invalid
     """
     def paire(murs1, pos1, murs2, pos2, noms=("foo", "bar")):
         """Build a fresh two-player list for one constructor call."""
         return [{"nom": noms[0], "murs": murs1, "pos": pos1},
                 {"nom": noms[1], "murs": murs2, "pos": pos2}]

     def expect_error(message, *args):
         """Assert that Quoridor(*args) raises QuoridorError matching message."""
         self.assertRaisesRegex(QuoridorError, message, Quoridor, *args)

     # Known renderings for known constructions.
     new_board = (
         "légende: 1=foo 2=bar\n"
         "   -----------------------------------\n"
         "9 | .   .   .   .   2   .   .   .   . |\n"
         "  |                                   |\n"
         "8 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "7 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "6 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "5 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "4 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "3 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "2 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "1 | .   .   .   .   1   .   .   .   . |\n"
         "--|-----------------------------------\n"
         "  | 1   2   3   4   5   6   7   8   9\n"
     )
     existing_board = (
         "légende: 1=foo 2=bar\n"
         "   -----------------------------------\n"
         "9 | .   .   .   .   .   .   .   .   . |\n"
         "  |                                   |\n"
         "8 | .   .   .   .   .   . | .   .   . |\n"
         "  |        ------- -------|-------    |\n"
         "7 | .   .   .   .   2   . | .   .   . |\n"
         "  |                                   |\n"
         "6 | . | .   .   .   1   . | .   .   . |\n"
         "  |   |-------            |           |\n"
         "5 | . | .   . | .   .   . | .   .   . |\n"
         "  |           |                       |\n"
         "4 | .   .   . | .   .   .   .   .   . |\n"
         "  |            -------                |\n"
         "3 | .   .   .   .   . | .   .   .   . |\n"
         "  |                   |               |\n"
         "2 | .   .   .   .   . | .   .   .   . |\n"
         "  |                                   |\n"
         "1 | .   .   .   .   .   .   .   .   . |\n"
         "--|-----------------------------------\n"
         "  | 1   2   3   4   5   6   7   8   9\n"
     )
     existing_state = {
         "joueurs": [
             {"nom": "foo", "murs": 7, "pos": [5, 6]},
             {"nom": "bar", "murs": 3, "pos": [5, 7]},
         ],
         "murs": {
             "horizontaux": [[4, 4], [2, 6], [3, 8], [5, 8], [7, 8]],
             "verticaux": [[6, 2], [4, 4], [2, 5], [7, 5], [7, 7]],
         },
     }

     # New game.
     self.assertEqual(str(Quoridor(["foo", "bar"])), new_board)
     # Game restored from an existing state.
     restored = Quoridor(existing_state['joueurs'], existing_state['murs'])
     self.assertEqual(str(restored), existing_board)

     # 'joueurs' is not iterable.
     expect_error("joueurs n'est pas iterable!", 2)
     # 'joueurs' does not hold exactly two players.
     for noms in (["joueur1"], ["joueur1", "joueur2", "joueur3"]):
         expect_error("Il n'y a pas exactement 2 joueurs!", noms)

     # A player's wall count is above 10 or negative.
     expect_error("mauvais nombre de murs!", paire(11, (5, 6), 10, (5, 7)))
     expect_error("mauvais nombre de murs!",
                  paire(10, (5, 6), -1, (5, 7), noms=("joueur1", "joueur2")))

     # A player's position is invalid.
     # NOTE(review): the same input is asserted twice, as in the original;
     # the second run was probably meant to cover another invalid position.
     for _ in range(2):
         expect_error("position du joueur invalide!",
                      paire(10, (5, 10), 10, (5, 5)))

     # 'murs' is present but is not a dictionary.
     expect_error("murs n'est pas un dictionnaire!",
                  ["joueur1", "joueur2"], [(5, 5)])

     # Placed plus remaining walls do not add up to 20.
     expect_error("mauvaise quantité totale de murs!",
                  paire(5, (5, 6), 10, (5, 7)))
     expect_error("mauvaise quantité totale de murs!",
                  paire(8, (5, 6), 3, (5, 7)),
                  {
                      "horizontaux": [(4, 4), (2, 6), (3, 8), (5, 8), (7, 8)],
                      "verticaux": [(6, 2), (4, 4), (2, 5), (7, 5), (7, 7)],
                  })

     # A wall position is invalid: four bad horizontal walls beside a valid
     # vertical one, then four bad vertical walls beside a valid horizontal.
     invalid_walls = (
         ((0, 5), (5, 5)),
         ((9, 5), (5, 5)),
         ((5, 1), (5, 5)),
         ((5, 10), (5, 5)),
         ((5, 5), (1, 5)),
         ((5, 5), (10, 5)),
         ((5, 5), (5, 0)),
         ((5, 5), (5, 9)),
     )
     for horizontal, vertical in invalid_walls:
         expect_error("position du mur non-valide!",
                      paire(9, (3, 3), 9, (7, 7)),
                      {"horizontaux": [horizontal], "verticaux": [vertical]})
 def test_déplacer_jeton(self):
     """Test Quoridor.déplacer_jeton.

     Cases covered:
         - valid moves update the game state as expected
         - QuoridorError when the player number is invalid
         - QuoridorError when the target lies outside the board
         - QuoridorError when the target is not reachable by the player
     """
     def etat(pos_joueur2):
         """Build a fresh game state with player 2 at the given position."""
         return {
             "joueurs": [
                 {"nom": "joueur1", "murs": 7, "pos": (5, 6)},
                 {"nom": "joueur2", "murs": 3, "pos": pos_joueur2},
             ],
             "murs": {
                 "horizontaux": [(4, 4), (2, 6), (3, 8), (5, 8), (7, 8)],
                 "verticaux": [(6, 2), (4, 4), (2, 5), (7, 5), (7, 7)],
             },
         }

     depart = etat((5, 7))
     apres_premier_coup = etat((5, 5))
     apres_second_coup = etat((6, 5))

     partie_vierge = Quoridor(["joueur1", "joueur2"])
     # Invalid player number.
     self.assertRaisesRegex(QuoridorError, "joueur invalide!",
                            partie_vierge.déplacer_jeton, 5, (5, 2))
     # Targets outside the board limits.
     for hors_limites in ((0, 5), (10, 5), (5, 0), (5, 10)):
         self.assertRaisesRegex(QuoridorError, "position invalide!",
                                partie_vierge.déplacer_jeton, 1, hors_limites)

     # Targets the given player cannot reach.
     jeu = Quoridor(depart['joueurs'], depart['murs'])
     coups_refuses = (
         (2, (5, 8)),
         (1, (5, 8)),
         (2, (3, 7)),
         (2, (4, 6)),
         (2, (6, 6)),
     )
     for joueur, cible in coups_refuses:
         self.assertRaisesRegex(QuoridorError, "mouvement invalide!",
                                jeu.déplacer_jeton, joueur, cible)

     # Moves that must succeed, each checked against the expected state.
     jeu.déplacer_jeton(2, (5, 5))
     self.assertEqual(jeu.état_partie(), apres_premier_coup)
     jeu.déplacer_jeton(2, (6, 5))
     self.assertEqual(jeu.état_partie(), apres_second_coup)
示例#27
0
def _clicked_action(touch, pawn_moves, valid_walls):
    """Return the action under the mouse position ``touch``, or ``None``.

    Pawn targets are tested first, then the collision rectangles of each
    currently valid wall; the first hit wins.
    """
    for rect, action in pawn_moves:
        if rect.collidepoint(touch):
            return action
    for _wall, collide_rects, action in valid_walls:
        if any(rect.collidepoint(touch) for rect in collide_rects):
            return action
    return None


def main():
    """Run an interactive pygame Quoridor match.

    Command-line options:
        --player_type: 1 for human vs human, 2 for human vs computer.
        --computer_type: 1 for the network-guided MCTS agent, 2 for pure
            MCTS; only consulted when --player_type is 2.

    An unsupported --player_type ends the program through ``parser.error``.
    With --player_type 2 and no valid --computer_type, a hint is printed and
    the function returns before a window is opened. Closing the window ends
    the match without announcing a winner.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--player_type",
        type=int,
        default=1,
        help="palyer type you want to fight,1 is human,2 is computer")
    parser.add_argument("--computer_type",
                        type=int,
                        default=0,
                        help="computer type,1 is Alpha MCTS,2 is pure MCTS")
    args = parser.parse_args()

    # Reject unusable combinations up front; the earlier code went on and
    # failed later with NameError on `players` / `player_types`.
    if args.player_type not in (1, 2):
        parser.error("--player_type must be 1 (human) or 2 (computer)")
    if args.player_type == 2 and args.computer_type not in (1, 2):
        print("Set computer type to 1 or 2 for choosing computer!")
        return

    game = Quoridor()
    human1 = ManualPygameAgent('Kurumi')
    human2 = ManualPygameAgent('Cryer')
    MCTS_Alpha = A_Player(PolicyValueNet().policy_value_fn,
                          c_puct=5,
                          n_playout=30,
                          is_selfplay=0)
    # Original author's note: 50 playouts took about 400 seconds.
    MCTS_Pure = B_Player(c_puct=5, n_playout=50)

    if args.player_type == 1:
        player_types = {1: 'human', 2: 'human'}
        players = {1: human1, 2: human2}
    else:
        player_types = {1: 'human', 2: 'computer'}
        opponent = MCTS_Alpha if args.computer_type == 1 else MCTS_Pure
        players = {1: human1, 2: opponent}

    pygame.init()
    try:
        WINDOW_SIZE = [SCREEN_WIDTH, SCREEN_HEIGHT]
        screen = pygame.display.set_mode(WINDOW_SIZE)
        pygame.display.set_caption("QUORIDOR")
        clock = pygame.time.Clock()

        valid_actions = game.actions()
        done = False
        winner = None
        t1 = time.time()
        while not done:
            pawn_moves, walls = draw_game(game, screen, valid_actions)
            valid_walls = [wall for wall in walls if wall[2] in valid_actions]

            if player_types[game.current_player] == 'human':
                # Highlight every valid wall slot the cursor hovers over.
                touch = pygame.mouse.get_pos()
                for wall, collides, _ in valid_walls:
                    if any(collide.collidepoint(touch) for collide in collides):
                        pygame.draw.rect(screen, LIGHTBROWN, wall)

                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        done = True
                        break
                    if event.type != pygame.MOUSEBUTTONDOWN:
                        continue
                    action = _clicked_action(pygame.mouse.get_pos(),
                                             pawn_moves, valid_walls)
                    if action is None:
                        continue
                    agent = players[game.current_player]
                    agent.receive_action(action)
                    real_action = agent.choose_action()
                    done, winner = game.step(real_action)
                    render(game, screen)
                    # One move per frame: leftover events refer to hit
                    # rectangles that predate this move, so drop them.
                    break

            clock.tick(30)
            pygame.display.flip()

            if not done:
                valid_actions = game.actions()

                if player_types[game.current_player] == 'computer':
                    print("computer %s thinking..." % str(game.current_player))
                    tic = time.time()
                    real_action = players[game.current_player].choose_action(game)
                    toc = time.time()
                    print("MCTS choose action:", real_action,
                          "  ,spend %s seconds" % str(toc - tic))
                    done, winner = game.step(real_action)

            if done:
                # `winner` stays None when the window was closed; looking it
                # up in player_types would raise KeyError.
                if winner is not None:
                    print("game over! winner is %s player:%s" %
                          (player_types[winner], winner))
                break

        t2 = time.time()
        print("total time :", t2 - t1)
    finally:
        # Shut pygame down even when the loop is left through an exception.
        pygame.quit()
示例#28
0
from quoridor import Quoridor

# Manual smoke test for wall placement: each step prints the board so the
# effect of the call can be checked by eye.
partie = Quoridor(["rod", "jimbo"])
print(partie)

print(partie.placer_mur(1, [1, 8], "horizontal"))
print(partie)

# This placement is expected to be rejected (presumably because it overlaps
# the wall just placed at [1, 8] -- confirm against placer_mur).
# Catch Exception rather than using a bare ``except`` so that
# KeyboardInterrupt / SystemExit are not silently swallowed.
try:
    partie.placer_mur(1, [2, 8], "horizontal")
except Exception:
    print("aaaaaaaaaaaaaaaaaaaaa")
print(partie)

partie.placer_mur(1, [3, 8], "horizontal")
print(partie)

partie.placer_mur(1, [3, 4], "vertical")
print(partie)

# Same idea for a vertical wall right next to the one placed at [3, 4].
try:
    partie.placer_mur(1, [3, 5], "vertical")
except Exception:
    print(".awemc3eic43icm3ocmn3rkco ")

partie.placer_mur(1, [3, 6], "vertical")
# NOTE(review): "qmce" is not one of the orientation names used elsewhere in
# this script; if placer_mur rejects it, the exception is not caught here and
# the script stops before the lines below -- confirm this is intended.
partie.placer_mur(1, (2, 1), "qmce")

print(partie)

partie.placer_mur_auto(1, partie.etat["joueurs"][1]["pos"], "horizontal")
                 q = QuoridorX(DAMIER['joueurs'], DAMIER['murs'])
             except RuntimeError as err:
                 print(err)
                 CHOIX = input(
                     "Voulez-vous continuer à jouer, oui ou non? ")
                 if CHOIX.lower() == 'non':
                     break
             except StopIteration as err:
                 q.window.clearscreen()
                 q = QuoridorX(DAMIER['joueurs'], DAMIER['murs'])
                 print(f'Le grand gagnant est le joueur {err} !\n')
                 break
 elif ARGS.automatique:
     #automatique et ascii
     print('automatique et ascii')
     q = Quoridor(PARTIE[1]['joueurs'], PARTIE[1]['murs'])
     print(q)
     while True:
         try:
             TYPE_COUP, POSITION = q.jouer_coup(1)
             DAMIER = jouer_coup(ID_PARTIE, TYPE_COUP, POSITION)
             q = Quoridor(DAMIER['joueurs'], DAMIER['murs'])
             print(q)
         except RuntimeError as err:
             print(err)
             CHOIX = input("Voulez-vous continuer à jouer, oui ou non? ")
             if CHOIX.lower() == 'non':
                 break
         except StopIteration as err:
             print(q)
             print(f'Le grand gagnant est le joueur {err} !\n')
 def test_placer_mur(self):
     """Exercise ``placer_mur`` on several prepared game states.

     Scenarios covered:
         - horizontal and vertical walls are recorded for both players
         - QuoridorError when the player number is invalid
         - QuoridorError when the player has no walls left
         - QuoridorError when a wall already occupies the position
         - QuoridorError when the orientation is not recognised
         - QuoridorError when the position is invalid for the orientation
         - NetworkXError for the two placements the original author
           describes as trapping a player
     """
     # State before any wall is placed by this test.
     start_state = {
         "joueurs": [
             {"nom": "joueur1", "murs": 9, "pos": (5, 1)},
             {"nom": "joueur2", "murs": 9, "pos": (5, 9)},
         ],
         "murs": {"horizontaux": [(4, 4)], "verticaux": [(6, 6)]},
     }
     # State expected once each player has placed one wall.
     expected_state = {
         "joueurs": [
             {"nom": "joueur1", "murs": 8, "pos": (5, 1)},
             {"nom": "joueur2", "murs": 8, "pos": (5, 9)},
         ],
         "murs": {
             "horizontaux": [(4, 4), (5, 5)],
             "verticaux": [(6, 6), (7, 7)],
         },
     }
     # Crowded board in which player 2 has no walls left.
     crowded_state = {
         "joueurs": [
             {"nom": "joueur1", "murs": 7, "pos": (5, 3)},
             {"nom": "joueur2", "murs": 0, "pos": (3, 5)},
         ],
         "murs": {
             "horizontaux": [(4, 4), (2, 6), (4, 2), (5, 8), (7, 8)],
             "verticaux": [
                 (6, 2), (4, 4), (2, 5), (7, 5), (7, 7), (2, 2), (2, 3),
                 (2, 4),
             ],
         },
     }

     # Both players place a wall; the resulting state must match.
     game = Quoridor(start_state['joueurs'], start_state['murs'])
     game.placer_mur(1, (5, 5), 'horizontal')
     game.placer_mur(2, (7, 7), 'vertical')
     self.assertEqual(game.état_partie(), expected_state)

     # Unknown player number.
     with self.assertRaisesRegex(QuoridorError, "joueur invalide!"):
         game.placer_mur(5, (2, 2), 'horizontal')

     # Player 2 of the crowded game has used up all walls.
     crowded_game = Quoridor(crowded_state['joueurs'],
                             crowded_state['murs'])
     with self.assertRaisesRegex(QuoridorError,
                                 "le joueur ne peut plus placer de murs!"):
         crowded_game.placer_mur(2, (2, 2), 'horizontal')

     # Spot already taken: exact position first, then the shifted one,
     # for each orientation.
     occupied_spots = [
         ((4, 4), 'horizontal'),
         ((5, 4), 'horizontal'),
         ((4, 4), 'vertical'),
         ((4, 5), 'vertical'),
     ]
     for position, orientation in occupied_spots:
         with self.assertRaisesRegex(QuoridorError, "Il y a déjà un mur!"):
             crowded_game.placer_mur(1, position, orientation)

     # Orientation that is neither horizontal nor vertical.
     with self.assertRaisesRegex(QuoridorError, "orientation invalide!"):
         crowded_game.placer_mur(1, (4, 5), 'diagonale')

     # Out-of-bounds positions: horizontal walls first, then vertical ones.
     out_of_bounds = [
         ((0, 5), 'horizontal'),
         ((9, 5), 'horizontal'),
         ((5, 1), 'horizontal'),
         ((5, 10), 'horizontal'),
         ((1, 5), 'vertical'),
         ((10, 5), 'vertical'),
         ((5, 0), 'vertical'),
         ((5, 9), 'vertical'),
     ]
     for position, orientation in out_of_bounds:
         with self.assertRaisesRegex(QuoridorError,
                                     "position du mur invalide!"):
             game.placer_mur(1, position, orientation)

     # Placements that would trap a player (per the original author's
     # note); these are expected to raise NetworkXError with any message.
     for position, orientation in [((3, 3), 'horizontal'),
                                   ((4, 2), 'vertical')]:
         with self.assertRaisesRegex(nx.exception.NetworkXError, ""):
             crowded_game.placer_mur(1, position, orientation)