Пример #1
0
 def add_data_to_move_buffer_with_8_symmetries(self, own, enemy, policy):
     """Append all eight flip/rotation variants of one sample to ``self.moves``.

     Every combination of an optional vertical flip followed by 0-3 right
     rotations is produced. The two boards are transformed with
     ``flip_vertical`` / ``rotate90``; the policy, viewed as an 8x8 grid, is
     transformed with the matching ``np.flipud`` / ``np.rot90`` calls and
     flattened back to a 64-element list.

     :param own: first board of the saved pair, in the form accepted by
         ``flip_vertical`` and ``rotate90``
     :param enemy: second board of the saved pair, same form as ``own``
     :param policy: array of 64 entries supporting ``reshape((8, 8))``
     """
     for mirrored in (False, True):
         for quarter_turns in range(4):
             grid = policy.reshape((8, 8))

             # Flip first, rotate afterwards.
             sym_own = flip_vertical(own) if mirrored else own
             sym_enemy = flip_vertical(enemy) if mirrored else enemy
             sym_policy = np.flipud(grid) if mirrored else grid

             # rotate90 is applied once per quarter turn (no-op for zero turns).
             for _ in range(quarter_turns):
                 sym_own = rotate90(sym_own)
                 sym_enemy = rotate90(sym_enemy)
             # Negative k makes np.rot90 turn clockwise; k == 0 leaves the grid as is.
             sym_policy = np.rot90(sym_policy, k=-quarter_turns)

             flat_policy = list(sym_policy.reshape((64, )))
             self.moves.append([(sym_own, sym_enemy), flat_policy])
Пример #2
0
    async def expand_and_evaluate(self, env):
        """Expand a new leaf: store its move priors and return its value.

        The position is sent to the predictor under a randomly chosen symmetry
        (optional vertical flip, then 0-3 right rotations). The returned policy
        is mapped back to the real board orientation and stored in
        ``self.var_p`` under both ``counter_key(env)`` and
        ``another_side_counter_key(env)``.

        ``key`` stays in ``self.now_expanding`` while the evaluation is in
        flight and is removed even if the prediction raises, so a failed
        evaluation cannot leave the marker behind.

        :param ReversiEnv env: environment whose board is evaluated
        :return: leaf_v, the predicted value converted to ``float``
        """

        key = self.counter_key(env)
        another_side_key = self.another_side_counter_key(env)
        self.now_expanding.add(key)

        try:
            black, white = env.board.black, env.board.white

            # (di(p), v) = fθ(di(sL))
            # rotation and flip. flip -> rot.
            is_flip_vertical = random() < 0.5
            rotate_right_num = int(random() * 4)
            if is_flip_vertical:
                black, white = flip_vertical(black), flip_vertical(white)
            for _ in range(rotate_right_num):
                # rotate90: rotate bitboard RIGHT 1 time
                black, white = rotate90(black), rotate90(white)

            black_ary = bit_to_array(black, 64).reshape((8, 8))
            white_ary = bit_to_array(white, 64).reshape((8, 8))
            # The plane of the player to move always comes first.
            if env.next_player == Player.black:
                state = [black_ary, white_ary]
            else:
                state = [white_ary, black_ary]
            future = await self.predict(np.array(state))  # type: Future
            await future
            leaf_p, leaf_v = future.result()

            # Undo the symmetry on leaf_p in the opposite order: rot -> flip.
            if rotate_right_num > 0 or is_flip_vertical:
                leaf_p = leaf_p.reshape((8, 8))
                if rotate_right_num > 0:
                    # rot90: rotate matrix LEFT k times
                    leaf_p = np.rot90(leaf_p, k=rotate_right_num)
                if is_flip_vertical:
                    leaf_p = np.flipud(leaf_p)
                leaf_p = leaf_p.reshape((64, ))

            self.var_p[key] = leaf_p  # P is value for next_player (black or white)
            self.var_p[another_side_key] = leaf_p
            self.expanded.add(key)
        finally:
            # Always clear the in-progress marker, including on failure.
            self.now_expanding.remove(key)
        return float(leaf_v)