Exemplo n.º 1
0
class Play(object):
    """Run a match between the `pure_MCTS` searcher and the `mcts.uctAlg` searcher.

    The game history is kept twice, once per side, as dicts with a
    "requests" list (moves received from the opponent) and a "responses"
    list (moves this side produced). A `Board` instance mirrors the moves.
    """

    def __init__(self):
        # Histories start from JSON text so they have exactly the layout the
        # searchers receive through their `json` argument. Black's history
        # opens with an (-1, -1) request; white's starts empty.
        self.bp = json.loads('{"requests":[{"x":-1,"y":-1}],"responses":[]}')
        self.wp = json.loads('{"requests":[],"responses":[]}')
        self.board = Board()

    def play(self, turn):
        """Play one move and log it in both sides' histories.

        Even `turn` values are black's moves, chosen by `pure_MCTS.UCTAlg`;
        odd values are white's, chosen by `mcts.uctAlg.UCTAlg` using the
        module-level `player` model in 'comp' mode. The chosen move is placed
        on the board, appended to the mover's "responses" and to the
        opponent's "requests".
        """
        black_to_move = turn % 2 == 0
        if black_to_move:
            mover, waiter, color = self.bp, self.wp, BLACK
            print("pure")
            engine = pure_MCTS.UCTAlg(json=mover)
            move = engine.run(time_limit=1)
        else:
            mover, waiter, color = self.wp, self.bp, WHITE
            print("alpha")
            engine = mcts.uctAlg.UCTAlg(predict_model=player,
                                        json=mover,
                                        mode='comp')
            # This searcher returns a sequence whose first item is the move.
            move = engine.run(time_limit=1)[0]
        print(move)

        x, y = move[0], move[1]
        self.board.disc_place(color, x, y)  # mirror the move on the board

        # The same dict object is shared by both histories, as before.
        entry = {'x': x, 'y': y}
        mover["responses"].append(entry)
        waiter["requests"].append(entry)
Exemplo n.º 2
0
class Play(object):
    """Play one game between two model-driven searchers and collect training data.

    State kept on the instance:
      * bp / wp  -- per-side move histories, dicts with a "requests" list
                    (opponent moves) and a "responses" list (own moves).
      * board    -- a `Board` instance mirroring the moves played.
      * features -- 2-D array, one row of BOARD_SIZE**2 * channel values per
                    recorded position.
      * labels   -- 2-D array, one row of BOARD_SIZE**2 values per recorded
                    move distribution; `add_winner_and_writeIO` appends one
                    extra reward column before saving.
    """

    # Number of board symmetries emitted by `transform_history_matrix`:
    # the original and its transpose, each in four rotations.
    _SYMMETRY_COUNT = 8

    def __init__(self):
        # Histories start from JSON text so they have exactly the layout the
        # searcher receives through its `json` argument.
        black_prompt = '{"requests":[{"x":-1,"y":-1}],"responses":[]}'
        white_prompt = '{"requests":[],"responses":[]}'
        self.bp = json.loads(black_prompt)
        self.wp = json.loads(white_prompt)
        self.board = Board()
        # Empty (0, width) float arrays so rows can be stacked on later.
        self.labels = np.empty((0, BOARD_SIZE**2))
        self.features = np.empty((0, BOARD_SIZE**2 * channel))

    def record_steps(self, color_to_play):
        """Append the current board, encoded for `color_to_play`, to `features`.

        Must be called before the move is placed on the board so the stored
        position is the one the move was chosen from.
        """
        self.calc_features(color_to_play)

    def record_possibilities(self, pi):
        """Append the move distribution `pi` as a new row of `labels`."""
        self.labels = np.vstack((self.labels, pi))
        if DEBUG:
            print('the labels...', self.labels.shape)

    def add_winner_and_writeIO(self, winner, turn):
        """Augment the recorded data, attach rewards and append it to disk.

        Args:
            winner: compared against BLACK, WHITE and TIE to derive rewards.
            turn: number of recorded moves. It must equal the number of rows
                recorded before augmentation, otherwise the final `hstack`
                raises because the row counts differ.

        Row i is assumed to be black's move when i is even and white's when
        odd. Its reward is 1 if that side is `winner`, 0 if `winner` is TIE,
        otherwise -1. The reward becomes the last column of `labels`.
        """
        # Expand features and labels with the transposed/rotated positions.
        self.transform_history_matrix()

        rewards = []
        for i in range(turn):
            current_player = BLACK if i % 2 == 0 else WHITE
            if current_player == winner:
                rewards.append(1)
            else:
                rewards.append(0 if winner == TIE else -1)
        per_game = np.asarray(rewards).reshape((turn, 1))
        # Every symmetric copy of a position carries the same reward.
        winner_stack = np.tile(per_game, (self._SYMMETRY_COUNT, 1))

        if DEBUG:
            print(self.labels.shape, winner_stack.shape)
        self.labels = np.hstack((self.labels, winner_stack))
        if DEBUG:
            print('label shape...', self.labels.shape)
            print('final labels...', self.labels)

        # The lock serialises appends to the two shared data files.
        with data_lock:
            try:
                with open(label_path, "a+") as f:
                    np.savetxt(f, self.labels, fmt='%f')
                with open(features_path, "a+") as f:
                    np.savetxt(f, self.features, fmt='%i')
            except Exception as e:
                # Best effort: a failed write is reported, not raised.
                # NOTE(review): a failure after the label write leaves the
                # two files with different row counts -- confirm acceptable.
                print(e)

    def play(self, player1, player2, turn, p1isp2):
        """Play one move and log it in both sides' histories.

        Args:
            player1: model used on even turns (black).
            player2: model used on odd turns (white).
            turn: zero-based move counter; its parity selects the side.
            p1isp2: truthy for self-play. The search then runs in 'stoch'
                mode with `selfplay_timelimit`, and the position and move
                distribution are recorded. Otherwise the search runs in
                'comp' mode with `eval_timelimit` and nothing is recorded.
        """
        if turn % 2 == 0:
            mover, waiter, color_to_play = self.bp, self.wp, BLACK
            current2play = player1
        else:
            mover, waiter, color_to_play = self.wp, self.bp, WHITE
            current2play = player2

        if p1isp2:  # self play, use stochastic policy
            mode, time_limit = 'stoch', selfplay_timelimit
        else:  # eval, use deterministic policy
            mode, time_limit = 'comp', eval_timelimit
        results = UCTAlg(predict_model=current2play,
                         json=mover,
                         mode=mode).run(time_limit=time_limit)

        # First item is the chosen move, second the move distribution.
        res, pi = results[0], results[1]

        if p1isp2:
            # Record before the board changes so features match the decision.
            self.record_steps(color_to_play)
            self.record_possibilities(pi)

        self.board.disc_place(color_to_play, res[0], res[1])

        # The move is the mover's response and the opponent's next request.
        dct = {'x': res[0], 'y': res[1]}
        mover["responses"].append(dct)
        waiter["requests"].append(dct)
        if DEBUG:
            print('The subprocess responses ...', res)
            print('the possibility...', pi)
            print('%s round has played' %
                  ('black' if color_to_play == BLACK else 'white'))

    def calc_features(self, color_to_play):
        """Encode the current board as one row and append it to `features`.

        Each cell contributes `channel` values: index 0 is 1 when the cell
        holds `color_to_play`, index 1 is 1 when it holds any other non-empty
        value, and the last index is 1 when `color_to_play` is BLACK. Cells
        are visited with j as the outer loop and i as the inner loop, reading
        `board[i][j]`.
        """
        side_flag = 1 if color_to_play == BLACK else 0
        bd = self.board.board
        row = []  # built once as a flat list instead of re-stacking per cell
        for j in range(BOARD_SIZE):
            for i in range(BOARD_SIZE):
                e = bd[i][j]
                piece = [0] * channel
                if e == color_to_play:
                    piece[0] = 1
                elif e != EMPTY:
                    piece[1] = 1
                piece[-1] = side_flag
                row.extend(piece)
        self.features = np.vstack((self.features, row))

    def transform_history_matrix(self):
        """Extend `features` and `labels` with transposed and rotated copies.

        Each array grows to `_SYMMETRY_COUNT` times its row count. Both use
        the same block order (original, transpose, then each rotated three
        times in alternation), so feature and label rows stay aligned.
        """
        self.features = self._with_symmetries(
            self.features, (-1, BOARD_SIZE, BOARD_SIZE, channel))
        self.labels = self._with_symmetries(
            self.labels, (-1, BOARD_SIZE, BOARD_SIZE))

    @staticmethod
    def _with_symmetries(flat, grid_shape):
        """Return `flat` stacked with its transposed/rotated variants.

        `flat` is viewed as `grid_shape` (axes 1 and 2 are the board axes),
        transformed, and flattened back to the row layout of `flat`.
        """
        row_shape = flat.shape
        grid = flat.reshape(grid_shape)
        mirrored = np.swapaxes(grid, 1, 2)
        blocks = [flat, mirrored.reshape(row_shape)]
        for _ in range(3):
            grid = np.rot90(grid, axes=(1, 2))
            mirrored = np.rot90(mirrored, axes=(1, 2))
            blocks.append(grid.reshape(row_shape))
            blocks.append(mirrored.reshape(row_shape))
        return np.vstack(blocks)