示例#1
0
    def evaluate_state(self, env: MancalaEnv) -> (float, float):
        """Run the network's move evaluation on the current position of *env*.

        The board image is requested in flipped form when the side to move
        is ``Side.NORTH``, and the action mask comes from
        ``env.get_action_mask_with_no_pie()``.

        Returns:
            A pair ``(dist, value)``: the first element of the triple
            returned by ``self.network.evaluate_move`` passed through
            unchanged, and the third element converted with ``float``.
            The middle element is discarded.

        NOTE(review): the return annotation is a tuple expression, not a
        typing construct, and ``dist`` is presumably a move distribution
        rather than a single float -- confirm against the network API.
        """
        north_to_move = env.side_to_move == Side.NORTH
        board_image = env.board.get_board_image(flipped=north_to_move)
        action_mask = env.get_action_mask_with_no_pie()

        network_output = self.network.evaluate_move(
            state=board_image,
            mask=action_mask,
        )
        # Exactly three values are expected; the middle one is not needed here.
        dist, _unused, raw_value = network_output
        return dist, float(raw_value)
示例#2
0
 def sample_state(self, env: MancalaEnv) -> (int, float):
     """Sample from the network for the current position of *env*.

     The board image is requested in flipped form when the side to move
     is ``Side.NORTH``, and the action mask comes from
     ``env.get_action_mask_with_no_pie()``.

     Returns:
         Whatever ``self.network.sample`` returns for that state and mask.
         The annotation suggests an ``(int, float)`` pair, presumably a
         sampled action and its value -- confirm against the network API.
     """
     north_to_move = env.side_to_move == Side.NORTH
     board_image = env.board.get_board_image(flipped=north_to_move)
     action_mask = env.get_action_mask_with_no_pie()
     sampled = self.network.sample(state=board_image, mask=action_mask)
     return sampled