Example #1
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return:
    """
    state_list = []
    policy_list = []
    value_list = []
    for state_fen, policy, value in data:

        state_planes = canon_input_planes(state_fen)

        if is_black_turn(state_fen):
            policy = Config.flip_policy(policy)

        move_number = int(state_fen.split(' ')[5])
        # reduces the noise of the opening... plz train faster
        value_certainty = min(5, move_number) / 5
        sl_value = value * value_certainty + testeval(state_fen, False) * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return (np.asarray(state_list, dtype=np.float32),
            np.asarray(policy_list, dtype=np.float32),
            np.asarray(value_list, dtype=np.float32))
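For intuition, a minimal self-contained sketch of the value-blending rule above (the numbers in the usage lines are made up):

def blend_value(game_value, static_eval, move_number, horizon=5):
    """Blend the final game outcome with a static evaluation.

    Early in the game (move_number < horizon) the outcome is noisy, so it
    is weighted less and the static evaluation carries more of the target.
    """
    value_certainty = min(horizon, move_number) / horizon
    return game_value * value_certainty + static_eval * (1 - value_certainty)

# At fullmove 2 with outcome +1.0 and a static eval of +0.1:
# 1.0 * 0.4 + 0.1 * 0.6 == 0.46
assert abs(blend_value(1.0, 0.1, 2) - 0.46) < 1e-12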
    def action(self, env: GoBangEnv, can_stop=True) -> str:
        """
        Figures out the next best move
        within the specified environment and returns a string describing the action to take.

        :param GoBangEnv env: environment in which to figure out the action
        :param boolean can_stop: whether we are allowed to take no action (return None)
        :return: None if no action should be taken (indicating a resign). Otherwise, returns a string
            indicating the action to take in uci format
        """
        self.reset()

        # for tl in range(self.play_config.thinking_loop):
        root_value, naked_value = self.search_moves(env)
        policy = self.calc_policy(env)
        my_action = int(np.random.choice(
            range(self.labels_n),
            p=self.apply_temperature(policy, env.num_halfmoves)))

        # if can_stop and self.play_config.resign_threshold is not None and \
        #                 root_value <= self.play_config.resign_threshold \
        #                 and env.num_halfmoves > self.play_config.min_resign_turn:
        #     # noinspection PyTypeChecker
        #     return None
        # else:
        # policy is converted to a plain list so that json.dump() can serialize it
        self.moves.append(
            [get_state_by_input_planes(env.observation), list(policy)])
        # augment the training data with symmetric (flipped) positions
        move = [env.observation, list(policy)]
        for _state, _policy in Config.flip_moves(move):
            self.moves.append(
                [get_state_by_input_planes(np.array(_state)), _policy])
        return self.config.labels[my_action]
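Config.flip_moves isn't shown here; as a rough sketch of this kind of symmetry augmentation (the function name and the square-board/flat-policy layout are assumptions), a board and its per-cell policy can be flipped together:

import numpy as np

def flip_board_and_policy(board, policy):
    """Sketch of symmetry augmentation for a square-board game.

    `board` is an (N, N) array of stones and `policy` a flat length-N*N
    visit distribution over cells; mirroring both the same way yields an
    equivalent training example for free.
    """
    n = board.shape[0]
    flipped_board = np.fliplr(board)
    flipped_policy = np.fliplr(policy.reshape(n, n)).reshape(-1)
    return flipped_board, flipped_policy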
Example #3
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return:
    """
    state_list = []
    policy_list = []
    value_list = []
    env = ChessEnv().reset()
    for state_fen, policy, value in data:
        move_number = int(state_fen.split(' ')[5])
        # f2 = maybe_flip_fen(maybe_flip_fen(state_fen,True),True)
        # assert state_fen == f2
        next_move = env.deltamove(state_fen)
        if next_move is None:  # new game!
            assert state_fen == chess.STARTING_FEN
            env.reset()
        else:
            env.step(next_move, False)

        state_planes = env.canonical_input_planes()
        # assert env.check_current_planes(state_planes)

        side_to_move = state_fen.split(" ")[1]
        if side_to_move == 'b':
            # assert np.sum(policy) == 0
            policy = Config.flip_policy(policy)
        # else: assert abs(np.sum(policy) - 1) < 1e-8

        # if np.sum(policy) != 0:
        #     policy /= np.sum(policy)

        # assert abs(np.sum(policy) - 1) < 1e-8

        assert len(policy) == 1968
        assert state_planes.dtype == np.float32

        # reduces the noise of the opening... plz train faster
        value_certainty = min(15, move_number) / 15
        SL_value = value * value_certainty + env.testeval() * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(SL_value)

    return (np.array(state_list, dtype=np.float32),
            np.array(policy_list, dtype=np.float32),
            np.array(value_list, dtype=np.float32))
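env.deltamove() isn't shown; a sketch of what it presumably does (an assumption about its implementation) is to scan the legal moves for the one that reproduces the next FEN, so the buffer can be replayed as moves and the env's history planes stay consistent:

import chess

def deltamove_sketch(board, target_fen):
    """Return the legal move taking `board` to `target_fen`, or None."""
    for move in list(board.legal_moves):
        board.push(move)
        fen = board.fen()
        board.pop()
        if fen == target_fen:
            return move
    return None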
Example #4
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """ expand new leaf, this is called only once per state
        this is called with state locked
        insert P(a|s), return leaf_v
        """
        state_planes = env.canonical_input_planes()

        leaf_p, leaf_v = self.predict(state_planes)
        # these are canonical policy and value (i.e. side to move is "white")

        if not env.white_to_move:
            leaf_p = Config.flip_policy(
                leaf_p)  # get it back to python-chess form

        return leaf_p, leaf_v
    def calc_policy(self, env):
        """calc π(a|s0)
        :return:
        """
        state = state_key(env)
        my_visitstats = self.tree[state]
        policy = np.zeros(self.labels_n)
        for action, a_s in my_visitstats.a.items():
            policy[self.move_lookup[action]] = a_s.n

        policy /= np.sum(policy)

        if not env.white_to_move:
            policy = Config.flip_policy(policy)
        return policy
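apply_temperature() (which consumes the output of calc_policy() in action() above) isn't shown either; one common implementation, assumed here, is pi_tau(a) proportional to N(a)^(1/tau), played greedily once tau gets small:

import numpy as np

def apply_temperature_sketch(policy, tau):
    """Sharpen or flatten a visit-count policy with temperature tau.

    tau = 1 keeps the distribution from calc_policy() unchanged;
    tau -> 0 collapses onto the most-visited move.
    """
    if tau < 0.1:
        out = np.zeros_like(policy)
        out[np.argmax(policy)] = 1.0
        return out
    out = np.power(policy, 1.0 / tau)
    return out / np.sum(out)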
    def __init__(self):
        # Tensorflow session
        import tensorflow as tf
        log.debug("Initializing Tensorflow session...")
        tf_session_config = tf.ConfigProto()
        tf_session_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tf_session_config)

        from chess_zero.config import Config, PlayWithHumanConfig
        from chess_zero.env.chess_env import ChessEnv

        self.chess_env_class = ChessEnv

        default_config = Config()
        PlayWithHumanConfig().update_play_config(default_config.play)
        self.alpha_player = self.get_player_from_model(default_config)
Example #7
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """ expand new leaf, this is called only once per state
        this is called with state locked
        insert P(a|s), return leaf_v
        """
        state_planes = env.canonical_input_planes()

        leaf_p, leaf_v = self.predict(state_planes)
        # these are canonical policy and value (i.e. side to move is "white")

        if env.board.turn == chess.BLACK:
            leaf_p = Config.flip_policy(
                leaf_p)  # get it back to python-chess form
        #np.testing.assert_array_equal(Config.flip_policy(Config.flip_policy(leaf_p)), leaf_p)

        return leaf_p, leaf_v
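The commented-out assert above depends on Config.flip_policy being an involution. In chess-alpha-zero-style code it is typically a fixed permutation built by mirroring the rank digits in each UCI label (a sketch under that assumption):

def flip_uci(move):
    """Mirror a UCI move vertically, e.g. 'e2e4' -> 'e7e5'.

    Ranks '1'..'8' map to '8'..'1'; files and promotion pieces are
    untouched, so applying the flip twice is the identity.
    """
    return ''.join(str(9 - int(c)) if c.isdigit() else c for c in move)

def make_flip_permutation(labels):
    """Precompute indices such that flipped_policy = policy[perm]."""
    index = {m: i for i, m in enumerate(labels)}
    return [index[flip_uci(m)] for m in labels]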
Example #8
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """ expand new leaf, this is called only once per state
        this is called with state locked
        insert P(a|s), return leaf_v

        This gets a prediction for the policy and value of the state within the given env
        :return (np.ndarray, float): the policy and value predictions for this state
        """
        state_planes = env.canonical_input_planes()

        leaf_p, leaf_v = self.predict(state_planes)
        # these are canonical policy and value (i.e. side to move is "white")

        if not env.white_to_move:
            leaf_p = Config.flip_policy(leaf_p) # get it back to python-chess form

        return leaf_p, leaf_v
    def expand_and_evaluate(self, env) -> (np.ndarray, float):
        """expand new leaf

        this is called with state locked
        insert P(a|s), return leaf_v

        :param ChessEnv env:
        :return: (leaf_p, leaf_v), the policy vector and value for this state
        """
        if self.play_config.tablebase_access and env.board.num_pieces() <= 5:
            return self.tablebase_and_evaluate(env)

        state = env.board.gather_features(self.config.model.t_history)
        leaf_p, leaf_v = self.predict(state)

        if env.board.turn == chess.BLACK:
            leaf_p = Config.flip_policy(leaf_p)

        return leaf_p, leaf_v
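tablebase_and_evaluate() isn't shown; for reference, the kind of probe it might perform with python-chess (a sketch, not the repo's actual code):

import chess
import chess.syzygy

def probe_value(board, tablebase_dir):
    """Exact value for positions with few enough pieces.

    Syzygy probe_wdl() returns a value in -2..2 from the side to move's
    perspective (with +/-1 marking cursed/blessed results under the
    50-move rule); rescale it to the [-1, 1] value range used by MCTS.
    """
    with chess.syzygy.open_tablebase(tablebase_dir) as tb:
        wdl = tb.probe_wdl(board)
    return max(-1.0, min(1.0, wdl / 2.0))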
Example #10
def load_data_from_file(filename, t_history):
    # NOTE: this may need a try/except. If the play data file isn't completely
    # written yet, read_game_data_from_file can fail (e.g. with a "missing
    # delimiter" parse error); see the defensive variant sketched below.
    data = read_game_data_from_file(filename)

    state_list = []
    policy_list = []
    value_list = []

    board = MyBoard(None)
    board.fullmove_number = 1000  # an arbitrary large value.

    for state, policy, value in data:
        board.push_fen(state)
        state = board.gather_features(t_history)
        if board.turn == chess.BLACK:
            policy = Config.flip_policy(policy)

        state_list.append(state)
        policy_list.append(policy)
        value_list.append(value)

    return state_list, policy_list, value_list
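Following up on the comment at the top: read_game_data_from_file is repo-specific, but assuming the files are JSON, one defensive pattern is to skip anything that fails to parse and let the next scan retry it:

import json
import logging

def read_game_data_safely(filename):
    """Return the parsed play-data file, or None if it is unreadable.

    A self-play worker may still be writing the file, in which case
    json.load() raises a ValueError subclass; skipping the file and
    picking it up on the next scan is usually good enough.
    """
    try:
        with open(filename, "rt") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        logging.warning("skipping %s: %s", filename, e)
        return None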
Example #11
def convert_to_cheating_data(data):
    """
    :param data: format is SelfPlayWorker.buffer
    :return:
    """
    state_list = []
    policy_list = []
    value_list = []
    for state_fen, policy, value in data:

        state_planes = canon_input_planes(state_fen)

        if is_black_turn(state_fen):
            policy = Config.flip_policy(policy)

        move_number = int(state_fen.split(' ')[5])
        # reduces the noise of the opening... plz train faster
        value_certainty = min(5, move_number) / 5
        sl_value = value * value_certainty + testeval(state_fen, False) * (1 - value_certainty)

        state_list.append(state_planes)
        policy_list.append(policy)
        value_list.append(sl_value)

    return (np.asarray(state_list, dtype=np.float32),
            np.asarray(policy_list, dtype=np.float32),
            np.asarray(value_list, dtype=np.float32))
def start(config=Config(config_type='mini')):
    return ManEvaluateWorker(config).start()
Example #13
    def create(self):
        # Initial Alpha Zero setup
        default_config = Config()
        PlayWithHumanConfig().update_play_config(default_config.play)
        return self.get_player_from_model(default_config)
Example #14
                                    # place a stone of the corresponding color at this board position
                                    plot_chess(i + 1, j + 1, screen, no)
                                    action = f'{i}_{j}_{no}'
                                    print(action)
                                    pygame.display.flip()
                                    env.step(action)
        clock.tick(60)

    if env.white_won:
        put_text('白棋胜利,请重新游戏', screen, 30)  # "White wins, please start a new game"
    else:
        put_text('黑棋胜利,请重新游戏', screen, 30)  # "Black wins, please start a new game"
    sleep(10)


if __name__ == "__main__":
    try:
        pygame.init()
        pygame.mixer.init()
        import chess_zero.lib.tf_util as tu
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        tu.set_session_config(allow_growth=True)
        PvEWorker(Config(config_type='mini')).start()
    except SystemExit:
        pass
    except Exception:
        traceback.print_exc()
        pygame.quit()
        input()
import os
import sys
import multiprocessing as mp
from logging import disable

# make the package importable before pulling in chess_zero modules
_PATH_ = os.path.dirname(os.path.dirname(__file__))
if _PATH_ not in sys.path:
    sys.path.append(_PATH_)

from chess_zero.lib.logger import setup_logger
from chess_zero.config import Config
from chess_zero.manager import create_parser, setup, logger
from chess_zero.worker import sl

mp.set_start_method('spawn')
sys.setrecursionlimit(10000)

parser = create_parser()
args = parser.parse_args()
config_type = args.type

if args.cmd == 'uci':
    disable(999999)  # plz don't interfere with uci

config = Config(config_type=config_type)
setup(config, args)

logger.info(f"config type: {config_type}")

sl.start(config)

def serve(max_workers=10, port=7777):
    """The gRPC serve function.

    :param max_workers: pool of threads to execute calls asynchronously
    :param port: gRPC server port

    Add all your servicer classes here (from the .py files generated by the
    protobuf compiler).
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=max_workers))
    grpc_bt_grpc.add_AlphaZeroServicer_to_server(AlphaZeroServicer(), server)
    server.add_insecure_port("[::]:{}".format(port))
    return server
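For reference, typical standalone usage of serve() (a sketch; grpc.server() does not block, and wait_for_termination() needs grpcio >= 1.24):

server = serve(max_workers=10, port=7777)
server.start()
server.wait_for_termination()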


if __name__ == "__main__":
	"""
	Runs the gRPC server to communicate with the Snet Daemon.
	"""

	# Initial Alpha Zero setup
	default_config = Config()
	PlayWithHumanConfig().update_play_config(default_config.play)
	ALPHA_ZERO_PLAYER = get_player_from_model(default_config)

	parser = service.common.common_parser(__file__)
	args = parser.parse_args(sys.argv[1:])
	service.common.main_loop(serve, args)