Exemplo n.º 1
0
def main():
    """Generate self-play training data and append it to latest.train.hdf5.

    Every batch yielded by the DataHelper generator is stored as one new row
    in the ``state_batch`` / ``value_batch`` / ``probs_batch`` datasets of the
    HDF5 file.  A dataset is created the first time its name is missing from
    the file and is then grown by one row along axis 0 per batch.  After each
    stored batch the main process runs ``run_proc`` itself with a pre-set
    exit event and resets its board.
    """
    # Use alphazero self-play for data generation
    agents_meta = parse_schedule()

    # worker variable of main process
    board = Board()
    sigexit = Event()
    sigexit.set()  # pre-set signal so main proc generator will iterate only once

    # subprocess data generator
    helper = DataHelper(data_files=[])
    helper.set_agents_meta(agents_meta=agents_meta)
    generator = helper.generate_batch(TRAINING_CONFIG["batch_size"])

    # start generating
    with h5py.File(f"{DATA_CONFIG['data_path']}/latest.train.hdf5", 'a') as hf:
        for state_batch, value_batch, probs_batch in generator:
            # Pair each dataset name with its batch explicitly instead of
            # looking the variables up by name through locals().
            named_batches = (
                ("state_batch", state_batch),
                ("value_batch", value_batch),
                ("probs_batch", probs_batch),
            )
            for batch_name, batch in named_batches:
                if batch_name not in hf:
                    # Start with zero rows; axis 0 is unlimited so that one
                    # row can be appended per batch.
                    hf.create_dataset(batch_name, (0, *batch.shape),
                                      maxshape=(None, *batch.shape))
                dataset = hf[batch_name]
                dataset.resize(dataset.shape[0] + 1, axis=0)
                dataset[-1] = batch

            # prevent main proc from generating data too quick
            # since sigexit has been set, proc will iterate only once
            run_proc(helper.buffer, helper.buffer_size, helper.lock,
                     sigexit, agents_meta, board)
            board.reset()
Exemplo n.º 2
0
def main():
    """Run self-play data generation and persist the batches to HDF5.

    Opens ``latest.train.hdf5`` under ``DATA_CONFIG['data_path']`` in append
    mode and, for every (state, value, probs) batch produced by the
    DataHelper generator, appends the batch as a new row of the dataset with
    the matching name.  Missing datasets are created with an unlimited first
    axis.  Between batches the main process calls ``run_proc`` with an
    already-set exit event and then resets its board.
    """
    # Use alphazero self-play for data generation
    agents_meta = parse_schedule()

    # worker variable of main process
    board = Board()
    sigexit = Event()
    # pre-set signal so main proc generator will iterate only once
    sigexit.set()

    # subprocess data generator
    helper = DataHelper(data_files=[])
    helper.set_agents_meta(agents_meta=agents_meta)
    generator = helper.generate_batch(TRAINING_CONFIG["batch_size"])

    # Dataset names, in the same order as the generator's tuple elements.
    batch_names = ("state_batch", "value_batch", "probs_batch")

    # start generating
    with h5py.File(f"{DATA_CONFIG['data_path']}/latest.train.hdf5", 'a') as hf:
        for state_batch, value_batch, probs_batch in generator:
            batches = (state_batch, value_batch, probs_batch)
            # zip() ties each name to its array directly, so no by-name
            # variable lookup through locals() is needed.
            for batch_name, batch in zip(batch_names, batches):
                if batch_name not in hf:
                    shape = batch.shape
                    hf.create_dataset(batch_name, (0, *shape),
                                      maxshape=(None, *shape))
                dataset = hf[batch_name]
                # Grow by one row, then write the batch into that new row.
                dataset.resize(dataset.shape[0] + 1, axis=0)
                dataset[-1] = batch

            # prevent main proc from generating data too quick
            # since sigexit has been set, proc will iterate only once
            run_proc(helper.buffer, helper.buffer_size, helper.lock, sigexit,
                     agents_meta, board)
            board.reset()
Exemplo n.º 3
0
def dual_play(agents, board=None, verbose=False, graphic=False):
    """
    Play one game between 2 players until the board reports the end.
    Params:
      agents:  { Player.black: agent1, Player.white: agent2 }.
      board:   initial board state. Start player will be determined here.
               A new Board is created when omitted; a board that has
               already ended is reset before play starts.
      verbose: if true, then return value will be in the form of training data.
               Any truthy value enables it.
      graphic: if true, print the board after every move and the winner
               once the game ends.
    Returns:
      if verbose is true:
        [(state_inputs, final_score, action_probs)]
        One tuple per move played. final_score is
        np.array(Player.calc_score(player_to_move, winner)).
      else:
        winner
    """
    if board is None:
        board = Board()
    elif board.status["is_end"]:
        board.reset()

    # Per-move records (encoded state, player to move, action probs).
    # Only filled in verbose mode.
    history = []

    while True:
        # set the current agent
        cur_agent = agents[board.status["cur_player"]]

        # evaluate board state and get action
        if verbose:
            _, action_probs, next_move = cur_agent.eval_state(board)
            # Recorded before the move is applied, so the state and the
            # player belong to the position the agent just evaluated.
            history.append((
                board.encoded_states(),
                board.status["cur_player"],
                action_probs,
            ))
        else:
            next_move = cur_agent.get_action(board)

        # update board
        board.apply_move(next_move)
        if graphic:
            print(board)

        # end judge
        if not board.status["is_end"]:
            continue

        winner = board.status["winner"]
        if graphic:
            print("Game ends. winner is {}.".format(winner))

        if not verbose:
            return winner

        # format output result: the score of each recorded move can only be
        # computed now that the winner is known
        return [
            (state, np.array(Player.calc_score(player, winner)), probs)
            for state, player, probs in history
        ]
Exemplo n.º 4
0
def eval_agents(agents, num_games=9):
    """
    Eval the performance of two agents by multiple game simulation.
    The player-to-agent pairing is reversed after every game, and a game
    whose winner is neither player counts as half a win for each agent.
    Params:
      agents: [agent1, agent2]
      num_games: number of games simulated, default to BO9.
    Returns:
      (win_rate(agent1), win_rate(agent2)) as a tuple.
    """
    print("---------Evaluating agents-------------")

    board = Board()
    players = [Player.black, Player.white]
    win_cnts = np.zeros(2)

    for i in range(num_games):
        winner = dual_play(dict(zip(players, agents)), board)
        # Only the index lookup can signal a tie, so keep the try body to
        # that single call.
        try:
            win_idx = players.index(winner)
        except ValueError:  # tie
            win_cnts += 0.5
            print("Round {} ends, tie game;".format(i + 1))
        else:
            # players[k] was paired with agents[k] for this game, so the
            # winner's index in players is also the agent's index.
            win_cnts[win_idx] += 1
            print("Round {} ends, winner is <{}: {}>;".format(i + 1, winner, agents[win_idx]))
        players.reverse()  # exchange the start player
        board.reset()
        for agent in agents:
            agent.reset()

    win_rates = win_cnts / num_games

    print("Win rate:")
    print("{}: {}".format(agents[0], win_rates[0]))
    print("{}: {}".format(agents[1], win_rates[1]))
    print("---------------------------------------")
    return tuple(win_rates)