Example #1
    def __init__(self, model_path_a, model_path_b):
        if model_path_a == 'random':
            print('load player model:', model_path_a)
            self.player = agents.RandomAgent(BOARD_SIZE)
        elif model_path_a == 'puct':
            print('load player model:', model_path_a)
            self.player = agents.PUCTAgent(BOARD_SIZE, N_MCTS)
        elif model_path_a == 'uct':
            print('load player model:', model_path_a)
            self.player = agents.UCTAgent(BOARD_SIZE, N_MCTS)
        elif model_path_a == 'human':
            print('load player model:', model_path_a)
            self.player = agents.HumanAgent(BOARD_SIZE)
        elif model_path_a == 'web':
            print('load player model:', model_path_a)
            self.player = agents.WebAgent(BOARD_SIZE)
        elif model_path_a:
            print('load player model:', model_path_a)
            self.player = agents.ZeroAgent(BOARD_SIZE,
                                           N_MCTS,
                                           IN_PLANES_PLAYER,
                                           noise=False)
            self.player.model = model.PVNet(N_BLOCKS_PLAYER, IN_PLANES_PLAYER,
                                            OUT_PLANES_PLAYER,
                                            BOARD_SIZE).to(device)
            state_a = self.player.model.state_dict()
            my_state_a = torch.load(
                model_path_a, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_a.items():
                if k in state_a:
                    state_a[k] = v
            self.player.model.load_state_dict(state_a)
        else:
            print('load player model:', model_path_a)
            self.player = agents.ZeroAgent(BOARD_SIZE,
                                           N_MCTS,
                                           IN_PLANES_PLAYER,
                                           noise=False)
            self.player.model = model.PVNet(N_BLOCKS_PLAYER, IN_PLANES_PLAYER,
                                            OUT_PLANES_PLAYER,
                                            BOARD_SIZE).to(device)
        if model_path_b == 'random':
            print('load enemy model:', model_path_b)
            self.enemy = agents.RandomAgent(BOARD_SIZE)
        elif model_path_b == 'puct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.PUCTAgent(BOARD_SIZE, N_MCTS)
        elif model_path_b == 'uct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.UCTAgent(BOARD_SIZE, N_MCTS)
        elif model_path_b == 'human':
            print('load enemy model:', model_path_b)
            self.enemy = agents.HumanAgent(BOARD_SIZE)
        elif model_path_b == 'web':
            print('load enemy model:', model_path_b)
            self.enemy = agents.WebAgent(BOARD_SIZE)
        elif model_path_b:
            print('load enemy model:', model_path_b)
            self.enemy = agents.ZeroAgent(BOARD_SIZE,
                                          N_MCTS,
                                          IN_PLANES_ENEMY,
                                          noise=False)
            self.enemy.model = model.PVNet(N_BLOCKS_ENEMY, IN_PLANES_ENEMY,
                                           OUT_PLANES_ENEMY,
                                           BOARD_SIZE).to(device)
            state_b = self.enemy.model.state_dict()
            my_state_b = torch.load(
                model_path_b, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_b.items():
                if k in state_b:
                    state_b[k] = v
            self.enemy.model.load_state_dict(state_b)
        else:
            print('load enemy model:', model_path_b)
            self.enemy = agents.ZeroAgent(BOARD_SIZE,
                                          N_MCTS,
                                          IN_PLANES_ENEMY,
                                          noise=False)
            self.enemy.model = model.PVNet(N_BLOCKS_ENEMY, IN_PLANES_ENEMY,
                                           OUT_PLANES_ENEMY,
                                           BOARD_SIZE).to(device)
        self.player_pi = None
        self.enemy_pi = None
        self.player_visit = None
        self.enemy_visit = None

        self.player_monitor = self.player
        self.enemy_monitor = self.enemy
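
The checkpoint-loading branches above copy saved tensors into the model's current state_dict key by key, so any key missing from the checkpoint keeps its freshly initialized value. Below is a minimal sketch of that pattern in isolation; SmallNet and load_compatible_weights are illustrative stand-ins, not part of the project.

import torch
import torch.nn as nn


class SmallNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)


def load_compatible_weights(net, checkpoint_path, device='cpu'):
    # Start from the model's current parameters and overwrite only the keys
    # that also appear in the checkpoint; unmatched keys are left untouched.
    state = net.state_dict()
    saved = torch.load(checkpoint_path, map_location=device)
    for k, v in saved.items():
        if k in state:
            state[k] = v
    net.load_state_dict(state)
    return net

Calling load_state_dict directly on the raw checkpoint with the default strict=True would raise on any missing or unexpected key, which is why these examples filter through the model's own state_dict first.
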
Example #2
File: main.py Project: llejo3/alpha_omok
    torch.cuda.manual_seed_all(SEED)

# Global variables
rep_memory = deque(maxlen=MEMORY_SIZE)
cur_memory = deque()
step = 0
start_iter = 0
total_epoch = 0
result = {'Black': 0, 'White': 0, 'Draw': 0, 'Resign': 0}
if USE_TENSORBOARD:
    from tensorboardX import SummaryWriter
    Writer = SummaryWriter()

# Initialize agent & model
Agent = agents.ZeroAgent(BOARD_SIZE, N_MCTS, IN_PLANES, noise=True)
Agent.model = model.PVNet(N_BLOCKS, IN_PLANES, OUT_PLANES,
                          BOARD_SIZE).to(device)

logging.warning('\nCUDA: {}'
                '\nAGENT: {}'
                '\nMODEL: {}'
                '\nSEED: {}'
                '\nBOARD_SIZE: {}'
                '\nN_MCTS: {}'
                '\nTAU_THRES: {}'
                '\nRESIGN_MODE: {}'
                '\nN_BLOCKS: {}'
                '\nIN_PLANES: {}'
                '\nOUT_PLANES: {}'
                '\nN_SELFPLAY: {}'
                '\nMEMORY_SIZE: {}'
                '\nN_EPOCHS: {}'
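
rep_memory is a deque with maxlen=MEMORY_SIZE, so the replay buffer silently evicts its oldest self-play samples once it is full. A short standalone illustration of that behavior; MEMORY_SIZE is shrunk to 5 here purely for demonstration.

from collections import deque

MEMORY_SIZE = 5
rep_memory = deque(maxlen=MEMORY_SIZE)

for sample in range(8):
    rep_memory.append(sample)

print(list(rep_memory))  # [3, 4, 5, 6, 7] -- the three oldest samples were dropped
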
Example #3
    def set_agents(self, model_path_a, model_path_b, model_path_m):

        if model_path_a == 'human' or model_path_b == 'human':
            game_mode = 'pygame'
        else:
            game_mode = 'text'

        self.env = game.GameState(game_mode)

        if model_path_a == 'random':
            print('load player model:', model_path_a)
            self.player = agents.RandomAgent(BOARD_SIZE)
        elif model_path_a == 'puct':
            print('load player model:', model_path_a)
            self.player = agents.PUCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'uct':
            print('load player model:', model_path_a)
            self.player = agents.UCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'human':
            print('load player model:', model_path_a)
            self.player = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_a == 'web':
            print('load player model:', model_path_a)
            self.player = agents.WebAgent(BOARD_SIZE)
        else:
            print('load player model:', model_path_a)
            self.player = agents.ZeroAgent(BOARD_SIZE,
                                           N_MCTS_PLAYER,
                                           IN_PLANES_PLAYER,
                                           noise=False)
            self.player.model = model.PVNet(N_BLOCKS_PLAYER,
                                            IN_PLANES_PLAYER,
                                            OUT_PLANES_PLAYER,
                                            BOARD_SIZE).to(device)
            state_a = self.player.model.state_dict()
            my_state_a = torch.load(
                model_path_a, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_a.items():
                if k in state_a:
                    state_a[k] = v
            self.player.model.load_state_dict(state_a)

        if model_path_b == 'random':
            print('load enemy model:', model_path_b)
            self.enemy = agents.RandomAgent(BOARD_SIZE)
        elif model_path_b == 'puct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.PUCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'uct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.UCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'human':
            print('load enemy model:', model_path_b)
            self.enemy = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_b == 'web':
            print('load enemy model:', model_path_b)
            self.enemy = agents.WebAgent(BOARD_SIZE)
        else:
            print('load enemy model:', model_path_b)
            self.enemy = agents.ZeroAgent(BOARD_SIZE,
                                          N_MCTS_ENEMY,
                                          IN_PLANES_ENEMY,
                                          noise=False)
            self.enemy.model = model.PVNet(N_BLOCKS_ENEMY,
                                           IN_PLANES_ENEMY,
                                           OUT_PLANES_ENEMY,
                                           BOARD_SIZE).to(device)
            state_b = self.enemy.model.state_dict()
            my_state_b = torch.load(
                model_path_b, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_b.items():
                if k in state_b:
                    state_b[k] = v
            self.enemy.model.load_state_dict(state_b)

        # monitor agent
        self.monitor = agents.ZeroAgent(BOARD_SIZE,
                                        N_MCTS_MONITOR,
                                        IN_PLANES_ENEMY,
                                        noise=False)
        self.monitor.model = model.PVNet(N_BLOCKS_ENEMY,
                                         IN_PLANES_ENEMY,
                                         OUT_PLANES_ENEMY,
                                         BOARD_SIZE).to(device)
        state_b = self.monitor.model.state_dict()
        my_state_b = torch.load(
            model_path_m, map_location='cuda:0' if use_cuda else 'cpu')
        for k, v in my_state_b.items():
            if k in state_b:
                state_b[k] = v
        self.monitor.model.load_state_dict(state_b)
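
The string dispatch on model_path_a and model_path_b is repeated for every agent in these examples. One way it could be folded into a lookup table is sketched below; make_agent and its parameters are illustrative, not part of the project, and env is only needed for the human agent (matching the HumanAgent signature used in this example).

import agents  # project module, assumed importable as in the examples


def make_agent(name_or_path, board_size, n_mcts, in_planes, env=None):
    simple_agents = {
        'random': lambda: agents.RandomAgent(board_size),
        'puct': lambda: agents.PUCTAgent(board_size, n_mcts),
        'uct': lambda: agents.UCTAgent(board_size, n_mcts),
        'human': lambda: agents.HumanAgent(board_size, env),
        'web': lambda: agents.WebAgent(board_size),
    }
    if name_or_path in simple_agents:
        return simple_agents[name_or_path]()
    # Anything else is treated as a checkpoint path for a ZeroAgent;
    # weight loading would then follow the state_dict pattern shown above.
    return agents.ZeroAgent(board_size, n_mcts, in_planes, noise=False)
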
Example #4
    def set_agents(self, model_path_a, model_path_b, model_path_m):

        # If either player is 'human', run the game in a pygame window; otherwise print text only
        if model_path_a == 'human' or model_path_b == 'human':
            game_mode = 'pygame'
        else:
            game_mode = 'text'

        # Set the game mode of the env file
        self.env = game.GameState(game_mode)

        # Set up the player's model (human)
        if model_path_a == 'random':
            print('load player model:', model_path_a)
            self.player = agents.RandomAgent(BOARD_SIZE)
        elif model_path_a == 'puct':
            print('load player model:', model_path_a)
            self.player = agents.PUCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'uct':
            print('load player model:', model_path_a)
            self.player = agents.UCTAgent(BOARD_SIZE, N_MCTS_PLAYER)
        elif model_path_a == 'human':
            print('load player model:', model_path_a)
            self.player = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_a == 'web':
            print('load player model:', model_path_a)
            self.player = agents.WebAgent(BOARD_SIZE)
        else:
            print('load player model:', model_path_a)
            self.player = agents.ZeroAgent(BOARD_SIZE,
                                           N_MCTS_PLAYER,
                                           IN_PLANES_PLAYER,
                                           noise=False)
            self.player.model = model.PVNet(N_BLOCKS_PLAYER, IN_PLANES_PLAYER,
                                            OUT_PLANES_PLAYER,
                                            BOARD_SIZE).to(device)
            state_a = self.player.model.state_dict()
            my_state_a = torch.load(
                model_path_a, map_location='cuda:0' if use_cuda else 'cpu')
            for k, v in my_state_a.items():
                if k in state_a:
                    state_a[k] = v
            self.player.model.load_state_dict(state_a)

        # Set up the enemy player's model
        if model_path_b == 'random':
            print('load enemy model:', model_path_b)
            self.enemy = agents.RandomAgent(BOARD_SIZE)
        elif model_path_b == 'puct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.PUCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'uct':
            print('load enemy model:', model_path_b)
            self.enemy = agents.UCTAgent(BOARD_SIZE, N_MCTS_ENEMY)
        elif model_path_b == 'human':
            print('load enemy model:', model_path_b)
            self.enemy = agents.HumanAgent(BOARD_SIZE, self.env)
        elif model_path_b == 'web':
            print('load enemy model:', model_path_b)
            self.enemy = agents.WebAgent(BOARD_SIZE)
        else:  # This branch runs when using an already-trained model file
            print('load enemy model:', model_path_b)
            # Set up the enemy agent
            self.enemy = agents.ZeroAgent(BOARD_SIZE,
                                          N_MCTS_ENEMY,
                                          IN_PLANES_ENEMY,
                                          noise=False)
            # Build the enemy network, move it to the device (GPU), and store it in agents.ZeroAgent().model
            self.enemy.model = model.PVNet(N_BLOCKS_ENEMY, IN_PLANES_ENEMY,
                                           OUT_PLANES_ENEMY,
                                           BOARD_SIZE).to(device)
            state_b = self.enemy.model.state_dict()  # network parameters as a dict of tensors
            my_state_b = torch.load(model_path_b,
                                    map_location='cuda:0'
                                    if use_cuda else 'cpu')  # load the saved parameter file
            # state_b maps keys such as each layer's weight and bias to their corresponding tensor values
            for k, v in my_state_b.items():
                if k in state_b:
                    state_b[k] = v
            self.enemy.model.load_state_dict(state_b)  # set the parameters on the model

        # monitor agent, same as above
        self.monitor = agents.ZeroAgent(BOARD_SIZE,
                                        N_MCTS_MONITOR,
                                        IN_PLANES_ENEMY,
                                        noise=False)
        self.monitor.model = model.PVNet(N_BLOCKS_ENEMY, IN_PLANES_ENEMY,
                                         OUT_PLANES_ENEMY,
                                         BOARD_SIZE).to(device)
        state_b = self.monitor.model.state_dict()
        my_state_b = torch.load(model_path_m,
                                map_location='cuda:0' if use_cuda else 'cpu')
        for k, v in my_state_b.items():
            if k in state_b:
                state_b[k] = v
        self.monitor.model.load_state_dict(state_b)
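
As the comments note, state_b maps parameter names to tensors. A quick way to see what those keys look like, using a hypothetical two-module network rather than the project's PVNet:

import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
for key, tensor in net.state_dict().items():
    print(key, tuple(tensor.shape))
# 0.weight (8, 3, 3, 3)
# 0.bias (8,)
# 1.weight (8,)
# 1.bias (8,)
# 1.running_mean (8,)
# 1.running_var (8,)
# 1.num_batches_tracked ()
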
Example #5
def main():
    # Initialize agent & model
    agent = agents.ZeroAgent(BOARD_SIZE, N_MCTS, IN_PLANES, noise=True)
    agent.model = model.PVNet(N_BLOCKS, IN_PLANES, OUT_PLANES,
                              BOARD_SIZE).to(device)
    agent.model.share_memory()

    no_decay = ['bn', 'bias']
    model_parameters = [{
        'params': [
            p for n, p in agent.model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        L2
    }, {
        'params': [
            p for n, p in agent.model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    optimizer = optim.SGD(model_parameters, momentum=0.9, lr=LR)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, TOTAL_ITER)

    logging.info(f'\nCUDA: {use_cuda}'
                 f'\nAGENT: {type(agent).__name__}'
                 f'\nMODEL: {agent.model}'
                 f'\nBOARD_SIZE: {BOARD_SIZE}'
                 f'\nN_MCTS: {N_MCTS}'
                 f'\nTAU_THRES: {TAU_THRES}'
                 f'\nN_BLOCKS: {N_BLOCKS}'
                 f'\nIN_PLANES: {IN_PLANES}'
                 f'\nOUT_PLANES: {OUT_PLANES}'
                 f'\nTOTAL_ITER: {TOTAL_ITER}'
                 f'\nMEMORY_SIZE: {MEMORY_SIZE}'
                 f'\nBATCH_SIZE: {BATCH_SIZE}'
                 f'\nLR: {LR}'
                 f'\nL2: {L2}')

    # ====================== self-play & training ====================== #
    model_path = './data/201215_11_12099_step_model.pickle'
    if model_path is not None:
        load_model(agent, optimizer, scheduler, model_path)

    for n_iter in range(start_iter, TOTAL_ITER):
        print('=' * 58)
        print(' ' * 20 + '  {:2} Iteration  '.format(n_iter) + ' ' * 20)
        print('=' * 58)
        logging.info(datetime.now().isoformat())
        logging.info('=' * 58)
        logging.info(' ' * 20 + "  {:2} Iteration  ".format(n_iter) + ' ' * 20)
        logging.info('=' * 58)
        datetime_now = datetime.now().strftime('%y%m%d')
        train_memory = []
        cur_memory = deque()

        with futures.ProcessPoolExecutor(max_workers=N_PROCESS) as executor:
            fs = [
                executor.submit(self_play, agent, cur_memory, i)
                for i in range(N_PROCESS)
            ]
            for f in futures.as_completed(fs):
                train_memory.extend(f.result())

        train(agent, train_memory, optimizer, scheduler)

        save_model(agent, optimizer, scheduler, datetime_now, n_iter, step)
        save_dataset(train_memory, datetime_now, n_iter, step)

        reset_iter(result)
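
The no_decay grouping above keeps L2 weight decay off batch-norm and bias parameters while applying it to everything else. A compact check of that grouping on a hypothetical two-module network; the constant L2 is replaced by a literal 1e-4 here.

import torch.nn as nn
import torch.optim as optim

net = nn.Sequential()
net.add_module('conv', nn.Conv2d(3, 8, 3))
net.add_module('bn', nn.BatchNorm2d(8))

no_decay = ['bn', 'bias']
model_parameters = [
    {'params': [p for n, p in net.named_parameters()
                if not any(nd in n for nd in no_decay)],
     'weight_decay': 1e-4},
    {'params': [p for n, p in net.named_parameters()
                if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0},
]
optimizer = optim.SGD(model_parameters, momentum=0.9, lr=0.01)

# Only conv.weight lands in the decayed group; conv.bias, bn.weight and
# bn.bias go to the zero-decay group.
print([len(g['params']) for g in optimizer.param_groups])  # [1, 3]
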