def __init__(self, model_path_a, model_path_b):
    """Create the player and enemy agents and reset the pi/visit slots.

    Each ``model_path_*`` argument is interpreted as follows:

    * ``'random'``, ``'puct'``, ``'uct'``, ``'human'`` or ``'web'`` selects
      the matching agent class from ``agents``;
    * any other truthy value is treated as a checkpoint path whose tensors
      are loaded into a new ``ZeroAgent`` network;
    * a falsy value yields a ``ZeroAgent`` whose network keeps the weights
      it was constructed with (nothing is loaded).

    Args:
        model_path_a: Keyword, checkpoint path or falsy value for the player.
        model_path_b: Keyword, checkpoint path or falsy value for the enemy.
    """
    # Agents that are selected by keyword and need no network.
    keyword_agents = {
        'random': lambda: agents.RandomAgent(BOARD_SIZE),
        'puct': lambda: agents.PUCTAgent(BOARD_SIZE, N_MCTS),
        'uct': lambda: agents.UCTAgent(BOARD_SIZE, N_MCTS),
        'human': lambda: agents.HumanAgent(BOARD_SIZE),
        'web': lambda: agents.WebAgent(BOARD_SIZE),
    }

    def load_matching_weights(net, checkpoint_path):
        """Copy checkpoint entries whose keys exist in ``net`` into ``net``."""
        merged = net.state_dict()
        saved = torch.load(
            checkpoint_path,
            map_location='cuda:0' if use_cuda else 'cpu')
        # Checkpoint keys unknown to the network are ignored; network keys
        # missing from the checkpoint keep their current values.
        for key, tensor in saved.items():
            if key in merged:
                merged[key] = tensor
        net.load_state_dict(merged)

    def build_agent(banner, path, n_blocks, in_planes, out_planes):
        """Print ``banner`` with ``path`` and return the agent it selects."""
        print(banner, path)
        if isinstance(path, str) and path in keyword_agents:
            return keyword_agents[path]()
        agent = agents.ZeroAgent(BOARD_SIZE, N_MCTS, in_planes, noise=False)
        agent.model = model.PVNet(
            n_blocks, in_planes, out_planes, BOARD_SIZE).to(device)
        if path:
            load_matching_weights(agent.model, path)
        return agent

    self.player = build_agent(
        'load player model:', model_path_a,
        N_BLOCKS_PLAYER, IN_PLANES_PLAYER, OUT_PLANES_PLAYER)
    self.enemy = build_agent(
        'load enemy model:', model_path_b,
        N_BLOCKS_ENEMY, IN_PLANES_ENEMY, OUT_PLANES_ENEMY)

    self.player_pi = None
    self.enemy_pi = None
    self.player_visit = None
    self.enemy_visit = None
    # The monitors are the playing agents themselves, not separate instances.
    self.player_monitor = self.player
    self.enemy_monitor = self.enemy
torch.cuda.manual_seed_all(SEED) # Global variables rep_memory = deque(maxlen=MEMORY_SIZE) cur_memory = deque() step = 0 start_iter = 0 total_epoch = 0 result = {'Black': 0, 'White': 0, 'Draw': 0, 'Resign': 0} if USE_TENSORBOARD: from tensorboardX import SummaryWriter Writer = SummaryWriter() # Initialize agent & model Agent = agents.ZeroAgent(BOARD_SIZE, N_MCTS, IN_PLANES, noise=True) Agent.model = model.PVNet(N_BLOCKS, IN_PLANES, OUT_PLANES, BOARD_SIZE).to(device) logging.warning('\nCUDA: {}' '\nAGENT: {}' '\nMODEL: {}' '\nSEED: {}' '\nBOARD_SIZE: {}' '\nN_MCTS: {}' '\nTAU_THRES: {}' '\nRESIGN_MODE: {}' '\nN_BLOCKS: {}' '\nIN_PLANES: {}' '\nOUT_PLANES: {}' '\nN_SELFPLAY: {}' '\nMEMORY_SIZE: {}' '\nN_EPOCHS: {}'
def set_agents(self, model_path_a, model_path_b, model_path_m):
    """Create the game environment and the player, enemy and monitor agents.

    ``model_path_a`` and ``model_path_b`` are either one of the keywords
    ``'random'``, ``'puct'``, ``'uct'``, ``'human'`` or ``'web'`` (selecting
    the matching agent class from ``agents``) or a checkpoint path that is
    loaded into a new ``ZeroAgent`` network. The monitor is always a
    ``ZeroAgent`` loaded from ``model_path_m``.

    Args:
        model_path_a: Keyword or checkpoint path for the player.
        model_path_b: Keyword or checkpoint path for the enemy.
        model_path_m: Checkpoint path for the monitor agent.
    """
    # A human on either side needs the pygame front end; otherwise text mode.
    if 'human' in (model_path_a, model_path_b):
        game_mode = 'pygame'
    else:
        game_mode = 'text'
    self.env = game.GameState(game_mode)

    # Agents that are selected by keyword and need no network.
    keyword_agents = {
        'random': lambda n_mcts: agents.RandomAgent(BOARD_SIZE),
        'puct': lambda n_mcts: agents.PUCTAgent(BOARD_SIZE, n_mcts),
        'uct': lambda n_mcts: agents.UCTAgent(BOARD_SIZE, n_mcts),
        'human': lambda n_mcts: agents.HumanAgent(BOARD_SIZE, self.env),
        'web': lambda n_mcts: agents.WebAgent(BOARD_SIZE),
    }

    def load_matching_weights(net, checkpoint_path):
        """Copy checkpoint entries whose keys exist in ``net`` into ``net``."""
        merged = net.state_dict()
        saved = torch.load(
            checkpoint_path,
            map_location='cuda:0' if use_cuda else 'cpu')
        # Checkpoint keys unknown to the network are ignored; network keys
        # missing from the checkpoint keep their current values.
        for key, tensor in saved.items():
            if key in merged:
                merged[key] = tensor
        net.load_state_dict(merged)

    def zero_agent(n_mcts, n_blocks, in_planes, out_planes, checkpoint_path):
        """Build a noise-free ZeroAgent and load ``checkpoint_path`` into it."""
        agent = agents.ZeroAgent(BOARD_SIZE, n_mcts, in_planes, noise=False)
        agent.model = model.PVNet(
            n_blocks, in_planes, out_planes, BOARD_SIZE).to(device)
        load_matching_weights(agent.model, checkpoint_path)
        return agent

    def build_agent(banner, path, n_mcts, n_blocks, in_planes, out_planes):
        """Print ``banner`` with ``path`` and return the agent it selects."""
        print(banner, path)
        if isinstance(path, str) and path in keyword_agents:
            return keyword_agents[path](n_mcts)
        return zero_agent(n_mcts, n_blocks, in_planes, out_planes, path)

    self.player = build_agent(
        'load player model:', model_path_a, N_MCTS_PLAYER,
        N_BLOCKS_PLAYER, IN_PLANES_PLAYER, OUT_PLANES_PLAYER)
    self.enemy = build_agent(
        'load enemy model:', model_path_b, N_MCTS_ENEMY,
        N_BLOCKS_ENEMY, IN_PLANES_ENEMY, OUT_PLANES_ENEMY)

    # monitor agent: uses the enemy network dimensions and its own
    # simulation count; no banner is printed for it.
    self.monitor = zero_agent(
        N_MCTS_MONITOR, N_BLOCKS_ENEMY, IN_PLANES_ENEMY, OUT_PLANES_ENEMY,
        model_path_m)
def set_agents(self, model_path_a, model_path_b, model_path_m):
    """Set up the environment plus the player, enemy and monitor agents.

    A path equal to ``'random'``, ``'puct'``, ``'uct'``, ``'human'`` or
    ``'web'`` selects the corresponding agent class; anything else is
    treated as a saved-parameter file that is loaded into a ``ZeroAgent``.

    Args:
        model_path_a: Keyword or saved-parameter file for the player.
        model_path_b: Keyword or saved-parameter file for the enemy.
        model_path_m: Saved-parameter file for the monitor agent.
    """
    # If either player is human, run the game in a pygame window;
    # otherwise only print text.
    human_playing = model_path_a == 'human' or model_path_b == 'human'
    # Set the game mode of the environment.
    self.env = game.GameState('pygame' if human_playing else 'text')

    def restore_parameters(network, saved_file):
        """Load the entries of ``saved_file`` that ``network`` also has."""
        # Dict of the network's parameter tensors; keys are names such as
        # the weight and bias of each layer, values are the tensors.
        current = network.state_dict()
        # Load the saved parameter file.
        stored = torch.load(
            saved_file, map_location='cuda:0' if use_cuda else 'cpu')
        for name, value in stored.items():
            if name in current:
                current[name] = value
        # Set the merged parameters on the model.
        network.load_state_dict(current)

    def make_zero_agent(n_mcts, n_blocks, in_planes, out_planes, saved_file):
        """Create a ZeroAgent whose network is restored from ``saved_file``."""
        zero = agents.ZeroAgent(BOARD_SIZE, n_mcts, in_planes, noise=False)
        # Build the network, move it to the device and store it on the
        # agent's ``model`` attribute.
        zero.model = model.PVNet(
            n_blocks, in_planes, out_planes, BOARD_SIZE).to(device)
        restore_parameters(zero.model, saved_file)
        return zero

    def make_agent(banner, path, n_mcts, n_blocks, in_planes, out_planes):
        """Print ``banner`` with ``path`` and create the agent it names."""
        print(banner, path)
        if path == 'random':
            return agents.RandomAgent(BOARD_SIZE)
        if path == 'puct':
            return agents.PUCTAgent(BOARD_SIZE, n_mcts)
        if path == 'uct':
            return agents.UCTAgent(BOARD_SIZE, n_mcts)
        if path == 'human':
            return agents.HumanAgent(BOARD_SIZE, self.env)
        if path == 'web':
            return agents.WebAgent(BOARD_SIZE)
        # This branch runs when previously saved parameters are used.
        return make_zero_agent(n_mcts, n_blocks, in_planes, out_planes, path)

    # Set up the player's agent.
    self.player = make_agent(
        'load player model:', model_path_a, N_MCTS_PLAYER,
        N_BLOCKS_PLAYER, IN_PLANES_PLAYER, OUT_PLANES_PLAYER)

    # Set up the enemy player's agent.
    self.enemy = make_agent(
        'load enemy model:', model_path_b, N_MCTS_ENEMY,
        N_BLOCKS_ENEMY, IN_PLANES_ENEMY, OUT_PLANES_ENEMY)

    # Monitor agent: built the same way as a checkpoint-backed enemy, but
    # with its own simulation count and parameter file.
    self.monitor = make_zero_agent(
        N_MCTS_MONITOR, N_BLOCKS_ENEMY, IN_PLANES_ENEMY, OUT_PLANES_ENEMY,
        model_path_m)
def main(model_path='./data/201215_11_12099_step_model.pickle'):
    """Run the self-play / training loop.

    Builds the agent, optimizer and scheduler, optionally restores them via
    ``load_model``, then for every iteration collects self-play data from
    ``N_PROCESS`` worker processes, trains on it and saves the model and the
    dataset.

    Args:
        model_path: Checkpoint handed to ``load_model`` before the loop
            starts; pass ``None`` to skip loading. The default is the path
            that used to be hard-coded in this function.
    """
    # Initialize agent & model
    agent = agents.ZeroAgent(BOARD_SIZE, N_MCTS, IN_PLANES, noise=True)
    agent.model = model.PVNet(N_BLOCKS, IN_PLANES, OUT_PLANES,
                              BOARD_SIZE).to(device)
    agent.model.share_memory()

    # Parameters whose name contains 'bn' or 'bias' get no weight decay;
    # all others use L2.
    no_decay = ['bn', 'bias']
    decayed, undecayed = [], []
    for name, param in agent.model.named_parameters():
        if any(tag in name for tag in no_decay):
            undecayed.append(param)
        else:
            decayed.append(param)
    model_parameters = [
        {'params': decayed, 'weight_decay': L2},
        {'params': undecayed, 'weight_decay': 0.0},
    ]
    optimizer = optim.SGD(model_parameters, momentum=0.9, lr=LR)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, TOTAL_ITER)

    logging.info(f'\nCUDA: {use_cuda}'
                 f'\nAGENT: {type(agent).__name__}'
                 f'\nMODEL: {agent.model}'
                 f'\nBOARD_SIZE: {BOARD_SIZE}'
                 f'\nN_MCTS: {N_MCTS}'
                 f'\nTAU_THRES: {TAU_THRES}'
                 f'\nN_BLOCKS: {N_BLOCKS}'
                 f'\nIN_PLANES: {IN_PLANES}'
                 f'\nOUT_PLANES: {OUT_PLANES}'
                 f'\nTOTAL_ITER: {TOTAL_ITER}'
                 f'\nMEMORY_SIZE: {MEMORY_SIZE}'
                 f'\nBATCH_SIZE: {BATCH_SIZE}'
                 f'\nLR: {LR}'
                 f'\nL2: {L2}')

    # ====================== self-play & training ====================== #
    if model_path is not None:
        load_model(agent, optimizer, scheduler, model_path)

    rule = '=' * 58
    # start_iter is read only after load_model has had the chance to run.
    for n_iter in range(start_iter, TOTAL_ITER):
        title = ' ' * 20 + ' {:2} Iteration '.format(n_iter) + ' ' * 20
        print(rule)
        print(title)
        print(rule)
        logging.info(datetime.now().isoformat())
        logging.info(rule)
        logging.info(title)
        logging.info(rule)

        datetime_now = datetime.now().strftime('%y%m%d')
        train_memory = []
        cur_memory = deque()

        # One self-play task per worker; results are gathered in completion
        # order.
        with futures.ProcessPoolExecutor(max_workers=N_PROCESS) as executor:
            fs = [
                executor.submit(self_play, agent, cur_memory, i)
                for i in range(N_PROCESS)
            ]
            for f in futures.as_completed(fs):
                train_memory.extend(f.result())

        train(agent, train_memory, optimizer, scheduler)

        save_model(agent, optimizer, scheduler, datetime_now, n_iter, step)
        save_dataset(train_memory, datetime_now, n_iter, step)

        reset_iter(result)