def __init__(self, model_path=None, num_simul=800, num_channel=128, user=None):
    """Build the simulation env, search tree, network and bookkeeping state.

    Args:
        model_path: Optional checkpoint path handed to ``torch.load``. When
            it is not ``None`` the loaded state dict is applied to the
            policy/value network.
        num_simul: Stored unchanged as ``self.num_simul``.
        num_channel: Channel count passed to ``PolicyValueNet``.
        user: Stored unchanged as ``self.user``.
    """
    # Simulation environment.
    self.env_simul = tictactoe_env_simul.TicTacToeEnv()

    # Tree: any key seen for the first time maps to a zero-filled
    # (3, 3, 4) float array.
    def _blank_entry():
        return np.zeros((3, 3, 4), 'float')

    self.tree = defaultdict(_blank_entry)

    # Model. It is moved to the GPU unconditionally, so CUDA is required.
    network = neural_net_5block.PolicyValueNet(num_channel)
    self.pv_net = network.cuda()
    if model_path is not None:
        print(' ####### Model is loaded ####### ')
        weights = torch.load(model_path)
        self.pv_net.load_state_dict(weights)

    # General state.
    self.done = False
    self.root = self.evaluate = self.player_color = None
    self.num_simul = num_simul
    self.user = user

    # Hyperparameters.
    self.c_puct = 5
    self.epsilon = 0.25
    self.alpha = 0.7
    self.tau = None

    # Per-step members: declared here as None; presumably populated by
    # reset_step() -- confirm against that method.
    self.edge = self.total_visit = None
    self.legal_move = self.no_legal_move = None
    self.state = self.prob = self.value = None
    self.current_user = None

    # Per-episode members: declared here as None; presumably populated by
    # _reset_episode() -- confirm against that method.
    self.node_memory = self.edge_memory = None
    self.action_memory = self.action_count = None

    # Initialise via the reset hooks, step first and then episode.
    self.reset_step()
    self._reset_episode()
def __init__(self, model_path=None):
    """Create the search tree, the policy/value network and bookkeeping state.

    Args:
        model_path: Optional checkpoint path. When it is not ``None`` the
            state dict stored there is loaded into the network.
    """
    # Tree: any key seen for the first time maps to a zero-filled
    # (3, 3, 4) float array.
    self.tree = defaultdict(lambda: np.zeros((3, 3, 4), 'float'))

    # Model. It is left on the CPU here (no .cuda() call).
    self.pv_net = neural_net_5block.PolicyValueNet(CHANNEL)
    if model_path is not None:
        print(' ####### Model is loaded ####### ')
        # The network lives on the CPU, so deserialize onto the CPU too.
        # Without map_location, a checkpoint that was saved from a CUDA
        # model cannot be loaded on a machine without CUDA. On GPU machines
        # the result is unchanged, since load_state_dict copies the values
        # into the existing CPU parameters either way.
        state_dict = torch.load(model_path, map_location='cpu')
        self.pv_net.load_state_dict(state_dict)

    # Hyperparameters.
    self.c_puct = 5
    self.epsilon = 0.25
    self.alpha = 0.7

    # Loop controller.
    self.done = False

    # Per-step members: declared here as None; presumably populated by
    # reset_step() -- confirm against that method.
    self.edge = None
    self.total_visit = None
    self.legal_move = None
    self.no_legal_move = None
    self.state = None
    self.prob = None
    self.value = None
    self.current_user = None

    # Per-episode members: declared here as None; presumably populated by
    # _reset_episode() -- confirm against that method.
    self.node_memory = None
    self.edge_memory = None
    self.action_memory = None
    self.action_count = None

    # Initialise via the reset hooks, step first and then episode.
    self.reset_step()
    self._reset_episode()
EPOCHS = 64 BATCH_SIZE = 32 LR = 0.2 L2 = 0.0001 CHANNEL = 128 # data load with open('data/train_dataset_s800_g800.pickle', 'rb') as f: dataset = pickle.load(f) train_dataset = data.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True) # 신경망 생성 및 최적화 인스턴스 생성 pv_net = neural_net_5block.PolicyValueNet(CHANNEL).cuda() optimizer = torch.optim.SGD(pv_net.parameters(), lr=LR, momentum=0.9, weight_decay=L2) scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', min_lr=2e-4, patience=5, verbose=1) # print spec spec = { 'epoch': EPOCHS, 'batch size': BATCH_SIZE, 'optim': 'SGD',