Exemplo n.º 1
0
    def __init__(self, model_path=None, num_simul=800,
                 num_channel=128, user=None):
        """Create the simulation env, tree table and network, then reset state.

        Args:
            model_path: Optional path handed to ``torch.load``; when not
                ``None`` the loaded state dict is applied to the network.
            num_simul: Stored as ``self.num_simul`` (presumably the number
                of simulations per move -- confirm against the caller).
            num_channel: Passed to ``PolicyValueNet`` as its only argument.
            user: Stored unchanged as ``self.user``.
        """
        # Simulation environment.
        self.env_simul = tictactoe_env_simul.TicTacToeEnv()

        # Tree table: an unseen key starts as a zero-filled (3, 3, 4) array.
        def _zero_entry():
            return np.zeros((3, 3, 4), 'float')

        self.tree = defaultdict(_zero_entry)

        # Policy/value network, moved to the GPU right after construction.
        network = neural_net_5block.PolicyValueNet(num_channel)
        self.pv_net = network.cuda()
        if model_path is not None:
            print(' #######  Model is loaded  ####### ')
            weights = torch.load(model_path)
            self.pv_net.load_state_dict(weights)

        # General bookkeeping.
        self.done = False
        self.root = self.evaluate = self.player_color = None
        self.num_simul = num_simul
        self.user = user

        # Hyperparameters.
        self.c_puct, self.epsilon, self.alpha = 5, 0.25, 0.7
        self.tau = None

        # Members grouped under reset_step (presumably populated by it).
        self.edge = self.total_visit = None
        self.legal_move = self.no_legal_move = None
        self.state = self.prob = self.value = None
        self.current_user = None

        # Members grouped under _reset_episode (presumably populated by it).
        self.node_memory = self.edge_memory = None
        self.action_memory = self.action_count = None

        # Bring the step and episode state to their initial values.
        self.reset_step()
        self._reset_episode()
Exemplo n.º 2
0
    def __init__(self, model_path=None):
        """Create the tree table and the CPU network, then reset state.

        Args:
            model_path: Optional path to a saved state dict. When not
                ``None`` it is loaded with ``torch.load`` and applied to
                the network via ``load_state_dict``.
        """
        # tree: an unseen key starts as a zero-filled (3, 3, 4) float array
        self.tree = defaultdict(lambda: np.zeros((3, 3, 4), 'float'))

        # model (stays on the CPU: there is no .cuda() call here)
        self.pv_net = neural_net_5block.PolicyValueNet(CHANNEL)
        if model_path is not None:
            print(' #######  Model is loaded  ####### ')
            # Remap every storage to the CPU. Without map_location a
            # checkpoint saved from a CUDA model makes torch.load raise on
            # a machine without a GPU, even though this network is CPU-only.
            # The callable form is used because it works on old PyTorch
            # releases as well as current ones.
            state_dict = torch.load(
                model_path, map_location=lambda storage, loc: storage)
            self.pv_net.load_state_dict(state_dict)

        # hyperparameter
        self.c_puct = 5
        self.epsilon = 0.25
        self.alpha = 0.7

        # loop controller
        self.done = False

        # reset_step member (presumably populated by reset_step)
        self.edge = None
        self.total_visit = None
        self.legal_move = None
        self.no_legal_move = None
        self.state = None
        self.prob = None
        self.value = None
        self.current_user = None

        # reset_episode member (presumably populated by _reset_episode)
        self.node_memory = None
        self.edge_memory = None
        self.action_memory = None
        self.action_count = None

        # init
        self.reset_step()
        self._reset_episode()
Exemplo n.º 3
0
# Training hyperparameters.
EPOCHS, BATCH_SIZE = 64, 32
LR, L2 = 0.2, 0.0001
CHANNEL = 128

# Load the pickled dataset and wrap it in a shuffling loader that drops
# the last incomplete batch.
with open('data/train_dataset_s800_g800.pickle', 'rb') as f:
    dataset = pickle.load(f)
train_dataset = data.DataLoader(
    dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

# Create the neural network (on the GPU) and the optimizer instances.
pv_net = neural_net_5block.PolicyValueNet(CHANNEL).cuda()
optimizer = torch.optim.SGD(
    pv_net.parameters(), lr=LR, momentum=0.9, weight_decay=L2)
# The learning rate is reduced when the monitored value stops decreasing,
# but never below min_lr.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', min_lr=2e-4, patience=5, verbose=1)

# print spec
spec = {
    'epoch': EPOCHS,
    'batch size': BATCH_SIZE,
    'optim': 'SGD',