Example #1
 def test_call_hooks_uninitialized_param(self):
     target = UninitializedChain()
     opt = optimizers.MomentumSGD()
     opt.setup(target)
     opt.add_hook(optimizer.Lasso(rate=0.0005))
     target(np.ones((4, 10), dtype=np.float32))
     opt.call_hooks()
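A minimal, self-contained sketch of the same pattern, assuming a Chainer installation; SimpleChain below is a hypothetical stand-in for the test's UninitializedChain (a link whose weight is only allocated on the first forward pass):

import numpy as np
import chainer
import chainer.links as L
from chainer import optimizer, optimizers

class SimpleChain(chainer.Chain):
    def __init__(self):
        super(SimpleChain, self).__init__()
        with self.init_scope():
            # in_size=None defers weight allocation to the first call
            self.linear = L.Linear(None, 2)

    def __call__(self, x):
        return self.linear(x)

target = SimpleChain()
opt = optimizers.MomentumSGD()
opt.setup(target)
opt.add_hook(optimizer.Lasso(rate=0.0005))
target(np.ones((4, 10), dtype=np.float32))  # forward pass materializes the lazy weight
opt.call_hooks()  # the hook skips parameters whose gradient is still None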
Example #2
    def check_lasso(self):
        w = self.target.param.data
        g = self.target.param.grad
        xp = cuda.get_array_module(w)
        decay = 0.2
        expect = w - g - decay * xp.sign(w)

        opt = optimizers.SGD(lr=1)
        opt.setup(self.target)
        opt.add_hook(optimizer.Lasso(decay))
        opt.update()

        testing.assert_allclose(expect, w)
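The expected value follows directly from what the hook does: Lasso adds decay * sign(w) to each parameter's gradient before the update, and plain SGD with lr=1 then computes w - (g + decay * sign(w)) = w - g - decay * sign(w). A quick NumPy illustration of that arithmetic, with made-up values:

import numpy as np

w = np.array([0.5, -1.0, 2.0], dtype=np.float32)
g = np.array([0.1, 0.2, -0.3], dtype=np.float32)
decay = 0.2

# the hook rewrites g as g + decay * sign(w); SGD with lr=1 subtracts it
print(w - (g + decay * np.sign(w)))  # [ 0.2 -1.   2.1]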
Example #3
    def __init__(self,
                 agent,
                 memory_size=10**4,
                 replay_size=32,
                 gamma=0.99,
                 initial_exploration=10**4,
                 target_update_freq=10**4,
                 learning_rate=0.00025,
                 epsilon_decay=1e-6,
                 minimum_epsilon=0.1,
                 L1_rate=None):
        self.agent = agent
        self.target = Q(self.agent.q.n_history,
                        self.agent.q.n_action,
                        on_gpu=self.agent.q.on_gpu)

        self.memory_size = memory_size
        self.replay_size = replay_size
        self.gamma = gamma
        self.initial_exploration = initial_exploration
        self.target_update_freq = target_update_freq
        self.learning_rate = learning_rate
        self.epsilon_decay = epsilon_decay
        self.minimum_epsilon = minimum_epsilon
        self._step = 0

        # prepare for replay
        n_hist = self.agent.q.n_history
        size = self.agent.q.SIZE
        self.memory = [
            np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
            np.zeros(memory_size, dtype=np.uint8),
            np.zeros((memory_size, 1), dtype=np.float32),
            np.zeros((memory_size, n_hist, 3, size, size), dtype=np.float32),
            np.zeros((memory_size, 1), dtype=np.bool_)  # np.bool is removed in modern NumPy
        ]
        self.memory_text = [
            "state", "action", "reward", "next_state", "episode_end"
        ]

        # prepare optimizer
        self.optimizer = optimizers.RMSpropGraves(lr=learning_rate,
                                                  alpha=0.95,
                                                  momentum=0.95,
                                                  eps=0.01)
        self.optimizer.setup(self.agent.q)
        if L1_rate is not None:
            self.optimizer.add_hook(optimizer.Lasso(L1_rate))
        self._loss = 0
        self._qv = 0
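The five parallel arrays in self.memory form a fixed-size replay buffer, one slot per transition, with self.memory_text naming the fields. A hypothetical helper (not part of the original class) sketching how one transition would be written at the ring-buffer index:

    def store_transition(self, state, action, reward, next_state, episode_end):
        # hypothetical sketch: overwrite the slot for the current step
        i = self._step % self.memory_size
        self.memory[0][i] = state        # "state"
        self.memory[1][i] = action       # "action"
        self.memory[2][i] = reward       # "reward"
        self.memory[3][i] = next_state   # "next_state"
        self.memory[4][i] = episode_end  # "episode_end"
        self._step += 1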
Example #4
trs = False
train_iter = iterators.SerialIterator(dataset_train,
                                      batch_size=args.batchsize,
                                      shuffle=trs)
if args.numval > 0:
    val_iter = iterators.SerialIterator(dataset_val,
                                        batch_size=len(dataset_val),
                                        repeat=False,
                                        shuffle=False)

# -- Set optimizers
optimizer1 = use_optimizer(lr=args.learning_rate)
optimizer1.setup(loss.phi)
optimizer2 = use_optimizer(lr=args.learning_rate)
optimizer2.setup(loss.net)
optimizer1.add_hook(optimizer_module.Lasso(args.beta))
optimizer2.add_hook(optimizer_module.WeightDecay(args.gamma))
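Note the asymmetry in the two penalties: Lasso is Chainer's L1 hook, adding args.beta * sign(w) to each gradient of loss.phi and so pushing small weights toward exact zeros, while WeightDecay is the L2 hook, adding args.gamma * w to each gradient of loss.net and shrinking weights without zeroing them.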

# -- Set a trigger
if args.log_in_iteration:
    trigger = (1, 'iteration')
else:
    trigger = (1, 'epoch')

# -- Set a trainer
if args.fixed_embedder:
    optimizer_dict = {'net': optimizer2}
else:
    optimizer_dict = {'phi': optimizer1, 'net': optimizer2}
updater = lkis.Updater(train_iter,
                       optimizer_dict,