# Assumed module-level imports for this method: theano, theano.tensor as T, and the
# local adam module providing adam.Adam(loss, params) -> dict of Theano updates.
def get_functions(self):
    x = T.ftensor4('x')   # input
    y = T.fvector('y')    # expected output
    lr = T.scalar('lr')   # learning rate

    # Zero-pad the input spatially by self.pad pixels on each side.
    pad_x = T.zeros([1, self.input_channels] + [2 * self.pad + self.input_dim] * 2)
    pad_x = T.set_subtensor(
        pad_x[:, :, self.pad:-self.pad, self.pad:-self.pad], x)

    out, locations, wl_grad = self.full_network(pad_x)
    loss = self.loss(y, out)
    reward = self.reward(loss)

    # REINFORCE-style gradient for the location weights W_l: advantage
    # (reward - baseline) times wl_grad, normalized by its L2 norm.
    rl_gradient = (reward - self.base_reward).sum() * wl_grad / T.sqrt(T.sum(wl_grad ** 2))
    # Gradient for the reward baseline, obtained from the squared advantage.
    br_gradient, = T.grad(T.sqr(reward - self.base_reward).sum(), [self.base_reward])

    train_updates = adam.Adam(loss, self.nn_params)
    train_updates[self.W_l] = self.W_l + lr * rl_gradient  # gradient ascent
    train_updates[self.base_reward] = self.base_reward - lr * br_gradient

    train = theano.function(
        inputs=[x, y, lr],
        outputs=[out, loss, reward],
        updates=train_updates)
    predict = theano.function(
        inputs=[x],
        outputs=[out, locations])
    return train, predict
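# Hedged sketch (an assumption, not the repo's actual adam module): a minimal stand-in
# for what adam.Adam(loss, params) above presumably returns, i.e. an OrderedDict of
# Theano updates implementing Adam. The helper name `adam_updates` and the
# hyperparameter defaults are illustrative only.
from collections import OrderedDict

import numpy as np
import theano
import theano.tensor as T


def adam_updates(loss, params, lr=1e-3, b1=0.9, b2=0.999, eps=1e-8):
    updates = OrderedDict()
    t_prev = theano.shared(np.asarray(0., dtype=theano.config.floatX))
    t = t_prev + 1
    a_t = lr * T.sqrt(1 - b2 ** t) / (1 - b1 ** t)  # bias-corrected step size
    for p, g in zip(params, T.grad(loss, params)):
        m_prev = theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX))
        v_prev = theano.shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX))
        m = b1 * m_prev + (1 - b1) * g        # first-moment (mean) estimate
        v = b2 * v_prev + (1 - b2) * g ** 2   # second-moment (uncentered variance) estimate
        updates[m_prev] = m
        updates[v_prev] = v
        updates[p] = p - a_t * m / (T.sqrt(v) + eps)
    updates[t_prev] = t
    return updates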
import torch


def get_optimizer(model, args):
    import sys
    sys.path.insert(0, '/home/ubuntu/skin_demo/RoP/AMSGrad/')

    if args.optimizer == 'sgd':
        return torch.optim.SGD(filter(lambda p: p.requires_grad, model.parameters()),
                               args.lr,
                               momentum=args.momentum,
                               nesterov=args.nesterov,
                               weight_decay=args.weight_decay)
    elif args.optimizer == 'rmsprop':
        return torch.optim.RMSprop(model.parameters(),
                                   args.lr,
                                   alpha=args.alpha,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        return torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                args.lr,
                                # betas=(args.beta1, args.beta2),
                                weight_decay=args.weight_decay)
    elif args.optimizer == 'adam_1':
        import adam
        optimizer = adam.Adam([{'params': model.parameters(), 'lr': args.lr}],
                              lr=args.lr, weight_decay=0.0001, amsgrad=True)
        return optimizer
    elif args.optimizer == 'adam_2':
        import adam_amsgrad
        optimizer = adam_amsgrad.Adam([{'params': model.parameters(), 'lr': args.lr}],
                                      lr=args.lr, weight_decay=0.0001, amsgrad=True)
        return optimizer
    elif args.optimizer == 'adam_3':
        import adamw
        optimizer = adamw.Adam([{'params': model.parameters(), 'lr': args.lr}],
                               lr=args.lr, weight_decay=0.0001)
        return optimizer
    else:
        raise NotImplementedError
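# Hedged usage sketch (not part of the original file): build an argparse Namespace with
# the attributes get_optimizer reads and exercise the 'sgd' branch. The attribute values
# and the stand-in linear model are illustrative assumptions.
if __name__ == '__main__':
    import argparse
    import torch.nn as nn

    args = argparse.Namespace(optimizer='sgd', lr=0.01, momentum=0.9,
                              nesterov=True, weight_decay=1e-4)
    demo_model = nn.Linear(10, 2)           # stand-in for the real model
    opt = get_optimizer(demo_model, args)
    print(type(opt).__name__)               # expected: SGD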
import adam


def Adam(grads, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    # Thin wrapper that forwards to the local adam module with these default hyperparameters.
    return adam.Adam(grads, lr, b1, b2, e)
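# Note (an observation, not stated in the source): the defaults b1=0.1 and b2=0.001 look
# like the "one-minus" convention used in some early Theano Adam implementations, i.e.
# they likely correspond to 1 - beta1 and 1 - beta2 of the usual beta1=0.9, beta2=0.999
# defaults, so PyTorch-style betas should not be passed here directly.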
        return action


# In[16]:

current_model = CnnDQN(env.observation_space.shape, env.action_space.n)
# current_model.load_state_dict(torch.load('current.ckpt'))
target_model = CnnDQN(env.observation_space.shape, env.action_space.n)
# target_model.load_state_dict(torch.load('target.ckpt'))

if USE_CUDA:
    current_model = current_model.cuda()
    target_model = target_model.cuda()

# optimizer = optim.Adam(current_model.parameters(), lr=0.00001)
optimizer = adam.Adam(current_model.parameters(), lr=0.00001)

replay_initial = 10000
replay_buffer = ReplayBuffer(100000)

update_target(current_model, target_model)


# In[17]:

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 30000
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)
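# In[ ]:

# Hedged sketch, not a cell from the original notebook: print the exploration schedule at
# a few frame indices to see epsilon decay from epsilon_start toward epsilon_final.
import math  # assumed to be imported in an earlier cell; repeated here so this runs alone
for frame_idx in (0, 1000, 10000, 30000, 100000):
    print(frame_idx, round(epsilon_by_frame(frame_idx), 4))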