def __init__(self, actor_critic, value_loss_coef, entropy_coef, lr=None,
             eps=None, alpha=None, max_grad_norm=None, acktr=False,
             gradient_noise=0.0):
    """Set up the A2C/ACKTR update rule.

    Args:
        actor_critic: policy/value network to optimize.
        value_loss_coef: weight on the value-function loss term.
        entropy_coef: weight on the entropy bonus.
        lr: RMSprop learning rate (unused when ``acktr`` is True).
        eps: RMSprop denominator epsilon.
        alpha: RMSprop smoothing constant.
        max_grad_norm: gradient-clipping threshold.
        acktr: when True, use the KFAC optimizer instead of RMSprop.
        gradient_noise: scale of noise injected into gradients (0.0 = off).
    """
    self.actor_critic = actor_critic
    self.acktr = acktr
    self.value_loss_coef = value_loss_coef
    self.entropy_coef = entropy_coef
    self.max_grad_norm = max_grad_norm
    # KFAC manages its own step sizes; RMSprop takes the supplied hyperparams.
    self.optimizer = (
        KFACOptimizer(actor_critic)
        if acktr
        else optim.RMSprop(actor_critic.parameters(), lr, eps=eps, alpha=alpha)
    )
    self.gradient_noise = gradient_noise
def __init__(self, actor_critic, value_loss_coef, entropy_coef, lr=None,
             eps=None, alpha=None, max_grad_norm=None, acktr=False):
    """Set up the A2C/ACKTR update rule.

    Args:
        actor_critic: policy/value network to optimize.
        value_loss_coef: weight on the value-function loss term.
        entropy_coef: weight on the entropy bonus.
        lr: RMSprop learning rate (unused when ``acktr`` is True).
        eps: RMSprop denominator epsilon.
        alpha: RMSprop smoothing constant.
        max_grad_norm: gradient-clipping threshold.
        acktr: when True, use the KFAC optimizer instead of RMSprop.
    """
    self.actor_critic = actor_critic
    self.acktr = acktr
    self.value_loss_coef = value_loss_coef
    self.entropy_coef = entropy_coef
    self.max_grad_norm = max_grad_norm
    if acktr:
        # NOTE(review): unlike sibling variants, this passes a two-element
        # list containing the SAME model twice. Confirm this KFACOptimizer
        # really expects a list and that the duplication is intentional.
        self.optimizer = KFACOptimizer([actor_critic, actor_critic])
    else:
        self.optimizer = optim.RMSprop(
            actor_critic.parameters(), lr, eps=eps, alpha=alpha)
def __init__(self, actor_critic, value_loss_coef, entropy_coef, lr=None,
             eps=None, alpha=None, max_grad_norm=None, acktr=False,
             train_selfsup_attention=False):
    """Set up the A2C/ACKTR update rule, optionally with a separate
    optimizer for the self-supervised attention sub-module.

    Args:
        actor_critic: policy/value network to optimize.
        value_loss_coef: weight on the value-function loss term.
        entropy_coef: weight on the entropy bonus.
        lr: RMSprop learning rate (unused when ``acktr`` is True).
        eps: RMSprop denominator epsilon.
        alpha: RMSprop smoothing constant.
        max_grad_norm: gradient-clipping threshold.
        acktr: when True, use the KFAC optimizer instead of RMSprop.
        train_selfsup_attention: when True, also build an Adam optimizer
            over ``actor_critic.base.selfsup_attention`` parameters.
    """
    self.actor_critic = actor_critic
    self.acktr = acktr
    self.value_loss_coef = value_loss_coef
    self.entropy_coef = entropy_coef
    self.max_grad_norm = max_grad_norm
    self.train_selfsup_attention = train_selfsup_attention
    self.optimizer = (
        KFACOptimizer(actor_critic)
        if acktr
        else optim.RMSprop(actor_critic.parameters(), lr, eps=eps, alpha=alpha)
    )
    if self.train_selfsup_attention:
        # The attention head trains with its own fixed-lr Adam optimizer.
        selfsup_params = actor_critic.base.selfsup_attention.parameters()
        self.selfsup_attention_optimizer = optim.Adam(selfsup_params, 0.001)
def __init__(self, actor_critic, value_loss_coef, entropy_coef, lr=None,
             lr_beta=None, reg_beta=None, eps=None, alpha=None,
             max_grad_norm=None, acktr=False):
    """Set up the A2C/ACKTR update rule with a separate learning rate and
    weight decay for the beta-actor sub-network.

    Args:
        actor_critic: policy/value network to optimize.
        value_loss_coef: weight on the value-function loss term.
        entropy_coef: weight on the entropy bonus.
        lr: RMSprop learning rate for the main parameters.
        lr_beta: RMSprop learning rate for ``base.beta_net_actor`` params.
        reg_beta: weight decay applied to the beta-actor parameter group.
        eps: RMSprop denominator epsilon.
        alpha: RMSprop smoothing constant.
        max_grad_norm: gradient-clipping threshold.
        acktr: when True, use the KFAC optimizer instead of RMSprop.
    """
    self.actor_critic = actor_critic
    self.acktr = acktr
    self.value_loss_coef = value_loss_coef
    self.entropy_coef = entropy_coef
    self.max_grad_norm = max_grad_norm
    # BUGFIX: these lists were previously built only inside the `if acktr:`
    # branch but consumed in the `else:` branch, so any non-ACKTR run raised
    # AttributeError. Build them unconditionally before choosing an optimizer.
    self.beta_actor_list = []
    self.param_list = []
    for name, param in actor_critic.named_parameters():
        if "base.beta_net_actor" in name:
            self.beta_actor_list.append(param)
        else:
            self.param_list.append(param)
    if acktr:
        self.optimizer = KFACOptimizer(actor_critic)
    else:
        # Pierre: separate learning rates for beta net and actor net
        self.optimizer = optim.RMSprop(
            [{'params': self.param_list},
             {'params': self.beta_actor_list, 'lr': lr_beta,
              'weight_decay': reg_beta}],
            lr, eps=eps, alpha=alpha)
def __init__(self, actor_critic, value_loss_coef, entropy_coef, lr=None,
             eps=None, alpha=None, max_grad_norm=None, acktr=False,
             path_recorder=None, cost_evaluator=None, arch_loss_coef=0):
    """Set up the A2C/ACKTR update rule with architecture-cost bookkeeping.

    Args:
        actor_critic: policy/value network to optimize.
        value_loss_coef: weight on the value-function loss term.
        entropy_coef: weight on the entropy bonus.
        lr: RMSprop learning rate (unused when ``acktr`` is True).
        eps: RMSprop denominator epsilon.
        alpha: RMSprop smoothing constant.
        max_grad_norm: gradient-clipping threshold.
        acktr: when True, use the KFAC optimizer instead of RMSprop.
        path_recorder: records sampled architecture paths (stored as-is).
        cost_evaluator: evaluates architecture cost (stored as-is).
        arch_loss_coef: weight on the architecture-cost loss term.
    """
    self.actor_critic = actor_critic
    self.acktr = acktr
    self.value_loss_coef = value_loss_coef
    self.entropy_coef = entropy_coef
    self.max_grad_norm = max_grad_norm
    self.path_recorder = path_recorder
    self.cost_evaluator = cost_evaluator
    self.arch_loss_coef = arch_loss_coef
    self.optimizer = (
        KFACOptimizer(actor_critic)
        if acktr
        else optim.RMSprop(actor_critic.parameters(), lr, eps=eps, alpha=alpha)
    )
def init_optimizer(self):
    """(Re)create the optimizer and its exponential LR scheduler from the
    hyperparameters already stored on ``self`` (``acktr``, ``lr``, ``eps``,
    ``alpha``, ``lr_decay``)."""
    if self.acktr:
        self.optimizer = KFACOptimizer(self.actor_critic)
    else:
        params = self.actor_critic.parameters()
        self.optimizer = optim.RMSprop(
            params, self.lr, eps=self.eps, alpha=self.alpha)
    # Decay the learning rate by `lr_decay` each scheduler step.
    self.schedulers = ExponentialLR(self.optimizer, self.lr_decay)
def __init__(self, actor_critic, value_loss_coef, entropy_coef, lr=None,
             filter_mem=None, eps=None, alpha=None, max_grad_norm=None,
             acktr=False):
    """Set up the A2C/ACKTR update rule with the filter-net parameters kept
    in their own optimizer parameter group.

    Args:
        actor_critic: policy/value network to optimize.
        value_loss_coef: weight on the value-function loss term.
        entropy_coef: weight on the entropy bonus.
        lr: RMSprop learning rate (unused when ``acktr`` is True).
        filter_mem: accepted for interface compatibility; not used here.
        eps: RMSprop denominator epsilon.
        alpha: RMSprop smoothing constant.
        max_grad_norm: gradient-clipping threshold.
        acktr: when True, use the KFAC optimizer instead of RMSprop.
    """
    self.actor_critic = actor_critic
    self.acktr = acktr
    self.value_loss_coef = value_loss_coef
    self.entropy_coef = entropy_coef
    self.max_grad_norm = max_grad_norm
    # BUGFIX: these lists were previously built only inside the `if acktr:`
    # branch but consumed in the `else:` branch, so any non-ACKTR run raised
    # AttributeError. Build them unconditionally before choosing an optimizer.
    self.filter_list = []
    self.param_list = []
    for name, param in actor_critic.named_parameters():
        if "base.filter_net" in name:
            self.filter_list.append(param)
        else:
            self.param_list.append(param)
    if acktr:
        self.optimizer = KFACOptimizer(actor_critic)
    else:
        self.optimizer = optim.RMSprop(
            [{'params': self.param_list},
             {'params': self.filter_list}],
            lr, eps=eps, alpha=alpha)
def __init__(self, actor_critic, value_loss_coef, entropy_coef, lr=None,
             lr_beta=None, reg_beta=None, delib_center=0.5, eps=None,
             alpha=None, max_grad_norm=None, acktr=False):
    """Set up the A2C/ACKTR update rule with a separate learning rate for
    the beta-value sub-network.

    Args:
        actor_critic: policy/value network to optimize.
        value_loss_coef: weight on the value-function loss term.
        entropy_coef: weight on the entropy bonus.
        lr: RMSprop learning rate for the main parameters.
        lr_beta: RMSprop learning rate for ``base.beta_value_net`` params.
        reg_beta: regularization coefficient stored for later use.
        delib_center: deliberation-center value stored for later use.
        eps: RMSprop denominator epsilon.
        alpha: RMSprop smoothing constant.
        max_grad_norm: gradient-clipping threshold.
        acktr: when True, use the KFAC optimizer instead of RMSprop.
    """
    self.actor_critic = actor_critic
    self.acktr = acktr
    self.value_loss_coef = value_loss_coef
    self.entropy_coef = entropy_coef
    self.max_grad_norm = max_grad_norm
    self.reg_beta = reg_beta
    self.delib_center = delib_center
    # BUGFIX: these lists were previously built only inside the `if acktr:`
    # branch but consumed in the `else:` branch, so any non-ACKTR run raised
    # AttributeError. Build them unconditionally before choosing an optimizer.
    self.beta_value_list = []
    self.param_list = []
    for name, param in actor_critic.named_parameters():
        if "base.beta_value_net" in name:
            self.beta_value_list.append(param)
        else:
            self.param_list.append(param)
    if acktr:
        self.optimizer = KFACOptimizer(actor_critic)
    else:
        self.optimizer = optim.RMSprop(
            [{'params': self.param_list},
             {'params': self.beta_value_list, 'lr': lr_beta}],
            lr, eps=eps, alpha=alpha)