def __init__(
    self,
    grad_clip_params: Dict = None,
    fp16_grad_scale: float = 128.0,
    accumulation_steps: int = 1,
    optimizer_key: str = None,
    loss_key: str = None,
):
    """
    Resolve the gradient-clipping function and reset internal counters.

    Args:
        grad_clip_params: keyword arguments forwarded to
            ``Registry.get_grad_clip_fn``; any falsy value (``None``,
            empty dict) is replaced by an empty dict.
        fp16_grad_scale: stored as ``self.fp16_grad_scale``
            (presumably the gradient/loss scale used in fp16 mode
            -- TODO confirm against the code that reads it).
        accumulation_steps: stored as ``self.accumulation_steps``
            (presumably the number of steps to accumulate gradients
            over -- TODO confirm).
        optimizer_key: stored as ``self.optimizer_key``.
        loss_key: stored as ``self.loss_key``.
    """
    # Imported lazily: a module-level import would create an import cycle.
    from catalyst.contrib.registry import Registry

    clip_kwargs = grad_clip_params if grad_clip_params else {}
    self.grad_clip_fn = Registry.get_grad_clip_fn(**clip_kwargs)

    # Configuration taken straight from the arguments.
    self.optimizer_key, self.loss_key = optimizer_key, loss_key
    self.accumulation_steps = accumulation_steps
    self.fp16_grad_scale = fp16_grad_scale

    # Mutable state; every instance starts with fp16 off and zeroed values.
    self.fp16 = False
    self._accumulation_counter = 0
    self._optimizer_wd = 0
def __init__(
    self,
    actor,
    critic,
    gamma,
    n_step,
    actor_optimizer_params,
    critic_optimizer_params,
    actor_grad_clip_params=None,
    critic_grad_clip_params=None,
    actor_loss_params=None,
    critic_loss_params=None,
    actor_scheduler_params=None,
    critic_scheduler_params=None,
    resume=None,
    load_optimizer=True,
    actor_tau=1.0,
    critic_tau=1.0,
    min_action=-1.0,
    max_action=1.0,
    **kwargs
):
    """
    Wire up the actor/critic networks and their training helpers.

    Both networks are moved to the device returned by
    ``UtilsFactory.prepare_device()``; deep copies of them are kept as
    ``target_actor`` / ``target_critic``. Optimizers, schedulers,
    gradient-clipping functions and criteria are all obtained from
    ``Registry`` using the corresponding ``*_params`` dicts.

    Args:
        actor: actor network; must support ``.to(device)``.
        critic: critic network; must support ``.to(device)``.
        gamma: stored as ``self.gamma`` (presumably the discount
            factor -- TODO confirm).
        n_step: stored as ``self.n_step``.
        actor_optimizer_params: kwargs for ``Registry.get_optimizer``
            applied to the actor.
        critic_optimizer_params: kwargs for ``Registry.get_optimizer``
            applied to the critic.
        actor_grad_clip_params: kwargs for
            ``Registry.get_grad_clip_fn``; falsy means ``{}``.
        critic_grad_clip_params: same, for the critic.
        actor_loss_params: kwargs for ``Registry.get_criterion``;
            falsy means ``{}`` for the call, but the value is stored
            on the instance exactly as passed.
        critic_loss_params: same, for the critic.
        actor_scheduler_params: kwargs for ``Registry.get_scheduler``;
            falsy means ``{}``.
        critic_scheduler_params: same, for the critic.
        resume: if not ``None``, passed to ``self.load_checkpoint``
            once construction is complete.
        load_optimizer: forwarded to ``self.load_checkpoint``.
        actor_tau: stored as ``self.actor_tau``.
        critic_tau: stored as ``self.critic_tau``.
        min_action: stored as ``self.min_action``.
        max_action: stored as ``self.max_action``.
        **kwargs: forwarded unchanged to ``self._init``.
    """
    # Imported lazily: a module-level import would create a dependency
    # cycle.
    from catalyst.contrib.registry import Registry

    # --- networks -------------------------------------------------------
    device = UtilsFactory.prepare_device()
    self._device = device
    self.actor = actor.to(device)
    self.critic = critic.to(device)
    self.target_actor = copy.deepcopy(actor).to(device)
    self.target_critic = copy.deepcopy(critic).to(device)

    # --- optimizers -----------------------------------------------------
    self.actor_optimizer_params = actor_optimizer_params
    self.critic_optimizer_params = critic_optimizer_params
    self.actor_optimizer = Registry.get_optimizer(
        self.actor, **actor_optimizer_params
    )
    self.critic_optimizer = Registry.get_optimizer(
        self.critic, **critic_optimizer_params
    )

    # --- schedulers -----------------------------------------------------
    if not actor_scheduler_params:
        actor_scheduler_params = {}
    if not critic_scheduler_params:
        critic_scheduler_params = {}
    self.actor_scheduler_params = actor_scheduler_params
    self.critic_scheduler_params = critic_scheduler_params
    self.actor_scheduler = Registry.get_scheduler(
        self.actor_optimizer, **actor_scheduler_params
    )
    self.critic_scheduler = Registry.get_scheduler(
        self.critic_optimizer, **critic_scheduler_params
    )

    self.gamma = gamma
    self.n_step = n_step

    # --- gradient clipping ----------------------------------------------
    if not actor_grad_clip_params:
        actor_grad_clip_params = {}
    if not critic_grad_clip_params:
        critic_grad_clip_params = {}
    self.actor_grad_clip_params = actor_grad_clip_params
    self.critic_grad_clip_params = critic_grad_clip_params
    self.actor_grad_clip_fn = Registry.get_grad_clip_fn(
        **actor_grad_clip_params
    )
    self.critic_grad_clip_fn = Registry.get_grad_clip_fn(
        **critic_grad_clip_params
    )

    # --- criteria -------------------------------------------------------
    # The loss params are stored exactly as passed (possibly None); only
    # the kwargs handed to the registry fall back to an empty dict.
    actor_criterion_kwargs = actor_loss_params or {}
    critic_criterion_kwargs = critic_loss_params or {}
    self.actor_criterion = Registry.get_criterion(**actor_criterion_kwargs)
    self.critic_criterion = Registry.get_criterion(
        **critic_criterion_kwargs
    )
    self.actor_loss_params = actor_loss_params
    self.critic_loss_params = critic_loss_params

    # --- remaining hyper-parameters -------------------------------------
    self.actor_tau, self.critic_tau = actor_tau, critic_tau
    self.min_action, self.max_action = min_action, max_action

    # Extra keyword arguments go to self._init, which is defined outside
    # this block.
    self._init(**kwargs)

    if resume is None:
        return
    self.load_checkpoint(resume, load_optimizer=load_optimizer)