Пример #1
0
    def __init__(
        self,
        grad_clip_params: Dict = None,
        fp16_grad_scale: float = 128.0,
        accumulation_steps: int = 1,
        optimizer_key: str = None,
        loss_key: str = None,
    ):
        """
        Store the optimization settings and build the gradient-clipping
        function.

        Args:
            grad_clip_params: keyword arguments forwarded to
                ``Registry.get_grad_clip_fn``; ``None`` (or any other falsy
                value) is replaced by an empty dict
            fp16_grad_scale: kept as ``self.fp16_grad_scale``
                (presumably the loss/gradient scale for fp16 training --
                TODO confirm against the code that reads it)
            accumulation_steps: kept as ``self.accumulation_steps``
                (presumably the number of steps gradients are accumulated
                over -- TODO confirm)
            optimizer_key: kept as ``self.optimizer_key``
            loss_key: kept as ``self.loss_key``
        """
        # Imported here rather than at module level to avoid a circular
        # import.
        from catalyst.contrib.registry import Registry

        clip_kwargs = grad_clip_params or {}
        self.grad_clip_fn = Registry.get_grad_clip_fn(**clip_kwargs)

        # fp16 mode always starts disabled in this constructor.
        self.fp16 = False
        self.fp16_grad_scale = fp16_grad_scale

        self.accumulation_steps = accumulation_steps
        self._accumulation_counter = 0

        self.optimizer_key = optimizer_key
        self.loss_key = loss_key

        # Internal bookkeeping value, zero-initialised here.
        self._optimizer_wd = 0
Пример #2
0
    def __init__(
        self,
        actor,
        critic,
        gamma,
        n_step,
        actor_optimizer_params,
        critic_optimizer_params,
        actor_grad_clip_params=None,
        critic_grad_clip_params=None,
        actor_loss_params=None,
        critic_loss_params=None,
        actor_scheduler_params=None,
        critic_scheduler_params=None,
        resume=None,
        load_optimizer=True,
        actor_tau=1.0,
        critic_tau=1.0,
        min_action=-1.0,
        max_action=1.0,
        **kwargs
    ):
        """
        Set up the actor/critic pair together with their copies,
        optimizers, schedulers, gradient-clipping functions and criteria.

        Args:
            actor: actor network; moved to the prepared device and
                deep-copied into ``self.target_actor``
            critic: critic network; moved to the prepared device and
                deep-copied into ``self.target_critic``
            gamma: kept as ``self.gamma``
                (presumably the discount factor -- TODO confirm)
            n_step: kept as ``self.n_step``
            actor_optimizer_params: keyword arguments for
                ``Registry.get_optimizer`` applied to the actor
            critic_optimizer_params: keyword arguments for
                ``Registry.get_optimizer`` applied to the critic
            actor_grad_clip_params: keyword arguments for
                ``Registry.get_grad_clip_fn``; falsy values become ``{}``
            critic_grad_clip_params: same as above, for the critic
            actor_loss_params: keyword arguments for
                ``Registry.get_criterion``; falsy values are treated as
                ``{}`` for the call only
            critic_loss_params: same as above, for the critic
            actor_scheduler_params: keyword arguments for
                ``Registry.get_scheduler``; falsy values become ``{}``
            critic_scheduler_params: same as above, for the critic
            resume: when not ``None``, passed to ``self.load_checkpoint``
                after everything else has been initialised
            load_optimizer: forwarded to ``self.load_checkpoint`` as
                ``load_optimizer``
            actor_tau: kept as ``self.actor_tau``
                (presumably the target-update coefficient -- TODO confirm)
            critic_tau: kept as ``self.critic_tau``
            min_action: kept as ``self.min_action``
            max_action: kept as ``self.max_action``
            **kwargs: forwarded unchanged to ``self._init``
        """
        # Imported here rather than at module level to avoid a circular
        # dependency.
        from catalyst.contrib.registry import Registry

        device = UtilsFactory.prepare_device()
        self._device = device

        # Networks first; the copies are taken after the originals have
        # been moved to the device.
        self.actor = actor.to(device)
        self.critic = critic.to(device)
        self.target_actor = copy.deepcopy(actor).to(device)
        self.target_critic = copy.deepcopy(critic).to(device)

        # Optimizers (the parameter dicts are stored exactly as given).
        self.actor_optimizer = Registry.get_optimizer(
            self.actor, **actor_optimizer_params
        )
        self.critic_optimizer = Registry.get_optimizer(
            self.critic, **critic_optimizer_params
        )
        self.actor_optimizer_params = actor_optimizer_params
        self.critic_optimizer_params = critic_optimizer_params

        # Schedulers (the normalised dicts are what gets stored).
        actor_sched_kwargs = actor_scheduler_params or {}
        critic_sched_kwargs = critic_scheduler_params or {}
        self.actor_scheduler = Registry.get_scheduler(
            self.actor_optimizer, **actor_sched_kwargs
        )
        self.critic_scheduler = Registry.get_scheduler(
            self.critic_optimizer, **critic_sched_kwargs
        )
        self.actor_scheduler_params = actor_sched_kwargs
        self.critic_scheduler_params = critic_sched_kwargs

        # Gradient clipping (the normalised dicts are what gets stored).
        actor_clip_kwargs = actor_grad_clip_params or {}
        critic_clip_kwargs = critic_grad_clip_params or {}
        self.actor_grad_clip_fn = Registry.get_grad_clip_fn(
            **actor_clip_kwargs
        )
        self.critic_grad_clip_fn = Registry.get_grad_clip_fn(
            **critic_clip_kwargs
        )
        self.actor_grad_clip_params = actor_clip_kwargs
        self.critic_grad_clip_params = critic_clip_kwargs

        # Criteria: unlike the scheduler / clipping parameters, the loss
        # parameters are stored as passed in, so they may be None.
        actor_criterion_kwargs = actor_loss_params or {}
        critic_criterion_kwargs = critic_loss_params or {}
        self.actor_criterion = Registry.get_criterion(
            **actor_criterion_kwargs
        )
        self.critic_criterion = Registry.get_criterion(
            **critic_criterion_kwargs
        )
        self.actor_loss_params = actor_loss_params
        self.critic_loss_params = critic_loss_params

        # Plain settings.
        self.n_step = n_step
        self.gamma = gamma
        self.actor_tau = actor_tau
        self.critic_tau = critic_tau
        self.min_action = min_action
        self.max_action = max_action

        # Extra initialisation hook; receives the remaining keyword
        # arguments.
        self._init(**kwargs)

        # Restoring a checkpoint is done last, once every attribute exists.
        if resume is not None:
            self.load_checkpoint(resume, load_optimizer=load_optimizer)