def __init__(
    self,
    actor: ActorSpec,
    critic: CriticSpec,
    gamma: float,
    n_step: int,
    actor_loss_params: Dict = None,
    critic_loss_params: Dict = None,
    actor_optimizer_params: Dict = None,
    critic_optimizer_params: Dict = None,
    actor_scheduler_params: Dict = None,
    critic_scheduler_params: Dict = None,
    actor_grad_clip_params: Dict = None,
    critic_grad_clip_params: Dict = None,
    **kwargs
):
    """On-policy actor-critic initializer.

    Moves both networks to the detected device, stashes pristine deep
    copies of every trainer-component config, then hands the (original)
    configs to ``self._process_agents`` for the actual criterion /
    optimizer / scheduler / grad-clip construction.

    Args:
        actor: policy network specification.
        critic: value network specification.
        gamma: discount factor, stored for later use.
        n_step: rollout step count; must be 1 for this on-policy setup.
        *_params: optional config dicts forwarded to ``_process_agents``.
        **kwargs: forwarded verbatim to ``self._init``.
    """
    self._device = utils.get_device()
    self.actor = actor.to(self._device)
    self.critic = critic.to(self._device)

    # Keep untouched deep copies of each config under `_<name>` before
    # anything downstream gets a chance to mutate the originals.
    trainer_configs = {
        "actor_loss_params": actor_loss_params,
        "critic_loss_params": critic_loss_params,
        "actor_optimizer_params": actor_optimizer_params,
        "critic_optimizer_params": critic_optimizer_params,
        "actor_scheduler_params": actor_scheduler_params,
        "critic_scheduler_params": critic_scheduler_params,
        "actor_grad_clip_params": actor_grad_clip_params,
        "critic_grad_clip_params": critic_grad_clip_params,
    }
    for config_name, config in trainer_configs.items():
        setattr(self, "_" + config_name, deepcopy(config))

    # The originals (not the copies) go to the agent-processing hook,
    # exactly as before.
    self._process_agents(**trainer_configs)

    # other hyperparameters
    assert n_step == 1, "For now, on-policy setup works only with n-step=1"
    self._n_step = n_step
    self._gamma = gamma

    # other init
    self._init(**kwargs)
def __init__(
    self,
    critic: CriticSpec,
    gamma: float,
    n_step: int,
    critic_loss_params: Dict = None,
    critic_optimizer_params: Dict = None,
    critic_scheduler_params: Dict = None,
    critic_grad_clip_params: Dict = None,
    critic_tau: float = 1.0,
    **kwargs
):
    """Critic-only (value-based) algorithm initializer.

    Builds the critic and a deep-copied twin (``target_critic`` —
    presumably updated toward the live critic with rate ``critic_tau``
    elsewhere; confirm against the update step), then wires up the
    criterion, optimizer, scheduler and grad-clip function via
    ``utils.get_trainer_components``.

    Args:
        critic: value network specification.
        gamma: discount factor, stored for later use.
        n_step: n-step return horizon, stored for later use.
        critic_*_params: optional trainer-component config dicts.
        critic_tau: mixing coefficient for target-network updates.
        **kwargs: forwarded verbatim to ``self._init``.
    """
    self._device = utils.get_device()
    self.critic = critic.to(self._device)
    self.target_critic = copy.deepcopy(critic).to(self._device)

    # preparation: criterion / optimizer / scheduler / grad clipping
    components = utils.get_trainer_components(
        agent=self.critic,
        loss_params=critic_loss_params,
        optimizer_params=critic_optimizer_params,
        scheduler_params=critic_scheduler_params,
        grad_clip_params=critic_grad_clip_params,
    )
    self._critic_loss_params, self.critic_criterion = (
        components["loss_params"], components["criterion"])
    self._critic_optimizer_params, self.critic_optimizer = (
        components["optimizer_params"], components["optimizer"])
    self._critic_scheduler_params, self.critic_scheduler = (
        components["scheduler_params"], components["scheduler"])
    self._critic_grad_clip_params, self.critic_grad_clip_fn = (
        components["grad_clip_params"], components["grad_clip_fn"])

    # other hyperparameters
    self._n_step = n_step
    self._gamma = gamma
    self.critic_tau = critic_tau

    # other init
    self._init(**kwargs)
def __init__(
    self,
    actor: ActorSpec,
    gamma: float,
    n_step: int,
    actor_loss_params: Dict = None,
    actor_optimizer_params: Dict = None,
    actor_scheduler_params: Dict = None,
    actor_grad_clip_params: Dict = None,
    **kwargs
):
    """Actor-only (policy-based) algorithm initializer.

    Moves the actor to the detected device and wires up its criterion,
    optimizer, scheduler and grad-clip function via
    ``utils.get_trainer_components``.

    Args:
        actor: policy network specification.
        gamma: discount factor, stored for later use.
        n_step: n-step return horizon, stored for later use.
        actor_*_params: optional trainer-component config dicts.
        **kwargs: forwarded verbatim to ``self._init``.
    """
    self._device = utils.get_device()
    self.actor = actor.to(self._device)

    # actor preparation: criterion / optimizer / scheduler / grad clipping
    components = utils.get_trainer_components(
        agent=self.actor,
        loss_params=actor_loss_params,
        optimizer_params=actor_optimizer_params,
        scheduler_params=actor_scheduler_params,
        grad_clip_params=actor_grad_clip_params,
    )
    self._actor_loss_params, self.actor_criterion = (
        components["loss_params"], components["criterion"])
    self._actor_optimizer_params, self.actor_optimizer = (
        components["optimizer_params"], components["optimizer"])
    self._actor_scheduler_params, self.actor_scheduler = (
        components["scheduler_params"], components["scheduler"])
    self._actor_grad_clip_params, self.actor_grad_clip_fn = (
        components["grad_clip_params"], components["grad_clip_fn"])

    # other hyperparameters
    self._n_step = n_step
    self._gamma = gamma

    # other init
    self._init(**kwargs)
def __init__(
    self,
    actor: ActorSpec,
    critic: CriticSpec,
    gamma: float,
    n_step: int,
    actor_loss_params: Dict = None,
    critic_loss_params: Dict = None,
    actor_optimizer_params: Dict = None,
    critic_optimizer_params: Dict = None,
    actor_scheduler_params: Dict = None,
    critic_scheduler_params: Dict = None,
    actor_grad_clip_params: Dict = None,
    critic_grad_clip_params: Dict = None,
    **kwargs
):
    """On-policy actor-critic initializer.

    Moves both networks to the detected device and builds, for each of
    them, the criterion / optimizer / scheduler / grad-clip quartet via
    ``utils.get_trainer_components``, publishing the results under the
    conventional ``<prefix>_*`` / ``_<prefix>_*_params`` attributes.

    The two formerly copy-pasted wiring stanzas are factored into one
    local helper; resulting attribute names and values are unchanged.

    Args:
        actor: policy network specification.
        critic: value network specification.
        gamma: discount factor, stored for later use.
        n_step: rollout step count; must be 1 for this on-policy setup.
        *_params: optional trainer-component config dicts.
        **kwargs: forwarded verbatim to ``self._init``.
    """
    self._device = utils.get_device()
    self.actor = actor.to(self._device)
    self.critic = critic.to(self._device)

    def _attach_components(
        prefix, agent,
        loss_params, optimizer_params, scheduler_params, grad_clip_params
    ):
        # Build one agent's trainer components and expose them as the
        # standard attributes, e.g. `actor_optimizer`,
        # `_actor_optimizer_params`, `critic_grad_clip_fn`, ...
        components = utils.get_trainer_components(
            agent=agent,
            loss_params=loss_params,
            optimizer_params=optimizer_params,
            scheduler_params=scheduler_params,
            grad_clip_params=grad_clip_params,
        )
        setattr(self, "_" + prefix + "_loss_params",
                components["loss_params"])
        setattr(self, prefix + "_criterion", components["criterion"])
        setattr(self, "_" + prefix + "_optimizer_params",
                components["optimizer_params"])
        setattr(self, prefix + "_optimizer", components["optimizer"])
        setattr(self, "_" + prefix + "_scheduler_params",
                components["scheduler_params"])
        setattr(self, prefix + "_scheduler", components["scheduler"])
        setattr(self, "_" + prefix + "_grad_clip_params",
                components["grad_clip_params"])
        setattr(self, prefix + "_grad_clip_fn", components["grad_clip_fn"])

    # actor preparation
    _attach_components(
        "actor", self.actor,
        actor_loss_params, actor_optimizer_params,
        actor_scheduler_params, actor_grad_clip_params,
    )
    # critic preparation
    _attach_components(
        "critic", self.critic,
        critic_loss_params, critic_optimizer_params,
        critic_scheduler_params, critic_grad_clip_params,
    )

    # other hyperparameters
    assert n_step == 1, "For now, on-policy setup works only with n-step=1"
    self._n_step = n_step
    self._gamma = gamma

    # other init
    self._init(**kwargs)
def __init__(
    self,
    actor: ActorSpec,
    critic: CriticSpec,
    gamma: float,
    n_step: int,
    actor_loss_params: Dict = None,
    critic_loss_params: Dict = None,
    actor_optimizer_params: Dict = None,
    critic_optimizer_params: Dict = None,
    actor_scheduler_params: Dict = None,
    critic_scheduler_params: Dict = None,
    actor_grad_clip_params: Dict = None,
    critic_grad_clip_params: Dict = None,
    actor_tau: float = 1.0,
    critic_tau: float = 1.0,
    action_boundaries: tuple = None,
    **kwargs
):
    """Off-policy actor-critic (target-network) initializer.

    Moves both networks to the detected device, deep-copies each into a
    target network (presumably blended toward the live networks with
    rates ``actor_tau`` / ``critic_tau`` elsewhere; confirm against the
    target-update step), and wires up each network's criterion /
    optimizer / scheduler / grad-clip quartet via
    ``utils.get_trainer_components``.

    The two formerly copy-pasted wiring stanzas are factored into one
    local helper; resulting attribute names and values are unchanged.

    Args:
        actor: policy network specification.
        critic: value network specification.
        gamma: discount factor, stored for later use.
        n_step: n-step return horizon, stored for later use.
        *_params: optional trainer-component config dicts.
        actor_tau: mixing coefficient for the actor target update.
        critic_tau: mixing coefficient for the critic target update.
        action_boundaries: optional ``(min, max)`` pair of action bounds.
        **kwargs: forwarded verbatim to ``self._init``.
    """
    self._device = utils.get_device()
    self.actor = actor.to(self._device)
    self.critic = critic.to(self._device)
    self.target_actor = copy.deepcopy(actor).to(self._device)
    self.target_critic = copy.deepcopy(critic).to(self._device)

    def _attach_components(
        prefix, agent,
        loss_params, optimizer_params, scheduler_params, grad_clip_params
    ):
        # Build one agent's trainer components and expose them as the
        # standard attributes, e.g. `actor_optimizer`,
        # `_actor_optimizer_params`, `critic_grad_clip_fn`, ...
        components = utils.get_trainer_components(
            agent=agent,
            loss_params=loss_params,
            optimizer_params=optimizer_params,
            scheduler_params=scheduler_params,
            grad_clip_params=grad_clip_params,
        )
        setattr(self, "_" + prefix + "_loss_params",
                components["loss_params"])
        setattr(self, prefix + "_criterion", components["criterion"])
        setattr(self, "_" + prefix + "_optimizer_params",
                components["optimizer_params"])
        setattr(self, prefix + "_optimizer", components["optimizer"])
        setattr(self, "_" + prefix + "_scheduler_params",
                components["scheduler_params"])
        setattr(self, prefix + "_scheduler", components["scheduler"])
        setattr(self, "_" + prefix + "_grad_clip_params",
                components["grad_clip_params"])
        setattr(self, prefix + "_grad_clip_fn", components["grad_clip_fn"])

    # actor preparation
    _attach_components(
        "actor", self.actor,
        actor_loss_params, actor_optimizer_params,
        actor_scheduler_params, actor_grad_clip_params,
    )
    # critic preparation
    _attach_components(
        "critic", self.critic,
        critic_loss_params, critic_optimizer_params,
        critic_scheduler_params, critic_grad_clip_params,
    )

    # other hyperparameters
    self._n_step = n_step
    self._gamma = gamma
    self._actor_tau = actor_tau
    self._critic_tau = critic_tau

    if action_boundaries is not None:
        assert len(action_boundaries) == 2, \
            "Should be min and max action boundaries"
    # Always defined (None when no bounds were given) so downstream
    # reads never hit AttributeError.
    self._action_boundaries = action_boundaries

    # other init
    self._init(**kwargs)