Пример #1
0
    def __init__(self,
                 actor: ActorSpec,
                 critic: CriticSpec,
                 gamma: float,
                 n_step: int,
                 actor_loss_params: Dict = None,
                 critic_loss_params: Dict = None,
                 actor_optimizer_params: Dict = None,
                 critic_optimizer_params: Dict = None,
                 actor_scheduler_params: Dict = None,
                 critic_scheduler_params: Dict = None,
                 actor_grad_clip_params: Dict = None,
                 critic_grad_clip_params: Dict = None,
                 **kwargs):
        """On-policy actor-critic constructor.

        Moves both networks onto the available device, snapshots every
        hyperparameter dict, delegates component construction to
        ``_process_agents`` and stores the discounting settings.
        """
        self._device = utils.get_device()

        self.actor = actor.to(self._device)
        self.critic = critic.to(self._device)

        # snapshot each params dict so later mutation by the caller
        # cannot silently change the stored configuration
        params_by_attr = {
            "_actor_loss_params": actor_loss_params,
            "_critic_loss_params": critic_loss_params,
            "_actor_optimizer_params": actor_optimizer_params,
            "_critic_optimizer_params": critic_optimizer_params,
            "_actor_scheduler_params": actor_scheduler_params,
            "_critic_scheduler_params": critic_scheduler_params,
            "_actor_grad_clip_params": actor_grad_clip_params,
            "_critic_grad_clip_params": critic_grad_clip_params,
        }
        for attr_name, params in params_by_attr.items():
            setattr(self, attr_name, deepcopy(params))

        # build criteria/optimizers/schedulers for both agents
        self._process_agents(
            actor_loss_params=actor_loss_params,
            critic_loss_params=critic_loss_params,
            actor_optimizer_params=actor_optimizer_params,
            critic_optimizer_params=critic_optimizer_params,
            actor_scheduler_params=actor_scheduler_params,
            critic_scheduler_params=critic_scheduler_params,
            actor_grad_clip_params=actor_grad_clip_params,
            critic_grad_clip_params=critic_grad_clip_params,
        )

        # other hyperparameters
        assert n_step == 1, "For now, on-policy setup works only with n-step=1"
        self._n_step = n_step
        self._gamma = gamma

        # other init
        self._init(**kwargs)
Пример #2
0
    def __init__(
        self,
        critic: CriticSpec,
        gamma: float,
        n_step: int,
        critic_loss_params: Dict = None,
        critic_optimizer_params: Dict = None,
        critic_scheduler_params: Dict = None,
        critic_grad_clip_params: Dict = None,
        critic_tau: float = 1.0,
        **kwargs
    ):
        """Critic-only (value-based) algorithm constructor.

        Places the critic and a deep-copied target critic on the device,
        assembles the critic's training components (criterion, optimizer,
        scheduler, gradient clipping) and records the hyperparameters.
        """
        self._device = utils.get_device()
        self.critic = critic.to(self._device)
        # independent copy used as the slowly-updated target network
        self.target_critic = copy.deepcopy(critic).to(self._device)

        # preparation: build all training components in one call
        components = utils.get_trainer_components(
            agent=self.critic,
            loss_params=critic_loss_params,
            optimizer_params=critic_optimizer_params,
            scheduler_params=critic_scheduler_params,
            grad_clip_params=critic_grad_clip_params
        )
        # bind each produced component onto its matching attribute
        # (underscore-prefixed names hold the raw param dicts)
        for key, attr in (
            ("loss_params", "_critic_loss_params"),
            ("criterion", "critic_criterion"),
            ("optimizer_params", "_critic_optimizer_params"),
            ("optimizer", "critic_optimizer"),
            ("scheduler_params", "_critic_scheduler_params"),
            ("scheduler", "critic_scheduler"),
            ("grad_clip_params", "_critic_grad_clip_params"),
            ("grad_clip_fn", "critic_grad_clip_fn"),
        ):
            setattr(self, attr, components[key])

        # other hyperparameters
        self._n_step = n_step
        self._gamma = gamma
        self.critic_tau = critic_tau

        # other init
        self._init(**kwargs)
Пример #3
0
    def __init__(
        self,
        actor: ActorSpec,
        gamma: float,
        n_step: int,
        actor_loss_params: Dict = None,
        actor_optimizer_params: Dict = None,
        actor_scheduler_params: Dict = None,
        actor_grad_clip_params: Dict = None,
        **kwargs
    ):
        """Actor-only (policy-based) algorithm constructor.

        Places the actor on the device, assembles its training
        components (criterion, optimizer, scheduler, gradient clipping)
        and records the discounting hyperparameters.
        """
        self._device = utils.get_device()
        self.actor = actor.to(self._device)

        # actor preparation: build all training components in one call
        components = utils.get_trainer_components(
            agent=self.actor,
            loss_params=actor_loss_params,
            optimizer_params=actor_optimizer_params,
            scheduler_params=actor_scheduler_params,
            grad_clip_params=actor_grad_clip_params
        )
        # bind each produced component onto its matching attribute
        # (underscore-prefixed names hold the raw param dicts)
        for key, attr in (
            ("loss_params", "_actor_loss_params"),
            ("criterion", "actor_criterion"),
            ("optimizer_params", "_actor_optimizer_params"),
            ("optimizer", "actor_optimizer"),
            ("scheduler_params", "_actor_scheduler_params"),
            ("scheduler", "actor_scheduler"),
            ("grad_clip_params", "_actor_grad_clip_params"),
            ("grad_clip_fn", "actor_grad_clip_fn"),
        ):
            setattr(self, attr, components[key])

        # other hyperparameters
        self._n_step = n_step
        self._gamma = gamma

        # other init
        self._init(**kwargs)
Пример #4
0
    def __init__(self,
                 actor: ActorSpec,
                 critic: CriticSpec,
                 gamma: float,
                 n_step: int,
                 actor_loss_params: Dict = None,
                 critic_loss_params: Dict = None,
                 actor_optimizer_params: Dict = None,
                 critic_optimizer_params: Dict = None,
                 actor_scheduler_params: Dict = None,
                 critic_scheduler_params: Dict = None,
                 actor_grad_clip_params: Dict = None,
                 critic_grad_clip_params: Dict = None,
                 **kwargs):
        """On-policy actor-critic constructor.

        Moves both networks onto the available device, builds identical
        training components (criterion, optimizer, scheduler, gradient
        clipping) for the actor and the critic, and stores the
        discounting hyperparameters.  Requires ``n_step == 1``.
        """
        self._device = utils.get_device()

        self.actor = actor.to(self._device)
        self.critic = critic.to(self._device)

        # actor/critic preparation was duplicated verbatim; factored
        # into a single helper parameterized by the agent's attribute name
        self._prepare_trainer_components(
            "actor",
            loss_params=actor_loss_params,
            optimizer_params=actor_optimizer_params,
            scheduler_params=actor_scheduler_params,
            grad_clip_params=actor_grad_clip_params)
        self._prepare_trainer_components(
            "critic",
            loss_params=critic_loss_params,
            optimizer_params=critic_optimizer_params,
            scheduler_params=critic_scheduler_params,
            grad_clip_params=critic_grad_clip_params)

        # other hyperparameters
        assert n_step == 1, "For now, on-policy setup works only with n-step=1"
        self._n_step = n_step
        self._gamma = gamma

        # other init
        self._init(**kwargs)

    def _prepare_trainer_components(self,
                                    prefix: str,
                                    loss_params: Dict = None,
                                    optimizer_params: Dict = None,
                                    scheduler_params: Dict = None,
                                    grad_clip_params: Dict = None):
        """Build criterion/optimizer/scheduler/grad-clip for the agent
        stored under attribute ``prefix`` ("actor" or "critic") and bind
        the results using the project naming scheme: public component
        attributes plus underscore-prefixed raw param dicts.
        """
        components = utils.get_trainer_components(
            agent=getattr(self, prefix),
            loss_params=loss_params,
            optimizer_params=optimizer_params,
            scheduler_params=scheduler_params,
            grad_clip_params=grad_clip_params)
        # criterion
        setattr(self, "_" + prefix + "_loss_params",
                components["loss_params"])
        setattr(self, prefix + "_criterion", components["criterion"])
        # optimizer
        setattr(self, "_" + prefix + "_optimizer_params",
                components["optimizer_params"])
        setattr(self, prefix + "_optimizer", components["optimizer"])
        # scheduler
        setattr(self, "_" + prefix + "_scheduler_params",
                components["scheduler_params"])
        setattr(self, prefix + "_scheduler", components["scheduler"])
        # grad clipping
        setattr(self, "_" + prefix + "_grad_clip_params",
                components["grad_clip_params"])
        setattr(self, prefix + "_grad_clip_fn", components["grad_clip_fn"])
Пример #5
0
    def __init__(
        self,
        actor: ActorSpec,
        critic: CriticSpec,
        gamma: float,
        n_step: int,
        actor_loss_params: Dict = None,
        critic_loss_params: Dict = None,
        actor_optimizer_params: Dict = None,
        critic_optimizer_params: Dict = None,
        actor_scheduler_params: Dict = None,
        critic_scheduler_params: Dict = None,
        actor_grad_clip_params: Dict = None,
        critic_grad_clip_params: Dict = None,
        actor_tau: float = 1.0,
        critic_tau: float = 1.0,
        action_boundaries: tuple = None,
        **kwargs
    ):
        """Off-policy actor-critic constructor.

        Moves both networks (and deep-copied target networks) onto the
        device, builds per-agent training components, and records the
        discounting/soft-update hyperparameters and optional action
        boundaries.
        """
        self._device = utils.get_device()

        self.actor = actor.to(self._device)
        self.critic = critic.to(self._device)

        # independent copies used as slowly-updated target networks
        self.target_actor = copy.deepcopy(actor).to(self._device)
        self.target_critic = copy.deepcopy(critic).to(self._device)

        # actor preparation
        actor_components = utils.get_trainer_components(
            agent=self.actor,
            loss_params=actor_loss_params,
            optimizer_params=actor_optimizer_params,
            scheduler_params=actor_scheduler_params,
            grad_clip_params=actor_grad_clip_params
        )
        # criterion
        self._actor_loss_params = actor_components["loss_params"]
        self.actor_criterion = actor_components["criterion"]
        # optimizer
        self._actor_optimizer_params = actor_components["optimizer_params"]
        self.actor_optimizer = actor_components["optimizer"]
        # scheduler
        self._actor_scheduler_params = actor_components["scheduler_params"]
        self.actor_scheduler = actor_components["scheduler"]
        # grad clipping
        self._actor_grad_clip_params = actor_components["grad_clip_params"]
        self.actor_grad_clip_fn = actor_components["grad_clip_fn"]

        # critic preparation
        critic_components = utils.get_trainer_components(
            agent=self.critic,
            loss_params=critic_loss_params,
            optimizer_params=critic_optimizer_params,
            scheduler_params=critic_scheduler_params,
            grad_clip_params=critic_grad_clip_params
        )
        # criterion
        self._critic_loss_params = critic_components["loss_params"]
        self.critic_criterion = critic_components["criterion"]
        # optimizer
        self._critic_optimizer_params = critic_components["optimizer_params"]
        self.critic_optimizer = critic_components["optimizer"]
        # scheduler
        self._critic_scheduler_params = critic_components["scheduler_params"]
        self.critic_scheduler = critic_components["scheduler"]
        # grad clipping
        self._critic_grad_clip_params = critic_components["grad_clip_params"]
        self.critic_grad_clip_fn = critic_components["grad_clip_fn"]

        # other hyperparameters
        self._n_step = n_step
        self._gamma = gamma
        self._actor_tau = actor_tau
        self._critic_tau = critic_tau

        # optional (min, max) clamp range for actions
        if action_boundaries is not None:
            assert len(action_boundaries) == 2, \
                "Should be min and max action boundaries"
        # FIX: previously the attribute was only assigned inside the
        # `is not None` branch, so any later read without boundaries
        # raised AttributeError; always define it (None when absent)
        self._action_boundaries = action_boundaries

        # other init
        self._init(**kwargs)