示例#1
0
    def configure_optimizers(self):
        optimizers = []
        optimizers.append(
            self.q_network_optimizer.make_optimizer_scheduler(
                self.q_network.parameters()))
        if self.calc_cpe_in_training:
            optimizers.append(
                self.reward_network_optimizer.make_optimizer_scheduler(
                    self.reward_network.parameters()))
            optimizers.append(
                self.q_network_cpe_optimizer.make_optimizer_scheduler(
                    self.q_network_cpe.parameters()))

        # soft-update
        target_params = list(self.q_network_target.parameters())
        source_params = list(self.q_network.parameters())
        if self.calc_cpe_in_training:
            target_params += list(self.q_network_cpe_target.parameters())
            source_params += list(self.q_network_cpe.parameters())
        optimizers.append(
            SoftUpdate.make_optimizer_scheduler(target_params,
                                                source_params,
                                                tau=self.tau))

        return optimizers
示例#2
0
    def configure_optimizers(self):
        optimizers = []
        target_params = list(self.q_network_target.parameters())
        source_params = list(self.q_network.parameters())

        optimizers.append(
            self.q_network_optimizer.make_optimizer_scheduler(
                self.q_network.parameters()))

        if self.calc_cpe_in_training:
            (
                cpe_target_params,
                cpe_source_params,
                cpe_optimizers,
            ) = self._configure_cpe_optimizers()
            target_params += cpe_target_params
            source_params += cpe_source_params
            optimizers += cpe_optimizers

        optimizers.append(
            SoftUpdate.make_optimizer_scheduler(target_params,
                                                source_params,
                                                tau=self.tau))

        return optimizers
示例#3
0
    def configure_optimizers(self):
        optimizers = []

        optimizers.append(
            self.q_network_optimizer.make_optimizer_scheduler(
                self.q1_network.parameters()))
        if self.q2_network:
            optimizers.append(
                self.q_network_optimizer.make_optimizer_scheduler(
                    self.q2_network.parameters()))
        optimizers.append(
            self.actor_network_optimizer.make_optimizer_scheduler(
                self.actor_network.parameters()))

        # soft-update
        target_params = list(self.q1_network_target.parameters())
        source_params = list(self.q1_network.parameters())
        if self.q2_network:
            target_params += list(self.q2_network_target.parameters())
            source_params += list(self.q2_network.parameters())
        target_params += list(self.actor_network_target.parameters())
        source_params += list(self.actor_network.parameters())
        optimizers.append(
            SoftUpdate.make_optimizer_scheduler(target_params,
                                                source_params,
                                                tau=self.tau))

        return optimizers
示例#4
0
    def configure_optimizers(self):
        optimizers = []

        optimizers.append(
            self.q_network_optimizer.make_optimizer_scheduler(
                self.q1_network.parameters()
            )
        )
        if self.q2_network:
            optimizers.append(
                self.q_network_optimizer.make_optimizer_scheduler(
                    self.q2_network.parameters()
                )
            )
        optimizers.append(
            self.actor_network_optimizer.make_optimizer_scheduler(
                self.actor_network.parameters()
            )
        )
        if self.alpha_optimizer is not None:
            optimizers.append(
                self.alpha_optimizer.make_optimizer_scheduler([self.log_alpha])
            )
        if self.value_network:
            optimizers.append(
                self.value_network_optimizer.make_optimizer_scheduler(
                    self.value_network.parameters()
                )
            )
        # soft-update
        if self.value_network:
            target_params = self.value_network_target.parameters()
            source_params = self.value_network.parameters()
        else:
            target_params = list(self.q1_network_target.parameters())
            source_params = list(self.q1_network.parameters())
            if self.q2_network:
                target_params += list(self.q2_network_target.parameters())
                source_params += list(self.q2_network.parameters())
        optimizers.append(
            SoftUpdate.make_optimizer_scheduler(
                target_params, source_params, tau=self.tau
            )
        )

        return optimizers