Example #1
    def __init__(self, observation_space, action_space, config):
        config = dict(
            ray.rllib.algorithms.marwil.marwil.MARWILConfig().to_dict(),
            **config)

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        ValueNetworkMixin.__init__(self, config)
        PostprocessAdvantages.__init__(self)

        # Not needed for pure BC.
        if config["beta"] != 0.0:
            # Set up a torch-var for the squared moving avg. advantage norm.
            self._moving_average_sqd_adv_norm = torch.tensor(
                [config["moving_average_sqd_adv_norm_start"]],
                dtype=torch.float32,
                requires_grad=False,
            ).to(self.device)

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()
Example #2
    def __init__(self, observation_space, action_space, config):
        config = dict(ray.rllib.agents.ppo.appo.DEFAULT_CONFIG, **config)

        # Although this is a no-op, we call __init__ here to make it clear
        # that base.__init__ will use the make_model() call.
        VTraceOptimizer.__init__(self)
        LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        EntropyCoeffSchedule.__init__(
            self, config["entropy_coeff"], config["entropy_coeff_schedule"]
        )
        ValueNetworkMixin.__init__(self, config)
        KLCoeffMixin.__init__(self, config)

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()

        # Initiate TargetNetwork ops after loss initialization.
        TargetNetworkMixin.__init__(self)
Example #3
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        config: AlgorithmConfigDict,
    ):
        config = dict(ray.rllib.algorithms.ddpg.ddpg.DDPGConfig().to_dict(), **config)

        # Create global step for counting the number of update operations.
        self.global_step = 0

        # Validate action space for DDPG
        validate_spaces(self, observation_space, action_space)

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        ComputeTDErrorMixin.__init__(self)

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()

        TargetNetworkMixin.__init__(self)
Example #4
    def __init__(self, observation_space, action_space, config):

        config = dict(ray.rllib.algorithms.pg.PGConfig().to_dict(), **config)

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()
Example #5
    def __init__(self, observation_space, action_space, config):
        config = dict(ray.rllib.algorithms.maml.maml.DEFAULT_CONFIG, **config)
        validate_config(config)

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        KLCoeffMixin.__init__(self, config)
        ValueNetworkMixin.__init__(self, config)

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()
Example #6
    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        config: TrainerConfigDict,
    ):

        self.target_model = None  # assign it in self.make_model
        self._is_action_discrete = isinstance(action_space, gym.spaces.Discrete)
        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )
        """
Example #7
    def __init__(self, observation_space, action_space, config):
        config = dict(ray.rllib.agents.a3c.a3c.A3CConfig().to_dict(), **config)

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )
        ValueNetworkMixin.__init__(self, config)
        LearningRateSchedule.__init__(self, config["lr"],
                                      config["lr_schedule"])
        EntropyCoeffSchedule.__init__(self, config["entropy_coeff"],
                                      config["entropy_coeff_schedule"])

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()
Example #8
    def __init__(self, observation_space, action_space, config):
        config = dict(ray.rllib.agents.impala.impala.DEFAULT_CONFIG, **config)

        VTraceOptimizer.__init__(self)
        # Need to initialize learning rate variable before calling
        # TorchPolicyV2.__init__.
        LearningRateSchedule.__init__(self, config["lr"],
                                      config["lr_schedule"])
        EntropyCoeffSchedule.__init__(self, config["entropy_coeff"],
                                      config["entropy_coeff_schedule"])

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()
Example #9
    def __init__(self, observation_space, action_space, config):
        config = dict(ray.rllib.algorithms.ppo.ppo.PPOConfig().to_dict(),
                      **config)
        # TODO: Move into Policy API, if needed at all here. Why not move this
        #  into `PPOConfig`?
        validate_config(config)

        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        ValueNetworkMixin.__init__(self, config)
        LearningRateSchedule.__init__(self, config["lr"],
                                      config["lr_schedule"])
        EntropyCoeffSchedule.__init__(self, config["entropy_coeff"],
                                      config["entropy_coeff_schedule"])
        KLCoeffMixin.__init__(self, config)

        # TODO: Don't require users to call this manually.
        self._initialize_loss_from_dummy_batch()
Example #10
    def set_weights(self: TorchPolicyV2, weights):
        # Makes sure that whenever we restore weights for this policy's
        # model, we sync the target network (from the main model)
        # at the same time.
        TorchPolicyV2.set_weights(self, weights)
        self.update_target()
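
Most of the examples above share the same construction pattern: merge the user-supplied config over the algorithm's defaults, call TorchPolicyV2.__init__, set up any mixins, and finish with _initialize_loss_from_dummy_batch(). The sketch below distills that pattern into one minimal, self-contained class; the class name MyTorchPolicy, the use of PGConfig as the source of defaults, and the placeholder negative-log-likelihood loss are illustrative assumptions, not code from any specific RLlib algorithm.

from ray.rllib.algorithms.pg import PGConfig
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.torch_policy_v2 import TorchPolicyV2
from ray.rllib.utils.annotations import override


class MyTorchPolicy(TorchPolicyV2):
    def __init__(self, observation_space, action_space, config):
        # Merge the user-supplied config over the algorithm's defaults
        # (PGConfig is only a stand-in; any algorithm config works the same way).
        config = dict(PGConfig().to_dict(), **config)

        # Build the model, action distribution class, etc.
        TorchPolicyV2.__init__(
            self,
            observation_space,
            action_space,
            config,
            max_seq_len=config["model"]["max_seq_len"],
        )

        # Mixins (ValueNetworkMixin, LR/entropy schedules, ...) would be
        # initialized here, as in the examples above.

        # Trigger loss initialization from a dummy batch, as most of the
        # examples above do.
        self._initialize_loss_from_dummy_batch()

    @override(TorchPolicyV2)
    def loss(self, model, dist_class, train_batch):
        # Placeholder loss (assumed for this sketch): negative log-likelihood
        # of the batch actions, i.e. a behavioral-cloning-style objective.
        logits, _ = model(train_batch)
        action_dist = dist_class(logits, model)
        return -action_dist.logp(train_batch[SampleBatch.ACTIONS]).mean()

Note that mixins which build target-network ops (TargetNetworkMixin in Examples #2 and #3) are initialized only after _initialize_loss_from_dummy_batch(), which is why that call appears before them in those snippets.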