def setup_mixins(policy, obs_space, action_space, config):
    """Run the mixin constructors on ``policy``.

    The mixins are initialized in this order: value network, KL
    coefficient, entropy-coefficient schedule, transformer learning-rate
    schedule.

    Args:
        policy: The policy object the mixins are initialized on.
        obs_space: Forwarded to ``ValueNetworkMixin``.
        action_space: Forwarded to ``ValueNetworkMixin``.
        config: Config dict. Reads ``entropy_coeff``,
            ``entropy_coeff_schedule`` and, under
            ``model.custom_options``, the optional ``warmup_steps``
            (defaults to 100000) and ``transformer.num_heads``.
    """
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
    KLCoeffMixin.__init__(policy, config)

    entropy_coeff = config["entropy_coeff"]
    entropy_schedule = config["entropy_coeff_schedule"]
    EntropyCoeffSchedule.__init__(policy, entropy_coeff, entropy_schedule)

    # Both learning-rate-schedule inputs live under the model's custom
    # options; only the warmup length has a fallback value.
    custom_options = config["model"]["custom_options"]
    warmup_steps = custom_options.get("warmup_steps", 100000)
    num_heads = custom_options["transformer"]["num_heads"]
    TransformerLearningRateSchedule.__init__(policy, num_heads, warmup_steps)
def setup_late_mixins(policy: Policy, obs_space: gym.spaces.Space,
                      action_space: gym.spaces.Space,
                      config: TrainerConfigDict):
    """Initialize the policy's mixins once APPOPolicy has been set up.

    Runs the KL-coefficient, value-network and target-network mixin
    constructors on ``policy``, in that order.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config.
    """
    KLCoeffMixin.__init__(policy, config)

    value_net_args = (policy, obs_space, action_space, config)
    ValueNetworkMixin.__init__(*value_net_args)

    # In this variant the target-network mixin only takes the policy.
    TargetNetworkMixin.__init__(policy)
def setup_late_mixins(policy: Policy, obs_space: gym.spaces.Space,
                      action_space: gym.spaces.Space,
                      config: TrainerConfigDict):
    """Initialize the policy's mixins once APPOPolicy has been set up.

    Runs the KL-coefficient, value-network and target-network mixin
    constructors on ``policy`` (in that order), then moves the target
    model onto the policy's device.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config.
    """
    KLCoeffMixin.__init__(policy, config)
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
    TargetNetworkMixin.__init__(policy, obs_space, action_space, config)

    # Only policy.model is placed on the device automatically; any other
    # model held by the policy (here: the target net) must be moved by hand.
    target_net = policy.target_model
    policy.target_model = target_net.to(policy.device)
def setup_late_mixins(policy, obs_space, action_space, config):
    """Initialize the KL-coefficient, value- and target-network mixins.

    The constructors are invoked on ``policy`` in exactly that order.

    Args:
        policy: The policy object the mixins are initialized on.
        obs_space: Forwarded to the value- and target-network mixins.
        action_space: Forwarded to the value- and target-network mixins.
        config: Config object, forwarded to all three mixins.
    """
    KLCoeffMixin.__init__(policy, config)

    # Both network mixins take the same four positional arguments.
    network_args = (policy, obs_space, action_space, config)
    ValueNetworkMixin.__init__(*network_args)
    TargetNetworkMixin.__init__(*network_args)
def setup_torch_mixins(policy, obs_space, action_space, config):
    """Initialize the torch KL, entropy-schedule and LR mixins on ``policy``.

    Copied from PPOTorchPolicy, minus the ValueNetworkMixin.

    Args:
        policy: The policy object the mixins are initialized on.
        obs_space: Not used by this function.
        action_space: Not used by this function.
        config: Config dict. Reads ``entropy_coeff``,
            ``entropy_coeff_schedule``, ``lr`` and ``lr_schedule``.
    """
    TorchKLCoeffMixin.__init__(policy, config)

    entropy_coeff = config["entropy_coeff"]
    entropy_schedule = config["entropy_coeff_schedule"]
    TorchEntropyCoeffSchedule.__init__(policy, entropy_coeff, entropy_schedule)

    learning_rate = config["lr"]
    lr_schedule = config["lr_schedule"]
    TorchLR.__init__(policy, learning_rate, lr_schedule)