示例#1
0
 def __init__(self, *args, td3_kwargs, her_kwargs, base_kwargs, **kwargs):
     """Run the HER trainer initializer, then the TD3 initializer.

     Args:
         *args: Positional arguments passed through to ``TD3.__init__``.
         td3_kwargs: Mapping unpacked as keyword arguments into
             ``TD3.__init__``.
         her_kwargs: Mapping unpacked as keyword arguments into
             ``HERTrainer.__init__``.
         base_kwargs: Mapping unpacked as keyword arguments into
             ``TD3.__init__``.
         **kwargs: Remaining keyword arguments, also passed to
             ``TD3.__init__``.

     Raises:
         AssertionError: If ``self.replay_buffer`` is not an
             ``ObsDictRelabelingBuffer`` after both initializers have run
             (the check is an ``assert``, so it is skipped under ``-O``).
     """
     HERTrainer.__init__(self, **her_kwargs)
     # The keyword groups are unpacked side by side, so a key present in
     # more than one of them raises TypeError at this call.
     TD3.__init__(self, *args, **kwargs, **td3_kwargs, **base_kwargs)

     # NOTE(review): replay_buffer is presumably set by one of the
     # initializers above -- confirm against the parent classes.
     buffer = self.replay_buffer
     assert isinstance(buffer, ObsDictRelabelingBuffer)
示例#2
0
 def __init__(
         self,
         env,
         qf1,
         qf2,
         exploration_policy,
         td3_kwargs,
         tdm_kwargs,
         base_kwargs,
         policy=None,
         eval_policy=None,
         replay_buffer=None,
         optimizer_class=optim.Adam,
         use_policy_saturation_cost=False,
         **kwargs
 ):
     """Initialize the TD3 part, then the next initializer found by super().

     Args:
         env: Forwarded to ``TD3.__init__`` as ``env``.
         qf1: Forwarded to ``TD3.__init__`` as ``qf1``.
         qf2: Forwarded to ``TD3.__init__`` as ``qf2``.
         exploration_policy: Forwarded to ``TD3.__init__``.
         td3_kwargs: Mapping unpacked as extra keyword arguments into
             ``TD3.__init__``.
         tdm_kwargs: Mapping unpacked as keyword arguments into
             ``super().__init__``.
         base_kwargs: Mapping unpacked as extra keyword arguments into
             ``TD3.__init__``.
         policy: Forwarded to ``TD3.__init__``.
         eval_policy: Forwarded to ``TD3.__init__``.
         replay_buffer: Forwarded to ``TD3.__init__``.
         optimizer_class: Forwarded to ``TD3.__init__``.
         use_policy_saturation_cost: Stored on the instance as
             ``self.use_policy_saturation_cost``.
         **kwargs: Accepted but not used by this method.
     """
     # Explicitly named TD3 arguments, kept in the same order in which
     # they are handed to the initializer.
     named_td3_args = dict(
         env=env,
         qf1=qf1,
         qf2=qf2,
         policy=policy,
         exploration_policy=exploration_policy,
         replay_buffer=replay_buffer,
         eval_policy=eval_policy,
         optimizer_class=optimizer_class,
     )
     # Unpacking the groups side by side means a key repeated across them
     # raises TypeError instead of being silently overridden.
     TD3.__init__(self, **named_td3_args, **td3_kwargs, **base_kwargs)
     super().__init__(**tdm_kwargs)

     self.use_policy_saturation_cost = use_policy_saturation_cost
示例#3
0
 def __init__(
         self,
         *args,
         td3_kwargs,
         her_kwargs,
         base_kwargs,
         **kwargs
 ):
     """Run the HER initializer, then the TD3 initializer.

     Args:
         *args: Positional arguments passed through to ``TD3.__init__``.
         td3_kwargs: Mapping unpacked as keyword arguments into
             ``TD3.__init__``.
         her_kwargs: Mapping unpacked as keyword arguments into
             ``HER.__init__``.
         base_kwargs: Mapping unpacked as keyword arguments into
             ``TD3.__init__``.
         **kwargs: Remaining keyword arguments, also passed to
             ``TD3.__init__``.

     Raises:
         AssertionError: If ``self.replay_buffer`` is not an instance of
             ``SimpleHerReplayBuffer``, ``RelabelingReplayBuffer`` or
             ``ObsDictRelabelingBuffer`` after both initializers have run
             (the check is an ``assert``, so it is skipped under ``-O``).
     """
     HER.__init__(
         self,
         **her_kwargs,
     )
     # The keyword groups are unpacked side by side, so a key present in
     # more than one of them raises TypeError at this call.
     TD3.__init__(self, *args, **kwargs, **td3_kwargs, **base_kwargs)
     # The replay buffer must be an instance of one of these three classes.
     assert isinstance(
         self.replay_buffer,
         (
             SimpleHerReplayBuffer,
             RelabelingReplayBuffer,
             ObsDictRelabelingBuffer,
         ),
     )