Exemplo n.º 1
0
    def __init__(self, observation_space, action_space, net, rew_func,
                 n_samples=1000, horizon=20,
                 mean_obs=0., std_obs=1., mean_acs=0., std_acs=1.,
                 rnn=False, normalize_ac=True):
        """Initialise the base policy and store the sampling configuration.

        ``observation_space``, ``action_space``, ``net``, ``rnn`` and
        ``normalize_ac`` are handed to ``BasePol.__init__`` unchanged.
        ``rew_func``, ``n_samples`` and ``horizon`` are kept as attributes of
        the same name.  ``mean_obs``, ``std_obs``, ``mean_acs`` and
        ``std_acs`` are each converted to a float tensor and tiled with
        ``.repeat(n_samples, 1)`` before being stored.
        """
        BasePol.__init__(
            self,
            observation_space,
            action_space,
            net,
            rnn=rnn,
            normalize_ac=normalize_ac,
        )
        self.horizon = horizon
        self.n_samples = n_samples
        self.rew_func = rew_func
        self.to(get_device())

        def _tiled(stat):
            # One float copy of `stat` per sample.
            return torch.tensor(stat, dtype=torch.float).repeat(n_samples, 1)

        # NOTE(review): these tensors are built after the self.to(...) call
        # above and no device is given here -- confirm that is intended.
        self.mean_obs = _tiled(mean_obs)
        self.std_obs = _tiled(std_obs)
        self.mean_acs = _tiled(mean_acs)
        self.std_acs = _tiled(std_acs)
Exemplo n.º 2
0
 def __init__(self,
              ob_space,
              ac_space,
              qfunc,
              rnn=False,
              normalize_ac=True,
              data_parallel=False,
              parallel_dim=0,
              eps=0.2):
     """Initialise the base policy without a network and keep ``qfunc``.

     ``BasePol.__init__`` receives ``None`` as its third argument after
     ``self``; the remaining arguments except ``qfunc`` and ``eps`` are
     forwarded positionally.  ``qfunc`` and ``eps`` are stored as
     attributes and ``a_i_shape`` is fixed to ``(1,)``.
     """
     BasePol.__init__(
         self,
         ob_space,
         ac_space,
         None,
         rnn,
         normalize_ac,
         data_parallel,
         parallel_dim,
     )
     self.a_i_shape = (1,)
     self.eps = eps
     self.qfunc = qfunc
     self.to(get_device())
Exemplo n.º 3
0
 def __init__(self, observation_space, action_space, net,
              normalize_ac=True):
     """Initialise the base policy, then attach ``net`` and the distribution.

     ``net`` is stored on the instance directly by this method; ``pd`` is
     set to a new ``MixtureGaussianPd`` and the policy is moved to the
     device returned by ``get_device()``.
     """
     # NOTE(review): normalize_ac is passed as the third positional argument
     # after self -- confirm this matches the BasePol.__init__ signature in
     # use, since `net` is not forwarded here.
     BasePol.__init__(
         self,
         observation_space,
         action_space,
         normalize_ac,
     )
     self.net = net
     self.pd = MixtureGaussianPd()
     self.to(get_device())
Exemplo n.º 4
0
 def __init__(self, observation_space, action_space, net,
              rnn=False, normalize_ac=True):
     """Initialise the base policy and attach a ``GaussianPd``.

     All arguments are forwarded positionally to ``BasePol.__init__``;
     afterwards ``pd`` is set to a new ``GaussianPd`` and the policy is
     moved to the device returned by ``get_device()``.
     """
     BasePol.__init__(
         self,
         observation_space,
         action_space,
         net,
         rnn,
         normalize_ac,
     )
     self.pd = GaussianPd()
     self.to(get_device())
Exemplo n.º 5
0
 def __init__(self, observation_space, action_space, net,
              rnn=False, normalize_ac=True):
     """Initialise the base policy and attach a ``MultiCategoricalPd``.

     All arguments are forwarded positionally to ``BasePol.__init__``;
     afterwards ``pd`` is set to a new ``MultiCategoricalPd`` and the
     policy is moved to the device returned by ``get_device()``.
     """
     BasePol.__init__(
         self,
         observation_space,
         action_space,
         net,
         rnn,
         normalize_ac,
     )
     self.pd = MultiCategoricalPd()
     self.to(get_device())
Exemplo n.º 6
0
 def __init__(self, observation_space, action_space, net,
              rnn=False, normalize_ac=True,
              data_parallel=False, parallel_dim=0):
     """Initialise the base policy and attach a ``GaussianPd``.

     Every argument, including ``data_parallel`` and ``parallel_dim``, is
     forwarded positionally to ``BasePol.__init__``.  ``pd`` is then set
     to a new ``GaussianPd`` and the policy is moved to the device
     returned by ``get_device()``.
     """
     BasePol.__init__(
         self,
         observation_space,
         action_space,
         net,
         rnn,
         normalize_ac,
         data_parallel,
         parallel_dim,
     )
     self.pd = GaussianPd()
     self.to(get_device())
Exemplo n.º 7
0
 def __init__(self, ob_space, ac_space, net,
              rnn=False, normalize_ac=True,
              data_parallel=False, parallel_dim=0):
     """Initialise the base policy and attach a ``MultiCategoricalPd``.

     Every argument, including ``data_parallel`` and ``parallel_dim``, is
     forwarded positionally to ``BasePol.__init__``.  ``pd`` is then set
     to a new ``MultiCategoricalPd`` and the policy is moved to the device
     returned by ``get_device()``.
     """
     BasePol.__init__(
         self,
         ob_space,
         ac_space,
         net,
         rnn,
         normalize_ac,
         data_parallel,
         parallel_dim,
     )
     self.pd = MultiCategoricalPd()
     self.to(get_device())
Exemplo n.º 8
0
 def __init__(self, observation_space, action_space, qfunc,
              rnn=False, normalize_ac=True, eps=0.2):
     """Initialise the base policy without a network and keep ``qfunc``.

     ``BasePol.__init__`` receives ``None`` as its third argument after
     ``self``, followed by ``rnn`` and ``normalize_ac``.  ``qfunc`` and
     ``eps`` are stored as attributes and ``a_i_shape`` is fixed to
     ``(1,)``.
     """
     BasePol.__init__(
         self,
         observation_space,
         action_space,
         None,
         rnn,
         normalize_ac,
     )
     self.a_i_shape = (1,)
     self.eps = eps
     self.qfunc = qfunc
     self.to(get_device())
Exemplo n.º 9
0
 def __init__(self, observation_space, action_space, net=None,
              rnn=False, normalize_ac=True,
              data_parallel=False, parallel_dim=0):
     """Delegate construction entirely to ``BasePol.__init__``.

     The two spaces and ``net`` are passed positionally; ``rnn``,
     ``normalize_ac``, ``data_parallel`` and ``parallel_dim`` are passed
     by keyword.  Nothing else is set up here.
     """
     BasePol.__init__(
         self, observation_space, action_space, net,
         rnn=rnn, normalize_ac=normalize_ac,
         data_parallel=data_parallel, parallel_dim=parallel_dim,
     )
Exemplo n.º 10
0
    def __init__(self,
                 ob_space,
                 ac_space,
                 net,
                 rew_func,
                 n_samples=1000,
                 horizon=20,
                 mean_obs=0.,
                 std_obs=1.,
                 mean_acs=0.,
                 std_acs=1.,
                 rnn=False,
                 normalize_ac=True,
                 data_parallel=False,
                 parallel_dim=0):
        """Initialise the base policy and store the sampling configuration.

        ``ob_space``, ``ac_space`` and ``net`` are passed positionally to
        ``BasePol.__init__``; ``rnn``, ``normalize_ac``, ``data_parallel``
        and ``parallel_dim`` are passed by keyword.  ``rew_func``,
        ``n_samples`` and ``horizon`` are kept as attributes.  The four
        mean/std statistics are each converted to a float tensor and tiled
        with ``.repeat(n_samples, 1)`` before being stored.
        """
        BasePol.__init__(
            self,
            ob_space,
            ac_space,
            net,
            rnn=rnn,
            normalize_ac=normalize_ac,
            data_parallel=data_parallel,
            parallel_dim=parallel_dim,
        )
        self.horizon = horizon
        self.n_samples = n_samples
        self.rew_func = rew_func
        self.to(get_device())

        # NOTE(review): these tensors are built after the self.to(...) call
        # above and no device is given here -- confirm that is intended.
        stats = (
            ('mean_obs', mean_obs),
            ('std_obs', std_obs),
            ('mean_acs', mean_acs),
            ('std_acs', std_acs),
        )
        for attr_name, raw_value in stats:
            as_tensor = torch.tensor(raw_value, dtype=torch.float)
            setattr(self, attr_name, as_tensor.repeat(n_samples, 1))
Exemplo n.º 11
0
 def __init__(self, observation_space, action_space, net,
              noise=None, rnn=False, normalize_ac=True):
     """Initialise the base policy with a ``DeterministicPd`` and ``noise``.

     ``noise`` is stored as given.  The two spaces and ``net`` are passed
     positionally to ``BasePol.__init__``; ``rnn`` and ``normalize_ac``
     are passed by keyword.

     Raises:
         ValueError: if ``rnn`` is truthy; the check runs before any
             other initialisation.
     """
     if rnn:
         unsupported = (
             'rnn with DeterministicActionNoisePol is not supported now')
         raise ValueError(unsupported)
     BasePol.__init__(
         self, observation_space, action_space, net,
         rnn=rnn, normalize_ac=normalize_ac,
     )
     self.pd = DeterministicPd()
     self.noise = noise
     self.to(get_device())
 def __init__(self, ob_space, ac_space, net,
              noise=None, rnn=False, normalize_ac=True,
              data_parallel=False, parallel_dim=0):
     """Initialise the base policy with a ``DeterministicPd`` and ``noise``.

     ``noise`` is stored as given.  The two spaces and ``net`` are passed
     positionally to ``BasePol.__init__``; ``rnn``, ``normalize_ac``,
     ``data_parallel`` and ``parallel_dim`` are passed by keyword.

     Raises:
         ValueError: if ``rnn`` is truthy; the check runs before any
             other initialisation.
     """
     if rnn:
         unsupported = (
             'rnn with DeterministicActionNoisePol is not supported now')
         raise ValueError(unsupported)
     BasePol.__init__(
         self, ob_space, ac_space, net,
         rnn=rnn, normalize_ac=normalize_ac,
         data_parallel=data_parallel, parallel_dim=parallel_dim,
     )
     self.pd = DeterministicPd()
     self.noise = noise
     self.to(get_device())
Exemplo n.º 13
0
 def __init__(self,
              observation_space,
              action_space,
              net=None,
              rnn=False,
              normalize_ac=True):
     """Delegate construction entirely to ``BasePol.__init__``.

     The two spaces and ``net`` are passed positionally; ``rnn`` and
     ``normalize_ac`` are passed by keyword.  Nothing else is set up here.
     """
     BasePol.__init__(
         self,
         observation_space,
         action_space,
         net,
         rnn=rnn,
         normalize_ac=normalize_ac,
     )