Пример #1
0
 def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95):
     """Preallocate zero-filled float32 storage and reset the cursors.

     Args:
         obs_dim: Combined with ``size`` via ``core.combined_shape`` to
             produce the shape of ``obs_buf``.
         act_dim: Combined with ``size`` via ``core.combined_shape`` to
             produce the shape of ``act_buf``.
         size: Shape handed to ``np.zeros`` for the five flat buffers;
             also stored as ``max_size``.
         gamma: Stored unchanged as ``self.gamma`` (presumably the reward
             discount factor -- confirm against the code that reads it).
         lam: Stored unchanged as ``self.lam`` (presumably the GAE lambda
             -- confirm against the code that reads it).
     """
     f32 = np.float32

     def flat_buf():
         # A fresh array per call, so no two buffers share memory.
         return np.zeros(size, dtype=f32)

     # Buffers whose shape depends on the per-entry dimensions.
     self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=f32)
     self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=f32)

     # Buffers shaped by ``size`` alone.
     self.adv_buf = flat_buf()
     self.rew_buf = flat_buf()
     self.ret_buf = flat_buf()
     self.val_buf = flat_buf()
     self.logp_buf = flat_buf()

     self.gamma = gamma
     self.lam = lam

     # Both indices start at 0; ``max_size`` mirrors ``size``.
     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = size
Пример #2
0
 def __init__(self, obs_dim, act_dim, size, gamma, lam, coeff):
     """Preallocate zero-filled float32 storage with one advantage
     buffer per entry of ``lam``, and reset the cursors.

     Args:
         obs_dim: Combined with ``size`` via ``core.combined_shape`` to
             produce the shape of ``obs_buf``.
         act_dim: Combined with ``size`` via ``core.combined_shape`` to
             produce the shape of ``act_buf``.
         size: Shape handed to ``np.zeros`` for every flat buffer; also
             stored as ``max_size``.
         gamma: Stored unchanged as ``self.gamma`` (presumably the reward
             discount factor -- confirm against the code that reads it).
         lam: Sized collection stored unchanged as ``self.lam``; its
             length determines how many advantage buffers are created.
         coeff: Stored in ``self.adv_coeff`` with an extra trailing axis
             (presumably one weight per advantage buffer -- confirm
             against the code that reads it).
     """
     f32 = np.float32

     def flat_buf():
         # A fresh array per call, so no two buffers share memory.
         return np.zeros(size, dtype=f32)

     # Buffers whose shape depends on the per-entry dimensions.
     obs_shape = core.combined_shape(size, obs_dim)
     self.obs_buf = np.zeros(obs_shape, dtype=f32)
     act_shape = core.combined_shape(size, act_dim)
     self.act_buf = np.zeros(act_shape, dtype=f32)

     # One independent advantage buffer for each element of ``lam``.
     self.n_adv = len(lam)
     self.adv_coeff = np.expand_dims(coeff, axis=-1)
     self.adv_bufs = [flat_buf() for _ in range(self.n_adv)]

     # Remaining buffers shaped by ``size`` alone.
     self.rew_buf = flat_buf()
     self.ret_buf = flat_buf()
     self.val_buf = flat_buf()
     self.logp_buf = flat_buf()

     self.gamma = gamma
     self.lam = lam

     # Both indices start at 0; ``max_size`` mirrors ``size``.
     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = size