def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95):
    """Allocate fixed-size storage for one epoch of agent-environment interaction.

    Args:
        obs_dim: observation dimensionality (int or tuple, per core.combined_shape).
        act_dim: action dimensionality (int or tuple, per core.combined_shape).
        size: maximum number of timesteps the buffer can hold.
        gamma: discount factor used for return/advantage computation.
        lam: GAE-lambda parameter.
    """
    # Possibly multi-dimensional per-timestep storage.
    self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
    self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32)
    # One scalar per timestep: advantage, reward, return, value, log-prob.
    self.adv_buf = np.zeros(size, dtype=np.float32)
    self.rew_buf = np.zeros(size, dtype=np.float32)
    self.ret_buf = np.zeros(size, dtype=np.float32)
    self.val_buf = np.zeros(size, dtype=np.float32)
    self.logp_buf = np.zeros(size, dtype=np.float32)
    self.gamma = gamma
    self.lam = lam
    # ptr: next free slot; path_start_idx: first index of the trajectory in progress.
    self.ptr = 0
    self.path_start_idx = 0
    self.max_size = size
def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95):
    """Allocate fixed-size storage for one epoch of agent-environment interaction.

    Args:
        obs_dim: observation dimensionality (int or tuple, per core.combined_shape).
        act_dim: action dimensionality (int or tuple, per core.combined_shape).
        size: maximum number of timesteps the buffer can hold.
        gamma: discount factor used for return/advantage computation.
        lam: GAE-lambda parameter.
    """
    # Possibly multi-dimensional per-timestep storage.
    self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
    self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32)
    # One scalar per timestep: advantage, reward, return, value, log-prob.
    self.adv_buf = np.zeros(size, dtype=np.float32)
    self.rew_buf = np.zeros(size, dtype=np.float32)
    self.ret_buf = np.zeros(size, dtype=np.float32)
    self.val_buf = np.zeros(size, dtype=np.float32)
    self.logp_buf = np.zeros(size, dtype=np.float32)
    self.gamma, self.lam = gamma, lam
    # FIX: the original selected CUDA when available and then immediately
    # overwrote that choice with CPU, making the CUDA selection dead code.
    # The effective behavior (CPU) is kept; the clobbered assignment is removed.
    # NOTE(review): if GPU use was intended, restore the cuda-if-available
    # selection and delete this forced-CPU line instead.
    self.device = torch.device("cpu")
    # ptr: next free slot; path_start_idx: first index of the trajectory in progress.
    self.ptr, self.path_start_idx, self.max_size = 0, 0, size
def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95, hierstep=3):
    """Allocate storage for a two-level (low/high) hierarchical experience buffer.

    Args:
        obs_dim: observation dimensionality (int or tuple, per core.combined_shape).
        act_dim: low-level action dimensionality (int or tuple).
        size: maximum number of timesteps the buffer can hold.
        gamma: discount factor used for return/advantage computation.
        lam: GAE-lambda parameter.
        hierstep: number of low-level steps per high-level decision.
    """
    # Local allocation helpers: scalar-per-step vector and per-step array of a given dim.
    def _vec():
        return np.zeros(size, dtype=np.float32)

    def _arr(dim):
        return np.zeros(core.combined_shape(size, dim), dtype=np.float32)

    # Low-level storage.
    self.obs_buf = _arr(obs_dim)
    self.init_obs_buf = _arr(obs_dim)
    self.goal_buf = _arr(obs_dim)
    self.count_buf = _arr(act_dim)
    self.act_buf = _arr(act_dim)
    self.adv_buf = _vec()
    self.rew_buf = _vec()
    self.ret_buf = _vec()
    self.val_buf = _vec()
    self.logp_buf = _vec()

    # High-level storage. NOTE(review): high-level actions are sized by obs_dim,
    # presumably because the high level emits goals in observation space — confirm.
    self.obshi_buf = _arr(obs_dim)
    self.acthi_buf = _arr(obs_dim)
    self.advhi_buf = _vec()
    self.rewhi_buf = _vec()
    self.rethi_buf = _vec()
    self.valhi_buf = _vec()
    self.logphi_buf = _vec()

    self.gamma, self.lam, self.hierstep = gamma, lam, hierstep
    # Write pointers and trajectory-start markers for each level.
    self.ptr, self.path_start_idx, self.max_size = 0, 0, size
    self.ptrhi, self.path_start_idxhi = 0, 0