Пример #1
0
 def __init__(self, obs_dim, act_dim, size):
     """Allocate zero-filled float32 buffers and reset the counters.

     Args:
         obs_dim: Observation dimension(s); combined with ``size`` through
             ``core.combined_shape`` to obtain the observation array shape.
         act_dim: Action dimension(s); combined with ``size`` the same way.
         size: Leading size handed to every allocation; also kept as
             ``max_size``.
     """
     def zeros(shape):
         # Every buffer in this object is float32 and starts out zeroed.
         return np.zeros(shape, dtype=np.float32)

     # ``obs_buf`` and ``obs2_buf`` are allocated with the same shape
     # (presumably observation / next observation -- confirm against the
     # method that writes into them).
     self.obs_buf = zeros(core.combined_shape(size, obs_dim))
     self.obs2_buf = zeros(core.combined_shape(size, obs_dim))
     self.act_buf = zeros(core.combined_shape(size, act_dim))
     self.rew_buf = zeros(size)
     self.done_buf = zeros(size)

     # Bookkeeping: ``ptr`` and ``size`` start at zero, ``max_size`` records
     # the requested size.
     self.ptr = 0
     self.size = 0
     self.max_size = size
Пример #2
0
 def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95):
     """Create zeroed float32 buffers and store the two scalar settings.

     Args:
         obs_dim: Observation dimension(s), merged with ``size`` by
             ``core.combined_shape`` to shape ``obs_buf``.
         act_dim: Action dimension(s), merged with ``size`` the same way to
             shape ``act_buf``.
         size: Leading size of every buffer; also stored as ``max_size``.
         gamma: Stored unchanged as ``self.gamma`` (presumably the discount
             factor -- confirm against the code that consumes it).
         lam: Stored unchanged as ``self.lam`` (presumably the GAE lambda --
             confirm against the code that consumes it).
     """
     obs_shape = core.combined_shape(size, obs_dim)
     act_shape = core.combined_shape(size, act_dim)
     f32 = np.float32

     # Buffers whose shape depends on the observation / action dimensions.
     self.obs_buf = np.zeros(obs_shape, dtype=f32)
     self.act_buf = np.zeros(act_shape, dtype=f32)

     # Buffers allocated directly from ``size``.
     self.adv_buf = np.zeros(size, dtype=f32)
     self.rew_buf = np.zeros(size, dtype=f32)
     self.ret_buf = np.zeros(size, dtype=f32)
     self.val_buf = np.zeros(size, dtype=f32)
     self.logp_buf = np.zeros(size, dtype=f32)

     self.gamma = gamma
     self.lam = lam

     # Both indices start at zero; ``max_size`` records the requested size.
     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = size
Пример #3
0
    def reset(self):
        """Replace every buffer with a fresh zero array and rewind the indices.

        Reads ``self.buffer_size``, ``self.batch_size``, ``self.obs_dim`` and
        ``self.act_dim``; these must already be set on the instance.
        """
        capacity = self.buffer_size

        def blank(shape, dtype=np.float32):
            # New zero-filled array; float32 unless stated otherwise.
            return np.zeros(shape, dtype=dtype)

        self.obs_buf = blank(core.combined_shape(capacity, self.obs_dim))
        self.act_buf = blank(core.combined_shape(capacity, self.act_dim))
        self.adv_buf = blank(capacity)
        self.rew_buf = blank(capacity)
        self.ret_buf = blank(capacity)
        self.val_buf = blank(capacity)
        self.logp_buf = blank(capacity)
        # Unlike the others, this one is sized by ``batch_size`` and holds
        # int32 values.
        self.seq_len_buf = blank(self.batch_size, dtype=np.int32)

        self.ptr = 0
        self.path_start_idx = 0
Пример #4
0
 def __init__(self, obs_dim, act_dim, buffer_size, batch_size, gamma=0.99, lam=0.95):
     """Record the configuration and allocate zero-filled buffers.

     Args:
         obs_dim: Observation dimension(s); kept as ``self.obs_dim`` and
             merged with ``buffer_size`` by ``core.combined_shape``.
         act_dim: Action dimension(s); kept as ``self.act_dim`` and merged
             with ``buffer_size`` the same way.
         buffer_size: Leading size of every float32 buffer; kept as both
             ``self.buffer_size`` and ``self.max_size``.
         batch_size: Length of the int32 ``seq_len_buf``; kept as
             ``self.batch_size``.
         gamma: Stored unchanged as ``self.gamma`` (presumably the discount
             factor -- confirm against the code that consumes it).
         lam: Stored unchanged as ``self.lam`` (presumably the GAE lambda --
             confirm against the code that consumes it).
     """
     def zeros(shape, dtype=np.float32):
         # New zero-filled array; float32 unless stated otherwise.
         return np.zeros(shape, dtype=dtype)

     # Keep the raw configuration on the instance.
     self.obs_dim = obs_dim
     self.act_dim = act_dim
     self.buffer_size = buffer_size
     self.batch_size = batch_size

     self.obs_buf = zeros(core.combined_shape(buffer_size, obs_dim))
     self.act_buf = zeros(core.combined_shape(buffer_size, act_dim))
     self.adv_buf = zeros(buffer_size)
     self.rew_buf = zeros(buffer_size)
     self.ret_buf = zeros(buffer_size)
     self.val_buf = zeros(buffer_size)
     self.logp_buf = zeros(buffer_size)
     # Sized by ``batch_size`` rather than ``buffer_size``, and integer-typed.
     self.seq_len_buf = zeros(batch_size, dtype=np.int32)

     self.gamma = gamma
     self.lam = lam

     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = buffer_size
Пример #5
0
 def __init__(self, obs_dim, act_dim, size, gamma=0.99, lam=0.95):
     """Set up zeroed float32 buffers, the GAE parameters and the indices.

     Args:
         obs_dim: Observation dimension(s), merged with ``size`` by
             ``core.combined_shape`` to shape ``obs_buf``.
         act_dim: Action dimension(s), merged with ``size`` the same way to
             shape ``act_buf``.
         size: Batch size; leading size of every buffer, also stored as
             ``max_size``.
         gamma: First of the two parameters used when computing GAE.
         lam: Second GAE parameter (lambda).
     """
     def zeros(shape):
         return np.zeros(shape, dtype=np.float32)

     # ``size`` is the batch size. These arrays store what is recorded while
     # the agent interacts with the environment.
     self.obs_buf = zeros(core.combined_shape(size, obs_dim))
     self.act_buf = zeros(core.combined_shape(size, act_dim))
     self.adv_buf = zeros(size)  # advantage, computed with GAE
     self.rew_buf = zeros(size)
     self.ret_buf = zeros(size)  # target value: the target used by the critic
     self.val_buf = zeros(size)
     self.logp_buf = zeros(size)

     # The two parameters used when computing GAE: gamma and lambda.
     self.gamma = gamma
     self.lam = lam

     # ``ptr`` is the current time step; ``path_start_idx`` is the initial
     # time step.
     self.ptr = 0
     self.path_start_idx = 0
     self.max_size = size
Пример #6
0
    def __init__(self, obs_dim, act_dim, size):
        """Allocate the transition buffers plus min/max arrays for normalization.

        Args:
            obs_dim: Observation dimension(s); combined with ``size`` through
                ``core.combined_shape`` to obtain the observation array shape.
            act_dim: Action dimension(s); combined with ``size`` the same way.
            size: Leading size handed to every transition buffer; also kept
                as ``max_size``.
        """
        def zeros(shape):
            # All arrays created here are float32 and start out zeroed.
            return np.zeros(shape, dtype=np.float32)

        self.obs_buf = zeros(core.combined_shape(size, obs_dim))
        self.obs2_buf = zeros(core.combined_shape(size, obs_dim))
        self.act_buf = zeros(core.combined_shape(size, act_dim))
        self.rew_buf = zeros(size)
        self.done_buf = zeros(size)

        self.ptr = 0
        self.size = 0
        self.max_size = size

        # Extra state for normalization: one column vector per quantity, with
        # one row per entry along axis 1 of the corresponding buffer.
        # NOTE(review): indexing ``shape[1]`` requires the observation
        # buffers to be at least 2-D -- confirm ``obs_dim`` is never scalar.
        # NOTE(review): minima and maxima both start at zero -- confirm the
        # update logic accounts for that.
        obs_rows = self.obs_buf.shape[1]
        obs2_rows = self.obs2_buf.shape[1]

        self.obs_buf_max = zeros((obs_rows, 1))
        self.obs2_buf_max = zeros((obs2_rows, 1))
        self.rew_buf_max = zeros((1, 1))

        self.obs_buf_min = zeros((obs_rows, 1))
        self.obs2_buf_min = zeros((obs2_rows, 1))
        self.rew_buf_min = zeros((1, 1))