def __init__(
    self,
    size,
    obs_shape,
    act_shape,
    pi_info_shapes,
    gamma=0.99,
    lam=0.95,
    cost_gamma=0.99,
    cost_lam=0.95,
):
    """Allocate zero-filled float32 storage and reset the write cursors.

    Args:
        size: Length of every 1-D buffer and leading dimension of each
            pi-info buffer; also stored as ``max_size``.
        obs_shape: Forwarded to ``combined_shape(size, obs_shape)`` to
            obtain the shape of ``obs_buf``.
        act_shape: Forwarded to ``combined_shape(size, act_shape)`` to
            obtain the shape of ``act_buf``.
        pi_info_shapes: Mapping from key to an iterable of trailing
            dimensions; one ``[size, *dims]`` array is created per key.
        gamma: Stored as ``self.gamma`` (presumably the reward discount
            factor -- confirm against the code that consumes it).
        lam: Stored as ``self.lam`` (presumably the reward GAE lambda --
            confirm against the code that consumes it).
        cost_gamma: Stored as ``self.cost_gamma``; cost counterpart of
            ``gamma``.
        cost_lam: Stored as ``self.cost_lam``; cost counterpart of ``lam``.
    """
    f32 = np.float32

    # Buffers whose shape comes from the combined_shape helper.
    self.obs_buf = np.zeros(combined_shape(size, obs_shape), dtype=f32)
    self.act_buf = np.zeros(combined_shape(size, act_shape), dtype=f32)

    # One scalar slot per stored step; every array is independent.
    flat_buffer_names = (
        "adv_buf",
        "rew_buf",
        "ret_buf",
        "val_buf",
        "cadv_buf",  # cost advantage
        "cost_buf",  # costs
        "cret_buf",  # cost return
        "cval_buf",  # cost value
        "logp_buf",
    )
    for attr_name in flat_buffer_names:
        setattr(self, attr_name, np.zeros(size, dtype=f32))

    # One array per pi-info key, with `size` as the leading dimension.
    info_bufs = {}
    for key, dims in pi_info_shapes.items():
        info_bufs[key] = np.zeros([size] + list(dims), dtype=f32)
    self.pi_info_bufs = info_bufs
    self.sorted_pi_info_keys = keys_as_sorted_list(self.pi_info_bufs)

    self.gamma = gamma
    self.lam = lam
    self.cost_gamma = cost_gamma
    self.cost_lam = cost_lam

    # Write cursor and path-start index both begin at 0.
    self.ptr = 0
    self.path_start_idx = 0
    self.max_size = size
def placeholder(dim=None):
    """Return a float32 ``tf.placeholder`` shaped by ``combined_shape``.

    Args:
        dim: Passed as the second argument to ``combined_shape(None, dim)``;
            the result is used as the placeholder's shape. Defaults to
            ``None``.

    Returns:
        The tensor created by ``tf.placeholder``.
    """
    shape = combined_shape(None, dim)
    return tf.placeholder(dtype=tf.float32, shape=shape)