示例#1
0
 def cal_gae_adv(self, lambda_, gamma):
     '''
     Compute the GAE advantage estimate and store it in the buffer.

     Reads self.buffer['td_error'] and self.buffer['done'], passes them to
     sth.discounted_sum with a discount of lambda_ * gamma and an initial
     value of 0, standardizes the result, and writes it back as a list under
     self.buffer['gae_adv'].

     Intended recurrence, as noted by the original author:
         adv = td(s) + gamma * lambda * (1 - done) * td(s')
     NOTE(review): the exact handling of `done` lives in sth.discounted_sum,
     which is not visible here -- confirm against that helper.
     '''
     # TD errors must have been computed and stored before calling this.
     assert 'td_error' in self.buffer.keys()

     td_errors = self.buffer['td_error']
     decay = lambda_ * gamma
     dones = self.buffer['done']

     raw_advantage = sth.discounted_sum(td_errors, decay, 0, dones)
     normalized = standardization(np.asarray(raw_advantage))
     self.buffer['gae_adv'] = list(normalized)
示例#2
0
文件: trpo.py 项目: Abluceli/RLs
 def calculate_statistics(self):
     '''
     Derive return, TD-error and advantage columns and add them to self.data.

     A bootstrap value is obtained from self._get_value(self.s_,
     self.visual_s_), converted with .numpy() and squeezed. It is used as the
     initial value for the following columns written to self.data:

     - 'total_reward': sth.discounted_sum over rewards with a discount of 1.
     - 'discounted_reward': sth.discounted_sum over rewards with self.gamma.
     - 'td_error': sth.discounted_sum_minus over rewards with self.gamma,
       additionally given the stored value column.
     - 'advantage': sth.discounted_sum over the TD errors with a discount of
       self.lambda_ * self.gamma and an initial value of 0, standardized and
       stored as a list (GAE).

     NOTE(review): self.data is presumably a pandas DataFrame with columns
     r, done and value -- confirm against the code that builds it.
     '''
     bootstrap_value = self._get_value(self.s_, self.visual_s_).numpy()
     bootstrap_value = np.squeeze(bootstrap_value)
     data = self.data

     # Undiscounted return (discount factor fixed at 1).
     data['total_reward'] = sth.discounted_sum(
         data.r.values, 1, bootstrap_value, data.done.values)

     # Return discounted by gamma.
     data['discounted_reward'] = sth.discounted_sum(
         data.r.values, self.gamma, bootstrap_value, data.done.values)

     # TD error, which also needs the stored value column.
     data['td_error'] = sth.discounted_sum_minus(
         data.r.values, self.gamma, bootstrap_value, data.done.values,
         data.value.values)

     # GAE: accumulate the TD errors written just above with lambda * gamma.
     td_errors = data.td_error.values
     decay = self.lambda_ * self.gamma
     raw_advantage = sth.discounted_sum(td_errors, decay, 0,
                                        data.done.values)
     data['advantage'] = list(standardization(np.asarray(raw_advantage)))