def cal_gae_adv(self, lambda_, gamma):
    '''
    Compute the GAE (generalized advantage estimation) advantages and
    store them in the buffer under the key 'gae_adv'.

    adv = td(s) + gamma * lambda * (1 - done) * td(s')

    Args:
        lambda_: GAE lambda coefficient; multiplied with `gamma` to form
            the discount passed to `sth.discounted_sum`.
        gamma: reward discount factor.

    Requires `self.buffer['td_error']` to be present already (checked by
    an assertion) and reads `self.buffer['done']` as the episode-end flags.
    '''
    assert 'td_error' in self.buffer.keys()

    td_errors = self.buffer['td_error']
    decay = lambda_ * gamma
    done_flags = self.buffer['done']

    # Discounted accumulation of the TD errors, starting from 0.
    accumulated = sth.discounted_sum(td_errors, decay, 0, done_flags)

    # NOTE(review): `standardization` is presumably a zero-mean /
    # unit-variance normalisation -- confirm against its definition.
    normalized = standardization(np.asarray(accumulated))
    self.buffer['gae_adv'] = list(normalized)
def calculate_statistics(self):
    '''
    Derive return / TD-error / advantage columns from the collected data
    and write them back into `self.data`.

    Columns written: 'total_reward', 'discounted_reward', 'td_error' and
    'advantage'. Columns read: `r`, `done` and `value`.

    NOTE(review): `self.data` is presumably a pandas DataFrame (attribute
    column access plus `.values`) -- confirm against where it is built.
    '''
    # Value obtained from `_get_value` for (s_, visual_s_), used as the
    # initial value of every discounted sum below.
    # NOTE(review): presumably the bootstrap value of the latest state.
    value_tensor = self._get_value(self.s_, self.visual_s_)
    init_value = np.squeeze(value_tensor.numpy())

    rewards = self.data.r.values
    dones = self.data.done.values
    gamma = self.gamma

    # Undiscounted (factor 1) and gamma-discounted reward sums.
    self.data['total_reward'] = sth.discounted_sum(
        rewards, 1, init_value, dones)
    self.data['discounted_reward'] = sth.discounted_sum(
        rewards, gamma, init_value, dones)

    # NOTE(review): `discounted_sum_minus` presumably yields the one-step
    # TD error against the stored value estimates -- confirm in `sth`.
    self.data['td_error'] = sth.discounted_sum_minus(
        rewards, gamma, init_value, dones, self.data.value.values)

    # GAE: accumulate the TD errors with factor lambda * gamma from 0,
    # then pass the result through `standardization`.
    gae = np.asarray(
        sth.discounted_sum(
            self.data.td_error.values,
            self.lambda_ * gamma,
            0,
            dones,
        )
    )
    self.data['advantage'] = list(standardization(gae))