def _build_normalizer_val(self):
    """Construct the normalizer for scalar value estimates.

    The normalizer is created in the "val_norm" scope with size 1, an
    initial mean of 0 and an initial standard deviation of
    ``1 / (1 - discount)``.

    NOTE(review): the initial std presumably corresponds to the magnitude
    of a discounted return with unit per-step reward -- confirm. A discount
    of exactly 1 makes the denominator zero.

    Returns:
        The newly built ``normalizer.Normalizer`` instance.
    """
    init_std = 1.0 / (1.0 - self._discount)
    return normalizer.Normalizer(
        sess=self._sess,
        scope="val_norm",
        size=1,
        init_mean=0.0,
        init_std=init_std,
    )
def _build_normalizer_action(self):
    """Construct the normalizer for actions from the action bounds.

    The initial mean is the midpoint of the per-dimension bounds and the
    initial standard deviation is half of their range. Any dimension whose
    upper bound is at or above the largest finite float32, or whose lower
    bound is at or below the most negative finite float32, is treated as
    unbounded: both of its bounds are replaced with [-1, 1] before the
    statistics are computed.

    Returns:
        The newly built ``normalizer.Normalizer`` instance, created in the
        "a_norm" scope with one entry per action dimension.
    """
    action_size = self.get_action_size()

    # Work on copies so the arrays returned by the bound getters are not
    # modified by the masking below.
    upper = self.get_action_bound_max().copy()
    lower = self.get_action_bound_min().copy()

    f32_limits = np.finfo(np.float32)
    unbounded = np.logical_or(upper >= f32_limits.max, lower <= f32_limits.min)

    # A dimension flagged on either side has BOTH bounds reset.
    upper[unbounded] = 1.0
    lower[unbounded] = -1.0

    midpoint = 0.5 * (upper + lower)
    half_range = 0.5 * (upper - lower)

    return normalizer.Normalizer(
        sess=self._sess,
        scope="a_norm",
        size=action_size,
        init_mean=midpoint,
        init_std=half_range,
    )