示例#1
0
文件: agent.py 项目: benzei/ray
 def compute_trajectory(self, gamma, lam, horizon):
     """Collect one rollout and attach its training targets.

     Calls ``rollouts`` with the agent's common policy, environment, the
     given horizon and both filters. Depending on ``config["use_gae"]``,
     the result is then passed to ``add_advantage_values`` or to
     ``add_return_values``.

     Args:
         gamma: Forwarded to whichever helper is selected
             (presumably the discount factor -- confirm against helpers).
         lam: Forwarded to ``add_advantage_values`` only; ignored when
             ``use_gae`` is falsy.
         horizon: Forwarded to ``rollouts``.

     Returns:
         The object returned by ``rollouts``. The helpers' return values
         are discarded, so any added fields are presumably written onto
         that object in place.
     """
     rollout = rollouts(
         self.common_policy, self.env, horizon,
         self.observation_filter, self.reward_filter)

     # Without GAE, fall back to plain return values and finish early.
     if not self.config["use_gae"]:
         add_return_values(rollout, gamma, self.reward_filter)
         return rollout

     add_advantage_values(rollout, gamma, lam, self.reward_filter)
     return rollout
示例#2
0
文件: agent.py 项目: xgong/ray
 def compute_trajectory(self, gamma, lam, horizon):
     """Collect one rollout and attach advantage values to it.

     Calls ``rollouts`` with the agent's common policy, environment, the
     given horizon and both filters, then hands the result to
     ``add_advantage_values`` together with ``gamma``, ``lam`` and the
     reward filter.

     Args:
         gamma: Forwarded to ``add_advantage_values``
             (presumably the discount factor -- confirm against helper).
         lam: Forwarded to ``add_advantage_values``.
         horizon: Forwarded to ``rollouts``.

     Returns:
         The object returned by ``rollouts``. The helper's return value
         is discarded, so any added fields are presumably written onto
         that object in place.
     """
     rollout = rollouts(
         self.common_policy, self.env, horizon,
         self.observation_filter, self.reward_filter)
     add_advantage_values(rollout, gamma, lam, self.reward_filter)
     return rollout