Exemplo n.º 1
0
 def _compute_reward(self, goal, obs):
     """Return the reward earned for ``obs`` with respect to ``goal``.

     Both ``goal`` and ``obs`` are unpacked as one-element iterables: the
     goal position and the value handed to ``self._gripper_pos``
     (presumably joint positions -- confirm against the caller).

     Returns:
         1 when ``at_goal`` accepts the gripper position for this goal
         and ``self._geofence``; otherwise -.0001 if ``self._neg_reward``
         is truthy, else 0.
     """
     (target,) = goal
     (joint_state,) = obs
     gripper = self._gripper_pos(joint_state)
     if at_goal(gripper, target, self._geofence):
         return 1
     # Optional small penalty for every step that is not at the goal.
     return -.0001 if self._neg_reward else 0
Exemplo n.º 2
0
 def _compute_reward(self, goal, obs):
     """Return the reward earned for ``obs`` with respect to ``goal``.

     Only ``obs[0]`` is used as the position; ``goal`` is forwarded to
     ``at_goal`` unchanged.

     Returns:
         1 if ``at_goal`` accepts the position, -1 if ``escaped`` reports
         the position against the world upper/lower bounds, -0.01 if
         ``self._neg_reward`` is truthy, and 0 otherwise. The checks are
         made in that order and the first match wins.
     """
     position = obs[0]
     if at_goal(position, goal, self._geofence):
         return 1
     upper, lower = self._world_upper_bound, self._world_lower_bound
     if escaped(position, upper, lower):
         return -1
     # Optional small penalty for every step that is not at the goal.
     return -0.01 if self._neg_reward else 0
Exemplo n.º 3
0
 def _compute_terminal(self, goal, obs):
     """Return whether ``obs`` is terminal for ``goal``.

     ``goal`` and ``obs`` are each unpacked as one-element iterables. The
     result is whatever ``at_goal`` returns for the gripper position
     derived via ``self._gripper_pos``, the goal, and ``self._geofence``.
     """
     (target,) = goal
     (joint_state,) = obs
     gripper = self._gripper_pos(joint_state)
     return at_goal(gripper, target, self._geofence)
Exemplo n.º 4
0
 def _achieved_goal(self, goal, obs):
     """Return whether ``obs`` satisfies both parts of ``goal``.

     ``goal`` is unpacked as ``(goal_pos, (should_lift,))`` and ``obs`` as
     ``(qpos, (fingers_touching, block_lifted))``.

     The goal counts as achieved when ``at_goal`` accepts the gripper
     position and ``should_lift`` equals
     ``block_lifted and fingers_touching``.
     """
     target, (lift_wanted,) = goal
     joint_state, (touching, lifted) = obs
     reached = at_goal(self._gripper_pos(joint_state), target, self._geofence)
     if not reached:
         # Hand back the falsy value itself, as a short-circuiting ``and`` would.
         return reached
     lifted_in_grasp = lifted and touching
     return lift_wanted == lifted_in_grasp
Exemplo n.º 5
0
 def at_goal(self, goal, new_obs):
     """Return whether the position embedded in ``new_obs`` is at ``goal``.

     The position is sliced out of ``new_obs`` in three steps: drop the
     last two entries, keep the last four of what remains, then take the
     first two of those. Judging by the original local names, the layout
     is presumably ``[..., position/orientation (4), goal (2)]`` --
     confirm against the code that builds the observation.

     The call inside the body resolves to the module-level ``at_goal``
     function, not to this method.
     """
     # Slices are chained (not collapsed into one) to keep the exact
     # behaviour for observations shorter than six entries.
     xy = new_obs[:-2][-4:][:2]
     return at_goal(xy, goal, self._geofence)
Exemplo n.º 6
0
 def _compute_terminal(self, goal, obs):
     """Return whether ``obs`` is terminal for ``goal``.

     ``goal`` is unpacked as a one-element iterable; the position is
     ``obs[0]``. The result is whatever ``at_goal`` returns for that
     position, the goal, and ``self._geofence``.
     """
     (target,) = goal
     return at_goal(obs[0], target, self._geofence)