Example No. 1
 def run(self, agent, world):
     """
     Start a teaching episode for this task.
     Args:
         agent (pygazebo.Agent): the learning agent
         world (pygazebo.World): the simulation world
     """
     agent_sentence = yield
     goal = world.get_agent(self._goal_name)
     ball = world.get_agent('ball')
     goal_loc, dir = goal.get_pose()
     self._move_goal(ball, np.array(goal_loc))
     agent_loc, dir = agent.get_pose()
     ball_loc, _ = ball.get_pose()
     prev_dist = np.linalg.norm(
         np.array(ball_loc)[:2] - np.array(agent_loc)[:2])
     init_goal_dist = np.linalg.norm(
         np.array(ball_loc)[:2] - np.array(goal_loc)[:2])
     steps = 0
     hitted_ball = False
     while steps < self._max_steps:
         steps += 1
         if not hitted_ball:
             agent_loc, dir = agent.get_pose()
             if self._agent_name.find('icub') != -1:
                 # For agent icub, we need to use the average pos here
                 agent_loc = ICubAuxiliaryTask.get_icub_extra_obs(
                     self._agent)[:3]
             ball_loc, _ = ball.get_pose()
             dist = np.linalg.norm(
                 np.array(ball_loc)[:2] - np.array(agent_loc)[:2])
             # distance / step_time so the number is in m/s, truncated to target_speed
             progress_reward = min(self._target_speed,
                                   (prev_dist - dist) / self._step_time)
             prev_dist = dist
             if dist < 0.3:
                 dir = np.array([math.cos(dir[2]), math.sin(dir[2])])
                 goal_dir = (np.array(ball_loc[0:2]) -
                             np.array(agent_loc[0:2])) / dist
                 dot = sum(dir * goal_dir)
                 if dot > 0.707:
                     # within 45 degrees of the agent direction
                     hitted_ball = True
             agent_sentence = yield TeacherAction(reward=progress_reward)
         else:
             goal_loc, _ = goal.get_pose()
             ball_loc, _ = ball.get_pose()
             dist = np.linalg.norm(
                 np.array(ball_loc)[:2] - np.array(goal_loc)[:2])
             if dist < self._success_distance_thresh:
                 agent_sentence = yield TeacherAction(reward=100.0,
                                                      sentence="well done",
                                                      done=True)
             else:
                 agent_sentence = yield TeacherAction(
                     reward=self._target_speed + 3 - dist / init_goal_dist)
     yield TeacherAction(reward=-1.0, sentence="failed", done=True)
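All of the run() methods in these examples are generators: each one pauses at a yield, hands back a TeacherAction, and receives the agent's next sentence through send(). The sketch below shows one way such a task could be driven for a single episode; drive_episode and the agent_sentences iterable are illustrative names, not part of the pygazebo or TeacherAction API.

def drive_episode(task_gen, agent_sentences):
    """Drive a generator-based task until it signals done.

    task_gen: the generator returned by calling run(...) on a task.
    agent_sentences: an iterable of sentences produced by the agent,
        one per step.
    Returns the list of TeacherAction objects the task yielded.
    """
    actions = []
    next(task_gen)  # advance the generator to its first bare `yield`
    for sentence in agent_sentences:
        action = task_gen.send(sentence)
        actions.append(action)
        if action.done:
            break
    return actions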
Example No. 2
 def run(self, agent, world):
     """
     Start a teaching episode for this task.
     Args:
         agent (pygazebo.Agent): the learning agent 
         world (pygazebo.World): the simulation world
     """
     agent_sentence = yield
     agent.reset()
     goal = world.get_agent(self._goal_name)
     loc, dir = agent.get_pose()
     loc = np.array(loc)
     self._move_goal(goal, loc)
     steps_since_last_reward = 0
     while steps_since_last_reward < self._max_steps:
         steps_since_last_reward += 1
         loc, dir = agent.get_pose()
         goal_loc, _ = goal.get_pose()
         loc = np.array(loc)
         goal_loc = np.array(goal_loc)
         dist = np.linalg.norm(loc - goal_loc)
         if dist < self._success_distance_thresh:
             # dir from get_pose is (roll, pitch, yaw)
             dir = np.array([math.cos(dir[2]), math.sin(dir[2])])
             goal_dir = (goal_loc[0:2] - loc[0:2]) / dist
             dot = sum(dir * goal_dir)
             if dot > 0.707:
                 # within 45 degrees of the agent direction
                 logger.debug("loc: " + str(loc) + " goal: " +
                              str(goal_loc) + " dist: " + str(dist))
                 agent_sentence = yield TeacherAction(reward=1.0,
                                                      sentence="Well done!",
                                                      done=False)
                 steps_since_last_reward = 0
                 self._move_goal(goal, loc)
             else:
                 agent_sentence = yield TeacherAction()
         elif dist > self._initial_dist + self._fail_distance_thresh:
             logger.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
                          "dist: " + str(dist))
             yield TeacherAction(reward=-1.0, sentence="Failed", done=True)
         else:
             agent_sentence = yield TeacherAction()
     logger.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
                  "dist: " + str(dist))
     yield TeacherAction(reward=-1.0, sentence="Failed", done=True)
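The 0.707 threshold used above is cos(45°): the goal only counts as reached if it also lies within 45 degrees of the agent's heading (the yaw returned by get_pose). A standalone sketch of that check, with hypothetical names, might look like this:

import math

import numpy as np


def within_heading_cone(agent_xy, yaw, target_xy, half_angle=math.pi / 4):
    """Return True if target_xy lies within half_angle of the agent's heading."""
    heading = np.array([math.cos(yaw), math.sin(yaw)])
    offset = np.array(target_xy, dtype=float) - np.array(agent_xy, dtype=float)
    dist = np.linalg.norm(offset)
    if dist == 0.0:
        return True  # standing on the target
    # the dot product of two unit vectors is the cosine of the angle between them
    return float(np.dot(heading, offset / dist)) > math.cos(half_angle)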
Example No. 3
 def run(self, agent, world):
     """
     Start a teaching episode for this task.
     Args:
         agent (pygazebo.Agent): the learning agent 
         world (pygazebo.World): the simulation world
     """
     agent_sentence = yield
     agent.reset()
     goal = world.get_agent(self._goal_name)
     loc, dir = agent.get_pose()
     loc = np.array(loc)
     self._move_goal(goal, loc)
     steps_since_last_reward = 0
     while steps_since_last_reward < self._max_steps:
         steps_since_last_reward += 1
         loc, dir = agent.get_pose()
         goal_loc, _ = goal.get_pose()
         loc = np.array(loc)
         goal_loc = np.array(goal_loc)
         dist = np.linalg.norm(loc - goal_loc)
         if dist < self._success_distance_thresh:
             logger.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
                          "dist: " + str(dist))
             agent_sentence = yield TeacherAction(reward=10.0,
                                                  sentence="Well done!",
                                                  done=True)
             steps_since_last_reward = 0
             self._move_goal(goal, loc)
         else:
             if self._reward_shaping:
                 agent_sentence = yield TeacherAction(reward=-0.1 * dist /
                                                      self._random_range,
                                                      sentence="Failed",
                                                      done=False)
             else:
                 agent_sentence = yield TeacherAction()
     logger.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
                  "dist: " + str(dist))
     yield TeacherAction(reward=-10.0, sentence="Failed", done=True)
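Example No. 3 switches between a shaped per-step penalty, -0.1 * dist / self._random_range, and a sparse zero reward depending on self._reward_shaping. A minimal sketch of that branch as a pure function (the name and argument layout are illustrative):

def step_reward(dist, random_range, reward_shaping=True):
    """Per-step reward before the goal is reached.

    With shaping, the penalty grows linearly with the distance to the
    goal, normalized by the range used to place the goal; without
    shaping, intermediate steps return zero and only the terminal
    +10 / -10 rewards carry signal.
    """
    if reward_shaping:
        return -0.1 * dist / random_range
    return 0.0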
Example No. 4
 def run(self):
     """ Start a teaching episode for this task. """
     agent_sentence = yield
     goal_loc, _ = self._goal.get_pose()
     reaching_loc, _ = self._agent.get_link_pose(self._agent.type +
                                                 self._reaching_link)
     self._move_goal(self._goal, np.array(reaching_loc))
     steps = 0
     while steps < self._max_steps:
         steps += 1
         reaching_loc, _ = self._agent.get_link_pose(self._agent.type +
                                                     self._reaching_link)
         goal_loc, _ = self._goal.get_pose()
         dist = np.linalg.norm(np.array(goal_loc) - np.array(reaching_loc))
         if dist < self._success_distance_thresh:
             agent_sentence = yield TeacherAction(reward=1.0,
                                                  sentence="well done",
                                                  done=True)
         else:
             reward = (-dist) if self._reward_shaping else 0
             agent_sentence = yield TeacherAction(reward=reward, done=False)
     yield TeacherAction(reward=-1.0, sentence="failed", done=True)
Example No. 5
 def run(self):
     """ Start a teaching episode for this task. """
     self._pre_agent_pos = self.get_icub_extra_obs(self._agent)[:3]
     agent_sentence = yield
     done = False
     # set icub random initial pose
     x = self._agent_init_pos[0] + random.random() * self._random_range
     y = self._agent_init_pos[1] + random.random() * self._random_range
     orient = (random.random() - 0.5) * np.pi
     if self._target_name and random.randint(0, 1) == 0:
         # a trick from the roboschool humanoid flag run, important for learning to steer
         pos = np.array([x, y, 0.6])
         orient = self._get_angle_to_target(
             self._agent, pos, self._agent.type + '::root_link', np.pi)
     self._agent.set_pose((np.array([x, y, 0.6]), np.array([0, 0, orient])))
     while not done:
         # reward for not falling (alive reward)
         agent_height = np.array(
             self._agent.get_link_pose(self._agent.type + '::head'))[0][2]
         done = agent_height < 0.7  # fall down
         standing_reward = agent_height
         # movement cost, to avoid unnecessary movements
         joint_pos = []
         for joint_name in self._joints:
             joint_state = self._agent.get_joint_state(joint_name)
             joint_pos.append(joint_state.get_positions())
         joint_pos = np.array(joint_pos).flatten()
         movement_cost = np.sum(np.abs(joint_pos)) / joint_pos.shape[0]
         # orientation cost, the agent should face towards the target
         if self._target_name:
             agent_pos = self.get_icub_extra_obs(self._agent)[:3]
             head_angle = self._get_angle_to_target(
                 self._agent, agent_pos, self._agent.type + '::head')
             root_angle = self._get_angle_to_target(
                 self._agent, agent_pos, self._agent.type + '::root_link')
             l_foot_angle = self._get_angle_to_target(
                 self._agent, agent_pos,
                 self._agent.type + '::l_leg::l_foot', np.pi)
             r_foot_angle = self._get_angle_to_target(
                 self._agent, agent_pos,
                 self._agent.type + '::r_leg::r_foot', np.pi)
             orient_cost = (np.abs(head_angle) + np.abs(root_angle) +
                            np.abs(l_foot_angle) + np.abs(r_foot_angle)) / 4
         else:
             orient_cost = 0
         # sum all
         reward = standing_reward - 0.5 * movement_cost - 0.2 * orient_cost
         agent_sentence = yield TeacherAction(reward=reward, done=done)
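Examples No. 5 through 7 all combine an alive (standing) reward with a movement cost, the mean absolute joint position, and, when a target exists, an orientation cost, the mean absolute angle to the target over several links. A sketch of those two costs as free functions, assuming the joint positions and per-link angles have already been collected into arrays:

import numpy as np


def movement_cost(joint_positions):
    """Mean absolute joint position, penalizing unnecessary movement."""
    joint_positions = np.asarray(joint_positions, dtype=float).flatten()
    return np.sum(np.abs(joint_positions)) / joint_positions.shape[0]


def orientation_cost(angles_to_target):
    """Mean absolute angle between selected links and the target direction."""
    angles = np.asarray(angles_to_target, dtype=float)
    return float(np.mean(np.abs(angles)))


# Combined the same way Example No. 5 does, with the weights taken from the code above:
# reward = standing_reward - 0.5 * movement_cost(...) - 0.2 * orientation_cost(...)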
Example No. 6
 def run(self, agent, world):
     """
     Start a teaching episode for this task.
     Args:
         agent (pygazebo.Agent): the learning agent 
         world (pygazebo.World): the simulation world
     """
     self._pre_agent_pos = self.get_icub_extra_obs(agent)[:3]
     agent_sentence = yield
     done = False
     # set icub random initial pose
     x = self._agent_init_pos[0] + random.random() * self._random_range
     y = self._agent_init_pos[1] + random.random() * self._random_range
     orient = (random.random() - 0.5) * np.pi
     agent.set_pose(np.array([x, y, 0.6]), np.array([0, 0, orient]))
     while not done:
         # reward for not falling (alive reward)
         agent_height = np.array(agent.get_link_pose('iCub::head'))[0][2]
         done = agent_height < 0.7  # fall down
         standing_reward = agent_height
         # movement cost, to avoid unnecessary movements
         joint_pos = []
         for joint_name in self._joints:
             joint_state = self._agent.get_joint_state(joint_name)
             joint_pos.append(joint_state.get_positions())
         joint_pos = np.array(joint_pos).flatten()
         movement_cost = np.sum(np.abs(joint_pos)) / joint_pos.shape[0]
         # orientation cost, the agent should face towards the target
         # the orientation of the root link alone is not enough here
         agent_pos = self.get_icub_extra_obs(agent)[:3]
         head_angle = self._get_angle_to_target(agent_pos, 'iCub::head')
         root_angle = self._get_angle_to_target(agent_pos,
                                                'iCub::root_link')
         l_foot_angle = self._get_angle_to_target(agent_pos,
                                                  'iCub::l_leg::l_foot',
                                                  np.pi)
         r_foot_angle = self._get_angle_to_target(agent_pos,
                                                  'iCub::r_leg::r_foot',
                                                  np.pi)
         orient_cost = (np.abs(head_angle) + np.abs(root_angle) +
                        np.abs(l_foot_angle) + np.abs(r_foot_angle)) / 4
         # sum all
         reward = standing_reward - 0.5 * movement_cost - 0.2 * orient_cost
         agent_sentence = yield TeacherAction(reward=reward, done=done)
Example No. 7
 def run(self, agent, world):
     """
     Start a teaching episode for this task.
     Args:
         agent (pygazebo.Agent): the learning agent 
         world (pygazebo.World): the simulation world
     """
     pre_agent_pos = self.task_specific_observation()[:2]
     agent_sentence = yield
     done = False
     while not done:
         agent_height = np.array(agent.get_link_pose('iCub::head'))[0][2]
         done = agent_height < 0.68
         alive_reward = agent_height - 0.68
         joint_pos = []
         for joint_name in self._joints:
             joint_state = self._agent.get_joint_state(joint_name)
             joint_pos.append(joint_state.get_positions())
         joint_pos = np.array(joint_pos).flatten()
         movement_cost = np.sum(np.abs(joint_pos)) / joint_pos.shape[0]
         reward = 3.0 * alive_reward - 0.5 * movement_cost
         agent_sentence = yield TeacherAction(reward=reward, done=done)
Example No. 8
    def run(self, agent, world, distractions=None):
        """
        Start a teaching episode for this task.
        Args:
            agent (pygazebo.Agent): the learning agent
            world (pygazebo.World): the simulation world
        """
        agent_sentence = yield
        agent.reset()
        goal = world.get_agent(self._goal_name)
        loc, dir = agent.get_pose()
        loc = np.array(loc)
        self._move_goal(goal, loc)
        steps_since_last_reward = 0
        while steps_since_last_reward < self._max_steps:
            steps_since_last_reward += 1
            loc, dir = agent.get_pose()
            goal_loc, _ = goal.get_pose()
            loc = np.array(loc)
            goal_loc = np.array(goal_loc)
            dist = np.linalg.norm(loc - goal_loc)
            # dir from get_pose is (roll, pitch, yaw)
            dir = np.array([math.cos(dir[2]), math.sin(dir[2])])
            goal_dir = (goal_loc[0:2] - loc[0:2]) / dist
            dot = sum(dir * goal_dir)

            distraction_penalty = 0
            if self._distraction_penalty_distance_thresh > 0 and distractions:
                for obj_name in distractions:
                    obj = world.get_agent(obj_name)
                    if obj:
                        obj_loc, obj_dir = obj.get_pose()
                        obj_loc = np.array(obj_loc)
                        distraction_dist = np.linalg.norm(loc - obj_loc)
                        if distraction_dist < self._distraction_penalty_distance_thresh:
                            distraction_penalty += self._distraction_penalty

            if dist < self._success_distance_thresh and dot > 0.707:
                # within 45 degrees of the agent direction
                reward = 1.0 - distraction_penalty
                self._push_reward_queue(reward)
                logging.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
                              "dist: " + str(dist))
                agent_sentence = yield TeacherAction(
                    reward=reward, sentence="well done", done=False)
                steps_since_last_reward = 0
                self._move_goal(goal, loc)
            elif dist > self._initial_dist + self._fail_distance_thresh:
                reward = -1.0 - distraction_penalty
                self._push_reward_queue(0)
                logging.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
                              "dist: " + str(dist))
                yield TeacherAction(reward=reward, sentence="failed", done=True)
            else:
                if self._sparse_reward:
                    reward = 0
                else:
                    reward = (self._prev_dist - dist) / self._initial_dist
                reward = reward - distraction_penalty
                self._push_reward_queue(reward)
                self._prev_dist = dist
                agent_sentence = yield TeacherAction(
                    reward=reward, sentence=self._goal_name)
        logging.debug("loc: " + str(loc) + " goal: " + str(goal_loc) +
                      "dist: " + str(dist))
        self._push_reward_queue(0)
        yield TeacherAction(reward=-1.0, sentence="failed", done=True)
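The distraction penalty in Example No. 8 adds a fixed cost for every listed distraction object whose position lies within a threshold of the agent. A compact sketch of that loop, with the world lookup replaced by a plain mapping of positions and all names illustrative:

import numpy as np


def distraction_penalty(agent_loc, distraction_locs, dist_thresh, penalty):
    """Sum a fixed penalty for each distraction object closer than dist_thresh.

    agent_loc: the agent's (x, y, z) position.
    distraction_locs: mapping from object name to that object's (x, y, z) position.
    """
    total = 0.0
    agent_loc = np.asarray(agent_loc, dtype=float)
    for obj_loc in distraction_locs.values():
        if np.linalg.norm(agent_loc - np.asarray(obj_loc, dtype=float)) < dist_thresh:
            total += penalty
    return total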
Example No. 9
    def run(self):
        """ Start a teaching episode for this task. """
        agent_sentence = yield
        self._agent.reset()
        loc, agent_dir = self._agent.get_pose()
        loc = np.array(loc)
        self._random_move_objects()
        self.pick_goal()
        goal = self._world.get_model(self._goal_name)
        self._move_goal(goal, loc, agent_dir)
        steps_since_last_reward = 0
        prev_min_dist_to_distraction = 100
        while steps_since_last_reward < self._max_steps:
            steps_since_last_reward += 1
            loc, agent_dir = self._agent.get_pose()
            if self._agent.type.find('icub') != -1:
                # For agent icub, we need to use the average pos here
                loc = ICubAuxiliaryTask.get_icub_extra_obs(self._agent)[:3]
            goal_loc, _ = goal.get_pose()
            loc = np.array(loc)
            goal_loc = np.array(goal_loc)
            dist = np.linalg.norm(loc - goal_loc)
            # dir from get_pose is (roll, pitch, yaw)
            dir = np.array([math.cos(agent_dir[2]), math.sin(agent_dir[2])])
            goal_dir = (goal_loc[0:2] - loc[0:2]) / dist
            dot = sum(dir * goal_dir)

            distraction_penalty, prev_min_dist_to_distraction = (
                self._get_distraction_penalty(loc, dot,
                                              prev_min_dist_to_distraction))

            if dist < self._success_distance_thresh and (
                    not self._success_with_angle_requirement or dot > 0.707):
                # within 45 degrees of the agent direction
                reward = 1.0 - distraction_penalty
                self._push_reward_queue(max(reward, 0))
                logging.debug("yielding reward: " + str(reward))
                agent_sentence = yield TeacherAction(reward=reward,
                                                     sentence="well done",
                                                     done=False)
                steps_since_last_reward = 0
                if self._switch_goal_within_episode:
                    self.pick_goal()
                    goal = self._world.get_agent(self._goal_name)
                if self._move_goal_during_episode:
                    self._move_goal(goal, loc, agent_dir)
            elif dist > self._initial_dist + self._fail_distance_thresh:
                reward = -1.0 - distraction_penalty
                self._push_reward_queue(0)
                logging.debug("yielding reward: " + str(reward))
                yield TeacherAction(reward=reward,
                                    sentence="failed",
                                    done=True)
            else:
                if self._sparse_reward:
                    reward = 0
                else:
                    reward = (self._prev_dist - dist) / self._initial_dist
                reward = reward - distraction_penalty
                if distraction_penalty > 0:
                    logging.debug("yielding reward: " + str(reward))
                    self._push_reward_queue(0)
                self._prev_dist = dist
                agent_sentence = yield TeacherAction(reward=reward,
                                                     sentence=self._goal_name)
        reward = -1.0
        logging.debug("yielding reward: " + str(reward))
        self._push_reward_queue(0)
        if self.should_use_curriculum_training():
            logging.debug("reward queue len: {}, sum: {}".format(
                str(len(self._q)), str(sum(self._q))))
        yield TeacherAction(reward=reward, sentence="failed", done=True)
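Example No. 9 pushes recent rewards into a queue (self._q) and, when curriculum training is enabled, logs its length and sum; presumably the success rate over that window is what drives the curriculum. A sketch of such a bounded success window using collections.deque; the window length and the 0.9 promotion threshold are assumptions for illustration, not values taken from the task:

from collections import deque


class SuccessWindow:
    """Track recent successes to decide when to make the task harder."""

    def __init__(self, maxlen=100, promote_rate=0.9):
        self._q = deque(maxlen=maxlen)
        self._promote_rate = promote_rate

    def push(self, reward):
        # Example No. 9 pushes max(reward, 0) on success and 0 otherwise,
        # so the sum of the queue counts (weighted) successes.
        self._q.append(max(reward, 0))

    def should_promote(self):
        return (len(self._q) == self._q.maxlen
                and sum(self._q) / len(self._q) >= self._promote_rate)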