Пример #1
0
 def __init__(self,
              env,
              max_steps,
              target=None,
              agent_init_pos=(0, 0),
              agent_pos_random_range=0,
              reward_weight=1.0):
     """
     Args:
         env (gym.Env): an instance of Environment
         max_steps (int): episode will end in so many steps
         reward_weight (float): the weight of the reward, should be tuned
             accroding to reward range of other tasks 
         target (string): this is the target icub should face towards, since
             you may want the agent interact with something
         agent_init_pos (tuple): the expected initial position of the agent
         pos_random_range (float): random range of the initial position
     """
     super().__init__(env=env,
                      max_steps=max_steps,
                      reward_weight=reward_weight)
     self.task_vocab = ['icub']
     self._target_name = target
     self._pre_agent_pos = np.array([0, 0, 0], dtype=np.float32)
     self._agent_init_pos = agent_init_pos
     self._random_range = agent_pos_random_range
     if self._target_name:
         self._target = self._world.get_model(self._target_name)
     with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
               'r') as cfg_file:
         agent_cfgs = json.load(cfg_file)
     self._joints = agent_cfgs[self._agent.type]['control_joints']
Пример #2
0
 def setup(self, world, agent_name):
     """
     Setting things up during the initialization
     """
     super().setup(world, agent_name)
     agent_cfgs = json.load(
         open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
              'r'))
     self._joints = agent_cfgs[self._agent_name]['control_joints']
Пример #3
0
 def setup(self, world, agent_name):
     """
     Setting things up during the initialization
     """
     super().setup(world, agent_name)
     if self._target_name:
         self._target = world.get_agent(self._target_name)
     with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
             'r') as cfg_file:
         agent_cfgs = json.load(cfg_file)
     self._joints = agent_cfgs[self._agent_name]['control_joints']
Пример #4
0
 def test_play_ground(self):
     with_language = True
     with_agent_language = False
     agents = [
         'pioneer2dx_noplugin', 'pr2_noplugin', 'icub', 'icub_with_hands',
         'youbot_noplugin'
     ]
     with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
               'r') as cfg_file:
         agent_cfgs = json.load(cfg_file)
     tasks = [GoalTask, KickingBallTask, Reaching3D, PickAndPlace, Stack]
     for task in tasks:
         for agent_type in task.compatible_agents:
             for with_language in [True, False]:
                 for use_image_obs in [True, False]:
                     for image_with_internal_states in [True, False]:
                         agent_cfg = agent_cfgs[agent_type]
                         test_tasks = [task]
                         if agent_type.find('icub') != -1:
                             test_tasks.append(ICubAuxiliaryTask)
                         if agent_cfg[
                                 'camera_sensor'] == '' and use_image_obs:
                             continue
                         logging.info("Testing Case: Agent " + agent_type +
                                      ", Task " + str(test_tasks) +
                                      ", UseImage: " + str(use_image_obs))
                         env = PlayGround(
                             with_language=with_language,
                             with_agent_language=with_agent_language,
                             use_image_observation=use_image_obs,
                             image_with_internal_states=
                             image_with_internal_states,
                             agent_type=agent_type,
                             tasks=test_tasks)
                         step_cnt = 0
                         last_done_time = time.time()
                         while step_cnt < 100 and (time.time() -
                                                   last_done_time) < 5:
                             actions = env._control_space.sample()
                             if with_agent_language:
                                 actions = dict(control=actions,
                                                sentence="hello")
                             env.step(actions)
                             step_cnt += 1
                         step_per_sec = step_cnt / (time.time() -
                                                    last_done_time)
                         logging.info("Test Passed, FPS: " +
                                      str(step_per_sec))
                         env.close()
                         gazebo.close()
Пример #5
0
 def test_grocery(self):
     with_language = True
     agents = [
         'pioneer2dx_noplugin', 'pr2_noplugin', 'icub', 'icub_with_hands',
         'youbot_noplugin'
     ]
     tasks = ['goal', 'kickball']
     with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
               'r') as cfg_file:
         agent_cfgs = json.load(cfg_file)
     for agent_type in agents:
         for task_name in tasks:
             for use_image_obs in [True, False]:
                 agent_cfg = agent_cfgs[agent_type]
                 if agent_cfg['camera_sensor'] == '' and use_image_obs:
                     continue
                 logging.info("Testing Case: Agent " + agent_type +
                              ", Task " + task_name + ", UseImage: " +
                              str(use_image_obs))
                 env = GroceryGround(with_language=with_language,
                                     use_image_observation=use_image_obs,
                                     image_with_internal_states=True,
                                     agent_type=agent_type,
                                     task_name=task_name)
                 step_cnt = 0
                 last_done_time = time.time()
                 while step_cnt < 500 and (time.time() -
                                           last_done_time) < 10:
                     actions = env._control_space.sample()
                     if with_language:
                         actions = dict(control=actions, sentence="hello")
                     env.step(actions)
                     step_cnt += 1
                 step_per_sec = step_cnt / (time.time() - last_done_time)
                 logging.info("Test Passed, FPS: " + str(step_per_sec))
                 env.close()
Пример #6
0
    def __init__(self,
                 with_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 task_name='goal',
                 agent_type='pioneer2dx_noplugin',
                 world_time_precision=None,
                 step_time=0.1,
                 port=None,
                 action_cost=0.0,
                 resized_image_size=(64, 64),
                 image_data_format='channels_last',
                 vocab_sequence_length=20):
        """
        Args:
            with_language (bool): The observation will be a dict with an extra sentence
            use_image_observation (bool): Use image, or use low-dimentional states as
                observation. Poses in the states observation are in world coordinate
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            task_name (string): the teacher task, now there are 2 tasks,
                a simple goal task: 'goal'
                a simple kicking ball task: 'kickball'
            agent_type (string): Select the agent robot, supporting pr2_noplugin,
                pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now
                note that 'agent_type' should be the same str as the model's name
            world_time_precision (float|None): if not none, the time precision of
                simulator, i.e., the max_step_size defined in the agent cfg file, will be
                override. e.g., '0.002' for a 2ms sim step
            step_time (float): the peroid of one step of the environment.
                step_time / world_time_precision is how many simulator substeps during one
                environment step. for some complex agent, i.e., icub, using step_time of 0.05 is better
            port: Gazebo port, need to specify when run multiple environment in parallel
            action_cost (float): Add an extra action cost to reward, which helps to train
                an energy/forces efficency policy or reduce unnecessary movements
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            image_data_format (str):  One of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """

        agent_cfgs = json.load(
            open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                 'r'))
        agent_cfg = agent_cfgs[agent_type]

        wf_path = os.path.join(social_bot.get_world_dir(),
                               "grocery_ground.world")
        with open(wf_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        if world_time_precision is None:
            world_time_precision = agent_cfg['max_sim_step_time']
        sub_steps = int(round(step_time / world_time_precision))
        sim_time_cfg = [
            "//physics//max_step_size=" + str(world_time_precision)
        ]

        super(GroceryGround, self).__init__(world_string=world_string,
                                            world_config=sim_time_cfg,
                                            port=port)

        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        if task_name is None or task_name == 'goal':
            main_task = GroceryGroundGoalTask()
        elif task_name == 'kickball':
            main_task = GroceryGroundKickBallTask(step_time=step_time)
        else:
            logging.debug("upsupported task name: " + task_name)

        main_task_group = TaskGroup()
        main_task_group.add_task(main_task)
        self._teacher.add_task_group(main_task_group)
        if agent_type.find('icub') != -1:
            icub_aux_task_group = TaskGroup()
            icub_standing_task = ICubAuxiliaryTask(step_time=step_time)
            icub_aux_task_group.add_task(icub_standing_task)
            self._teacher.add_task_group(icub_aux_task_group)
        self._teacher._build_vocab_from_tasks()
        self._seq_length = vocab_sequence_length
        if self._teacher.vocab_size:
            # using MultiDiscrete instead of DiscreteSequence so gym
            # _spec_from_gym_space won't complain.
            self._sentence_space = gym.spaces.MultiDiscrete(
                [self._teacher.vocab_size] * self._seq_length)
        self._sub_steps = sub_steps

        self._world.step(20)
        self._agent = self._world.get_agent()
        for task_group in self._teacher.get_task_groups():
            for task in task_group.get_tasks():
                task.setup(self._world, agent_type)

        logging.debug(self._world.info())
        self._agent_joints = agent_cfg['control_joints']
        joint_states = list(
            map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
        self._joints_limits = list(
            map(lambda s: s.get_effort_limits()[0], joint_states))
        if agent_cfg['use_pid']:
            for joint_index in range(len(self._agent_joints)):
                self._agent.set_pid_controller(
                    self._agent_joints[joint_index],
                    'velocity',
                    p=0.02,
                    d=0.00001,
                    max_force=self._joints_limits[joint_index])
            self._agent_control_range = agent_cfg['pid_control_limit']
        else:
            self._agent_control_range = np.array(self._joints_limits)
        self._agent_camera = agent_cfg['camera_sensor']

        logging.debug("joints to control: %s" % self._agent_joints)

        self._action_cost = action_cost
        self._with_language = with_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states
        assert image_data_format in ('channels_first', 'channels_last')
        self._data_format = image_data_format
        self._resized_image_size = resized_image_size
        self._substep_time = world_time_precision

        self._control_space = gym.spaces.Box(low=-1.0,
                                             high=1.0,
                                             shape=[len(self._agent_joints)],
                                             dtype=np.float32)
        if self._with_language:
            self.action_space = gym.spaces.Dict(control=self._control_space,
                                                sentence=self._sentence_space)
        else:
            self.action_space = self._control_space

        self.reset()
        obs_sample = self._get_observation_with_sentence("hello")
        if self._with_language or self._image_with_internal_states:
            self.observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        elif self._use_image_obs:
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=obs_sample.shape,
                                                    dtype=np.uint8)
        else:
            self.observation_space = gym.spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=obs_sample.shape,
                                                    dtype=np.float32)
Пример #7
0
    def __init__(self,
                 agent_type='pioneer2dx_noplugin',
                 world_name="play_ground.world",
                 tasks=[GoalTask],
                 with_language=False,
                 with_agent_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 max_steps=200,
                 world_time_precision=None,
                 step_time=0.1,
                 real_time_update_rate=0,
                 port=None,
                 action_cost=0.0,
                 resized_image_size=(64, 64),
                 vocab_sequence_length=20,
                 action_wrapper=None):
        """
        Args:
            agent_type (string): Select the agent robot, supporting pr2_noplugin,
                pioneer2dx_noplugin, turtlebot, youbot_noplugin and icub_with_hands for now
                note that 'agent_type' should be exactly the same string as the model's
                name at the beginning of model's sdf file
            world_name (string): Select the world file, e.g., empty.world, play_ground.world,
                grocery_ground.world
            tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask
            with_language (bool): The observation will be a dict with an extra sentence
            with_agent_language (bool): Include agent sentence in action space.
            use_image_observation (bool): Use image, or use low-dimentional states as
                observation. Poses in the states observation are in world coordinate
            max_steps (int): episode will end in so many steps, will be passed to the tasks
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            world_time_precision (float|None): this parameter depends on the agent.
                if not none, the default time precision of simulator, i.e., the max_step_size
                defined in the agent cfg file, will be override. Note that pr2 and iCub
                requires a max_step_size <= 0.001, otherwise cannot train a successful policy.
            step_time (float): the peroid of one step() function of the environment in simulation.
                step_time is rounded to multiples of world_time_precision
                step_time / world_time_precision is how many simulator substeps during one
                environment step. for the tasks need higher control frequency (such as the
                tasks need walking by 2 legs), using a smaller step_time like 0.05 is better.
                experiments show that iCub can not learn how to walk in a 0.1 step_time
            real_time_update_rate (int): max update rate per second. There is no limit if
                this is set to 0 as default in the world file. If 1:1 real time is prefered
                (like playing or recording video), it should be set to 1.0/world_time_precision.
            port: Gazebo port, need to specify when run multiple environment in parallel
            action_cost (float): Add an extra action cost to reward, which helps to train
                an energy/forces efficency policy or reduce unnecessary movements
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            vocab_sequence_length (int): the length of encoded sequence
            action_wrapper (None|class): Some times primitive joints is not wanted, e.g., has
                redundant dimensions or offset. If not None, this is used to transform the agent
                actions. See ActionWrapper of gazebo_agent.py for example.
        """

        self._action_cost = action_cost
        self._with_language = with_language
        self._seq_length = vocab_sequence_length
        self._with_agent_language = with_language and with_agent_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states

        # Load agent and world file
        with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                  'r') as cfg_file:
            agent_cfgs = json.load(cfg_file)
        agent_cfg = agent_cfgs[agent_type]
        wd_path = os.path.join(social_bot.get_world_dir(), world_name)
        with open(wd_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        if world_time_precision is None:
            world_time_precision = agent_cfg['max_sim_step_time']
        self._sub_steps = int(round(step_time / world_time_precision))
        self._step_time = world_time_precision * self._sub_steps
        sim_time_cfg = [
            "//physics//max_step_size=" + str(world_time_precision),
            "//physics//real_time_update_rate=" + str(real_time_update_rate)
        ]
        super().__init__(world_string=world_string,
                         world_config=sim_time_cfg,
                         port=port)
        logging.debug(self._world.info())

        # Setup agent
        self._agent = GazeboAgent(
            world=self._world,
            agent_type=agent_type,
            config=agent_cfg,
            with_language=with_language,
            with_agent_language=with_agent_language,
            vocab_sequence_length=self._seq_length,
            use_image_observation=use_image_observation,
            resized_image_size=resized_image_size,
            image_with_internal_states=image_with_internal_states,
            action_wrapper=action_wrapper)

        # Setup teacher and tasks
        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        self._has_goal_task = False
        for task in tasks:
            if task == GoalTask:
                self._has_goal_task = True
            task_group = TaskGroup()
            task_group.add_task(task(env=self, max_steps=max_steps))
            self._teacher.add_task_group(task_group)
        self._teacher._build_vocab_from_tasks()

        # Setup action space and observation space
        if self._teacher.vocab_size:
            sentence_space = gym.spaces.MultiDiscrete(
                [self._teacher.vocab_size] * self._seq_length)
        self._agent.set_sentence_space(sentence_space)
        self._control_space = self._agent.get_control_space()
        self.action_space = self._agent.get_action_space()
        self.reset()
        self.observation_space = self._agent.get_observation_space(
            self._teacher)
Пример #8
0
    def __init__(self,
                 with_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 random_goal=False,
                 agent_type='pioneer2dx_noplugin',
                 max_steps=200,
                 port=None,
                 resized_image_size=(64, 64),
                 data_format='channels_last'):
        """
        Args:
            with_language (bool): The observation will be a dict with an extra sentence
            use_image_observation (bool): Use image, or use low-dimentional states as 
                observation. Poses in the states observation are in world coordinate
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            random_goal (bool): If ture, teacher will randomly select goal from the 
                object list each episode
            agent_type (string): Select the agent robot, supporting pr2_noplugin, 
                pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now
                note that 'agent_type' should be the same str as the model's name
            port: Gazebo port, need to specify when run multiple environment in parallel
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            data_format (str):  One of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """
        super(GroceryGround, self).__init__(port=port)

        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        task_group = teacher.TaskGroup()
        self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps,
                                                   success_distance_thresh=0.5,
                                                   fail_distance_thresh=3.0,
                                                   reward_shaping=True,
                                                   random_goal=random_goal,
                                                   random_range=10.0)
        task_group.add_task(self._teacher_task)
        self._teacher.add_task_group(task_group)
        self._teacher.build_vocab_from_tasks()
        self._seq_length = 20
        self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                                self._seq_length)

        self._object_list = self._teacher_task.get_object_list()
        self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
        self._pos_list.remove((0, 0))

        wf_path = os.path.join(social_bot.get_world_dir(),
                               "grocery_ground.world")
        with open(wf_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        self._world = gazebo.new_world_from_string(world_string)
        self._world.step(20)
        self._insert_objects(self._object_list)
        self._agent = self._world.get_agent()
        logging.debug(self._world.info())
        agent_cfgs = json.load(
            open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                 'r'))
        agent_cfg = agent_cfgs[agent_type]
        self._agent_joints = agent_cfg['control_joints']
        joint_states = list(
            map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
        self._joints_limits = list(
            map(lambda s: s.get_effort_limits()[0], joint_states))
        if agent_cfg['use_pid']:
            for joint_index in range(len(self._agent_joints)):
                self._agent.set_pid_controller(
                    self._agent_joints[joint_index],
                    'velocity',
                    p=0.02,
                    d=0.00001,
                    max_force=self._joints_limits[joint_index])
            self._agent_control_range = agent_cfg['pid_control_limit']
        else:
            self._agent_control_range = np.array(self._joints_limits)
        self._agent_camera = agent_cfg['camera_sensor']

        logging.debug("joints to control: %s" % self._agent_joints)

        self._with_language = with_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states
        assert data_format in ('channels_first', 'channels_last')
        self._data_format = data_format
        self._resized_image_size = resized_image_size
        self._random_goal = random_goal

        self.reset()
        obs_sample = self._get_observation("hello")
        if self._with_language or self._image_with_internal_states:
            self.observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        elif self._use_image_obs:
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=obs_sample.shape,
                                                    dtype=np.uint8)
        else:
            self.observation_space = gym.spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=obs_sample.shape,
                                                    dtype=np.float32)

        self._control_space = gym.spaces.Box(low=-1.0,
                                             high=1.0,
                                             shape=[len(self._agent_joints)],
                                             dtype=np.float32)
        if self._with_language:
            self.action_space = gym.spaces.Dict(control=self._control_space,
                                                sentence=self._sentence_space)
        else:
            self.action_space = self._control_space