示例#1
0
    def __init__(self,
                 world_file=None,
                 world_string=None,
                 world_config=None,
                 port=None,
                 quiet=False):
        """
        Args:
             world_file (str|None): world file path
             world_string (str|None): world xml string content,
             world_config (list[str]): list of str config `key=value`
                see `_xpath_modify_xml` for details
             port (int): Gazebo port
             quiet (bool) Set quiet output
        """
        if port is None:
            port = 0
        self._port = port
        # to avoid different parallel simulation has the same randomness
        random.seed(port)
        self._rendering_process = None
        self._rendering_camera = None
        # the default camera pose for rendering rgb_array, could be override
        self._rendering_cam_pose = "10 -10 6 0 0.4 2.4"
        gazebo.initialize(port=port, quiet=quiet)

        if world_file:
            world_file_abs_path = os.path.join(social_bot.get_world_dir(),
                                               world_file)
            world_string = gazebo.world_sdf(world_file_abs_path)

        if world_config:
            world_string = _xpath_modify_xml(world_string, world_config)

        self._world = gazebo.new_world_from_string(world_string)
示例#2
0
    def __init__(self, x_threshold=5, theta_threshold=0.314, noise=0.001):
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(), "cartpole.world"))

        self._agent = self._world.get_agent()
        logger.info("joint names: %s" % self._agent.get_joint_names())
        self._x_threshold = x_threshold
        self._theta_threshold = theta_threshold
        high = np.array([
            self._x_threshold * 2,
            np.finfo(np.float32).max, self._theta_threshold * 2,
            np.finfo(np.float32).max
        ])
        self.noise = noise
        self.action_space = spaces.Box(-1, 1, shape=(1, ), dtype='float32')
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)
        self._world.info()
    def __init__(self, with_language=True, port=None):
        if port is None:
            port = 0
        gazebo.initialize(port=port)
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(),
                         "pioneer2dx_camera.world"))
        self._agent = self._world.get_agent()
        assert self._agent is not None
        logger.info("joint names: %s" % self._agent.get_joint_names())
        self._joint_names = self._agent.get_joint_names()
        self._teacher = teacher.Teacher(False)
        task_group = teacher.TaskGroup()
        task_group.add_task(GoalTask())
        self._teacher.add_task_group(task_group)
        self._with_language = with_language

        # get observation dimension
        image = self._agent.get_camera_observation("camera")
        image = np.array(image, copy=False)
        if with_language:
            self._observation_space = gym.spaces.Dict(
                image=gym.spaces.Box(low=0,
                                     high=1,
                                     shape=image.shape,
                                     dtype=np.uint8),
                sentence=DiscreteSequence(256, 20))

            self._action_space = gym.spaces.Dict(
                control=gym.spaces.Box(low=-0.2,
                                       high=0.2,
                                       shape=[len(self._joint_names)],
                                       dtype=np.float32),
                sentence=DiscreteSequence(256, 20))
        else:
            self._observation_space = gym.spaces.Box(low=0,
                                                     high=1,
                                                     shape=image.shape,
                                                     dtype=np.uint8)
            self._action_space = gym.spaces.Box(low=-0.2,
                                                high=0.2,
                                                shape=[len(self._joint_names)],
                                                dtype=np.float32)
示例#4
0
    def __init__(self,
                 world_file=None,
                 world_string=None,
                 world_config=None,
                 sim_time_precision=0.001,
                 port=None,
                 quiet=False):
        """
        Args:
             world_file (str|None): world file path
             world_string (str|None): world xml string content,
             world_config (list[str]): list of str config `key=value`
                see `_modify_world_xml` for details
             sim_time_precision (float): the time precision of the simulator
             port (int): Gazebo port
             quiet (bool) Set quiet output
        """
        os.environ["GAZEBO_MODEL_DATABASE_URI"] = ""
        if port is None:
            port = 0
        self._port = port
        # This avoids different parallel simulations having the same randomness.
        # When calling from alf, alf.environments.utils.create_environment calls
        # env.seed() afterwards, which is the real seed being used.  Not this one.
        random.seed(port)
        self._rendering_process = None
        self._rendering_camera = None
        # the default camera pose for rendering rgb_array, could be override
        self._rendering_cam_pose = "10 -10 6 0 0.4 2.4"
        gazebo.initialize(port=port, quiet=quiet)

        if world_file:
            world_file_abs_path = os.path.join(social_bot.get_world_dir(),
                                               world_file)
            world_string = gazebo.world_sdf(world_file_abs_path)

        if world_config:
            world_string = _modify_world_xml(world_string, world_config)

        self._world = gazebo.new_world_from_string(world_string)
示例#5
0
    def __init__(self,
                 with_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 task_name='goal',
                 agent_type='pioneer2dx_noplugin',
                 world_time_precision=None,
                 step_time=0.1,
                 port=None,
                 action_cost=0.0,
                 resized_image_size=(64, 64),
                 image_data_format='channels_last',
                 vocab_sequence_length=20):
        """
        Args:
            with_language (bool): The observation will be a dict with an extra sentence
            use_image_observation (bool): Use image, or use low-dimentional states as
                observation. Poses in the states observation are in world coordinate
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            task_name (string): the teacher task, now there are 2 tasks,
                a simple goal task: 'goal'
                a simple kicking ball task: 'kickball'
            agent_type (string): Select the agent robot, supporting pr2_noplugin,
                pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now
                note that 'agent_type' should be the same str as the model's name
            world_time_precision (float|None): if not none, the time precision of
                simulator, i.e., the max_step_size defined in the agent cfg file, will be
                override. e.g., '0.002' for a 2ms sim step
            step_time (float): the peroid of one step of the environment.
                step_time / world_time_precision is how many simulator substeps during one
                environment step. for some complex agent, i.e., icub, using step_time of 0.05 is better
            port: Gazebo port, need to specify when run multiple environment in parallel
            action_cost (float): Add an extra action cost to reward, which helps to train
                an energy/forces efficency policy or reduce unnecessary movements
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            image_data_format (str):  One of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """

        agent_cfgs = json.load(
            open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                 'r'))
        agent_cfg = agent_cfgs[agent_type]

        wf_path = os.path.join(social_bot.get_world_dir(),
                               "grocery_ground.world")
        with open(wf_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        if world_time_precision is None:
            world_time_precision = agent_cfg['max_sim_step_time']
        sub_steps = int(round(step_time / world_time_precision))
        sim_time_cfg = [
            "//physics//max_step_size=" + str(world_time_precision)
        ]

        super(GroceryGround, self).__init__(world_string=world_string,
                                            world_config=sim_time_cfg,
                                            port=port)

        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        if task_name is None or task_name == 'goal':
            main_task = GroceryGroundGoalTask()
        elif task_name == 'kickball':
            main_task = GroceryGroundKickBallTask(step_time=step_time)
        else:
            logging.debug("upsupported task name: " + task_name)

        main_task_group = TaskGroup()
        main_task_group.add_task(main_task)
        self._teacher.add_task_group(main_task_group)
        if agent_type.find('icub') != -1:
            icub_aux_task_group = TaskGroup()
            icub_standing_task = ICubAuxiliaryTask(step_time=step_time)
            icub_aux_task_group.add_task(icub_standing_task)
            self._teacher.add_task_group(icub_aux_task_group)
        self._teacher._build_vocab_from_tasks()
        self._seq_length = vocab_sequence_length
        if self._teacher.vocab_size:
            # using MultiDiscrete instead of DiscreteSequence so gym
            # _spec_from_gym_space won't complain.
            self._sentence_space = gym.spaces.MultiDiscrete(
                [self._teacher.vocab_size] * self._seq_length)
        self._sub_steps = sub_steps

        self._world.step(20)
        self._agent = self._world.get_agent()
        for task_group in self._teacher.get_task_groups():
            for task in task_group.get_tasks():
                task.setup(self._world, agent_type)

        logging.debug(self._world.info())
        self._agent_joints = agent_cfg['control_joints']
        joint_states = list(
            map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
        self._joints_limits = list(
            map(lambda s: s.get_effort_limits()[0], joint_states))
        if agent_cfg['use_pid']:
            for joint_index in range(len(self._agent_joints)):
                self._agent.set_pid_controller(
                    self._agent_joints[joint_index],
                    'velocity',
                    p=0.02,
                    d=0.00001,
                    max_force=self._joints_limits[joint_index])
            self._agent_control_range = agent_cfg['pid_control_limit']
        else:
            self._agent_control_range = np.array(self._joints_limits)
        self._agent_camera = agent_cfg['camera_sensor']

        logging.debug("joints to control: %s" % self._agent_joints)

        self._action_cost = action_cost
        self._with_language = with_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states
        assert image_data_format in ('channels_first', 'channels_last')
        self._data_format = image_data_format
        self._resized_image_size = resized_image_size
        self._substep_time = world_time_precision

        self._control_space = gym.spaces.Box(low=-1.0,
                                             high=1.0,
                                             shape=[len(self._agent_joints)],
                                             dtype=np.float32)
        if self._with_language:
            self.action_space = gym.spaces.Dict(control=self._control_space,
                                                sentence=self._sentence_space)
        else:
            self.action_space = self._control_space

        self.reset()
        obs_sample = self._get_observation_with_sentence("hello")
        if self._with_language or self._image_with_internal_states:
            self.observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        elif self._use_image_obs:
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=obs_sample.shape,
                                                    dtype=np.uint8)
        else:
            self.observation_space = gym.spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=obs_sample.shape,
                                                    dtype=np.float32)
示例#6
0
    def __init__(self,
                 goal_name='beer',
                 max_steps=100,
                 reward_shaping=True,
                 motion_loss=0.0000,
                 use_internal_states_only=True,
                 port=None):
        """
        Args:
            goal_name (string): name of the object to lift off ground
            max_steps (int): episode will end when the agent exceeds the number of steps.
            reward_shaping (boolean): whether it adds distance based reward shaping.
            motion_loss (float): if not zero, it will add -motion_loss * || V_{all_joints} ||^2 in
                 the reward when episode ends.
            use_internal_states_only (boolean): whether to only use internal states (joint positions
                  and velocities) and goal positions, and not use camera sensors.
            port: Gazebo port, need to specify when run multiple environment in parallel
        """

        if port is None:
            port = 0
        gazebo.initialize(port=port)
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(), "pr2.world"))
        self._agent = self._world.get_agent()
        #logger.info("joint names: %s" % self._agent.get_joint_names())

        self._all_joints = self._agent.get_joint_names()

        # to avoid different parallel simulation has the same randomness
        random.seed(port)

        #self._world.info()

        # passive joints are joints that could move but have no actuators, are only indirectly
        # controled by the motion of active joints.
        # Though in the simulation, we could "control" through API, we chose to be more realistic.
        # from https://github.com/PR2/pr2_mechanism/blob/melodic-devel/pr2_mechanism_model/pr2.urdf
        passive_joints = set(["r_gripper_l_finger_joint",
                              "r_gripper_r_finger_joint",
                              "r_gripper_r_finger_tip_joint",
                              "r_gripper_l_finger_tip_joint",
                              "r_gripper_r_parallel_root_joint",
                              "r_gripper_l_parallel_root_joint"])

        # PR2 right arm has 7 DOF, gripper has 1 DoF, as specified on p18/p26 on PR2 manual
        # https://www.clearpathrobotics.com/wp-content/uploads/2014/08/pr2_manual_r321.pdf
        # we exclude rest of the joints
        unused_joints = set([
            "r_gripper_motor_slider_joint",
            "r_gripper_motor_screw_joint",
            "r_gripper_r_screw_screw_joint",
            "r_gripper_l_screw_screw_joint",
            "r_gripper_r_parallel_tip_joint",
            "r_gripper_l_parallel_tip_joint"])

        unused_joints = passive_joints.union(unused_joints)

        self._r_arm_joints = list(
            filter(lambda s: s.find('pr2::r_') != -1
                   and s.split("::")[-1] not in unused_joints, self._all_joints))
        logger.info("joints in the right arm to control: %s" % self._r_arm_joints)

        joint_states = list(map(lambda s: self._agent.get_joint_state(s), self._r_arm_joints))

        self._r_arm_joints_limits = list(map(lambda s: s.get_effort_limits()[0],
                                        joint_states));

        logger.info('\n'.join(map(lambda s: str(s[0]) + ":" + str(s[1]), zip(
            self._r_arm_joints, self._r_arm_joints_limits))))

        self._goal_name = goal_name
        self._goal = self._world.get_agent(self._goal_name)
        self._max_steps = max_steps
        self._steps_in_this_episode = 0
        self._reward_shaping = reward_shaping
        self._resized_image_size = (128, 128) #(84, 84)
        self._use_internal_states_only = use_internal_states_only
        self._cum_reward = 0.0
        self._motion_loss = motion_loss

        # a hack to work around gripper not open initially, might not need now.
        self._gripper_reward_dir = 1
        self._gripper_upper_limit = 0.07
        self._gripper_lower_limit = 0.01

        # whether to move head cameras and gripper when episode starts.
        self._adjust_position_at_start = False

        obs = self._get_observation()
        self._prev_dist = self._get_finger_tip_distance()
        self._prev_gripper_pos = self._get_gripper_pos()

        if self._use_internal_states_only:
            self.observation_space = gym.spaces.Box(
                low=-np.inf, high=np.inf, shape=obs.shape, dtype=np.float32)
        else:
            self.observation_space = gym.spaces.Box(
                low=0, high=256.0, shape=obs.shape, dtype=np.float32)
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=[len(self._r_arm_joints)], dtype=np.float32)
    def __init__(self,
                 with_language=False,
                 image_with_internal_states=False,
                 port=None,
                 resized_image_size=None,
                 data_format='channels_last'):
        """Create SimpleNavigation environment.

        Args:
            with_language (bool): whether to generate language for observation
            image_with_internal_states (bool): If true, the agent's self internal
                states i.e., joint position and velocities would be available
                together with the image.
            port (int): TCP/IP port for the simulation server
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            data_format (str):  one of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """
        super(SimpleNavigation, self).__init__(port=port)
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(),
                         "pioneer2dx_camera.world"))
        self._agent = self._world.get_agent()
        self._rendering_cam_pose = "4 -4 3 0 0.4 2.3"
        assert self._agent is not None
        logging.debug("joint names: %s" % self._agent.get_joint_names())
        self._all_joints = self._agent.get_joint_names()
        self._joint_names = list(
            filter(lambda s: s.find('wheel') != -1, self._all_joints))
        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        task_group = teacher.TaskGroup()
        task_group.add_task(GoalTask())
        self._teacher.add_task_group(task_group)
        self._seq_length = 20
        self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                                self._seq_length)

        self._with_language = with_language
        self._image_with_internal_states = image_with_internal_states
        self._resized_image_size = resized_image_size
        assert data_format in ('channels_first', 'channels_last')
        self._data_format = data_format

        time.sleep(0.1)  # Allow Gazebo threads to be fully ready
        self.reset()

        # Get observation dimension
        obs_sample = self._get_observation('hello')
        if self._with_language or self._image_with_internal_states:
            self._observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        else:
            self._observation_space = gym.spaces.Box(
                low=0, high=255, shape=obs_sample.shape, dtype=np.uint8)

        control_space = gym.spaces.Box(
            low=-0.2,
            high=0.2,
            shape=[len(self._joint_names)],
            dtype=np.float32)
        if with_language:
            self._action_space = gym.spaces.Dict(
                control=control_space, sentence=self._sentence_space)
        else:
            self._action_space = control_space
示例#8
0
    def __init__(self, use_pid=False, obs_stack=True, sub_steps=50,
                 port=None):
        """
        Args:
            use_pid (bool): use pid or direct force to control
            port: Gazebo port, need to specify when run multiple environment in parallel
            obs_stack (bool): Use staked multi step observation if True
            sub_steps (int): take how many simulator substeps during one gym step
        """
        super(ICubWalk, self).__init__(port=port)
        self._sub_steps = sub_steps
        self._obs_stack = obs_stack
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(), "icub.world"))
        self._agent = self._world.get_agent('icub')
        logging.debug(self._world.info())

        self._agent_joints = [
            'icub::iCub::l_leg::l_hip_pitch',
            'icub::iCub::l_leg::l_hip_roll',
            'icub::iCub::l_leg::l_hip_yaw',
            'icub::iCub::l_leg::l_knee',
            'icub::iCub::l_leg::l_ankle_pitch',
            'icub::iCub::l_leg::l_ankle_roll',
            'icub::iCub::r_leg::r_hip_pitch',
            'icub::iCub::r_leg::r_hip_roll',
            'icub::iCub::r_leg::r_hip_yaw',
            'icub::iCub::r_leg::r_knee',
            'icub::iCub::r_leg::r_ankle_pitch',
            'icub::iCub::r_leg::r_ankle_roll',
            'icub::iCub::torso_yaw',
            'icub::iCub::torso_roll',
            'icub::iCub::torso_pitch',
            'icub::iCub::l_shoulder_pitch',
            'icub::iCub::l_shoulder_roll',
            'icub::iCub::l_shoulder_yaw',
            'icub::iCub::l_elbow',
            'icub::iCub::neck_pitch',
            'icub::iCub::neck_roll',
            'icub::iCub::r_shoulder_pitch',
            'icub::iCub::r_shoulder_roll',
            'icub::iCub::r_shoulder_yaw',
            'icub::iCub::r_elbow',
        ]

        joint_states = list(
            map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
        self._joints_limits = list(
            map(lambda s: s.get_effort_limits()[0], joint_states))
        if use_pid:
            for joint_index in range(len(self._agent_joints)):
                self._agent.set_pid_controller(
                    self._agent_joints[joint_index],
                    'velocity',
                    p=0.02,
                    d=0.00001,
                    max_force=self._joints_limits[joint_index])
            self._agent_control_range = 3.0 * np.pi
        else:
            self._agent_control_range = np.array(self._joints_limits)

        logging.info("joints to control: %s" % self._agent_joints)

        self.action_space = gym.spaces.Box(
            low=-1.0,
            high=1.0,
            shape=[len(self._agent_joints)],
            dtype=np.float32)

        self.reset()
        obs = self._get_observation()
        if self._obs_stack:
            obs = np.concatenate((obs, obs), axis=0)
        self.observation_space = gym.spaces.Box(
            low=-100, high=100, shape=obs.shape, dtype=np.float32)
示例#9
0
    def __init__(self,
                 agent_type='pioneer2dx_noplugin',
                 world_name="play_ground.world",
                 tasks=[GoalTask],
                 with_language=False,
                 with_agent_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 max_steps=200,
                 world_time_precision=None,
                 step_time=0.1,
                 real_time_update_rate=0,
                 port=None,
                 action_cost=0.0,
                 resized_image_size=(64, 64),
                 vocab_sequence_length=20,
                 action_wrapper=None):
        """
        Args:
            agent_type (string): Select the agent robot, supporting pr2_noplugin,
                pioneer2dx_noplugin, turtlebot, youbot_noplugin and icub_with_hands for now
                note that 'agent_type' should be exactly the same string as the model's
                name at the beginning of model's sdf file
            world_name (string): Select the world file, e.g., empty.world, play_ground.world,
                grocery_ground.world
            tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask
            with_language (bool): The observation will be a dict with an extra sentence
            with_agent_language (bool): Include agent sentence in action space.
            use_image_observation (bool): Use image, or use low-dimentional states as
                observation. Poses in the states observation are in world coordinate
            max_steps (int): episode will end in so many steps, will be passed to the tasks
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            world_time_precision (float|None): this parameter depends on the agent.
                if not none, the default time precision of simulator, i.e., the max_step_size
                defined in the agent cfg file, will be override. Note that pr2 and iCub
                requires a max_step_size <= 0.001, otherwise cannot train a successful policy.
            step_time (float): the peroid of one step() function of the environment in simulation.
                step_time is rounded to multiples of world_time_precision
                step_time / world_time_precision is how many simulator substeps during one
                environment step. for the tasks need higher control frequency (such as the
                tasks need walking by 2 legs), using a smaller step_time like 0.05 is better.
                experiments show that iCub can not learn how to walk in a 0.1 step_time
            real_time_update_rate (int): max update rate per second. There is no limit if
                this is set to 0 as default in the world file. If 1:1 real time is prefered
                (like playing or recording video), it should be set to 1.0/world_time_precision.
            port: Gazebo port, need to specify when run multiple environment in parallel
            action_cost (float): Add an extra action cost to reward, which helps to train
                an energy/forces efficency policy or reduce unnecessary movements
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            vocab_sequence_length (int): the length of encoded sequence
            action_wrapper (None|class): Some times primitive joints is not wanted, e.g., has
                redundant dimensions or offset. If not None, this is used to transform the agent
                actions. See ActionWrapper of gazebo_agent.py for example.
        """

        self._action_cost = action_cost
        self._with_language = with_language
        self._seq_length = vocab_sequence_length
        self._with_agent_language = with_language and with_agent_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states

        # Load agent and world file
        with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                  'r') as cfg_file:
            agent_cfgs = json.load(cfg_file)
        agent_cfg = agent_cfgs[agent_type]
        wd_path = os.path.join(social_bot.get_world_dir(), world_name)
        with open(wd_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        if world_time_precision is None:
            world_time_precision = agent_cfg['max_sim_step_time']
        self._sub_steps = int(round(step_time / world_time_precision))
        self._step_time = world_time_precision * self._sub_steps
        sim_time_cfg = [
            "//physics//max_step_size=" + str(world_time_precision),
            "//physics//real_time_update_rate=" + str(real_time_update_rate)
        ]
        super().__init__(world_string=world_string,
                         world_config=sim_time_cfg,
                         port=port)
        logging.debug(self._world.info())

        # Setup agent
        self._agent = GazeboAgent(
            world=self._world,
            agent_type=agent_type,
            config=agent_cfg,
            with_language=with_language,
            with_agent_language=with_agent_language,
            vocab_sequence_length=self._seq_length,
            use_image_observation=use_image_observation,
            resized_image_size=resized_image_size,
            image_with_internal_states=image_with_internal_states,
            action_wrapper=action_wrapper)

        # Setup teacher and tasks
        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        self._has_goal_task = False
        for task in tasks:
            if task == GoalTask:
                self._has_goal_task = True
            task_group = TaskGroup()
            task_group.add_task(task(env=self, max_steps=max_steps))
            self._teacher.add_task_group(task_group)
        self._teacher._build_vocab_from_tasks()

        # Setup action space and observation space
        if self._teacher.vocab_size:
            sentence_space = gym.spaces.MultiDiscrete(
                [self._teacher.vocab_size] * self._seq_length)
        self._agent.set_sentence_space(sentence_space)
        self._control_space = self._agent.get_control_space()
        self.action_space = self._agent.get_action_space()
        self.reset()
        self.observation_space = self._agent.get_observation_space(
            self._teacher)
示例#10
0
    def __init__(self,
                 with_language=False,
                 use_image_observation=False,
                 image_with_internal_states=False,
                 random_goal=False,
                 agent_type='pioneer2dx_noplugin',
                 max_steps=200,
                 port=None,
                 resized_image_size=(64, 64),
                 data_format='channels_last'):
        """
        Args:
            with_language (bool): The observation will be a dict with an extra sentence
            use_image_observation (bool): Use image, or use low-dimentional states as 
                observation. Poses in the states observation are in world coordinate
            image_with_internal_states (bool): If true, the agent's self internal states
                i.e., joint position and velocities would be available together with image.
                Only affect if use_image_observation is true
            random_goal (bool): If ture, teacher will randomly select goal from the 
                object list each episode
            agent_type (string): Select the agent robot, supporting pr2_noplugin, 
                pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now
                note that 'agent_type' should be the same str as the model's name
            port: Gazebo port, need to specify when run multiple environment in parallel
            resized_image_size (None|tuple): If None, use the original image size
                from the camera. Otherwise, the original image will be resized
                to (width, height)
            data_format (str):  One of `channels_last` or `channels_first`.
                The ordering of the dimensions in the images.
                `channels_last` corresponds to images with shape
                `(height, width, channels)` while `channels_first` corresponds
                to images with shape `(channels, height, width)`.
        """
        super(GroceryGround, self).__init__(port=port)

        self._teacher = teacher.Teacher(task_groups_exclusive=False)
        task_group = teacher.TaskGroup()
        self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps,
                                                   success_distance_thresh=0.5,
                                                   fail_distance_thresh=3.0,
                                                   reward_shaping=True,
                                                   random_goal=random_goal,
                                                   random_range=10.0)
        task_group.add_task(self._teacher_task)
        self._teacher.add_task_group(task_group)
        self._teacher.build_vocab_from_tasks()
        self._seq_length = 20
        self._sentence_space = DiscreteSequence(self._teacher.vocab_size,
                                                self._seq_length)

        self._object_list = self._teacher_task.get_object_list()
        self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
        self._pos_list.remove((0, 0))

        wf_path = os.path.join(social_bot.get_world_dir(),
                               "grocery_ground.world")
        with open(wf_path, 'r+') as world_file:
            world_string = self._insert_agent_to_world_file(
                world_file, agent_type)
        self._world = gazebo.new_world_from_string(world_string)
        self._world.step(20)
        self._insert_objects(self._object_list)
        self._agent = self._world.get_agent()
        logging.debug(self._world.info())
        agent_cfgs = json.load(
            open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"),
                 'r'))
        agent_cfg = agent_cfgs[agent_type]
        self._agent_joints = agent_cfg['control_joints']
        joint_states = list(
            map(lambda s: self._agent.get_joint_state(s), self._agent_joints))
        self._joints_limits = list(
            map(lambda s: s.get_effort_limits()[0], joint_states))
        if agent_cfg['use_pid']:
            for joint_index in range(len(self._agent_joints)):
                self._agent.set_pid_controller(
                    self._agent_joints[joint_index],
                    'velocity',
                    p=0.02,
                    d=0.00001,
                    max_force=self._joints_limits[joint_index])
            self._agent_control_range = agent_cfg['pid_control_limit']
        else:
            self._agent_control_range = np.array(self._joints_limits)
        self._agent_camera = agent_cfg['camera_sensor']

        logging.debug("joints to control: %s" % self._agent_joints)

        self._with_language = with_language
        self._use_image_obs = use_image_observation
        self._image_with_internal_states = self._use_image_obs and image_with_internal_states
        assert data_format in ('channels_first', 'channels_last')
        self._data_format = data_format
        self._resized_image_size = resized_image_size
        self._random_goal = random_goal

        self.reset()
        obs_sample = self._get_observation("hello")
        if self._with_language or self._image_with_internal_states:
            self.observation_space = self._construct_dict_space(
                obs_sample, self._teacher.vocab_size)
        elif self._use_image_obs:
            self.observation_space = gym.spaces.Box(low=0,
                                                    high=255,
                                                    shape=obs_sample.shape,
                                                    dtype=np.uint8)
        else:
            self.observation_space = gym.spaces.Box(low=-np.inf,
                                                    high=np.inf,
                                                    shape=obs_sample.shape,
                                                    dtype=np.float32)

        self._control_space = gym.spaces.Box(low=-1.0,
                                             high=1.0,
                                             shape=[len(self._agent_joints)],
                                             dtype=np.float32)
        if self._with_language:
            self.action_space = gym.spaces.Dict(control=self._control_space,
                                                sentence=self._sentence_space)
        else:
            self.action_space = self._control_space
示例#11
0
    def __init__(self,
                 with_language=False,
                 use_image_obs=False,
                 agent_type='pioneer2dx_noplugin',
                 goal_name='table',
                 max_steps=160,
                 port=None):
        """
        Args:
            with_language (bool): the observation will be a dict with an extra sentence
            use_image_obs (bool): use image, or use internal states as observation
                poses in internal states observation are in world coordinate
            agent_type (string): select the agent robot, supporting pr2_differential, 
                pioneer2dx_noplugin, turtlebot, and irobot create for now
            port: Gazebo port, need to specify when run multiple environment in parallel
        """
        if port is None:
            port = 0
        gazebo.initialize(port=port)
        self._world = gazebo.new_world_from_file(
            os.path.join(social_bot.get_world_dir(), "grocery_ground.world"))
        self._object_types = [
            'coke_can', 'cube_20k', 'car_wheel', 'plastic_cup', 'beer',
            'hammer'
        ]
        self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5)))
        self._pos_list.remove((0, 0))
        self._world.info()
        self._world.insertModelFile('model://' + agent_type)
        self._world.step(20)
        time.sleep(0.1)  # Avoid 'px!=0' error
        self._random_insert_objects()
        self._world.model_list_info()
        self._world.info()

        # Specify joints and sensors for the robots
        control_joints = {
            'pr2_differential': [
                'pr2_differential::fl_caster_r_wheel_joint',
                'pr2_differential::fr_caster_l_wheel_joint'
            ],
            'pioneer2dx_noplugin': [
                'pioneer2dx_noplugin::left_wheel_hinge',
                'pioneer2dx_noplugin::right_wheel_hinge'
            ],
            'turtlebot': [
                'turtlebot::create::left_wheel',
                'turtlebot::create::right_wheel'
            ],
            'create': ['create::left_wheel', 'create::right_wheel'],
        }
        control_force = {
            'pr2_differential': 20,
            'pioneer2dx_noplugin': 0.5,
            'turtlebot': 0.5,
            'create': 0.5,
        }
        camera_sensor = {
            'pr2_differential':
            'default::pr2_differential::head_tilt_link::head_mount_prosilica_link_sensor',
            'pioneer2dx_noplugin': ' ',
            'turtlebot': 'default::turtlebot::kinect::link::camera',
            'create': ' ',
        }

        self._agent = self._world.get_agent(agent_type)
        self._agent_joints = control_joints[agent_type]
        self._agent_control_force = control_force[agent_type]
        self._agent_camera = camera_sensor[agent_type]
        self._goal_name = goal_name
        self._goal = self._world.get_model(goal_name)

        logger.info("joints to control: %s" % self._agent_joints)

        self._teacher = teacher.Teacher(False)
        task_group = teacher.TaskGroup()
        self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps,
                                                   goal_name=self._goal_name,
                                                   success_distance_thresh=0.5,
                                                   fail_distance_thresh=3.0,
                                                   reward_shaping=True,
                                                   random_range=10.0)
        task_group.add_task(self._teacher_task)
        self._teacher.add_task_group(task_group)

        self._with_language = with_language
        self._use_image_obs = use_image_obs

        obs = self.reset()
        if self._use_image_obs:
            obs_data_space = gym.spaces.Box(low=0,
                                            high=255,
                                            shape=obs.shape,
                                            dtype=np.uint8)
        else:
            obs_data_space = gym.spaces.Box(low=-50,
                                            high=50,
                                            shape=obs.shape,
                                            dtype=np.float32)

        control_space = gym.spaces.Box(low=-self._agent_control_force,
                                       high=self._agent_control_force,
                                       shape=[len(self._agent_joints)],
                                       dtype=np.float32)

        if self._with_language:
            self.observation_space = gym.spaces.Dict(data=obs_data_space,
                                                     sentence=DiscreteSequence(
                                                         128, 24))
            self._action_space = gym.spaces.Dict(control=control_space,
                                                 sentence=DiscreteSequence(
                                                     128, 24))
        else:
            self.observation_space = obs_data_space
            self.action_space = control_space