def __init__(self, with_language=False, image_with_internal_states=False, port=None, resized_image_size=None): """Create SimpleNavigation environment. Args: with_language (bool): whether to generate language for observation image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with the image. port (int): TCP/IP port for the simulation server resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) """ super(SimpleNavigation, self).__init__(world_file='pioneer2dx_camera.world', port=port) self._with_language = with_language self._image_with_internal_states = image_with_internal_states self.set_rendering_cam_pose('4 -4 3 0 0.4 2.3') self._seq_length = 20 # Setup agent self._agent = GazeboAgent( world=self._world, agent_type='pioneer2dx_noplugin', with_language=with_language, vocab_sequence_length=self._seq_length, use_image_observation=True, resized_image_size=resized_image_size, image_with_internal_states=image_with_internal_states) # Setup teacher and tasks self._teacher = teacher.Teacher(task_groups_exclusive=False) task_group = teacher.TaskGroup() task = GoalTask(env=self, max_steps=120, goal_name="goal", fail_distance_thresh=0.5, distraction_list=[], random_range=2.0) task_group.add_task(task) self._teacher.add_task_group(task_group) self._sentence_space = DiscreteSequence(self._teacher.vocab_size, self._seq_length) # Setup action space and observation space self.reset() self._agent.set_sentence_space(self._sentence_space) self._control_space = self._agent.get_control_space() self._action_space = self._agent.get_action_space() self._observation_space = self._agent.get_observation_space( self._teacher)
def __init__(self, with_language=True, port=None): if port is None: port = 0 gazebo.initialize(port=port) self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "pioneer2dx_camera.world")) self._agent = self._world.get_agent() assert self._agent is not None logger.info("joint names: %s" % self._agent.get_joint_names()) self._joint_names = self._agent.get_joint_names() self._teacher = teacher.Teacher(False) task_group = teacher.TaskGroup() task_group.add_task(GoalTask()) self._teacher.add_task_group(task_group) self._with_language = with_language # get observation dimension image = self._agent.get_camera_observation("camera") image = np.array(image, copy=False) if with_language: self._observation_space = gym.spaces.Dict( image=gym.spaces.Box(low=0, high=1, shape=image.shape, dtype=np.uint8), sentence=DiscreteSequence(256, 20)) self._action_space = gym.spaces.Dict( control=gym.spaces.Box(low=-0.2, high=0.2, shape=[len(self._joint_names)], dtype=np.float32), sentence=DiscreteSequence(256, 20)) else: self._observation_space = gym.spaces.Box(low=0, high=1, shape=image.shape, dtype=np.uint8) self._action_space = gym.spaces.Box(low=-0.2, high=0.2, shape=[len(self._joint_names)], dtype=np.float32)
def __init__(self, with_language=False, use_image_observation=False, image_with_internal_states=False, task_name='goal', agent_type='pioneer2dx_noplugin', world_time_precision=None, step_time=0.1, port=None, action_cost=0.0, resized_image_size=(64, 64), image_data_format='channels_last', vocab_sequence_length=20): """ Args: with_language (bool): The observation will be a dict with an extra sentence use_image_observation (bool): Use image, or use low-dimentional states as observation. Poses in the states observation are in world coordinate image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with image. Only affect if use_image_observation is true task_name (string): the teacher task, now there are 2 tasks, a simple goal task: 'goal' a simple kicking ball task: 'kickball' agent_type (string): Select the agent robot, supporting pr2_noplugin, pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now note that 'agent_type' should be the same str as the model's name world_time_precision (float|None): if not none, the time precision of simulator, i.e., the max_step_size defined in the agent cfg file, will be override. e.g., '0.002' for a 2ms sim step step_time (float): the peroid of one step of the environment. step_time / world_time_precision is how many simulator substeps during one environment step. for some complex agent, i.e., icub, using step_time of 0.05 is better port: Gazebo port, need to specify when run multiple environment in parallel action_cost (float): Add an extra action cost to reward, which helps to train an energy/forces efficency policy or reduce unnecessary movements resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) image_data_format (str): One of `channels_last` or `channels_first`. The ordering of the dimensions in the images. `channels_last` corresponds to images with shape `(height, width, channels)` while `channels_first` corresponds to images with shape `(channels, height, width)`. """ agent_cfgs = json.load( open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r')) agent_cfg = agent_cfgs[agent_type] wf_path = os.path.join(social_bot.get_world_dir(), "grocery_ground.world") with open(wf_path, 'r+') as world_file: world_string = self._insert_agent_to_world_file( world_file, agent_type) if world_time_precision is None: world_time_precision = agent_cfg['max_sim_step_time'] sub_steps = int(round(step_time / world_time_precision)) sim_time_cfg = [ "//physics//max_step_size=" + str(world_time_precision) ] super(GroceryGround, self).__init__(world_string=world_string, world_config=sim_time_cfg, port=port) self._teacher = teacher.Teacher(task_groups_exclusive=False) if task_name is None or task_name == 'goal': main_task = GroceryGroundGoalTask() elif task_name == 'kickball': main_task = GroceryGroundKickBallTask(step_time=step_time) else: logging.debug("upsupported task name: " + task_name) main_task_group = TaskGroup() main_task_group.add_task(main_task) self._teacher.add_task_group(main_task_group) if agent_type.find('icub') != -1: icub_aux_task_group = TaskGroup() icub_standing_task = ICubAuxiliaryTask(step_time=step_time) icub_aux_task_group.add_task(icub_standing_task) self._teacher.add_task_group(icub_aux_task_group) self._teacher._build_vocab_from_tasks() self._seq_length = vocab_sequence_length if self._teacher.vocab_size: # using MultiDiscrete instead of DiscreteSequence so gym # _spec_from_gym_space won't complain. self._sentence_space = gym.spaces.MultiDiscrete( [self._teacher.vocab_size] * self._seq_length) self._sub_steps = sub_steps self._world.step(20) self._agent = self._world.get_agent() for task_group in self._teacher.get_task_groups(): for task in task_group.get_tasks(): task.setup(self._world, agent_type) logging.debug(self._world.info()) self._agent_joints = agent_cfg['control_joints'] joint_states = list( map(lambda s: self._agent.get_joint_state(s), self._agent_joints)) self._joints_limits = list( map(lambda s: s.get_effort_limits()[0], joint_states)) if agent_cfg['use_pid']: for joint_index in range(len(self._agent_joints)): self._agent.set_pid_controller( self._agent_joints[joint_index], 'velocity', p=0.02, d=0.00001, max_force=self._joints_limits[joint_index]) self._agent_control_range = agent_cfg['pid_control_limit'] else: self._agent_control_range = np.array(self._joints_limits) self._agent_camera = agent_cfg['camera_sensor'] logging.debug("joints to control: %s" % self._agent_joints) self._action_cost = action_cost self._with_language = with_language self._use_image_obs = use_image_observation self._image_with_internal_states = self._use_image_obs and image_with_internal_states assert image_data_format in ('channels_first', 'channels_last') self._data_format = image_data_format self._resized_image_size = resized_image_size self._substep_time = world_time_precision self._control_space = gym.spaces.Box(low=-1.0, high=1.0, shape=[len(self._agent_joints)], dtype=np.float32) if self._with_language: self.action_space = gym.spaces.Dict(control=self._control_space, sentence=self._sentence_space) else: self.action_space = self._control_space self.reset() obs_sample = self._get_observation_with_sentence("hello") if self._with_language or self._image_with_internal_states: self.observation_space = self._construct_dict_space( obs_sample, self._teacher.vocab_size) elif self._use_image_obs: self.observation_space = gym.spaces.Box(low=0, high=255, shape=obs_sample.shape, dtype=np.uint8) else: self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=obs_sample.shape, dtype=np.float32)
def __init__(self, with_language=False, image_with_internal_states=False, port=None, resized_image_size=None, data_format='channels_last'): """Create SimpleNavigation environment. Args: with_language (bool): whether to generate language for observation image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with the image. port (int): TCP/IP port for the simulation server resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) data_format (str): one of `channels_last` or `channels_first`. The ordering of the dimensions in the images. `channels_last` corresponds to images with shape `(height, width, channels)` while `channels_first` corresponds to images with shape `(channels, height, width)`. """ super(SimpleNavigation, self).__init__(port=port) self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "pioneer2dx_camera.world")) self._agent = self._world.get_agent() self._rendering_cam_pose = "4 -4 3 0 0.4 2.3" assert self._agent is not None logging.debug("joint names: %s" % self._agent.get_joint_names()) self._all_joints = self._agent.get_joint_names() self._joint_names = list( filter(lambda s: s.find('wheel') != -1, self._all_joints)) self._teacher = teacher.Teacher(task_groups_exclusive=False) task_group = teacher.TaskGroup() task_group.add_task(GoalTask()) self._teacher.add_task_group(task_group) self._seq_length = 20 self._sentence_space = DiscreteSequence(self._teacher.vocab_size, self._seq_length) self._with_language = with_language self._image_with_internal_states = image_with_internal_states self._resized_image_size = resized_image_size assert data_format in ('channels_first', 'channels_last') self._data_format = data_format time.sleep(0.1) # Allow Gazebo threads to be fully ready self.reset() # Get observation dimension obs_sample = self._get_observation('hello') if self._with_language or self._image_with_internal_states: self._observation_space = self._construct_dict_space( obs_sample, self._teacher.vocab_size) else: self._observation_space = gym.spaces.Box( low=0, high=255, shape=obs_sample.shape, dtype=np.uint8) control_space = gym.spaces.Box( low=-0.2, high=0.2, shape=[len(self._joint_names)], dtype=np.float32) if with_language: self._action_space = gym.spaces.Dict( control=control_space, sentence=self._sentence_space) else: self._action_space = control_space
def __init__(self, agent_type='pioneer2dx_noplugin', world_name="play_ground.world", tasks=[GoalTask], with_language=False, with_agent_language=False, use_image_observation=False, image_with_internal_states=False, max_steps=200, world_time_precision=None, step_time=0.1, real_time_update_rate=0, port=None, action_cost=0.0, resized_image_size=(64, 64), vocab_sequence_length=20, action_wrapper=None): """ Args: agent_type (string): Select the agent robot, supporting pr2_noplugin, pioneer2dx_noplugin, turtlebot, youbot_noplugin and icub_with_hands for now note that 'agent_type' should be exactly the same string as the model's name at the beginning of model's sdf file world_name (string): Select the world file, e.g., empty.world, play_ground.world, grocery_ground.world tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as observation. Poses in the states observation are in world coordinate max_steps (int): episode will end in so many steps, will be passed to the tasks image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with image. Only affect if use_image_observation is true world_time_precision (float|None): this parameter depends on the agent. if not none, the default time precision of simulator, i.e., the max_step_size defined in the agent cfg file, will be override. Note that pr2 and iCub requires a max_step_size <= 0.001, otherwise cannot train a successful policy. step_time (float): the peroid of one step() function of the environment in simulation. step_time is rounded to multiples of world_time_precision step_time / world_time_precision is how many simulator substeps during one environment step. for the tasks need higher control frequency (such as the tasks need walking by 2 legs), using a smaller step_time like 0.05 is better. experiments show that iCub can not learn how to walk in a 0.1 step_time real_time_update_rate (int): max update rate per second. There is no limit if this is set to 0 as default in the world file. If 1:1 real time is prefered (like playing or recording video), it should be set to 1.0/world_time_precision. port: Gazebo port, need to specify when run multiple environment in parallel action_cost (float): Add an extra action cost to reward, which helps to train an energy/forces efficency policy or reduce unnecessary movements resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) vocab_sequence_length (int): the length of encoded sequence action_wrapper (None|class): Some times primitive joints is not wanted, e.g., has redundant dimensions or offset. If not None, this is used to transform the agent actions. See ActionWrapper of gazebo_agent.py for example. """ self._action_cost = action_cost self._with_language = with_language self._seq_length = vocab_sequence_length self._with_agent_language = with_language and with_agent_language self._use_image_obs = use_image_observation self._image_with_internal_states = self._use_image_obs and image_with_internal_states # Load agent and world file with open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r') as cfg_file: agent_cfgs = json.load(cfg_file) agent_cfg = agent_cfgs[agent_type] wd_path = os.path.join(social_bot.get_world_dir(), world_name) with open(wd_path, 'r+') as world_file: world_string = self._insert_agent_to_world_file( world_file, agent_type) if world_time_precision is None: world_time_precision = agent_cfg['max_sim_step_time'] self._sub_steps = int(round(step_time / world_time_precision)) self._step_time = world_time_precision * self._sub_steps sim_time_cfg = [ "//physics//max_step_size=" + str(world_time_precision), "//physics//real_time_update_rate=" + str(real_time_update_rate) ] super().__init__(world_string=world_string, world_config=sim_time_cfg, port=port) logging.debug(self._world.info()) # Setup agent self._agent = GazeboAgent( world=self._world, agent_type=agent_type, config=agent_cfg, with_language=with_language, with_agent_language=with_agent_language, vocab_sequence_length=self._seq_length, use_image_observation=use_image_observation, resized_image_size=resized_image_size, image_with_internal_states=image_with_internal_states, action_wrapper=action_wrapper) # Setup teacher and tasks self._teacher = teacher.Teacher(task_groups_exclusive=False) self._has_goal_task = False for task in tasks: if task == GoalTask: self._has_goal_task = True task_group = TaskGroup() task_group.add_task(task(env=self, max_steps=max_steps)) self._teacher.add_task_group(task_group) self._teacher._build_vocab_from_tasks() # Setup action space and observation space if self._teacher.vocab_size: sentence_space = gym.spaces.MultiDiscrete( [self._teacher.vocab_size] * self._seq_length) self._agent.set_sentence_space(sentence_space) self._control_space = self._agent.get_control_space() self.action_space = self._agent.get_action_space() self.reset() self.observation_space = self._agent.get_observation_space( self._teacher)
def __init__(self, with_language=False, use_image_observation=False, image_with_internal_states=False, random_goal=False, agent_type='pioneer2dx_noplugin', max_steps=200, port=None, resized_image_size=(64, 64), data_format='channels_last'): """ Args: with_language (bool): The observation will be a dict with an extra sentence use_image_observation (bool): Use image, or use low-dimentional states as observation. Poses in the states observation are in world coordinate image_with_internal_states (bool): If true, the agent's self internal states i.e., joint position and velocities would be available together with image. Only affect if use_image_observation is true random_goal (bool): If ture, teacher will randomly select goal from the object list each episode agent_type (string): Select the agent robot, supporting pr2_noplugin, pioneer2dx_noplugin, turtlebot, irobot create and icub_with_hands for now note that 'agent_type' should be the same str as the model's name port: Gazebo port, need to specify when run multiple environment in parallel resized_image_size (None|tuple): If None, use the original image size from the camera. Otherwise, the original image will be resized to (width, height) data_format (str): One of `channels_last` or `channels_first`. The ordering of the dimensions in the images. `channels_last` corresponds to images with shape `(height, width, channels)` while `channels_first` corresponds to images with shape `(channels, height, width)`. """ super(GroceryGround, self).__init__(port=port) self._teacher = teacher.Teacher(task_groups_exclusive=False) task_group = teacher.TaskGroup() self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps, success_distance_thresh=0.5, fail_distance_thresh=3.0, reward_shaping=True, random_goal=random_goal, random_range=10.0) task_group.add_task(self._teacher_task) self._teacher.add_task_group(task_group) self._teacher.build_vocab_from_tasks() self._seq_length = 20 self._sentence_space = DiscreteSequence(self._teacher.vocab_size, self._seq_length) self._object_list = self._teacher_task.get_object_list() self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5))) self._pos_list.remove((0, 0)) wf_path = os.path.join(social_bot.get_world_dir(), "grocery_ground.world") with open(wf_path, 'r+') as world_file: world_string = self._insert_agent_to_world_file( world_file, agent_type) self._world = gazebo.new_world_from_string(world_string) self._world.step(20) self._insert_objects(self._object_list) self._agent = self._world.get_agent() logging.debug(self._world.info()) agent_cfgs = json.load( open(os.path.join(social_bot.get_model_dir(), "agent_cfg.json"), 'r')) agent_cfg = agent_cfgs[agent_type] self._agent_joints = agent_cfg['control_joints'] joint_states = list( map(lambda s: self._agent.get_joint_state(s), self._agent_joints)) self._joints_limits = list( map(lambda s: s.get_effort_limits()[0], joint_states)) if agent_cfg['use_pid']: for joint_index in range(len(self._agent_joints)): self._agent.set_pid_controller( self._agent_joints[joint_index], 'velocity', p=0.02, d=0.00001, max_force=self._joints_limits[joint_index]) self._agent_control_range = agent_cfg['pid_control_limit'] else: self._agent_control_range = np.array(self._joints_limits) self._agent_camera = agent_cfg['camera_sensor'] logging.debug("joints to control: %s" % self._agent_joints) self._with_language = with_language self._use_image_obs = use_image_observation self._image_with_internal_states = self._use_image_obs and image_with_internal_states assert data_format in ('channels_first', 'channels_last') self._data_format = data_format self._resized_image_size = resized_image_size self._random_goal = random_goal self.reset() obs_sample = self._get_observation("hello") if self._with_language or self._image_with_internal_states: self.observation_space = self._construct_dict_space( obs_sample, self._teacher.vocab_size) elif self._use_image_obs: self.observation_space = gym.spaces.Box(low=0, high=255, shape=obs_sample.shape, dtype=np.uint8) else: self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=obs_sample.shape, dtype=np.float32) self._control_space = gym.spaces.Box(low=-1.0, high=1.0, shape=[len(self._agent_joints)], dtype=np.float32) if self._with_language: self.action_space = gym.spaces.Dict(control=self._control_space, sentence=self._sentence_space) else: self.action_space = self._control_space
def __init__(self, with_language=False, use_image_obs=False, agent_type='pioneer2dx_noplugin', goal_name='table', max_steps=160, port=None): """ Args: with_language (bool): the observation will be a dict with an extra sentence use_image_obs (bool): use image, or use internal states as observation poses in internal states observation are in world coordinate agent_type (string): select the agent robot, supporting pr2_differential, pioneer2dx_noplugin, turtlebot, and irobot create for now port: Gazebo port, need to specify when run multiple environment in parallel """ if port is None: port = 0 gazebo.initialize(port=port) self._world = gazebo.new_world_from_file( os.path.join(social_bot.get_world_dir(), "grocery_ground.world")) self._object_types = [ 'coke_can', 'cube_20k', 'car_wheel', 'plastic_cup', 'beer', 'hammer' ] self._pos_list = list(itertools.product(range(-5, 5), range(-5, 5))) self._pos_list.remove((0, 0)) self._world.info() self._world.insertModelFile('model://' + agent_type) self._world.step(20) time.sleep(0.1) # Avoid 'px!=0' error self._random_insert_objects() self._world.model_list_info() self._world.info() # Specify joints and sensors for the robots control_joints = { 'pr2_differential': [ 'pr2_differential::fl_caster_r_wheel_joint', 'pr2_differential::fr_caster_l_wheel_joint' ], 'pioneer2dx_noplugin': [ 'pioneer2dx_noplugin::left_wheel_hinge', 'pioneer2dx_noplugin::right_wheel_hinge' ], 'turtlebot': [ 'turtlebot::create::left_wheel', 'turtlebot::create::right_wheel' ], 'create': ['create::left_wheel', 'create::right_wheel'], } control_force = { 'pr2_differential': 20, 'pioneer2dx_noplugin': 0.5, 'turtlebot': 0.5, 'create': 0.5, } camera_sensor = { 'pr2_differential': 'default::pr2_differential::head_tilt_link::head_mount_prosilica_link_sensor', 'pioneer2dx_noplugin': ' ', 'turtlebot': 'default::turtlebot::kinect::link::camera', 'create': ' ', } self._agent = self._world.get_agent(agent_type) self._agent_joints = control_joints[agent_type] self._agent_control_force = control_force[agent_type] self._agent_camera = camera_sensor[agent_type] self._goal_name = goal_name self._goal = self._world.get_model(goal_name) logger.info("joints to control: %s" % self._agent_joints) self._teacher = teacher.Teacher(False) task_group = teacher.TaskGroup() self._teacher_task = GroceryGroundGoalTask(max_steps=max_steps, goal_name=self._goal_name, success_distance_thresh=0.5, fail_distance_thresh=3.0, reward_shaping=True, random_range=10.0) task_group.add_task(self._teacher_task) self._teacher.add_task_group(task_group) self._with_language = with_language self._use_image_obs = use_image_obs obs = self.reset() if self._use_image_obs: obs_data_space = gym.spaces.Box(low=0, high=255, shape=obs.shape, dtype=np.uint8) else: obs_data_space = gym.spaces.Box(low=-50, high=50, shape=obs.shape, dtype=np.float32) control_space = gym.spaces.Box(low=-self._agent_control_force, high=self._agent_control_force, shape=[len(self._agent_joints)], dtype=np.float32) if self._with_language: self.observation_space = gym.spaces.Dict(data=obs_data_space, sentence=DiscreteSequence( 128, 24)) self._action_space = gym.spaces.Dict(control=control_space, sentence=DiscreteSequence( 128, 24)) else: self.observation_space = obs_data_space self.action_space = control_space