Example #1
 def __init__(self) -> None:
     super().__init__()
     self.state = None
     self.current_goal = None
     self.iter = 0
     self._action_space = FloatBox(low=-0.01, high=0.01, shape=2)
     self._observation_space = FloatBox(low=-1., high=1.,
                                        shape=4)  # current state and goal
     self.goal_space = FloatBox(low=-1., high=1., shape=2)
Example #2
    def get_obs(self, get_space=False):
        """
            Get the observation of the state for RL. If get_space is True, return the space description instead.
            Space is defined as:
                0: time [1]
                1:7 hand pos + rotation vector [6]
                7: gripper state [1]
                8:14 obj pos + rotation vector [6],
                14: obj height
                15: obj radius
            In case of two objects:
                16:22 obj pos + rotation vector [6],
                22: obj height
                23: obj radius
        """
        if get_space:
            if self.two_objects:
                return FloatBox(low=[0.] + [-1.] * 6 + [0.] + [-1.] * 6 +
                                [0.] * 2 + [-1.] * 6 + [0.] * 2,
                                high=[1.] + [1.] * 6 + [self.FINGER_OPEN_POS] +
                                [1.] * 6 + [0.2] * 2 + [1.] * 6 + [0.2] * 2)
            return FloatBox(low=[0.] + [-1.] * 6 + [0.] + [-1.] * 6 + [0.] * 2,
                            high=[1.] + [1.] * 6 + [self.FINGER_OPEN_POS] +
                            [1.] * 6 + [0.2] * 2)

        t = self.scene.simulation_time
        hand_pose = self.hand_actors[0].get_global_pose()
        grip = self.get_grip()
        obj_pose = self.obj.get_global_pose()
        if self.two_objects:
            obj2_pose = self.obj2.get_global_pose()
            return np.concatenate([
                [t / 10.],
                hand_pose[0],
                npq.as_rotation_vector(hand_pose[1]),
                [grip],
                obj_pose[0],
                npq.as_rotation_vector(obj_pose[1]),
                [self.obj_height, self.obj_radius],
                obj2_pose[0],
                npq.as_rotation_vector(obj2_pose[1]),
                [self.obj2_height, self.obj2_radius],
            ]).astype(np.float32)

        return np.concatenate([
            [t / 10.],
            hand_pose[0],
            npq.as_rotation_vector(hand_pose[1]),
            [grip],
            obj_pose[0],
            npq.as_rotation_vector(obj_pose[1]),
            [self.obj_height, self.obj_radius],
        ]).astype(np.float32)
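To make the index layout documented above concrete, here is a minimal sketch of how a consumer could slice the single-object observation. The helper `split_obs` is hypothetical, not part of the original environment:

import numpy as np

def split_obs(obs: np.ndarray) -> dict:
    # Assumes the 16-entry single-object layout from the docstring:
    # time, hand pose (6), gripper, object pose (6), height, radius.
    return {
        'time': obs[0],
        'hand_pose': obs[1:7],    # position + rotation vector
        'grip': obs[7],
        'obj_pose': obs[8:14],    # position + rotation vector
        'obj_height': obs[14],
        'obj_radius': obs[15],
    }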
Example #3
 def __init__(self):
     self.end_pos = 10
     self.cur_pos = 0
     # self._action_space = IntBox(low=0, high=2, shape=(2,))
     self._action_space = IntBox(low=0, high=2)
     self._observation_space = Composite(
         [
             FloatBox(low=0, high=self.end_pos),
             FloatBox(low=0, high=self.end_pos)
         ],
         Obs,
     )
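rlpyt's Composite space wraps a namedtuple type with one field per sub-space; a minimal sketch of what the `Obs` type used above might look like (the field names here are assumptions):

from collections import namedtuple

# Hypothetical namedtuple backing the Composite space; rlpyt expects one
# field per sub-space, in the same order as the space list.
Obs = namedtuple("Obs", ["position", "goal"])

Sampling the Composite space then yields an `Obs` whose fields are samples of the corresponding sub-spaces.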
Example #4
    def __init__(self, horizon=100, weight_are_actions=False) -> None:
        super().__init__()
        self.weight_are_actions = weight_are_actions
        self._horizon = horizon
        self.state = None

        self.start_space = FloatBox(low=0., high=1., shape=2)
        self.start_state = np.zeros(2, dtype=np.float32)
        self.goal = np.zeros(2, dtype=np.float32)

        self.iter = 0

        self._action_space = FloatBox(low=-10, high=10, shape=2)  # new state
        self._observation_space = self.get_obs(shape=True)
Example #5
class MyEnv(Env):
    def __init__(self) -> None:
        super().__init__()
        self.state = None
        self.current_goal = None
        self.iter = 0
        self._horizon = 100  # assumed default episode length; the original snippet never set one
        self._action_space = FloatBox(low=-0.01, high=0.01, shape=2)
        self._observation_space = FloatBox(low=-1., high=1.,
                                           shape=4)  # current state and goal
        self.goal_space = FloatBox(low=-1., high=1., shape=2)

    def get_obs(self):
        return np.concatenate([self.state,
                               self.current_goal]).astype(np.float32)

    def step(self, action):
        self.iter += 1
        self.state += action * self._action_space.high[0]
        dist = np.linalg.norm(self.state - self.current_goal)
        rew = np.exp(-0.5 * dist) / self.horizon
        return EnvStep(self.get_obs(), rew, self.iter == self.horizon,
                       EnvInfo())

    def reset(self):
        self.state = np.zeros(2)
        self.current_goal = self.goal_space.sample()
        self.iter = 0
        return self.get_obs()

    @property
    def horizon(self):
        return self._horizon
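A short usage sketch for this environment with a random policy; the loop below assumes the `_horizon` default added above and that `EnvStep` unpacks as a 4-tuple, matching how it is constructed in `step`:

env = MyEnv()
obs = env.reset()
done = False
total_rew = 0.
while not done:
    action = env._action_space.sample()       # uniform in [-0.01, 0.01]^2
    obs, rew, done, info = env.step(action)   # EnvStep(obs, reward, done, info)
    total_rew += rew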
Example #6
 def __init__(self) -> None:
     super().__init__()
     self.state = None
     self.current_goal = None
     self.iter = 0
     self.action_discrete_mapping = [
         np.array([0.0, 0.0]),
         np.array([-0.1, 0.0]),
         np.array([0.0, -0.1]),
         np.array([0.1, 0.0]),
         np.array([0.0, 0.1]),
     ]
     self._action_space = IntBox(low=0,
                                 high=len(self.action_discrete_mapping))
     self._observation_space = FloatBox(low=-1., high=1.,
                                        shape=4)  # current state and goal
     self.goal_space = FloatBox(low=-1., high=1., shape=2)
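The `step` method is not shown, but with this setup it presumably looks up the sampled integer in `action_discrete_mapping` to get a 2-D displacement; a hedged sketch of that lookup, assuming an instance `env` of this class:

# Hypothetical fragment: convert the IntBox action index into a displacement.
idx = env._action_space.sample()            # integer in [0, 5)
delta = env.action_discrete_mapping[idx]    # e.g. np.array([0.1, 0.0])
# env.state = np.clip(env.state + delta, -1., 1.)  # keep the state inside the obs bounds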
Example #7
 def __init__(self,
              id: str,
              exp_kwargs: dict = None,
              external_logging: str = 'none',
              save_path: str = '',
              overwrite: bool = True):
     assert (id in VALID_ENV_SWEEP_IDS) or (
         id in VALID_ENV_IDS and exp_kwargs is not None
     )  # Either using one of presets or using base experiment with other settings
     aug_path = osp.join(LOG_DIR, save_path)  # LOG_DIR + save_path
     if id in VALID_ENV_SWEEP_IDS:  # Pre-parameterized experiments
         if external_logging == 'none':
             env = bsuite.load_from_id(id)  # No recording
         else:
             env = bsuite.load_and_record(
                 id, aug_path, external_logging, overwrite=overwrite
              )  # Record in SQL or CSV; the same SQL file is reused for each id
         self.num_episodes = env.bsuite_num_episodes
     else:
         noise_scale = exp_kwargs.pop('noise_scale', 0.)
         noise_scale_seed = exp_kwargs.pop('noise_scale_seed', 0.)
         reward_scale = exp_kwargs.pop('reward_scale', 0.)
         env = bsuite.load(id, **exp_kwargs)
         if noise_scale:
             env = RewardNoise(env, noise_scale, noise_scale_seed)
         if reward_scale: env = RewardScale(env, reward_scale)
         self.num_episodes = 1e4  # Default
     self.env = env
     self._action_space = IntBox(low=0,
                                 high=self.env.action_spec().num_values)
     o_spec = self.env.observation_spec()
     if isinstance(o_spec, specs.BoundedArray):
         self._observation_space = FloatBox(low=o_spec.minimum.item(),
                                            high=o_spec.maximum.item(),
                                            shape=o_spec.shape,
                                            dtype=o_spec.dtype)
     else:
         self._observation_space = FloatBox(low=-float('inf'),
                                            high=float('inf'),
                                            shape=o_spec.shape,
                                            dtype=o_spec.dtype)
     self._last_observation = None
     self.game_over = False
     self.viewer = None
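A hedged construction example; the wrapper's class name `BSuiteEnv` is an assumption (only `__init__` is shown above), while 'catch/0' is a standard bsuite id:

# Hypothetical usage, assuming 'catch/0' appears in VALID_ENV_SWEEP_IDS:
# env = BSuiteEnv(id='catch/0', external_logging='csv', save_path='bsuite_runs')
# env.num_episodes is then taken from the bsuite sweep definition.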
Example #8
class Box(Space):
    """A box in R^n, with specificiable bound and dtype."""
    def __init__(self,
                 low,
                 high,
                 shape=None,
                 dtype="float32",
                 null_value=None):
        """
        low and high are scalars, applied across all dimensions of shape.
        """
        dtype = np.dtype(dtype)
        if dtype.kind == 'i' or dtype.kind == 'u':
            self.box = IntBox(low,
                              high,
                              shape=shape,
                              dtype=dtype,
                              null_value=null_value)
        elif dtype.kind == 'f':
            self.box = FloatBox(low,
                                high,
                                shape=shape,
                                dtype=dtype,
                                null_value=null_value)
        else:
            raise NotImplementedError(dtype)

    def sample(self):
        return self.box.sample()

    def null_value(self):
        return self.box.null_value()

    def __repr__(self):
        return f"Box({self.box.low}-{self.box.high - 1} shape={self.box.shape} dtype={self.box.dtype})"

    @property
    def shape(self):
        return self.box.shape

    @property
    def bounds(self):
        return self.box.bounds
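A brief usage sketch for this dtype-dispatching wrapper; the bounds below are illustrative:

# Integer dtypes dispatch to IntBox (rlpyt treats high as exclusive)...
discrete = Box(low=0, high=4, dtype="int64")
# ...while float dtypes dispatch to FloatBox with inclusive bounds.
continuous = Box(low=-1.0, high=1.0, shape=(3,), dtype="float32")
sample = continuous.sample()   # ndarray with shape (3,)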
Example #9
 def __init__(self, batch_T, batch_B) -> None:
     super().__init__()
     self.batch_T = batch_T
     self.batch_B = batch_B
     self.state = None
     self.current_goal = None
     self.iter = 0
     self.action_discrete_mapping = np.array([
         [0.0, 0.0],
         [-0.1, 0.0],
         [0.0, -0.1],
         [0.1, 0.0],
         [0.0, 0.1],
     ])
     self._action_space = IntBox(low=0,
                                 high=len(self.action_discrete_mapping))
     self._observation_space = FloatBox(low=-1., high=1.,
                                        shape=4)  # current state and goal
     self.goal_space = FloatBox(low=-1., high=1., shape=(self.batch_B, 2))
Example #10
 def observation_space(self):
     img_obs = IntBox(low=0, high=255, shape=(3,) + self._size,
                      dtype="uint8")
     if self.use_state:
         state_obs = self._env.observation_spec()
         total_shape = np.sum([v.shape[0] for k, v in state_obs.items()])
         state_obs_box = FloatBox(low=-float('inf'), high=float('inf'), shape=total_shape)
         return StateObs(img_obs, state_obs_box)
     else:
         return img_obs
Example #11
    def __init__(
        self,
        vsrl_env: VSRLEnv,
        oracle_safety: bool = False,
        log_unsafe_transitions: bool = False,
    ):
        """
        :param log_unsafe_transitions: only used when sym_features is given at `step`
        """
        super().__init__()
        self._env = vsrl_env
        action_space = vsrl_env.action_space

        if isinstance(action_space, FiniteSpace):
            # rlpyt discrete spaces can only be `IntBox`s, so I just make them
            # IntBox(0, n_actions) and then map the action back to our space in step
            n_actions = len(action_space.elements)
            self._action_space = IntBox(0, n_actions)
            # self._action_space = IntBox(0, n_actions, shape=(1,))
            self._actions = np.stack(action_space.elements)
            self.constrained_sample = partial(
                constrained_sample_finite,
                constraint=self._env.constraint_func,
                actions=action_space.elements[:],
            )
        elif isinstance(action_space, CompactSet):
            low, high = zip(*action_space.bounds.values())
            ones = np.ones_like(low)
            self._action_space = FloatBox(-ones, ones)
            self._actions = None
            self._action_lb = np.array(low, dtype=np.float32)
            self._action_ub = np.array(high, dtype=np.float32)
            self._action_range = self._action_ub - self._action_lb
            if hasattr(self._env, "constrained_sample"):
                self.constrained_sample = self._env.constrained_sample
            else:
                self.constrained_sample = partial(
                    constrained_sample_continuous,
                    constraint=self._env.constraint_func,
                    lower_bounds=self._action_lb,
                    upper_bounds=self._action_ub,
                )
        else:
            raise ValueError(
                f"Do not know how to convert action space {action_space} to rlpyt."
            )

        self._observation_space = vsrl_env.observation_space
        self.log_unsafe_transitions = log_unsafe_transitions
        self.oracle_safety = oracle_safety
        self._fallback_action = None
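For the `CompactSet` branch, actions the agent emits in [-1, 1] must be mapped back onto the true bounds before reaching the wrapped environment; the original `step` is not shown, so this affine rescaling using the stored `_action_lb` and `_action_range` is a sketch of the presumed approach:

import numpy as np

def _rescale_action(self, action: np.ndarray) -> np.ndarray:
    # Hypothetical helper: map [-1, 1]^n onto [self._action_lb, self._action_ub].
    return self._action_lb + (action + 1.0) * 0.5 * self._action_range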
Example #12
 def __init__(self,
              low,
              high,
              shape=None,
              dtype="float32",
              null_value=None):
     """
     low and high are scalars, applied across all dimensions of shape.
     """
     dtype = np.dtype(dtype)
     if dtype.kind == 'i' or dtype.kind == 'u':
         self.box = IntBox(low,
                           high,
                           shape=shape,
                           dtype=dtype,
                            null_value=null_value)
     elif dtype.kind == 'f':
         self.box = FloatBox(low,
                             high,
                             shape=shape,
                             dtype=dtype,
                              null_value=null_value)
     else:
         raise NotImplementedError(dtype)
Example #13
    def __init__(self):
        self.cur_step = 0
        self.width = CANVAS_WIDTH
        self.obj_wh = np.array([OBJ_WIDTH, OBJ_WIDTH], dtype=np.float32) / self.width
        self.n_items = N_ITEMS

        self._action_space = IntBox(low=0, high=N_ACTIONS)
        self._observation_space = Composite(
            [
                FloatBox(low=-1, high=1, shape=(self.width, self.width, 1)),
                FloatBox(low=-1, high=1, shape=(self.width, self.width, 1)),
                FloatBox(low=-10, high=10, shape=(self.n_items, 4)),
            ],
            Obs,
        )
        # self._observation_space = FloatBox(
        #     low=-1, high=1, shape=(self.width, self.width, 1)
        # )
        
        self.target_im = np.zeros(shape=(self.width, self.width, 1), dtype=np.float32)
        self.target_coord = None  # (n_obj, 4=(x0, y0, x1, y1))
        self.cur_im = None
        self.cur_coord = None  # (n_obj, 4=(x0, y0, x1, y1))
        self.item = None
Example #14
class MyEnv(Env):
    def __init__(self, batch_T, batch_B) -> None:
        super().__init__()
        self.batch_T = batch_T
        self.batch_B = batch_B
        self.state = None
        self.current_goal = None
        self.iter = 0
        self.action_discrete_mapping = np.array([
            [0.0, 0.0],
            [-0.1, 0.0],
            [0.0, -0.1],
            [0.1, 0.0],
            [0.0, 0.1],
        ])
        self._action_space = IntBox(low=0,
                                    high=len(self.action_discrete_mapping))
        self._observation_space = FloatBox(low=-1., high=1.,
                                           shape=4)  # current state and goal
        self.goal_space = FloatBox(low=-1., high=1., shape=(self.batch_B, 2))

    def get_obs(self):
        return np.concatenate([self.state, self.current_goal],
                              axis=-1).astype(np.float32)

    def step(self, action):
        self.iter += 1
        self.state += self.action_discrete_mapping[action]
        dist = np.linalg.norm(self.state - self.current_goal, axis=-1)
        rew = np.exp(-0.5 * dist) / self.horizon
        return EnvStep(self.get_obs(), rew, self.iter == self.horizon,
                       EnvInfo())

    def reset(self):
        self.state = np.zeros((self.batch_B, 2))
        self.current_goal = self.goal_space.sample()
        self.iter = 0
        return self.get_obs()

    @property
    def horizon(self):
        return self.batch_T
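A batched usage sketch: with `batch_B` parallel goals, rewards come back with shape `(batch_B,)` while `done` stays a scalar tied to the shared iteration counter:

import numpy as np

env = MyEnv(batch_T=50, batch_B=8)
obs = env.reset()                                    # shape (8, 4)
actions = np.random.randint(0, 5, size=env.batch_B)  # one index per batch entry
obs, rew, done, _ = env.step(actions)                # rew has shape (8,)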
Example #15
    def __init__(self, scenario_name='', scenario_cfg=None, max_steps=200, 
                 gif_freq=500, steps_per_action=4, image_dir='images/test', 
                 viewport=False):

        # Load holodeck environment
        if scenario_cfg is not None and \
            scenario_cfg['package_name'] not in holodeck.installed_packages():
            
            holodeck.install(scenario_cfg['package_name'])

        self._env = holodeck.make(scenario_name=scenario_name, 
                                  scenario_cfg=scenario_cfg, 
                                  show_viewport=viewport)

        # Get action space from holodeck env and store for use with rlpyt
        if self.is_action_continuous:
            self._action_space = FloatBox(-1, 1, self._env.action_space.shape)

        else:
            self._action_space = IntBox(self._env.action_space.get_low(), 
                                        self._env.action_space.get_high(), 
                                        ())

        # Calculate observation space with all sensor data
        max_width = 0
        max_height = 0
        num_img = 0
        num_lin = 0
        for sensor in self._env._agent.sensors.values():
            if 'Task' in sensor.name:
                continue
            shape = sensor.sensor_data.shape
            if len(shape) == 3:
                max_width = max(max_width, shape[0])
                max_height = max(max_height, shape[1])
                num_img += shape[2]
            else:
                num_lin += np.prod(shape)
        
        if num_img > 0 and num_lin == 0:
            self.has_img = True
            self.has_lin = False
            self._observation_space = FloatBox(0, 1, 
                (num_img, max_width, max_height))
        elif num_lin > 0 and num_img == 0:
            self.has_img = False
            self.has_lin = True
            self._observation_space = FloatBox(-256, 256, (num_lin,))
        else:
            self.has_img = True
            self.has_lin = True
            self._observation_space = Composite([
                FloatBox(0, 1, (num_img, max_width, max_height)),
                FloatBox(-256, 256, (num_lin,))],
                HolodeckObservation)

        # Set data members
        self._max_steps = max_steps
        self._image_dir = image_dir
        self._steps_per_action = steps_per_action
        self.curr_step = 0
        self.gif_freq = gif_freq
        self.rollout_count = -1
        self.gif_images = []
Example #16
 def action_space(self):
     spec = self._env.action_spec()
     return FloatBox(low=spec.minimum, high=spec.maximum)
Example #17
    def __init__(
        self,
        assortment_size=1000,  # number of items to train
        max_stock=1000,  # Size of maximum stock
        clip_reward=False,
        episodic_lives=True,
        repeat_action_probability=0.0,
        horizon=10000,
        seed=None,
        substep_count=4,
        bucket_customers=torch.tensor([800., 400., 500., 900.]),
        bucket_cov=torch.eye(4) / 100,
        forecastBias=0.0,
        forecastVariance=0.0,
        freshness=1,
        utility_function='homogeneous',
        utility_weights={
            'alpha': 1.,
            'beta': 1.,
            'gamma': 1.
        },
        characDim=4,
        lead_time=1,  # Defines how quickly orders go through the buffer - also impacts the relevance of the observation
        lead_time_fast=0,
        symmetric_action_space=False,
    ):
        save__init__args(locals(), underscore=True)
        logging.info("Creating new StoreEnv")

        self.bucket_customers = bucket_customers

        # Spaces

        if symmetric_action_space:
            self._action_space = FloatBox(low=-max_stock / 2,
                                          high=max_stock / 2,
                                          shape=[assortment_size])
        else:
            self._action_space = IntBox(low=0,
                                        high=max_stock,
                                        shape=[assortment_size])
        self.stock = torch.zeros(assortment_size,
                                 max_stock,
                                 requires_grad=False)

        # correct high with max shelf life

        self._observation_space = FloatBox(
            low=0,
            high=1000,
            shape=(assortment_size,
                   max_stock + characDim + lead_time + lead_time_fast + 1))
        self._horizon = int(horizon)
        self.assortment = Assortment(assortment_size, freshness, seed)
        self._repeater = torch.stack(
            (self.assortment.shelf_lives, torch.zeros(
                self._assortment_size))).transpose(0, 1).reshape(-1).detach()
        self.forecast = torch.zeros(assortment_size, 1)  # DAH forecast.
        self._step_counter = 0

        # Needs to move towards env parameters

        self._customers = \
            d.multivariate_normal.MultivariateNormal(bucket_customers,
                                                     bucket_cov)
        self.assortment.base_demand = \
            self.assortment.base_demand.detach() \
            / bucket_customers.sum()
        self._bias = d.normal.Normal(forecastBias, forecastVariance)

        # We want a yearly seasonality: a cosine argument plus a phase.
        # Note that, as we take the absolute value, 2*pi/365 becomes pi/365.

        self._year_multiplier = torch.arange(0.0, horizon, pi / 365)
        self._week_multiplier = torch.arange(0.0, horizon, pi / 7)
        self._phase = 2 * pi * torch.rand(assortment_size)
        self._phase2 = 2 * pi * torch.rand(assortment_size)
        self.create_buffers(lead_time, lead_time_fast)
        if utility_function == 'linear':
            self.utility_function = LinearUtility(**utility_weights)
        elif utility_function == 'loglinear':
            self.utility_function = LogLinearUtility(**utility_weights)
        elif utility_function == 'cobbdouglas':
            self.utility_function = CobbDouglasUtility(**utility_weights)
        elif utility_function == 'homogeneous':
            self.utility_function = HomogeneousReward(**utility_weights)
        else:
            self.utility_function = utility_function
        self._updateEnv()
        for i in range(self._lead_time):
            units_to_order = torch.as_tensor(
                self.forecast.squeeze() * bucket_customers[i]).round().clamp(
                    0, self._max_stock)
            self._addStock(units_to_order)
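To make the seasonality comment above concrete: because demand is modulated by the absolute value of a cosine (period pi), an argument step of pi/365 per simulated day completes exactly one cycle per year. A standalone sketch with illustrative sizes (the original demand update is not shown):

import torch
from math import pi

n_items = 4
year_multiplier = torch.arange(0.0, 3 * pi, pi / 365)  # ~3 years of daily steps
phase = 2 * pi * torch.rand(n_items)

day = 100
year_factor = torch.abs(torch.cos(year_multiplier[day] + phase))  # per-item factor in [0, 1]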
Example #18
 def action_space(self):
     shape = (self.env.action_space.n, )
     space = FloatBox(low=0, high=1, shape=shape, dtype=self._dtype)
     space.sample = self._sample_action
     return space
Example #19
 def get_obs(self, shape=False):
     if shape:
         return FloatBox(low=-10, high=10, shape=3)
     return np.concatenate([[self.get_time()], self.state]).astype(np.float32)
Example #20
    def __init__(self,
                 horizon=100,
                 render=False,
                 realtime=False,
                 reward_params=None,
                 reset_state_sampler=None,
                 state_trajectories: List[StateTrajectory] = None,
                 video_filename=None,
                 reset_on_singularity=True,
                 reset_on_plane_hit=True,
                 action_start_time=0.,
                 action_end_time=0.,
                 two_objects=False,
                 dz=0.2,
                 open_gripper_on_leave=True,
                 close_gripper_on_leave=False,
                 use_max_reward=False,
                 add_obstacle=False) -> None:
        super().__init__()
        self.use_max_reward = use_max_reward
        self.close_gripper_on_leave = close_gripper_on_leave
        self.open_gripper_on_leave = open_gripper_on_leave
        assert not (self.close_gripper_on_leave and self.open_gripper_on_leave)
        self.action_end_time = action_end_time
        self.action_start_time = action_start_time
        self.two_objects = two_objects
        self.reset_on_singularity = reset_on_singularity
        self.reset_on_plane_hit = reset_on_plane_hit
        self.realtime = realtime
        self.reset_state_sampler = reset_state_sampler
        self.reward_params = reward_params or \
            GripperCylinderEnv.get_default_reward_params()
        self.state_trajectories = state_trajectories or []
        self.render = render
        self._horizon = horizon
        self.iter = 0
        self.num_sub_steps = 10

        self.control_frequency = Rate(12)
        """ Define action and observation space. """
        self._action_space = FloatBox(low=-1., high=1., shape=7)
        self._observation_space = self.get_obs(get_space=True)
        """ Create scene. """
        self.scene = Scene(scene_flags=[
            SceneFlag.ENABLE_FRICTION_EVERY_ITERATION,
            SceneFlag.ENABLE_STABILIZATION
        ])
        self.plane_mat = Material(static_friction=0, dynamic_friction=0)
        self.obj_mat = Material(static_friction=1.,
                                dynamic_friction=1.,
                                restitution=0)
        self.plane_actor = RigidStatic.create_plane(material=self.plane_mat)
        self.scene.add_actor(self.plane_actor)
        self.hand_actors = self.create_hand_actors(use_mesh_fingers=False,
                                                   use_mesh_base=False)
        self.joints = self.create_hand_joints()
        self.obj = self.create_obj()
        self.obj_height = 0.1
        self.obj_radius = 0.03
        if self.two_objects:
            self.obj2 = self.create_obj()
            self.obj2_height = 0.1
            self.obj2_radius = 0.03
        if add_obstacle:
            actor = RigidStatic()
            actor.attach_shape(
                Shape.create_box([0.02, 0.02, 0.2], self.obj_mat))
            actor.set_global_pose([0.075, 0.015, 0.1])
            self.scene.add_actor(actor)

        if self.render:
            from pyphysx_render.pyrender import PyPhysxViewer, RoboticTrackball
            import pyrender
            render_scene = pyrender.Scene()
            render_scene.bg_color = np.array([0.75] * 3)
            cam = pyrender.PerspectiveCamera(yfov=np.deg2rad(60),
                                             aspectRatio=1.414,
                                             znear=0.005)
            cam_pose = np.eye(4)
            cam_pose[:3, 3] = RoboticTrackball.spherical_to_cartesian(
                0.75, np.deg2rad(-90), np.deg2rad(60))
            cam_pose[:3, :3] = RoboticTrackball.look_at_rotation(
                eye=cam_pose[:3, 3], target=np.zeros(3), up=[0, 0, 1])
            nc = pyrender.Node(camera=cam, matrix=cam_pose)
            render_scene.add_node(nc)
            render_scene.main_camera_node = nc
            self.renderer = PyPhysxViewer(video_filename=video_filename,
                                          render_scene=render_scene,
                                          viewer_flags={
                                              'axes_scale': 0.2,
                                              'plane_grid_spacing': 0.1,
                                          })
            self.renderer.add_physx_scene(self.scene)
            # from pyphysx_render.renderer import PyPhysXParallelRenderer
            # self.renderer = PyPhysXParallelRenderer(render_window_kwargs=dict(
            #     video_filename=video_filename, coordinates_scale=0.2, coordinate_lw=1.,
            #     cam_pos_distance=0.75, cam_pos_elevation=np.deg2rad(60), cam_pos_azimuth=np.deg2rad(-90.),
            # ))
        """ Compute trajectory caches. Stores quantities in MxN arrays [# of time steps, # of trajectories] """
        timesteps = np.arange(
            0.,
            self.control_frequency.period() * (1 + self.horizon),
            self.control_frequency.period())
        self.demo_opos = np.zeros(
            (len(timesteps), len(self.state_trajectories), 3))
        self.demo_hpos = np.zeros(
            (len(timesteps), len(self.state_trajectories), 3))
        self.demo_grip = np.zeros(
            (len(timesteps), len(self.state_trajectories)))
        self.demo_hrot = np.zeros(
            (len(timesteps), len(self.state_trajectories), 2))

        if self.two_objects:
            self.demo_opos2 = np.zeros(
                (len(timesteps), len(self.state_trajectories), 3))

        for i, st in enumerate(self.state_trajectories):
            half_height = 0.5 * st.get_property_at_time('o0szz', timesteps)
            self.demo_opos[:, i,
                           0] = st.get_property_at_time('o0posx', timesteps)
            self.demo_opos[:, i,
                           1] = st.get_property_at_time('o0posy', timesteps)
            self.demo_opos[:, i, 2] = np.maximum(
                st.get_property_at_time('o0posz', timesteps) - half_height, 0.)

            if self.two_objects:
                half_height2 = 0.5 * st.get_property_at_time(
                    'o1szz', timesteps)
                self.demo_opos2[:, i, 0] = st.get_property_at_time(
                    'o1posx', timesteps)
                self.demo_opos2[:, i, 1] = st.get_property_at_time(
                    'o1posy', timesteps)
                self.demo_opos2[:, i, 2] = np.maximum(
                    st.get_property_at_time('o1posz', timesteps) -
                    half_height2, 0.)

            self.demo_hpos[:, i,
                           0] = st.get_property_at_time('hposx', timesteps)
            self.demo_hpos[:, i,
                           1] = st.get_property_at_time('hposy', timesteps)
            self.demo_hpos[:, i, 2] = np.maximum(
                st.get_property_at_time('hposz', timesteps) - half_height, 0.)
            self.demo_grip[:, i] = (1 - st.get_property_at_time(
                'touch', timesteps)) * self.FINGER_OPEN_POS
            self.demo_hrot[:, i,
                           0] = st.get_property_at_time('hazi', timesteps)
            self.demo_hrot[:, i,
                           1] = st.get_property_at_time('hele', timesteps)

        self.demo_hpos[int(action_end_time *
                           12):, :, :] = self.demo_opos[int(action_end_time *
                                                            12):, :, :].copy()
        if self.two_objects:
            self.demo_hpos[int(action_end_time * 12):, :, :] = self.demo_opos2[
                int(action_end_time * 12):, :, :].copy()

        self.demo_hpos[int(action_end_time * 12):, :, 2] += dz

        self.demo_grip_weak_closed = np.zeros(
            (len(timesteps), len(self.state_trajectories)), dtype=bool)
        self.demo_grip_weak_closed[int(action_start_time * 12):
                                   int(action_end_time * 12)] = True  # steps inside the action window
        self.demo_grip_weak_closed &= self.demo_grip < self.FINGER_OPEN_POS / 2  # that are also closed

        # self.demo_grip_strong_open = np.ones((len(timesteps), len(self.state_trajectories)), dtype=np.bool)
        # self.demo_grip_strong_open[int(action_start_time * 12):int(action_end_time * 12)] = False

        tmp_rot_vec = np.zeros(
            (len(timesteps), len(self.state_trajectories), 3))
        for i, st in enumerate(self.state_trajectories):
            for j in range(3):
                tmp_rot_vec[:, i, j] = st.get_property_at_time(
                    'o0rot{}'.format(j), timesteps)
        self.demo_orot = npq.from_rotation_vector(tmp_rot_vec)

        if self.two_objects:
            tmp_rot_vec = np.zeros(
                (len(timesteps), len(self.state_trajectories), 3))
            for i, st in enumerate(self.state_trajectories):
                for j in range(3):
                    tmp_rot_vec[:, i, j] = st.get_property_at_time(
                        'o1rot{}'.format(j), timesteps)
            self.demo_orot2 = npq.from_rotation_vector(tmp_rot_vec)
Example #21
 def action_space(self):
     low = np.where(self._mask, -np.ones_like(self._low), self._low)
     high = np.where(self._mask, np.ones_like(self._low), self._high)
     return FloatBox(low, high, dtype=np.float32)
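A small worked example of the masking above: masked dimensions are forced to the normalized [-1, 1] range while unmasked dimensions keep their native bounds. The input values are illustrative:

import numpy as np

_low = np.array([0.0, -2.0, 5.0], dtype=np.float32)
_high = np.array([1.0, 2.0, 9.0], dtype=np.float32)
_mask = np.array([True, False, True])

low = np.where(_mask, -np.ones_like(_low), _low)    # -> [-1., -2., -1.]
high = np.where(_mask, np.ones_like(_low), _high)   # -> [ 1.,  2.,  1.]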