Пример #1
0
    def _goal_distance(self, goal_a, goal_b):
        assert goal_a.shape == goal_b.shape
        assert goal_a.shape[-1] == 7

        d_pos = np.zeros_like(goal_a[..., 0])
        d_rot = np.zeros_like(goal_b[..., 0])
        if self.target_position != 'ignore':
            delta_pos = goal_a[..., :3] - goal_b[..., :3]
            d_pos = np.linalg.norm(delta_pos, axis=-1)

        if self.target_rotation != 'ignore':
            quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

            if self.ignore_z_target_rotation:
                # Special case: We want to ignore the Z component of the rotation.
                # This code here assumes Euler angles with xyz convention. We first transform
                # to euler, then set the Z component to be equal between the two, and finally
                # transform back into quaternions.
                euler_a = rotations.quat2euler(quat_a)
                euler_b = rotations.quat2euler(quat_b)
                euler_a[2] = euler_b[2]
                quat_a = rotations.euler2quat(euler_a)

            # Subtract quaternions and extract angle between them.
            quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
            angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
            d_rot = angle_diff
        assert d_pos.shape == d_rot.shape
        return d_pos, d_rot
Пример #2
0
    def _sample_goal(self):
        home_path = os.getenv("HOME")
        goal_path = os.path.join(*[
            home_path, "DRL_AI4RoMoCo", "code", "environment", 
            "experiment_configs", "goal_ur10_simpheg_conf2.json"])

        with open(goal_path, encoding='utf-8') as file:
            goal = json.load(file)
            xpos =  goal['xpos'] + self.offset['body_pos_offset']
            xquat = goal['xquat']
            rpy = normalize_rad(rotations.quat2euler(xquat)+self.offset['body_euler_offset'])
        return numpy.concatenate([xpos, rpy]).copy()
Пример #3
0
    def _get_obs(self):
        # positions
        force = self.sim.data.sensordata
        x_pos = self.sim.data.get_body_xpos("gripper_dummy_heg")
        x_quat = self.sim.data.get_body_xquat("gripper_dummy_heg")
        rpy = rotations.quat2euler(x_quat)

        obs = np.concatenate(
            [x_pos - self.goal[:3], rpy - self.goal[3:], force])

        return obs
        '''
Пример #4
0
    def _sample_goal(self):
        home_path = os.getenv("HOME")
        goal_path = os.path.join(*[
            home_path, "DRL_SetBot-RearVentilation", "experiment_configs",
            "goal_ur10_simpheg_conf2.json"
        ])

        with open(goal_path, encoding='utf-8') as file:
            goal = json.load(file)
            xpos = goal['xpos']
            xquat = goal['xquat']
            rpy = normalize_rad(rotations.quat2euler(xquat))
        return numpy.concatenate([xpos, rpy]).copy()
Пример #5
0
def rotation_goal_distance(goal_a, goal_b, ignore_z_target_rotation = False):
    assert goal_a.shape == goal_b.shape
    assert goal_a.shape[-1] == 7
    d_rot = np.zeros_like(goal_b[..., 0])
    quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:]

    if ignore_z_target_rotation:
        # Special case: We want to ignore the Z component of the rotation.
        # This code here assumes Euler angles with xyz convention. We first transform
        # to euler, then set the Z component to be equal between the two, and finally
        # transform back into quaternions.
        euler_a = rotations.quat2euler(quat_a)
        euler_b = rotations.quat2euler(quat_b)
        euler_a[2] = euler_b[2]
        quat_a = rotations.euler2quat(euler_a)

    # Subtract quaternions and extract angle between them.
    quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b))
    angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1., 1.))
    d_rot = angle_diff

    return d_rot
Пример #6
0
    def generate_next_action(self,
                             curr_state,
                             end_goal,
                             imagination,
                             env,
                             num_acs=5000,
                             noise=0.0,
                             average=True):

        if average == False:
            num_acs = 3

        if env.name.startswith("fetch_push") or env.name.startswith(
                "fetch_pick"):
            state = env.save_state()
            quaternions = np.tile(state.qpos[-4:],
                                  (len(self.quaternion_invariants), 1))
            quaternion_rot = quat_mul(quaternions, self.quaternion_invariants)
            euler_angles = quat2euler(quaternion_rot)
            scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs(
                euler_angles[:, -1] -
                np.tile(self.prev_angle[-1], len(euler_angles)))
            ind = np.argmin(scores)
            self.prev_angle = euler_angles[ind, :]
            state.qpos[-4:] = quaternion_rot[ind, :]
            state.qvel[-3:] = quat_rot_vec(
                quat_conjugate(self.quaternion_invariants[ind]),
                state.qvel[-3:])
            env.restore_state(state)
            curr_state = env._get_obs()["observation"]

        init_states = np.tile(curr_state, (num_acs, 1))
        init_states += noise * np.random.randn(*init_states.shape)

        _, gen_actions = imagination.test_traj_rand_gan(init_states,
                                                        np.tile(
                                                            end_goal,
                                                            (num_acs, 1)),
                                                        num_steps=1)
        if average:
            return np.mean(gen_actions[:, 0, :], axis=0), 1
        else:
            return gen_actions[0, 0, :], 1
Пример #7
0
def _solve_qp_ik_pos(current_pose,
                     target_pose,
                     jac,
                     joint_pos,
                     joint_lims=None,
                     duration=None,
                     margin=0.2):
    pos_delta = target_pose[:3] - current_pose[:3]
    q_diff = rotations.quat_mul(target_pose[3:],
                                rotations.quat_conjugate(current_pose[3:]))
    ang_delta = rotations.quat2euler(q_diff)
    vel = np.r_[pos_delta, ang_delta * 2.0]
    jac += np.random.uniform(size=jac.shape) * 1e-5
    qvel, optimal = _solve_qp_ik_vel(vel, jac, joint_pos, joint_lims, duration,
                                     margin)
    qvel = np.clip(qvel, -1.0, 1.0)
    if not optimal:
        qvel *= 0.1
    new_q = joint_pos + qvel
    return new_q
Пример #8
0
 def get_ori_diff(self):
     return self.get_finger_ori() - quat2euler(self.sim.data.get_body_xquat("knob_link"))
    def _get_achieved_goal(self):
        """
        Cube center position (3), cube center rotation (4), layer angle (1)
        total 8.
        """
        # Cube center position and rotation (quaternion).
        cube_qpos = self.sim.data.get_site_xpos('rubik:cube_center')
        cube_rot = rotations.mat2quat(
            self.sim.data.get_site_xmat('rubik:cube_center'))

        # Cube ball joint angles
        cube_joint_pos, cube_joint_vel = shadow_get_obs(self.sim, name='rubik')
        assert cube_joint_pos.shape == (8, )

        if self.rot_layer in ["up", "up'"]:
            box_0_0_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[4], 2))
            box_1_0_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[5], 2))
            box_0_1_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[6], 2))
            box_1_1_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[7], 2))

            for data in [
                    box_0_0_1_euler, box_1_0_1_euler, box_0_1_1_euler,
                    box_1_1_1_euler
            ]:
                data[data < -3] += 6.28

            U_x_angle = [
                box_1_1_1_euler[0], box_1_0_1_euler[0], box_0_1_1_euler[0],
                box_0_0_1_euler[0]
            ]
            U_y_angle = [
                box_1_1_1_euler[1], box_1_0_1_euler[1], box_0_1_1_euler[1],
                box_0_0_1_euler[1]
            ]
            U_z_angle = [
                box_1_1_1_euler[2], box_1_0_1_euler[2], box_0_1_1_euler[2],
                box_0_0_1_euler[2]
            ]

            U_bool = ((np.max(U_x_angle) - np.min(U_x_angle)) < 0.1) & \
                     ((np.max(U_y_angle) - np.min(U_y_angle)) < 0.1) & \
                     ((np.max(U_z_angle) - np.min(U_z_angle)) < 0.1)

            if U_bool:
                angle = [U_z_angle[0]]
            else:
                angle = [0]

        elif self.rot_layer in ["right", "right'"]:
            box_0_1_0_euler = rotations.quat2euler(
                np.round(cube_joint_pos[2], 2))
            box_1_1_0_euler = rotations.quat2euler(
                np.round(cube_joint_pos[3], 2))
            box_0_1_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[6], 2))
            box_1_1_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[7], 2))

            for data in [
                    box_0_1_0_euler, box_1_1_0_euler, box_0_1_1_euler,
                    box_1_1_1_euler
            ]:
                data[data < -3] += 6.28

            R_x_angle = [
                box_1_1_1_euler[0], box_1_1_0_euler[0], box_0_1_1_euler[0],
                box_0_1_0_euler[0]
            ]
            R_y_angle = [
                box_1_1_1_euler[1], box_1_1_0_euler[1], box_0_1_1_euler[1],
                box_0_1_0_euler[1]
            ]
            R_z_angle = [
                box_1_1_1_euler[2], box_1_1_0_euler[2], box_0_1_1_euler[2],
                box_0_1_0_euler[2]
            ]

            R_bool = ((np.max(R_x_angle) - np.min(R_x_angle)) < 0.1) & \
                     ((np.max(R_y_angle) - np.min(R_y_angle)) < 0.1) & \
                     ((np.max(R_z_angle) - np.min(R_z_angle)) < 0.1)

            if R_bool:
                angle = [R_y_angle[0]]
            else:
                angle = [0]

        elif self.rot_layer in ["front", "front'"]:
            box_1_0_0_euler = rotations.quat2euler(
                np.round(cube_joint_pos[1], 2))
            box_1_1_0_euler = rotations.quat2euler(
                np.round(cube_joint_pos[3], 2))
            box_1_0_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[5], 2))
            box_1_1_1_euler = rotations.quat2euler(
                np.round(cube_joint_pos[7], 2))

            for data in [
                    box_1_0_0_euler, box_1_1_0_euler, box_1_0_1_euler,
                    box_1_1_1_euler
            ]:
                data[data < -3] += 6.28

            F_x_angle = [
                box_1_0_1_euler[0], box_1_0_0_euler[0], box_1_1_1_euler[0],
                box_1_1_0_euler[0]
            ]
            F_y_angle = [
                box_1_0_1_euler[1], box_1_0_0_euler[1], box_1_1_1_euler[1],
                box_1_1_0_euler[1]
            ]
            F_z_angle = [
                box_1_0_1_euler[2], box_1_0_0_euler[2], box_1_1_1_euler[2],
                box_1_1_0_euler[2]
            ]

            F_bool = ((np.max(F_x_angle) - np.min(F_x_angle)) < 0.1) & \
                     ((np.max(F_y_angle) - np.min(F_y_angle)) < 0.1) & \
                     ((np.max(F_z_angle) - np.min(F_z_angle)) < 0.1)

            if F_bool:
                angle = [F_x_angle[0]]
            else:
                angle = [0]

        cube_qpos = np.concatenate([cube_qpos, cube_rot, angle])
        assert cube_qpos.shape == (8, )
        return cube_qpos
Пример #10
0
 def get_finger_ori(self):
     finger_rel_quat = self.sim.data.get_body_xquat("rightfinger")
     hand_quat = self.sim.data.get_body_xquat("right_hand")
     finger_world_quat = quat_mul(finger_rel_quat, hand_quat)  # TODO: which one at first?
     return quat2euler(finger_world_quat)
Пример #11
0
    def generate_next_action(self,
                             curr_state,
                             end_goal,
                             imagination,
                             env,
                             num_acs=50,
                             max_steps=50,
                             num_copies=100,
                             num_reps=5,
                             num_iterations=2,
                             alpha=1.0,
                             osm_frac=0.0,
                             tol=0.05,
                             return_average=True,
                             noise=0.1):
        if env.name.startswith("fetch_push") or env.name.startswith(
                "fetch_pick"):
            state = env.save_state()
            quaternions = np.tile(state.qpos[-4:],
                                  (len(self.quaternion_invariants), 1))
            quaternion_rot = quat_mul(quaternions, self.quaternion_invariants)
            euler_angles = quat2euler(quaternion_rot)
            scores = np.linalg.norm(euler_angles[:, :2], axis=-1) + np.abs(
                euler_angles[:, -1] -
                np.tile(self.prev_angle[-1], len(euler_angles)))
            ind = np.argmin(scores)
            self.prev_angle = euler_angles[ind, :]
            state.qpos[-4:] = quaternion_rot[ind, :]
            state.qvel[-3:] = quat_rot_vec(
                quat_conjugate(self.quaternion_invariants[ind]),
                state.qvel[-3:])
            env.restore_state(state)
            curr_state = env._get_obs()["observation"]

        gen_states, gen_actions = imagination.test_traj_rand_gan(
            np.tile(curr_state, (num_acs, 1)),
            np.tile(end_goal, (num_acs, 1)),
            num_steps=1,
            frac_replaced_with_osm_pred=0.0)
        curr_states = gen_states[:, 0, :]
        curr_init_actions = gen_actions[:, 0, :]
        start_states = np.repeat(curr_states, num_copies, axis=0)
        end_goals = np.tile(end_goal, (start_states.shape[0], 1))
        num_osms = len(imagination.one_step_model.networks)
        best_action = None

        for it in range(num_iterations):
            inds = np.array_split(np.random.permutation(start_states.shape[0]),
                                  num_osms)
            rep_acs = np.repeat(curr_init_actions, num_copies, axis=0)

            success_fracs = []
            for j in range(num_reps):
                next_states = np.zeros_like(start_states)
                for i in range(num_osms):
                    next_states[inds[i],
                                ...] = imagination.one_step_model.predict(
                                    start_states[inds[i], ...],
                                    rep_acs[inds[i], ...],
                                    osm_ind=i,
                                    normed_input=False)
                gen_states, _ = imagination.test_traj_rand_gan(
                    next_states,
                    end_goals,
                    num_steps=max_steps - 1,
                    frac_replaced_with_osm_pred=osm_frac)
                success_mat = env.batch_goal_achieved(gen_states,
                                                      end_goals[:,
                                                                np.newaxis, :],
                                                      tol=tol)
                success_fracs.append(
                    np.sum(success_mat, axis=-1).reshape(num_acs, num_copies) /
                    max_steps)
            success_fracs = np.concatenate(success_fracs, axis=1)
            pseudo_rewards = np.mean(success_fracs, axis=-1)
            succ_frac = pseudo_rewards.copy()

            if return_average:
                #pseudo_rewards = np.exp(alpha * np.clip((pseudo_rewards - pseudo_rewards.mean()) / (pseudo_rewards.std() + 1e-4), -2.0, 2.0))
                pseudo_rewards = (pseudo_rewards - pseudo_rewards.mean()) / (
                    pseudo_rewards.std() + 1e-4)
                pseudo_rewards = np.exp(
                    alpha * (pseudo_rewards - pseudo_rewards.max()))
                best_action = np.sum(
                    curr_init_actions * pseudo_rewards[:, np.newaxis],
                    axis=0) / np.sum(pseudo_rewards)
            else:
                best_action = curr_init_actions[np.argmax(pseudo_rewards), :]

            curr_init_actions = np.tile(best_action, (num_acs, 1))
            curr_init_actions[1:] = np.clip(
                curr_init_actions[1:] +
                noise * np.random.randn(*curr_init_actions[1:].shape), -1.0,
                1.0)

        return best_action, 1
Пример #12
0
 def get_finger_ori(self):
     return quat2euler(self.sim.data.get_body_xquat("robotwrist_rolllink"))
Пример #13
0
def randomize_ur10_xml(var_mass=0.0, var_damp=0.0, var_fr=0.0, var_grav_x_y=0.0, var_grav_z=0.0, var_body_pos=0.00,
                       var_body_rot=0.0, worker_id=1):

    # parameters:
    #
    # var_mass        : mass variance relative to actual mass
    #
    # var_damp        : damping variance relative to the standard value
    # var_fr          : friction variance relative to the standard value
    #
    # var_grav_x_y    : gravity variance in x- and y-direction (absolute)
    # var_grav_z      : gravity variance in z-direction (absolute)
    #
    # var_body_pos    : variance of body position in m per direction
    # var_body_rot    : variance of body rotation in degree per axis
    # ______________________________________________________________________
    #
    # return          :  offset of car_body to consider for goal position
    #
    # ______________________________________________________________________


    main_xml_temp = os.path.join(*[ur_path, "ur10_assembly_setup_rand_temp_{}.xml".format(worker_id)])
    robot_body_rel = os.path.join("ur10_heg", "ur10_body_heg_rand_temp_{}.xml".format(worker_id))
    car_body_rel = os.path.join("ur10_heg", "car_body_rand_temp_{}.xml".format(worker_id))
    defaults_rel = os.path.join("ur10_heg", "ur10_default_rand_temp_{}.xml".format(worker_id))
    robot_body_xml_temp = os.path.join(ur_path, robot_body_rel)
    car_body_xml_temp = os.path.join(ur_path, car_body_rel)
    defaults_xml_temp = os.path.join(ur_path, defaults_rel)

    # load robot body xml and randomize body masses
    tree = ET.parse(robot_body_xml)
    inertials = []
    for elem in tree.iter():
        if elem.tag == 'inertial':      # find inertial elements in xml
            inertials.append(elem)

    for inertial in inertials:          # change masses
        inertial.attrib['mass'] = str((np.random.uniform(-var_mass, +var_mass) + 1) * float(inertial.attrib['mass']))

    tree.write(robot_body_xml_temp)     # save new xml in temp file to be loaded in gym env

    # load defaults xml and randomize joint damping and surface friction
    #damping_arm_rand = damping_arm * (1 + np.random.uniform(-var_damp, +var_damp))
    #damping_wrist_rand = damping_wrist * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_pan_rand = damping_pan * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_lift_rand = damping_lift * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_elbow_rand = damping_elbow * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_wrist1_rand = damping_wrist1 * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_wrist2_rand = damping_wrist2 * (1 + np.random.uniform(-var_damp, +var_damp))
    damping_wrist3_rand = damping_wrist3 * (1 + np.random.uniform(-var_damp, +var_damp))

    friction_body_rand = friction_body * (1 + np.random.uniform(-var_fr, +var_fr, (3,)))
    friction_heg_rand = friction_heg * (1 + np.random.uniform(-var_fr, +var_fr, (3,)))

    friction_body_string = "{} {} {}".format(*friction_body_rand)
    friction_heg_string = "{} {} {}".format(*friction_heg_rand)

    tree = ET.parse(defaults_xml)

    for default in tree.findall('default'):
        if default.attrib['class'] == 'ur10:pan':
            default.findall('joint')[0].attrib['damping'] = str(damping_pan_rand)
        if default.attrib['class'] == 'ur10:lift':
            default.findall('joint')[0].attrib['damping'] = str(damping_lift_rand)
        if default.attrib['class'] == 'ur10:elbow':
            default.findall('joint')[0].attrib['damping'] = str(damping_elbow_rand)
        if default.attrib['class'] == 'ur10:wrist1':
            default.findall('joint')[0].attrib['damping'] = str(damping_wrist1_rand)
        if default.attrib['class'] == 'ur10:wrist2':
            default.findall('joint')[0].attrib['damping'] = str(damping_wrist2_rand)
        if default.attrib['class'] == 'ur10:wrist3':
            default.findall('joint')[0].attrib['damping'] = str(damping_wrist3_rand)
        if default.attrib['class'] == 'body':
            default.findall('geom')[0].attrib['friction'] = friction_body_string
        if default.attrib['class'] == 'heg':
            default.findall('geom')[0].attrib['friction'] = friction_heg_string

    tree.write(defaults_xml_temp)

    # load main xml and randomize gravity, also adapt includes to worker_id
    grav_x = np.random.uniform(-var_grav_x_y, var_grav_x_y)
    grav_y = np.random.uniform(-var_grav_x_y, var_grav_x_y)
    grav_z = np.random.uniform(-var_grav_z, var_grav_z) - 9.81
    grav_string = "{} {} {}".format(grav_x, grav_y, grav_z)

    tree = ET.parse(main_xml)
    tree.findall("option")[0].attrib['gravity'] = grav_string

    for include in tree.findall("default")[0].iter("include"):
        include.attrib['file'] = defaults_rel
    world_temp_files = [car_body_rel, robot_body_rel]
    for i, include in enumerate(tree.findall("worldbody")[0].iter("include")):
        include.attrib['file'] = world_temp_files[i]

    tree.write(main_xml_temp)

    # load car body xml and change position (offset is returned by this function)
    body_euler = normalize_rad(rotations.quat2euler(body_quat))
    body_pos_offset = np.random.uniform(-var_body_pos, var_body_pos, (3,))
    body_euler_offset = np.random.uniform(-var_body_rot/180*np.pi, var_body_rot/180*np.pi, (3,))

    body_pos_rand = body_pos + body_pos_offset
    body_euler_rand = body_euler + body_euler_offset
    body_quat_rand = rotations.euler2quat(body_euler_rand)

    body_pos_rand_string = "{} {} {}".format(*body_pos_rand)
    body_quat_rand_string = "{} {} {} {}".format(*body_quat_rand)

    tree = ET.parse(car_body_xml)
    for body in tree.findall('body'):
        if body.attrib['name'] == 'body_link':
            body.attrib['pos'] = body_pos_rand_string
            body.attrib['quat'] = body_quat_rand_string
    tree.write(car_body_xml_temp)

    offset = {
        'body_pos_offset': body_pos_offset,
        'body_euler_offset': body_euler_offset
    }

    return offset
Пример #14
0
 def quat_to_euler(self, quat):
     return rotations.quat2euler(quat)
Пример #15
0
    def _get_obs(self):
        # positions
        grip_pos = self.sim.data.get_site_xpos('robot0:grip')
        dt = self.sim.nsubsteps * self.sim.model.opt.timestep
        grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
        robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
        if self.has_object:
            object_pos = self.sim.data.get_site_xpos('object0')
            # rotations
            object_rot = rotations.mat2euler(
                self.sim.data.get_site_xmat('object0'))
            # velocities
            object_velp = self.sim.data.get_site_xvelp('object0') * dt
            object_velr = self.sim.data.get_site_xvelr('object0') * dt
            # gripper state
            object_rel_pos = object_pos - grip_pos
            object_velp -= grip_velp
        else:
            object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(
                0)
        gripper_state = robot_qpos[-2:]
        gripper_vel = robot_qvel[
            -2:] * dt  # change to a scalar if the gripper is made symmetric

        if not self.has_object:
            achieved_goal = grip_pos.copy()
        else:
            achieved_goal = np.squeeze(object_pos.copy())
        obs = np.concatenate([
            grip_pos,
            object_pos.ravel(),
            object_rel_pos.ravel(),
            gripper_state,
            object_rot.ravel(),
            object_velp.ravel(),
            object_velr.ravel(),
            grip_velp,
            gripper_vel,
        ])

        # obstacle state

        if self.obstacle_added:  # added by ray 4

            if self.first_time_in_loop:
                # First time goes in to init network dimention.
                self.first_time_in_loop = False
                for i in range(self.obstacle_num):
                    obstacle_id = "obstacle_" + str(i)
                    self.used_obstacle_bid_list.append(
                        self.model.body_name2id(obstacle_id))

            all_obstacles_states = np.array([])
            for obstacle_bid in self.used_obstacle_bid_list:
                obstacle_pos = self.model.body_pos[obstacle_bid].ravel().copy()
                obstacle_quat = self.model.body_quat[obstacle_bid].ravel(
                ).copy()
                obstacle_rot = rotations.quat2euler(
                    obstacle_quat).ravel().copy()
                temp_all_obstacles_states = np.concatenate(
                    [obstacle_pos, obstacle_rot])
                all_obstacles_states = np.concatenate(
                    [all_obstacles_states, temp_all_obstacles_states])

            obs = np.concatenate([
                grip_pos,
                object_pos.ravel(),
                object_rel_pos.ravel(),
                gripper_state,
                object_rot.ravel(),
                object_velp.ravel(),
                object_velr.ravel(),
                grip_velp,
                gripper_vel,
                all_obstacles_states.ravel(),
            ])

        return {
            'observation': obs.copy(),
            'achieved_goal': achieved_goal.copy(),
            'desired_goal': self.goal.copy(),
        }