Пример #1
0
    def step(self, a):
        """Advance one control step while tracking the mocap reference motion.

        The current mocap frame is derived from ``self.time``. Joint targets
        (mocap columns ``8 + i``) and root-position targets (mocap columns 1,
        3 and 2, in that order) are interpolated with ``cubic`` using column 0
        as the time knot. A PD torque toward ``target + a`` is written into
        ``self.action_buffer`` on each of the ``self.frame_skip`` simulator
        sub-steps, and a randomly chosen buffered torque is applied. The
        reward is a weighted sum of exponentiated tracking errors plus a
        foot-contact term.

        Args:
            a: 1-D array of joint-position offsets added to the interpolated
                mocap joint targets; its length must match ``qpos[7:]``.

        Returns:
            ``(observation, reward, done, info)``. ``info["specific_reward"]``
            holds the individual reward terms. When a body listed in
            ``self.collision_check_id`` touches a ground body, ``done`` is
            True, the reward and every reward term are reported as 0.0, and
            ``info["episode"]`` carries the accumulated return ``r`` and
            episode length ``l``.
        """
        mocap_cycle_dt = self.vel_scale * 0.033332
        mocap_data_num = 38
        last_idx = mocap_data_num - 1
        mocap_cycle_period = mocap_data_num * mocap_cycle_dt

        self.time += self.dt

        mocap = self.mocap_data
        init_idx = self.init_mocap_data_idx
        local_time = self.time % mocap_cycle_period
        local_time_plus_init = (local_time + init_idx * mocap_cycle_dt) % mocap_cycle_period
        cycle_iter = int((init_idx + int(self.time / mocap_cycle_dt)) / mocap_data_num)
        self.mocap_data_idx = (init_idx + int(local_time / mocap_cycle_dt)) % mocap_data_num
        idx = self.mocap_data_idx
        # NOTE(review): next_idx reaches mocap_data_num on the last frame, so
        # self.mocap_data presumably has at least mocap_data_num + 1 rows -- confirm.
        next_idx = idx + 1

        # Whenever a later cycle comes back to the initial frame, re-anchor the
        # first two root coordinates to where the robot currently is.
        if cycle_iter != 0 and idx == init_idx:
            self.cycle_init_root_pos[0] = self.sim.data.qpos[0]
            self.cycle_init_root_pos[1] = self.sim.data.qpos[1]

        def interp(t, t0, t1, x0, x1):
            # Every interpolation in this method uses zero boundary arguments.
            return cubic(t, t0, t1, x0, x1, 0.0, 0.0)

        target_data_qpos = np.zeros_like(a)
        target_data_qvel = np.zeros_like(a)
        target_data_body_delta = np.zeros(3)
        target_data_body_vel = np.zeros(3)

        t_cur = mocap[idx, 0]
        t_next = mocap[next_idx, 0]

        for i in range(a.size):
            col = i + 8
            target_data_qpos[i] = interp(local_time_plus_init, t_cur, t_next,
                                         mocap[idx, col], mocap[next_idx, col])
            target_data_qvel[i] = (mocap[next_idx, col] - mocap[idx, col]) / mocap_cycle_dt

        # (output axis, mocap column): root targets 0/1/2 read columns 1/3/2.
        axis_columns = ((0, 1), (1, 3), (2, 2))

        if idx >= init_idx:
            for axis, col in axis_columns:
                ref = mocap[init_idx, col]
                target_data_body_delta[axis] = interp(
                    local_time_plus_init, t_cur, t_next,
                    mocap[idx, col] - ref, mocap[next_idx, col] - ref)
        else:
            # The frame index has wrapped past the end of the clip.
            # NOTE(review): this branch interpolates on local_time (not
            # local_time_plus_init) against knots shifted by the last frame's
            # timestamp -- confirm this is intended.
            t0 = mocap[last_idx, 0] + t_cur
            t1 = mocap[last_idx, 0] + t_next
            # Only the column-1 coordinate carries over the last frame's value.
            target_data_body_delta[0] = interp(
                local_time, t0, t1,
                mocap[last_idx, 1] + mocap[idx, 1] - mocap[init_idx, 1],
                mocap[last_idx, 1] + mocap[next_idx, 1] - mocap[init_idx, 1])
            for axis, col in axis_columns[1:]:
                ref = mocap[init_idx, col]
                target_data_body_delta[axis] = interp(
                    local_time, t0, t1,
                    mocap[idx, col] - ref, mocap[next_idx, col] - ref)

        # Finite-difference root velocity between the two bracketing frames.
        for axis, col in axis_columns:
            target_data_body_vel[axis] = (mocap[next_idx, col] - mocap[idx, col]) / mocap_cycle_dt

        for _ in range(self.frame_skip):
            qpos = self.sim.data.qpos
            qvel = self.sim.data.qvel
            torque = 900 * (target_data_qpos + a - qpos[7:]) + 60 * (-qvel[6:])
            # Store the fresh torque in a 9-slot ring buffer ...
            self.action_buffer[self.buffer_idx, :] = torque
            self.buffer_idx = self.buffer_idx + 1
            if self.buffer_idx == 9:
                self.buffer_idx = 0
            # ... and apply a randomly chosen buffered torque instead.
            # NOTE(review): slots 0..8 are written above but the draw starts at
            # 1, so slot 0 is never applied; if randint has an inclusive upper
            # bound (random.randint) it can also return 9 -- confirm which
            # randint is imported and how many rows action_buffer has.
            rand_action_idx = randint(1, 9)
            self.do_simulation(self.action_buffer[rand_action_idx, :], 1)

        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel

        # The orientation target is the identity quaternion, not a mocap value.
        basequat = self.sim.data.get_body_xquat("Neck_Link")
        basequat_desired = np.array([1, 0, 0, 0])
        baseQuatError = 1 - np.dot(basequat_desired, basequat)

        Tar_Body = self.cycle_init_root_pos + target_data_body_delta

        # Classify every active contact: termination contact and per-foot
        # ground contact. Name lookups are done once, outside the loop.
        done_by_contact = False
        self.r_contact = False
        self.l_contact = False
        geom_bodyid = self.model.geom_bodyid
        l_foot_id = self.model.body_name2id("L_Foot_Link")
        r_foot_id = self.model.body_name2id("R_Foot_Link")

        def is_ground(body_id):
            return any(body_id == ground_id for ground_id in self.ground_id)

        def is_watched(body_id):
            return any(body_id == check_id for check_id in self.collision_check_id)

        for i in range(self.sim.data.ncon):
            contact = self.sim.data.contact[i]
            body1 = geom_bodyid[contact.geom1]
            body2 = geom_bodyid[contact.geom2]
            ground1 = is_ground(body1)
            ground2 = is_ground(body2)
            if (ground1 and is_watched(body2)) or (ground2 and is_watched(body1)):
                done_by_contact = True
            if (ground1 and body2 == l_foot_id) or (ground2 and body1 == l_foot_id):
                self.l_contact = True
            if (ground1 and body2 == r_foot_id) or (ground2 and body1 == r_foot_id):
                self.r_contact = True

        # Contact reward: the expected foot-contact pattern depends on the
        # frame index (both feet, right foot only, or left foot only).
        if idx in (last_idx, 0, 1, 18, 19, 20):
            contact_matches = self.r_contact and self.l_contact
        elif idx <= 18:
            contact_matches = self.r_contact and not self.l_contact
        else:
            contact_matches = self.l_contact and not self.r_contact
        mimic_contact_reward = 0.2 if contact_matches else 0.0

        qpos_weight = np.asarray([5.0, 3.0, 1.0, 1.0, 1.0, 1.0,
                                  5.0, 3.0, 1.0, 1.0, 1.0, 1.0,
                                  5.0, 5.0, 5.0,
                                  3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0])
        # Norm of the element-wise squared weighted errors; np.linalg.norm
        # already reduces to a scalar, so no further averaging is applied.
        weighted_sq_err = (qpos_weight * (target_data_qpos - qpos.flat[7:])) ** 2
        mimic_qpos_reward = 0.4 * exp(-2.0 * np.linalg.norm(weighted_sq_err))
        # Weight 0.00: the joint-velocity term is computed but contributes nothing.
        mimic_qvel_reward = 0.00 * exp(-0.1 * (np.linalg.norm(target_data_qvel - qvel.flat[6:]) ** 2))
        mimic_body_reward = 0.2 * exp(-10 * (np.linalg.norm(Tar_Body - qpos.flat[0:3]) ** 2))
        mimic_body_orientation_reward = 0.1 * exp(-200 * baseQuatError)
        mimic_body_vel_reward = 0.1 * exp(-5.0 * (np.linalg.norm(target_data_body_vel - qvel.flat[0:3]) ** 2))
        reward = (mimic_qpos_reward + mimic_qvel_reward + mimic_body_orientation_reward
                  + mimic_body_reward + mimic_body_vel_reward + mimic_contact_reward)

        specific_reward = dict(
            mimic_qpos_reward=mimic_qpos_reward,
            mimic_qvel_reward=mimic_qvel_reward,
            mimic_body_orientation_reward=mimic_body_orientation_reward,
            mimic_body_reward=mimic_body_reward,
            mimic_body_vel_reward=mimic_body_vel_reward,
            mimic_contact_reward=mimic_contact_reward)

        if not done_by_contact:
            self.epi_len += 1
            self.epi_reward += reward
            if self.epi_len == 1000:
                print("Epi len: ", self.epi_len)
            return self._get_obs(), reward, done_by_contact, dict(specific_reward=specific_reward)

        # Terminal step: the reward is zeroed, so every reported component is
        # zeroed as well to keep the breakdown consistent with the total.
        reward = 0.0
        specific_reward = dict.fromkeys(specific_reward, 0.0)
        return_epi_len = self.epi_len
        return_epi_reward = self.epi_reward
        print("Epi len: ", return_epi_len)
        return self._get_obs(), reward, done_by_contact, dict(
            episode=dict(r=return_epi_reward, l=return_epi_len),
            specific_reward=specific_reward)
Пример #2
0
    def step(self, a):
        """Replay one mocap frame by writing the reference state into the simulator.

        The current mocap frame is derived from ``self.time``. Joint targets
        (mocap columns ``8 + i``), root-position targets (mocap columns 1, 3
        and 2, in that order) and the root quaternion (mocap columns 4..7) are
        assembled into a full state that is passed to ``self.set_state`` and
        followed by one ``self.sim.step()``. No torque is applied here.

        Args:
            a: Array used only for its shape and size, which determine how
                many joint columns are read from the mocap data.

        Returns:
            ``(observation, reward, done, info)``. ``info["specific_reward"]``
            holds the individual reward terms. Contact detection is skipped on
            the first call (while ``self.done_init`` is False). When a body in
            ``self.collision_check_id`` touches geom 0, ``done`` is True, the
            reward and all reward terms are 0.0, and ``info["episode"]``
            carries the accumulated return ``r`` and episode length ``l``.
        """
        mocap_cycle_dt = 0.033332
        # The cycle length is one less than the stored frame count, so
        # next_idx below stays within the stored frames.
        mocap_data_num = self.mocap_data_num - 1
        last_idx = mocap_data_num - 1
        mocap_cycle_period = mocap_data_num * mocap_cycle_dt

        self.time += self.dt

        mocap = self.mocap_data
        init_idx = self.init_mocap_data_idx
        local_time = self.time % mocap_cycle_period
        local_time_plus_init = (local_time + init_idx * mocap_cycle_dt) % mocap_cycle_period
        cycle_iter = int((init_idx + int(self.time / mocap_cycle_dt)) / mocap_data_num)
        self.mocap_data_idx = (init_idx + int(local_time / mocap_cycle_dt)) % mocap_data_num
        idx = self.mocap_data_idx
        next_idx = idx + 1

        # Whenever a later cycle comes back to the initial frame, re-anchor the
        # first two root coordinates to where the robot currently is.
        if cycle_iter != 0 and idx == init_idx:
            self.cycle_init_root_pos[0] = self.sim.data.qpos[0]
            self.cycle_init_root_pos[1] = self.sim.data.qpos[1]

        def interp(t, t0, t1, x0, x1):
            # Every interpolation in this method uses zero boundary arguments.
            return cubic(t, t0, t1, x0, x1, 0.0, 0.0)

        target_data_qpos = np.zeros_like(a)
        target_data_qvel = np.zeros_like(a)
        # No end-effector reference is interpolated in this method, so the
        # end-effector reward below compares against an all-zero target.
        Tar_EE_COM = np.zeros((4, 3))
        target_data_body_delta = np.zeros(3)
        target_data_body_vel = np.zeros(3)

        t_cur = mocap[idx, 0]
        t_next = mocap[next_idx, 0]

        for i in range(a.size):
            col = i + 8
            target_data_qpos[i] = interp(local_time_plus_init, t_cur, t_next,
                                         mocap[idx, col], mocap[next_idx, col])
            target_data_qvel[i] = (mocap[next_idx, col] - mocap[idx, col]) / mocap_cycle_dt

        # (output axis, mocap column): root targets 0/1/2 read columns 1/3/2.
        axis_columns = ((0, 1), (1, 3), (2, 2))

        if idx >= init_idx:
            for axis, col in axis_columns:
                ref = mocap[init_idx, col]
                target_data_body_delta[axis] = interp(
                    local_time_plus_init, t_cur, t_next,
                    mocap[idx, col] - ref, mocap[next_idx, col] - ref)
        else:
            # The frame index has wrapped past the end of the cycle.
            # NOTE(review): this branch interpolates on local_time (not
            # local_time_plus_init) against knots shifted by the last cycle
            # frame's timestamp -- confirm this is intended.
            t0 = mocap[last_idx, 0] + t_cur
            t1 = mocap[last_idx, 0] + t_next
            # Only the column-1 coordinate carries over the last frame's value.
            target_data_body_delta[0] = interp(
                local_time, t0, t1,
                mocap[last_idx, 1] + mocap[idx, 1] - mocap[init_idx, 1],
                mocap[last_idx, 1] + mocap[next_idx, 1] - mocap[init_idx, 1])
            for axis, col in axis_columns[1:]:
                ref = mocap[init_idx, col]
                target_data_body_delta[axis] = interp(
                    local_time, t0, t1,
                    mocap[idx, col] - ref, mocap[next_idx, col] - ref)

        # Finite-difference root velocity between the two bracketing frames.
        for axis, col in axis_columns:
            target_data_body_vel[axis] = (mocap[next_idx, col] - mocap[idx, col]) / mocap_cycle_dt

        # NOTE(review): qpos/qvel are fetched before set_state(); the rewards
        # below see the post-step state only if these are live views of the
        # simulator buffers -- confirm.
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel

        # End-effector positions relative to base_link, rotated by the
        # conjugate of the base orientation (evaluated before the state is
        # overwritten below).
        basequat = Quaternion(self.sim.data.get_body_xquat("base_link"))
        basequat_conj = basequat.conjugate
        basepos = self.get_body_com("base_link")
        ee_links = ("R_AnkleCenter_Link", "L_AnkleCenter_Link",
                    "R_Wrist1_Link", "L_Wrist1_Link")
        EE_CoM = np.concatenate(
            [basequat_conj.rotate(self.get_body_com(link) - basepos) for link in ee_links])

        basequat_desired = Quaternion(mocap[idx, 4:8])
        basequat = Quaternion(qpos[3:7])
        baseQuatError = (basequat_desired * basequat.conjugate).angle

        Tar_Body = self.cycle_init_root_pos + target_data_body_delta

        # Overwrite the simulator state with the reference pose and velocity,
        # then advance the simulator once.
        self.set_state(
            np.concatenate((Tar_Body, basequat_desired.elements, target_data_qpos)),
            self.init_qvel + np.concatenate(
                (target_data_body_vel, np.zeros(3), target_data_qvel)),
        )
        self.sim.step()
        print("Idx: ", self.mocap_data_idx)

        mimic_qpos_reward = 0.55 * exp(
            -2.0 * (np.linalg.norm(target_data_qpos - qpos.flat[7:]) ** 2))
        mimic_qvel_reward = 0.05 * exp(
            -0.1 * (np.linalg.norm(target_data_qvel - qvel.flat[6:]) ** 2))
        mimic_ee_reward = 0.1 * exp(
            -40 * (np.linalg.norm(EE_CoM - Tar_EE_COM.flatten()) ** 2))
        mimic_body_reward = 0.2 * exp(
            -10 * (np.linalg.norm(Tar_Body - qpos.flat[0:3]) ** 2 +
                   0.5 * baseQuatError ** 2))
        mimic_body_vel_reward = 0.1 * exp(
            -10 * (np.linalg.norm(target_data_body_vel - qvel.flat[0:3]) ** 2))
        reward = (mimic_qpos_reward + mimic_qvel_reward + mimic_ee_reward
                  + mimic_body_reward + mimic_body_vel_reward)

        # Termination: a watched body touching geom 0. The very first call
        # only arms the check.
        done_by_contact = False
        if self.done_init is False:
            self.done_init = True
        else:
            geom_bodyid = self.model.geom_bodyid

            def is_watched(body_id):
                return any(body_id == check_id for check_id in self.collision_check_id)

            for i in range(self.sim.data.ncon):
                contact = self.sim.data.contact[i]
                if (contact.geom1 == 0 and is_watched(geom_bodyid[contact.geom2])) or \
                        (contact.geom2 == 0 and is_watched(geom_bodyid[contact.geom1])):
                    done_by_contact = True
                    break

        specific_reward = dict(
            mimic_qpos_reward=mimic_qpos_reward,
            mimic_qvel_reward=mimic_qvel_reward,
            mimic_ee_reward=mimic_ee_reward,
            mimic_body_reward=mimic_body_reward,
            mimic_body_vel_reward=mimic_body_vel_reward)

        if not done_by_contact:
            self.epi_len += 1
            self.epi_reward += reward
            return self._get_obs(), reward, done_by_contact, dict(
                specific_reward=specific_reward)

        # Terminal step: report zero reward and zero for every component.
        reward = 0.0
        specific_reward = dict.fromkeys(specific_reward, 0.0)
        return_epi_len = self.epi_len
        return_epi_reward = self.epi_reward
        return self._get_obs(), reward, done_by_contact, dict(
            episode=dict(r=return_epi_reward, l=return_epi_len),
            specific_reward=specific_reward)
Пример #3
0
    def step(self, a):
        """Advance one control step while tracking the mocap reference motion.

        The current mocap frame is derived from ``self.time``. Joint targets
        (mocap columns ``8 + i``), root-position targets (mocap columns 1, 3
        and 2, in that order) and four end-effector targets (the 12 columns
        after the joint columns) are interpolated with ``cubic`` using column
        0 as the time knot. A PD torque toward ``target + a`` is applied for
        ``self.frame_skip`` simulator sub-steps. The reward is a weighted sum
        of exponentiated tracking errors plus a foot-contact term.

        Args:
            a: 1-D array of joint-position offsets added to the interpolated
                mocap joint targets; its length must match ``qpos[7:]``.

        Returns:
            ``(observation, reward, done, info)``. ``info["specific_reward"]``
            holds the individual reward terms. Contact detection is skipped on
            the first call (while ``self.done_init`` is False). When a body in
            ``self.collision_check_id`` touches geom 0, ``done`` is True, the
            reward and every reward term are reported as 0.0, and
            ``info["episode"]`` carries the accumulated return ``r`` and
            episode length ``l``.
        """
        mocap_cycle_dt = 0.033332
        mocap_data_num = 38
        last_idx = mocap_data_num - 1
        mocap_cycle_period = mocap_data_num * mocap_cycle_dt

        self.time += self.dt

        mocap = self.mocap_data
        init_idx = self.init_mocap_data_idx
        local_time = self.time % mocap_cycle_period
        local_time_plus_init = (local_time + init_idx * mocap_cycle_dt) % mocap_cycle_period
        cycle_iter = int((init_idx + int(self.time / mocap_cycle_dt)) / mocap_data_num)
        self.mocap_data_idx = (init_idx + int(local_time / mocap_cycle_dt)) % mocap_data_num
        idx = self.mocap_data_idx
        # NOTE(review): next_idx reaches mocap_data_num on the last frame, so
        # self.mocap_data presumably has at least mocap_data_num + 1 rows -- confirm.
        next_idx = idx + 1

        # Whenever a later cycle comes back to the initial frame, re-anchor the
        # first two root coordinates to where the robot currently is.
        if cycle_iter != 0 and idx == init_idx:
            self.cycle_init_root_pos[0] = self.sim.data.qpos[0]
            self.cycle_init_root_pos[1] = self.sim.data.qpos[1]

        def interp(t, t0, t1, x0, x1):
            # Every interpolation in this method uses zero boundary arguments.
            return cubic(t, t0, t1, x0, x1, 0.0, 0.0)

        target_data_qpos = np.zeros_like(a)
        target_data_qvel = np.zeros_like(a)
        Tar_EE_COM = np.zeros((4, 3))
        target_data_body_delta = np.zeros(3)
        target_data_body_vel = np.zeros(3)

        t_cur = mocap[idx, 0]
        t_next = mocap[next_idx, 0]

        for i in range(a.size):
            col = i + 8
            target_data_qpos[i] = interp(local_time_plus_init, t_cur, t_next,
                                         mocap[idx, col], mocap[next_idx, col])
            target_data_qvel[i] = (mocap[next_idx, col] - mocap[idx, col]) / mocap_cycle_dt

        # (output axis, mocap column): root targets 0/1/2 read columns 1/3/2.
        axis_columns = ((0, 1), (1, 3), (2, 2))

        if idx >= init_idx:
            for axis, col in axis_columns:
                ref = mocap[init_idx, col]
                target_data_body_delta[axis] = interp(
                    local_time_plus_init, t_cur, t_next,
                    mocap[idx, col] - ref, mocap[next_idx, col] - ref)
        else:
            # The frame index has wrapped past the end of the clip.
            # NOTE(review): this branch interpolates on local_time (not
            # local_time_plus_init) against knots shifted by the last frame's
            # timestamp -- confirm this is intended.
            t0 = mocap[last_idx, 0] + t_cur
            t1 = mocap[last_idx, 0] + t_next
            # Only the column-1 coordinate carries over the last frame's value.
            target_data_body_delta[0] = interp(
                local_time, t0, t1,
                mocap[last_idx, 1] + mocap[idx, 1] - mocap[init_idx, 1],
                mocap[last_idx, 1] + mocap[next_idx, 1] - mocap[init_idx, 1])
            for axis, col in axis_columns[1:]:
                ref = mocap[init_idx, col]
                target_data_body_delta[axis] = interp(
                    local_time, t0, t1,
                    mocap[idx, col] - ref, mocap[next_idx, col] - ref)

        # Finite-difference root velocity between the two bracketing frames.
        for axis, col in axis_columns:
            target_data_body_vel[axis] = (mocap[next_idx, col] - mocap[idx, col]) / mocap_cycle_dt

        # End-effector targets are stored right after the joint columns,
        # three consecutive columns per end effector.
        for ee_idx in range(4):
            for cartesian_idx in range(3):
                data_type = 8 + a.size + 3 * ee_idx + cartesian_idx
                Tar_EE_COM[ee_idx, cartesian_idx] = interp(
                    local_time_plus_init, t_cur, t_next,
                    mocap[idx, data_type], mocap[next_idx, data_type])

        # PD control toward the offset mocap pose, one simulator step at a time.
        for _ in range(self.frame_skip):
            qpos = self.sim.data.qpos
            qvel = self.sim.data.qvel
            torque = 400 * (target_data_qpos + a - qpos[7:]) + 40 * (-qvel[6:])
            self.do_simulation(torque, 1)

        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel

        # End-effector positions relative to base_link, rotated by the
        # conjugate of the base orientation.
        basequat = Quaternion(self.sim.data.get_body_xquat("base_link"))
        basequat_conj = basequat.conjugate
        basepos = self.get_body_com("base_link")
        ee_links = ("R_AnkleCenter_Link", "L_AnkleCenter_Link",
                    "R_Wrist1_Link", "L_Wrist1_Link")
        EE_CoM = np.concatenate(
            [basequat_conj.rotate(self.get_body_com(link) - basepos) for link in ee_links])

        # The orientation target is the identity quaternion, not a mocap value.
        basequat_desired = Quaternion([1, 0, 0, 0])
        basequat = Quaternion(qpos[3:7])
        baseQuatError = (basequat_desired * basequat.conjugate).angle

        Tar_Body = self.cycle_init_root_pos + target_data_body_delta

        # Contact classification against geom 0. The very first call only
        # arms the check.
        done_by_contact = False
        self.r_contact = False
        self.l_contact = False
        if self.done_init is False:
            self.done_init = True
        else:
            geom_bodyid = self.model.geom_bodyid

            def is_watched(body_id):
                return any(body_id == check_id for check_id in self.collision_check_id)

            for i in range(self.sim.data.ncon):
                contact = self.sim.data.contact[i]
                # Identify the body on the other side of a geom-0 contact.
                if contact.geom1 == 0:
                    other_via_geom2 = geom_bodyid[contact.geom2]
                    if is_watched(other_via_geom2):
                        done_by_contact = True
                    # NOTE(review): body ids 8 and 15 are hard-coded as the
                    # right/left foot bodies -- confirm against the model.
                    if other_via_geom2 == 8:
                        self.r_contact = True
                    if other_via_geom2 == 15:
                        self.l_contact = True
                if contact.geom2 == 0:
                    other_via_geom1 = geom_bodyid[contact.geom1]
                    if is_watched(other_via_geom1):
                        done_by_contact = True
                    if other_via_geom1 == 8:
                        self.r_contact = True
                    if other_via_geom1 == 15:
                        self.l_contact = True

        # Contact reward: the expected foot-contact pattern depends on the
        # frame index (both feet, right foot only, or left foot only).
        if idx in (last_idx, 0, 1, 18, 19, 20):
            contact_matches = self.r_contact and self.l_contact
        elif idx <= 18:
            contact_matches = self.r_contact and not self.l_contact
        else:
            contact_matches = self.l_contact and not self.r_contact
        mimic_contact_reward = 0.2 if contact_matches else 0.0

        # Norm of the element-wise squared joint errors; np.linalg.norm
        # already reduces to a scalar, so no further averaging is applied.
        sq_err = (target_data_qpos - qpos.flat[7:]) ** 2
        mimic_qpos_reward = 0.4 * exp(-2.0 * np.linalg.norm(sq_err))
        # Weight 0.00: the joint-velocity term is computed but contributes nothing.
        mimic_qvel_reward = 0.00 * exp(
            -0.1 * (np.linalg.norm(target_data_qvel - qvel.flat[6:]) ** 2))
        mimic_ee_reward = 0.1 * exp(
            -40 * (np.linalg.norm(EE_CoM - Tar_EE_COM.flatten()) ** 2))
        mimic_body_reward = 0.2 * exp(
            -10 * (np.linalg.norm(Tar_Body - qpos.flat[0:3]) ** 2 +
                   0.5 * baseQuatError ** 2))
        mimic_body_vel_reward = 0.1 * exp(
            -10 * (np.linalg.norm(target_data_body_vel - qvel.flat[0:3]) ** 2))
        reward = (mimic_qpos_reward + mimic_qvel_reward + mimic_ee_reward
                  + mimic_body_reward + mimic_body_vel_reward + mimic_contact_reward)

        specific_reward = dict(
            mimic_qpos_reward=mimic_qpos_reward,
            mimic_qvel_reward=mimic_qvel_reward,
            mimic_ee_reward=mimic_ee_reward,
            mimic_body_reward=mimic_body_reward,
            mimic_body_vel_reward=mimic_body_vel_reward,
            mimic_contact_reward=mimic_contact_reward)

        if not done_by_contact:
            self.epi_len += 1
            self.epi_reward += reward
            return self._get_obs(), reward, done_by_contact, dict(
                specific_reward=specific_reward)

        # Terminal step: the reward is zeroed, so every reported component is
        # zeroed as well to keep the breakdown consistent with the total.
        reward = 0.0
        specific_reward = dict.fromkeys(specific_reward, 0.0)
        return_epi_len = self.epi_len
        return_epi_reward = self.epi_reward
        return self._get_obs(), reward, done_by_contact, dict(
            episode=dict(r=return_epi_reward, l=return_epi_len),
            specific_reward=specific_reward)