def step(self, a):
    """Advance one control step while tracking the mocap reference clip.

    The reference pose for the current time is interpolated from
    ``self.mocap_data``; the joints are then driven towards
    ``reference + a`` by a PD law for ``self.frame_skip`` simulator
    sub-steps, and the resulting state is scored against the reference.

    Args:
        a: Per-joint offset added to the interpolated reference joint
            positions. It is indexed as a 1-D array with ``a.size`` entries.

    Returns:
        ``(observation, reward, done, info)``. ``done`` is True when a body
        listed in ``self.collision_check_id`` is in contact with a body
        listed in ``self.ground_id``. ``info["specific_reward"]`` holds the
        individual reward terms. On the terminating step ``reward`` and
        every term are 0.0 and ``info["episode"]`` carries the accumulated
        return ``r`` and length ``l``.
    """
    mocap_cycle_dt = self.vel_scale * 0.033332
    mocap_data_num = 38
    last_idx = mocap_data_num - 1
    mocap_cycle_period = mocap_data_num * mocap_cycle_dt

    self.time += self.dt
    init_idx = self.init_mocap_data_idx
    local_time = self.time % mocap_cycle_period
    local_time_plus_init = (
        local_time + init_idx * mocap_cycle_dt) % mocap_cycle_period
    cycle_iter = int(
        (init_idx + int(self.time / mocap_cycle_dt)) / mocap_data_num)
    self.mocap_data_idx = (
        init_idx + int(local_time / mocap_cycle_dt)) % mocap_data_num
    idx = self.mocap_data_idx
    # idx can reach 37, so next_idx can reach 38: mocap_data needs >= 39 rows.
    next_idx = idx + 1

    # Once the clip has wrapped back to its starting frame, re-anchor the
    # horizontal reference origin at the current root position.
    if cycle_iter != 0 and idx == init_idx:
        self.cycle_init_root_pos[0] = self.sim.data.qpos[0]
        self.cycle_init_root_pos[1] = self.sim.data.qpos[1]

    data = self.mocap_data
    # Column 0 is handed to `cubic` as the segment start/end time.
    t_start = data[idx, 0]
    t_end = data[next_idx, 0]

    target_data_qpos = np.zeros_like(a)
    target_data_qvel = np.zeros_like(a)
    target_data_body_delta = np.zeros(3)
    target_data_body_vel = np.zeros(3)

    # Joint references live in columns 8, 9, ... (one column per joint).
    for joint in range(a.size):
        col = joint + 8
        target_data_qpos[joint] = cubic(
            local_time_plus_init, t_start, t_end,
            data[idx, col], data[next_idx, col], 0.0, 0.0)
        target_data_qvel[joint] = (
            data[next_idx, col] - data[idx, col]) / mocap_cycle_dt

    # (root axis, mocap column): axis 1 reads column 3 and axis 2 column 2.
    axis_columns = ((0, 1), (1, 3), (2, 2))
    if idx >= init_idx:
        for axis, col in axis_columns:
            origin = data[init_idx, col]
            target_data_body_delta[axis] = cubic(
                local_time_plus_init, t_start, t_end,
                data[idx, col] - origin, data[next_idx, col] - origin,
                0.0, 0.0)
    else:
        # The clip wrapped before reaching the start frame again: shift the
        # segment times by the last frame's time and, for axis 0 only, add
        # the last frame's column-1 value as carried-over displacement.
        t_shift = data[last_idx, 0]
        for axis, col in axis_columns:
            seg_start = data[idx, col]
            seg_end = data[next_idx, col]
            if axis == 0:
                seg_start = data[last_idx, col] + seg_start
                seg_end = data[last_idx, col] + seg_end
            origin = data[init_idx, col]
            target_data_body_delta[axis] = cubic(
                local_time, t_shift + t_start, t_shift + t_end,
                seg_start - origin, seg_end - origin, 0.0, 0.0)

    for axis, col in axis_columns:
        target_data_body_vel[axis] = (
            data[next_idx, col] - data[idx, col]) / mocap_cycle_dt

    # PD tracking. Each torque is written into a 9-slot ring buffer and a
    # randomly chosen buffered torque is the one actually applied.
    # NOTE(review): `randint(1, 9)` yields 1..9 with `random.randint` but
    # 1..8 with `numpy.random.randint`; with wrap-around at 9 the former
    # would need a 10-row buffer and neither ever picks row 0. Confirm which
    # `randint` is imported and the shape of `self.action_buffer`.
    for _ in range(self.frame_skip):
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        torque = 900 * (target_data_qpos + a - qpos[7:]) + 60 * (-qvel[6:])
        self.action_buffer[self.buffer_idx, :] = torque
        self.buffer_idx = self.buffer_idx + 1
        if self.buffer_idx == 9:
            self.buffer_idx = 0
        rand_action_idx = randint(1, 9)
        self.do_simulation(self.action_buffer[rand_action_idx, :], 1)

    qpos = self.sim.data.qpos
    qvel = self.sim.data.qvel

    basequat = self.sim.data.get_body_xquat("Neck_Link")
    # The identity quaternion is the orientation target (the mocap
    # orientation in columns 4:8 is not used here).
    basequat_desired = np.array([1, 0, 0, 0])
    baseQuatError = 1 - np.dot(basequat_desired, basequat)
    Tar_Body = self.cycle_init_root_pos + target_data_body_delta

    # Contact scan. Body-id lookups are loop-invariant, so resolve them once.
    geom_bodyid = self.model.geom_bodyid
    l_foot_id = self.model.body_name2id("L_Foot_Link")
    r_foot_id = self.model.body_name2id("R_Foot_Link")
    done_by_contact = False
    self.r_contact = False
    self.l_contact = False
    for i in range(self.sim.data.ncon):
        contact = self.sim.data.contact[i]
        body1 = geom_bodyid[contact.geom1]
        body2 = geom_bodyid[contact.geom2]
        # Collect the body on the other side of every ground contact; both
        # geom orders are checked because either side may be the ground.
        touching_ground = []
        if any(body1 == ground_id for ground_id in self.ground_id):
            touching_ground.append(body2)
        if any(body2 == ground_id for ground_id in self.ground_id):
            touching_ground.append(body1)
        for body in touching_ground:
            if any(body == check_id for check_id in self.collision_check_id):
                done_by_contact = True
            if body == l_foot_id:
                self.l_contact = True
            if body == r_foot_id:
                self.r_contact = True

    # Expected (right, left) foot contact for the current mocap frame.
    if idx in (37, 0, 1, 18, 19, 20):
        expected_contact = (True, True)
    elif idx <= 18:
        expected_contact = (True, False)
    else:  # 21..36, since idx is always < 38
        expected_contact = (False, True)
    observed_contact = (self.r_contact, self.l_contact)
    mimic_contact_reward = 0.2 if observed_contact == expected_contact else 0.0

    qpos_weight = np.asarray([
        5.0, 3.0, 1.0, 1.0, 1.0, 1.0,
        5.0, 3.0, 1.0, 1.0, 1.0, 1.0,
        5.0, 5.0, 5.0,
        3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
    ])
    weighted_error = qpos_weight * (target_data_qpos - qpos.flat[7:])
    # np.linalg.norm already returns a scalar, so the former trailing
    # `.mean()` was a no-op and has been dropped.
    # NOTE(review): this is the norm of the squared errors, not their mean;
    # confirm that is the intended metric.
    mimic_qpos_reward = 0.4 * exp(-2.0 * np.linalg.norm(weighted_error**2))
    mimic_qvel_reward = 0.00 * exp(
        -0.1 * (np.linalg.norm(target_data_qvel - qvel.flat[6:])**2))
    mimic_body_reward = 0.2 * exp(
        -10 * (np.linalg.norm(Tar_Body - qpos.flat[0:3])**2))
    mimic_body_orientation_reward = 0.1 * exp(-200 * baseQuatError)
    mimic_body_vel_reward = 0.1 * exp(
        -5.0 * (np.linalg.norm(target_data_body_vel - qvel.flat[0:3])**2))

    reward = (mimic_qpos_reward + mimic_qvel_reward
              + mimic_body_orientation_reward + mimic_body_reward
              + mimic_body_vel_reward + mimic_contact_reward)
    specific_reward = dict(
        mimic_qpos_reward=mimic_qpos_reward,
        mimic_qvel_reward=mimic_qvel_reward,
        mimic_body_orientation_reward=mimic_body_orientation_reward,
        mimic_body_reward=mimic_body_reward,
        mimic_body_vel_reward=mimic_body_vel_reward,
        mimic_contact_reward=mimic_contact_reward)

    if done_by_contact:
        # The terminating step earns nothing. Zero every component,
        # including the orientation and contact terms, so the breakdown
        # agrees with the returned reward.
        reward = 0.0
        specific_reward = dict.fromkeys(specific_reward, 0.0)
        print("Epi len: ", self.epi_len)
        info = dict(
            episode=dict(r=self.epi_reward, l=self.epi_len),
            specific_reward=specific_reward)
    else:
        self.epi_len += 1
        self.epi_reward += reward
        if self.epi_len == 1000:
            print("Epi len: ", self.epi_len)
        info = dict(specific_reward=specific_reward)

    return self._get_obs(), reward, done_by_contact, info
def step(self, a):
    """Play back one frame of the mocap reference and score the state.

    The interpolated reference pose and velocity are written straight into
    the simulator with ``set_state`` and the simulator is stepped once; no
    torque is computed from ``a``.

    Args:
        a: Only its shape and dtype are used (``np.zeros_like(a)`` and
            ``a.size``); its values do not influence the step.

    Returns:
        ``(observation, reward, done, info)``. ``done`` is True when geom 0
        is in contact with a geom whose body id is listed in
        ``self.collision_check_id``; the check is skipped on the first call
        after ``self.done_init`` was False. ``info["specific_reward"]``
        holds the individual reward terms. On the terminating step
        ``reward`` and every term are 0.0 and ``info["episode"]`` carries
        the accumulated return ``r`` and length ``l``.
    """
    mocap_cycle_dt = 0.033332
    # One fewer than the attribute, so frame `idx + 1` is always available
    # when mocap_data has `self.mocap_data_num` rows.
    # NOTE(review): assumes the attribute is the row count; confirm.
    mocap_data_num = self.mocap_data_num - 1
    last_idx = mocap_data_num - 1
    mocap_cycle_period = mocap_data_num * mocap_cycle_dt

    self.time += self.dt
    init_idx = self.init_mocap_data_idx
    local_time = self.time % mocap_cycle_period
    local_time_plus_init = (
        local_time + init_idx * mocap_cycle_dt) % mocap_cycle_period
    cycle_iter = int(
        (init_idx + int(self.time / mocap_cycle_dt)) / mocap_data_num)
    self.mocap_data_idx = (
        init_idx + int(local_time / mocap_cycle_dt)) % mocap_data_num
    idx = self.mocap_data_idx
    next_idx = idx + 1

    # Once the clip has wrapped back to its starting frame, re-anchor the
    # horizontal reference origin at the current root position.
    if cycle_iter != 0 and idx == init_idx:
        self.cycle_init_root_pos[0] = self.sim.data.qpos[0]
        self.cycle_init_root_pos[1] = self.sim.data.qpos[1]

    data = self.mocap_data
    # Column 0 is handed to `cubic` as the segment start/end time.
    t_start = data[idx, 0]
    t_end = data[next_idx, 0]

    target_data_qpos = np.zeros_like(a)
    target_data_qvel = np.zeros_like(a)
    # End-effector targets stay at zero: nothing in this method fills them,
    # so the end-effector reward compares against the origin.
    Tar_EE_COM = np.zeros((4, 3))
    target_data_body_delta = np.zeros(3)
    target_data_body_vel = np.zeros(3)

    # Joint references live in columns 8, 9, ... (one column per joint).
    for joint in range(a.size):
        col = joint + 8
        target_data_qpos[joint] = cubic(
            local_time_plus_init, t_start, t_end,
            data[idx, col], data[next_idx, col], 0.0, 0.0)
        target_data_qvel[joint] = (
            data[next_idx, col] - data[idx, col]) / mocap_cycle_dt

    # (root axis, mocap column): axis 1 reads column 3 and axis 2 column 2.
    axis_columns = ((0, 1), (1, 3), (2, 2))
    if idx >= init_idx:
        for axis, col in axis_columns:
            origin = data[init_idx, col]
            target_data_body_delta[axis] = cubic(
                local_time_plus_init, t_start, t_end,
                data[idx, col] - origin, data[next_idx, col] - origin,
                0.0, 0.0)
    else:
        # The clip wrapped before reaching the start frame again: shift the
        # segment times by the last frame's time and, for axis 0 only, add
        # the last frame's column-1 value as carried-over displacement.
        t_shift = data[last_idx, 0]
        for axis, col in axis_columns:
            seg_start = data[idx, col]
            seg_end = data[next_idx, col]
            if axis == 0:
                seg_start = data[last_idx, col] + seg_start
                seg_end = data[last_idx, col] + seg_end
            origin = data[init_idx, col]
            target_data_body_delta[axis] = cubic(
                local_time, t_shift + t_start, t_shift + t_end,
                seg_start - origin, seg_end - origin, 0.0, 0.0)

    for axis, col in axis_columns:
        target_data_body_vel[axis] = (
            data[next_idx, col] - data[idx, col]) / mocap_cycle_dt

    # NOTE(review): qpos/qvel are bound before set_state()/sim.step(). The
    # rewards below see the post-step state only if these are live views of
    # the simulator buffers; confirm.
    qpos = self.sim.data.qpos
    qvel = self.sim.data.qvel

    # End-effector positions relative to base_link, rotated by the
    # conjugate of the base_link orientation; sampled before the state is
    # overwritten.
    basequat_conj = Quaternion(
        self.sim.data.get_body_xquat("base_link")).conjugate
    basepos = self.get_body_com("base_link")
    ee_links = ("R_AnkleCenter_Link", "L_AnkleCenter_Link",
                "R_Wrist1_Link", "L_Wrist1_Link")
    EE_CoM = np.concatenate([
        basequat_conj.rotate(self.get_body_com(link) - basepos)
        for link in ee_links])

    basequat_desired = Quaternion(data[idx, 4:8])
    baseQuatError = (basequat_desired * Quaternion(qpos[3:7]).conjugate).angle
    Tar_Body = self.cycle_init_root_pos + target_data_body_delta

    # Overwrite the simulator state with the reference pose and velocity.
    self.set_state(
        np.concatenate(
            (Tar_Body, basequat_desired.elements, target_data_qpos)),
        self.init_qvel + np.concatenate(
            (target_data_body_vel, np.zeros(3), target_data_qvel)),
    )
    self.sim.step()
    print("Idx: ", self.mocap_data_idx)

    mimic_qpos_reward = 0.55 * exp(
        -2.0 * (np.linalg.norm(target_data_qpos - qpos.flat[7:])**2))
    mimic_qvel_reward = 0.05 * exp(
        -0.1 * (np.linalg.norm(target_data_qvel - qvel.flat[6:])**2))
    mimic_ee_reward = 0.1 * exp(
        -40 * (np.linalg.norm(EE_CoM - Tar_EE_COM.flatten())**2))
    mimic_body_reward = 0.2 * exp(
        -10 * (np.linalg.norm(Tar_Body - qpos.flat[0:3])**2
               + 0.5 * baseQuatError**2))
    mimic_body_vel_reward = 0.1 * exp(
        -10 * (np.linalg.norm(target_data_body_vel - qvel.flat[0:3])**2))

    reward = (mimic_qpos_reward + mimic_qvel_reward + mimic_ee_reward
              + mimic_body_reward + mimic_body_vel_reward)
    specific_reward = dict(
        mimic_qpos_reward=mimic_qpos_reward,
        mimic_qvel_reward=mimic_qvel_reward,
        mimic_ee_reward=mimic_ee_reward,
        mimic_body_reward=mimic_body_reward,
        mimic_body_vel_reward=mimic_body_vel_reward)

    done_by_contact = False
    if self.done_init is False:
        # First call after done_init was cleared: skip the contact check.
        self.done_init = True
    else:
        geom_bodyid = self.model.geom_bodyid
        for i in range(self.sim.data.ncon):
            contact = self.sim.data.contact[i]
            # Geoms on the other side of a contact with geom 0; both orders
            # are checked because geom 0 may be either geom1 or geom2.
            others = []
            if contact.geom1 == 0:
                others.append(contact.geom2)
            if contact.geom2 == 0:
                others.append(contact.geom1)
            if any(geom_bodyid[geom] == check_id
                   for geom in others
                   for check_id in self.collision_check_id):
                done_by_contact = True
                break

    if done_by_contact:
        # The terminating step earns nothing; report the episode totals.
        reward = 0.0
        specific_reward = dict.fromkeys(specific_reward, 0.0)
        info = dict(
            episode=dict(r=self.epi_reward, l=self.epi_len),
            specific_reward=specific_reward)
    else:
        self.epi_len += 1
        self.epi_reward += reward
        info = dict(specific_reward=specific_reward)

    return self._get_obs(), reward, done_by_contact, info
def step(self, a):
    """Advance one control step while tracking the mocap reference clip.

    The reference pose and end-effector targets for the current time are
    interpolated from ``self.mocap_data``; the joints are then driven
    towards ``reference + a`` by a PD law for ``self.frame_skip`` simulator
    sub-steps, and the resulting state is scored against the reference.

    Args:
        a: Per-joint offset added to the interpolated reference joint
            positions. It is indexed as a 1-D array with ``a.size`` entries.

    Returns:
        ``(observation, reward, done, info)``. ``done`` is True when geom 0
        is in contact with a geom whose body id is listed in
        ``self.collision_check_id``; the contact scan is skipped on the
        first call after ``self.done_init`` was False.
        ``info["specific_reward"]`` holds the individual reward terms. On
        the terminating step ``reward`` and every term are 0.0 and
        ``info["episode"]`` carries the accumulated return ``r`` and
        length ``l``.
    """
    mocap_cycle_dt = 0.033332
    mocap_data_num = 38
    last_idx = mocap_data_num - 1
    mocap_cycle_period = mocap_data_num * mocap_cycle_dt

    self.time += self.dt
    init_idx = self.init_mocap_data_idx
    local_time = self.time % mocap_cycle_period
    local_time_plus_init = (
        local_time + init_idx * mocap_cycle_dt) % mocap_cycle_period
    cycle_iter = int(
        (init_idx + int(self.time / mocap_cycle_dt)) / mocap_data_num)
    self.mocap_data_idx = (
        init_idx + int(local_time / mocap_cycle_dt)) % mocap_data_num
    idx = self.mocap_data_idx
    # idx can reach 37, so next_idx can reach 38: mocap_data needs >= 39 rows.
    next_idx = idx + 1

    # Once the clip has wrapped back to its starting frame, re-anchor the
    # horizontal reference origin at the current root position.
    if cycle_iter != 0 and idx == init_idx:
        self.cycle_init_root_pos[0] = self.sim.data.qpos[0]
        self.cycle_init_root_pos[1] = self.sim.data.qpos[1]

    data = self.mocap_data
    # Column 0 is handed to `cubic` as the segment start/end time.
    t_start = data[idx, 0]
    t_end = data[next_idx, 0]

    target_data_qpos = np.zeros_like(a)
    target_data_qvel = np.zeros_like(a)
    Tar_EE_COM = np.zeros((4, 3))
    target_data_body_delta = np.zeros(3)
    target_data_body_vel = np.zeros(3)

    # Joint references live in columns 8, 9, ... (one column per joint).
    for joint in range(a.size):
        col = joint + 8
        target_data_qpos[joint] = cubic(
            local_time_plus_init, t_start, t_end,
            data[idx, col], data[next_idx, col], 0.0, 0.0)
        target_data_qvel[joint] = (
            data[next_idx, col] - data[idx, col]) / mocap_cycle_dt

    # (root axis, mocap column): axis 1 reads column 3 and axis 2 column 2.
    axis_columns = ((0, 1), (1, 3), (2, 2))
    if idx >= init_idx:
        for axis, col in axis_columns:
            origin = data[init_idx, col]
            target_data_body_delta[axis] = cubic(
                local_time_plus_init, t_start, t_end,
                data[idx, col] - origin, data[next_idx, col] - origin,
                0.0, 0.0)
    else:
        # The clip wrapped before reaching the start frame again: shift the
        # segment times by the last frame's time and, for axis 0 only, add
        # the last frame's column-1 value as carried-over displacement.
        t_shift = data[last_idx, 0]
        for axis, col in axis_columns:
            seg_start = data[idx, col]
            seg_end = data[next_idx, col]
            if axis == 0:
                seg_start = data[last_idx, col] + seg_start
                seg_end = data[last_idx, col] + seg_end
            origin = data[init_idx, col]
            target_data_body_delta[axis] = cubic(
                local_time, t_shift + t_start, t_shift + t_end,
                seg_start - origin, seg_end - origin, 0.0, 0.0)

    for axis, col in axis_columns:
        target_data_body_vel[axis] = (
            data[next_idx, col] - data[idx, col]) / mocap_cycle_dt

    # End-effector targets: 4 effectors x 3 coordinates, stored right after
    # the joint columns.
    ee_base_col = 8 + a.size
    for ee_idx in range(4):
        for cartesian_idx in range(3):
            col = ee_base_col + 3 * ee_idx + cartesian_idx
            Tar_EE_COM[ee_idx, cartesian_idx] = cubic(
                local_time_plus_init, t_start, t_end,
                data[idx, col], data[next_idx, col], 0.0, 0.0)

    # PD tracking of (reference + action), one simulator sub-step at a time.
    for _ in range(self.frame_skip):
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        torque = 400 * (target_data_qpos + a - qpos[7:]) + 40 * (-qvel[6:])
        self.do_simulation(torque, 1)

    qpos = self.sim.data.qpos
    qvel = self.sim.data.qvel

    # End-effector positions relative to base_link, rotated by the
    # conjugate of the base_link orientation.
    basequat_conj = Quaternion(
        self.sim.data.get_body_xquat("base_link")).conjugate
    basepos = self.get_body_com("base_link")
    ee_links = ("R_AnkleCenter_Link", "L_AnkleCenter_Link",
                "R_Wrist1_Link", "L_Wrist1_Link")
    EE_CoM = np.concatenate([
        basequat_conj.rotate(self.get_body_com(link) - basepos)
        for link in ee_links])

    # The identity quaternion is the orientation target (the mocap
    # orientation in columns 4:8 is not used here).
    basequat_desired = Quaternion([1, 0, 0, 0])
    baseQuatError = (basequat_desired * Quaternion(qpos[3:7]).conjugate).angle
    Tar_Body = self.cycle_init_root_pos + target_data_body_delta

    done_by_contact = False
    self.r_contact = False
    self.l_contact = False
    if self.done_init is False:
        # First call after done_init was cleared: skip the contact scan.
        self.done_init = True
    else:
        geom_bodyid = self.model.geom_bodyid
        for i in range(self.sim.data.ncon):
            contact = self.sim.data.contact[i]
            # Geoms on the other side of a contact with geom 0; both orders
            # are checked because geom 0 may be either geom1 or geom2.
            others = []
            if contact.geom1 == 0:
                others.append(contact.geom2)
            if contact.geom2 == 0:
                others.append(contact.geom1)
            for geom in others:
                body = geom_bodyid[geom]
                if any(body == check_id
                       for check_id in self.collision_check_id):
                    done_by_contact = True
                # NOTE(review): body ids 8 and 15 are hard-coded as the
                # right and left contact bodies; confirm against the model.
                if body == 8:
                    self.r_contact = True
                if body == 15:
                    self.l_contact = True

    # Expected (right, left) foot contact for the current mocap frame.
    if idx in (37, 0, 1, 18, 19, 20):
        expected_contact = (True, True)
    elif idx <= 18:
        expected_contact = (True, False)
    else:  # 21..36, since idx is always < 38
        expected_contact = (False, True)
    observed_contact = (self.r_contact, self.l_contact)
    mimic_contact_reward = 0.2 if observed_contact == expected_contact else 0.0

    # np.linalg.norm already returns a scalar, so the former trailing
    # `.mean()` was a no-op and has been dropped.
    # NOTE(review): this is the norm of the squared errors, not their mean;
    # confirm that is the intended metric.
    mimic_qpos_reward = 0.4 * exp(
        -2.0 * np.linalg.norm((target_data_qpos - qpos.flat[7:])**2))
    mimic_qvel_reward = 0.00 * exp(
        -0.1 * (np.linalg.norm(target_data_qvel - qvel.flat[6:])**2))
    mimic_ee_reward = 0.1 * exp(
        -40 * (np.linalg.norm(EE_CoM - Tar_EE_COM.flatten())**2))
    mimic_body_reward = 0.2 * exp(
        -10 * (np.linalg.norm(Tar_Body - qpos.flat[0:3])**2
               + 0.5 * baseQuatError**2))
    mimic_body_vel_reward = 0.1 * exp(
        -10 * (np.linalg.norm(target_data_body_vel - qvel.flat[0:3])**2))

    reward = (mimic_qpos_reward + mimic_qvel_reward + mimic_ee_reward
              + mimic_body_reward + mimic_body_vel_reward
              + mimic_contact_reward)
    specific_reward = dict(
        mimic_qpos_reward=mimic_qpos_reward,
        mimic_qvel_reward=mimic_qvel_reward,
        mimic_ee_reward=mimic_ee_reward,
        mimic_body_reward=mimic_body_reward,
        mimic_body_vel_reward=mimic_body_vel_reward,
        mimic_contact_reward=mimic_contact_reward)

    if done_by_contact:
        # The terminating step earns nothing. Zero every component,
        # including the contact term, so the breakdown agrees with the
        # returned reward.
        reward = 0.0
        specific_reward = dict.fromkeys(specific_reward, 0.0)
        info = dict(
            episode=dict(r=self.epi_reward, l=self.epi_len),
            specific_reward=specific_reward)
    else:
        self.epi_len += 1
        self.epi_reward += reward
        info = dict(specific_reward=specific_reward)

    return self._get_obs(), reward, done_by_contact, info