def reset(self, init_pos=None): # Reset env variables self.step_ctr = 0 self.episodes += 1 self.used_energy = np.zeros((self.act_dim)) # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = 0.0 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.12 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 # Init_quat self.rnd_yaw = np.random.rand() * 0.0 - 0.0 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat # Set environment state self.set_state(init_q, init_qvel) for i in range(10): self.sim.forward() self.sim.step() # self.render() # time.sleep(3) obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs
def reset(self, init_pos=None): if np.random.rand() < self.env_change_prob: print("Obstacle difficulty level: {}".format(self.difficulty)) self.generate() time.sleep(0.1) if self.episode_reward >= self.average_episode_reward: self.difficulty += 0.01 else: self.difficulty = np.maximum(1, self.difficulty - 0.01) self.episode_reward = 0 self.average_episode_reward = self.average_episode_reward * 0.1 + self.average_episode_reward * 0.99 self.viewer = None self.env_name = self.env_list[np.random.randint(0, len(self.env_list))] path = Hexapod.MODELPATH + self.env_name + ".xml" self.model = mujoco_py.load_model_from_path(path) self.sim = mujoco_py.MjSim(self.model) self.model.opt.timestep = 0.02 # Environment dimensions self.q_dim = self.sim.get_state().qpos.shape[0] self.qvel_dim = self.sim.get_state().qvel.shape[0] self.obs_dim = 31 self.act_dim = self.sim.data.actuator_length.shape[0] + self.mem_dim # Reset env variables self.step_ctr = 0 self.episodes = 0 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = 0 # np.random.rand() * 4 - 4 init_q[1] = 0 # np.random.rand() * 8 - 4 init_q[2] = 0.15 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 if init_pos is not None: init_q[0:3] += init_pos # Init_quat self.rnd_yaw = np.random.randn() * 0.3 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat # Set environment state self.set_state(init_q, init_qvel) for i in range(20): self.sim.forward() self.sim.step() obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs
def step(self, ctrl): ctrl = self.scale_action(ctrl) self.sim.data.ctrl[:] = ctrl self.sim.forward() self.sim.step() self.step_ctr += 1 obs = self.get_obs() # Angle deviation x, y, z, qw, qx, qy, qz = obs[:7] xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6] # Reward conditions target_vel = 0.25 t_x = target_vel * np.cos(self.target_yaw) t_y = target_vel * np.sin(self.target_yaw) velocity_rew_x = 1. / (abs(xd - t_x) + 1.) - 1. / (t_x + 1.) velocity_rew_y = 1. / (abs(yd - t_y) + 1.) - 1. / (t_y + 1.) roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz]) yaw_pen = np.min((abs((yaw % 6.183) - (self.target_yaw % 6.183)), abs(yaw - self.target_yaw))) r_pos = velocity_rew_x * 6 + velocity_rew_y * 6 r_neg = np.square(roll) * 0.5 + \ np.square(pitch) * 0.5 + \ np.square(zd) * 0.5 + \ np.square(yaw_pen) * 0.2 r_neg = np.clip(r_neg, 0, 1) * 1. r_pos = np.clip(r_pos, -2, 2) r = r_pos - r_neg self.episode_reward += r # Reevaluate termination condition done = self.step_ctr > self.max_steps contacts = (np.abs( np.array(self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19 ]])).sum(axis=1) > 0.05).astype( np.float32) obs = np.concatenate([ self.scale_joints(self.sim.get_state().qpos.tolist()[7:]), self.sim.get_state().qvel.tolist()[6:], self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y], [self.target_yaw], contacts ]) self.model.body_quat[20] = my_utils.rpy_to_quat(0, 0, self.target_yaw) self.model.body_pos[20] = [x, y, 1] return obs, r, done, None
def reset(self, init_pos = None): # Reset env variables self.step_ctr = 0 self.episodes = 0 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = 0.0 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.10 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 # Init_quat self.rnd_yaw = np.random.rand() * 1 - 0.5 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat self.goal_xy_1 = [np.random.rand() * .7 + .5, np.random.rand() * 2 - 1.] self.goal_xy_2 = [np.random.rand() * .7 + .5 + self.goal_xy_1[0], np.random.rand() * 2 - 1.] self.goal_xy_3 = [np.random.rand() * .7 + .5 + self.goal_xy_2[0], np.random.rand() * 2 - 1.] self.goal_xy_4 = [np.random.rand() * .7 + .5 + self.goal_xy_3[0], np.random.rand() * 2 - 1.] self.goal_xy_5 = [np.random.rand() * .7 + .5 + self.goal_xy_4[0], np.random.rand() * 2 - 1.] self.goal_list = [self.goal_xy_1,self.goal_xy_2,self.goal_xy_3,self.goal_xy_4,self.goal_xy_5] # Set environment state self.set_state(init_q, init_qvel) self.current_goal_idx = 0 self.goal_A = self.goal_list[self.current_goal_idx] self.goal_B = self.goal_list[self.current_goal_idx + 1] for i in range(30): self.sim.forward() self.sim.step() self.prev_xy = [0, 0] self.model.body_pos[20] = [*self.goal_A, 0] self.model.body_pos[21] = [*self.goal_B, 0] tar_angle = np.arctan2(self.goal_A[1] - 0, self.goal_A[0] - 0) self.prev_deviation = np.min((abs((self.rnd_yaw % 6.183) - (tar_angle % 6.183)), abs(self.rnd_yaw - tar_angle))) obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs
def reset(self, init_pos=None): # Set episode targets self.target_criteria_norm = np.random.rand( len(self.criteria_pen_range_dict)) * 2 - 1 # self.target_criteria_dict = {} for i, (k, v) in enumerate(self.criteria_pen_range_dict.items()): self.target_criteria_dict[k] = ( (self.target_criteria_norm[i] + 1) * 0.5) * abs(v[1] - v[0]) + v[0] # Reset env variables self.step_ctr = 0 self.episodes = 0 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = -7.0 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.10 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 if init_pos is not None: init_q[0:3] += init_pos # Init_quat self.rnd_yaw = np.random.randn() * 0.0 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat # Set environment state self.set_state(init_q, init_qvel) for i in range(30): self.sim.forward() self.sim.step() obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs
def reset(self, init_pos=None): if np.random.rand() < self.env_change_prob: self.generate_hybrid_env(self.n_envs, self.specific_env_len * self.n_envs) time.sleep(0.3) self.viewer = None path = Hexapod.MODELPATH + "{}.xml".format(self.ID) while True: try: self.model = mujoco_py.load_model_from_path(path) break except Exception: pass self.sim = mujoco_py.MjSim(self.model) # Environment dimensions self.q_dim = self.sim.get_state().qpos.shape[0] self.qvel_dim = self.sim.get_state().qvel.shape[0] self.obs_dim = 18 * 2 + 6 + 4 + 6 + 1 self.act_dim = self.sim.data.actuator_length.shape[0] if self.use_HF: self.obs_dim += self.HF_width * self.HF_length # Reset env variables self.step_ctr = 0 self.episodes = 0 if self.use_HF: self.hf_data = self.model.hfield_data self.hf_ncol = self.model.hfield_ncol[0] self.hf_nrow = self.model.hfield_nrow[0] self.hf_column_meters = self.model.hfield_size[0][0] * 2 self.hf_row_meters = self.model.hfield_size[0][1] * 2 self.hf_grid = self.hf_data.reshape((self.hf_nrow, self.hf_ncol)) self.hf_grid_aug = np.zeros((self.hf_nrow * 2, self.hf_ncol * 2)) self.hf_grid_aug[int(self.hf_nrow / 2):self.hf_nrow + int(self.hf_nrow / 2), int(self.hf_ncol / 2):self.hf_ncol + int(self.hf_ncol / 2)] = self.hf_grid self.pixels_per_column = self.hf_ncol / float( self.hf_column_meters) self.pixels_per_row = self.hf_nrow / float(self.hf_row_meters) self.x_offset = 0.3 self.y_offset = 0.6 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = 0.0 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.10 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 if init_pos is not None: init_q[0:3] += init_pos self.vel_sum = 0 # Init_quat self.rnd_yaw = 0 # np.random.rand() * 2. - 1 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat # Set environment state self.set_state(init_q, init_qvel) for i in range(30): self.sim.forward() self.sim.step() obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs
def reset(self, init_pos=None): if np.random.rand() < self.env_change_prob: self.generate_hybrid_env(self.n_envs, self.max_steps) time.sleep(0.1) self.viewer = None path = Hexapod.MODELPATH + "{}.xml".format(self.ID) while True: try: self.model = mujoco_py.load_model_from_path(path) break except Exception: pass self.sim = mujoco_py.MjSim(self.model) self.model.opt.timestep = 0.02 # Environment dimensions self.q_dim = self.sim.get_state().qpos.shape[0] self.qvel_dim = self.sim.get_state().qvel.shape[0] self.obs_dim = 18 * 2 + 6 + 4 + 6 self.act_dim = self.sim.data.actuator_length.shape[0] if self.use_HF: self.obs_dim += self.HF_width * self.HF_length # Reset env variables self.step_ctr = 0 self.episodes = 0 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = 0.0 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.10 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 if init_pos is not None: init_q[0:3] += init_pos # Init_quat self.rnd_yaw = np.random.randn() * 0.2 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat # Set environment state self.set_state(init_q, init_qvel) for i in range(40): self.sim.forward() self.sim.step() # self.render() # time.sleep(3) obs, _, _, _ = self.step(np.zeros(self.act_dim)) #x,y = self.sim.get_state().qpos.tolist()[:2] #print("x,y: ", x , y) #test_patch = self.get_local_hf(x,y) return obs
def step(self, ctrl): if self.mem_dim == 0: mem = np.zeros(0) act = ctrl ctrl = self.scale_action(act) else: mem = ctrl[-self.mem_dim:] act = ctrl[:-self.mem_dim] ctrl = self.scale_action(act) x, y, z, q0, q1, q2, q3 = self.get_obs()[:7] prev_dist_to_goal = np.sqrt( np.square(self.goal_xy[0] - x) + np.square(self.goal_xy[1] - y)) siny_cosp = 2.0 * (q0 * q3 + q1 * q2) cosy_cosp = 1.0 - 2.0 * (q2 * q2 + q3 * q3) yaw_prev = np.arctan2(siny_cosp, cosy_cosp) self.sim.data.ctrl[:] = ctrl self.sim.forward() self.sim.step() self.step_ctr += 1 obs = self.get_obs() obs_dict = self.get_obs_dict() x, y, z, q0, q1, q2, q3 = obs[:7] curr_dist_to_goal = np.sqrt( np.square(self.goal_xy[0] - x) + np.square(self.goal_xy[1] - y)) xd, yd, zd, _, _, _ = self.sim.get_state().qvel.tolist()[:6] #roll, pitch, yaw = my_utils.quat_to_rpy((q0,q1,q2,q3)) #print(roll, pitch, yaw) siny_cosp = 2.0 * (q0 * q3 + q1 * q2) cosy_cosp = 1.0 - 2.0 * (q2 * q2 + q3 * q3) yaw = np.arctan2(siny_cosp, cosy_cosp) # Calculate target angle to goal tar_angle = np.arctan2(self.goal_xy[1] - y, self.goal_xy[0] - x) # Reward conditions goal_velocity = (prev_dist_to_goal - curr_dist_to_goal) * 50 target_velocity = 0.3 velocity_rew = 1. / (abs(goal_velocity - target_velocity) + 1.) - 1. / (target_velocity + 1.) direction_rew = abs(tar_angle - yaw_prev) - abs(tar_angle - yaw) direction_pen = abs(tar_angle - yaw) rV = (velocity_rew * 3.0, goal_velocity * 0, direction_rew * 0, -direction_pen * .0, -np.square(ctrl).sum() * 0.00) #,- (np.square(pitch) * 1. + np.square(roll) * 1. + np.square(zd) * 1.) * self.goal_level * int(self.step_ctr > 15)) r = sum(rV) r = np.clip(r, -3, 3) obs_dict['rV'] = rV # Reevaluate termination condition done = self.step_ctr > self.max_steps # or (abs(angle) > 2.4 and self.step_ctr > 30) or abs(y) > 0.5 or x < -0.2 obs = np.concatenate([ self.sim.get_state().qpos.tolist()[2:], self.sim.get_state().qvel.tolist(), obs_dict["contacts"], [self.goal_xy[0] - x, self.goal_xy[1] - y], mem ]) self.model.body_quat[21] = my_utils.rpy_to_quat(0, 0, yaw) self.model.body_pos[21] = [x, y, 1] return obs, r, done, obs_dict
def reset(self, init_pos=None): if np.random.rand() < self.env_change_prob: self.envs, self.size_list, self.scaled_indeces_list = self.generate_hybrid_env( self.n_envs, self.max_steps) time.sleep(0.2) self.current_env_idx = 0 self.current_env = self.envs[self.current_env_idx] self.viewer = None path = Hexapod.MODELPATH + "{}.xml".format(self.ID) while True: try: self.model = mujoco_py.load_model_from_path(path) break except Exception: pass self.sim = mujoco_py.MjSim(self.model) self.model.opt.timestep = 0.02 # Environment dimensions self.q_dim = self.sim.get_state().qpos.shape[0] self.qvel_dim = self.sim.get_state().qvel.shape[0] self.obs_dim = 18 * 2 + 6 + 4 + 6 self.act_dim = self.sim.data.actuator_length.shape[0] + self.n_envs + 1 if self.use_HF: self.obs_dim += self.HF_width * self.HF_length # Reset env variables self.step_ctr = 0 self.episodes = 0 if self.use_HF: self.hf_data = self.model.hfield_data self.hf_ncol = self.model.hfield_ncol[0] self.hf_nrow = self.model.hfield_nrow[0] self.hf_column_meters = self.model.hfield_size[0][0] * 2 self.hf_row_meters = self.model.hfield_size[0][1] * 2 self.hf_grid = self.hf_data.reshape((self.hf_nrow, self.hf_ncol)) self.hf_grid_aug = np.zeros((self.hf_nrow * 2, self.hf_ncol * 2)) self.hf_grid_aug[int(self.hf_nrow / 2):self.hf_nrow + int(self.hf_nrow / 2), int(self.hf_ncol / 2):self.hf_ncol + int(self.hf_ncol / 2)] = self.hf_grid self.pixels_per_column = self.hf_ncol / float( self.hf_column_meters) self.pixels_per_row = self.hf_nrow / float(self.hf_row_meters) self.x_offset = 0.3 self.y_offset = 0.6 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = 0.0 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.10 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 if init_pos is not None: init_q[0:3] += init_pos # Init_quat self.rnd_yaw = np.random.randn() * 0.2 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat # Set environment state self.set_state(init_q, init_qvel) for i in range(40): self.sim.forward() self.sim.step() # self.render() # time.sleep(3) self.obs = np.zeros(self.obs_dim) self.obs, _, _, _ = self.step(np.zeros(self.act_dim)) #x,y = self.sim.get_state().qpos.tolist()[:2] #print("x,y: ", x , y) #test_patch = self.get_local_hf(x,y) return self.obs
def reset(self, init_pos = None): if np.random.rand() < self.env_change_prob: self.generate_hybrid_env(self.n_envs, self.specific_env_len * self.n_envs) time.sleep(0.3) self.viewer = None path = Hexapod.MODELPATH + "{}.xml".format(self.ID) while True: try: self.model = mujoco_py.load_model_from_path(path) break except Exception: pass self.sim = mujoco_py.MjSim(self.model) self.model.opt.timestep = 0.02 # Environment dimensions self.q_dim = self.sim.get_state().qpos.shape[0] self.qvel_dim = self.sim.get_state().qvel.shape[0] self.obs_dim = 18 * 2 + 6 + 4 + 6 + 1 self.act_dim = self.sim.data.actuator_length.shape[0] if self.use_HF: self.obs_dim += self.HF_width * self.HF_length # Reset env variables self.step_ctr = 0 self.episodes = 0 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[0] = 0.0 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.18 init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1 if init_pos is not None: init_q[0:3] += init_pos self.vel_sum = 0 # Init_quat if self.rnd_init_yaw: self.rnd_yaw = np.random.rand() * 1.6 - 0.8 else: self.rnd_yaw = 0 rnd_quat = my_utils.rpy_to_quat(0,0,self.rnd_yaw) init_q[3:7] = rnd_quat self.prev_deviation = np.min((abs((self.rnd_yaw % 6.183) - (0 % 6.183)), abs(self.rnd_yaw - 0))) self.prev_y_deviation = 0 # Set environment state self.set_state(init_q, init_qvel) for i in range(20): self.sim.forward() self.sim.step() obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs
def reset(self, init_pos=None): if np.random.rand() < self.env_change_prob: self.generate_hybrid_env(self.n_envs, self.specific_env_len * self.n_envs) time.sleep(0.1) self.viewer = None path = Hexapod.MODELPATH + "{}.xml".format(self.ID) # # with open(Hexapod.MODELPATH + "limited.xml", "r") as in_file: # buf = in_file.readlines() # # with open(Hexapod.MODELPATH + self.ID + ".xml", "w") as out_file: # for line in buf: # if line.startswith(' <position joint='): # out_file.write(' <hfield name="hill" file="{}.png" size="{} 1.2 {} 0.1" /> \n '.format(self.ID, self.env_scaling * self.n_envs, 0.6 * height_SF)) # else: # out_file.write(line) while True: try: self.model = mujoco_py.load_model_from_path(path) break except Exception: pass self.sim = mujoco_py.MjSim(self.model) self.model.opt.timestep = 0.003 # Environment dimensions self.q_dim = self.sim.get_state().qpos.shape[0] self.qvel_dim = self.sim.get_state().qvel.shape[0] self.obs_dim = 22 + self.use_contacts * 6 self.act_dim = self.sim.data.actuator_length.shape[0] # Reset env variables self.step_ctr = 0 self.episodes = 0 # Sample initial configuration init_q = np.zeros(self.q_dim, dtype=np.float32) init_q[7:] = self.scale_action([0] * 18) init_q[0] = 0.2 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.3 init_qvel = np.zeros(self.qvel_dim) # TODO: Change initial q if init_pos is not None: init_q[0:3] += init_pos self.vel_sum = 0 # Init_quat if self.rnd_init_yaw: self.rnd_yaw = np.random.rand() * 1. - 0.5 else: self.rnd_yaw = 0 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat self.prev_deviation = np.min( (abs((self.rnd_yaw % 6.183) - (0 % 6.183)), abs(self.rnd_yaw - 0))) self.prev_y_deviation = 0 # Set environment state self.set_state(init_q, init_qvel) ctr = 0 while True: ctr += 1 self.sim.forward() self.sim.step() #self.render() #time.sleep(0.01) if np.max(self.sim.get_state().qvel.tolist() [0:7]) < 0.15 and ctr > 10: break obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs
def reset(self, init_pos=None): if np.random.rand() < self.env_change_prob: self.generate_hybrid_env(self.n_envs, self.specific_env_len * self.n_envs) time.sleep(0.1) self.viewer = None path = Hexapod.MODELPATH + "{}.xml".format(self.ID) while True: try: self.model = mujoco_py.load_model_from_path(path) break except Exception: pass self.sim = mujoco_py.MjSim(self.model) self.model.opt.timestep = 0.01 # Environment dimensions self.q_dim = self.sim.get_state().qpos.shape[0] self.qvel_dim = self.sim.get_state().qvel.shape[0] self.obs_dim = 22 + self.use_contacts * 6 self.act_dim = self.sim.data.actuator_length.shape[0] # Reset env variables self.step_ctr = 0 self.episodes = 0 # Sample initial configuration init_q = np.concatenate((np.zeros(7), np.tile( [0, -0.5, 0.5], 6))) #np.random.randn(self.q_dim).astype(np.float32) * 0.0 init_q[0] = 0.2 # np.random.rand() * 4 - 4 init_q[1] = 0.0 # np.random.rand() * 8 - 4 init_q[2] = 0.2 init_qvel = np.zeros(self.qvel_dim) if init_pos is not None: init_q[0:3] += init_pos self.vel_sum = 0 # Init_quat if self.rnd_init_yaw: self.rnd_yaw = np.random.rand() * 1. - 0.5 else: self.rnd_yaw = 0 rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw) init_q[3:7] = rnd_quat self.prev_deviation = np.min( (abs((self.rnd_yaw % 6.183) - (0 % 6.183)), abs(self.rnd_yaw - 0))) self.prev_y_deviation = 0 # Set environment state self.set_state(init_q, init_qvel) while True: self.sim.forward() self.sim.step() if np.max(self.sim.get_state().qvel.tolist()[0:7]) < 0.1: break obs, _, _, _ = self.step(np.zeros(self.act_dim)) return obs