示例#1
0
    def reset(self, init_pos=None):
        # Reset env variables
        self.step_ctr = 0
        self.episodes += 1
        self.used_energy = np.zeros((self.act_dim))

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = 0.0  # np.random.rand() * 4 - 4
        init_q[1] = 0.0  # np.random.rand() * 8 - 4
        init_q[2] = 0.12
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        # Init_quat
        self.rnd_yaw = np.random.rand() * 0.0 - 0.0
        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        # Set environment state
        self.set_state(init_q, init_qvel)

        for i in range(10):
            self.sim.forward()
            self.sim.step()

        # self.render()
        # time.sleep(3)

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs
    def reset(self, init_pos=None):

        if np.random.rand() < self.env_change_prob:
            print("Obstacle difficulty level: {}".format(self.difficulty))
            self.generate()
            time.sleep(0.1)

        if self.episode_reward >= self.average_episode_reward:
            self.difficulty += 0.01
        else:
            self.difficulty = np.maximum(1, self.difficulty - 0.01)

        self.episode_reward = 0
        self.average_episode_reward = self.average_episode_reward * 0.1 + self.average_episode_reward * 0.99

        self.viewer = None
        self.env_name = self.env_list[np.random.randint(0, len(self.env_list))]

        path = Hexapod.MODELPATH + self.env_name + ".xml"
        self.model = mujoco_py.load_model_from_path(path)
        self.sim = mujoco_py.MjSim(self.model)

        self.model.opt.timestep = 0.02

        # Environment dimensions
        self.q_dim = self.sim.get_state().qpos.shape[0]
        self.qvel_dim = self.sim.get_state().qvel.shape[0]

        self.obs_dim = 31
        self.act_dim = self.sim.data.actuator_length.shape[0] + self.mem_dim

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = 0  # np.random.rand() * 4 - 4
        init_q[1] = 0  # np.random.rand() * 8 - 4
        init_q[2] = 0.15
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        if init_pos is not None:
            init_q[0:3] += init_pos

        # Init_quat
        self.rnd_yaw = np.random.randn() * 0.3
        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        # Set environment state
        self.set_state(init_q, init_qvel)

        for i in range(20):
            self.sim.forward()
            self.sim.step()

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs
    def step(self, ctrl):

        ctrl = self.scale_action(ctrl)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()

        # Angle deviation
        x, y, z, qw, qx, qy, qz = obs[:7]
        xd, yd, zd, thd, phid, psid = self.sim.get_state().qvel.tolist()[:6]

        # Reward conditions
        target_vel = 0.25
        t_x = target_vel * np.cos(self.target_yaw)
        t_y = target_vel * np.sin(self.target_yaw)

        velocity_rew_x = 1. / (abs(xd - t_x) + 1.) - 1. / (t_x + 1.)
        velocity_rew_y = 1. / (abs(yd - t_y) + 1.) - 1. / (t_y + 1.)

        roll, pitch, yaw = my_utils.quat_to_rpy([qw, qx, qy, qz])

        yaw_pen = np.min((abs((yaw % 6.183) - (self.target_yaw % 6.183)),
                          abs(yaw - self.target_yaw)))

        r_pos = velocity_rew_x * 6 + velocity_rew_y * 6
        r_neg = np.square(roll) * 0.5 + \
                np.square(pitch) * 0.5 + \
                np.square(zd) * 0.5 + \
                np.square(yaw_pen) * 0.2

        r_neg = np.clip(r_neg, 0, 1) * 1.
        r_pos = np.clip(r_pos, -2, 2)
        r = r_pos - r_neg
        self.episode_reward += r

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps

        contacts = (np.abs(
            np.array(self.sim.data.cfrc_ext[[4, 7, 10, 13, 16, 19
                                             ]])).sum(axis=1) > 0.05).astype(
                                                 np.float32)

        obs = np.concatenate([
            self.scale_joints(self.sim.get_state().qpos.tolist()[7:]),
            self.sim.get_state().qvel.tolist()[6:],
            self.sim.get_state().qvel.tolist()[:6], [roll, pitch, yaw, y],
            [self.target_yaw], contacts
        ])

        self.model.body_quat[20] = my_utils.rpy_to_quat(0, 0, self.target_yaw)
        self.model.body_pos[20] = [x, y, 1]

        return obs, r, done, None
示例#4
0
    def reset(self, init_pos = None):
        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = 0.0 # np.random.rand() * 4 - 4
        init_q[1] = 0.0 # np.random.rand() * 8 - 4
        init_q[2] = 0.10
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        # Init_quat
        self.rnd_yaw = np.random.rand() * 1 - 0.5
        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        self.goal_xy_1 = [np.random.rand() * .7 + .5, np.random.rand() * 2 - 1.]
        self.goal_xy_2 = [np.random.rand() * .7 + .5 + self.goal_xy_1[0], np.random.rand() * 2 - 1.]
        self.goal_xy_3 = [np.random.rand() * .7 + .5 + self.goal_xy_2[0], np.random.rand() * 2 - 1.]
        self.goal_xy_4 = [np.random.rand() * .7 + .5 + self.goal_xy_3[0], np.random.rand() * 2 - 1.]
        self.goal_xy_5 = [np.random.rand() * .7 + .5 + self.goal_xy_4[0], np.random.rand() * 2 - 1.]

        self.goal_list = [self.goal_xy_1,self.goal_xy_2,self.goal_xy_3,self.goal_xy_4,self.goal_xy_5]

        # Set environment state
        self.set_state(init_q, init_qvel)

        self.current_goal_idx = 0
        self.goal_A = self.goal_list[self.current_goal_idx]
        self.goal_B = self.goal_list[self.current_goal_idx + 1]

        for i in range(30):
            self.sim.forward()
            self.sim.step()

        self.prev_xy = [0, 0]
        self.model.body_pos[20] = [*self.goal_A, 0]
        self.model.body_pos[21] = [*self.goal_B, 0]

        tar_angle = np.arctan2(self.goal_A[1] - 0, self.goal_A[0] - 0)
        self.prev_deviation = np.min((abs((self.rnd_yaw % 6.183) - (tar_angle % 6.183)), abs(self.rnd_yaw - tar_angle)))

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs
示例#5
0
    def reset(self, init_pos=None):

        # Set episode targets
        self.target_criteria_norm = np.random.rand(
            len(self.criteria_pen_range_dict)) * 2 - 1  #
        self.target_criteria_dict = {}
        for i, (k, v) in enumerate(self.criteria_pen_range_dict.items()):
            self.target_criteria_dict[k] = (
                (self.target_criteria_norm[i] + 1) * 0.5) * abs(v[1] -
                                                                v[0]) + v[0]

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = -7.0  # np.random.rand() * 4 - 4
        init_q[1] = 0.0  # np.random.rand() * 8 - 4
        init_q[2] = 0.10
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        if init_pos is not None:
            init_q[0:3] += init_pos

        # Init_quat
        self.rnd_yaw = np.random.randn() * 0.0
        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        # Set environment state
        self.set_state(init_q, init_qvel)

        for i in range(30):
            self.sim.forward()
            self.sim.step()

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs
    def reset(self, init_pos=None):
        if np.random.rand() < self.env_change_prob:
            self.generate_hybrid_env(self.n_envs,
                                     self.specific_env_len * self.n_envs)
            time.sleep(0.3)

        self.viewer = None
        path = Hexapod.MODELPATH + "{}.xml".format(self.ID)

        while True:
            try:
                self.model = mujoco_py.load_model_from_path(path)
                break
            except Exception:
                pass

        self.sim = mujoco_py.MjSim(self.model)

        # Environment dimensions
        self.q_dim = self.sim.get_state().qpos.shape[0]
        self.qvel_dim = self.sim.get_state().qvel.shape[0]

        self.obs_dim = 18 * 2 + 6 + 4 + 6 + 1
        self.act_dim = self.sim.data.actuator_length.shape[0]

        if self.use_HF:
            self.obs_dim += self.HF_width * self.HF_length

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        if self.use_HF:
            self.hf_data = self.model.hfield_data
            self.hf_ncol = self.model.hfield_ncol[0]
            self.hf_nrow = self.model.hfield_nrow[0]
            self.hf_column_meters = self.model.hfield_size[0][0] * 2
            self.hf_row_meters = self.model.hfield_size[0][1] * 2
            self.hf_grid = self.hf_data.reshape((self.hf_nrow, self.hf_ncol))
            self.hf_grid_aug = np.zeros((self.hf_nrow * 2, self.hf_ncol * 2))
            self.hf_grid_aug[int(self.hf_nrow /
                                 2):self.hf_nrow + int(self.hf_nrow / 2),
                             int(self.hf_ncol / 2):self.hf_ncol +
                             int(self.hf_ncol / 2)] = self.hf_grid
            self.pixels_per_column = self.hf_ncol / float(
                self.hf_column_meters)
            self.pixels_per_row = self.hf_nrow / float(self.hf_row_meters)
            self.x_offset = 0.3
            self.y_offset = 0.6

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = 0.0  # np.random.rand() * 4 - 4
        init_q[1] = 0.0  # np.random.rand() * 8 - 4
        init_q[2] = 0.10
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        if init_pos is not None:
            init_q[0:3] += init_pos

        self.vel_sum = 0

        # Init_quat
        self.rnd_yaw = 0  # np.random.rand() * 2. - 1
        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        # Set environment state
        self.set_state(init_q, init_qvel)

        for i in range(30):
            self.sim.forward()
            self.sim.step()

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs
示例#7
0
    def reset(self, init_pos=None):
        if np.random.rand() < self.env_change_prob:
            self.generate_hybrid_env(self.n_envs, self.max_steps)
            time.sleep(0.1)

        self.viewer = None
        path = Hexapod.MODELPATH + "{}.xml".format(self.ID)

        while True:
            try:
                self.model = mujoco_py.load_model_from_path(path)
                break
            except Exception:
                pass

        self.sim = mujoco_py.MjSim(self.model)

        self.model.opt.timestep = 0.02

        # Environment dimensions
        self.q_dim = self.sim.get_state().qpos.shape[0]
        self.qvel_dim = self.sim.get_state().qvel.shape[0]

        self.obs_dim = 18 * 2 + 6 + 4 + 6
        self.act_dim = self.sim.data.actuator_length.shape[0]

        if self.use_HF:
            self.obs_dim += self.HF_width * self.HF_length

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = 0.0  # np.random.rand() * 4 - 4
        init_q[1] = 0.0  # np.random.rand() * 8 - 4
        init_q[2] = 0.10
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        if init_pos is not None:
            init_q[0:3] += init_pos

        # Init_quat
        self.rnd_yaw = np.random.randn() * 0.2
        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        # Set environment state
        self.set_state(init_q, init_qvel)

        for i in range(40):
            self.sim.forward()
            self.sim.step()

        # self.render()
        # time.sleep(3)

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        #x,y = self.sim.get_state().qpos.tolist()[:2]
        #print("x,y: ", x , y)
        #test_patch = self.get_local_hf(x,y)

        return obs
    def step(self, ctrl):
        if self.mem_dim == 0:
            mem = np.zeros(0)
            act = ctrl
            ctrl = self.scale_action(act)
        else:
            mem = ctrl[-self.mem_dim:]
            act = ctrl[:-self.mem_dim]
            ctrl = self.scale_action(act)

        x, y, z, q0, q1, q2, q3 = self.get_obs()[:7]
        prev_dist_to_goal = np.sqrt(
            np.square(self.goal_xy[0] - x) + np.square(self.goal_xy[1] - y))

        siny_cosp = 2.0 * (q0 * q3 + q1 * q2)
        cosy_cosp = 1.0 - 2.0 * (q2 * q2 + q3 * q3)
        yaw_prev = np.arctan2(siny_cosp, cosy_cosp)

        self.sim.data.ctrl[:] = ctrl
        self.sim.forward()
        self.sim.step()
        self.step_ctr += 1

        obs = self.get_obs()
        obs_dict = self.get_obs_dict()

        x, y, z, q0, q1, q2, q3 = obs[:7]
        curr_dist_to_goal = np.sqrt(
            np.square(self.goal_xy[0] - x) + np.square(self.goal_xy[1] - y))

        xd, yd, zd, _, _, _ = self.sim.get_state().qvel.tolist()[:6]

        #roll, pitch, yaw = my_utils.quat_to_rpy((q0,q1,q2,q3))
        #print(roll, pitch, yaw)

        siny_cosp = 2.0 * (q0 * q3 + q1 * q2)
        cosy_cosp = 1.0 - 2.0 * (q2 * q2 + q3 * q3)
        yaw = np.arctan2(siny_cosp, cosy_cosp)

        # Calculate target angle to goal
        tar_angle = np.arctan2(self.goal_xy[1] - y, self.goal_xy[0] - x)

        # Reward conditions
        goal_velocity = (prev_dist_to_goal - curr_dist_to_goal) * 50

        target_velocity = 0.3
        velocity_rew = 1. / (abs(goal_velocity - target_velocity) +
                             1.) - 1. / (target_velocity + 1.)
        direction_rew = abs(tar_angle - yaw_prev) - abs(tar_angle - yaw)
        direction_pen = abs(tar_angle - yaw)

        rV = (velocity_rew * 3.0, goal_velocity * 0, direction_rew * 0,
              -direction_pen * .0, -np.square(ctrl).sum() * 0.00)
        #,- (np.square(pitch) * 1. + np.square(roll) * 1. + np.square(zd) * 1.) * self.goal_level * int(self.step_ctr > 15))

        r = sum(rV)
        r = np.clip(r, -3, 3)
        obs_dict['rV'] = rV

        # Reevaluate termination condition
        done = self.step_ctr > self.max_steps  # or (abs(angle) > 2.4 and self.step_ctr > 30) or abs(y) > 0.5 or x < -0.2

        obs = np.concatenate([
            self.sim.get_state().qpos.tolist()[2:],
            self.sim.get_state().qvel.tolist(), obs_dict["contacts"],
            [self.goal_xy[0] - x, self.goal_xy[1] - y], mem
        ])

        self.model.body_quat[21] = my_utils.rpy_to_quat(0, 0, yaw)
        self.model.body_pos[21] = [x, y, 1]

        return obs, r, done, obs_dict
示例#9
0
    def reset(self, init_pos=None):
        if np.random.rand() < self.env_change_prob:
            self.envs, self.size_list, self.scaled_indeces_list = self.generate_hybrid_env(
                self.n_envs, self.max_steps)
            time.sleep(0.2)

        self.current_env_idx = 0
        self.current_env = self.envs[self.current_env_idx]

        self.viewer = None
        path = Hexapod.MODELPATH + "{}.xml".format(self.ID)

        while True:
            try:
                self.model = mujoco_py.load_model_from_path(path)
                break
            except Exception:
                pass

        self.sim = mujoco_py.MjSim(self.model)

        self.model.opt.timestep = 0.02

        # Environment dimensions
        self.q_dim = self.sim.get_state().qpos.shape[0]
        self.qvel_dim = self.sim.get_state().qvel.shape[0]

        self.obs_dim = 18 * 2 + 6 + 4 + 6
        self.act_dim = self.sim.data.actuator_length.shape[0] + self.n_envs + 1

        if self.use_HF:
            self.obs_dim += self.HF_width * self.HF_length

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        if self.use_HF:
            self.hf_data = self.model.hfield_data
            self.hf_ncol = self.model.hfield_ncol[0]
            self.hf_nrow = self.model.hfield_nrow[0]
            self.hf_column_meters = self.model.hfield_size[0][0] * 2
            self.hf_row_meters = self.model.hfield_size[0][1] * 2
            self.hf_grid = self.hf_data.reshape((self.hf_nrow, self.hf_ncol))
            self.hf_grid_aug = np.zeros((self.hf_nrow * 2, self.hf_ncol * 2))
            self.hf_grid_aug[int(self.hf_nrow /
                                 2):self.hf_nrow + int(self.hf_nrow / 2),
                             int(self.hf_ncol / 2):self.hf_ncol +
                             int(self.hf_ncol / 2)] = self.hf_grid
            self.pixels_per_column = self.hf_ncol / float(
                self.hf_column_meters)
            self.pixels_per_row = self.hf_nrow / float(self.hf_row_meters)
            self.x_offset = 0.3
            self.y_offset = 0.6

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = 0.0  # np.random.rand() * 4 - 4
        init_q[1] = 0.0  # np.random.rand() * 8 - 4
        init_q[2] = 0.10
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        if init_pos is not None:
            init_q[0:3] += init_pos

        # Init_quat
        self.rnd_yaw = np.random.randn() * 0.2
        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        # Set environment state
        self.set_state(init_q, init_qvel)

        for i in range(40):
            self.sim.forward()
            self.sim.step()

        # self.render()
        # time.sleep(3)

        self.obs = np.zeros(self.obs_dim)
        self.obs, _, _, _ = self.step(np.zeros(self.act_dim))

        #x,y = self.sim.get_state().qpos.tolist()[:2]
        #print("x,y: ", x , y)
        #test_patch = self.get_local_hf(x,y)

        return self.obs
示例#10
0
    def reset(self, init_pos = None):

        if np.random.rand() < self.env_change_prob:
            self.generate_hybrid_env(self.n_envs, self.specific_env_len * self.n_envs)
            time.sleep(0.3)

        self.viewer = None
        path = Hexapod.MODELPATH + "{}.xml".format(self.ID)

        while True:
            try:
                self.model = mujoco_py.load_model_from_path(path)
                break
            except Exception:
                pass

        self.sim = mujoco_py.MjSim(self.model)
        self.model.opt.timestep = 0.02

        # Environment dimensions
        self.q_dim = self.sim.get_state().qpos.shape[0]
        self.qvel_dim = self.sim.get_state().qvel.shape[0]

        self.obs_dim = 18 * 2 + 6 + 4 + 6 + 1
        self.act_dim = self.sim.data.actuator_length.shape[0]

        if self.use_HF:
            self.obs_dim += self.HF_width * self.HF_length

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0


        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[0] = 0.0 # np.random.rand() * 4 - 4
        init_q[1] = 0.0 # np.random.rand() * 8 - 4
        init_q[2] = 0.18
        init_qvel = np.random.randn(self.qvel_dim).astype(np.float32) * 0.1

        if init_pos is not None:
            init_q[0:3] += init_pos

        self.vel_sum = 0

        # Init_quat
        if self.rnd_init_yaw:
            self.rnd_yaw = np.random.rand() * 1.6 - 0.8
        else:
            self.rnd_yaw = 0

        rnd_quat = my_utils.rpy_to_quat(0,0,self.rnd_yaw)
        init_q[3:7] = rnd_quat

        self.prev_deviation = np.min((abs((self.rnd_yaw % 6.183) - (0 % 6.183)), abs(self.rnd_yaw - 0)))
        self.prev_y_deviation = 0

        # Set environment state
        self.set_state(init_q, init_qvel)

        for i in range(20):
            self.sim.forward()
            self.sim.step()

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs
示例#11
0
    def reset(self, init_pos=None):
        if np.random.rand() < self.env_change_prob:
            self.generate_hybrid_env(self.n_envs,
                                     self.specific_env_len * self.n_envs)
            time.sleep(0.1)

        self.viewer = None
        path = Hexapod.MODELPATH + "{}.xml".format(self.ID)

        #
        # with open(Hexapod.MODELPATH + "limited.xml", "r") as in_file:
        #     buf = in_file.readlines()
        #
        # with open(Hexapod.MODELPATH + self.ID + ".xml", "w") as out_file:
        #     for line in buf:
        #         if line.startswith('    <position joint='):
        #             out_file.write('    <hfield name="hill" file="{}.png" size="{} 1.2 {} 0.1" /> \n '.format(self.ID, self.env_scaling * self.n_envs, 0.6 * height_SF))
        #         else:
        #             out_file.write(line)

        while True:
            try:
                self.model = mujoco_py.load_model_from_path(path)
                break
            except Exception:
                pass

        self.sim = mujoco_py.MjSim(self.model)
        self.model.opt.timestep = 0.003

        # Environment dimensions
        self.q_dim = self.sim.get_state().qpos.shape[0]
        self.qvel_dim = self.sim.get_state().qvel.shape[0]

        self.obs_dim = 22 + self.use_contacts * 6
        self.act_dim = self.sim.data.actuator_length.shape[0]

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        # Sample initial configuration
        init_q = np.zeros(self.q_dim, dtype=np.float32)
        init_q[7:] = self.scale_action([0] * 18)
        init_q[0] = 0.2  # np.random.rand() * 4 - 4
        init_q[1] = 0.0  # np.random.rand() * 8 - 4
        init_q[2] = 0.3
        init_qvel = np.zeros(self.qvel_dim)

        # TODO: Change initial q

        if init_pos is not None:
            init_q[0:3] += init_pos

        self.vel_sum = 0

        # Init_quat
        if self.rnd_init_yaw:
            self.rnd_yaw = np.random.rand() * 1. - 0.5
        else:
            self.rnd_yaw = 0

        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        self.prev_deviation = np.min(
            (abs((self.rnd_yaw % 6.183) - (0 % 6.183)), abs(self.rnd_yaw - 0)))
        self.prev_y_deviation = 0

        # Set environment state
        self.set_state(init_q, init_qvel)

        ctr = 0
        while True:
            ctr += 1
            self.sim.forward()
            self.sim.step()
            #self.render()
            #time.sleep(0.01)

            if np.max(self.sim.get_state().qvel.tolist()
                      [0:7]) < 0.15 and ctr > 10:
                break

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs
示例#12
0
    def reset(self, init_pos=None):

        if np.random.rand() < self.env_change_prob:
            self.generate_hybrid_env(self.n_envs,
                                     self.specific_env_len * self.n_envs)
            time.sleep(0.1)

        self.viewer = None
        path = Hexapod.MODELPATH + "{}.xml".format(self.ID)

        while True:
            try:
                self.model = mujoco_py.load_model_from_path(path)
                break
            except Exception:
                pass

        self.sim = mujoco_py.MjSim(self.model)
        self.model.opt.timestep = 0.01

        # Environment dimensions
        self.q_dim = self.sim.get_state().qpos.shape[0]
        self.qvel_dim = self.sim.get_state().qvel.shape[0]

        self.obs_dim = 22 + self.use_contacts * 6
        self.act_dim = self.sim.data.actuator_length.shape[0]

        # Reset env variables
        self.step_ctr = 0
        self.episodes = 0

        # Sample initial configuration
        init_q = np.concatenate((np.zeros(7), np.tile(
            [0, -0.5, 0.5],
            6)))  #np.random.randn(self.q_dim).astype(np.float32) * 0.0
        init_q[0] = 0.2  # np.random.rand() * 4 - 4
        init_q[1] = 0.0  # np.random.rand() * 8 - 4
        init_q[2] = 0.2
        init_qvel = np.zeros(self.qvel_dim)

        if init_pos is not None:
            init_q[0:3] += init_pos

        self.vel_sum = 0

        # Init_quat
        if self.rnd_init_yaw:
            self.rnd_yaw = np.random.rand() * 1. - 0.5
        else:
            self.rnd_yaw = 0

        rnd_quat = my_utils.rpy_to_quat(0, 0, self.rnd_yaw)
        init_q[3:7] = rnd_quat

        self.prev_deviation = np.min(
            (abs((self.rnd_yaw % 6.183) - (0 % 6.183)), abs(self.rnd_yaw - 0)))
        self.prev_y_deviation = 0

        # Set environment state
        self.set_state(init_q, init_qvel)

        while True:
            self.sim.forward()
            self.sim.step()
            if np.max(self.sim.get_state().qvel.tolist()[0:7]) < 0.1:
                break

        obs, _, _, _ = self.step(np.zeros(self.act_dim))

        return obs