def test_viewer():
    model = load_model_from_path("mujoco_py/tests/test.xml")
    sim = MjSim(model)
    viewer = MjViewer(sim)
    for _ in range(100):
        sim.step()
        viewer.render()
def test_ignore_mujoco_warnings():
    # Two boxes on a plane need more than 1 contact (nconmax)
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1">
          <joint type="free"/>
          <geom size="1"/>
        </body>
        <body pos="0 1 1">
          <joint type="free"/>
          <geom size="1"/>
        </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    with ignore_mujoco_warnings():
        # This should raise an exception due to the mujoco warning callback,
        # but it's suppressed by the context manager.
        sim.step()
    sim.reset()
    with pytest.raises(Exception):
        # test to make sure previous warning callback restored.
        sim.step()
def test_mj_sim_basics():
    model = load_model_from_xml(BASIC_MODEL_XML)
    sim = MjSim(model, nsubsteps=2)
    sim.reset()
    sim.step()
    sim.reset()
    sim.forward()
def test_mj_warning_raises():
    '''Test that MuJoCo warnings cause exceptions.'''
    # Two boxes on a plane need more than 1 contact (nconmax)
    xml = '''
    <mujoco>
      <size nconmax="1"/>
      <worldbody>
        <geom type="plane" size="1 1 0.1"/>
        <body pos="1 0 1">
          <joint type="free"/>
          <geom size="1"/>
        </body>
        <body pos="0 1 1">
          <joint type="free"/>
          <geom size="1"/>
        </body>
      </worldbody>
    </mujoco>
    '''
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    with pytest.raises(Exception):
        # This should raise an exception due to the mujoco warning callback
        sim.step()
def test_xvelr():
    # xvelr = rotational velocity in world frame
    xml = """
    <mujoco>
      <worldbody>
        <body name="body1" pos="0 0 0">
          <joint name="a" axis="1 0 0" pos="0 0 0" type="hinge"/>
          <geom name="geom1" pos="0 0 0" size="0.3"/>
          <body name="body2" pos="0 0 1">
            <joint name="b" axis="1 0 0" pos="0 0 0" type="hinge"/>
            <geom name="geom2" pos="0 0 0" size="0.3"/>
            <site name="site1" size="0.1"/>
          </body>
        </body>
      </worldbody>
      <actuator>
        <motor joint="a"/>
        <motor joint="b"/>
      </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    sim.forward()
    # Check that xvelr starts out at zero (since qvel is zero)
    site1_xvelr = sim.data.get_site_xvelr('site1')
    np.testing.assert_allclose(site1_xvelr, np.zeros(3))
    # Push the base body and step forward to get it moving
    sim.data.ctrl[0] = 1e9
    sim.step()
    sim.forward()
    # Check that the first body has nonzero xvelr
    body1_xvelr = sim.data.get_body_xvelr('body1')
    assert not np.allclose(body1_xvelr, np.zeros(3))
    # Check that the second body (still) has zero xvelr
    body2_xvelr = sim.data.get_body_xvelr('body2')
    np.testing.assert_allclose(body2_xvelr, np.zeros(3))
    # Check that this matches the batch (gathered) getter property
    np.testing.assert_allclose(body2_xvelr, sim.data.body_xvelr[2])
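
# A minimal companion check (a sketch, not from the original tests): the same
# pattern applies to linear velocities via `get_site_xvelp` / `data.site_xvelp`.
def check_site_xvelp(sim):
    # Before any motion, the site's linear velocity in the world frame is zero.
    sim.reset()
    sim.forward()
    np.testing.assert_allclose(sim.data.get_site_xvelp('site1'), np.zeros(3))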
        </body>
      </worldbody>
      <actuator>
        <position joint="j" kp="2000"/>
      </actuator>
    </mujoco>
    """
    fn = '''
        #define SQUARE(a) (a * a)
        void fun(const mjModel* m, mjData* d) {
            for (int i = d->ne; i < d->nefc; i++) {
                pos_sum += SQUARE(d->efc_pos[i]);
            }
        }
    '''
    sim = MjSim(load_model_from_xml(MODEL_XML), nsubsteps=50,
                substep_callback=fn, userdata_names=['pos_sum'])
    t = 0
    while t < 10:
        t += 1
        sim.data.ctrl[0] = .2
        print('t', t)
        sim.step()
        # verify that there are no contacts visible between steps
        assert sim.data.ncon == 0, 'No contacts should be detected here'
        # verify that contacts (and penetrations) are visible to substep_callback
        if t > 1:
            assert sim.data.get_userdata('pos_sum') > 0  # detected collision
    <joint axis="1 0 0" name="cylinder:slidex" type="slide"/>
    <joint axis="0 1 0" name="cylinder:slidey" type="slide"/>
  </body>
  <body name="box" pos="-0.8 0 0.2">
    <geom mass="0.1" size="0.15 0.15 0.15" type="box"/>
  </body>
  <body name="floor" pos="0 0 0.025">
    <geom condim="3" size="1.0 1.0 0.02" rgba="0 1 0 1" type="box"/>
  </body>
</worldbody>
<actuator>
  <motor gear="2000.0" joint="slide0"/>
  <motor gear="2000.0" joint="slide1"/>
</actuator>
</mujoco>
"""

model = load_model_from_xml(MODEL_XML)
sim = MjSim(model)
viewer = MjViewer(sim)
t = 0
while True:
    sim.data.ctrl[0] = math.cos(t / 10.) * 0.01
    sim.data.ctrl[1] = math.sin(t / 10.) * 0.01
    t += 1
    sim.step()
    viewer.render()
    if t > 100 and os.getenv('TESTING') is not None:
        break
class Environment():
    def __init__(self, model_name, goal_space_train, goal_space_test,
                 project_state_to_end_goal, end_goal_thresholds,
                 initial_state_space, subgoal_bounds, project_state_to_subgoal,
                 subgoal_thresholds, max_actions=1200, num_frames_skip=10,
                 show=False):

        self.name = model_name

        # Create Mujoco Simulation
        self.model = load_model_from_path("./mujoco_files/" + model_name)
        self.sim = MjSim(self.model)

        # Set dimensions and ranges of states, actions, and goals in order to
        # configure actor/critic networks
        if model_name == "pendulum.xml":
            self.state_dim = 2 * len(self.sim.data.qpos) + len(self.sim.data.qvel)
        elif model_name == "reacher.xml":
            self.state_dim = len(self.sim.data.qpos) + len(self.sim.data.qvel) + 2  # fingertip x,y
        else:
            # State will include (i) joint angles and (ii) joint velocities
            self.state_dim = len(self.sim.data.qpos) + len(self.sim.data.qvel)
        self.action_dim = len(self.sim.model.actuator_ctrlrange)  # low-level action dim
        self.action_bounds = self.sim.model.actuator_ctrlrange[:, 1]  # low-level action bounds
        self.action_offset = np.zeros(len(self.action_bounds))  # assumes symmetric low-level action ranges
        self.end_goal_dim = len(goal_space_test)
        self.subgoal_dim = len(subgoal_bounds)
        self.subgoal_bounds = subgoal_bounds

        # Projection functions
        self.project_state_to_end_goal = project_state_to_end_goal
        self.project_state_to_subgoal = project_state_to_subgoal

        # Convert subgoal bounds to symmetric bounds and offset. Need these to
        # properly configure subgoal actor networks
        self.subgoal_bounds_symmetric = np.zeros(len(self.subgoal_bounds))
        self.subgoal_bounds_offset = np.zeros(len(self.subgoal_bounds))
        for i in range(len(self.subgoal_bounds)):
            self.subgoal_bounds_symmetric[i] = (
                self.subgoal_bounds[i][1] - self.subgoal_bounds[i][0]) / 2
            self.subgoal_bounds_offset[i] = (
                self.subgoal_bounds[i][1] - self.subgoal_bounds_symmetric[i])

        # End goal/subgoal thresholds
        self.end_goal_thresholds = end_goal_thresholds
        self.subgoal_thresholds = subgoal_thresholds

        # Set initial state and goal state spaces
        self.initial_state_space = initial_state_space
        self.goal_space_train = goal_space_train
        self.goal_space_test = goal_space_test
        self.subgoal_colors = [
            "Magenta", "Green", "Red", "Blue", "Cyan", "Orange", "Maroon",
            "Gray", "White", "Black"
        ]

        self.max_actions = max_actions

        # Implement visualization if necessary
        self.visualize = show  # visualization boolean
        if self.visualize:
            self.viewer = MjViewer(self.sim)
        self.num_frames_skip = num_frames_skip

    # Get state, which concatenates joint positions and velocities
    def get_state(self):
        if self.name == "pendulum.xml":
            return np.concatenate([
                np.cos(self.sim.data.qpos),
                np.sin(self.sim.data.qpos), self.sim.data.qvel
            ])
        elif self.name == "reacher.xml":
            return np.concatenate([
                self.sim.data.qpos, self.sim.data.qvel,
                np.reshape(
                    np.array(self.sim.data.get_body_xpos("fingertip")[:2]),
                    self.sim.data.qpos.shape)
            ])
        else:
            return np.concatenate((self.sim.data.qpos, self.sim.data.qvel))

    # Reset simulation to state within initial state specified by user
    def reset_sim(self):
        # Reset joint positions and velocities
        for i in range(len(self.sim.data.qpos)):
            self.sim.data.qpos[i] = np.random.uniform(
                self.initial_state_space[i][0], self.initial_state_space[i][1])
        for i in range(len(self.sim.data.qvel)):
            self.sim.data.qvel[i] = np.random.uniform(
                self.initial_state_space[len(self.sim.data.qpos) + i][0],
                self.initial_state_space[len(self.sim.data.qpos) + i][1])
        self.sim.step()
        # Return state
        return self.get_state()

    # Execute low-level action for number of frames specified by num_frames_skip
    def execute_action(self, action):
        self.sim.data.ctrl[:] = action
        for _ in range(self.num_frames_skip):
            self.sim.step()
            if self.visualize:
                self.viewer.render()
        return self.get_state()

    # Visualize end goal. This function may need to be adjusted for new environments.
    def display_end_goal(self, end_goal):
        # Goal can be visualized by changing the location of the relevant site object.
        if self.name == "pendulum.xml":
            self.sim.data.mocap_pos[0] = np.array([
                0.5 * np.sin(end_goal[0]), 0, 0.5 * np.cos(end_goal[0]) + 0.6
            ])
        elif self.name == "ur5.xml":
            theta_1 = end_goal[0]
            theta_2 = end_goal[1]
            theta_3 = end_goal[2]

            # shoulder_pos_1 = np.array([0,0,0,1])
            upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
            forearm_pos_3 = np.array([0.425, 0, 0, 1])
            wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

            # Transformation matrix from shoulder to base reference frame
            T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                              [0, 0, 1, 0.089159], [0, 0, 0, 1]])
            # Transformation matrix from upper arm to shoulder reference frame
            T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                              [np.sin(theta_1), np.cos(theta_1), 0, 0],
                              [0, 0, 1, 0], [0, 0, 0, 1]])
            # Transformation matrix from forearm to upper arm reference frame
            T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0],
                              [0, 1, 0, 0.13585],
                              [-np.sin(theta_2), 0, np.cos(theta_2), 0],
                              [0, 0, 0, 1]])
            # Transformation matrix from wrist 1 to forearm reference frame
            T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425],
                              [0, 1, 0, 0],
                              [-np.sin(theta_3), 0, np.cos(theta_3), 0],
                              [0, 0, 0, 1]])

            # Determine joint position relative to original reference frame
            # shoulder_pos = T_1_0.dot(shoulder_pos_1)
            upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
            forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3]
            wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(wrist_1_pos_4)[:3]

            joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
            """
            print("\nEnd Goal Joint Pos: ")
            print("Upper Arm Pos: ", joint_pos[0])
            print("Forearm Pos: ", joint_pos[1])
            print("Wrist Pos: ", joint_pos[2])
            """
            for i in range(3):
                self.sim.data.mocap_pos[i] = joint_pos[i]
        elif self.name == "reacher.xml":
            x, y = end_goal
            self.sim.data.mocap_pos[0] = np.array([x, y, .01])
        else:
            assert False, "Provide display end goal function in environment.py file"

    # Function returns an end goal
    def get_next_goal(self, test):
        end_goal = np.zeros(len(self.goal_space_test))

        if self.name == "ur5.xml":
            goal_possible = False
            while not goal_possible:
                end_goal = np.zeros(shape=(self.end_goal_dim,))
                end_goal[0] = np.random.uniform(self.goal_space_test[0][0],
                                                self.goal_space_test[0][1])
                end_goal[1] = np.random.uniform(self.goal_space_test[1][0],
                                                self.goal_space_test[1][1])
                end_goal[2] = np.random.uniform(self.goal_space_test[2][0],
                                                self.goal_space_test[2][1])

                # Next need to ensure chosen joint angles result in achievable
                # task (i.e., desired end effector position is above ground)
                theta_1 = end_goal[0]
                theta_2 = end_goal[1]
                theta_3 = end_goal[2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])
                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1), np.cos(theta_1), 0, 0],
                                  [0, 0, 1, 0], [0, 0, 0, 1]])
                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0],
                                  [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0, np.cos(theta_2), 0],
                                  [0, 0, 0, 1]])
                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425],
                                  [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0, np.cos(theta_3), 0],
                                  [0, 0, 0, 1]])

                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(wrist_1_pos_4)[:3]

                # Make sure wrist 1 pos is above ground so it can actually be reached
                if np.absolute(end_goal[0]) > np.pi / 4 and \
                        forearm_pos[2] > 0.05 and wrist_1_pos[2] > 0.15:
                    goal_possible = True

        elif not test and self.goal_space_train is not None:
            for i in range(len(self.goal_space_train)):
                end_goal[i] = np.random.uniform(self.goal_space_train[i][0],
                                                self.goal_space_train[i][1])
        else:
            assert self.goal_space_test is not None, \
                "Need goal space for testing. Set goal_space_test variable in \"design_env.py\" file"
            for i in range(len(self.goal_space_test)):
                end_goal[i] = np.random.uniform(self.goal_space_test[i][0],
                                                self.goal_space_test[i][1])

        # Visualize End Goal
        self.display_end_goal(end_goal)

        return end_goal

    # Visualize all subgoals
    def display_subgoals(self, subgoals):
        # Display up to 10 subgoals and end goal
        if len(subgoals) <= 11:
            subgoal_ind = 0
        else:
            subgoal_ind = len(subgoals) - 11

        for i in range(1, min(len(subgoals), 11)):
            if self.name == "pendulum.xml":
                self.sim.data.mocap_pos[i] = np.array([
                    0.5 * np.sin(subgoals[subgoal_ind][0]), 0,
                    0.5 * np.cos(subgoals[subgoal_ind][0]) + 0.6
                ])
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1
            elif self.name == "ur5.xml":
                theta_1 = subgoals[subgoal_ind][0]
                theta_2 = subgoals[subgoal_ind][1]
                theta_3 = subgoals[subgoal_ind][2]

                # shoulder_pos_1 = np.array([0,0,0,1])
                upper_arm_pos_2 = np.array([0, 0.13585, 0, 1])
                forearm_pos_3 = np.array([0.425, 0, 0, 1])
                wrist_1_pos_4 = np.array([0.39225, -0.1197, 0, 1])

                # Transformation matrix from shoulder to base reference frame
                T_1_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0],
                                  [0, 0, 1, 0.089159], [0, 0, 0, 1]])
                # Transformation matrix from upper arm to shoulder reference frame
                T_2_1 = np.array([[np.cos(theta_1), -np.sin(theta_1), 0, 0],
                                  [np.sin(theta_1), np.cos(theta_1), 0, 0],
                                  [0, 0, 1, 0], [0, 0, 0, 1]])
                # Transformation matrix from forearm to upper arm reference frame
                T_3_2 = np.array([[np.cos(theta_2), 0, np.sin(theta_2), 0],
                                  [0, 1, 0, 0.13585],
                                  [-np.sin(theta_2), 0, np.cos(theta_2), 0],
                                  [0, 0, 0, 1]])
                # Transformation matrix from wrist 1 to forearm reference frame
                T_4_3 = np.array([[np.cos(theta_3), 0, np.sin(theta_3), 0.425],
                                  [0, 1, 0, 0],
                                  [-np.sin(theta_3), 0, np.cos(theta_3), 0],
                                  [0, 0, 0, 1]])

                # Determine joint position relative to original reference frame
                # shoulder_pos = T_1_0.dot(shoulder_pos_1)
                upper_arm_pos = T_1_0.dot(T_2_1).dot(upper_arm_pos_2)[:3]
                forearm_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(forearm_pos_3)[:3]
                wrist_1_pos = T_1_0.dot(T_2_1).dot(T_3_2).dot(T_4_3).dot(wrist_1_pos_4)[:3]

                joint_pos = [upper_arm_pos, forearm_pos, wrist_1_pos]
                """
                print("\nSubgoal %d Joint Pos: " % i)
                print("Upper Arm Pos: ", joint_pos[0])
                print("Forearm Pos: ", joint_pos[1])
                print("Wrist Pos: ", joint_pos[2])
                """

                # Designate site position for upper arm, forearm and wrist
                for j in range(3):
                    self.sim.data.mocap_pos[3 + 3 * (i - 1) + j] = np.copy(joint_pos[j])
                    self.sim.model.site_rgba[3 + 3 * (i - 1) + j][3] = 1

                # print("\nLayer %d Predicted Pos: " % i, wrist_1_pos[:3])
                subgoal_ind += 1
            else:
                return
                # Visualize desired gripper position, which is elements 18-21 in subgoal vector
                self.sim.data.mocap_pos[i] = subgoals[subgoal_ind]
                # Visualize subgoal
                self.sim.model.site_rgba[i][3] = 1
                subgoal_ind += 1
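
# A minimal interaction sketch (not from the original file; constructing
# Environment requires the goal-space arguments above, omitted here):
def random_episode(env):
    state = env.reset_sim()
    goal = env.get_next_goal(test=False)
    for _ in range(env.max_actions):
        # Sample a random low-level action within the (symmetric) actuator bounds.
        action = np.random.uniform(-env.action_bounds + env.action_offset,
                                   env.action_bounds + env.action_offset)
        state = env.execute_action(action)
    return state, goal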
class Square2dEnv(Env):
    # TODO: make this into a GoalEnv
    def __init__(self,
                 model_path='./square2d.xml',
                 distance_threshold=1e-1,
                 frame_skip=2,
                 goal=[0.3, 0.3],
                 horizon=100):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), model_path)
        if not path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)

        self.model = load_model_from_path(fullpath)
        self.seed()
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.distance_threshold = distance_threshold
        self.frame_skip = frame_skip
        self.set_goal_location(goal)
        self.reward_type = 'dense'
        self.horizon = horizon
        self.time_step = 0
        obs = self.get_current_observation()
        self.action_space = spaces.Box(-1., 1., shape=(2,))
        self.observation_space = spaces.Box(-np.inf, np.inf, shape=obs.shape)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        self.set_ball_location([0., 0.])
        self.sim.forward()
        self.time_step = 0
        return self.get_current_observation()

    def _get_viewer(self):
        if self.viewer is None:
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def viewer_setup(self):
        # x,y,z offset from the object (works if trackbodyid=-1)
        self.viewer.cam.lookat[0] = 0.0
        self.viewer.cam.lookat[1] = 0.0
        self.viewer.cam.lookat[2] = 0.0
        # camera rotation around the axis in the plane going through the frame
        # origin (if 0 you just see a line)
        self.viewer.cam.elevation = -90
        self.viewer.cam.azimuth = 90
        self.viewer.cam.distance = 1.5

    def set_goal_location(self, goalPos):
        # goal = [xLoc, yLoc]
        assert np.linalg.norm(
            np.asarray(goalPos) - np.asarray([0.3, 0.3]), axis=-1) < 0.1
        self.sim.data.qpos[0] = goalPos[0]
        self.sim.data.qpos[1] = goalPos[1]

    def set_ball_location(self, ballPos):
        self.sim.data.qpos[2] = ballPos[0]
        self.sim.data.qpos[3] = ballPos[1]

    def set_distance_threshold(self, distance_threshold):
        self.distance_threshold = distance_threshold

    def set_frame_skip(self, frame_skip):
        self.frame_skip = frame_skip

    def get_frame_skip(self):
        return self.frame_skip

    def get_distance_threshold(self):
        return self.distance_threshold

    def get_ball_location(self):
        return self.sim.data.qpos[2:4]

    def get_goal_location(self):
        return self.sim.data.qpos[0:2]

    def get_ball_velocity(self):
        return self.sim.data.qvel[2:4]

    def send_control_command(self, xDirectionControl, yDirectionControl):
        self.sim.data.ctrl[0] = xDirectionControl
        self.sim.data.ctrl[1] = yDirectionControl

    def get_current_observation(self):
        obs = np.concatenate([
            self.get_goal_location(),
            self.get_ball_location(),
            self.get_ball_velocity()
        ]).ravel()
        return obs.copy()
        # obs = np.concatenate([self.get_ball_location(), self.get_ball_velocity()]).ravel()
        # desired_goal = self.get_goal_location()
        # achieved_goal = self.get_ball_location()
        # return {
        #     'observation': obs.copy(),
        #     'achieved_goal': achieved_goal.copy(),
        #     'desired_goal': desired_goal.copy()
        # }

    def get_image_of_goal_observation(self, xLoc=None, yLoc=None):
        if not xLoc:
            xLoc = self.sim.data.qpos[0]
        if not yLoc:
            yLoc = self.sim.data.qpos[1]
        self.sim.data.qpos[0] = xLoc
        self.sim.data.qpos[1] = yLoc
        self.sim.data.qpos[2] = xLoc
        self.sim.data.qpos[3] = yLoc
        self.render()

    def do_simulation(self, ctrl, n_frames):
        self.send_control_command(ctrl[0], ctrl[1])
        for _ in range(n_frames):
            self.take_step()

    def step(self, ctrl):
        if np.linalg.norm(self.get_goal_location() - [0.3, 0.3], axis=-1) > 0.1:
            print(self.get_goal_location())
            # assert False
        ctrl = np.clip(ctrl, -1., 1.)
        self.do_simulation(ctrl, self.frame_skip)
        obs = self.get_current_observation()
        info = {}
        reward = self.compute_reward(self.get_ball_location(),
                                     self.get_goal_location(), {})
        done = (reward == 1.0)
        self.time_step += 1
        if self.time_step >= self.horizon:
            done = True
        return obs, reward, done, info

    def take_step(self):
        self.sim.step()

    def render(self, mode='human'):
        self._get_viewer().render()

    def compute_reward(self, achieved_goal, desired_goal, info):
        # Compute distance between goal and the achieved goal.
        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        if self.reward_type == 'sparse':
            return -(d > self.distance_threshold).astype(np.float32)
        else:
            return -d

    def _is_success(self, achieved_goal, desired_goal):
        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
        return (d < self.distance_threshold).astype(np.float32)

    def log_diagnostics(self, paths):
        pass

    def terminate(self):
        pass
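
# A minimal usage sketch (not part of the original file); assumes square2d.xml
# sits next to this module and defines the goal slide joints before the ball's.
if __name__ == '__main__':
    env = Square2dEnv()
    obs = env.reset()
    for _ in range(10):
        # Random 2-D force command, clipped to [-1, 1] inside step().
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()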
class MujocoEnv(metaclass=EnvMeta):
    """
    Initializes a Mujoco Environment.

    Args:
        has_renderer (bool): If true, render the simulation state in a viewer
            instead of headless mode.

        has_offscreen_renderer (bool): True if using off-screen rendering.

        render_camera (str): Name of camera to render if `has_renderer` is True.
            Setting this value to 'None' will result in the default angle being
            applied, which is useful as it can be dragged / panned by the user
            using the mouse.

        render_collision_mesh (bool): True if rendering collision meshes in
            camera. False otherwise.

        render_visual_mesh (bool): True if rendering visual meshes in camera.
            False otherwise.

        control_freq (float): how many control signals to receive in every
            simulated second. This sets the amount of simulation time that
            passes between every action input.

        horizon (int): Every episode lasts for exactly @horizon timesteps.

        ignore_done (bool): True if never terminating the environment
            (ignore @horizon).

        hard_reset (bool): If True, re-loads model, sim, and render object upon
            a reset call, else, only calls sim.reset and resets all
            robosuite-internal variables.

    Raises:
        ValueError: [Invalid renderer selection]
    """

    def __init__(self,
                 has_renderer=False,
                 has_offscreen_renderer=True,
                 render_camera="frontview",
                 render_collision_mesh=False,
                 render_visual_mesh=True,
                 control_freq=10,
                 horizon=1000,
                 ignore_done=False,
                 hard_reset=True):
        # First, verify that both the on- and off-screen renderers are not
        # being used simultaneously
        if has_renderer is True and has_offscreen_renderer is True:
            raise ValueError(
                "the onscreen and offscreen renderers cannot be used simultaneously.")

        # Rendering-specific attributes
        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_camera = render_camera
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.viewer = None

        # Simulation-specific attributes
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.hard_reset = hard_reset
        self.model = None
        self.cur_time = None
        self.model_timestep = None
        self.control_timestep = None
        self.deterministic_reset = False  # whether to add randomized resetting of objects / robot joints

        # Load the model
        self._load_model()

        # Initialize the simulation
        self._initialize_sim()

        # Run all further internal (re-)initialization required
        self._reset_internal()

    def initialize_time(self, control_freq):
        """
        Initializes the time constants used for simulation.

        Args:
            control_freq (float): Hz rate to run control loop at within the simulation
        """
        self.cur_time = 0
        self.model_timestep = self.sim.model.opt.timestep
        if self.model_timestep <= 0:
            raise XMLError("xml model defined non-positive time step")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError(
                "control frequency {} is invalid".format(control_freq))
        self.control_timestep = 1. / control_freq

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements in
        a flattened array, which is how MuJoCo stores physical simulation data.
        """
        pass

    def _initialize_sim(self, xml_string=None):
        """
        Creates a MjSim object and stores it in self.sim. If @xml_string is
        specified, the MjSim object will be created from the specified xml_string.
        Else, it will pull from self.model to instantiate the simulation.

        Args:
            xml_string (str): If specified, creates the MjSim object from this xml string
        """
        # if we have an xml string, use that to create the sim. Otherwise, use the local model
        self.mjpy_model = load_model_from_xml(xml_string) if xml_string \
            else self.model.get_model(mode="mujoco_py")

        # Create the simulation instance and run a single step to make sure
        # changes have propagated through the sim state
        self.sim = MjSim(self.mjpy_model)
        self.sim.step()

        # Setup sim time based on control frequency
        self.initialize_time(self.control_freq)

    def reset(self):
        """
        Resets simulation.

        Returns:
            OrderedDict: Environment observation space after reset occurs
        """
        # TODO(yukez): investigate black screen of death
        # Use hard reset if requested
        if self.hard_reset and not self.deterministic_reset:
            self._destroy_viewer()
            self._load_model()
            self._initialize_sim()
        # Else, we only reset the sim internally
        else:
            self.sim.reset()
        # Reset necessary robosuite-centric variables
        self._reset_internal()
        self.sim.forward()
        return self._get_observation()

    def _reset_internal(self):
        """Resets simulation internal configurations."""
        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.viewer = MujocoPyRenderer(self.sim)
            self.viewer.viewer.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0)
            self.viewer.viewer.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0)

            # hiding the overlay speeds up rendering significantly
            self.viewer.viewer._hide_overlay = True

            # make sure mujoco-py doesn't block rendering frames
            # (see https://github.com/StanfordVL/robosuite/issues/39)
            self.viewer.viewer._render_every_frame = True

            # Set the camera angle for viewing
            if self.render_camera is not None:
                self.viewer.set_camera(
                    camera_id=self.sim.model.camera_name2id(self.render_camera))

        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def _get_observation(self):
        """
        Grabs observations from the environment.

        Returns:
            OrderedDict: OrderedDict containing observations [(name_string, np.array), ...]
        """
        return OrderedDict()

    def step(self, action):
        """
        Takes a step in simulation with control command @action.

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            4-tuple:
                - (OrderedDict) observations from the environment
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) misc information

        Raises:
            ValueError: [Steps past episode termination]
        """
        if self.done:
            raise ValueError("executing action in terminated episode")

        self.timestep += 1

        # Since the env.step frequency is slower than the mjsim timestep
        # frequency, the internal controller will output multiple torque
        # commands in between new high-level action commands.
        # Therefore, we need to denote via 'policy_step' whether the current
        # step we're taking is simply an internal update of the controller, or
        # an actual policy update
        policy_step = True

        # Loop through the simulation at the model timestep rate until we're
        # ready to take the next policy step (as defined by the control
        # frequency specified at the environment level)
        for i in range(int(self.control_timestep / self.model_timestep)):
            self.sim.forward()
            self._pre_action(action, policy_step)
            self.sim.step()
            policy_step = False

        # Note: this is done all at once to avoid floating point inaccuracies
        self.cur_time += self.control_timestep

        reward, done, info = self._post_action(action)
        return self._get_observation(), reward, done, info

    def _pre_action(self, action, policy_step=False):
        """
        Do any preprocessing before taking an action.

        Args:
            action (np.array): Action to execute within the environment
            policy_step (bool): Whether this current loop is an actual policy
                step or an internal sim update step
        """
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """
        Do any housekeeping after taking an action.

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            3-tuple:
                - (float) reward from the environment
                - (bool) whether the current episode is completed or not
                - (dict) empty dict to be filled with information by subclassed method
        """
        reward = self.reward(action)

        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done
        return reward, self.done, {}

    def reward(self, action):
        """
        Reward should be a function of state and action.

        Args:
            action (np.array): Action to execute within the environment

        Returns:
            float: Reward from environment
        """
        raise NotImplementedError

    def render(self):
        """Renders to an on-screen window."""
        self.viewer.render()

    def observation_spec(self):
        """
        Returns an observation as observation specification.

        An alternative design is to return an OrderedDict where the keys are
        the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.

        Returns:
            OrderedDict: Observations from the environment
        """
        observation = self._get_observation()
        return observation

    @property
    def action_spec(self):
        """
        Action specification should be implemented in subclasses.

        Action space is represented by a tuple of (low, high), which are two
        numpy vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    @property
    def action_dim(self):
        """
        Size of the action space.

        Returns:
            int: Action space dimension
        """
        raise NotImplementedError

    def reset_from_xml_string(self, xml_string):
        """
        Reloads the environment from an XML description of the environment.

        Args:
            xml_string (str): XML string that will be loaded directly into the sim
        """
        # if there is an active viewer window, destroy it
        self.close()

        # Since we are reloading from an xml_string, we are deterministically resetting
        self.deterministic_reset = True

        # initialize sim from xml
        self._initialize_sim(xml_string=xml_string)

        # Now reset as normal
        self.reset()

        # Turn off deterministic reset
        self.deterministic_reset = False

    def find_contacts(self, geoms_1, geoms_2):
        """
        Finds contact between two geom groups.
        Args:
            geoms_1 (list of str): a list of geom names
            geoms_2 (list of str): another list of geom names

        Returns:
            generator: iterator of all contacts between @geoms_1 and @geoms_2
        """
        for contact in self.sim.data.contact[0:self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                yield contact

    def _check_success(self):
        """
        Checks if the task has been completed. Should be implemented by subclasses.

        Returns:
            bool: True if the task has been completed
        """
        raise NotImplementedError

    def _destroy_viewer(self):
        """Destroys the current mujoco renderer instance if it exists."""
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()
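
# A minimal subclass sketch (hypothetical, not from robosuite). Note the
# arithmetic behind step(): with the default control_freq=10 and a model
# timestep of 0.002 s, each env.step() runs int(0.1 / 0.002) = 50 sim substeps.
class ReachDemoEnv(MujocoEnv):
    def _load_model(self):
        # In a real robosuite task this builds an MJCF task model; here we
        # assume some object exposing get_model(mode="mujoco_py").
        self.model = build_demo_task()  # hypothetical helper

    def reward(self, action):
        # Sparse success bonus; _check_success would be task-specific.
        return 1.0 if self._check_success() else 0.0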
def test_jacobians():
    xml = """
    <mujoco>
      <worldbody>
        <body name="body1" pos="0 0 0">
          <joint axis="1 0 0" name="a" pos="0 0 0" type="hinge"/>
          <geom name="geom1" pos="0 0 0" size="1.0"/>
          <body name="body2" pos="0 0 1">
            <joint name="b" axis="1 0 0" pos="0 0 1" type="hinge"/>
            <geom name="geom2" pos="1 1 1" size="0.5"/>
            <site name="target" size="0.1"/>
          </body>
        </body>
      </worldbody>
      <actuator>
        <motor joint="a"/>
        <motor joint="b"/>
      </actuator>
    </mujoco>
    """
    model = load_model_from_xml(xml)
    sim = MjSim(model)
    sim.reset()
    # After reset, jacobians are all zeros
    target_jacp = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, np.zeros(3 * sim.model.nv))
    # After the first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    target_test = np.array([0, 0, -1, 1, 0, 0])
    np.testing.assert_allclose(target_jacp, target_test)
    # Should be unchanged after steps (zero action)
    for _ in range(2):
        sim.step()
        sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert np.linalg.norm(target_jacp - target_test) < 1e-3
    # Apply a very large action, ensure jacobian unchanged after step
    sim.reset()
    sim.forward()
    sim.data.ctrl[:] = np.ones(sim.model.nu) * 1e9
    sim.step()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    np.testing.assert_allclose(target_jacp, target_test)
    # After large action, ensure jacobian changed after forward
    sim.forward()
    sim.data.get_site_jacp('target', jacp=target_jacp)
    assert not np.allclose(target_jacp, target_test)
    # Test the `site_jacp` property, which gets all sites at once
    np.testing.assert_allclose(target_jacp, sim.data.site_jacp[0])
    # Test not passing in an array
    sim.reset()
    sim.forward()
    target_jacp = sim.data.get_site_jacp('target')
    np.testing.assert_allclose(target_jacp, target_test)
    # Test passing in a bad array (integer instead of double; np.int64 replaces
    # the original np.long, which was removed in newer NumPy)
    target_jacp = np.zeros(3 * sim.model.nv, dtype=np.int64)
    with pytest.raises(ValueError):
        sim.data.get_site_jacp('target', jacp=target_jacp)
    # Test rotation jacobian - like above but 'jacr' instead of 'jacp'
    # After reset, jacobians are all zeros
    sim.reset()
    target_jacr = np.zeros(3 * sim.model.nv)
    sim.data.get_site_jacr('target', jacr=target_jacr)
    np.testing.assert_allclose(target_jacr, np.zeros(3 * sim.model.nv))
    # After the first forward, jacobians are real
    sim.forward()
    sim.data.get_site_jacr('target', jacr=target_jacr)
    target_test = np.array([1, 1, 0, 0, 0, 0])
    # Test allocating a dedicated array
    target_jacr = sim.data.get_site_jacr('target')
    np.testing.assert_allclose(target_jacr, target_test)
    # Test the batch getter (all sites at once)
    np.testing.assert_allclose(target_jacr, sim.data.site_jacr[0])
    # Test passing in a bad array
    target_jacr = np.zeros(3 * sim.model.nv, dtype=np.int64)
    with pytest.raises(ValueError):
        sim.data.get_site_jacr('target', jacr=target_jacr)
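
# A short illustration (a sketch, not from the original tests) of what jacp
# means: reshaped to (3, nv), it maps joint velocities to the site's linear
# velocity in the world frame, so J @ qvel should match get_site_xvelp.
def site_linear_velocity(sim, site_name):
    jacp = sim.data.get_site_jacp(site_name).reshape(3, sim.model.nv)
    return jacp.dot(sim.data.qvel)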
class Aether:
    """
    God of space and heaven.
    Bridges the simulator and the program's modules.
    """

    def __init__(self):
        # DEFINITIONS
        self.field_width = 640
        self.field_height = 480
        self.cascadeTime = 0
        self.cascadeLoops = 1
        self.cascadeLastTime = 0

        # SETUP
        model = load_model_from_path("simulator/scene.xml")
        self.sim = MjSim(model)
        self.viewer = Viewer(self.sim, self)
        self.ball_joint = self.sim.model.get_joint_qpos_addr("Ball")[0]
        self.robot_joints = [
            self.sim.model.get_joint_qpos_addr("Robot_01")[0],
            self.sim.model.get_joint_qpos_addr("Robot_02")[0],
            self.sim.model.get_joint_qpos_addr("Robot_03")[0],
            self.sim.model.get_joint_qpos_addr("Robot_04")[0],
            self.sim.model.get_joint_qpos_addr("Robot_05")[0],
            self.sim.model.get_joint_qpos_addr("Robot_06")[0]
        ]

        # EXECUTION
        # prepare the modules
        self.enabled = [False] * 6  # [False, False, False, True, True, True]
        self.athena = [Athena(), Athena()]
        self.zeus = [Zeus(), Zeus()]
        Endless.setup(self.field_width, self.field_height)
        self.athena[0].setup(3, 0.8)
        self.athena[1].setup(3, 0.8)
        self.zeus[0].setup(3)
        self.zeus[1].setup(3)

        # start the data loops
        self.pause = False
        self.loopThread1 = threading.Thread(target=self.loopTeam, args=[0])
        self.loopThread2 = threading.Thread(target=self.loopTeam, args=[1])
        self.loopThread1.daemon = True
        self.loopThread2.daemon = True
        self.loopThread1.start()
        self.loopThread2.start()

        self.showInfos(0)
        self.showInfos(1)

    def run(self):
        while True:
            self.sim.step()
            self.viewer.render()

    def loopTeam(self, team):
        while True:
            time.sleep(0.0000000001)
            if self.pause or \
                    (not self.enabled[3 * team] and
                     not self.enabled[3 * team + 1] and
                     not self.enabled[3 * team + 2]):
                continue
            # run our modules
            positions = self.generatePositions(team)
            commands = self.athena[team].getTargets(positions)
            velocities = self.zeus[team].getVelocities(commands)

            # apply the results to the simulation
            if self.enabled[0 + 3 * team]:
                self.sim.data.ctrl[0 + 6 * team] = self.convertVelocity(velocities[0]["vLeft"])
                self.sim.data.ctrl[1 + 6 * team] = self.convertVelocity(velocities[0]["vRight"])
            if self.enabled[1 + 3 * team]:
                self.sim.data.ctrl[2 + 6 * team] = self.convertVelocity(velocities[1]["vLeft"])
                self.sim.data.ctrl[3 + 6 * team] = self.convertVelocity(velocities[1]["vRight"])
            if self.enabled[2 + 3 * team]:
                self.sim.data.ctrl[4 + 6 * team] = self.convertVelocity(velocities[2]["vLeft"])
                self.sim.data.ctrl[5 + 6 * team] = self.convertVelocity(velocities[2]["vRight"])

            # show the results
            self.showInfos(team, positions, commands)

            # 3D indicators
            for i in range(3):
                position = positions[0][i]["position"]
                self.setObjectPose("indicator_" + str(i + 3 * team + 1),
                                   position, team, 0.2, velocities[i]["vector"])
            # self.setObjectPose("virtual_robot_1", velocities["virtualPos"], team=0)

    # HELPERS
    def showInfos(self, team, positions=None, commands=None):
        infos = []
        for i in range(3):
            if self.enabled[i + 3 * team] and positions and commands:
                # information every robot has
                robot = "X: " + "{:.1f}".format(positions[0][i]["position"][0])
                robot += ", Y: " + "{:.1f}".format(positions[0][i]["position"][1])
                robot += ", O: " + "{:.1f}".format(positions[0][i]["orientation"])
                robot += ", T: " + commands[i]["tactics"]
                robot += ", C: " + commands[i]["command"]

                if commands[i]["command"] == "lookAt":
                    if type(commands[i]["data"]["target"]) is tuple:
                        robot += "(" + "{:.1f}".format(commands[i]["data"]["target"][0]) + ", "
                        robot += "{:.1f}".format(commands[i]["data"]["target"][1]) + ")"
                    else:
                        robot += "(" + "{:.1f}".format(commands[i]["data"]["target"]) + ")"
")" elif commands[i]["command"] == "goTo": robot += "(" + "{:.1f}".format( commands[i]["data"]["target"]["position"][0]) + ", " robot += "{:.1f}".format( commands[i]["data"]["target"]["position"][1]) + ", " if type(commands[i]["data"]["target"] ["orientation"]) is tuple: robot += "(" + "{:.1f}".format( commands[i]["data"]["target"]["orientation"] [0]) + ", " robot += "{:.1f}".format(commands[i]["data"]["target"] ["orientation"][1]) + ") )" else: robot += "{:.1f}".format( commands[i]["data"]["target"]["orientation"]) + ")" elif commands[i]["command"] == "spin": robot += "(" + commands[i]["data"]["direction"] + ")" else: robot = "[OFF]" infos.append(robot) self.viewer.infos["robots" + str(team + 1)] = infos if team == 0: if positions: self.viewer.infos["ball"] = "X: " + "{:.2f}".format(positions[2]["position"][0]) + ", Y: " + \ "{:.2f}".format(positions[2]["position"][1]) fps = self.getFPS() if fps: self.viewer.infos["fps"] = fps if commands: # indicadores 3D # print(commands[0]["futureBall"]) self.setObjectPose("virtual_ball", commands[0]["futureBall"], 0, 0.022) for i in range(3): if commands[i]["command"] == "goTo": target = commands[i]["data"]["target"]["position"] targetOrientation = commands[i]["data"]["target"][ "orientation"] if type(targetOrientation) is tuple: position = positions[0][i]["position"] targetOrientation = math.atan2( position[1] - targetOrientation[1], -(position[0] - targetOrientation[0])) self.setObjectPose("target_" + str(i + 1), target, 0, 0.01, targetOrientation) def getFPS(self): # calcula o fps e manda pra interface self.cascadeTime += time.time() - self.cascadeLastTime self.cascadeLoops += 1 self.cascadeLastTime = time.time() if self.cascadeTime > 1: fps = self.cascadeLoops / self.cascadeTime self.cascadeTime = self.cascadeLoops = 0 return "{:.2f}".format(fps) return None # FUNÇÕES def reset(self): for i in range(6): self.enabled[i] = False self.showInfos(0) self.showInfos(1) self.sim.reset() def startStop(self, pause): self.pause = pause def moveBall(self, direction, keepVel=False): if not keepVel: for i in range(6): self.sim.data.qvel[self.ball_joint + i] = 0 if direction == 0: # UP self.sim.data.qpos[self.ball_joint + 1] += 0.01 elif direction == 1: # DOWN self.sim.data.qpos[self.ball_joint + 1] -= 0.01 elif direction == 2: # LEFT self.sim.data.qpos[self.ball_joint] -= 0.01 elif direction == 3: # RIGHT self.sim.data.qpos[self.ball_joint] += 0.01 def toggleRobot(self, robotId, moveOut=False): if self.sim.data.qpos[self.robot_joints[robotId] + 1] >= 1: self.enabled[robotId] = False if moveOut: self.sim.data.qpos[self.robot_joints[robotId] + 1] = 0 elif moveOut: self.enabled[robotId] = False self.sim.data.qpos[ self.robot_joints[robotId]] = -0.62 + 0.25 * robotId self.sim.data.qpos[self.robot_joints[robotId] + 1] = 1.5 self.sim.data.qpos[self.robot_joints[robotId] + 2] = 0.04 self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0 elif self.enabled[robotId]: self.enabled[robotId] = False self.sim.data.ctrl[robotId] = self.sim.data.ctrl[robotId + 1] = 0 else: self.enabled[robotId] = True self.showInfos(0 if robotId < 3 else 1) def convertPositionX(self, coord, team): """Traz o valor pra positivo e multiplica pela proporção (largura máxima)/(posição x máxima) Args: coord: Coordenada da posição no mundo da simulação a ser convertida team: Time que está pedindo a conversão (0 ou 1) Returns: Coordenada da posição na proporção utilizada pela estratégia """ if team == 0: return (coord + 0.8083874182591296) * self.field_width / 1.6167748365182593 else: return 
    def convertPositionY(self, coord, team):
        """Shifts the value to positive and multiplies it by the ratio
        (maximum height)/(maximum y position).

        Args:
            coord: y coordinate in the simulation world to convert
            team: team requesting the conversion (0 or 1)

        Returns:
            Position coordinate in the scale used by the strategy
        """
        if team == 0:
            return (coord + 0.58339083) * self.field_height / 1.16678166
        else:
            return -(coord - 0.58339083) * self.field_height / 1.16678166

    @staticmethod
    def convertVelocity(vel):
        return vel * 30

    def setObjectPose(self, objectName, newPos, team=0, height=0.04, newOrientation=0):
        """Sets the position and orientation of an object in the simulator.

        Args:
            objectName: name of the object whose pose will be changed. This
                name must belong to a mocap configured in the scene. If the
                object is virtual_robot_i, the robot is yellow if i <= 3,
                blue otherwise
            newPos: (x, y), with 'x' and 'y' in pixels
            team: team index (pixel values flip depending on the team)
            height: height of the object in the world
            newOrientation: Z orientation of the object, in radians
        """
        if team == 0:
            x = (newPos[0] / self.field_width) * 1.6167748365182593 - 0.8083874182591296
            y = (newPos[1] / self.field_height) * 1.16678166 - 0.58339083
        else:
            x = -(newPos[0] / self.field_width) * 1.6167748365182593 + 0.8083874182591296
            y = -(newPos[1] / self.field_height) * 1.16678166 + 0.58339083

        # euler angles to quaternion conversion (wikipedia)
        newQuat = [
            math.sin(newOrientation / 2), 0, 0,
            math.cos(newOrientation / 2)
        ]
        self.sim.data.set_mocap_quat(objectName, newQuat)
        self.sim.data.set_mocap_pos(objectName, np.array([x, y, height]))

    def generatePositions(self, team):
        """Builds the position vector in the format expected by the strategy.

        For each free joint, 'sim.data.qpos' holds, starting at its address:
            0: pos X
            1: pos Y
            2: pos Z
            3: quat component w
            4: quat component x
            5: quat component y
            6: quat component z

        Returns:
            Position vector in the correct format
        """
        r1 = math.pi * team - math.atan2(
            2 * (self.sim.data.qpos[self.robot_joints[3 * team] + 3] *
                 self.sim.data.qpos[self.robot_joints[3 * team] + 6] +
                 self.sim.data.qpos[self.robot_joints[3 * team] + 4] *
                 self.sim.data.qpos[self.robot_joints[3 * team] + 6]),
            1 - 2 * (self.sim.data.qpos[self.robot_joints[3 * team] + 5] *
                     self.sim.data.qpos[self.robot_joints[3 * team] + 5] +
                     self.sim.data.qpos[self.robot_joints[3 * team] + 6] *
                     self.sim.data.qpos[self.robot_joints[3 * team] + 6]))
        r2 = math.pi * team - math.atan2(
            2 * (self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 3] *
                 self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6] +
                 self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 4] *
                 self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6]),
            1 - 2 * (self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 5] *
                     self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 5] +
                     self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6] *
                     self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 6]))
        r3 = math.pi * team - math.atan2(
            2 * (self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 3] *
                 self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6] +
                 self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 4] *
                 self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6]),
            1 - 2 * (self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 5] *
                     self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 5] +
                     self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6] *
                     self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 6]))

        return [
            [  # allied robots
                {
                    "position":
                        (self.convertPositionX(
                            self.sim.data.qpos[self.robot_joints[3 * team]], team),
                         self.convertPositionY(
                             self.sim.data.qpos[self.robot_joints[3 * team] + 1], team)),
                    "orientation": r1,
                    "robotLetter": "A"
                },
                {
                    "position":
                        (self.convertPositionX(
                            self.sim.data.qpos[self.robot_joints[1 + 3 * team]], team),
                         self.convertPositionY(
                             self.sim.data.qpos[self.robot_joints[1 + 3 * team] + 1], team)),
                    "orientation": r2,
                    "robotLetter": "B"
                },
                {
                    "position":
                        (self.convertPositionX(
                            self.sim.data.qpos[self.robot_joints[2 + 3 * team]], team),
                         self.convertPositionY(
                             self.sim.data.qpos[self.robot_joints[2 + 3 * team] + 1], team)),
                    "orientation": r3,
                    "robotLetter": "C"
                }
            ],
            [  # opponent robots
                {
                    "position":
                        (self.convertPositionX(
                            self.sim.data.qpos[self.robot_joints[(3 + 3 * team) % 6]], team),
                         self.convertPositionY(
                             self.sim.data.qpos[self.robot_joints[(3 + 3 * team) % 6] + 1], team)),
                },
                {
                    "position":
                        (self.convertPositionX(
                            self.sim.data.qpos[self.robot_joints[(4 + 3 * team) % 6]], team),
                         self.convertPositionY(
                             self.sim.data.qpos[self.robot_joints[(4 + 3 * team) % 6] + 1], team)),
                },
                {
                    "position":
                        (self.convertPositionX(
                            self.sim.data.qpos[self.robot_joints[(5 + 3 * team) % 6]], team),
                         self.convertPositionY(
                             self.sim.data.qpos[self.robot_joints[(5 + 3 * team) % 6] + 1], team)),
                }
            ],
            {  # ball
                "position":
                    (self.convertPositionX(self.sim.data.qpos[self.ball_joint], team),
                     self.convertPositionY(self.sim.data.qpos[self.ball_joint + 1], team))
            }
        ]
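
# A small sketch (not part of the original file) of the addressing used above:
# for a free joint, mujoco_py's get_joint_qpos_addr returns a (start, end)
# tuple spanning the 7 qpos entries (x, y, z, qw, qx, qy, qz).
def free_joint_pose(sim, joint_name):
    start, end = sim.model.get_joint_qpos_addr(joint_name)  # end - start == 7
    xyz = sim.data.qpos[start:start + 3]
    quat_wxyz = sim.data.qpos[start + 3:end]
    return xyz, quat_wxyz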
class MujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments."""

    def __init__(self, model_path, frame_skip):
        if model_path.startswith("/"):
            fullpath = model_path
        else:
            fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path)
        if not os.path.exists(fullpath):
            raise IOError("File %s does not exist" % fullpath)
        self.frame_skip = frame_skip
        self.model = load_model_from_path(fullpath)
        self.sim = MjSim(self.model)
        self.data = self.sim.data
        self.viewer = None
        self.buffer_size = (1600, 1280)

        self.metadata = {
            'render.modes': ['human', 'rgb_array'],
            'video.frames_per_second': 60,
        }

        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
        assert not done
        self.obs_dim = np.sum([o.size for o in observation]) if (
            type(observation) is tuple) else observation.size

        bounds = self.model.actuator_ctrlrange.copy()
        low, high = bounds[:, 0], bounds[:, 1]
        self.action_space = spaces.Box(low, high)

        high = np.inf * np.ones(self.obs_dim)
        self.observation_space = spaces.Box(-high, high)

        self._seed()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # methods to override:
    # ------------------------------------------------------------------------

    def reset_model(self):
        """Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """Called when the viewer is initialized and after every reset.
        Optionally implement this method, if you need to tinker with camera
        position and so forth.
        """
        pass

    # ------------------------------------------------------------------------

    def _reset(self):
        self.sim.reset()
        self.sim.forward()
        ob = self.reset_model()
        return ob

    def set_state(self, qpos, qvel):
        assert qpos.shape == (self.model.nq,)
        assert qvel.shape == (self.model.nv,)
        state = self.sim.get_state()
        for i in range(self.model.nq):
            state.qpos[i] = qpos[i]
        for i in range(self.model.nv):
            state.qvel[i] = qvel[i]
        self.sim.set_state(state)
        self.sim.forward()

    @property
    def dt(self):
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        for i in range(self.model.nu):
            self.sim.data.ctrl[i] = ctrl[i]
        for _ in range(n_frames):
            self.sim.step()

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer = None
            return

        if mode == 'rgb_array':
            self.viewer_setup()
            return _read_pixels(self.sim, *self.buffer_size)
        elif mode == 'human':
            self._get_viewer().render()

    def _get_viewer(self, mode='human'):
        if self.viewer is None and mode == 'human':
            self.viewer = MjViewer(self.sim)
            self.viewer_setup()
        return self.viewer

    def state_vector(self):
        state = self.sim.get_state()
        return np.concatenate([state.qpos.flat, state.qvel.flat])
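
# A minimal subclass sketch (hypothetical; assumes an "inverted_pendulum.xml"
# under the assets directory): reset_model is the required override, and the
# old-style _step must exist because __init__ above calls it once.
class PendulumDemoEnv(MujocoEnv):
    def __init__(self):
        super().__init__('inverted_pendulum.xml', frame_skip=2)

    def _step(self, action):
        self.do_simulation(action, self.frame_skip)
        # Placeholder reward/termination; a real task would compute these.
        return self.state_vector(), 0.0, False, {}

    def reset_model(self):
        # Perturb the initial state slightly and return the first observation.
        qpos = self.init_qpos + self.np_random.uniform(-0.01, 0.01, self.model.nq)
        qvel = self.init_qvel + self.np_random.uniform(-0.01, 0.01, self.model.nv)
        self.set_state(qpos, qvel)
        return self.state_vector()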
class MujocoEnv(metaclass=EnvMeta):
    """Initializes a Mujoco Environment."""

    parameters_spec = {}

    def __init__(
        self,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        use_camera_obs=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:
            has_renderer (bool): If true, render the simulation state in a
                viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive in every
                simulated second. This sets the amount of simulation time that
                passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment
                (ignore @horizon).

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            camera_name (str): name of camera to be rendered. Must be set if
                @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.has_renderer = has_renderer
        self.has_offscreen_renderer = has_offscreen_renderer
        self.render_collision_mesh = render_collision_mesh
        self.render_visual_mesh = render_visual_mesh
        self.control_freq = control_freq
        self.horizon = horizon
        self.ignore_done = ignore_done
        self.viewer = None
        self.model = None

        # settings for camera observations
        self.use_camera_obs = use_camera_obs
        if self.use_camera_obs and not self.has_offscreen_renderer:
            raise ValueError("Camera observations require an offscreen renderer.")
        self.camera_name = camera_name
        if self.use_camera_obs and self.camera_name is None:
            raise ValueError("Must specify camera name when using camera obs")
        self.camera_height = camera_height
        self.camera_width = camera_width
        self.camera_depth = camera_depth

        self._reset_internal()

    def initialize_time(self, control_freq):
        """Initializes the time constants used for simulation."""
        self.cur_time = 0
        self.model_timestep = self.sim.model.opt.timestep
        if self.model_timestep <= 0:
            raise XMLError("xml model defined non-positive time step")
        self.control_freq = control_freq
        if control_freq <= 0:
            raise SimulationError(
                "control frequency {} is invalid".format(control_freq))
        self.control_timestep = 1. / control_freq

    def _load_model(self):
        """Loads an xml model, puts it in self.model"""
        pass

    def _get_reference(self):
        """
        Sets up references to important components. A reference is typically an
        index or a list of indices that point to the corresponding elements in
        a flattened array, which is how MuJoCo stores physical simulation data.
        """
        pass

    def reset(self, **kwargs):
        """Resets simulation."""
        # TODO(yukez): investigate black screen of death
        # if there is an active viewer window, destroy it
        self._destroy_viewer()
        self.reset_props(**kwargs)
        self._reset_internal()
        self.sim.forward()
        return self._get_observation()

    def reset_props(self):
        print('INFO from GZZ: this is the base class reset_props. '
              'This means the environment does not support domain randomization.')

    def init_viewer(self):
        print('init_viewer', self.viewer)
        if self.has_offscreen_renderer:
            print('WARNING from GZZ: robosuite has a bug when simultaneously '
                  'rendering offscreen (like camera obs) and onscreen (X window), '
                  'and will give (at least) an onscreen `black screen of death` '
                  'after a reset. Please check their issue: '
                  'https://github.com/StanfordVL/robosuite/issues/25 . '
                  'I failed to help them debug this.')
        self.viewer = MujocoPyRenderer(self.sim)
        self.viewer.viewer.vopt.geomgroup[0] = (1 if self.render_collision_mesh else 0)
        self.viewer.viewer.vopt.geomgroup[1] = (1 if self.render_visual_mesh else 0)

        # hiding the overlay speeds up rendering significantly
        self.viewer.viewer._hide_overlay = True

    def _reset_internal(self):
        """Resets simulation internal configurations."""
        # instantiate simulation from MJCF model
        self._load_model()
        self.mjpy_model = self.model.get_model(mode="mujoco_py")
        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)

        # create visualization screen or renderer
        if self.has_renderer and self.viewer is None:
            self.init_viewer()
        elif self.has_offscreen_renderer:
            if self.sim._render_context_offscreen is None:
                render_context = MjRenderContextOffscreen(self.sim)
                self.sim.add_render_context(render_context)
            self.sim._render_context_offscreen.vopt.geomgroup[0] = (
                1 if self.render_collision_mesh else 0)
            self.sim._render_context_offscreen.vopt.geomgroup[1] = (
                1 if self.render_visual_mesh else 0)

        # additional housekeeping
        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

    def _get_observation(self):
        """Returns an OrderedDict containing observations [(name_string, np.array), ...]."""
        return OrderedDict()

    def step(self, action):
        """Takes a step in simulation with control command @action."""
        if self.done:
            raise ValueError("executing action in terminated episode")

        self.timestep += 1
        self._pre_action(action)
        end_time = self.cur_time + self.control_timestep
        while self.cur_time < end_time:
            self.sim.step()
            self.cur_time += self.model_timestep
        reward, done, info = self._post_action(action)
        return self._get_observation(), reward, done, info

    def _pre_action(self, action):
        """Do any preprocessing before taking an action."""
        self.sim.data.ctrl[:] = action

    def _post_action(self, action):
        """Do any housekeeping after taking an action."""
        reward = self.reward(action)
        # done if number of elapsed timesteps is greater than horizon
        self.done = (self.timestep >= self.horizon) and not self.ignore_done
        return reward, self.done, {}

    def reward(self, action):
        """Reward should be a function of state and action."""
        return 0

    def render(self):
        """Renders to an on-screen window."""
        if self.viewer is None:
            self.has_renderer = True
            self.init_viewer()
        self.viewer.render()

    def observation_spec(self):
        """
        Returns an observation as observation specification.

        An alternative design is to return an OrderedDict where the keys are
        the observation names and the values are the shapes of observations.
        We leave this alternative implementation commented out, as we find the
        current design is easier to use in practice.
        """
        observation = self._get_observation()
        return observation
        # observation_spec = OrderedDict()
        # for k, v in observation.items():
        #     observation_spec[k] = v.shape
        # return observation_spec

    def action_spec(self):
        """
        Action specification should be implemented in subclasses.
        Action space is represented by a tuple of (low, high), which are two
        numpy vectors that specify the min/max action limits per dimension.
        """
        raise NotImplementedError

    def reset_from_xml_string(self, xml_string):
        """Reloads the environment from an XML description of the environment."""
        # if there is an active viewer window, destroy it
        self.close()

        # load model from xml
        self.mjpy_model = load_model_from_xml(xml_string)
        self.sim = MjSim(self.mjpy_model)
        self.initialize_time(self.control_freq)
        if self.has_renderer and self.viewer is None:
            self.init_viewer()
        elif self.has_offscreen_renderer:
            render_context = MjRenderContextOffscreen(self.sim)
            render_context.vopt.geomgroup[0] = 1 if self.render_collision_mesh else 0
            render_context.vopt.geomgroup[1] = 1 if self.render_visual_mesh else 0
            self.sim.add_render_context(render_context)

        self.sim_state_initial = self.sim.get_state()
        self._get_reference()
        self.cur_time = 0
        self.timestep = 0
        self.done = False

        # necessary to refresh MjData
        self.sim.forward()

    def find_contacts(self, geoms_1, geoms_2):
        """
        Finds contact between two geom groups.

        Args:
            geoms_1: a list of geom names (string)
            geoms_2: another list of geom names (string)

        Returns:
            iterator of all contacts between @geoms_1 and @geoms_2
        """
        for contact in self.sim.data.contact[0:self.sim.data.ncon]:
            # check contact geom in geoms
            c1_in_g1 = self.sim.model.geom_id2name(contact.geom1) in geoms_1
            c2_in_g2 = self.sim.model.geom_id2name(contact.geom2) in geoms_2
            # check contact geom in geoms (flipped)
            c2_in_g1 = self.sim.model.geom_id2name(contact.geom2) in geoms_1
            c1_in_g2 = self.sim.model.geom_id2name(contact.geom1) in geoms_2
            if (c1_in_g1 and c2_in_g2) or (c1_in_g2 and c2_in_g1):
                yield contact

    def _check_contact(self):
        """Returns True if the gripper is in contact with an object."""
        return False

    def _check_success(self):
        """Returns True if the task has been completed."""
        return False

    def _destroy_viewer(self):
        # if there is an active viewer window, destroy it
        if self.viewer is not None:
            print('_destroy_viewer', self.viewer)
            self.viewer.close()  # change this to viewer.finish()?
            self.viewer = None

    def close(self):
        """Do any cleanup necessary here."""
        self._destroy_viewer()
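
# A sketch of the domain-randomization hook this variant adds (hypothetical
# subclass; _load_model and the actual model setter are elided):
# reset(**kwargs) forwards sampled values into reset_props before
# _reset_internal() rebuilds the MjSim.
class RandomizedDemoEnv(MujocoEnv):
    parameters_spec = {'body_mass_scale': (0.8, 1.2)}  # hypothetical spec format

    def reset_props(self, body_mass_scale=1.0, **kwargs):
        # Store the sampled value; _load_model (not shown) would apply it to
        # the MJCF model before the sim is re-instantiated.
        self.body_mass_scale = body_mass_scale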
def test_inverse_kinematics(): panda_kinematics = ikpy_panda_kinematics.panda_kinematics() model = load_model_from_path("robot.xml") sim = MjSim(model) viewer = MjViewer(sim) timestep = generatePatternedTrajectories.TIMESTEP control_freq = 1/timestep total_time = 3 num_cycles = int(total_time * control_freq) qd_init = np.zeros(7) plt.ion() LW = 1.0 fig = plt.figure(figsize=(4,15)) axes = [] lines = [] goals = [] min_vals = {'x': -0.5, 'y': -0.5, 'z': 0.2} max_vals = {'x': 0.5, 'y': 0.5, 'z': 0.7} ylabels = ["x(m)", "y(m)", "z(m)"] ylbounds = [min_vals['x'], min_vals['y'], min_vals['z']] yubounds = [max_vals['x'], max_vals['y'], max_vals['z']] for i in range(3): axes.append(fig.add_subplot(3,1,i+1)) lines.append(axes[i].plot([],[],'b-', lw=LW)[0]) goals.append(axes[i].plot([],[],'r-', lw=LW)[0]) axes[i].set_ylim([ylbounds[i], yubounds[i]]) axes[i].set_xlim([0,total_time]) axes[i].set_ylabel(ylabels[i], fontsize=8) axes[i].set_xlabel("Time(s)", fontsize=8) for test in range(5): x_target = np.random.rand() * (max_vals['x'] - min_vals['x']) + min_vals['x'] y_target = np.random.rand() * (max_vals['y'] - min_vals['y']) + min_vals['y'] z_target = np.random.rand() * (max_vals['z'] - min_vals['z']) + min_vals['z'] if\ min_vals['x'] > x_target or max_vals['x'] < x_target or \ min_vals['y'] > y_target or max_vals['y'] < y_target or \ min_vals['z'] > z_target or max_vals['z'] < z_target: print("Error! 'xpos' out of range!") print("x = %.3f\ny = %.3f\nz = %.3f" % (x_target, y_target, z_target)) print(max_vals['y'] - min_vals['y']) return # x_target, y_target, z_target = 0.088, -0.0, 0.926 # roll = np.random.rand() * np.pi - np.pi/2 # pitch = np.random.rand() * np.pi - np.pi/2 # yaw = np.random.rand() * np.pi - np.pi/2 roll = 0 pitch = 0 yaw = np.pi ee_goal = [x_target, y_target, z_target, roll, pitch, yaw] # temp = bounds.getRandPosInBounds() # ee_goal = panda_kinematics.forward_kinematics(temp) q_init = bounds.getRandPosInBounds() q_goal = panda_kinematics.inverse_kinematics(translation=ee_goal[0:3], rpy=ee_goal[3:6], init_qpos=q_init) for g in range(3): goals[g].set_ydata(np.ones(num_cycles) * ee_goal[g]) goals[g].set_xdata(np.linspace(0,3,num_cycles)) sim.set_state(MjSimState(time=0,qpos=q_init,qvel=qd_init,act=None,udd_state={})) sim.step() sim_time = 0 for i in range(num_cycles): state = sim.get_state() q = state[1] qd = state[2] sim.data.ctrl[:] = controllers.PDControl(q=q,qd=qd,qgoal=q_goal) sim.step() viewer.render() if i % 70 == 0: xpos = panda_kinematics.forward_kinematics(q) for a in range(3): lines[a].set_xdata(np.append(lines[a].get_xdata(), sim_time)) lines[a].set_ydata(np.append(lines[a].get_ydata(), xpos[a])) fig.canvas.draw() fig.canvas.flush_events() print("[q\t]:{}".format(np.around(q,3))) print("[qgoal\t]:{}".format(np.around(q_goal,3))) print("[qgoal2\t]:{}".format(np.around(panda_kinematics.inverse_kinematics(ee_goal[0:3], ee_goal[3:6]),3))) print("[EE\t]:{}".format(np.around(panda_kinematics.forward_kinematics(q),3))) print("[EEgoal\t]:{}".format(np.around(ee_goal,3))) sim_time += timestep # panda_kinematics.plot_stick_figure(q) for i in range(3): lines[i].set_xdata([]) lines[i].set_ydata([]) time.sleep(1)
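# controllers.PDControl is external to this snippet; a standard joint-space
# PD law consistent with the call above would look like the sketch below.
# KP and KD are illustrative assumptions, not the tuned gains.
import numpy as np

KP = 50.0  # proportional gain on joint error (assumed)
KD = 5.0   # derivative gain damping joint velocity (assumed)

def pd_control(q, qd, qgoal):
    """Torque command that pulls q toward qgoal while damping qd."""
    q, qd, qgoal = np.asarray(q), np.asarray(qd), np.asarray(qgoal)
    return KP * (qgoal - q) - KD * qd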
def main(): parser = argparse.ArgumentParser() parser.add_argument('--seed', type=int, default=1, help='random seed (default: 1)') parser.add_argument('--robot_file', default='../../xml/simrobot/7dof/robot_mocap.xml', type=str, help='path to robot config') parser.add_argument('--save_dir', default='../../xml/gen_xmls/simrobot/reacher', type=str, help='path to save initial joint poses') print('Program starts at: \033[92m %s \033[0m' % datetime.now().strftime("%Y-%m-%d %H:%M")) args = parser.parse_args() np.random.seed(args.seed) filename = 'poses.json' robot_file = args.robot_file save_dir = args.save_dir ini_bds = np.array([[0.3, 0.6], [-0.2, 0.2], [0.5, 0.7]]) tgt_bds = np.array([[0.3, 0.6], [-0.3, 0.3], [-0.1, 0.3]]) initial_num = [4, 5, 3] target_num = [4, 7, 5] target_state = np.mgrid[tgt_bds[0][0]:tgt_bds[0][1]:complex(target_num[0]), tgt_bds[1][0]:tgt_bds[1][1]:complex(target_num[1]), tgt_bds[2][0]:tgt_bds[2][1]:complex(target_num[2])] target_state = target_state.reshape(3, -1).T initial_state = np.mgrid[ ini_bds[0][0]:ini_bds[0][1]:complex(initial_num[0]), ini_bds[1][0]:ini_bds[1][1]:complex(initial_num[1]), ini_bds[2][0]:ini_bds[2][1]:complex(initial_num[2])] initial_state = initial_state.reshape(3, -1).T np.random.shuffle(target_state) np.random.shuffle(initial_state) assert os.path.exists(robot_file) model = load_model_from_path(robot_file) sim = MjSim(model, nsubsteps=40) sim.reset() sim.step() site_xpos_cur = sim.data.site_xpos[0] print('site xpos:', site_xpos_cur) init_joint_angles = [] goal_poses = [] for idx in tqdm(range(initial_state.shape[0])): sim.reset() sim.step() site_xpos_target = initial_state[idx] delta = site_xpos_target - site_xpos_cur sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta for i in range(30): sim.step() dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target) if dist < 0.002: joint_angle = sim.data.qpos[:7] init_joint_angles.append(joint_angle.tolist()) break for idx in tqdm(range(target_state.shape[0])): sim.reset() sim.step() site_xpos_target = target_state[idx] delta = site_xpos_target - site_xpos_cur sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + delta for i in range(30): sim.step() dist = np.linalg.norm(sim.data.site_xpos[0] - site_xpos_target) if dist < 0.002: goal_poses.append(site_xpos_target.tolist()) break print('valid initial pose num: ', len(init_joint_angles)) print('valid goal pose num: ', len(goal_poses)) data = {'initial_joint_angles': init_joint_angles, 'ee_goal': goal_poses} os.makedirs(save_dir, exist_ok=True) save_file = os.path.join(save_dir, filename) print('saving to: ', save_file) with open(save_file, 'w') as f: json.dump(data, f, indent=2) print('Program ends at: \033[92m %s \033[0m' % datetime.now().strftime("%Y-%m-%d %H:%M"))
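# Hedged sketch of the mocap-servoing pattern used twice above: offset the
# mocap body by the Cartesian error and step until the tracked site settles
# within tolerance. The 30-step budget and 2 mm tolerance mirror the loops
# above; settle_to_target is an illustrative helper, not project code.
import numpy as np

def settle_to_target(sim, site_xpos_start, target, steps=30, tol=0.002):
    sim.reset()
    sim.step()
    sim.data.mocap_pos[0] = sim.data.mocap_pos[0] + (target - site_xpos_start)
    for _ in range(steps):
        sim.step()
        if np.linalg.norm(sim.data.site_xpos[0] - target) < tol:
            return True  # converged: sim.data.qpos[:7] now holds a valid joint pose
    return False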
def test_mj_sim_buffers(): model = load_model_from_xml(BASIC_MODEL_XML) # test no callback sim = MjSim(model, nsubsteps=2) assert (sim.udd_state == {}) sim.step() assert (sim.udd_state == {}) # test with callback foo = 10 d = {"foo": foo, "foo_2": np.array([foo, foo])} def udd_callback(sim): return d sim = MjSim(model, nsubsteps=2, udd_callback=udd_callback) assert (sim.udd_state is not None) assert (sim.udd_state["foo"] == foo) assert (sim.udd_state["foo_2"].shape[0] == 2) assert (sim.udd_state["foo_2"][0] == foo) foo = 11 d = {"foo": foo, "foo_2": np.array([foo, foo])} sim.step() assert (sim.udd_state is not None) assert (sim.udd_state["foo"] == foo) assert (sim.udd_state["foo_2"][0] == foo) d = {} with pytest.raises(AssertionError): sim.step() d = {"foo": foo, "foo_2": np.array([foo, foo]), "foo_3": foo} with pytest.raises(AssertionError): sim.step() d = {"foo": foo, "foo_2": np.array([foo, foo, foo])} with pytest.raises(AssertionError): sim.step() d = {"foo": "haha", "foo_2": np.array([foo, foo, foo])} with pytest.raises(AssertionError): sim.step()
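# The assertions above pin down the udd_callback contract: the dict it returns
# must keep the same keys and array shapes on every step, or MjSim raises an
# AssertionError. A hedged minimal example that logs the contact count
# (contact_counter is illustrative):
def contact_counter(sim):
    return {"ncon": np.array([sim.data.ncon])}

# sim = MjSim(model, udd_callback=contact_counter)
# sim.step(); print(sim.udd_state["ncon"])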
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self._seed() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass # ----------------------------- def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 1.0 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([ state.qpos.flat, state.qvel.flat]) # ----------------------------- def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, 
d, _ = self.step(a) t = t+1 curr_frame = self.sim.render(width=640, height=480, mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1,:,:]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite( file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f"% (t1-t0))
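# A compact sketch of the offscreen capture loop above, factored into a helper
# (record_frames is illustrative; sim.render(mode='offscreen') is the
# mujoco_py call already used in visualize_policy_offscreen):
import numpy as np

def record_frames(sim, n_frames, width=640, height=480, camera_name=None):
    frames = []
    for _ in range(n_frames):
        sim.step()
        frame = sim.render(width=width, height=height, mode='offscreen',
                           camera_name=camera_name, device_id=0)
        frames.append(frame[::-1, :, :])  # offscreen frames arrive upside down
    return np.asarray(frames)  # (n_frames, height, width, 3), ready for skvideo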
def train_POMDP(self): args = self.args ROOT_DIR = os.path.dirname(os.path.dirname( os.path.abspath(__file__))) # corl2019 PARENT_DIR = os.path.dirname(ROOT_DIR) # research # Create the output directory if it does not exist output_dir = os.path.join(PARENT_DIR, 'multistep_pomdp', args.output_dir) if not os.path.isdir(output_dir): os.makedirs(output_dir) # write args to file with open(os.path.join(output_dir, 'args.txt'), 'w+') as f: json.dump(args.__dict__, f, indent=2) # Create our policy net and a target net self.policy_net_1 = DRQN(args.ftdim, args.outdim).to(args.device) self.policy_net_2 = DRQN(args.ftdim, args.outdim).to(args.device) if args.position: self.tactile_net_1 = TactileNet(args.indim - 6, args.ftdim).to(args.device) self.tactile_net_2 = TactileNet(args.indim - 6, args.ftdim).to(args.device) elif args.force: self.tactile_net_1 = TactileNet(args.indim - 390, args.ftdim).to(args.device) self.tactile_net_2 = TactileNet(args.indim - 390, args.ftdim).to(args.device) else: self.tactile_net_1 = TactileNet(args.indim, args.ftdim).to(args.device) self.tactile_net_2 = TactileNet(args.indim, args.ftdim).to(args.device) # Set up the optimizer self.policy_optimizer_1 = optim.RMSprop(self.policy_net_1.parameters(), lr=args.lr) self.policy_optimizer_2 = optim.RMSprop(self.policy_net_2.parameters(), lr=args.lr) self.tactile_optimizer_1 = optim.RMSprop( self.tactile_net_1.parameters(), lr=args.lr) self.tactile_optimizer_2 = optim.RMSprop( self.tactile_net_2.parameters(), lr=args.lr) self.memory = RecurrentMemory(800) self.steps_done = 0 # Setup the state normalizer normalizer = Multimodal_Normalizer(num_inputs=args.indim, device=args.device) print_variables = {'durations': [], 'rewards': [], 'loss': []} start_episode = 0 if args.weight_policy: if os.path.exists(args.weight_policy): checkpoint = torch.load(args.weight_policy) self.policy_net_1.load_state_dict(checkpoint['policy_net_1']) self.policy_net_2.load_state_dict(checkpoint['policy_net_2']) self.policy_optimizer_1.load_state_dict( checkpoint['policy_optimizer_1']) self.policy_optimizer_2.load_state_dict( checkpoint['policy_optimizer_2']) start_episode = checkpoint['epochs'] self.steps_done = checkpoint['steps_done'] with open( os.path.join(os.path.dirname(args.weight_policy), 'results_pomdp.pkl'), 'rb') as file: plot_dict = pickle.load(file) print_variables['durations'] = plot_dict['durations'] print_variables['rewards'] = plot_dict['rewards'] if args.normalizer_file: if os.path.exists(args.normalizer_file): normalizer.restore_state(args.normalizer_file) if args.memory: if os.path.exists(args.memory): self.memory.load(args.memory) if args.weight_tactile: checkpoint = torch.load(args.weight_tactile) self.tactile_net_1.load_state_dict(checkpoint['tactile_net_1']) self.tactile_optimizer_1.load_state_dict( checkpoint['tactile_optimizer_1']) self.tactile_net_2.load_state_dict(checkpoint['tactile_net_2']) self.tactile_optimizer_2.load_state_dict( checkpoint['tactile_optimizer_2']) action_space = ActionSpace(dp=0.06, df=10) # Create robot, reset simulation and grasp handle model = load_model_from_path(args.model_path) sim = MjSim(model) sim_param = SimParameter(sim) sim.step() if args.render: viewer = MjViewer(sim) else: viewer = None robot = RobotSim(sim, viewer, sim_param, args.render, self.break_threshold) tactile_obs_space = TactileObs( robot.get_gripper_xpos(), # 24 robot.get_all_touch_buffer(args.hap_sample)) # 30 x 6 # Main training loop for ii in range(start_episode, args.epochs): self.steps_done += 1 start_time =
time.time() act_sequence = [] act_length = [] velcro_params = init_model(robot.mj_sim) robot.reset_simulation() ret = robot.grasp_handle() if not ret: continue # Local memory for current episode localMemory = [] # Get current observation hidden_state_1, cell_state_1 = self.policy_net_1.init_hidden_states( batch_size=1, device=args.device) hidden_state_2, cell_state_2 = self.policy_net_2.init_hidden_states( batch_size=1, device=args.device) broken_so_far = 0 # pick a random action initially action = random.randrange(0, 5) current_state = None next_state = None t = 0 while t < args.max_iter: if not args.quiet and t == 0: print("Running training episode: {}, iteration: {}".format(ii, t)) if args.position: multistep_obs = np.empty((0, args.indim - 6)) elif args.force: multistep_obs = np.empty((0, args.indim - 390)) else: multistep_obs = np.empty((0, args.indim)) prev_action = action for k in range(args.len_ub): # Observe tactile features and stack them tactile_obs = tactile_obs_space.get_state() normalizer.observe(tactile_obs) tactile_obs = normalizer.normalize(tactile_obs) if args.position: tactile_obs = tactile_obs[6:] elif args.force: tactile_obs = tactile_obs[:6] multistep_obs = np.vstack((multistep_obs, tactile_obs)) # current jpos current_pos = robot.get_gripper_jpos()[:3] # Perform action delta = action_space.get_action( self.ACTIONS[action])['delta'][:3] target_position = np.add(robot.get_gripper_jpos()[:3], np.array(delta)) target_pose = np.hstack( (target_position, robot.get_gripper_jpos()[3:])) robot.move_joint(target_pose, True, self.gripping_force, hap_sample=args.hap_sample) # Observe new state tactile_obs_space.update( robot.get_gripper_xpos(), # 24 robot.get_all_touch_buffer(args.hap_sample)) # 30x6 displacement = la.norm(robot.get_gripper_jpos()[:3] - current_pos) if displacement / 0.06 < 0.7: break # feed the stitched multi-step tactile observation into the tactile net to generate a tactile feature action, hidden_state_1, cell_state_1 = self.select_action( multistep_obs, hidden_state_1, cell_state_1) if t == 0: next_state = multistep_obs.copy() else: current_state = next_state.copy() next_state = multistep_obs.copy() # record actions in this epoch act_sequence.append(prev_action) act_length.append(k) # Get reward done, num = robot.update_tendons() failure = robot.check_slippage() if num > broken_so_far: reward = num - broken_so_far broken_so_far = num else: if failure: reward = -20 else: reward = 0 t += k + 1 # Stop after the max number of iterations if t >= self.max_iter: done = True if done or failure: next_state = None # Push new Transition into memory if t > k + 1: localMemory.append( Transition(current_state, prev_action, next_state, reward)) # Optimize the model if self.steps_done % 10 == 0: self.optimize() # If we are done, reset the model if done or failure: self.memory.push(localMemory) if failure: print_variables['durations'].append(self.max_iter) else: print_variables['durations'].append(t) print_variables['rewards'].append(broken_so_far) plot_variables(self.figure, print_variables, "Training POMDP") print("Model parameters: {}".format(velcro_params)) print( "{} actions in this epoch: {} \n Action lengths: {}" .format(len(act_sequence), act_sequence, act_length)) print("Epoch {} took {}s, total number broken: {}\n\n".
format(ii, time.time() - start_time, broken_so_far)) break # Save checkpoints every few iterations if ii % args.save_freq == 0: save_path = os.path.join(output_dir, 'policy_' + str(ii) + '.pth') torch.save( { 'epochs': ii, 'steps_done': self.steps_done, 'policy_net_1': self.policy_net_1.state_dict(), 'policy_net_2': self.policy_net_2.state_dict(), 'policy_optimizer_1': self.policy_optimizer_1.state_dict(), 'policy_optimizer_2': self.policy_optimizer_2.state_dict(), }, save_path) save_path = os.path.join(output_dir, 'tactile_' + str(ii) + '.pth') torch.save( { 'tactile_net_1': self.tactile_net_1.state_dict(), 'tactile_net_2': self.tactile_net_2.state_dict(), 'tactile_optimizer_1': self.tactile_optimizer_1.state_dict(), 'tactile_optimizer_2': self.tactile_optimizer_2.state_dict(), }, save_path) write_results(os.path.join(output_dir, 'results_pomdp.pkl'), print_variables) self.memory.save_memory( os.path.join(output_dir, 'memory.pickle')) if args.savefig_path: now = dt.datetime.now() self.figure[0].savefig( args.savefig_path + '{}_{}_{}.png'.format(now.month, now.day, now.hour), format='png') print('Training done') plt.show() return print_variables
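# Transition and RecurrentMemory are project classes not shown in this
# snippet; the loop above implies roughly the following shapes (a hedged
# sketch, not the actual implementation): an episode is a list of
# Transitions, and the replay buffer stores whole episodes so the DRQN can
# be trained on sequences rather than isolated steps.
from collections import namedtuple, deque

Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

class RecurrentMemorySketch:
    def __init__(self, capacity):
        self.episodes = deque(maxlen=capacity)

    def push(self, episode):
        # episode: list of Transition from one rollout
        self.episodes.append(episode)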
class BaseEnv: def __init__( self, robot_folders, robot_dir, substeps, tol=0.02, train=True, with_kin=None, with_dyn=None, multi_goal=False, ): self.with_kin = with_kin self.with_dyn = with_dyn self.multi_goal = multi_goal self.goal_dim = 3 if self.with_dyn: norm_file = os.path.join(robot_dir, 'stats/dyn_stats.json') with open(norm_file, 'r') as f: stats = json.load(f) self.dyn_mu = np.array(stats['mu']).reshape(-1) self.dyn_sigma = np.array(stats['sigma']).reshape(-1) self.dyn_min = np.array(stats['min']).reshape(-1) self.dyn_max = np.array(stats['max']).reshape(-1) self.nsubsteps = substeps self.metadata = {} self.reward_range = (-np.inf, np.inf) self.spec = None self.dist_tol = tol self.viewer = None self.links = [ 'gl0', 'gl1_1', 'gl1_2', 'gl2', 'gl3_1', 'gl3_2', 'gl4', 'gl5_1', 'gl5_2', 'gl6' ] self.bodies = [ "connector_plate_base", "electric_gripper_base", "gripper_l_finger", "gripper_l_finger_tip", "gripper_r_finger", "gripper_r_finger_tip", "l0", "l1", "l2", "l3", "l4", "l5", "l6" ] self.site_set = ['j0', 'j1', 'j2', 'j3', 'j4', 'j5', 'j6'] self.joint_set = self.site_set self.robots = [] for folder in robot_folders: self.robots.append(os.path.join(robot_dir, folder)) self.dir2id = {folder: idx for idx, folder in enumerate(self.robots)} self.robot_num = len(self.robots) if train: self.test_robot_num = min(100, self.robot_num) self.train_robot_num = self.robot_num - self.test_robot_num self.test_robot_ids = list( range(self.train_robot_num, self.robot_num)) self.train_test_robot_num = min(100, self.train_robot_num) self.train_test_robot_ids = list(range(self.train_test_robot_num)) self.train_test_conditions = self.train_test_robot_num else: self.test_robot_num = self.robot_num self.train_robot_num = self.robot_num - self.test_robot_num self.test_robot_ids = list(range(self.robot_num)) self.test_conditions = self.test_robot_num print('Train robots: ', self.train_robot_num) print('Test robots: ', self.test_robot_num) print('Multi goal:', self.multi_goal) self.reset_robot(0) self.ob_dim = self.get_obs()[0].size print('Ob dim:', self.ob_dim) high = np.inf * np.ones(self.ob_dim) low = -high self.observation_space = spaces.Box(low, high, dtype=np.float32) self.ep_reward = 0 self.ep_len = 0 def reset(self, robot_id=None): raise NotImplementedError def step(self, action): raise NotImplementedError def update_action_space(self): actuators = self.sim.model._actuator_name2id.keys() valid_joints = [ac for ac in actuators if ac in self.joint_set] self.act_dim = len(valid_joints) bounds = self.sim.model.actuator_ctrlrange[:self.act_dim] self.ctrl_low = np.copy(bounds[:, 0]) self.ctrl_high = np.copy(bounds[:, 1]) self.action_space = spaces.Box(self.ctrl_low, self.ctrl_high, dtype=np.float32) def scale_action(self, action): act_k = (self.action_space.high - self.action_space.low) / 2. act_b = (self.action_space.high + self.action_space.low) / 2. 
return act_k * action + act_b def reset_robot(self, robot_id): self.robot_folder_id = self.dir2id[self.robots[robot_id]] robot_file = os.path.join(self.robots[robot_id], 'robot.xml') self.model = load_model_from_path(robot_file) self.sim = MjSim(self.model, nsubsteps=self.nsubsteps) self.update_action_space() self.sim.reset() self.sim.step() if self.viewer is not None: self.viewer = MjViewer(self.sim) def test_reset(self, cond): robot_id = self.test_robot_ids[cond] return self.reset(robot_id=robot_id) def train_test_reset(self, cond): robot_id = self.train_test_robot_ids[cond] return self.reset(robot_id=robot_id) def cal_reward(self, s, goal, a): dist = np.linalg.norm(s - goal) if dist < self.dist_tol: done = True reward_dist = 1 else: done = False reward_dist = -1 reward = reward_dist reward -= 0.1 * np.square(a).sum() return reward, dist, done def render(self, mode='human'): if self.viewer is None: self.viewer = MjViewer(self.sim) self.viewer.render() def get_obs(self): qpos = self.get_qpos(self.sim) qvel = self.get_qvel(self.sim) ob = np.concatenate([qpos, qvel]) if self.with_kin: link_rela = self.get_xpos_xrot(self.sim) ob = np.concatenate([ob, link_rela]) if self.with_dyn: body_mass = self.get_body_mass(self.sim) joint_friction = self.get_friction(self.sim) joint_damping = self.get_damping(self.sim) armature = self.get_armature(self.sim) dyn_vec = np.concatenate( (body_mass, joint_friction, joint_damping, armature)) dyn_vec = np.divide((dyn_vec - self.dyn_min), self.dyn_max - self.dyn_min) ob = np.concatenate([ob, dyn_vec]) target_pos = self.sim.data.get_site_xpos('target').flatten() ob = np.concatenate([ob, target_pos]) achieved_goal = self.sim.data.get_site_xpos('ref_pt') return ob, achieved_goal def get_link_length(self, sim): link_length = [] for link in self.links: geom_id = sim.model._geom_name2id[link] link_length.append(sim.model.geom_size[geom_id][1]) return np.asarray(link_length).reshape(-1) def get_qpos(self, sim): angle_noise_range = 0.02 qpos = sim.data.qpos[:self.act_dim] qpos += np.random.uniform(-angle_noise_range, angle_noise_range, self.act_dim) qpos = np.pad(qpos, (0, 7 - self.act_dim), mode='constant', constant_values=0) return qpos.reshape(-1) def get_qvel(self, sim): velocity_noise_range = 0.02 qvel = sim.data.qvel[:self.act_dim] qvel += np.random.uniform(-velocity_noise_range, velocity_noise_range, self.act_dim) qvel = np.pad(qvel, (0, 7 - self.act_dim), mode='constant', constant_values=0) return qvel.reshape(-1) def get_xpos_xrot(self, sim): xpos = [] xrot = [] for joint_id in range(self.act_dim): joint = sim.model._actuator_id2name[joint_id] if joint == 'j0': pos1 = sim.data.get_body_xpos('base_link') mat1 = sim.data.get_body_xmat('base_link') else: prev_id = joint_id - 1 prev_joint = sim.model._actuator_id2name[prev_id] pos1 = sim.data.get_site_xpos(prev_joint) mat1 = sim.data.get_site_xmat(prev_joint) pos2 = sim.data.get_site_xpos(joint) mat2 = sim.data.get_site_xmat(joint) relative_pos = pos2 - pos1 rot_euler = self.relative_rotation(mat1, mat2) xpos.append(relative_pos) xrot.append(rot_euler) xpos = np.array(xpos).flatten() xrot = np.array(xrot).flatten() xpos = np.pad(xpos, (0, (7 - self.act_dim) * 3), mode='constant', constant_values=0) xrot = np.pad(xrot, (0, (7 - self.act_dim) * 3), mode='constant', constant_values=0) ref_pt_xpos = self.sim.data.get_site_xpos('ref_pt') ref_pt_xmat = self.sim.data.get_site_xmat('ref_pt') relative_pos = ref_pt_xpos - pos2 rot_euler = self.relative_rotation(mat2, ref_pt_xmat) xpos = np.concatenate((xpos, 
relative_pos.flatten())) xrot = np.concatenate((xrot, rot_euler.flatten())) pos_rot = np.concatenate((xpos, xrot)) return pos_rot def get_damping(self, sim): damping = sim.model.dof_damping[:self.act_dim] damping = np.pad(damping, (0, 7 - self.act_dim), mode='constant', constant_values=0) return damping.reshape(-1) def get_friction(self, sim): friction = sim.model.dof_frictionloss[:self.act_dim] friction = np.pad(friction, (0, 7 - self.act_dim), mode='constant', constant_values=0) return friction.reshape(-1) def get_body_mass(self, sim): body_mass = [] for body in self.bodies: body_id = sim.model._body_name2id[body] body_mass.append(sim.model.body_mass[body_id]) return np.asarray(body_mass).reshape(-1) def get_armature(self, sim): armature = sim.model.dof_armature[:self.act_dim] armature = np.pad(armature, (0, 7 - self.act_dim), mode='constant', constant_values=0) return armature.reshape(-1) def relative_rotation(self, mat1, mat2): # return the euler x,y,z of the relative rotation # (w.r.t site1 coordinate system) from site2 to site1 rela_mat = np.dot(np.linalg.inv(mat1), mat2) return rotations.mat2euler(rela_mat) def close(self): pass
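# Hedged sketch of the reset/step contract BaseEnv leaves abstract: reset
# picks a robot and returns an observation; step scales the normalized action
# into ctrl, advances the sim, and scores the end-effector site against the
# target with cal_reward. ReacherEnvSketch is illustrative; the 'target' site
# name follows get_obs above.
import numpy as np

class ReacherEnvSketch(BaseEnv):
    def reset(self, robot_id=None):
        if robot_id is None:
            robot_id = np.random.randint(max(self.train_robot_num, 1))
        self.reset_robot(robot_id)
        self.ep_reward, self.ep_len = 0, 0
        ob, _ = self.get_obs()
        return ob

    def step(self, action):
        self.sim.data.ctrl[:self.act_dim] = self.scale_action(action)
        self.sim.step()
        ob, achieved_goal = self.get_obs()
        goal = self.sim.data.get_site_xpos('target')
        reward, dist, done = self.cal_reward(achieved_goal, goal, action)
        self.ep_reward += reward
        self.ep_len += 1
        return ob, reward, done, {'dist': dist}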
class MujocoEnv(gym.Env): """Superclass for all MuJoCo environments. """ def __init__(self, model_path, frame_skip): if model_path.startswith("/"): fullpath = model_path else: fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) if not path.exists(fullpath): raise IOError("File %s does not exist" % fullpath) self.frame_skip = frame_skip self.model = load_model_from_path(fullpath) self.sim = MjSim(self.model) self.data = self.sim.data self._seed() self.metadata = { 'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': int(np.round(1.0 / self.dt)) } self.mujoco_render_frames = False self.init_qpos = self.data.qpos.ravel().copy() self.init_qvel = self.data.qvel.ravel().copy() observation, _reward, done, _info = self._step(np.zeros(self.model.nu)) assert not done self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size bounds = self.model.actuator_ctrlrange.copy() low = bounds[:, 0] high = bounds[:, 1] self.action_space = spaces.Box(low, high) high = np.inf*np.ones(self.obs_dim) low = -high self.observation_space = spaces.Box(low, high) self.sim.forward() def _seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] # methods to override: # ---------------------------- def reset_model(self): """ Reset the robot degrees of freedom (qpos and qvel). Implement this in each subclass. """ raise NotImplementedError def mj_viewer_setup(self): """ Due to specifics of new mujoco rendering, the standard viewer cannot be used with this set-up. Instead we use this mujoco specific function. """ pass def viewer_setup(self): """ Does not work. Use mj_viewer_setup() instead """ pass # ----------------------------- def _reset(self): self.sim.reset() self.sim.forward() ob = self.reset_model() return ob def set_state(self, qpos, qvel): assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) state = self.sim.get_state() for i in range(self.model.nq): state.qpos[i] = qpos[i] for i in range(self.model.nv): state.qvel[i] = qvel[i] self.sim.set_state(state) self.sim.forward() @property def dt(self): return self.model.opt.timestep * self.frame_skip def do_simulation(self, ctrl, n_frames): for i in range(self.model.nu): self.sim.data.ctrl[i] = ctrl[i] for _ in range(n_frames): self.sim.step() if self.mujoco_render_frames is True: self.mj_render() def mj_render(self): try: self.viewer.render() except: self.mj_viewer_setup() self.viewer._run_speed = 0.5 #self.viewer._run_speed /= self.frame_skip self.viewer.render() def _get_viewer(self): return None def state_vector(self): state = self.sim.get_state() return np.concatenate([ state.qpos.flat, state.qvel.flat]) # ----------------------------- # def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): # self.mujoco_render_frames = True # for ep in range(num_episodes): # o = self.reset() # d = False # t = 0 # while t < horizon and d is False: # a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] # o, r, d, _ = self.step(a) # t = t+1 # self.mujoco_render_frames = False def visualize_policy(self, policy, horizon=1000, num_episodes=1, mode='exploration'): self.mujoco_render_frames = True for ep in range(num_episodes): o = self.reset() d = False t = 0 # horizon = 1 while t < horizon and d is False: if policy.type == 'object': obs_used = o[self.object_start:] a = policy.get_action(obs_used)[0] if mode == 'exploration' else policy.get_action(obs_used)[1]['evaluation'] o, r, d, _ = 
self.step(a) t = t+1 # timer.sleep(1) self.mujoco_render_frames = False def visualize_policy_offscreen(self, policy, horizon=1000, num_episodes=1, frame_size=(640,480), mode='exploration', save_loc='/tmp/', filename='newvid', camera_name=None): import skvideo.io for ep in range(num_episodes): print("Episode %d: rendering offline " % ep, end='', flush=True) o = self.reset() d = False t = 0 arrs = [] t0 = timer.time() while t < horizon and d is False: a = policy.get_action(o)[0] if mode == 'exploration' else policy.get_action(o)[1]['evaluation'] o, r, d, _ = self.step(a) t = t+1 curr_frame = self.sim.render(width=frame_size[0], height=frame_size[1], mode='offscreen', camera_name=camera_name, device_id=0) arrs.append(curr_frame[::-1,:,:]) print(t, end=', ', flush=True) file_name = save_loc + filename + str(ep) + ".mp4" skvideo.io.vwrite( file_name, np.asarray(arrs)) print("saved", file_name) t1 = timer.time() print("time taken = %f"% (t1-t0))
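# Usage sketch for the state helpers above: perturb the initial pose with
# set_state and roll the passive dynamics forward. randomized_rollout and the
# noise scale are illustrative assumptions; env is any concrete subclass of
# this MujocoEnv.
import numpy as np

def randomized_rollout(env, n_steps=100, noise=0.01):
    qpos = env.init_qpos + np.random.uniform(-noise, noise, env.model.nq)
    qvel = env.init_qvel + np.random.uniform(-noise, noise, env.model.nv)
    env.set_state(qpos, qvel)
    for _ in range(n_steps):
        env.do_simulation(np.zeros(env.model.nu), env.frame_skip)
    return env.state_vector()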
class Dribble_Env(object): def __init__(self): self.model = load_model_from_path("./xml/world5.xml") self.sim = MjSim(self.model) self.viewer = MyMjViewer(self.sim) self.max_vel = [-1000, 1000] self.x_motor = 0 self.y_motor = 0 self.date_time = datetime.datetime.now() self.path = "./datas/path_date" + str( self.date_time.strftime("_%Y%m%d_%H%M%S")) os.mkdir(self.path) def step(self, action): self.x_motor = ((action % 3) - 1) * 200 self.y_motor = ((action // 3) - 1) * 200 self.sim.data.ctrl[0] = self.x_motor self.sim.data.ctrl[1] = self.y_motor self.sim.step() def get_state(self): robot_x, robot_y = self.sim.data.body_xpos[1][0:2] robot_xv, robot_yv = self.sim.data.qvel[0:2] ball_x, ball_y = self.sim.data.body_xpos[2][0:2] ball_xv, ball_yv = self.sim.data.qvel[2:4] ball_pos_local = -(robot_x - ball_x), -(robot_y - ball_y) object1_x, object1_y = self.sim.data.body_xpos[3][ 0] - robot_x, self.sim.data.body_xpos[3][1] - robot_y object2_x, object2_y = self.sim.data.body_xpos[4][ 0] - robot_x, self.sim.data.body_xpos[4][1] - robot_y object3_x, object3_y = self.sim.data.body_xpos[5][ 0] - robot_x, self.sim.data.body_xpos[5][1] - robot_y object4_x, object4_y = self.sim.data.body_xpos[6][ 0] - robot_x, self.sim.data.body_xpos[6][1] - robot_y return [robot_x, robot_y, ball_pos_local[0], ball_pos_local[1], \ robot_xv, robot_yv, object1_x, object1_y, object2_x, object2_y,\ object3_x, object3_y, object4_x, object4_y, ball_x, ball_y, ball_xv, ball_yv] def check_done(self): ball_x, ball_y = self.get_state()[14:16] if ball_x > 80 and -25 < ball_y < 25: return True else: return False def check_wall(self): ball_x, ball_y = self.get_state()[14:16] if math.fabs(ball_y) > 51: return True elif ball_x > 81 and math.fabs(ball_y) > 25: return True else: return False def check_avoidance(self, object_num=4): for i in range(object_num): if math.fabs(self.sim.data.qvel[5 + i * 3]) > 0.1 or math.fabs( self.sim.data.qvel[6 + 3 * i]) > 0.1: return True return False def reset(self): self.x_motor = 0 self.y_motor = 0 self.robot_x_data = [] self.robot_y_data = [] self.ball_x_data = [] self.ball_y_data = [] self.sim.reset() # self.sim.data.qpos[0] = np.random.randint(-3,3) self.sim.data.qpos[1] = np.random.randint(-5, 5) def render(self): self.viewer.render() def plot_data(self, step, t, done, episode, flag, reward): self.field_x = [-90, -90, 90, 90, -90] self.field_y = [-60, 60, 60, -60, -60] self.robot_x_data.append(self.sim.data.body_xpos[1][0]) self.robot_y_data.append(self.sim.data.body_xpos[1][1]) self.ball_x_data.append(self.sim.data.body_xpos[2][0]) self.ball_y_data.append(self.sim.data.body_xpos[2][1]) datas = str(self.robot_x_data[-1]) + " " + str( self.robot_y_data[-1]) + " " + str( self.ball_x_data[-1]) + " " + str( self.ball_y_data[-1]) + " " + str(reward) with open( self.path + '/plotdata_' + str(episode + 1).zfill(4) + '.txt', 'a') as f: f.write(str(datas) + '\n') if (t >= step - 1 or done) and flag: fig1 = plt.figure() plt.ion() plt.show() plt.plot(self.ball_x_data, self.ball_y_data, marker='o', markersize=2, color="red", label="ball") plt.plot(self.robot_x_data, self.robot_y_data, marker="o", markersize=2, color='blue', label="robot") plt.plot(self.sim.data.body_xpos[3][0], self.sim.data.body_xpos[3][1], marker="o", markersize=8, color='black') plt.plot(self.sim.data.body_xpos[4][0], self.sim.data.body_xpos[4][1], marker="o", markersize=8, color='black') plt.plot(self.sim.data.body_xpos[5][0], self.sim.data.body_xpos[5][1], marker="o", markersize=8, color='black')
plt.plot(self.sim.data.body_xpos[6][0], self.sim.data.body_xpos[6][1], marker="o", markersize=8, color='black') plt.plot(self.field_x, self.field_y, markersize=1, color="black") plt.plot(80, 0, marker="X", color="green", label="goal") plt.legend(loc="lower right") plt.draw() plt.pause(0.001) plt.close(1)
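# Hedged driver sketch for Dribble_Env: the scalar action in step() decodes
# as a 3x3 grid, x_motor = ((a % 3) - 1) * 200 and y_motor = ((a // 3) - 1) * 200,
# so a in {0..8} covers every motor combination. random_dribble_episode is
# illustrative, not part of the class.
import numpy as np

def random_dribble_episode(max_steps=500):
    env = Dribble_Env()
    env.reset()
    for t in range(max_steps):
        env.step(np.random.randint(9))  # random policy over the 9 actions
        if env.check_done() or env.check_wall():
            return t
    return max_steps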
def train_POMDP(self): args = self.args ROOT_DIR = os.path.dirname(os.path.dirname( os.path.abspath(__file__))) # corl2019 PARENT_DIR = os.path.dirname(ROOT_DIR) # research # Create the output directory if it does not exist output_dir = os.path.join(PARENT_DIR, 'multistep_pomdp', args.output_dir) if not os.path.isdir(output_dir): os.makedirs(output_dir) # write args to file with open(os.path.join(output_dir, 'args.txt'), 'w+') as f: json.dump(args.__dict__, f, indent=2) # Create our policy net and a target net self.policy_net_1 = DRQN(args.indim, args.outdim).to(args.device) self.policy_net_2 = DRQN(args.indim, args.outdim).to(args.device) self.policy_net_3 = DRQN(args.indim, args.outdim).to(args.device) # Set up the optimizer self.optimizer_1 = optim.RMSprop(self.policy_net_1.parameters()) self.optimizer_2 = optim.RMSprop(self.policy_net_2.parameters()) self.optimizer_3 = optim.RMSprop(self.policy_net_3.parameters()) self.memory = RecurrentMemory(800) self.steps_done = 0 # Setup the state normalizer normalizer = Multimodal_Normalizer(num_inputs=args.indim, device=args.device) print_variables = {'durations': [], 'rewards': [], 'loss': []} start_episode = 0 if args.checkpoint_file: if os.path.exists(args.checkpoint_file): checkpoint = torch.load(args.checkpoint_file) self.policy_net_1.load_state_dict(checkpoint['policy_net_1']) self.policy_net_2.load_state_dict(checkpoint['policy_net_2']) self.policy_net_3.load_state_dict(checkpoint['policy_net_3']) self.optimizer_1.load_state_dict(checkpoint['optimizer_1']) self.optimizer_2.load_state_dict(checkpoint['optimizer_2']) self.optimizer_3.load_state_dict(checkpoint['optimizer_3']) start_episode = checkpoint['epochs'] self.steps_done = checkpoint['steps_done'] with open( os.path.join(os.path.dirname(args.checkpoint_file), 'results_pomdp.pkl'), 'rb') as file: plot_dict = pickle.load(file) print_variables['durations'] = plot_dict['durations'] print_variables['rewards'] = plot_dict['rewards'] if args.normalizer_file: if os.path.exists(args.normalizer_file): normalizer.restore_state(args.normalizer_file) if args.memory: if os.path.exists(args.memory): self.memory.load(args.memory) action_space = ActionSpace(dp=0.06, df=10) # Create robot, reset simulation and grasp handle model = load_model_from_path(args.model_path) sim = MjSim(model) sim_param = SimParameter(sim) sim.step() if args.render: viewer = MjViewer(sim) else: viewer = None robot = RobotSim(sim, viewer, sim_param, args.render, self.break_threshold) # Main training loop for ii in range(start_episode, args.epochs): start_time = time.time() self.steps_done += 1 act_sequence = [] if args.sim: sim_params = init_model(robot.mj_sim) robot.reset_simulation() ret = robot.grasp_handle() if not ret: continue # Local memory for current episode localMemory = [] # Get current observation hidden_state_1, cell_state_1 = self.policy_net_1.init_hidden_states( batch_size=1, device=args.device) hidden_state_2, cell_state_2 = self.policy_net_2.init_hidden_states( batch_size=1, device=args.device) hidden_state_3, cell_state_3 = self.policy_net_3.init_hidden_states( batch_size=1, device=args.device) observation_space = TactileObs( robot.get_gripper_xpos(), # 24 robot.get_all_touch_buffer(args.hap_sample)) # 30 x 7 broken_so_far = 0 for t in count(): if not args.quiet and t % 50 == 0: print("Running training episode: {}, iteration: {}".format( ii, t)) # Select action observation = observation_space.get_state() if args.position: observation = observation[6:] if args.shear: indices =
np.ones(len(observation), dtype=bool) indices[6:166] = False observation = observation[indices] if args.force: observation = observation[:166] normalizer.observe(observation) observation = normalizer.normalize(observation) action, hidden_state_1, cell_state_1 = self.select_action( observation, hidden_state_1, cell_state_1) # record actions in this epoch act_sequence.append(action) # Perform action delta = action_space.get_action( self.ACTIONS[action])['delta'][:3] target_position = np.add(robot.get_gripper_jpos()[:3], np.array(delta)) target_pose = np.hstack( (target_position, robot.get_gripper_jpos()[3:])) if args.sim: robot.move_joint(target_pose, True, self.gripping_force, hap_sample=args.hap_sample) # Get reward done, num = robot.update_tendons() failure = robot.check_slippage() if num > broken_so_far: reward = num - broken_so_far broken_so_far = num else: reward = 0 # # Add a movement reward # reward -= 0.05 * np.linalg.norm(target_position - robot.get_gripper_jpos()[:3]) / np.linalg.norm(delta) # Observe new state observation_space.update( robot.get_gripper_xpos(), # 24 robot.get_all_touch_buffer(args.hap_sample)) # 30x7 # Stop after the max number of iterations if t >= self.max_iter: done = True # Check if done if not done and not failure: next_state = observation_space.get_state() if args.position: next_state = next_state[6:] if args.shear: indices = np.ones(len(next_state), dtype=bool) indices[6:166] = False next_state = next_state[indices] if args.force: next_state = next_state[:166] normalizer.observe(next_state) next_state = normalizer.normalize(next_state) else: next_state = None # Push new Transition into memory localMemory.append( Transition(observation, action, next_state, reward)) # Optimize the model if t % 10 == 0: loss = self.optimize_model() # if loss: # print_variables['loss'].append(loss.item()) # If we are done, reset the model if done or failure: self.memory.push(localMemory) if failure: print_variables['durations'].append(self.max_iter) else: print_variables['durations'].append(t) print_variables['rewards'].append(broken_so_far) plot_variables(self.figure, print_variables, "Training POMDP") print("Model parameters: {}".format(sim_params)) print("Actions in this epoch are: {}".format(act_sequence)) print("Epoch {} took {}s, total number broken: {}\n\n". format(ii, time.time() - start_time, broken_so_far)) break # Save checkpoints every few iterations if ii % args.save_freq == 0: save_path = os.path.join( output_dir, 'checkpoint_model_' + str(ii) + '.pth') torch.save( { 'epochs': ii, 'steps_done': self.steps_done, 'policy_net_1': self.policy_net_1.state_dict(), 'policy_net_2': self.policy_net_2.state_dict(), 'policy_net_3': self.policy_net_3.state_dict(), 'optimizer_1': self.optimizer_1.state_dict(), 'optimizer_2': self.optimizer_2.state_dict(), 'optimizer_3': self.optimizer_3.state_dict(), }, save_path) self.memory.save_memory(os.path.join(output_dir, 'memory.pickle')) if args.savefig_path: now = dt.datetime.now() self.figure[0].savefig( args.savefig_path + '{}_{}_{}.png'.format(now.month, now.day, now.hour), format='png') print('Training done') plt.show() return print_variables
class BlockPickAndPlaceEnv: def __init__(self, num_objects, num_colors, img_dim, include_z, random_initialize=False, view=False): self.asset_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), '../mujoco_data/stl/') self.img_dim = img_dim self.include_z = include_z self.polygons = ['cube', 'horizontal_rectangle', 'tetrahedron'][:1] self.num_colors = num_colors self.num_objects = num_objects self.view = view # Hyper-parameters self.internal_steps_per_step = 2000 self.drop_height = 5 self.pick_height = 0.59 self.bounds = { 'x_min': -2.5, 'x_max': 2.5, 'y_min': 1.0, 'y_max': 4.0, 'z_min': 0.05, 'z_max': 2.2 } self.TOLERANCE = 0.2 self.names = [] self.blocks = [] if random_initialize: self.reset() #### Env initialization functions def get_unique_name(self, polygon): i = 0 while '{}_{}'.format(polygon, i) in self.names: i += 1 name = '{}_{}'.format(polygon, i) self.names.append(name) return name def add_mesh(self, polygon, pos, quat, rgba): name = self.get_unique_name(polygon) self.blocks.append({ 'name': name, 'polygon': polygon, 'pos': np.array(pos), 'quat': np.array(quat), 'rgba': rgba, 'material': name }) def get_asset_material_str(self): asset_base = '<material name="{}" rgba="{}" specular="0" shininess="0" emission="0.25"/>' asset_list = [ asset_base.format(a['name'], self.convert_to_str(a['rgba'])) for a in self.blocks ] asset_str = '\n'.join(asset_list) return asset_str def get_asset_mesh_str(self): asset_base = '<mesh name="{}" scale="0.6 0.6 0.6" file="{}"/>' asset_list = [ asset_base.format( a['name'], os.path.join(self.asset_path, a['polygon'] + '.stl')) for a in self.blocks ] asset_str = '\n'.join(asset_list) return asset_str def get_body_str(self): body_base = ''' <body name='{}' pos='{}' quat='{}'> <joint type='free' name='{}'/> <geom name='{}' type='mesh' mesh='{}' pos='0 0 0' quat='1 0 0 0' material='{}' condim='3' friction='1 1 1' solimp="0.998 0.998 0.001" solref="0.02 1"/> </body> ''' body_list = [ body_base.format(m['name'], self.convert_to_str(m['pos']), self.convert_to_str(m['quat']), m['name'], m['name'], m['name'], m['material']) for i, m in enumerate(self.blocks) ] body_str = '\n'.join(body_list) return body_str def convert_to_str(self, an_iterable): tmp = "" for an_item in an_iterable: tmp += str(an_item) + " " return tmp[:-1] def get_random_pos(self, height=None): x = np.random.uniform(self.bounds['x_min'], self.bounds['x_max']) y = np.random.uniform(self.bounds['y_min'], self.bounds['y_max']) if height is None: z = np.random.uniform(self.bounds['z_min'], self.bounds['z_max']) else: z = height return np.array([x, y, z]) def get_random_rbga(self, num_colors): rgb = list(pickRandomColor(num_colors)) return rgb + [1] def initialize(self, use_cur_pos): tmp = MODEL_XML_BASE.format(self.get_asset_mesh_str(), self.get_asset_material_str(), self.get_body_str()) model = load_model_from_xml(tmp) self.sim = MjSim(model) if self.view: self.viewer = MjViewer(self.sim) else: self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1) self._get_starting_step(use_cur_pos) def _get_starting_step(self, use_cur_pos): prev_positions = {} for i, aname in enumerate(self.names): if use_cur_pos: prev_positions[aname] = self.get_block_info(aname)["pos"] self.add_block(aname, [-5 + i, -5 + i, -5]) for aname in self.names: if use_cur_pos: tmp_pos = prev_positions[aname] else: tmp_pos = self.get_random_pos(self.drop_height) self.add_block(aname, tmp_pos) for i in range(self.internal_steps_per_step): self.internal_step() if self.view: self.viewer.render() #### Env internal step 
functions def add_block(self, ablock, pos): #pos (x,y,z) self.set_block_info(ablock, {"pos": pos}) def pick_block(self, pos): block_name = None for a_block in self.names: if self.intersect(a_block, pos): block_name = a_block if block_name is None: return False return block_name def intersect(self, a_block, pos): cur_pos = self.get_block_info(a_block)["pos"] return np.max(np.abs(cur_pos - pos)) < self.TOLERANCE def get_block_info(self, a_block): info = {} info["poly"] = a_block[:-2] info["pos"] = np.copy(self.sim.data.get_body_xpos(a_block)) #np array info["quat"] = np.copy(self.sim.data.get_body_xquat(a_block)) info["vel"] = np.copy(self.sim.data.get_body_xvelp(a_block)) info["rot_vel"] = np.copy(self.sim.data.get_body_xvelr(a_block)) return info def set_block_info(self, a_block, info): # print(a_block, info) # print("Setting state: {}, {}".format(a_block, info)) sim_state = self.sim.get_state() start_ind = self.sim.model.get_joint_qpos_addr(a_block)[0] if "pos" in info: sim_state.qpos[start_ind:start_ind + 3] = np.array(info["pos"]) if "quat" in info: sim_state.qpos[start_ind + 3:start_ind + 7] = info["quat"] else: sim_state.qpos[start_ind + 3:start_ind + 7] = np.array( [1, 0, 0, 0]) start_ind = self.sim.model.get_joint_qvel_addr(a_block)[0] if "vel" in info: sim_state.qvel[start_ind:start_ind + 3] = info["vel"] else: sim_state.qvel[start_ind:start_ind + 3] = np.zeros(3) if "rot_vel" in info: sim_state.qvel[start_ind + 3:start_ind + 6] = info["rot_vel"] else: sim_state.qvel[start_ind + 3:start_ind + 6] = np.zeros(3) self.sim.set_state(sim_state) def internal_step(self, action=None): ablock = False if action is None: self.sim.forward() self.sim.step() else: pick_place = action[:3] drop_place = action[3:] ablock = self.pick_block(pick_place) if (ablock): # print("Dropping: {} {}".format(ablock, drop_place)) self.add_block(ablock, drop_place) # self.sim_state = self.sim.get_state() return ablock #### Env external step functions # Input: action (4) or (6) # Output: resultant observation after taking the action def step(self, action): action = self._pre_process_actions(action) ablock = self.internal_step(action) # print(self.get_env_info()) #if ablock: for i in range(self.internal_steps_per_step): self.sim.forward() self.sim.step() # self.internal_step() if self.view: self.viewer.render() # self.give_down_vel() # for i in range(200): # self.sim.forward() # self.sim.step() # self.sim_state = self.sim.get_state() # for aname in self.names: #This looks incorrect TODO: CHECK THIS # self.add_block(aname, self.get_block_info(aname)["pos"]) return self.get_observation() # Input: action can either be (A) or (T, A) where we want to execute T actions in a row # Output: Single obs def try_step(self, actions): tmp = self.get_env_info() # cur_state = copy.deepcopy(self.sim.get_state()) if len(actions.shape) == 1: self.step(actions) elif len(actions.shape) == 2: for action in actions: self.step(action) else: raise KeyError("Wrong shape for actions: {}".format(actions.shape)) obs = self.get_observation() # self.sim.set_state(cur_state) self.set_env_info(tmp) return obs # Input: actions (B,A) # Output: Largest manhattan distance component to closest block (B) def get_action_error(self, actions): vals = np.ones(actions.shape[0]) * 10000 for i, an_action in enumerate(actions): for a_block in self.names: vals[i] = min( np.max( np.abs( self.get_block_info(a_block)["pos"][:2] - an_action[:2])), vals[i]) return vals # Resets the environment def reset(self): self.names = [] self.blocks = [] quat = [1, 0, 0, 0] for i 
in range(self.num_objects): poly = np.random.choice(self.polygons) pos = self.get_random_pos() pos[-2] += -2 * (i + 1) self.add_mesh(poly, pos, quat, self.get_random_rbga(self.num_colors)) self.initialize(False) return self.get_observation() # Returns an observation def get_observation(self): img = self.sim.render( self.img_dim, self.img_dim, camera_name="fixed" ) # img is upside down, values btwn 0-255 (D,D,3) img = img[::-1, :, :] # flips image right side up (D,D,3) return np.ascontiguousarray(img) # values btwn 0-255 (D,D,3) def get_obs_size(self): return [self.img_dim, self.img_dim] def get_actions_size(self): if self.include_z: return [6] else: return [4] # Inputs: actions (*,6) # Outputs: (*,6) if including z, (*,4) if not def _post_process_actions(self, actions): if self.include_z: return actions else: return np.array(actions)[..., [0, 1, 3, 4]] # Inputs: actions (*,4), or (*,6) # Outputs: actions (*,6) def _pre_process_actions(self, actions): if actions.shape[-1] == 6: return actions full_actions = np.zeros(list(actions.shape)[:-1] + [6]) # (*,6) full_actions[..., [0, 1, 3, 4]] = actions full_actions[..., 2] = self.pick_height full_actions[..., 5] = self.drop_height return full_actions # Inputs: None # Outputs: Returns name of picked block # If self.include z: Pick any random block # Else: Picks a random block which can be picked up with the z pick set to self.pick_height def _get_rand_block_byz(self): if len(self.names) == 0: raise KeyError("No blocks in _get_rand_block_byz()!") if self.include_z: aname = np.random.choice(self.names) else: z_lim = self.pick_height tmp = [ aname for aname in self.names if abs(self.get_block_info(aname)["pos"][2] - z_lim) < self.TOLERANCE ] aname = np.random.choice(tmp) return aname # Input: action_type # Output: Single action either (6) or (4) def sample_action(self, action_type=None, pick_noise_ratio=0.0, place_noise_ratio=0.0): if len(self.names) == 1 and action_type == 'place_block': action_type = None if action_type == 'pick_block': #pick block, place randomly # aname = np.random.choice(self.names) aname = self._get_rand_block_byz() pick = self.get_block_info(aname)["pos"] place = self.get_random_pos() elif action_type == 'place_block': #pick block, place on top of existing block # aname = np.random.choice(self.names) aname = self._get_rand_block_byz() pick = self.get_block_info( aname )["pos"] #+ np.random.uniform(-self.TOLERANCE, self.TOLERANCE, size=6) * 0.5 names = copy.deepcopy(self.names) names.remove(aname) aname = np.random.choice(names) place = self.get_block_info( aname )["pos"] #+ np.random.uniform(-self.TOLERANCE, self.TOLERANCE, size=6) place[2] += 3 # Each block is roughly 1 unit wide elif action_type == 'remove_block': aname = self._get_rand_block_byz() pick = self.get_block_info(aname)["pos"] # + np.random.randn(3)/50 place = [ 0, 0, -5 ] # Place the block under the ground to remove it from scene # elif action_type == "noisy_pick": # aname = self._get_rand_block_byz() # pick = self.get_block_info(aname)["pos"] #+ np.random.uniform(-self.TOLERANCE, self.TOLERANCE, size=6) * 0.5 # place = self.get_random_pos() # elif action_type == "noisy_miss": # aname = self._get_rand_block_byz() # pick = self.get_block_info(aname)["pos"] #+ np.random.uniform(-self.TOLERANCE, self.TOLERANCE, size=6) * 5 # place = self.get_random_pos() elif action_type is None: if self.include_z: pick = self.get_random_pos() place = self.get_random_pos() else: pick = self.get_random_pos(self.pick_height) place = self.get_random_pos(self.drop_height) else: raise 
KeyError("Wrong input action_type!") ac = np.array(list(pick) + list(place)) if pick_noise_ratio: ac[:3] += np.random.uniform( -self.TOLERANCE, self.TOLERANCE, size=3) * pick_noise_ratio else: ac[3:] += np.random.uniform( -self.TOLERANCE, self.TOLERANCE, size=3) * place_noise_ratio return self._post_process_actions(ac) # Input: mean (*), std (*), num_actions # Output: actions (num_actions, *) def sample_multiple_action_gaussian(self, mean, std, num_actions): # return np.stack([self.sample_action_gaussian(mean, std) for _ in range(num_actions)]) ans = np.random.normal(mean, std, size=[num_actions] + list(mean.shape)) ## Clip actions to stay in bounds if not self.include_z: ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'], self.bounds['x_max']) ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'], self.bounds['y_max']) ans[..., 2] = np.clip(ans[..., 2], self.bounds['x_min'], self.bounds['x_max']) ans[..., 3] = np.clip(ans[..., 3], self.bounds['y_min'], self.bounds['y_max']) else: ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'], self.bounds['x_max']) ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'], self.bounds['y_max']) ans[..., 2] = np.clip(ans[..., 2], self.bounds['z_min'], self.bounds['z_max']) ans[..., 3] = np.clip(ans[..., 3], self.bounds['x_min'], self.bounds['x_max']) ans[..., 4] = np.clip(ans[..., 4], self.bounds['y_min'], self.bounds['y_max']) ans[..., 5] = np.clip(ans[..., 5], self.bounds['z_min'], self.bounds['z_max']) return ans # Input: mean (*, 2/3), std (*, 2/3), num_actions # Output: actions (num_actions, *, 2/3) def sample_multiple_place_locs_gaussian(self, mean, std, num_actions): ans = np.random.normal(mean, std, size=[num_actions] + list(mean.shape)) ## Clip actions to stay in bounds if not self.include_z: ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'], self.bounds['x_max']) ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'], self.bounds['y_max']) else: ans[..., 0] = np.clip(ans[..., 0], self.bounds['x_min'], self.bounds['x_max']) ans[..., 1] = np.clip(ans[..., 1], self.bounds['y_min'], self.bounds['y_max']) ans[..., 2] = np.clip(ans[..., 2], self.bounds['z_min'], self.bounds['z_max']) return ans # # Input: mean (*), std (*) # # Output: actions (*) # def sample_multiple_action_gaussian(self, mean, std, num_samples): # #mean and std should be (T, A) # random_a = np.random.normal(mean, std, [num_samples] + list(mean.shape)) # # set pick height # random_a[:, :, 2] = 0.6 # # set place height # random_a[:, :, 5] = self.drop_height # return random_a def move_blocks_side(self): # Move blocks to either side z = self.drop_height side_pos = [[-2.2, 1.5, z], [2.2, 1.5, z], [-2.2, 3.5, z], [2.2, 3.5, z]] # self.bounds = {'x_min':-2.5, 'x_max':2.5, 'y_min': 1.0, 'y_max' :4.0, 'z_min':0.05, 'z_max'2.2} place_lst = [] for i, block in enumerate(self.names): place = copy.deepcopy(self.get_block_info(block)["pos"]) place[-1] = self.drop_height self.add_block(block, side_pos[i]) place_lst.append(place) #true_actions.append(side_pos[i] + list(place)) #Note pick & places z's might be # slightly # off # sort by place height so place lowest block first for i in range(self.internal_steps_per_step): self.internal_step() if self.view: self.viewer.render() true_actions = [] for i, block in enumerate(self.names): pick = self.get_block_info(block)["pos"] pick[-1] = 0.6 place = place_lst[i] true_actions.append(np.concatenate([pick, place])) sorted(true_actions, key=lambda x: x[5]) # print(true_actions) return self._post_process_actions(true_actions) def 
create_tower_shape(self): def get_valid_width_pos(width): num_pos = len(self.heights) possible = [] for i in range(num_pos): valid = True for k in range(max(i - width, 0), min(i + width + 1, num_pos)): if self.types[k] == "tetrahedron": valid = False break if self.heights[i] < self.heights[k]: valid = False break if self.heights[i] >= 3: valid = False break if valid: possible.append(i) return possible def get_drop_pos(index): delta_x = 1 y_val = 3 left_most_x = -2.5 return [left_most_x + index * delta_x, y_val, 4] self.names = [] self.blocks = [] self.heights = [0, 0, 0, 0, 0] self.types = [None] * 5 self.check_clear_width = { 'cube': 1, 'horizontal_rectangle': 1, 'tetrahedron': 1 } self.add_height_width = { 'cube': 0, 'horizontal_rectangle': 1, 'tetrahedron': 0 } tmp_polygons = copy.deepcopy( self.polygons ) #['cube', 'horizontal_rectangle', 'tetrahedron'][:2] quat = [1, 0, 0, 0] colors = [] for i in range(self.num_objects): poly = np.random.choice(tmp_polygons) tmp = get_valid_width_pos(self.check_clear_width[poly]) if len(tmp) == 0: tmp_polygons.remove(poly) if len(tmp_polygons) == 0: # print("DONE!") break else: continue tmp_polygons = copy.deepcopy(self.polygons) ind = np.random.choice(tmp) # print(poly, tmp, ind) self.update_tower_info(ind, poly) tmp_pos = get_drop_pos(ind) while True: color = self.get_random_rbga(self.num_colors) # if len(colors) > 0: # import pdb; pdb.set_trace() if len(colors) == 0 or np.linalg.norm( np.array(color[:3]) - np.array(colors)[:, :3], axis=1).min() > 0.3: break colors.append(color) self.add_mesh(poly, tmp_pos, quat, color) self.num_objects = len(self.names) self.initialize(True) def update_tower_info(self, ind, poly): self.types[ind] = poly width = self.add_height_width[poly] new_height = self.heights[ind] + 1 for i in range(max(ind - width, 0), min(ind + width + 1, len(self.heights))): self.heights[i] = new_height for i in range(1, 4): if self.heights[i - 1] == self.heights[ i + 1] and new_height == self.heights[i - 1]: self.heights[i] = self.heights[i - 1] def get_env_info(self): env_info = {} env_info["names"] = copy.deepcopy(self.names) env_info["blocks"] = copy.deepcopy(self.blocks) for i, aname in enumerate(self.names): info = self.get_block_info(aname) env_info["blocks"][i]["pos"] = copy.deepcopy(info["pos"]) env_info["blocks"][i]["quat"] = copy.deepcopy(info["quat"]) return env_info def set_env_info(self, env_info): self.names = env_info["names"] self.blocks = env_info["blocks"] self.initialize(True) # Output: If check_all_in_bounds is false, return actions(N,3) # Else return true if all the boxes are in bounds, false otherwise def get_block_locs(self, check_all_in_bounds=False): ans = [] for a_block in self.names: ans.append(self.get_block_info(a_block)["pos"]) # (3) ans = np.array(ans) # (Num_blocks,3) if check_all_in_bounds: x_max = np.max(ans[:, 0]) x_min = np.min(ans[:, 0]) y_max = np.max(ans[:, 1]) y_min = np.min(ans[:, 1]) if x_max > self.bounds['x_max'] or x_min < self.bounds['x_min'] or y_max > self.bounds['y_max'] or \ y_min < self.bounds['y_min']: return False else: return True return ans # Input: Computes accuracy (#blocks correct/total #) of the current environment given the true data and threshold def compute_accuracy(self, true_data, threshold=0.2): mjc_data = copy.deepcopy(true_data) max_err = -float('inf') data = self.get_env_info() correct = 0 for pred_datum in data['blocks']: err, mjc_match, err_pos, err_rgb = self._best_obj_match( pred_datum, mjc_data['blocks']) # del mjc_data[mjc_match] # print(err) if err > max_err: max_err = err max_pos = 
err_pos max_rgb = err_rgb if len(mjc_data) == 0: break if err < threshold: correct += 1 correct /= float(len(data['blocks'])) return correct def _best_obj_match(self, pred, targs): def np_mse(x1, x2): return np.square(x1 - x2).mean() pos = pred['pos'] rgb = pred['rgba'] best_err = float('inf') for obj_data in targs: obj_name = obj_data['name'] obj_pos = obj_data['pos'] obj_rgb = obj_data['rgba'] pos_err = np_mse(pos, obj_pos) rgb_err = np_mse(np.array(rgb), np.array(obj_rgb)) err = pos_err + rgb_err if err < best_err: best_err = err best_obj = obj_name best_pos = pos_err best_rgb = rgb_err return best_err, best_obj, best_pos, best_rgb
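# End-to-end usage sketch for BlockPickAndPlaceEnv (the constructor arguments
# and demo_episode are illustrative): snapshot the scene with get_env_info,
# apply a few sampled pick-and-place actions, then score how many blocks still
# match the snapshot with compute_accuracy. Note that with include_z=False,
# sample_action('pick_block') assumes at least one block sits near pick_height.
def demo_episode(n_actions=3):
    env = BlockPickAndPlaceEnv(num_objects=3, num_colors=3, img_dim=64,
                               include_z=False, random_initialize=True)
    snapshot = env.get_env_info()
    for _ in range(n_actions):
        obs = env.step(env.sample_action('pick_block'))  # (64, 64, 3) image
    return env.compute_accuracy(snapshot)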