def evaluate_policy(env_config, model_path, n_eval, printout=False): if printout: print("Loading policy...") # Load trained model and corresponding policy data = torch.load(model_path) policy = data['evaluation/policy'] if printout: print("Policy loaded") # Load controller controller = env_config.pop("controller") if controller in set(ALL_CONTROLLERS): # This is a default controller controller_config = load_controller_config( default_controller=controller) else: # This is a string to the custom controller controller_config = load_controller_config(custom_fpath=controller) # Create robosuite env env = suite.make(**env_config, has_renderer=False, has_offscreen_renderer=False, use_object_obs=True, use_camera_obs=False, reward_shaping=True, controller_configs=controller_config) env = GymWrapper(env) # Use CUDA if available if torch.cuda.is_available(): set_gpu_mode(True) policy.cuda() if not isinstance( policy, MakeDeterministic) else policy.stochastic_policy.cuda() if printout: print("Evaluating policy over {} simulations...".format(n_eval)) # Create variable to hold rewards to be averaged returns = [] # Loop through simulation n_eval times and take average returns each time for i in range(n_eval): path = rollout( env, policy, max_path_length=env_config["horizon"], render=False, ) # Determine total summed rewards from episode and append to 'returns' returns.append(sum(path["rewards"])) # Average the episode rewards and return the normalized corresponding value return sum(returns) / (env_config["reward_scale"] * n_eval)
def _load_controller(self): """ Loads controller to be used for dynamic trajectories """ # Flag for loading urdf once (only applicable for IK controllers) urdf_loaded = False # Load controller configs for both left and right arm for arm in self.arms: # First, load the default controller if none is specified if not self.controller_config[arm]: # Need to update default for a single agent controller_path = os.path.join( os.path.dirname(__file__), '..', 'controllers/config/{}.json'.format( self.robot_model.default_controller_config[arm])) self.controller_config[arm] = load_controller_config( custom_fpath=controller_path) # Assert that the controller config is a dict file: # NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY, # OSC_POSITION, OSC_POSE, IK_POSE} assert type(self.controller_config[arm]) == dict, \ "Inputted controller config must be a dict! Instead, got type: {}".format( type(self.controller_config[arm])) # Add to the controller dict additional relevant params: # the robot name, mujoco sim, eef_name, actuator_range, joint_indexes, timestep (model) freq, # policy (control) freq, and ndim (# joints) self.controller_config[arm]["robot_name"] = self.name self.controller_config[arm]["sim"] = self.sim self.controller_config[arm]["eef_name"] = self.gripper[ arm].important_sites["grip_site"] self.controller_config[arm][ "eef_rot_offset"] = self.eef_rot_offset[arm] self.controller_config[arm]["ndim"] = self._joint_split_idx self.controller_config[arm]["policy_freq"] = self.control_freq (start, end) = (None, self._joint_split_idx) if arm == "right" else ( self._joint_split_idx, None) self.controller_config[arm]["joint_indexes"] = { "joints": self.joint_indexes[start:end], "qpos": self._ref_joint_pos_indexes[start:end], "qvel": self._ref_joint_vel_indexes[start:end] } self.controller_config[arm]["actuator_range"] = ( self.torque_limits[0][start:end], self.torque_limits[1][start:end]) # Only load urdf the first time this controller gets called self.controller_config[arm][ "load_urdf"] = True if not urdf_loaded else False urdf_loaded = True # Instantiate the relevant controller self.controller[arm] = controller_factory( self.controller_config[arm]["type"], self.controller_config[arm])
def test_playback(): # set seeds random.seed(0) np.random.seed(0) env = robosuite.make( "Lift", robots=["Panda"], controller_configs=load_controller_config( default_controller="OSC_POSE"), has_renderer=False, has_offscreen_renderer=False, ignore_done=True, use_camera_obs=False, reward_shaping=True, control_freq=20, ) env.reset() # task instance task_xml = env.sim.model.get_xml() task_init_state = np.array(env.sim.get_state().flatten()) # trick for ensuring that we can play MuJoCo demonstrations back # deterministically by using the recorded actions open loop env.reset_from_xml_string(task_xml) env.sim.reset() env.sim.set_state_from_flattened(task_init_state) env.sim.forward() # random actions to play n_actions = 100 actions = 0.1 * np.random.uniform( low=-1.0, high=1.0, size=(n_actions, env.action_spec[0].shape[0])) # play actions print("playing random actions...") states = [task_init_state] for i in range(n_actions): env.step(actions[i]) states.append(np.array(env.sim.get_state().flatten())) # try playback print("attempting playback...") env.reset() env.reset_from_xml_string(task_xml) env.sim.reset() env.sim.set_state_from_flattened(task_init_state) env.sim.forward() for i in range(n_actions): env.step(actions[i]) state_playback = env.sim.get_state().flatten() assert np.all(np.equal(states[i + 1], state_playback)) env.close() print("test passed!")
def _load_controller(self): """ Loads controller to be used for dynamic trajectories """ # First, load the default controller if none is specified if not self.controller_config: # Need to update default for a single agent controller_path = os.path.join( os.path.dirname(__file__), "..", "controllers/config/{}.json".format( self.robot_model.default_controller_config), ) self.controller_config = load_controller_config( custom_fpath=controller_path) # Assert that the controller config is a dict file: # NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY, # OSC_POSITION, OSC_POSE, IK_POSE} assert ( type(self.controller_config) == dict ), "Inputted controller config must be a dict! Instead, got type: {}".format( type(self.controller_config)) # Add to the controller dict additional relevant params: # the robot name, mujoco sim, eef_name, joint_indexes, timestep (model) freq, # policy (control) freq, and ndim (# joints) self.controller_config["robot_name"] = self.name self.controller_config["sim"] = self.sim self.controller_config["eef_name"] = self.gripper.important_sites[ "grip_site"] self.controller_config["eef_rot_offset"] = self.eef_rot_offset self.controller_config["joint_indexes"] = { "joints": self.joint_indexes, "qpos": self._ref_joint_pos_indexes, "qvel": self._ref_joint_vel_indexes, } self.controller_config["actuator_range"] = self.torque_limits self.controller_config["policy_freq"] = self.control_freq self.controller_config["ndim"] = len(self.robot_joints) # Instantiate the relevant controller self.controller = controller_factory(self.controller_config["type"], self.controller_config)
def _load_controller(self): """ Loads controller to be used for dynamic trajectories """ # First, load the default controller if none is specified if not self.controller_config: # Need to update default for a single agent controller_path = os.path.join( os.path.dirname(__file__), '..', 'controllers/config/{}.json'.format( self.robot_model.default_controller_config)) self.controller_config = load_controller_config( custom_fpath=controller_path) # Assert that the controller config is a dict file: # NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY, # OSC_POSITION, OSC_POSE, IK_POSE} assert type(self.controller_config) == dict, \ "Inputted controller config must be a dict! Instead, got type: {}".format(type(self.controller_config)) assert self.controller_config["type"] == "LOCOMOTION_JOINT_TORQUE", \ "Only JOINT_TORQUE controllers are currently supported for quadruped robots, please change the " \ "controller to be a joint torque controller. Got controller type: {}".format(self.controller_config["type"]) # Add to the controller dict additional relevant params: # the robot name, mujoco sim, eef_name, joint_indexes, timestep (model) freq, # policy (control) freq, and ndim (# joints) self.controller_config["robot_name"] = self.name self.controller_config["sim"] = self.sim self.controller_config["chassis_name"] = self.robot_model.robot_base self.controller_config["joint_indexes"] = { "joints": self.joint_indexes, "qpos": self._ref_joint_pos_indexes, "qvel": self._ref_joint_vel_indexes } self.controller_config["actuator_range"] = self.torque_limits self.controller_config["policy_freq"] = self.control_freq self.controller_config["ndim"] = len(self.robot_joints) # Instantiate the relevant controller self.controller = controller_factory(self.controller_config["type"], self.controller_config)
def liftEnvPlay(): env = GymWrapper( MergeObsWrapper( rs.make( "LiftNBVCube", robots=["Sawyer"], controller_configs=load_controller_config( default_controller="OSC_POSITION"), use_camera_obs=False, # do not use pixel observations has_offscreen_renderer= False, # not needed since not using pixel obs has_renderer=True, # make sure we can render to the screen reward_shaping=True, # use dense rewards control_freq= 10, # control should happen fast enough so that simulation looks smooth camera_heights=84, camera_widths=84, camera_depths=True, ), img_keys=["agentview_image", "agentview_depth"], vect_keys=["robot0_robot-state"]), keys=["merged_obs"]) return env
options["robots"] = [] # Have user choose two robots print("A multiple single-arm configuration was chosen.\n") for i in range(2): print("Please choose Robot {}...\n".format(i)) options["robots"].append(choose_robots(exclude_bimanual=True)) # Else, we simply choose a single (single-armed) robot to instantiate in the environment else: options["robots"] = choose_robots(exclude_bimanual=True) # Load the desired controller options["controller_configs"] = load_controller_config(default_controller="OSC_POSE") # change renderer config config = load_renderer_config('igibson') if args.vision_modality == 'rgb': config['vision_modalities'] = ['rgb'] if args.vision_modality == 'segmentation': config['vision_modalities'] = ['seg'] config['msaa'] = False if args.vision_modality == 'depth': config['vision_modalities'] = ['3d'] if args.vision_modality == 'normal': config['vision_modalities'] = ['normal']
import robosuite from robosuite.controllers import load_controller_config # load default controller parameters for Operational Space Control (OSC) controller_config = load_controller_config(default_controller="OSC_POSE") # create an environment to visualize on-screen env = robosuite.make( "Lift", robots=["Sawyer"], # load a Sawyer robot and a Panda robot gripper_types="PandaGripper", # use default grippers per robot arm controller_configs=controller_config, # each arm is controlled using OSC has_renderer=True, # on-screen rendering render_camera="frontview", # visualize the "frontview" camera has_offscreen_renderer=False, # no off-screen rendering control_freq=20, # 20 hz control for applied actions horizon=200, # each episode terminates after 200 steps use_object_obs=False, # no observations needed use_camera_obs=False, # no observations needed path=[ '/home/lthpc/Desktop/meta-role/robosuite/robosuite/my_xmls/sawyer/[1.1, 1.1, 1.1, 1.1, 1.1, 0.9, 0.9, 0.9, 0.9, 0.9].xml' ])
# Have user choose two robots print("A multiple single-arm configuration was chosen.\n") for i in range(2): print("Please choose Robot {}...\n".format(i)) options["robots"].append(choose_robots(exclude_bimanual=True)) # Else, we simply choose a single (single-armed) robot to instantiate in the environment else: options["robots"] = choose_robots(exclude_bimanual=True) # Choose controller controller_name = choose_controller() # Load the desired controller options["controller_configs"] = load_controller_config(default_controller=controller_name) # Help message to user print() print("Press \"H\" to show the viewer control panel.") # initialize the task env = suite.make( **options, has_renderer=True, has_offscreen_renderer=False, ignore_done=True, use_camera_obs=False, control_freq=20, hard_reset=False, # TODO: Not setting this flag to False brings up a segfault on macos or glfw error on linux )
def run_sim_test(type_test): real_robot_joints = [] sim_robot_joints = [] target_robot_joints = [] real_robot_eef_rframe = [] real_robot_eef_sframe = [] sim_robot_eef_rframe = [] sim_robot_eef_sframe = [] # Create dict to hold options that will be passed to env creation call options = {} # Choose environment and add it to options options["env_name"] = "Lift" options["robots"] = ["Jaco"] # Choose camera camera = "frontview" write_path = os.path.join('datasets', type_test) # load data # latin1 allows us to load python2 real_robot = np.load(os.path.join('datasets', type_test + '.npz'), allow_pickle=True, encoding='latin1') real_eef_pos = real_robot['eef_pos'] #real_joint_pos = np.mod(real_robot['joint_pos'], 4*np.pi) real_joint_pos = real_robot['joint_pos'] real_actions = real_robot['actions'] init_real = real_joint_pos[0][:7] # Choose controller controller_file = "jaco_%s_%shz.json" % (args.controller_type, args.control_freq) controller_fpath = os.path.join( os.path.split(suite.__file__)[0], 'controllers', 'config', controller_file) print('loading controller from', controller_fpath) # Load the desired controller options["controller_configs"] = load_controller_config( custom_fpath=controller_fpath) options['initial_qposes'] = [init_real] control_freq = args.control_freq n_joints = 7 n_steps = len(real_actions) # initialize the task env = suite.make( **options, has_renderer=False, has_offscreen_renderer=True, ignore_done=True, use_camera_obs=True, control_freq=control_freq, camera_names=camera, camera_heights=512, camera_widths=512, ) site = 'gripper0_grip_site' env = JacoSim2RealWrapper(env) env.reset() video_writer = imageio.get_writer(write_path + '.mp4', fps=2) eef_site_id = env.robots[0].eef_site_id # Get action limits low, high = env.action_spec #env.robots[0].set_robot_joint_positions(init_real[:7]) sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes] for t in range(n_steps - 1): action = np.deg2rad(real_actions[t - 1]) #action = real_joint_pos[t,:7]-sim_joint_pos if len(action) == 7: action = np.hstack((action, [0])) obs, reward, done, _ = env.step(action) video_img = obs['%s_image' % camera][::-1] video_writer.append_data(video_img) # get simulator position and quaternion in real robot frame sim_eef_pos_rframe, sim_eef_quat_rframe = env.get_sim2real_posquat() sim_eef_pos_sframe, sim_eef_quat_sframe = env.get_sim_posquat() sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes] sim_goal_joint_pos = env.robots[0].controller.goal_qpos sim_robot_eef_rframe.append( deepcopy(np.hstack((sim_eef_pos_rframe, sim_eef_quat_rframe)))) sim_robot_eef_sframe.append( deepcopy(np.hstack((sim_eef_pos_sframe, sim_eef_quat_sframe)))) sim_robot_joints.append(deepcopy(sim_joint_pos)) target_robot_joints.append(deepcopy(sim_goal_joint_pos)) real_eef_pos_sframe, real_eef_quat_sframe = env.get_real2sim_posquat( real_eef_pos[t, :3], real_eef_pos[t, 3:7]) real_robot_eef_rframe.append(real_eef_pos[t]) real_robot_eef_sframe.append( deepcopy(np.hstack((real_eef_pos_sframe, real_eef_quat_sframe)))) real_robot_joints.append(deepcopy(obs['robot0_joint_pos'])) f, ax = plt.subplots(7, figsize=(10, 20)) real_robot_eef_rframe = np.array(real_robot_eef_rframe) real_robot_eef_sframe = np.array(real_robot_eef_sframe) sim_robot_eef_rframe = np.array(sim_robot_eef_rframe) sim_robot_eef_sframe = np.array(sim_robot_eef_sframe) y = np.arange(len(real_robot_eef_rframe)) vmin = -np.pi vmax = np.pi for i in range(7): if not i: ax[i].scatter(y, real_robot_eef_rframe[:, i], marker='o', s=4, c='r', label='robot_rframe') ax[i].scatter(y, real_robot_eef_sframe[:, i], marker='o', s=4, c='k', label='robot_sframe') ax[i].scatter(y, sim_robot_eef_rframe[:, i], marker='o', s=4, c='g', label='sim_rframe') ax[i].scatter(y, sim_robot_eef_sframe[:, i], marker='o', s=4, c='b', label='sim_sframe') else: ax[i].scatter(y, real_robot_eef_rframe[:, i], marker='o', s=4, c='r') ax[i].scatter(y, real_robot_eef_sframe[:, i], marker='o', s=4, c='k') ax[i].scatter(y, sim_robot_eef_rframe[:, i], marker='o', s=4, c='g') ax[i].scatter(y, sim_robot_eef_sframe[:, i], marker='o', s=4, c='b') ax[i].plot(real_robot_eef_rframe[:, i], c='r') ax[i].plot(real_robot_eef_sframe[:, i], c='k') ax[i].plot(sim_robot_eef_rframe[:, i], c='g') ax[i].plot(sim_robot_eef_sframe[:, i], c='b') for i in range(4, 7): ax[i].set_ylim([vmin, vmax]) ax[0].set_title('x') ax[1].set_title('y') ax[2].set_title('z') ax[3].set_title('qx') ax[4].set_title('qy') ax[5].set_title('qz') ax[6].set_title('qw') ax[0].legend() plt.savefig(write_path + '_eef.png') plt.close() f, ax = plt.subplots(7, figsize=(10, 20)) real_robot_joints = np.array(real_robot_joints) sim_robot_joints = np.array(sim_robot_joints) target_robot_joints = np.array(target_robot_joints) vmin = -4 * np.pi vmax = 4 * np.pi for i in range(7): ax[i].set_title(i) if not i: ax[i].plot(real_robot_joints[:, i], c='b', label='real') ax[i].plot(sim_robot_joints[:, i], c='k', label='sim') ax[i].plot(target_robot_joints[:, i], c='c', label='goal') else: ax[i].plot(real_robot_joints[:, i], c='b') ax[i].plot(sim_robot_joints[:, i], c='k') ax[i].plot(target_robot_joints[:, i], c='c') ax[i].scatter(y, real_robot_joints[:, i], s=2, c='b') ax[i].scatter(y, sim_robot_joints[:, i], s=2, c='k') ax[i].scatter(y, target_robot_joints[:, i], s=2, c='c') for x in range(7): ax[x].set_ylim([vmin, vmax]) ax[0].legend() plt.savefig(write_path + '_joints.png') plt.close() video_writer.close() print("Video saved to {}".format(write_path))
def run_dh_test(type_test): sim_robot_joints = [] target_robot_joints = [] dh_robot_joints = [] sim_robot_eef_sframe = [] sim_robot_eef_rframe = [] dh_robot_eef_rframe = [] # matches rframe #base_matrix = torch.FloatTensor([[1,0,0,0], # [0,-1,0,0], # [0,0,-1,0], # [0,0,0,1]]).to(device) ## gets x and y correct except orientation #base_matrix = torch.FloatTensor([[0,1,0,0], # [1,0,0,0], # [0,0,1,0], # [0,0,0,1]]).to(device) # gets x and y correct base_matrix = torch.FloatTensor([[0,1,0,0], [1,0,0,0], [0,0,-1,0], [0,0,0,1]]).to(device) # Create dict to hold options that will be passed to env creation call options = {} # Choose environment and add it to options robot_name = "Jaco" options["env_name"] = "Reach" options["robots"] = [robot_name] npdh = robot_attributes[robot_name] tdh = {} for key, item in npdh.items(): tdh[key] = torch.FloatTensor(item).to('cuda') # Choose camera camera = "frontview" write_path = os.path.join('datasets', 'DH_'+type_test) # load data # latin1 allows us to load python2 real_robot = np.load(os.path.join('datasets', type_test + '.npz'), allow_pickle=True, encoding='latin1') real_eef_pos = real_robot['eef_pos'] #real_joint_pos = np.mod(real_robot['joint_pos'], 4*np.pi) real_joint_pos = real_robot['joint_pos'] real_actions = real_robot['actions'] init_real = real_joint_pos[0][:7] # Choose controller controller_type = 'JOINT_POSITION' control_freq = 5 controller_file = "%s_%s_%shz.json" %(robot_name.lower(), controller_type.lower(), control_freq) controller_fpath = os.path.join( os.path.split(suite.__file__)[0], 'controllers', 'config', controller_file) print('loading controller from', controller_fpath) # Load the desired controller options["controller_configs"] = load_controller_config(custom_fpath=controller_fpath) n_joints = 7 n_steps = len(real_actions) # initialize the task env = suite.make( **options, has_renderer=False, has_offscreen_renderer=True, ignore_done=True, use_camera_obs=True, control_freq=control_freq, camera_names=camera, camera_heights=512, camera_widths=512, ) site = 'gripper0_grip_site' env.reset() video_writer = imageio.get_writer(write_path + '.mp4', fps=2) eef_site_id = env.robots[0].eef_site_id # Get action limits low, high = env.action_spec env.robots[0].set_robot_joint_positions(init_real[:7]) sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes] dh_eef = torch_angle2ee(tdh, base_matrix, torch.FloatTensor(sim_joint_pos)[None,:].to(device)) for t in range(n_steps-1): action = np.deg2rad(real_actions[t-1]) #action = real_joint_pos[t,:7]-sim_joint_pos if len(action) == 7: action = np.hstack((action, [0])) obs, reward, done, _ = env.step(action) video_img = obs['%s_image'%camera][::-1] video_writer.append_data(video_img) # get simulator position and quaternion in real robot frame sim_eef_pose = deepcopy(env.robots[0].pose_in_base_from_name('gripper0_eef')) sim_eef_pos_sframe = deepcopy(sim_eef_pose)[:3, 3] sim_eef_quat_sframe = deepcopy(transform_utils.mat2quat(sim_eef_pose)) sim_eef_pos_rframe, sim_eef_quat_rframe = get_sim2real_posquat(env) sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes] sim_goal_joint_pos = env.robots[0].controller.goal_qpos torch_joint_pos = torch.FloatTensor(sim_joint_pos)[None,:].to(device) dh_eef_pose = torch_angle2ee(tdh, base_matrix, torch_joint_pos)[0].detach().cpu().numpy() dh_eef_pos_sframe = deepcopy(dh_eef_pose)[:3, 3] dh_eef_quat_sframe = deepcopy(transform_utils.mat2quat(dh_eef_pose)) sim_robot_eef_sframe.append(deepcopy(np.hstack((sim_eef_pos_sframe, sim_eef_quat_sframe)))) sim_robot_eef_rframe.append(deepcopy(np.hstack((sim_eef_pos_rframe, sim_eef_quat_rframe)))) dh_robot_eef_rframe.append(deepcopy(np.hstack((dh_eef_pos_sframe, dh_eef_quat_sframe)))) sim_robot_joints.append(deepcopy(sim_joint_pos)) target_robot_joints.append(deepcopy(sim_goal_joint_pos)) dh_robot_joints.append(deepcopy(sim_goal_joint_pos)) #real_eef_pos_sframe, real_eef_quat_sframe = env.get_real2sim_posquat(real_eef_pos[t,:3], real_eef_pos[t,3:7]) #real_robot_eef_rframe.append(real_eef_pos[t]) #real_robot_eef_sframe.append(deepcopy(np.hstack((real_eef_pos_sframe, real_eef_quat_sframe)))) #real_robot_joints.append(deepcopy(obs['robot0_joint_pos'])) f, ax = plt.subplots(7, figsize=(10,20)) sim_robot_eef_sframe = np.array(sim_robot_eef_sframe) sim_robot_eef_rframe = np.array(sim_robot_eef_rframe) dh_robot_eef_rframe = np.array(dh_robot_eef_rframe) y = np.arange(len(sim_robot_eef_sframe)) vmin = -np.pi vmax = np.pi for i in range(7): ax[i].scatter(y, dh_robot_eef_rframe[:,i] , marker='x', s=35, c='g', label='dh_sframe') ax[i].scatter(y, sim_robot_eef_sframe[:,i] , marker='o', s=4, c='b', label='sim_sframe') #ax[i].scatter(y, sim_robot_eef_rframe[:,i] , marker='o', s=4, c='r', label='sim_rframe') ax[i].plot(dh_robot_eef_rframe[:, i], c='g' ) ax[i].plot(sim_robot_eef_sframe[:, i], c='b' ) #ax[i].plot(sim_robot_eef_rframe[:, i], c='r' ) for i in range(4, 7): ax[i].set_ylim([vmin, vmax]) ax[0].set_title('x'); ax[1].set_title('y'); ax[2].set_title('z') ax[3].set_title('qx'); ax[4].set_title('qy'); ax[5].set_title('qz'); ax[6].set_title('qw') ax[0].legend() plt.savefig(write_path + '_eef.png') plt.close() f, ax = plt.subplots(7, figsize=(10,20)) sim_robot_joints = np.array(sim_robot_joints) dh_robot_joints = np.array(dh_robot_joints) target_robot_joints = np.array(target_robot_joints) vmin = -4*np.pi vmax = 4*np.pi for i in range(7): ax[i].set_title(i) if not i: ax[i].plot(sim_robot_joints[:,i], c='k', label='sim') ax[i].plot(target_robot_joints[:,i], c='b', label='goal') ax[i].plot(dh_robot_joints[:,i], c='g', label='dh') else: ax[i].plot(sim_robot_joints[:,i], c='k') ax[i].plot(target_robot_joints[:,i], c='b') ax[i].plot(dh_robot_joints[:,i], c='g') ax[i].scatter(y, sim_robot_joints[:,i], s=2, c='k') ax[i].scatter(y, target_robot_joints[:,i], s=2, c='c') ax[i].scatter(y, dh_robot_joints[:,i], s=2, c='b') for x in range(7): ax[x].set_ylim([vmin, vmax]) ax[0].legend() plt.savefig(write_path + '_joints.png') plt.close() video_writer.close() print("Video saved to {}".format(write_path))
env_args["camera_widths"] = 512 # Setup video recorder if necesssary if args.record_video: # Grab name of this rollout combo video_name = "{}-{}-{}".format(env_args["env_name"], "".join( env_args["robots"]), env_args["controller"]).replace("_", "-") # Calculate appropriate fps fps = int(env_args["control_freq"]) # Define video writer video_writer = imageio.get_writer("{}.mp4".format(video_name), fps=fps) # Pop the controller controller = env_args.pop("controller") if controller in ALL_CONTROLLERS: controller_config = load_controller_config( default_controller=controller) else: controller_config = load_controller_config(custom_fpath=controller) # Create env env_suite = suite.make(**env_args, controller_configs=controller_config, has_renderer=not args.record_video, has_offscreen_renderer=args.record_video, use_object_obs=True, use_camera_obs=args.record_video, reward_shaping=True) env = GymWrapper(env_suite) # Run rollout simulate_policy(
def test_camera_transforms(): # set seeds random.seed(0) np.random.seed(0) camera_name = "agentview" camera_height = 120 camera_width = 120 env = robosuite.make( "Lift", robots=["Panda"], controller_configs=load_controller_config( default_controller="OSC_POSE"), has_renderer=False, has_offscreen_renderer=True, ignore_done=True, use_object_obs=True, use_camera_obs=True, camera_names=[camera_name], camera_depths=[True], camera_heights=[camera_height], camera_widths=[camera_width], reward_shaping=True, control_freq=20, ) sim = env.sim obs_dict = env.reset() # ground-truth object position obj_pos = obs_dict["object-state"][:3] # camera frame image = obs_dict["{}_image".format(camera_name)][::-1] # unnormalized depth map depth_map = obs_dict["{}_depth".format(camera_name)][::-1] depth_map = CU.get_real_depth_map(sim=sim, depth_map=depth_map) # get camera matrices world_to_camera = CU.get_camera_transform_matrix( sim=sim, camera_name=camera_name, camera_height=camera_height, camera_width=camera_width, ) camera_to_world = np.linalg.inv(world_to_camera) # transform object position into camera pixel obj_pixel = CU.project_points_from_world_to_camera( points=obj_pos, world_to_camera_transform=world_to_camera, camera_height=camera_height, camera_width=camera_width, ) # transform from camera pixel back to world position estimated_obj_pos = CU.transform_from_pixels_to_world( pixels=obj_pixel, depth_map=depth_map, camera_to_world_transform=camera_to_world, ) # the most we should be off by in the z-direction is 3^0.5 times the maximum half-size of the cube max_z_err = np.sqrt(3) * 0.022 z_err = np.abs(obj_pos[2] - estimated_obj_pos[2]) assert z_err < max_z_err print("pixel: {}".format(obj_pixel)) print("obj pos: {}".format(obj_pos)) print("estimated obj pos: {}".format(estimated_obj_pos)) print("z err: {}".format(z_err)) env.close()
def experiment(variant, agent="SAC"): # Make sure agent is a valid choice assert agent in AGENTS, "Invalid agent selected. Selected: {}. Valid options: {}".format( agent, AGENTS) # Get environment configs for expl and eval envs and create the appropriate envs # suites[0] is expl and suites[1] is eval suites = [] for env_config in (variant["expl_environment_kwargs"], variant["eval_environment_kwargs"]): # Load controller controller = env_config.pop("controller") if controller in set(ALL_CONTROLLERS): # This is a default controller controller_config = load_controller_config( default_controller=controller) else: # This is a string to the custom controller controller_config = load_controller_config(custom_fpath=controller) # Create robosuite env and append to our list suites.append( suite.make( **env_config, has_renderer=False, has_offscreen_renderer=False, use_object_obs=True, use_camera_obs=False, reward_shaping=True, controller_configs=controller_config, )) # Create gym-compatible envs expl_env = NormalizedBoxEnv(GymWrapper(suites[0])) eval_env = NormalizedBoxEnv(GymWrapper(suites[1])) obs_dim = expl_env.observation_space.low.size action_dim = eval_env.action_space.low.size qf1 = FlattenMlp( input_size=obs_dim + action_dim, output_size=1, **variant['qf_kwargs'], ) qf2 = FlattenMlp( input_size=obs_dim + action_dim, output_size=1, **variant['qf_kwargs'], ) target_qf1 = FlattenMlp( input_size=obs_dim + action_dim, output_size=1, **variant['qf_kwargs'], ) target_qf2 = FlattenMlp( input_size=obs_dim + action_dim, output_size=1, **variant['qf_kwargs'], ) # Define references to variables that are agent-specific trainer = None eval_policy = None expl_policy = None # Instantiate trainer with appropriate agent if agent == "SAC": expl_policy = TanhGaussianPolicy( obs_dim=obs_dim, action_dim=action_dim, **variant['policy_kwargs'], ) eval_policy = MakeDeterministic(expl_policy) trainer = SACTrainer(env=eval_env, policy=expl_policy, qf1=qf1, qf2=qf2, target_qf1=target_qf1, target_qf2=target_qf2, **variant['trainer_kwargs']) elif agent == "TD3": eval_policy = TanhMlpPolicy(input_size=obs_dim, output_size=action_dim, **variant['policy_kwargs']) target_policy = TanhMlpPolicy(input_size=obs_dim, output_size=action_dim, **variant['policy_kwargs']) es = GaussianStrategy( action_space=expl_env.action_space, max_sigma=0.1, min_sigma=0.1, # Constant sigma ) expl_policy = PolicyWrappedWithExplorationStrategy( exploration_strategy=es, policy=eval_policy, ) trainer = TD3Trainer(policy=eval_policy, qf1=qf1, qf2=qf2, target_qf1=target_qf1, target_qf2=target_qf2, target_policy=target_policy, **variant['trainer_kwargs']) else: print("Error: No valid agent chosen!") replay_buffer = EnvReplayBuffer( variant['replay_buffer_size'], expl_env, ) eval_path_collector = MdpPathCollector( eval_env, eval_policy, ) expl_path_collector = MdpPathCollector( expl_env, expl_policy, ) # Define algorithm algorithm = CustomTorchBatchRLAlgorithm( trainer=trainer, exploration_env=expl_env, evaluation_env=eval_env, exploration_data_collector=expl_path_collector, evaluation_data_collector=eval_path_collector, replay_buffer=replay_buffer, **variant['algorithm_kwargs']) algorithm.to(ptu.device) algorithm.train()