def fetch_push_gym_demo(): env = GymWrapper( suite.make( 'FetchPush', robots='Panda', controller_configs=None, gripper_types='UltrasoundProbeGripper', has_renderer=True, has_offscreen_renderer=False, use_camera_obs=False, use_object_obs=True, control_freq=50, render_camera=None, )) for i_episode in range(20): observation = env.reset() for t in range(500): env.render() action = env.action_space.sample() observation, reward, done, info = env.step(action) if done: print("Episode finished after {} timesteps".format(t + 1)) break
def main(): parser = custom_arg_parser() args = parser.parse_args() load_defaults(args) print("Arguments:{}".format(args)) # Create the model name with all the parameters model_dir_name = serialize_args(args) print("Model name: {}".format(model_dir_name)) if args.model is not None: model_save_path = os.path.dirname(args.model) + "/" tb_save_path = model_save_path.replace("learned_models","tb_logs") else: model_save_path = "../../learned_models/" + model_dir_name + "/" tb_save_path = "../../tb_logs/" + model_dir_name + "/" print("Model save path:{}".format(model_save_path)) print("TB logs save path:{}".format(tb_save_path)) final_model_path = model_save_path + "final_" + model_dir_name model_load_path = args.model show_render = args.visualize # Save args to json for training from checkpoints if not os.path.exists(model_save_path): os.makedirs(model_save_path) with open(model_save_path + "args.json", 'w+') as f: json.dump(vars(args),f,indent=2,sort_keys=True) env = GymWrapper( suite.make( "JR2Door", has_renderer = show_render, use_camera_obs = False, ignore_done = False, control_freq = args.control_freq, horizon = args.horizon, door_type = args.door_type, bot_motion = args.bot_motion, robot_pos = args.robot_pos, robot_theta = args.robot_theta, dist_to_handle_coef = args.rcoef_dist_to_handle, door_angle_coef = args.rcoef_door_angle, handle_con_coef = args.rcoef_handle_con, body_door_con_coef = args.rcoef_body_door_con, self_con_coef = args.rcoef_self_con, arm_handle_con_coef = args.rcoef_arm_handle_con, arm_door_con_coef = args.rcoef_arm_door_con, force_coef = args.rcoef_force, gripper_touch_coef = args.rcoef_gripper_touch, dist_to_door_coef = args.rcoef_dist_to_door, wall_con_coef = args.rcoef_wall_con, reset_on_large_force= args.reset_on_large_force, debug_print = args.print_info, eef_type = args.eef_type, door_init_qpos = args.door_init_qpos, goal_offset = args.goal_offset, ) ) if args.slurm: env = SubprocVecEnv([lambda: env for i in range(args.n_cpu)]) else: env = DummyVecEnv([lambda: env]) # Load the specified model, if there is one if args.model is not None: # Training from checkpoint, so need to reset timesteps for tb reset_num_timesteps = False if args.rl_alg == "ppo2": model = PPO2.load(model_load_path,env=env) print("Succesfully loaded PPO2 model") if args.rl_alg == "ppo1": model = PPO1.load(model_load_path,env=env) print("Succesfully loaded PPO1 model") else: # New model, so need to reset timesteps for tb reset_num_timesteps = True if args.rl_alg == "ppo2": model = PPO2( args.policy, env, verbose=args.verbose, n_steps=args.n_steps, nminibatches=args.minibatches, noptepochs=args.opt_epochs, cliprange=args.clip_range, ent_coef=args.ent_coef, tensorboard_log=tb_save_path, #full_tensorboard_log=True ) elif args.rl_alg == "ppo1": model = PPO1( args.policy, env, verbose=args.verbose, timesteps_per_actorbatch=args.n_steps, optim_epochs=args.opt_epochs, tensorboard_log=tb_save_path, ) if args.replay: # Replay a policy obs = env.reset() count = 0 with open('episode-reward.csv', mode='w') as fid: writer = csv.writer(fid, delimiter=',') writer.writerow("reward") while(count < 1000): env.render() count += 1 print(count) while True: if args.model is None: print("Error: No model has been specified") action, _states = model.predict(obs,deterministic=True) #print("action {}".format(action)) obs, reward, done, info = env.step(action) env.render() #print(obs) #print(env.sim.data.qpos[env._ref_joint_vel_indexes]) #time.sleep(0.1) with open('episode-reward.csv', mode='a') as fid: writer = csv.writer(fid, delimiter=',') writer.writerow(reward) #if done: # quit() else: # Train model.learn( total_timesteps = args.total_timesteps, save_dir = model_save_path, render=show_render, reset_num_timesteps=reset_num_timesteps, ) model.save(final_model_path) print("Done training") obs = env.reset()
break We demonstrate equivalent functionality below. """ from robosuite.wrappers import GymWrapper if __name__ == "__main__": # Notice how the environment is wrapped by the wrapper env = GymWrapper( suite.make( "SawyerLift", use_camera_obs=False, # do not use pixel observations has_offscreen_renderer=False, # not needed since not using pixel obs has_renderer=True, # make sure we can render to the screen reward_shaping=True, # use dense rewards control_freq=100, # control should happen fast enough so that simulation looks smooth ) ) for i_episode in range(20): observation = env.reset() for t in range(500): env.render() action = env.action_space.sample() observation, reward, done, info = env.step(action) if done: print("Episode finished after {} timesteps".format(t + 1)) break
class NutAssemblyDenseHand(gym.Env): """ TODO: change obs and reward setting NutAssemblyHand: 'FetchPickAndPlace-v1' with a perfect controller Action taken as: pi_theta(s) = pi(s) + f_theta(s) Parameters: ----------- kp: float Scaling factor for position control """ def __init__(self, kp: float = 20, *args, **kwargs): options = {} controller_name = 'OSC_POSE' options["controller_configs"] = suite.load_controller_config( default_controller=controller_name) self.env = GymWrapper( suite.make( "NutAssemblyRound", # Nut Assembly task with the round peg robots="IIWA", # use IIWA robot **options, # controller options use_object_obs=True, use_camera_obs=False, # do not use pixel observations has_offscreen_renderer= False, # not needed since not using pixel obs has_renderer=False, # make sure we can render to the screen reward_shaping=False, # use dense rewards control_freq= 20, # control should happen fast enough so that simulation looks smooth )) self.max_episode_steps = 500 self.action_space = self.env.action_space self.observation_space = self.env.observation_space self.reward_type = 'sparse' self.distance_threshold = 0.065 def step(self, residual_action: np.ndarray): controller_action = np.array( self.controller_action(self.last_observation)) if (controller_action > 1).any() or (controller_action < -1).any(): print(controller_action) action = np.add(controller_action, residual_action) action = np.clip(action, -1, 1) ob, reward, done, info = self.env.step(action) ob = self.env.env._get_observation() observation = {} observation['observation'] = np.hstack( (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'], ob['RoundNut0_quat'])) observation['desired_goal'] = np.array( self.env.sim.data.body_xpos[self.env.peg2_body_id]) observation['achieved_goal'] = ob['RoundNut0_pos'] self.last_observation = observation.copy() info['is_success'] = reward return observation, reward, done, info def reset(self): self.env.reset() # reset according to task defaults ob = self.env.env._get_observation() observation = {} observation['observation'] = np.hstack( (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'], ob['RoundNut0_quat'])) observation['desired_goal'] = np.array( self.env.sim.data.body_xpos[self.env.peg2_body_id]) observation['achieved_goal'] = ob['RoundNut0_pos'] self.last_observation = observation.copy() self.object_in_hand = False self.object_below_hand = False self.gripper_reoriented = 0 return observation def seed(self, seed: int = 0): self.np_random, seed = seeding.np_random(seed) return self.env.seed(seed=seed) def render(self, mode: str = "human", *args, **kwargs): return self.env.render() def close(self): return self.env.close() def goal_distance(self, achieved_goal, desired_goal): return np.linalg.norm(achieved_goal - desired_goal, axis=1) def compute_reward(self, achieved_goal, goal, info): # Compute distance between goal and the achieved goal. d = self.goal_distance(achieved_goal, goal) if self.reward_type == 'sparse': return -(d > self.distance_threshold).astype(np.float32) else: return -d def controller_action(self, obs: dict, take_action: bool = True, DEBUG: bool = False): observation = obs['observation'] goal_pos = obs['desired_goal'] achieved_goal = obs['achieved_goal'] gripper_pos = observation[:3] gripper_quat = observation[3:7] object_pos = observation[7:10] object_quat = observation[10:] z_table = 0.8610982 object_axang = T.quat2axisangle(object_quat) if abs(object_axang[-1] - 1.24) < 0.2: object_axang_touse = [ 0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8) ] else: object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)] gripper_axang = T.quat2axisangle(gripper_quat) # print('object axang to use ' + str(object_axang_touse)) if self.gripper_reoriented == 0: self.gripper_init_quat = gripper_quat self.gripper_reoriented = 1 init_inv = T.quat_inverse(self.gripper_init_quat) changing_wf = T.quat_multiply(init_inv, gripper_quat) changing_wf_axang = T.quat2axisangle(changing_wf) # gripper_quat_inv = T.quat_inverse(gripper_quat) # changing_wf = T.quat_multiply(gripper_quat_inv,self.gripper_init_quat) # changing_wf_axang = T.quat2axisangle(changing_wf) # print('changing wf axis ' +str(changing_wf_axang)) if not self.object_below_hand or self.gripper_reoriented < 5: self.nut_p = T.quat2axisangle(object_quat)[-1] # print(self.nut_p) if not self.object_below_hand: action = 20 * (object_pos[:2] - gripper_pos[:2]) else: action = [0, 0] action = 20 * (object_pos[:2] - gripper_pos[:2]) # frac = 0.2 # Rate @ which to rotate gripper about z. # ang_goal = frac*self.nut_p # Nut p is the nut's random intial pertubation about z. # if self.gripper_reoriented == 0: # self.gripper_init_quat = gripper_quat # if self.gripper_reoriented < 5: # Gripper should be aligned with nut after 5 action steps # action_angle= [0,0,ang_goal] # #print('object ' + str(object_axang)) # #print('gripper ' + str(gripper_axang)) # init_inv = T.quat_inverse(self.gripper_init_quat) # init_inv_mat = T.quat2mat(init_inv) # rel = T.quat_multiply(gripper_quat,init_inv) # rel_axang = T.quat2axisangle(rel) # #print('rel_axang ' +str(rel_axang)) # rel_axang_WF = np.matmul(init_inv_mat,rel_axang) # #print('rel_axang_WF ' +str(rel_axang_WF)) # if take_action: # self.gripper_reoriented+=1 # else: # After 5 action steps, don't rotate gripper any more # action_angle=[0,0,0] action_angle = 0.2 * (object_axang_touse - changing_wf_axang) action_angle = [0, 0, action_angle[-1]] #action_angle = [0,0,0] #print('action angle ' +str(action_angle)) if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1: if take_action: self.gripper_reoriented = 5 action = np.hstack((action, [0], action_angle, [-1])) if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01: if take_action: self.object_below_hand = True #self.gripper_reoriented = 5 elif not self.object_in_hand: # Close gripper action = [0, 0, -1, 0, 0, 0, -1] if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01: action = [0, 0, 0, 0, 0, 0, 1] if take_action: self.object_in_hand = True else: # Move gripper up and toward goal position action = [0, 0, 1, 0, 0, 0, 1] if object_pos[2] - z_table > 0.1: action = 20 * (goal_pos[:2] - object_pos[:2]) action = np.hstack((action, [0, 0, 0, 0, 1])) if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225: action = [0, 0, 0, 0, 0, 0, -1] # Drop nut once it's close enough to the peg action = np.clip(action, -1, 1) return action
class NutAssemblyDense(gym.Env): """ NutAssembly: NutAssembly task from robosuite with no controller. Can be used for learning from scratch. """ def __init__(self, horizon=500, *args, **kwargs): options = {} controller_name = 'OSC_POSE' options["controller_configs"] = suite.load_controller_config( default_controller=controller_name) self.env = GymWrapper( suite.make( "NutAssemblyRound", # Nut Assembly task with the round peg robots="IIWA", # use IIWA robot **options, # controller options use_object_obs=True, use_camera_obs=False, # do not use pixel observations has_offscreen_renderer= False, # not needed since not using pixel obs has_renderer=False, # make sure we can render to the screen reward_shaping=True, # use dense rewards reward_scale=1.0, control_freq= 20, # control should happen fast enough so that simulation looks smooth horizon=horizon, # number of timesteps for ending episode )) self.max_episode_steps = horizon self.action_space = self.env.action_space self.observation_space = self.env.observation_space self.reward_type = 'sparse' self.distance_threshold = 0.065 def step(self, action): ob, reward, done, info = self.env.step(action) ob = self.env.env._get_observation() peg_pos = np.array(self.env.sim.data.body_xpos[self.env.peg2_body_id]) observation = np.hstack( (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'], ob['RoundNut0_quat'], peg_pos)) # info['is_success'] = self.get_success(ob['RoundNut0_pos'], peg_pos) info['is_success'] = self.env.env._check_success() return observation, reward, done, info def reset(self): ob = self.env.reset() ob = self.env.env._get_observation() peg_pos = np.array(self.env.sim.data.body_xpos[self.env.peg2_body_id]) observation = np.hstack( (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'], ob['RoundNut0_quat'], peg_pos)) return observation def seed(self, seed=0): self.np_random, seed = seeding.np_random(seed) return self.env.seed(seed=seed) def render(self, mode="human", *args, **kwargs): return self.env.render() def close(self): return self.env.close() def goal_distance(self, achieved_goal, desired_goal): return np.linalg.norm(achieved_goal - desired_goal) def get_success(self, object_pos, goal): # Compute distance between goal and the achieved goal. d = self.goal_distance(object_pos, goal) return (d < self.distance_threshold).astype(np.float32)
# create original environment env = robosuite.make( args.environment, ignore_done=True, use_camera_obs=False, has_renderer=True, control_freq=100, ) data_directory = args.directory # wrap the environment with data collection wrapper #env = DataCollectionWrapper(env, data_directory) #env = IKWrapper(env) env = GymWrapper(env) # testing to make sure multiple env.reset calls don't create multiple directories env.reset() env.reset() env.reset() # collect some data #print("Collecting some random data...") #collect_random_trajectory(env, timesteps=args.timesteps) # playback some data _ = input("Press any key to begin the playback...") print("Playing back the data...") #data_directory = env.ep_directory playback_trajectory(env, data_directory)