Example #1
import robosuite as suite
from robosuite.wrappers import GymWrapper

# Note: 'FetchPush' and 'UltrasoundProbeGripper' appear to come from a custom
# robosuite extension; they are not part of the stock robosuite distribution.


def fetch_push_gym_demo():
    env = GymWrapper(
        suite.make(
            'FetchPush',
            robots='Panda',
            controller_configs=None,
            gripper_types='UltrasoundProbeGripper',
            has_renderer=True,
            has_offscreen_renderer=False,
            use_camera_obs=False,
            use_object_obs=True,
            control_freq=50,
            render_camera=None,
        ))
    for i_episode in range(20):
        observation = env.reset()
        for t in range(500):
            env.render()
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break
Example #2
import csv
import json
import os

import robosuite as suite
from robosuite.wrappers import GymWrapper
from stable_baselines import PPO1, PPO2
from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv

# custom_arg_parser, load_defaults, and serialize_args are project-specific
# helpers assumed to be defined elsewhere in this repository.


def main():

  parser = custom_arg_parser()
  args = parser.parse_args()
  load_defaults(args)
  print("Arguments:{}".format(args))
  # Create the model name with all the parameters
  
  model_dir_name = serialize_args(args)
  print("Model name: {}".format(model_dir_name))
  if args.model is not None:
    model_save_path = os.path.dirname(args.model) + "/"
    tb_save_path = model_save_path.replace("learned_models","tb_logs")
  else:
    model_save_path = "../../learned_models/" + model_dir_name + "/"
    tb_save_path = "../../tb_logs/" +  model_dir_name + "/"
  print("Model save path:{}".format(model_save_path))
  print("TB logs save path:{}".format(tb_save_path))
  final_model_path = model_save_path + "final_" + model_dir_name
  model_load_path = args.model
  show_render = args.visualize

  # Save args to json for training from checkpoints
  if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)
    with open(model_save_path + "args.json", 'w+') as f:
      json.dump(vars(args),f,indent=2,sort_keys=True)

  env = GymWrapper(
      suite.make(
      "JR2Door",
      has_renderer        = show_render,
      use_camera_obs      = False,
      ignore_done         = False,
      control_freq        = args.control_freq,
      horizon             = args.horizon,
      door_type           = args.door_type,
      bot_motion          = args.bot_motion,
      robot_pos           = args.robot_pos,
      robot_theta         = args.robot_theta,
      dist_to_handle_coef = args.rcoef_dist_to_handle,
      door_angle_coef     = args.rcoef_door_angle,
      handle_con_coef     = args.rcoef_handle_con,
      body_door_con_coef  = args.rcoef_body_door_con,
      self_con_coef       = args.rcoef_self_con,
      arm_handle_con_coef = args.rcoef_arm_handle_con,
      arm_door_con_coef   = args.rcoef_arm_door_con,
      force_coef          = args.rcoef_force,
      gripper_touch_coef  = args.rcoef_gripper_touch,
      dist_to_door_coef   = args.rcoef_dist_to_door,
      wall_con_coef       = args.rcoef_wall_con,
      reset_on_large_force= args.reset_on_large_force,
      debug_print         = args.print_info,
      eef_type            = args.eef_type,
      door_init_qpos      = args.door_init_qpos,
      goal_offset         = args.goal_offset,
    )
  )
  
  if args.slurm:
    # Note: every lambda here closes over the same `env` instance; a factory
    # that builds a fresh environment per worker is the safer pattern
    # (see the sketch after this example).
    env = SubprocVecEnv([lambda: env for i in range(args.n_cpu)])
  else:
    env = DummyVecEnv([lambda: env])

  # Load the specified model, if there is one
  if args.model is not None:
    # Training from checkpoint, so need to reset timesteps for tb
    reset_num_timesteps = False
    if args.rl_alg == "ppo2":
      model = PPO2.load(model_load_path, env=env)
      print("Successfully loaded PPO2 model")
    elif args.rl_alg == "ppo1":
      model = PPO1.load(model_load_path, env=env)
      print("Successfully loaded PPO1 model")
  else: 
    # New model, so need to reset timesteps for tb
    reset_num_timesteps = True
    if args.rl_alg == "ppo2":
      model = PPO2(
                  args.policy,
                  env,
                  verbose=args.verbose,
                  n_steps=args.n_steps,
                  nminibatches=args.minibatches,
                  noptepochs=args.opt_epochs,
                  cliprange=args.clip_range,
                  ent_coef=args.ent_coef,
                  tensorboard_log=tb_save_path,
                  #full_tensorboard_log=True
                  )

    elif args.rl_alg == "ppo1":
      model = PPO1(
                  args.policy,
                  env,
                  verbose=args.verbose,
                  timesteps_per_actorbatch=args.n_steps,
                  optim_epochs=args.opt_epochs,
                  tensorboard_log=tb_save_path,
                  )
  if args.replay:
    # Replay a policy
    obs = env.reset()
    count = 0
    with open('episode-reward.csv', mode='w') as fid:
      writer = csv.writer(fid, delimiter=',')
      writer.writerow(["reward"])  # header row; a bare string would be split into single characters
    while count < 1000:
      env.render()
      count += 1
      print(count)
    while True:
      if args.model is None:
        print("Error: No model has been specified")
        break
      action, _states = model.predict(obs,deterministic=True)
      #print("action {}".format(action))
      obs, reward, done, info = env.step(action)
      env.render()
      #print(obs)
      #print(env.sim.data.qpos[env._ref_joint_vel_indexes])
      #time.sleep(0.1)

      with open('episode-reward.csv', mode='a') as fid:
        writer = csv.writer(fid, delimiter=',')
        writer.writerow(reward)

      #if done:
      #  quit()
  else:
    # Train
    model.learn(
                total_timesteps = args.total_timesteps,
                save_dir = model_save_path,
                render=show_render,
                reset_num_timesteps=reset_num_timesteps,
                )

    model.save(final_model_path)
  
    print("Done training")
    obs = env.reset()
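Example #2 passes the same env instance to every SubprocVecEnv worker through identical lambdas. A safer pattern constructs a fresh environment per worker via a factory. This is a minimal sketch, assuming the custom JR2Door environment from the example is installed; make_env is a name introduced here for illustration:

import robosuite as suite
from robosuite.wrappers import GymWrapper
from stable_baselines.common.vec_env import SubprocVecEnv

def make_env():
    # hypothetical factory: each worker constructs its own wrapped instance
    def _init():
        return GymWrapper(
            suite.make("JR2Door", has_renderer=False, use_camera_obs=False))
    return _init

n_cpu = 4  # assumed worker count
vec_env = SubprocVecEnv([make_env() for _ in range(n_cpu)])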

Example #3
"""
This example adapts a robosuite environment to the OpenAI Gym API.
We demonstrate equivalent functionality below.
"""

import robosuite as suite
from robosuite.wrappers import GymWrapper

if __name__ == "__main__":

    # Notice how the environment is wrapped by the wrapper
    env = GymWrapper(
        suite.make(
            "SawyerLift",
            use_camera_obs=False,  # do not use pixel observations
            has_offscreen_renderer=False,  # not needed since not using pixel obs
            has_renderer=True,  # make sure we can render to the screen
            reward_shaping=True,  # use dense rewards
            control_freq=100,  # control should happen fast enough so that simulation looks smooth
        )
    )

    for i_episode in range(20):
        observation = env.reset()
        for t in range(500):
            env.render()
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t + 1))
                break
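GymWrapper also accepts a keys argument selecting which observation entries are flattened into the Gym observation vector. A minimal sketch, assuming a recent robosuite version where proprioceptive and object observations are exposed under the key names shown (names vary across versions):

import robosuite as suite
from robosuite.wrappers import GymWrapper

# Flatten only the selected observation keys into the Gym-style vector.
env = GymWrapper(
    suite.make("Lift", robots="Panda", use_camera_obs=False, has_renderer=False),
    keys=["robot0_proprio-state", "object-state"],  # assumed key names
)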
Example #4
from math import pi

import gym
import numpy as np
import robosuite as suite
import robosuite.utils.transform_utils as T
from gym.utils import seeding
from robosuite.wrappers import GymWrapper


class NutAssemblyDenseHand(gym.Env):
    """
        TODO: change obs and reward setting
        NutAssemblyHand:
            'FetchPickAndPlace-v1' with a perfect controller
            Action taken as:
                pi_theta(s) = pi(s) + f_theta(s)
        Parameters:
        -----------
        kp: float
            Scaling factor for position control
    """
    def __init__(self, kp: float = 20, *args, **kwargs):
        options = {}
        controller_name = 'OSC_POSE'
        options["controller_configs"] = suite.load_controller_config(
            default_controller=controller_name)

        self.env = GymWrapper(
            suite.make(
                "NutAssemblyRound",  # Nut Assembly task with the round peg
                robots="IIWA",  # use IIWA robot
                **options,  # controller options
                use_object_obs=True,
                use_camera_obs=False,  # do not use pixel observations
                has_offscreen_renderer=False,  # not needed since not using pixel obs
                has_renderer=False,  # no on-screen rendering
                reward_shaping=False,  # sparse rewards
                control_freq=20,  # control should happen fast enough so that simulation looks smooth
            ))

        self.max_episode_steps = 500
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.reward_type = 'sparse'
        self.distance_threshold = 0.065

    def step(self, residual_action: np.ndarray):
        controller_action = np.array(
            self.controller_action(self.last_observation))
        if (controller_action > 1).any() or (controller_action < -1).any():
            # warn when the scripted controller saturates the [-1, 1] action range
            print(controller_action)
        action = np.add(controller_action, residual_action)
        action = np.clip(action, -1, 1)
        ob, reward, done, info = self.env.step(action)
        ob = self.env.env._get_observation()
        observation = {}
        observation['observation'] = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat']))
        observation['desired_goal'] = np.array(
            self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation['achieved_goal'] = ob['RoundNut0_pos']
        self.last_observation = observation.copy()
        info['is_success'] = reward  # with sparse rewards, reward is nonzero exactly on success
        return observation, reward, done, info

    def reset(self):
        self.env.reset()  # reset according to task defaults
        ob = self.env.env._get_observation()
        observation = {}
        observation['observation'] = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat']))
        observation['desired_goal'] = np.array(
            self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation['achieved_goal'] = ob['RoundNut0_pos']
        self.last_observation = observation.copy()
        self.object_in_hand = False
        self.object_below_hand = False
        self.gripper_reoriented = 0
        return observation

    def seed(self, seed: int = 0):
        self.np_random, seed = seeding.np_random(seed)
        return self.env.seed(seed=seed)

    def render(self, mode: str = "human", *args, **kwargs):
        return self.env.render()

    def close(self):
        return self.env.close()

    def goal_distance(self, achieved_goal, desired_goal):
        return np.linalg.norm(achieved_goal - desired_goal, axis=1)

    def compute_reward(self, achieved_goal, goal, info):
        # Compute distance between goal and the achieved goal.
        d = self.goal_distance(achieved_goal, goal)
        if self.reward_type == 'sparse':
            return -(d > self.distance_threshold).astype(np.float32)
        else:
            return -d

    def controller_action(self,
                          obs: dict,
                          take_action: bool = True,
                          DEBUG: bool = False):
        observation = obs['observation']
        goal_pos = obs['desired_goal']
        achieved_goal = obs['achieved_goal']

        gripper_pos = observation[:3]
        gripper_quat = observation[3:7]
        object_pos = observation[7:10]
        object_quat = observation[10:]

        z_table = 0.8610982

        object_axang = T.quat2axisangle(object_quat)
        # wrap the nut's yaw into a 45-degree (2*pi/8) sector before comparing
        # it with the gripper's accumulated rotation
        if abs(object_axang[-1] - 1.24) < 0.2:
            object_axang_touse = [
                0, 0, object_axang[-1] % (2 * pi / 8) + (2 * pi / 8)
            ]
        else:
            object_axang_touse = [0, 0, object_axang[-1] % (2 * pi / 8)]
        gripper_axang = T.quat2axisangle(gripper_quat)
        # print('object axang to use ' + str(object_axang_touse))

        if self.gripper_reoriented == 0:
            self.gripper_init_quat = gripper_quat
            self.gripper_reoriented = 1

        init_inv = T.quat_inverse(self.gripper_init_quat)
        changing_wf = T.quat_multiply(init_inv, gripper_quat)
        changing_wf_axang = T.quat2axisangle(changing_wf)

        # gripper_quat_inv = T.quat_inverse(gripper_quat)
        # changing_wf = T.quat_multiply(gripper_quat_inv,self.gripper_init_quat)
        # changing_wf_axang = T.quat2axisangle(changing_wf)

        # print('changing wf axis ' +str(changing_wf_axang))

        if not self.object_below_hand or self.gripper_reoriented < 5:
            self.nut_p = T.quat2axisangle(object_quat)[-1]
            # print(self.nut_p)
            # proportional xy control toward the nut
            action = 20 * (object_pos[:2] - gripper_pos[:2])

            # frac = 0.2 # Rate @ which to rotate gripper about z.
            # ang_goal = frac*self.nut_p # Nut p is the nut's random intial pertubation about z.

            # if self.gripper_reoriented == 0:
            #     self.gripper_init_quat = gripper_quat
            # if self.gripper_reoriented < 5: # Gripper should be aligned with nut after 5 action steps
            #     action_angle= [0,0,ang_goal]

            #     #print('object ' + str(object_axang))
            #     #print('gripper ' + str(gripper_axang))

            #     init_inv = T.quat_inverse(self.gripper_init_quat)
            #     init_inv_mat = T.quat2mat(init_inv)

            #     rel = T.quat_multiply(gripper_quat,init_inv)
            #     rel_axang = T.quat2axisangle(rel)
            #     #print('rel_axang ' +str(rel_axang))

            #     rel_axang_WF = np.matmul(init_inv_mat,rel_axang)

            #     #print('rel_axang_WF ' +str(rel_axang_WF))

            #     if take_action:
            #         self.gripper_reoriented+=1
            # else: # After 5 action steps, don't rotate gripper any more
            #     action_angle=[0,0,0]

            action_angle = 0.2 * (object_axang_touse - changing_wf_axang)
            action_angle = [0, 0, action_angle[-1]]
            #action_angle = [0,0,0]
            #print('action angle ' +str(action_angle))

            if np.linalg.norm(object_axang_touse - changing_wf_axang) < 0.1:
                if take_action:
                    self.gripper_reoriented = 5

            action = np.hstack((action, [0], action_angle, [-1]))
            if np.linalg.norm((object_pos[:2] - gripper_pos[:2])) < 0.01:
                if take_action:
                    self.object_below_hand = True
                    #self.gripper_reoriented = 5

        elif not self.object_in_hand:  # Close gripper
            action = [0, 0, -1, 0, 0, 0, -1]
            if np.linalg.norm((object_pos[2] - gripper_pos[2])) < 0.01:
                action = [0, 0, 0, 0, 0, 0, 1]
                if take_action:
                    self.object_in_hand = True

        else:  # Move gripper up and toward goal position
            action = [0, 0, 1, 0, 0, 0, 1]
            if object_pos[2] - z_table > 0.1:
                action = 20 * (goal_pos[:2] - object_pos[:2])
                action = np.hstack((action, [0, 0, 0, 0, 1]))
                if np.linalg.norm((goal_pos[:2] - object_pos[:2])) < 0.0225:
                    action = [0, 0, 0, 0, 0, 0,
                              -1]  # Drop nut once it's close enough to the peg

        action = np.clip(action, -1, 1)
        return action
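The docstring above composes the executed action as pi_theta(s) = pi(s) + f_theta(s): the scripted controller plus a learned residual. A minimal rollout sketch with a zero residual, which reduces the executed action to the scripted controller alone:

import numpy as np

env = NutAssemblyDenseHand(kp=20)
obs = env.reset()
for _ in range(env.max_episode_steps):
    residual = np.zeros(env.action_space.shape)  # stand-in for the learned f_theta(s)
    obs, reward, done, info = env.step(residual)
    if done:
        break
env.close()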
Example #5
class NutAssemblyDense(gym.Env):
    """
        NutAssembly:
        NutAssembly task from robosuite with no controller. Can be used for learning from scratch.
    """
    def __init__(self, horizon=500, *args, **kwargs):
        options = {}
        controller_name = 'OSC_POSE'
        options["controller_configs"] = suite.load_controller_config(
            default_controller=controller_name)

        self.env = GymWrapper(
            suite.make(
                "NutAssemblyRound",  # Nut Assembly task with the round peg
                robots="IIWA",  # use IIWA robot
                **options,  # controller options
                use_object_obs=True,
                use_camera_obs=False,  # do not use pixel observations
                has_offscreen_renderer=False,  # not needed since not using pixel obs
                has_renderer=False,  # no on-screen rendering
                reward_shaping=True,  # use dense rewards
                reward_scale=1.0,
                control_freq=20,  # control should happen fast enough so that simulation looks smooth
                horizon=horizon,  # number of timesteps before the episode ends
            ))
        self.max_episode_steps = horizon
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.reward_type = 'sparse'
        self.distance_threshold = 0.065

    def step(self, action):
        ob, reward, done, info = self.env.step(action)
        ob = self.env.env._get_observation()
        peg_pos = np.array(self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat'], peg_pos))
        # info['is_success'] = self.get_success(ob['RoundNut0_pos'], peg_pos)
        info['is_success'] = self.env.env._check_success()
        return observation, reward, done, info

    def reset(self):
        ob = self.env.reset()
        ob = self.env.env._get_observation()
        peg_pos = np.array(self.env.sim.data.body_xpos[self.env.peg2_body_id])
        observation = np.hstack(
            (ob['robot0_eef_pos'], ob['robot0_eef_quat'], ob['RoundNut0_pos'],
             ob['RoundNut0_quat'], peg_pos))
        return observation

    def seed(self, seed=0):
        self.np_random, seed = seeding.np_random(seed)
        return self.env.seed(seed=seed)

    def render(self, mode="human", *args, **kwargs):
        return self.env.render()

    def close(self):
        return self.env.close()

    def goal_distance(self, achieved_goal, desired_goal):
        return np.linalg.norm(achieved_goal - desired_goal)

    def get_success(self, object_pos, goal):
        # Compute distance between goal and the achieved goal.
        d = self.goal_distance(object_pos, goal)
        return (d < self.distance_threshold).astype(np.float32)
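In contrast to the residual variant, NutAssemblyDense is driven directly by the learned policy. A minimal random-action rollout sketch (the sampled action stands in for a policy trained from scratch):

env = NutAssemblyDense(horizon=500)
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # placeholder for a learned policy
    obs, reward, done, info = env.step(action)
env.close()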
Example #6
    # create original environment
    env = robosuite.make(
        args.environment,
        ignore_done=True,
        use_camera_obs=False,
        has_renderer=True,
        control_freq=100,
    )
    data_directory = args.directory

    # wrap the environment with data collection wrapper
    #env = DataCollectionWrapper(env, data_directory)
    #env = IKWrapper(env)
    env = GymWrapper(env)

    # testing to make sure multiple env.reset calls don't create multiple directories
    env.reset()
    env.reset()
    env.reset()

    # collect some data
    #print("Collecting some random data...")
    #collect_random_trajectory(env, timesteps=args.timesteps)

    # playback some data
    _ = input("Press any key to begin the playback...")
    print("Playing back the data...")
    #data_directory = env.ep_directory
    playback_trajectory(env, data_directory)
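playback_trajectory is referenced but not defined in this snippet. A minimal sketch of one plausible implementation, assuming episodes were recorded by robosuite's DataCollectionWrapper, which saves flattened MuJoCo states in state_*.npz files under a "states" key:

import glob
import os

import numpy as np

def playback_trajectory(env, ep_dir):
    # Replay each saved batch of flattened simulator states in order.
    for state_file in sorted(glob.glob(os.path.join(ep_dir, "state_*.npz"))):
        states = np.load(state_file)["states"]
        for state in states:
            env.sim.set_state_from_flattened(state)  # restore the saved state
            env.sim.forward()
            env.render()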