Example #1
def evaluate_policy(env_config, model_path, n_eval, printout=False):
    if printout:
        print("Loading policy...")

    # Load trained model and corresponding policy
    data = torch.load(model_path)
    policy = data['evaluation/policy']

    if printout:
        print("Policy loaded")

    # Load controller
    controller = env_config.pop("controller")
    if controller in set(ALL_CONTROLLERS):
        # This is a default controller
        controller_config = load_controller_config(
            default_controller=controller)
    else:
        # This is a string to the custom controller
        controller_config = load_controller_config(custom_fpath=controller)

    # Create robosuite env
    env = suite.make(**env_config,
                     has_renderer=False,
                     has_offscreen_renderer=False,
                     use_object_obs=True,
                     use_camera_obs=False,
                     reward_shaping=True,
                     controller_configs=controller_config)
    env = GymWrapper(env)

    # Use CUDA if available
    if torch.cuda.is_available():
        set_gpu_mode(True)
        if isinstance(policy, MakeDeterministic):
            policy.stochastic_policy.cuda()
        else:
            policy.cuda()

    if printout:
        print("Evaluating policy over {} simulations...".format(n_eval))

    # Create variable to hold rewards to be averaged
    returns = []

    # Loop through simulation n_eval times and take average returns each time
    for i in range(n_eval):
        path = rollout(
            env,
            policy,
            max_path_length=env_config["horizon"],
            render=False,
        )

        # Determine total summed rewards from episode and append to 'returns'
        returns.append(sum(path["rewards"]))

    # Average the episode rewards and return the normalized corresponding value
    return sum(returns) / (env_config["reward_scale"] * n_eval)
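
# A hedged usage sketch (not from the source file): the config keys below mirror what
# evaluate_policy() reads (suite.make kwargs plus the "controller" entry it pops), and the
# checkpoint path is purely illustrative; the file must contain an "evaluation/policy" entry.
example_env_config = {
    "env_name": "Lift",
    "robots": "Panda",
    "controller": "OSC_POSE",
    "horizon": 500,
    "reward_scale": 1.0,
}
avg_normalized_return = evaluate_policy(
    env_config=example_env_config,
    model_path="/path/to/params.pkl",  # hypothetical rlkit checkpoint
    n_eval=10,
    printout=True,
)
print("Average normalized return:", avg_normalized_return)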
Example #2
    def _load_controller(self):
        """
        Loads controller to be used for dynamic trajectories
        """
        # Flag for loading urdf once (only applicable for IK controllers)
        urdf_loaded = False

        # Load controller configs for both left and right arm
        for arm in self.arms:
            # First, load the default controller if none is specified
            if not self.controller_config[arm]:
                # Need to update default for a single agent
                controller_path = os.path.join(
                    os.path.dirname(__file__), '..',
                    'controllers/config/{}.json'.format(
                        self.robot_model.default_controller_config[arm]))
                self.controller_config[arm] = load_controller_config(
                    custom_fpath=controller_path)

            # Assert that the controller config is a dict file:
            #             NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY,
            #                                           OSC_POSITION, OSC_POSE, IK_POSE}
            assert type(self.controller_config[arm]) == dict, \
                "Inputted controller config must be a dict! Instead, got type: {}".format(
                    type(self.controller_config[arm]))

            # Add to the controller dict additional relevant params:
            #   the robot name, mujoco sim, eef_name, actuator_range, joint_indexes, timestep (model) freq,
            #   policy (control) freq, and ndim (# joints)
            self.controller_config[arm]["robot_name"] = self.name
            self.controller_config[arm]["sim"] = self.sim
            self.controller_config[arm]["eef_name"] = self.gripper[
                arm].important_sites["grip_site"]
            self.controller_config[arm][
                "eef_rot_offset"] = self.eef_rot_offset[arm]
            self.controller_config[arm]["ndim"] = self._joint_split_idx
            self.controller_config[arm]["policy_freq"] = self.control_freq
            (start, end) = (None,
                            self._joint_split_idx) if arm == "right" else (
                                self._joint_split_idx, None)
            self.controller_config[arm]["joint_indexes"] = {
                "joints": self.joint_indexes[start:end],
                "qpos": self._ref_joint_pos_indexes[start:end],
                "qvel": self._ref_joint_vel_indexes[start:end]
            }
            self.controller_config[arm]["actuator_range"] = (
                self.torque_limits[0][start:end],
                self.torque_limits[1][start:end])

            # Only load urdf the first time this controller gets called
            self.controller_config[arm]["load_urdf"] = not urdf_loaded
            urdf_loaded = True

            # Instantiate the relevant controller
            self.controller[arm] = controller_factory(
                self.controller_config[arm]["type"],
                self.controller_config[arm])
Example #3
def test_playback():
    # set seeds
    random.seed(0)
    np.random.seed(0)

    env = robosuite.make(
        "Lift",
        robots=["Panda"],
        controller_configs=load_controller_config(
            default_controller="OSC_POSE"),
        has_renderer=False,
        has_offscreen_renderer=False,
        ignore_done=True,
        use_camera_obs=False,
        reward_shaping=True,
        control_freq=20,
    )
    env.reset()

    # task instance
    task_xml = env.sim.model.get_xml()
    task_init_state = np.array(env.sim.get_state().flatten())

    # trick for ensuring that we can play MuJoCo demonstrations back
    # deterministically by using the recorded actions open loop
    env.reset_from_xml_string(task_xml)
    env.sim.reset()
    env.sim.set_state_from_flattened(task_init_state)
    env.sim.forward()

    # random actions to play
    n_actions = 100
    actions = 0.1 * np.random.uniform(
        low=-1.0, high=1.0, size=(n_actions, env.action_spec[0].shape[0]))

    # play actions
    print("playing random actions...")
    states = [task_init_state]
    for i in range(n_actions):
        env.step(actions[i])
        states.append(np.array(env.sim.get_state().flatten()))

    # try playback
    print("attempting playback...")
    env.reset()
    env.reset_from_xml_string(task_xml)
    env.sim.reset()
    env.sim.set_state_from_flattened(task_init_state)
    env.sim.forward()

    for i in range(n_actions):
        env.step(actions[i])
        state_playback = env.sim.get_state().flatten()
        assert np.all(np.equal(states[i + 1], state_playback))

    env.close()
    print("test passed!")
Example #4
    def _load_controller(self):
        """
        Loads controller to be used for dynamic trajectories
        """
        # First, load the default controller if none is specified
        if not self.controller_config:
            # Need to update default for a single agent
            controller_path = os.path.join(
                os.path.dirname(__file__),
                "..",
                "controllers/config/{}.json".format(
                    self.robot_model.default_controller_config),
            )
            self.controller_config = load_controller_config(
                custom_fpath=controller_path)

        # Assert that the controller config is a dict file:
        #             NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY,
        #                                           OSC_POSITION, OSC_POSE, IK_POSE}
        assert (
            type(self.controller_config) == dict
        ), "Inputted controller config must be a dict! Instead, got type: {}".format(
            type(self.controller_config))

        # Add to the controller dict additional relevant params:
        #   the robot name, mujoco sim, eef_name, joint_indexes, timestep (model) freq,
        #   policy (control) freq, and ndim (# joints)
        self.controller_config["robot_name"] = self.name
        self.controller_config["sim"] = self.sim
        self.controller_config["eef_name"] = self.gripper.important_sites[
            "grip_site"]
        self.controller_config["eef_rot_offset"] = self.eef_rot_offset
        self.controller_config["joint_indexes"] = {
            "joints": self.joint_indexes,
            "qpos": self._ref_joint_pos_indexes,
            "qvel": self._ref_joint_vel_indexes,
        }
        self.controller_config["actuator_range"] = self.torque_limits
        self.controller_config["policy_freq"] = self.control_freq
        self.controller_config["ndim"] = len(self.robot_joints)

        # Instantiate the relevant controller
        self.controller = controller_factory(self.controller_config["type"],
                                             self.controller_config)
Example #5
    def _load_controller(self):
        """
        Loads controller to be used for dynamic trajectories
        """
        # First, load the default controller if none is specified
        if not self.controller_config:
            # Need to update default for a single agent
            controller_path = os.path.join(
                os.path.dirname(__file__), '..',
                'controllers/config/{}.json'.format(
                    self.robot_model.default_controller_config))
            self.controller_config = load_controller_config(
                custom_fpath=controller_path)

        # Assert that the controller config is a dict file:
        #             NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY,
        #                                           OSC_POSITION, OSC_POSE, IK_POSE}
        assert type(self.controller_config) == dict, \
            "Inputted controller config must be a dict! Instead, got type: {}".format(type(self.controller_config))
        assert self.controller_config["type"] == "LOCOMOTION_JOINT_TORQUE", \
            "Only JOINT_TORQUE controllers are currently supported for quadruped robots, please change the " \
            "controller to be a joint torque controller. Got controller type: {}".format(self.controller_config["type"])

        # Add to the controller dict additional relevant params:
        #   the robot name, mujoco sim, eef_name, joint_indexes, timestep (model) freq,
        #   policy (control) freq, and ndim (# joints)
        self.controller_config["robot_name"] = self.name
        self.controller_config["sim"] = self.sim
        self.controller_config["chassis_name"] = self.robot_model.robot_base
        self.controller_config["joint_indexes"] = {
            "joints": self.joint_indexes,
            "qpos": self._ref_joint_pos_indexes,
            "qvel": self._ref_joint_vel_indexes
        }
        self.controller_config["actuator_range"] = self.torque_limits
        self.controller_config["policy_freq"] = self.control_freq
        self.controller_config["ndim"] = len(self.robot_joints)

        # Instantiate the relevant controller
        self.controller = controller_factory(self.controller_config["type"],
                                             self.controller_config)
Example #6
def liftEnvPlay():
    env = GymWrapper(
        MergeObsWrapper(
            rs.make(
                "LiftNBVCube",
                robots=["Sawyer"],
                controller_configs=load_controller_config(
                    default_controller="OSC_POSITION"),
                use_camera_obs=False,  # do not use pixel observations
                has_offscreen_renderer=False,  # not needed since not using pixel obs
                has_renderer=True,  # make sure we can render to the screen
                reward_shaping=True,  # use dense rewards
                control_freq=10,  # control should happen fast enough so that simulation looks smooth
                camera_heights=84,
                camera_widths=84,
                camera_depths=True,
            ),
            img_keys=["agentview_image", "agentview_depth"],
            vect_keys=["robot0_robot-state"]),
        keys=["merged_obs"])
    return env
Example #7
            options["robots"] = []

            # Have user choose two robots
            print("A multiple single-arm configuration was chosen.\n")

            for i in range(2):
                print("Please choose Robot {}...\n".format(i))
                options["robots"].append(choose_robots(exclude_bimanual=True))

    # Else, we simply choose a single (single-armed) robot to instantiate in the environment
    else:
        options["robots"] = choose_robots(exclude_bimanual=True)


    # Load the desired controller
    options["controller_configs"] = load_controller_config(default_controller="OSC_POSE")
    

    # change renderer config
    config = load_renderer_config('igibson')

    if args.vision_modality == 'rgb':
        config['vision_modalities'] = ['rgb']
    if args.vision_modality == 'segmentation':
        config['vision_modalities'] = ['seg']
        config['msaa'] = False 
    if args.vision_modality == 'depth':
        config['vision_modalities'] = ['3d'] 
    if args.vision_modality == 'normal':
        config['vision_modalities'] = ['normal']                        
    
Example #8
import robosuite

from robosuite.controllers import load_controller_config

# load default controller parameters for Operational Space Control (OSC)
controller_config = load_controller_config(default_controller="OSC_POSE")

# create an environment to visualize on-screen
env = robosuite.make(
    "Lift",
    robots=["Sawyer"],  # load a Sawyer robot and a Panda robot
    gripper_types="PandaGripper",  # use default grippers per robot arm
    controller_configs=controller_config,  # each arm is controlled using OSC
    has_renderer=True,  # on-screen rendering
    render_camera="frontview",  # visualize the "frontview" camera
    has_offscreen_renderer=False,  # no off-screen rendering
    control_freq=20,  # 20 hz control for applied actions
    horizon=200,  # each episode terminates after 200 steps
    use_object_obs=False,  # no observations needed
    use_camera_obs=False,  # no observations needed
    path=[
        '/home/lthpc/Desktop/meta-role/robosuite/robosuite/my_xmls/sawyer/[1.1, 1.1, 1.1, 1.1, 1.1, 0.9, 0.9, 0.9, 0.9, 0.9].xml'
    ])
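
# A minimal follow-up sketch (an assumption, not part of the original snippet): reset the
# environment and drive it with random actions sampled from env.action_spec, rendering each
# frame on screen until the 200-step horizon ends the episode.
import numpy as np

env.reset()
low, high = env.action_spec
done = False
while not done:
    action = np.random.uniform(low, high)  # random action within the controller's limits
    obs, reward, done, info = env.step(action)
    env.render()
env.close()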
Example #9
            # Have user choose two robots
            print("A multiple single-arm configuration was chosen.\n")

            for i in range(2):
                print("Please choose Robot {}...\n".format(i))
                options["robots"].append(choose_robots(exclude_bimanual=True))

    # Else, we simply choose a single (single-armed) robot to instantiate in the environment
    else:
        options["robots"] = choose_robots(exclude_bimanual=True)

    # Choose controller
    controller_name = choose_controller()

    # Load the desired controller
    options["controller_configs"] = load_controller_config(default_controller=controller_name)

    # Help message to user
    print()
    print("Press \"H\" to show the viewer control panel.")

    # initialize the task
    env = suite.make(
        **options,
        has_renderer=True,
        has_offscreen_renderer=False,
        ignore_done=True,
        use_camera_obs=False,
        control_freq=20,
        hard_reset=False,   # TODO: Not setting this flag to False brings up a segfault on macos or glfw error on linux
    )
Example #10
def run_sim_test(type_test):
    real_robot_joints = []
    sim_robot_joints = []
    target_robot_joints = []
    real_robot_eef_rframe = []
    real_robot_eef_sframe = []
    sim_robot_eef_rframe = []
    sim_robot_eef_sframe = []

    # Create dict to hold options that will be passed to env creation call
    options = {}

    # Choose environment and add it to options
    options["env_name"] = "Lift"
    options["robots"] = ["Jaco"]

    # Choose camera
    camera = "frontview"

    write_path = os.path.join('datasets', type_test)

    # load data
    # latin1 allows us to load python2
    real_robot = np.load(os.path.join('datasets', type_test + '.npz'),
                         allow_pickle=True,
                         encoding='latin1')
    real_eef_pos = real_robot['eef_pos']
    #real_joint_pos = np.mod(real_robot['joint_pos'], 4*np.pi)
    real_joint_pos = real_robot['joint_pos']

    real_actions = real_robot['actions']
    init_real = real_joint_pos[0][:7]
    # Choose controller

    controller_file = "jaco_%s_%shz.json" % (args.controller_type,
                                             args.control_freq)
    controller_fpath = os.path.join(
        os.path.split(suite.__file__)[0], 'controllers', 'config',
        controller_file)
    print('loading controller from', controller_fpath)
    # Load the desired controller
    options["controller_configs"] = load_controller_config(
        custom_fpath=controller_fpath)
    options['initial_qposes'] = [init_real]

    control_freq = args.control_freq
    n_joints = 7
    n_steps = len(real_actions)

    # initialize the task
    env = suite.make(
        **options,
        has_renderer=False,
        has_offscreen_renderer=True,
        ignore_done=True,
        use_camera_obs=True,
        control_freq=control_freq,
        camera_names=camera,
        camera_heights=512,
        camera_widths=512,
    )

    site = 'gripper0_grip_site'
    env = JacoSim2RealWrapper(env)
    env.reset()

    video_writer = imageio.get_writer(write_path + '.mp4', fps=2)
    eef_site_id = env.robots[0].eef_site_id

    # Get action limits
    low, high = env.action_spec
    #env.robots[0].set_robot_joint_positions(init_real[:7])

    sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes]
    for t in range(n_steps - 1):
        action = np.deg2rad(real_actions[t - 1])
        #action = real_joint_pos[t,:7]-sim_joint_pos

        if len(action) == 7:
            action = np.hstack((action, [0]))

        obs, reward, done, _ = env.step(action)

        video_img = obs['%s_image' % camera][::-1]
        video_writer.append_data(video_img)

        # get simulator position and quaternion in real robot frame
        sim_eef_pos_rframe, sim_eef_quat_rframe = env.get_sim2real_posquat()
        sim_eef_pos_sframe, sim_eef_quat_sframe = env.get_sim_posquat()
        sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes]
        sim_goal_joint_pos = env.robots[0].controller.goal_qpos

        sim_robot_eef_rframe.append(
            deepcopy(np.hstack((sim_eef_pos_rframe, sim_eef_quat_rframe))))
        sim_robot_eef_sframe.append(
            deepcopy(np.hstack((sim_eef_pos_sframe, sim_eef_quat_sframe))))
        sim_robot_joints.append(deepcopy(sim_joint_pos))
        target_robot_joints.append(deepcopy(sim_goal_joint_pos))

        real_eef_pos_sframe, real_eef_quat_sframe = env.get_real2sim_posquat(
            real_eef_pos[t, :3], real_eef_pos[t, 3:7])
        real_robot_eef_rframe.append(real_eef_pos[t])
        real_robot_eef_sframe.append(
            deepcopy(np.hstack((real_eef_pos_sframe, real_eef_quat_sframe))))
        real_robot_joints.append(deepcopy(obs['robot0_joint_pos']))

    f, ax = plt.subplots(7, figsize=(10, 20))
    real_robot_eef_rframe = np.array(real_robot_eef_rframe)
    real_robot_eef_sframe = np.array(real_robot_eef_sframe)
    sim_robot_eef_rframe = np.array(sim_robot_eef_rframe)
    sim_robot_eef_sframe = np.array(sim_robot_eef_sframe)
    y = np.arange(len(real_robot_eef_rframe))
    vmin = -np.pi
    vmax = np.pi
    for i in range(7):
        if not i:
            ax[i].scatter(y,
                          real_robot_eef_rframe[:, i],
                          marker='o',
                          s=4,
                          c='r',
                          label='robot_rframe')
            ax[i].scatter(y,
                          real_robot_eef_sframe[:, i],
                          marker='o',
                          s=4,
                          c='k',
                          label='robot_sframe')
            ax[i].scatter(y,
                          sim_robot_eef_rframe[:, i],
                          marker='o',
                          s=4,
                          c='g',
                          label='sim_rframe')
            ax[i].scatter(y,
                          sim_robot_eef_sframe[:, i],
                          marker='o',
                          s=4,
                          c='b',
                          label='sim_sframe')
        else:
            ax[i].scatter(y,
                          real_robot_eef_rframe[:, i],
                          marker='o',
                          s=4,
                          c='r')
            ax[i].scatter(y,
                          real_robot_eef_sframe[:, i],
                          marker='o',
                          s=4,
                          c='k')
            ax[i].scatter(y,
                          sim_robot_eef_rframe[:, i],
                          marker='o',
                          s=4,
                          c='g')
            ax[i].scatter(y,
                          sim_robot_eef_sframe[:, i],
                          marker='o',
                          s=4,
                          c='b')
        ax[i].plot(real_robot_eef_rframe[:, i], c='r')
        ax[i].plot(real_robot_eef_sframe[:, i], c='k')
        ax[i].plot(sim_robot_eef_rframe[:, i], c='g')
        ax[i].plot(sim_robot_eef_sframe[:, i], c='b')

    for i in range(4, 7):
        ax[i].set_ylim([vmin, vmax])
    ax[0].set_title('x')
    ax[1].set_title('y')
    ax[2].set_title('z')
    ax[3].set_title('qx')
    ax[4].set_title('qy')
    ax[5].set_title('qz')
    ax[6].set_title('qw')
    ax[0].legend()
    plt.savefig(write_path + '_eef.png')
    plt.close()

    f, ax = plt.subplots(7, figsize=(10, 20))
    real_robot_joints = np.array(real_robot_joints)
    sim_robot_joints = np.array(sim_robot_joints)
    target_robot_joints = np.array(target_robot_joints)
    vmin = -4 * np.pi
    vmax = 4 * np.pi
    for i in range(7):
        ax[i].set_title(i)
        if not i:
            ax[i].plot(real_robot_joints[:, i], c='b', label='real')
            ax[i].plot(sim_robot_joints[:, i], c='k', label='sim')
            ax[i].plot(target_robot_joints[:, i], c='c', label='goal')
        else:
            ax[i].plot(real_robot_joints[:, i], c='b')
            ax[i].plot(sim_robot_joints[:, i], c='k')
            ax[i].plot(target_robot_joints[:, i], c='c')
        ax[i].scatter(y, real_robot_joints[:, i], s=2, c='b')
        ax[i].scatter(y, sim_robot_joints[:, i], s=2, c='k')
        ax[i].scatter(y, target_robot_joints[:, i], s=2, c='c')

    for x in range(7):
        ax[x].set_ylim([vmin, vmax])
    ax[0].legend()
    plt.savefig(write_path + '_joints.png')
    plt.close()

    video_writer.close()
    print("Video saved to {}".format(write_path))
Example #11
def run_dh_test(type_test):
    sim_robot_joints = []
    target_robot_joints = []
    dh_robot_joints = []
    sim_robot_eef_sframe = []
    sim_robot_eef_rframe = []
    dh_robot_eef_rframe = []

    # matches rframe
    #base_matrix = torch.FloatTensor([[1,0,0,0],
    #                                 [0,-1,0,0],
    #                                  [0,0,-1,0],
    #                                    [0,0,0,1]]).to(device)

    ## gets x and y correct except orientation
    #base_matrix = torch.FloatTensor([[0,1,0,0],
    #                                 [1,0,0,0],
    #                                 [0,0,1,0],
    #                                 [0,0,0,1]]).to(device)
    # gets x and y correct
    base_matrix = torch.FloatTensor([[0,1,0,0],
                                     [1,0,0,0],
                                     [0,0,-1,0],
                                     [0,0,0,1]]).to(device)



    # Create dict to hold options that will be passed to env creation call
    options = {}

    # Choose environment and add it to options
    robot_name = "Jaco"
    options["env_name"] = "Reach"
    options["robots"] = [robot_name]


    npdh = robot_attributes[robot_name]
    tdh = {}
    for key, item in npdh.items():
        tdh[key] = torch.FloatTensor(item).to('cuda')
    # Choose camera
    camera = "frontview"

    write_path = os.path.join('datasets', 'DH_'+type_test)

    # load data
    # latin1 allows us to load python2
    real_robot = np.load(os.path.join('datasets', type_test + '.npz'), allow_pickle=True, encoding='latin1')
    real_eef_pos = real_robot['eef_pos']
    #real_joint_pos = np.mod(real_robot['joint_pos'], 4*np.pi)
    real_joint_pos = real_robot['joint_pos']

    real_actions = real_robot['actions']
    init_real = real_joint_pos[0][:7]
    # Choose controller

    controller_type = 'JOINT_POSITION'
    control_freq = 5
    controller_file = "%s_%s_%shz.json" %(robot_name.lower(), controller_type.lower(), control_freq)
    controller_fpath = os.path.join(
                os.path.split(suite.__file__)[0], 'controllers', 'config',
                controller_file)
    print('loading controller from', controller_fpath)
    # Load the desired controller
    options["controller_configs"] = load_controller_config(custom_fpath=controller_fpath)

    n_joints = 7
    n_steps = len(real_actions)

    # initialize the task
    env = suite.make(
        **options,
        has_renderer=False,
        has_offscreen_renderer=True,
        ignore_done=True,
        use_camera_obs=True,
        control_freq=control_freq,
        camera_names=camera,
        camera_heights=512,
        camera_widths=512,
    )

    site = 'gripper0_grip_site'
    env.reset()

    video_writer = imageio.get_writer(write_path + '.mp4', fps=2)
    eef_site_id = env.robots[0].eef_site_id

    # Get action limits
    low, high = env.action_spec
    env.robots[0].set_robot_joint_positions(init_real[:7])

    sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes]
    dh_eef = torch_angle2ee(tdh, base_matrix, torch.FloatTensor(sim_joint_pos)[None,:].to(device))
    for t in range(n_steps-1):
        action = np.deg2rad(real_actions[t-1])
        #action = real_joint_pos[t,:7]-sim_joint_pos

        if len(action) == 7:
            action = np.hstack((action, [0]))

        obs, reward, done, _ = env.step(action)

        video_img = obs['%s_image'%camera][::-1]
        video_writer.append_data(video_img)


        # get simulator position and quaternion in real robot frame
        sim_eef_pose = deepcopy(env.robots[0].pose_in_base_from_name('gripper0_eef'))
        sim_eef_pos_sframe = deepcopy(sim_eef_pose)[:3, 3]
        sim_eef_quat_sframe = deepcopy(transform_utils.mat2quat(sim_eef_pose))
        sim_eef_pos_rframe, sim_eef_quat_rframe = get_sim2real_posquat(env)

        sim_joint_pos = env.sim.data.qpos[env.robots[0]._ref_joint_pos_indexes]
        sim_goal_joint_pos = env.robots[0].controller.goal_qpos

        torch_joint_pos = torch.FloatTensor(sim_joint_pos)[None,:].to(device)
        dh_eef_pose = torch_angle2ee(tdh, base_matrix, torch_joint_pos)[0].detach().cpu().numpy()
        dh_eef_pos_sframe = deepcopy(dh_eef_pose)[:3, 3]
        dh_eef_quat_sframe = deepcopy(transform_utils.mat2quat(dh_eef_pose))

        sim_robot_eef_sframe.append(deepcopy(np.hstack((sim_eef_pos_sframe, sim_eef_quat_sframe))))
        sim_robot_eef_rframe.append(deepcopy(np.hstack((sim_eef_pos_rframe, sim_eef_quat_rframe))))
        dh_robot_eef_rframe.append(deepcopy(np.hstack((dh_eef_pos_sframe, dh_eef_quat_sframe))))
        sim_robot_joints.append(deepcopy(sim_joint_pos))
        target_robot_joints.append(deepcopy(sim_goal_joint_pos))
        dh_robot_joints.append(deepcopy(sim_goal_joint_pos))


        #real_eef_pos_sframe, real_eef_quat_sframe = env.get_real2sim_posquat(real_eef_pos[t,:3], real_eef_pos[t,3:7])
        #real_robot_eef_rframe.append(real_eef_pos[t])
        #real_robot_eef_sframe.append(deepcopy(np.hstack((real_eef_pos_sframe, real_eef_quat_sframe))))
        #real_robot_joints.append(deepcopy(obs['robot0_joint_pos']))


    f, ax = plt.subplots(7, figsize=(10,20))
    sim_robot_eef_sframe = np.array(sim_robot_eef_sframe)
    sim_robot_eef_rframe = np.array(sim_robot_eef_rframe)
    dh_robot_eef_rframe = np.array(dh_robot_eef_rframe)
    y = np.arange(len(sim_robot_eef_sframe))
    vmin = -np.pi
    vmax = np.pi
    for i in range(7):
        ax[i].scatter(y, dh_robot_eef_rframe[:, i], marker='x', s=35, c='g', label='dh_sframe')
        ax[i].scatter(y, sim_robot_eef_sframe[:, i], marker='o', s=4, c='b', label='sim_sframe')
        #ax[i].scatter(y, sim_robot_eef_rframe[:, i], marker='o', s=4, c='r', label='sim_rframe')
        ax[i].plot(dh_robot_eef_rframe[:, i], c='g')
        ax[i].plot(sim_robot_eef_sframe[:, i], c='b')
        #ax[i].plot(sim_robot_eef_rframe[:, i], c='r')


    for i in range(4, 7):
        ax[i].set_ylim([vmin, vmax])
    ax[0].set_title('x')
    ax[1].set_title('y')
    ax[2].set_title('z')
    ax[3].set_title('qx')
    ax[4].set_title('qy')
    ax[5].set_title('qz')
    ax[6].set_title('qw')
    ax[0].legend()
    plt.savefig(write_path + '_eef.png')
    plt.close()

    f, ax = plt.subplots(7, figsize=(10,20))
    sim_robot_joints = np.array(sim_robot_joints)
    dh_robot_joints = np.array(dh_robot_joints)
    target_robot_joints = np.array(target_robot_joints)
    vmin = -4*np.pi
    vmax = 4*np.pi
    for i in range(7):
        ax[i].set_title(i)
        if not i:
            ax[i].plot(sim_robot_joints[:,i], c='k', label='sim')
            ax[i].plot(target_robot_joints[:,i], c='b', label='goal')
            ax[i].plot(dh_robot_joints[:,i], c='g', label='dh')
        else:
            ax[i].plot(sim_robot_joints[:,i], c='k')
            ax[i].plot(target_robot_joints[:,i], c='b')
            ax[i].plot(dh_robot_joints[:,i], c='g')
        ax[i].scatter(y, sim_robot_joints[:,i],    s=2, c='k')
        ax[i].scatter(y, target_robot_joints[:,i], s=2, c='c')
        ax[i].scatter(y, dh_robot_joints[:,i], s=2, c='b')

    for x in range(7):
        ax[x].set_ylim([vmin, vmax])
    ax[0].legend()
    plt.savefig(write_path + '_joints.png')
    plt.close()

    video_writer.close()
    print("Video saved to {}".format(write_path))
Example #12
        env_args["camera_widths"] = 512

    # Set up video recorder if necessary
    if args.record_video:
        # Grab name of this rollout combo
        video_name = "{}-{}-{}".format(env_args["env_name"], "".join(
            env_args["robots"]), env_args["controller"]).replace("_", "-")
        # Calculate appropriate fps
        fps = int(env_args["control_freq"])
        # Define video writer
        video_writer = imageio.get_writer("{}.mp4".format(video_name), fps=fps)

    # Pop the controller
    controller = env_args.pop("controller")
    if controller in ALL_CONTROLLERS:
        controller_config = load_controller_config(
            default_controller=controller)
    else:
        controller_config = load_controller_config(custom_fpath=controller)

    # Create env
    env_suite = suite.make(**env_args,
                           controller_configs=controller_config,
                           has_renderer=not args.record_video,
                           has_offscreen_renderer=args.record_video,
                           use_object_obs=True,
                           use_camera_obs=args.record_video,
                           reward_shaping=True)
    env = GymWrapper(env_suite)

    # Run rollout
    simulate_policy(
Example #13
def test_camera_transforms():
    # set seeds
    random.seed(0)
    np.random.seed(0)

    camera_name = "agentview"
    camera_height = 120
    camera_width = 120
    env = robosuite.make(
        "Lift",
        robots=["Panda"],
        controller_configs=load_controller_config(
            default_controller="OSC_POSE"),
        has_renderer=False,
        has_offscreen_renderer=True,
        ignore_done=True,
        use_object_obs=True,
        use_camera_obs=True,
        camera_names=[camera_name],
        camera_depths=[True],
        camera_heights=[camera_height],
        camera_widths=[camera_width],
        reward_shaping=True,
        control_freq=20,
    )
    sim = env.sim
    obs_dict = env.reset()

    # ground-truth object position
    obj_pos = obs_dict["object-state"][:3]

    # camera frame
    image = obs_dict["{}_image".format(camera_name)][::-1]

    # unnormalized depth map
    depth_map = obs_dict["{}_depth".format(camera_name)][::-1]
    depth_map = CU.get_real_depth_map(sim=sim, depth_map=depth_map)

    # get camera matrices
    world_to_camera = CU.get_camera_transform_matrix(
        sim=sim,
        camera_name=camera_name,
        camera_height=camera_height,
        camera_width=camera_width,
    )
    camera_to_world = np.linalg.inv(world_to_camera)

    # transform object position into camera pixel
    obj_pixel = CU.project_points_from_world_to_camera(
        points=obj_pos,
        world_to_camera_transform=world_to_camera,
        camera_height=camera_height,
        camera_width=camera_width,
    )

    # transform from camera pixel back to world position
    estimated_obj_pos = CU.transform_from_pixels_to_world(
        pixels=obj_pixel,
        depth_map=depth_map,
        camera_to_world_transform=camera_to_world,
    )

    # the most we should be off by in the z-direction is 3^0.5 times the maximum half-size of the cube
    max_z_err = np.sqrt(3) * 0.022
    z_err = np.abs(obj_pos[2] - estimated_obj_pos[2])
    assert z_err < max_z_err

    print("pixel: {}".format(obj_pixel))
    print("obj pos: {}".format(obj_pos))
    print("estimated obj pos: {}".format(estimated_obj_pos))
    print("z err: {}".format(z_err))

    env.close()
Example #14
def experiment(variant, agent="SAC"):

    # Make sure agent is a valid choice
    assert agent in AGENTS, "Invalid agent selected. Selected: {}. Valid options: {}".format(
        agent, AGENTS)

    # Get environment configs for expl and eval envs and create the appropriate envs
    # suites[0] is expl and suites[1] is eval
    suites = []
    for env_config in (variant["expl_environment_kwargs"],
                       variant["eval_environment_kwargs"]):
        # Load controller
        controller = env_config.pop("controller")
        if controller in set(ALL_CONTROLLERS):
            # This is a default controller
            controller_config = load_controller_config(
                default_controller=controller)
        else:
            # This is a string to the custom controller
            controller_config = load_controller_config(custom_fpath=controller)
        # Create robosuite env and append to our list
        suites.append(
            suite.make(
                **env_config,
                has_renderer=False,
                has_offscreen_renderer=False,
                use_object_obs=True,
                use_camera_obs=False,
                reward_shaping=True,
                controller_configs=controller_config,
            ))
    # Create gym-compatible envs
    expl_env = NormalizedBoxEnv(GymWrapper(suites[0]))
    eval_env = NormalizedBoxEnv(GymWrapper(suites[1]))

    obs_dim = expl_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs'],
    )
    qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs'],
    )
    target_qf1 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs'],
    )
    target_qf2 = FlattenMlp(
        input_size=obs_dim + action_dim,
        output_size=1,
        **variant['qf_kwargs'],
    )

    # Define references to variables that are agent-specific
    trainer = None
    eval_policy = None
    expl_policy = None

    # Instantiate trainer with appropriate agent
    if agent == "SAC":
        expl_policy = TanhGaussianPolicy(
            obs_dim=obs_dim,
            action_dim=action_dim,
            **variant['policy_kwargs'],
        )
        eval_policy = MakeDeterministic(expl_policy)
        trainer = SACTrainer(env=eval_env,
                             policy=expl_policy,
                             qf1=qf1,
                             qf2=qf2,
                             target_qf1=target_qf1,
                             target_qf2=target_qf2,
                             **variant['trainer_kwargs'])
    elif agent == "TD3":
        eval_policy = TanhMlpPolicy(input_size=obs_dim,
                                    output_size=action_dim,
                                    **variant['policy_kwargs'])
        target_policy = TanhMlpPolicy(input_size=obs_dim,
                                      output_size=action_dim,
                                      **variant['policy_kwargs'])
        es = GaussianStrategy(
            action_space=expl_env.action_space,
            max_sigma=0.1,
            min_sigma=0.1,  # Constant sigma
        )
        expl_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=eval_policy,
        )
        trainer = TD3Trainer(policy=eval_policy,
                             qf1=qf1,
                             qf2=qf2,
                             target_qf1=target_qf1,
                             target_qf2=target_qf2,
                             target_policy=target_policy,
                             **variant['trainer_kwargs'])
    else:
        print("Error: No valid agent chosen!")

    replay_buffer = EnvReplayBuffer(
        variant['replay_buffer_size'],
        expl_env,
    )
    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    # Define algorithm
    algorithm = CustomTorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()