def __init__(self, horizon=500, *args, **kwargs):
    options = {}
    controller_name = 'OSC_POSE'
    options["controller_configs"] = suite.load_controller_config(
        default_controller=controller_name)
    self.env = GymWrapper(
        suite.make(
            "NutAssemblyRound",  # Nut Assembly task with the round peg
            robots="IIWA",  # use IIWA robot
            **options,  # controller options
            use_object_obs=True,
            use_camera_obs=False,  # do not use pixel observations
            has_offscreen_renderer=False,  # not needed since not using pixel obs
            has_renderer=False,  # no on-screen rendering needed
            reward_shaping=True,  # use dense rewards
            reward_scale=1.0,
            control_freq=20,  # control should happen fast enough so that simulation looks smooth
            horizon=horizon,  # number of timesteps for ending episode
        ))
    self.max_episode_steps = horizon
    self.action_space = self.env.action_space
    self.observation_space = self.env.observation_space
    self.reward_type = 'sparse'
    self.distance_threshold = 0.065
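# A minimal sketch of how `reward_type` and `distance_threshold` above could be
# consumed, assuming a HER-style goal interface; `achieved_goal`/`desired_goal`
# are hypothetical names, not part of the original wrapper.
def compute_reward_sketch(self, achieved_goal, desired_goal, info=None):
    import numpy as np
    distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
    if self.reward_type == 'sparse':
        # binary reward: 0 when within the threshold, -1 otherwise
        return -(distance > self.distance_threshold).astype(np.float32)
    return -distance  # dense fallback: negative Euclidean distance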
def watch_trajectory():
    # Set up the robot environment
    env = suite.make(
        env_name=args.env_name,
        robots=args.robot,
        has_renderer=args.render,
        has_offscreen_renderer=False,
        use_camera_obs=False,
        use_object_obs=True,
        horizon=args.horizon,
        reward_shaping=False,
    )
    obs = env.reset()
    state_dim = obs['robot0_robot-state'].shape[0] + obs['object-state'].shape[0]
    state = np.append(obs['robot0_robot-state'], obs['object-state'])
    agent = set_agent(state_dim, env, args)

    # Visualize a single run
    done = False
    while not done:
        if args.algo == 'REINFORCE':
            action, log_prob = agent.select_action(state)
        else:
            action = agent.select_action(state)
        obs, reward, done, info = env.step(action)
        state = np.append(obs['robot0_robot-state'], obs['object-state'])
        env.render()
def sample_goal_frames_env(args):
    env = suite.make(
        "SawyerLift",
        has_renderer=False,  # no on-screen renderer
        has_offscreen_renderer=True,  # off-screen renderer is required for camera observations
        ignore_done=True,  # (optional) never terminates episode
        use_camera_obs=True,  # use camera observations
        camera_height=64,  # set camera height
        camera_width=64,  # set camera width
        camera_name='sideview',  # use the "sideview" camera
        use_object_obs=True,  # also expose object features alongside pixels
        control_freq=60)

    goal_imgs = []
    grip_pos_l = []
    for i in range(50):
        x = env.reset()
        for k in range(100):
            x, _, _, _ = env.step(np.random.randn(env.dof))
        im = img_as_ubyte(rotate(x['image'], 180))
        goal_imgs.append(im)
        grip_pos = np.array(env.sim.data.site_xpos[env.sim.model.site_name2id(
            "grip_site")]).astype(np.float32)
        grip_pos_l.append(grip_pos)

    goal_imgs = np.stack(goal_imgs)
    grip_pos_l = np.stack(grip_pos_l)
    dir_name = "data/goal/sawyer_reach_joint"
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)
    data = {'image': goal_imgs, 'grip_pos': grip_pos_l}
    np.savez(os.path.join(dir_name, args.save_path + ".npz"), **data)
def main():
    width = 512
    height = 512
    action_scale = 10.0
    screen = pygame.display.set_mode((width, height))

    # create environment instance
    env = suite.make(
        "PandaILIAD",
        has_renderer=False,
        ignore_done=True,
        camera_height=height,
        camera_width=width,
        gripper_visualization=False,
        use_camera_obs=True,
        use_object_obs=False,
    )
    # reset the environment
    env.reset()
    # enable controlling the end effector directly instead of using joint velocities
    env = IKWrapper(env)

    # create the human input device
    joystick = Joystick()
    joystick.start_control()

    # create recorder
    number = sys.argv[1]
    savename = "demonstrations/test" + number + ".pkl"
    data = []

    while True:
        # renamed from `input` to avoid shadowing the builtin
        controller_state = joystick.get_controller_state()
        dpos, dquat, grasp, reset = (
            controller_state["dpos"],
            controller_state["dquat"],
            controller_state["grasp"],
            controller_state["reset"],
        )
        if reset:
            pickle.dump(data, open(savename, "wb"))
            break
        action = np.concatenate([dpos, dquat, grasp]) * action_scale
        obs, reward, done, info = env.step(action)
        image = np.flip(obs['image'].transpose((1, 0, 2)), 1)
        joint_pos = obs['joint_pos']
        res_image = cv2.resize(image, dsize=(28, 28))
        data.append((res_image, joint_pos))
        pygame.pixelcopy.array_to_surface(screen, image)
        pygame.display.update()

    env.close()
def change_parameters_of_soft_body_demo(episodes):
    for _ in range(episodes):
        env = suite.make(
            'Ultrasound',
            robots='UR5e',
            controller_configs=None,
            gripper_types='UltrasoundProbeGripper',
            has_renderer=True,
            has_offscreen_renderer=False,
            use_camera_obs=False,
            use_object_obs=False,
            control_freq=50,
            render_camera=None,
            horizon=800)

        print_world_xml_and_soft_torso_params(env.model)

        sim, viewer = create_mjsim_and_viewer(env)
        for _ in range(env.horizon):
            sim.step()
            viewer.render()
        glfw.destroy_window(viewer.window)
def __init__(self):
    self.env = suite.make(env_name="Lift",
                          robots="Sawyer",
                          horizon=600,
                          has_renderer=False,
                          has_offscreen_renderer=True,
                          use_camera_obs=True,
                          reward_shaping=True,
                          camera_names="agentview",
                          camera_heights=84,
                          camera_widths=84)
    init_obs = self.env.observation_spec()
    obs_shape = init_obs['robot0_agentview_image'].shape
    obs_low = -np.full(obs_shape, np.inf)
    obs_high = np.full(obs_shape, np.inf)
    lower_bound, upper_bound = self.env.action_spec
    # print(lower_bound.shape, self.env.action_dim)
    self.action_space = spaces.Box(low=lower_bound,
                                   high=upper_bound,
                                   shape=(self.env.action_dim, ),
                                   dtype=np.float32)
    self.observation_space = spaces.Box(low=obs_low,
                                        high=obs_high,
                                        shape=obs_shape,
                                        dtype=np.float32)
def controller_demo(experiment_name, save_data=False):
    env = suite.make(
        'Ultrasound',
        robots='UR5e',
        controller_configs=None,
        gripper_types=None,
        has_renderer=True,
        has_offscreen_renderer=False,
        use_camera_obs=False,
        use_object_obs=False,
        control_freq=50,
        render_camera='sideview',
        horizon=1600)

    # Reset the env
    env.reset()

    sim_time = env.horizon
    robot = env.robots[0]
    joint_pos_obs = np.empty(shape=(sim_time, robot.dof))
    ref_values = np.array([np.pi / 2, 3 * np.pi / 2, -np.pi / 4])
    goal_joint_pos = [ref_values[0], 0, 0, 0, 0, 0]
    kp = 2
    kd = 1.2

    for t in range(sim_time):
        if env.done:
            break
        env.render()

        action = relative2absolute_joint_pos_commands(goal_joint_pos, robot, kp, kd)
        # later phases override the default goal (thresholds checked in descending order)
        if t > 1200:
            action = relative2absolute_joint_pos_commands(
                [0, ref_values[2], 0, 0, 0, 0], robot, kp, kd)
        elif t > 800:
            action = relative2absolute_joint_pos_commands([0, 0, 0, 0, 0, 0],
                                                          robot, kp, kd)
        elif t > 350:
            action = relative2absolute_joint_pos_commands(
                [ref_values[1], 0, 0, 0, 0, 0], robot, kp, kd)

        observation, reward, done, info = env.step(action)
        joint_pos_obs[t] = observation['robot0_joint_pos']

    # close window
    env.close()

    if save_data:
        np.savetxt('data/' + experiment_name + '_joint_pos_controller_test.csv',
                   joint_pos_obs,
                   delimiter=",")
        np.savetxt('data/' + experiment_name + '_ref_values_controller_test.csv',
                   ref_values,
                   delimiter=",")
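# `relative2absolute_joint_pos_commands` is not defined in this file. A minimal
# sketch of the PD mapping it presumably performs: turn an absolute joint-position
# goal into a torque-style action from position error and joint velocity. The
# attribute names `_joint_positions`/`_joint_velocities` are assumptions.
import numpy as np

def relative2absolute_joint_pos_commands_sketch(goal_joint_pos, robot, kp, kd):
    error = np.asarray(goal_joint_pos) - np.asarray(robot._joint_positions)
    # PD law: proportional on position error, derivative damping on velocity
    return kp * error - kd * np.asarray(robot._joint_velocities)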
def test_all_controllers():
    for controller_name in controllers.keys():
        # Define variables for each controller test
        action_dim = controllers[controller_name][0]
        num_test_steps = controllers[controller_name][1]
        test_value = controllers[controller_name][2]
        neutral = np.zeros(action_dim)

        # Define controller path to load
        controller_config = load_controller_config(default_controller=controller_name)

        # Now, create a test env for testing the controller on
        env = suite.make(
            "Lift",
            robots="Sawyer",
            has_renderer=args.render,  # use on-screen renderer for visual validation only if requested
            has_offscreen_renderer=False,
            use_camera_obs=False,
            horizon=(steps_per_action + steps_per_rest) * num_test_steps,
            controller_configs=controller_config,
        )
        print("Testing controller: {}...".format(controller_name))
        env.reset()

        # If rendering, set controller to front view to get best angle for viewing robot movements
        if args.render:
            env.viewer.set_camera(camera_id=0)

        # get action range
        action_min, action_max = env.action_spec
        assert action_min.shape == action_max.shape
        assert action_min.shape[0] == action_dim, "Expected {}, got {}".format(
            action_dim, action_min.shape[0])

        # Keep track of done variable to know when to break loop
        count = 0
        # Loop through controller space
        while count < num_test_steps:
            action = neutral.copy()
            for i in range(steps_per_action):
                if controller_name in {"IK_POSE", "OSC_POSE"} and count > 2:
                    # Set this value to be the angle and set appropriate axis
                    vec = np.zeros(3)
                    vec[count - 3] = test_value
                    action[3:6] = vec
                else:
                    action[count] = test_value
                env.step(action)
                if args.render:
                    env.render()
            for i in range(steps_per_rest):
                env.step(neutral)
                if args.render:
                    env.render()
            count += 1

        # Shut down this env before starting the next test
        env.close()

    # Tests passed!
    print("All controller tests completed.")
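# The loop above assumes a module-level `controllers` dict mapping each
# controller name to (action_dim, num_test_steps, test_value), plus
# `steps_per_action`/`steps_per_rest` step counts. A plausible configuration
# (values are illustrative, not taken from the original file):
controllers_example = {
    "OSC_POSE": (6, 6, 0.1),        # 3 position + 3 orientation deltas
    "OSC_POSITION": (3, 3, 0.1),    # position deltas only
    "JOINT_POSITION": (7, 7, 0.2),  # one test step per joint
    "JOINT_VELOCITY": (7, 7, -0.1),
    "JOINT_TORQUE": (7, 7, 0.25),
}
steps_per_action_example = 75
steps_per_rest_example = 75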
def make_robosuite(env_name, env_config):
    import robosuite
    env_config.render = False
    env = robosuite.make(
        env_name,
        has_renderer=env_config.render,
        ignore_done=True,
        use_camera_obs=env_config.pixel_input,
        has_offscreen_renderer=env_config.pixel_input,
        camera_height=256,
        camera_width=256,
        render_collision_mesh=False,
        render_visual_mesh=True,
        camera_name='agentview',
        use_object_obs=(not env_config.pixel_input),
        camera_depth=env_config.use_depth,
        reward_shaping=True,
        # demo_config=env_config.demonstration,
    )
    env = RobosuiteWrapper(env, env_config)
    env = FilterWrapper(env, env_config)
    env = ObservationConcatenationWrapper(env)
    if env_config.pixel_input:
        env = TransposeWrapper(env)
        if env_config.use_grayscale:
            env = GrayscaleWrapper(env)
        if env_config.frame_stacks:
            env = FrameStackWrapper(env, env_config)
    env_config.action_spec = env.action_spec()
    env_config.obs_spec = env.observation_spec()
    return env, env_config
def make_base_robosuite_env(env_name, kwargs, use_dm_backend=True):
    import gym
    import numpy as np

    gym.logger.setLevel(40)
    import robosuite as suite
    from rlkit.envs.wrappers.primitives_wrappers import (
        NormalizeBoxEnvFixed,
        RobosuitePrimitives,
        RobosuiteWrapper,
    )

    # swap the primitives base class in exactly once
    if suite.environments.robot_env.RobotEnv.__bases__[0] != RobosuitePrimitives:
        suite.environments.robot_env.RobotEnv.__bases__ = (RobosuitePrimitives,)
    RobosuitePrimitives._use_dm_backend = use_dm_backend

    if kwargs["has_offscreen_renderer"]:
        keys = ["image-state"]
    else:
        keys = None

    reset_action_space_kwargs = kwargs.get("reset_action_space_kwargs", {})
    env_kwargs_new = kwargs.copy()
    if "reset_action_space_kwargs" in kwargs:
        del env_kwargs_new["reset_action_space_kwargs"]

    np.random.seed(42)
    env = suite.make(env_name, **env_kwargs_new)
    env = RobosuiteWrapper(
        env, keys=keys, reset_action_space_kwargs=reset_action_space_kwargs
    )
    # use .get() so a missing "control_mode" key does not raise KeyError
    if reset_action_space_kwargs.get("control_mode") == "robosuite":
        env = NormalizeBoxEnvFixed(env)
    return env
def gather_calibration_measurements():
    env = suite.make(
        'Ultrasound',
        robots='Panda',
        controller_configs=load_controller_config(default_controller='OSC_POSE'),
        gripper_types='UltrasoundProbeGripper',
        has_renderer=True,
        has_offscreen_renderer=False,
        use_camera_obs=False,
        use_object_obs=False,
        control_freq=100,
        render_camera=None,
        horizon=250,
        initialization_noise=None,
    )

    # Reset the env
    env.reset()

    robot = env.robots[0]
    ee_z_force = np.empty(shape=(env.horizon, 1))
    ee_z_pos = np.empty(shape=(env.horizon, 1))
    ee_z_vel = np.empty(shape=(env.horizon, 1))

    for t in range(env.horizon):
        print(t)
        if env.done:
            break
        env.render()

        # scripted action phases; later conditions override earlier ones
        action = [0.2, 0.05, -0.2, 0, 0, 0]
        if t > 50:
            action = [0, 0, 0, 0, 0, 0]
        if t > 75:
            action = [0, 0, -0.4, 0, 0, 0]
        if t > 200:
            action = [0, 0, 0.8, 0, 0, 0]
        if t > 225:
            action = [0, 0, 0, 0, 0, 0]

        observation, reward, done, info = env.step(action)

        if robot.gripper_type == 'UltrasoundProbeGripper':
            ee_z_force[t] = transform_ee_frame_axes(robot.ee_force)[-1]
        else:
            ee_z_force[t] = robot.ee_force[-1]
        ee_z_pos[t] = observation['robot0_eef_pos'][-1]
        ee_z_vel[t] = observation['gripper_velp'][-1]

    np.savetxt('data/calibration_z_force.csv', ee_z_force, delimiter=",")
    np.savetxt('data/calibration_z_pos.csv', ee_z_pos, delimiter=",")
    np.savetxt('data/calibration_z_vel.csv', ee_z_vel, delimiter=",")

    # close window
    env.close()
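# `transform_ee_frame_axes` is not defined here. A minimal sketch, assuming it
# re-expresses a force measured in the end-effector frame in the axes of the
# robot base frame; the exact axis remapping below is an illustrative assumption
# and depends on how the UltrasoundProbeGripper frame is mounted.
import numpy as np

def transform_ee_frame_axes_sketch(measurement):
    m = np.asarray(measurement)
    # illustrative remap: swap x/y and flip signs
    return np.array([-m[1], -m[0], -m[2]])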
def _make_robosuite_env():
    import robosuite as suite
    from gym.wrappers import FlattenDictWrapper
    from baselines.bench import Monitor

    env = suite.make(env_id)
    env = FlattenDictWrapper(env, ['robot-state', 'object-state'])
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    return env
def make_robosuite_env(args):
    demo_dict = {
        ## Robosuite
        21: "pegs-RoundNut",
        # 22: "pegs-SquareNut",
        # 23: "pegs-full",
        24: "bins-Bread",
        # 25: "bins-Can",
        # 26: "bins-Cereal",
        27: "bins-Milk",
        # 28: "bins-full",
    }
    demo_name = demo_dict[args.env_id]
    hd5_demo_path = "../imitation_data/RoboTurkPilot/%s" % (demo_name)
    env_name = args.env_name + "_reach"

    # total_traj = len(os.listdir(demo_path))
    demo_list = []
    filename = "../imitation_data/TRAJ_h5/%s/%s_chosen.txt" % (env_name, env_name)
    demo_idx = -1
    sort_list = np.arange(0, 10)  # use the first 10 demos
    with open(filename, 'r') as ff:
        i = 0
        for line in ff:
            line = line.replace(":", " ").replace("(", " ").replace(
                ")", " ").replace(",", " ").split()
            if demo_idx == -1:
                demo_idx = line.index("demo") + 1
            if np.any(sort_list == i):
                demo_list += [int(line[demo_idx])]
            i = i + 1

    env = RobosuiteReacherWrapper(
        robosuite.make(
            args.env_name,
            has_offscreen_renderer=False,  # not needed since we do not use pixel obs
            has_renderer=args.render,
            ignore_done=False,
            use_camera_obs=False,
            gripper_visualization=False,
            reward_shaping=False,
            # control_freq=100,
            horizon=args.t_max  # default is 500
        ),
        demo_path=hd5_demo_path,
        demo_list=demo_list,
        need_xml=True,
        # Initial-state sampler. The repo default is uniform + random; initializing
        # states in the middle of a trajectory is only possible in simulation.
        sampling_schemes=["uniform_first", "random"],
        scheme_ratios=[0.9, 0.1],
        robo_task="reach",
    )
    return env
def test_playback():
    # set seeds
    random.seed(0)
    np.random.seed(0)

    env = robosuite.make(
        "Lift",
        robots=["Panda"],
        controller_configs=load_controller_config(default_controller="OSC_POSE"),
        has_renderer=False,
        has_offscreen_renderer=False,
        ignore_done=True,
        use_camera_obs=False,
        reward_shaping=True,
        control_freq=20,
    )
    env.reset()

    # task instance
    task_xml = env.sim.model.get_xml()
    task_init_state = np.array(env.sim.get_state().flatten())

    # trick for ensuring that we can play MuJoCo demonstrations back
    # deterministically by using the recorded actions open loop
    env.reset_from_xml_string(task_xml)
    env.sim.reset()
    env.sim.set_state_from_flattened(task_init_state)
    env.sim.forward()

    # random actions to play
    n_actions = 100
    actions = 0.1 * np.random.uniform(
        low=-1.0, high=1.0, size=(n_actions, env.action_spec[0].shape[0]))

    # play actions
    print("playing random actions...")
    states = [task_init_state]
    for i in range(n_actions):
        env.step(actions[i])
        states.append(np.array(env.sim.get_state().flatten()))

    # try playback
    print("attempting playback...")
    env.reset()
    env.reset_from_xml_string(task_xml)
    env.sim.reset()
    env.sim.set_state_from_flattened(task_init_state)
    env.sim.forward()

    for i in range(n_actions):
        env.step(actions[i])
        state_playback = env.sim.get_state().flatten()
        assert np.all(np.equal(states[i + 1], state_playback))

    env.close()
    print("test passed!")
def run_simulation():
    env_id = "Ultrasound"

    env_options = {}
    env_options["robots"] = "Panda"
    env_options["gripper_types"] = "UltrasoundProbeGripper"
    env_options["controller_configs"] = {
        "type": "OSC_POSE",
        "input_max": 1,
        "input_min": -1,
        "output_max": [0.05, 0.05, 0.05, 0.5, 0.5, 0.5],
        "output_min": [-0.05, -0.05, -0.05, -0.5, -0.5, -0.5],
        "kp": 300,
        "damping_ratio": 1,
        "impedance_mode": "fixed",
        "kp_limits": [0, 500],
        "kp_input_max": 1,
        "kp_input_min": 0,
        "damping_ratio_limits": [0, 2],
        "position_limits": None,
        "orientation_limits": None,
        "uncouple_pos_ori": True,
        "control_delta": True,
        "interpolation": None,
        "ramp_ratio": 0.2
    }
    env_options["control_freq"] = 500
    env_options["has_renderer"] = True
    env_options["has_offscreen_renderer"] = False
    env_options["render_camera"] = None
    env_options["use_camera_obs"] = False
    env_options["use_object_obs"] = False
    env_options["horizon"] = 1000
    env_options["early_termination"] = False
    env_options["save_data"] = False
    env_options["torso_solref_randomization"] = False
    env_options["initial_probe_pos_randomization"] = False
    env_options["deterministic_trajectory"] = False
    env_options["use_box_torso"] = True

    env = suite.make(env_id, **env_options)

    # reset the environment to prepare for a rollout
    obs = env.reset()

    done = False
    ret = 0.
    for t in range(env.horizon):
        action = [0.0, 0, 0, 0, 0, 0]
        obs, reward, done, _ = env.step(action)  # play action
        ret += reward
        env.render()
        if done:
            env.close()
            break

    print("rollout completed with return {}".format(ret))
def evaluate_policy(env_config, model_path, n_eval, printout=False):
    if printout:
        print("Loading policy...")

    # Load trained model and corresponding policy
    data = torch.load(model_path)
    policy = data['evaluation/policy']

    if printout:
        print("Policy loaded")

    # Load controller
    controller = env_config.pop("controller")
    if controller in set(ALL_CONTROLLERS):
        # This is a default controller
        controller_config = load_controller_config(default_controller=controller)
    else:
        # This is a path to a custom controller
        controller_config = load_controller_config(custom_fpath=controller)

    # Create robosuite env
    env = suite.make(**env_config,
                     has_renderer=False,
                     has_offscreen_renderer=False,
                     use_object_obs=True,
                     use_camera_obs=False,
                     reward_shaping=True,
                     controller_configs=controller_config)
    env = GymWrapper(env)

    # Use CUDA if available
    if torch.cuda.is_available():
        set_gpu_mode(True)
        if isinstance(policy, MakeDeterministic):
            policy.stochastic_policy.cuda()
        else:
            policy.cuda()

    if printout:
        print("Evaluating policy over {} simulations...".format(n_eval))

    # Create variable to hold rewards to be averaged
    returns = []

    # Loop through simulation n_eval times and take average returns each time
    for i in range(n_eval):
        path = rollout(
            env,
            policy,
            max_path_length=env_config["horizon"],
            render=False,
        )
        # Determine total summed rewards from episode and append to 'returns'
        returns.append(sum(path["rewards"]))

    # Average the episode rewards and return the normalized corresponding value
    return sum(returns) / (env_config["reward_scale"] * n_eval)
def create_from_id(self, env_id):
    _, task = env_id.split(".")
    # Env interface will do the actual job of creating the environment.
    env = robosuite.make(task,
                         has_offscreen_renderer=set_eval,
                         has_renderer=False,
                         use_object_obs=True,
                         reward_shaping=self.args.rs_reward_shaping,
                         use_camera_obs=set_eval)
    return env
def __init__(self, has_display=False):
    # Create environment.
    print("Do I have a display?", has_display)
    # self.base_env = robosuite.make('BaxterLift', has_renderer=has_display)
    self.base_env = robosuite.make("BaxterViz", has_renderer=has_display)

    # Create kinematics object.
    self.baxter_IK_object = IKWrapper(self.base_env)
    self.environment = self.baxter_IK_object.env
def make_video(n_episode, slower=3):
    low = np.array([0.57, 0.35])
    high = np.array([0.63, 0.405])
    render_drop_freq = 10

    env = suite.make(
        'BinPackPlace',
        has_renderer=False,
        has_offscreen_renderer=True,
        ignore_done=False,
        use_camera_obs=False,
        control_freq=1,
        camera_height=240,
        camera_width=240,
        render_drop_freq=render_drop_freq,
        # obj_names=obj_names,
        action_bound=(low, high),
    )

    subprocess.call(['rm', '-rf', 'frames'])
    subprocess.call(['mkdir', '-p', 'frames'])
    subprocess.call(['mkdir', '-p', 'demo'])

    time_step_counter = 0

    ## make video
    for i_episode in range(n_episode):
        env.reset()
        for _ in range(100):
            actions = env.action_space.sample()
            obs, rew, done, info = env.step(actions)

            for i in range(len(info['birdview'])):
                image_data_bird, image_data_agent = info['birdview'][i], info[
                    'targetview'][i]
                image_data = np.concatenate((image_data_bird, image_data_agent), 1)
                img = Image.fromarray(image_data, 'RGB')
                for __ in range(slower):
                    img.save('frames/frame-%.10d.png' % time_step_counter)
                    time_step_counter += 1

            if done:
                break

    subprocess.call([
        'ffmpeg', '-framerate', '50', '-y', '-i', 'frames/frame-%010d.png',
        '-r', '24', '-pix_fmt', 'yuv420p', '-s', '480x240', DEMO_PATH
    ])
    subprocess.call(['rm', '-rf', 'frames'])
def main():
    width = 512
    height = 512
    screen = pygame.display.set_mode((width, height))

    # create environment instance
    env = suite.make(
        "PandaILIAD",
        has_renderer=False,
        ignore_done=True,
        camera_height=height,
        camera_width=width,
        gripper_visualization=False,
        use_camera_obs=True,
        use_object_obs=False,
    )
    # reset the environment
    env.reset()

    # create the human input device
    joystick = Joystick()
    model = Model()
    action_scale = 0.1

    # initialize state
    obs, reward, done, info = env.step([0.0] * 8)
    image = np.flip(obs['image'].transpose((1, 0, 2)), 1)
    joint_pos = obs['joint_pos']

    while True:
        # get human input
        z, reset = joystick.input()
        if reset:
            # close the env before returning (a trailing env.close() after the
            # loop would be unreachable)
            env.close()
            return True
        # get observation
        res_image = cv2.resize(image, dsize=(28, 28))
        # decode to high-DoF action
        action_arm = model.decoder(res_image, joint_pos, z)
        action = np.asarray(action_arm + [0.0]) * action_scale
        # take action
        obs, reward, done, info = env.step(action)
        image = np.flip(obs['image'].transpose((1, 0, 2)), 1)
        joint_pos = obs['joint_pos']
        pygame.pixelcopy.array_to_surface(screen, image)
        pygame.display.update()
def make(has_renderer=False,
         init_robot_pose=(-1.37550484e-02, 5.21560077e-03, 8.78072546e-02),
         renderer=None,
         horizon=100):
    controller_conf = load_controller_config(
        default_controller="LOCOMOTION_JOINT_TORQUE")
    os.makedirs(log_dir, exist_ok=True)
    rbs_env = suite.make(env_name="Walk",
                         robots="Laikago",
                         controller_configs=controller_conf,
                         has_renderer=has_renderer,
                         render_camera=renderer,
                         has_offscreen_renderer=False,
                         use_camera_obs=False,
                         control_freq=10,
                         init_robot_pose=init_robot_pose,
                         init_robot_ori=(0, 0, 0),
                         reward_shaping=True,
                         horizon=horizon)
    env = GymWrapper(rbs_env,
                     keys=[
                         "robot0_chassis_pos", "robot0_chassis_vel",
                         "robot0_joint_pos", "robot0_joint_vel"
                     ])
    if not has_renderer:
        env = Monitor(env, log_dir)

    # debug output: summarize action and observation spaces
    print("action space:", env.action_space)
    print("  shape:", env.action_space.shape,
          "last dim:", env.action_space.shape[-1])
    print("  high:", env.action_space.high)
    print("  low:", env.action_space.low)
    print("observation space:", env.observation_space)
    print("  shape:", env.observation_space.shape,
          "last dim:", env.observation_space.shape[-1])
    print("  high:", env.observation_space.high)
    print("  low:", env.observation_space.low)
    return env
def _thunk():
    if env_id.startswith("Sawyer"):
        env = robosuite.make(
            env_id,
            has_renderer=False,
            has_offscreen_renderer=False,
            ignore_done=False,
            horizon=1000,  # 200
            use_camera_obs=False,
            gripper_visualization=False,
            reward_shaping=True,
            control_freq=100  # 10, # 100
        )
        env = GymGymWrapper(env)
    elif env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)

    env.seed(seed + rank)

    obs_shape = env.observation_space.shape

    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)

    if log_dir is not None:
        env = bench.Monitor(env,
                            os.path.join(log_dir, str(rank)),
                            allow_early_resets=allow_early_resets)

    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif len(env.observation_space.shape) == 3:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")

    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env, op=[2, 0, 1])

    return env
def main():
    # create environment instance
    env = suite.make(
        "PandaLift",
        has_renderer=True,
        ignore_done=True,
        use_camera_obs=False,
        gripper_visualization=True,
        control_freq=100
    )
    # reset the environment
    env.reset()
    env.viewer.set_camera(camera_id=0)
    # enable controlling the end effector directly instead of using joint velocities
    env = IKWrapper(env)

    # create the human input device
    joystick = Joystick()

    # create recorder
    demo_number = sys.argv[1]
    savename = "demonstrations/demo" + demo_number + ".pkl"
    sample_time = 0.1
    prev_time = time.time()
    data = []

    while True:
        # get human input
        dpos, dquat, grasp, reset = joystick.input()
        if reset:
            pickle.dump(data, open(savename, "wb"))
            break
        # take action and record joint position
        action = np.concatenate([dpos, dquat, grasp])
        obs, reward, done, info = env.step(action)
        joint_pos = obs['joint_pos']
        # save joint position to dataset at the sampling interval
        curr_time = time.time()
        if curr_time - prev_time > sample_time:
            prev_time = curr_time
            data.append(list(joint_pos))
            pickle.dump(data, open(savename, "wb"))
        env.render()

    env.close()
def make_env(env_name):
    import robosuite
    env = robosuite.make(
        env_name,
        has_renderer=False,
        has_offscreen_renderer=False,
        ignore_done=True,
        use_object_obs=True,
        use_camera_obs=False,
        gripper_visualization=False,
        reward_shaping=True,
        control_freq=100,
    )
    return env
def evaluate_grip_goal():
    env = suite.make(
        env_name=args.env_name,
        robots=args.robot,
        has_renderer=False,
        has_offscreen_renderer=False,
        use_camera_obs=False,
        use_object_obs=True,
        horizon=args.horizon,
        reward_shaping=False,
    )

    success_count = 0
    cumulative_success = []
    for test in range(args.num_trials):
        obs = env.reset()
        state_dim = obs['robot0_robot-state'].shape[0] + obs['object-state'].shape[0]
        state = np.append(obs['robot0_robot-state'], obs['object-state'])
        agent = set_agent(state_dim, env, args)

        # Run a single episode
        done = False
        while not done:
            if args.algo == 'REINFORCE':
                action, log_prob = agent.select_action(state)
            else:
                action = agent.select_action(state)
            obs, reward, done, info = env.step(action)
            if check_grip(env):
                success_count += 1
                print('The robot succeeded in gripping the block')
                break
            state = np.append(obs['robot0_robot-state'], obs['object-state'])
        cumulative_success.append(success_count)
        if done:
            print('The robot failed to grip the block')

    print('Creating success fig...')
    fig = go.Figure()
    fig.add_trace(go.Scatter(name='actual_success_rate',
                             x=[i for i in range(args.num_trials)],
                             y=cumulative_success))
    fig.add_trace(go.Scatter(name='max_success_rate',
                             x=[i for i in range(args.num_trials)],
                             y=[i + 1 for i in range(args.num_trials)]))
    fig.update_layout(template='plotly_white',
                      title='Grasp Success Rate',
                      xaxis=dict(title='evaluation step'),
                      yaxis=dict(title='num. successes'))
    fig.write_image('grasp_success_rate.png')
    print(success_count / args.num_trials)
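# `check_grip` is not defined in this file. A minimal sketch, assuming a Lift-style
# task that exposes a `cube` object; `_check_grasp` is robosuite's manipulation-env
# helper, but treating the unwrapped env and object attribute as valid here is an
# assumption:
def check_grip_sketch(env):
    # True when the gripper's fingers are both in contact with the cube
    return env._check_grasp(gripper=env.robots[0].gripper,
                            object_geoms=env.cube)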
def collect_projection_data_sawyer(args):
    num_frames = args.num_frames
    # env = gym.make("FetchPushCustom-v1", n_substeps=20)
    import robosuite as suite
    env = suite.make(
        "SawyerLift",
        has_renderer=False,  # no on-screen renderer
        has_offscreen_renderer=True,  # off-screen renderer is required for camera observations
        use_camera_obs=True,  # use camera observations
        camera_name='sideview',  # use the "sideview" camera
        use_object_obs=True,  # also expose object features
        gripper_visualization=True,
        ignore_done=True,
        control_freq=30)

    source_pts = []
    target_pts = []
    n = 0
    while n < num_frames:
        x = env.reset()
        for k in range(3):
            for i in range(10):
                x, _, _, _ = env.step(np.random.randn(env.dof))
            im = skimage.img_as_ubyte(skimage.transform.rotate(x['image'], 180))
            points = cpselect(im)
            if len(points) > 0:
                source_pts.append(get_grip_pos(env))
                target_pts.append(points[0])
                print(len(source_pts))
        n += 1

    data = {}
    data['world_coords'] = np.stack(source_pts).astype(np.float32)
    data['pixel_coords'] = np.stack(target_pts).astype(np.float32)
    for n in range(len(source_pts)):
        s, t = data['world_coords'][n], data['pixel_coords'][n]
        print(n, s, t)

    if not os.path.isdir(args.dir_name):
        os.makedirs(args.dir_name)
    print(os.path.join(args.dir_name, args.save_path + ".npz"))
    np.savez(os.path.join(args.dir_name, args.save_path + ".npz"), **data)
def main(args):
    """
    Starts different tests.

    Args:
        args: parsed command-line arguments
    """
    env = suite.make(
        args.env_name,
        has_renderer=True,  # on-screen renderer
        has_offscreen_renderer=True,  # off-screen renderer
        use_object_obs=True,  # use object-centric features
        use_camera_obs=False,  # no camera observations
        reward_shaping=True,
    )
    state = env.reset()
    state = createstate(state)
    state_dim = state.shape[0]
    action_dim = env.dof
    max_action = float(6)
    min_action = float(-6)
    print(state_dim)

    policy = TD31v1(state_dim, action_dim, max_action, args)
    directory = 'pytorch_models'
    filename = "TD3_" + args.env_name + '_42-' + args.agent
    print("Load %s/%s_actor.pth" % (directory, filename))
    policy.load(filename, directory)

    avg_reward = 0.
    seeds = [x for x in range(2)]
    for s in seeds:
        state = env.reset()
        done = False
        while not done:
            state = createstate(state)
            action = policy.select_action(state)
            # action = action.clip(-1, 1)
            # action = np.random.randn(env.dof)
            # print(action)
            state, reward, done, _ = env.step(action)
            avg_reward += reward
            env.render()
    avg_reward /= len(seeds)

    print("---------------------------------------")
    print("Average Reward over the Evaluation Step: %f" % (avg_reward))
    print("---------------------------------------")
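# `createstate` is not defined in this file. A minimal sketch, assuming it
# flattens the robosuite observation dict into a single feature vector; the
# 'robot-state'/'object-state' keys match this snippet's older robosuite API
# but are an assumption here:
import numpy as np

def createstate_sketch(obs):
    return np.concatenate([obs['robot-state'], obs['object-state']])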
def copy(self):
    """Override default copy method to allow robosuite env serialization.

    Robosuite environments are not serializable, and thus we cannot use the
    default `copy.deepcopy(self)` from `SoftlearningEnv`. Instead, we first
    create a copy of self *without* the robosuite environment (`self._env`)
    and then instantiate a new robosuite environment and attach it to the
    copied self.
    """
    env = self._env
    self._env = None
    result = copy.deepcopy(self)
    result._env = suite.make(f"{self._domain}{self._task}", **self._env_kwargs)
    self._env = env
    return result
def __init__(self,
             wrapped_env_id,
             observation_keys=('robot-state', 'object-state'),
             **wrapped_env_kwargs):
    Serializable.quick_init(self, locals())
    self._wrapped_env = suite.make(wrapped_env_id, **wrapped_env_kwargs)
    self.action_space = Box(self._wrapped_env.action_spec[0],
                            self._wrapped_env.action_spec[1],
                            dtype=np.float32)
    observation_dim = (
        self._wrapped_env.observation_spec()['robot-state'].shape[0]
        + self._wrapped_env.observation_spec()['object-state'].shape[0])
    self.observation_space = Box(
        -np.inf * np.ones(observation_dim),
        np.inf * np.ones(observation_dim),
        dtype=np.float32,
    )
    self._observation_keys = observation_keys
def make_env():
    controller = load_controller_config(default_controller="OSC_POSE")
    env = GymWrapper(
        suite.make(
            "Door",
            robots="Panda",  # use Panda robot
            use_camera_obs=False,  # do not use pixel observations
            has_offscreen_renderer=False,  # not needed since not using pixel obs
            has_renderer=False,  # no on-screen rendering needed
            reward_shaping=True,  # use dense rewards
            reward_scale=1.0,  # scale max 1 per timestep
            control_freq=20,  # control should happen fast enough so that simulation looks smooth
            horizon=500,
            ignore_done=True,
            hard_reset=False,
            controller_configs=controller))
    return env
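# Example usage of make_env with the standard Gym API (a minimal sketch; the
# random-action rollout is illustrative only):
if __name__ == "__main__":
    env = make_env()
    obs = env.reset()
    for _ in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()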