def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=False,
        imsize=84,
        show=False,
        dataset_path=None,
        env_class=SawyerReachTorqueEnv,
        env_kwargs=None,
        init_camera=sawyer_torque_reacher_camera,
):
    """Load or generate flattened images from a torque-controlled reacher env.

    The dataset comes from one of three places, checked in this order:

    1. ``dataset_path`` (resolved through ``local_path_from_s3_or_local_path``),
    2. the cache file ``/tmp/sawyer_torque_data<N>.npy`` when ``use_cached``
       is true and the file exists,
    3. fresh rollouts of a random exploration policy; the result is then
       saved to the cache file.

    :param N: number of images to generate (also part of the cache name).
    :param test_p: fraction of rows returned in the *first* (training)
        split; despite the name, ``dataset[:int(N * test_p)]`` is the
        training set and the remainder is the test set.
    :param use_cached: reuse the cache file if it exists.
    :param imsize: side length passed to ``ImageEnv``; each row holds
        ``imsize * imsize * 3`` values.
    :param show: preview every generated image with OpenCV.
    :param dataset_path: optional explicit dataset location.
    :param env_class: environment constructor used when generating.
    :param env_kwargs: keyword arguments for ``env_class``; ``None`` means
        no arguments.
    :param init_camera: camera initializer forwarded to ``ImageEnv``.
    :return: ``(train_dataset, test_dataset, info)``; ``info['env']`` is set
        only when data was generated in this call.
    """
    filename = "/tmp/sawyer_torque_data" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        for i in range(N):
            # Restart the episode (and the exploration noise) every 50 samples.
            if i % 50 == 0:
                print('Reset')
                env.reset_model()
                exploration_policy.reset()
            # One down-scaled random action on the underlying env per image.
            action = exploration_policy.get_action()[0] * 1 / 10
            env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                # Reshape with imsize (not a fixed 84) so previews work for
                # any requested image size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        env_class=None,
        env_kwargs=None,
        init_camera=sawyer_door_env_camera,
):
    """Load or generate flattened door-env images: half goals, half rollouts.

    The dataset is loaded from ``dataset_path`` if given, else from
    ``/tmp/sawyer_door_push_open_and_reach<N>.npy`` when ``use_cached`` is
    true and that file exists. Otherwise it is generated: the first
    ``int(N / 2)`` rows are images taken after setting the env to sampled
    goals, and the remaining rows come from random exploration rollouts.
    The generated data is NOT written back to the cache file by this
    function.

    :param N: total number of images.
    :param test_p: fraction of rows returned in the *first* (training)
        split; the remainder is the test split.
    :param use_cached: reuse the cache file if it exists.
    :param imsize: side length passed to ``ImageEnv``.
    :param show: preview every generated image with OpenCV.
    :param dataset_path: optional explicit dataset location.
    :param env_class: environment constructor; must be provided whenever
        data has to be generated (the default ``None`` is not callable).
    :param env_kwargs: keyword arguments for ``env_class``; ``None`` means
        no arguments.
    :param init_camera: camera initializer forwarded to ``ImageEnv``.
    :return: ``(train_dataset, test_dataset, info)``; ``info`` is always an
        empty dict here.
    """
    filename = "/tmp/sawyer_door_push_open_and_reach" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        # ``**None`` raises TypeError, so fall back to "no kwargs".
        if env_kwargs is None:
            env_kwargs = {}
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        oracle_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))

        print('Goal Space Sampling')
        for i in range(oracle_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                # Use imsize rather than a hard-coded 84 so the preview
                # works for any image size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)

        # NOTE(review): min_y_pos is changed on the wrapped env before the
        # random phase and is not restored afterwards.
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )

        print('Random Sampling')
        for i in range(oracle_sampled_data, N):
            # Restart the episode (and exploration noise) every 20 samples.
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_goal_data_set(env=None, num_goals=1000, use_cached_dataset=False, action_scale=1 / 10):
    """Collect goals by stepping ``env`` with random actions, or load a cache.

    For each of ``num_goals`` iterations one scaled random action is applied
    with ``env.step`` and selected entries of the returned observation dict
    are stored as goals:

    ========================  =====================
    goal key                  observation key
    ========================  =====================
    ``latent_desired_goal``   ``latent_observation``
    ``image_desired_goal``    ``image_observation``
    ``state_desired_goal``    ``state_observation``
    ``joint_desired_goal``    ``state_observation``
    ========================  =====================

    The result is saved to ``/tmp/goals<num_goals>.npy``.

    :param env: environment exposing ``observation_space.spaces``,
        ``action_space``, ``reset_model`` and ``step``; not needed when the
        cache is used.
    :param num_goals: number of goals to collect (also part of the cache
        file name).
    :param use_cached_dataset: return the cached dict if the file exists.
    :param action_scale: multiplier applied to every sampled action.
    :return: dict mapping each goal key to an array with ``num_goals`` rows.
    """
    filename = '/tmp/goals' + str(num_goals) + '.npy'
    if use_cached_dataset and osp.isfile(filename):
        # The cache is a dict that np.save pickled into a 0-d object array;
        # np.load refuses to unpickle it unless allow_pickle=True.
        # NOTE(security): unpickling can execute arbitrary code, so this
        # path must only be used with cache files this process can trust.
        goal_dict = np.load(filename, allow_pickle=True).item()
        print("loaded data from saved file")
        return goal_dict

    cached_goal_keys = [
        'latent_desired_goal',
        'image_desired_goal',
        'state_desired_goal',
        'joint_desired_goal',
    ]
    goal_sizes = [
        env.observation_space.spaces['latent_desired_goal'].low.size,
        env.observation_space.spaces['image_desired_goal'].low.size,
        env.observation_space.spaces['state_desired_goal'].low.size,
        # Fixed width for the joint goal; it is filled from
        # 'state_observation' below (presumably a 7-dim state; verify
        # against the env).
        7,
    ]
    observation_keys = [
        'latent_observation',
        'image_observation',
        'state_observation',
        'state_observation',
    ]
    goal_generation_dict = dict()
    for goal_key, goal_size, obs_key in zip(
            cached_goal_keys,
            goal_sizes,
            observation_keys,
    ):
        goal_generation_dict[goal_key] = [goal_size, obs_key]

    goal_dict = dict()
    policy = RandomPolicy(env.action_space)
    es = OUStrategy(action_space=env.action_space, theta=0)
    exploration_policy = PolicyWrappedWithExplorationStrategy(
        exploration_strategy=es,
        policy=policy,
    )
    for goal_key, (goal_size, _) in goal_generation_dict.items():
        goal_dict[goal_key] = np.zeros((num_goals, goal_size))

    print('Generating Random Goals')
    for i in range(num_goals):
        # Restart the episode (and exploration noise) every 50 goals.
        if i % 50 == 0:
            print('Reset')
            env.reset_model()
            exploration_policy.reset()
        action = exploration_policy.get_action()[0] * action_scale
        obs, _, _, _ = env.step(action)
        print(i)
        for goal_key, (_, obs_key) in goal_generation_dict.items():
            goal_dict[goal_key][i, :] = obs[obs_key]

    np.save(filename, goal_dict)
    return goal_dict
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        action_space_sampling=False,
        env_class=SawyerPushAndPullDoorEnv,
        env_kwargs=None,
        action_plus_random_sampling=False,
        init_camera=sawyer_door_env_camera,
        ratio_action_sample_to_random=1 / 2,
        env_id=None,
):
    """Load or generate flattened images from a push-and-pull door env.

    The dataset is loaded from ``dataset_path`` if given, else from a cache
    file under ``/tmp`` when ``use_cached`` is true and the file exists.
    Otherwise data is generated only if ``action_plus_random_sampling`` is
    true: the first ``int(N * ratio_action_sample_to_random)`` images are
    taken after setting the env to sampled goals, and the rest after random
    exploration rollouts followed by setting the door to a sampled goal
    angle. The generated data is NOT written back to the cache file.

    :param N: total number of images (also part of the cache file name).
    :param test_p: fraction of rows returned in the *first* (training)
        split; the remainder is the test split.
    :param use_cached: reuse the cache file if it exists.
    :param imsize: side length passed to ``ImageEnv``.
    :param show: preview every generated image with OpenCV.
    :param dataset_path: optional explicit dataset location.
    :param policy_path: only selects the cache file name.
    :param action_space_sampling: only selects the cache file name.
    :param env_class: environment constructor used when ``env_id`` is None.
    :param env_kwargs: keyword arguments for ``env_class``; ``None`` means
        no arguments.
    :param action_plus_random_sampling: enable generation (see above).
    :param init_camera: camera initializer forwarded to ``ImageEnv``.
    :param ratio_action_sample_to_random: fraction of ``N`` generated by
        goal sampling.
    :param env_id: if given, the env is created with ``gym.make(env_id)``
        instead of ``env_class``.
    :return: ``(train_dataset, test_dataset, info)``; ``info['env']`` is set
        only when data was generated in this call.
    :raises NotImplementedError: if no dataset could be loaded and
        ``action_plus_random_sampling`` is false.
    """
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_and_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_and_pull_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_and_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_plus_random_sampling:
        if env_id is not None:
            import gym
            env = gym.make(env_id)
        else:
            # ``**None`` raises TypeError, so fall back to "no kwargs".
            if env_kwargs is None:
                env_kwargs = {}
            env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_sampled_data = int(N * ratio_action_sample_to_random)
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)

        print('Action Space Sampling')
        for i in range(action_sampled_data):
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                # Use imsize rather than a hard-coded 84 so the preview
                # works for any image size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)

        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )

        print('Random Sampling')
        for i in range(action_sampled_data, N):
            # Restart the episode (and exploration noise) every 20 samples.
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            # After the random arm motion, set the door to a sampled goal
            # angle before taking the image.
            goal = env.sample_goal()
            env.set_to_goal_angle(goal['state_desired_goal'])
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        raise NotImplementedError()

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        action_space_sampling=False,
        env_class=SawyerDoorEnv,
        env_kwargs=None,
        init_camera=sawyer_door_env_camera_v2,
):
    """Load or generate flattened images from a door env via random rollouts.

    The dataset is loaded from ``dataset_path`` if given, else from a cache
    file under ``/tmp`` when ``use_cached`` is true and the file exists.
    Otherwise ``N`` images are generated with a random exploration policy
    (10 steps on the wrapped env per image) and saved to the cache file.

    :param N: number of images (also part of the cache file name).
    :param test_p: fraction of rows returned in the *first* (training)
        split; the remainder is the test split.
    :param use_cached: reuse the cache file if it exists.
    :param imsize: side length passed to ``ImageEnv``.
    :param show: preview every generated image with OpenCV.
    :param dataset_path: optional explicit dataset location.
    :param policy_path: only selects the cache file name.
    :param action_space_sampling: only selects the cache file name.
    :param env_class: environment constructor used when generating.
    :param env_kwargs: keyword arguments for ``env_class``; ``None`` means
        no arguments.
    :param init_camera: camera initializer forwarded to ``ImageEnv``.
    :return: ``(train_dataset, test_dataset, info)``; ``info['env']`` is set
        only when data was generated in this call.
    """
    if policy_path is not None:
        filename = "/tmp/sawyer_door_pull_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_pull_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_pull_open" + str(N) + ".npy"
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        # ``**None`` raises TypeError, so fall back to "no kwargs".
        if env_kwargs is None:
            env_kwargs = {}
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        env.wrapped_env.reset()
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        for i in range(N):
            # Restart the episode (and exploration noise) every 20 samples.
            if i % 20 == 0:
                env.reset_model()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = unormalize_image(img)
            if show:
                # Use imsize rather than a hard-coded 84 so the preview
                # works for any image size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        policy_path=None,
        ratio_oracle_policy_data_to_random=1 / 2,
        action_space_sampling=False,
        env_class=None,
        env_kwargs=None,
        action_plus_random_sampling=False,
        init_camera=sawyer_door_env_camera,
):
    """Load or generate flattened images from a push-open door env.

    The dataset is loaded from ``dataset_path`` if given, else from a cache
    file under ``/tmp`` when ``use_cached`` is true and the file exists.
    Otherwise one of three generation modes runs:

    * ``action_space_sampling``: every image is taken after moving the arm
      to a sampled position and setting the env to a sampled goal. This
      mode always builds ``SawyerDoorPushOpenEnv`` with
      ``sawyer_door_env_camera`` (``env_class`` / ``init_camera`` are not
      used). The result is not saved.
    * ``action_plus_random_sampling``: the first ``int(N / 2)`` images are
      produced as above using ``env_class``; the rest come from random
      exploration rollouts. The result is not saved.
    * default: random exploration rollouts on
      ``SawyerDoorPushOpenEnv(max_angle=.5)``; the result is saved to the
      cache file.

    :param N: total number of images (also part of the cache file name).
    :param test_p: fraction of rows returned in the *first* (training)
        split; the remainder is the test split.
    :param use_cached: reuse the cache file if it exists.
    :param imsize: side length passed to ``ImageEnv``.
    :param show: preview every generated image with OpenCV.
    :param dataset_path: optional explicit dataset location.
    :param policy_path: only selects the cache file name.
    :param ratio_oracle_policy_data_to_random: accepted for interface
        compatibility; not read by this function.
    :param action_space_sampling: selects the cache file name and the
        first generation mode.
    :param env_class: environment constructor; required by the
        ``action_plus_random_sampling`` mode.
    :param env_kwargs: keyword arguments for the environment constructor;
        ``None`` means no arguments.
    :param action_plus_random_sampling: selects the second generation mode.
    :param init_camera: camera initializer forwarded to ``ImageEnv`` in the
        ``action_plus_random_sampling`` mode.
    :return: ``(train_dataset, test_dataset, info)``; ``info['env']`` is set
        only when data was generated in this call.
    """
    if policy_path is not None:
        filename = "/tmp/sawyer_door_push_open_oracle+random_policy_data_closer_zoom_action_limited" + str(
            N) + ".npy"
    elif action_space_sampling:
        filename = "/tmp/sawyer_door_push_open_zoomed_in_action_space_sampling" + str(
            N) + ".npy"
    else:
        filename = "/tmp/sawyer_door_push_open" + str(N) + ".npy"
    info = {}
    # ``**None`` raises TypeError, so treat a missing env_kwargs as "no
    # kwargs" for every branch that forwards it.
    if env_kwargs is None:
        env_kwargs = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    elif action_space_sampling:
        env = SawyerDoorPushOpenEnv(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=False,
            init_camera=sawyer_door_env_camera,
            normalize=False,
        )
        action_space = Box(
            np.array([-env.max_x_pos, .5, .06]),
            np.array([env.max_x_pos, env.max_y_pos, .06]),
        )
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env.get_image().flatten()
            dataset[i, :] = img
            if show:
                # Use imsize rather than a hard-coded 84 so the preview
                # works for any image size.
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        info['env'] = env
    elif action_plus_random_sampling:
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        action_space = Box(
            np.array([-env.max_x_pos, .5, .06]),
            np.array([env.max_x_pos, .6, .06]),
        )
        action_sampled_data = int(N / 2)
        dataset = np.zeros((N, imsize * imsize * 3))

        print('Action Space Sampling')
        for i in range(action_sampled_data):
            env.set_to_goal_pos(action_space.sample())  # move arm to spot
            goal = env.sample_goal()
            env.set_to_goal(goal)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)

        # min_y_pos is raised to .6 for the random phase and set to .5
        # once that phase is finished.
        env._wrapped_env.min_y_pos = .6
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )

        print('Random Sampling')
        for i in range(action_sampled_data, N):
            # Restart the episode (and exploration noise) every 20 samples.
            if i % 20 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(10):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            img = env._get_flat_img()
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        env._wrapped_env.min_y_pos = .5
        info['env'] = env
    else:
        now = time.time()
        env = SawyerDoorPushOpenEnv(max_angle=.5)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=sawyer_door_env_camera,
            normalize=True,
        )
        info['env'] = env
        policy = RandomPolicy(env.action_space)
        es = OUStrategy(action_space=env.action_space, theta=0)
        exploration_policy = PolicyWrappedWithExplorationStrategy(
            exploration_strategy=es,
            policy=policy,
        )
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            # Restart the episode (and exploration noise) every 100 samples.
            if i % 100 == 0:
                env.reset()
                exploration_policy.reset()
            for _ in range(25):
                action = exploration_policy.get_action()[0]
                env.wrapped_env.step(action)
            goal = env.sample_goal_for_rollout()
            env.set_to_goal(goal)
            # The stored row is the first element returned by one more
            # random step on the image env.
            img = env.step(env.action_space.sample())[0]
            dataset[i, :] = img
            if show:
                cv2.imshow('img', img.reshape(3, imsize, imsize).transpose())
                cv2.waitKey(1)
            print(i)
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info