def generate_vae_dataset(variant):
    """Load or generate a flat uint8 image dataset and split it for VAE training.

    All settings are read from ``variant`` with ``dict.get``.

    * If ``dataset_path`` is set, the dataset is loaded from it and ``N`` is
      replaced by its number of rows.
    * Otherwise a cache file under ``/tmp`` (named from the save prefix, ``N``,
      the camera name, ``imsize``, the random/oracle split and ``tag``) is
      loaded when ``use_cached`` is true and the file exists.
    * Otherwise ``N`` images are collected from the environment and saved to
      that cache file.

    Collection mode, in priority order:
      1. ``random_and_oracle_policy_data``: the first
         ``int(N * random_and_oracle_policy_data_split)`` samples come from
         random actions, the rest from the policy loaded from ``policy_file``.
      2. ``oracle_dataset_using_set_to_goal``: the env is set to a sampled goal.
      3. ``random_rollout_data``: one OU-noise step per sample, with the env
         set to a new rollout goal every ``n_random_steps`` samples.
      4. default: reset, then ``n_random_steps`` random actions.

    Returns:
        ``(train_dataset, test_dataset, info)``. The first
        ``int(N * test_p)`` rows form the training split, so ``test_p`` is the
        fraction kept for *training*. ``info['env']`` is only set when the
        data was generated during this call.
    """
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    tag = variant.get('tag', '')
    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    info = {}
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Cache-file prefix falls back to the env id, then the env class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # Dataset-specific kwargs win; env_kwargs only fill the gaps.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=(
                        non_presampled_goal_img_is_garbage),
                )
            else:
                # An already-wrapped env dictates the image size.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = (
                    non_presampled_goal_img_is_garbage)
            env.reset()
            info['env'] = env

            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
                # Loop-invariant: number of leading samples taken with random
                # actions before switching to the loaded policy.
                num_random_steps = int(
                    N * random_and_oracle_policy_data_split)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)
            dataset = np.zeros(
                (N, imsize * imsize * num_channels), dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                elif random_rollout_data:
                    if i % n_random_steps == 0:
                        g = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(g)
                        policy.reset()
                        # env.reset()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(u)[0]
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]
                img = obs['image_observation']
                dataset[i, :] = unormalize_image(img)
                if show:
                    # Use num_channels (not a literal 3) so the preview matches
                    # the buffer layout allocated above.
                    img = img.reshape(num_channels, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
self.walls = [ HorizontalWall( self.ball_radius, self.inner_wall_max_dist, -self.inner_wall_max_dist, self.inner_wall_max_dist, ) ] if wall_shape == "--": self.walls = [ HorizontalWall( self.ball_radius, 0, -self.inner_wall_max_dist, self.inner_wall_max_dist, ) ] if __name__ == "__main__": # e = Point2DEnv() import matplotlib.pyplot as plt # e = Point2DWallEnv("-", render_size=84) e = ImageEnv(Point2DWallEnv(wall_shape="u", render_size=84)) for i in range(10): e.reset() for j in range(50): e.step(np.random.rand(2)) e.render() im = e.get_image()
right_x=6, thickness=0.5 ) ] if __name__ == "__main__": # e = Point2DEnv() import matplotlib.pyplot as plt import time # e = Point2DWallEnv("-", render_size=84) # e = ImageEnv(Point2DWallEnv(wall_shape="--", render_size=84)) e = ImageEnv(Point2DWallEnv(wall_shape='maze', render_size=256, ball_radius=0.25, boundary_dist=10, action_limit=0.25, fixed_goal=False, randomize_position_on_reset=False, target_radius=0, initial_position=(-8, -8)), imsize=256) for i in range(10): e.reset() for j in range(50): # e.step(np.random.rand(2) - 0.5) e.step(np.array([1, 0])) e.render() im = e.get_image() time.sleep(0.1)
def generate_vae_dataset(
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        dataset_path=None,
        action_space_sampling=False,
        init_camera=None,
        env_class=None,
        env_kwargs=None,
):
    """Load or generate a flat uint8 image dataset and split it into train/test.

    Args:
        N: number of images to generate (also part of the cache file name).
        test_p: fraction of rows placed in the *training* split
            (``dataset[:int(N * test_p)]``).
        use_cached: reuse the ``/tmp`` cache file when it exists.
        imsize: side length passed to ``ImageEnv``.
        show: display each image with OpenCV while generating.
        dataset_path: if given, load the dataset from this local/S3 path.
        action_space_sampling: if true, images come from setting the env to
            sampled goals; otherwise from an OU-noise exploration policy.
        init_camera: forwarded to ``ImageEnv``.
        env_class: environment constructor, called with ``env_kwargs``.
        env_kwargs: keyword arguments for ``env_class`` (``None`` means none).

    Returns:
        ``(train_dataset, test_dataset, info)``; ``info['env']`` is set when
        the data was generated during this call.
    """
    filename = "/tmp/sawyer_xyz_pos_control_new_zoom_cam" + str(N) + '.npy'
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        # Split on the real number of rows, not on the N argument.
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        if env_kwargs is None:
            env_kwargs = dict()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            transpose=True,
            init_camera=init_camera,
            normalize=True,
        )
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3), dtype=np.uint8)
        if action_space_sampling:
            for i in range(N):
                env.set_to_goal(env.sample_goal())
                img = env._get_flat_img()
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow(
                        'img', img.reshape(3, imsize, imsize).transpose())
                    cv2.waitKey(1)
                print(i)
        else:
            policy = RandomPolicy(env.action_space)
            es = OUStrategy(action_space=env.action_space, theta=0)
            exploration_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=es,
                policy=policy,
            )
            for i in range(N):
                # Move the goal out of the image
                env.wrapped_env.set_goal(np.array([100, 100, 100]))
                if i % 50 == 0:
                    print('Reset')
                    env.reset()
                    exploration_policy.reset()
                action = exploration_policy.get_action()[0] * 10
                env.wrapped_env.step(action)
                img = env.step(env.action_space.sample())[0]
                # The env is built with normalize=True, so convert back to
                # uint8 before storing, as the goal-sampling branch does.
                # Writing the float image directly would truncate it.
                dataset[i, :] = unormalize_image(img)
                if show:
                    cv2.imshow(
                        'img', img.reshape(3, imsize, imsize).transpose())
                    cv2.waitKey(1)
                print(i)

        print("done making training data", time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
# env_name = 'SawyerDoorHookResetFreeEnv-v1' env_name = 'SawyerPushHurdle-v0' # env_name = 'SawyerPushNIPSFull-v0' # env_name = 'SawyerPushNIPSEasy-v0' # env_name = 'SawyerPushHurdleResetFreeEnv-v0' multiworld.register_all_envs() imsize = 48 # presampled_goals_path = 'data/local/goals/SawyerDoorHookResetFreeEnv-v1-goal.npy' # presampled_goals_path = 'data/local/goals/SawyerPickupEnvYZEasy-v0-goal-500.npy' # presampled_goals = np.load(presampled_goals_path, allow_pickle=True).item() env = ImageEnv(env, imsize, init_camera=sawyer_init_camera_zoomed_in, transpose=True, normalize=True, presampled_goals=None) print(env.action_space.low) print(env.action_space.high) for i in range(50): o = env.reset() for t in range(50): print(t) action = env.action_space.sample() s, r, _, _ = env.step(action) img = s['image_observation'] show_obs(img, imsize=imsize, name=env_name)
def generate_vae_dataset(variant):
    """Load or generate image data and wrap it in the dataset type ``variant`` asks for.

    Settings are read from ``variant`` with ``dict.get``.

    Data source:
      * ``dataset_path`` as ``str``: one file, loaded and unpacked with
        ``.item()``.
      * ``dataset_path`` as ``list``: files merged with
        ``concatenate_datasets``.
      * ``dataset_path`` as ``dict``: separate ``'train'`` and ``'test'``
        entries, each a ``str`` or ``list``; the test data is then used as-is
        instead of being split off.
      * otherwise: a ``/tmp`` cache file is loaded when ``use_cached`` is true
        and it exists, else data is rolled out in the env and saved there.

    When loading from ``dataset_path``, ``N`` and ``n_random_steps`` are
    overwritten from the shape of ``dataset['observations']``.

    Returns:
        ``(train_dataset, test_dataset, info)``. The dataset classes depend on
        ``use_linear_dynamics``, ``conditional_vae_dataset`` and
        ``enviorment_dataset``; the fallback is ``ImageObservationDataset``.
        ``test_p`` is the fraction assigned to the *training* split.
    """
    print(variant)
    from tqdm import tqdm
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    batch_size = variant.get('batch_size', 128)
    test_p = variant.get('test_p', 0.9)
    use_cached = variant.get('use_cached', True)
    imsize = variant.get('imsize', 84)
    num_channels = variant.get('num_channels', 3)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    dataset_path = variant.get('dataset_path', None)
    augment_data = variant.get('augment_data', False)
    data_filter_fn = variant.get('data_filter_fn', lambda x: x)
    delete_after_loading = variant.get('delete_after_loading', False)
    oracle_dataset_using_set_to_goal = variant.get(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = variant.get('random_rollout_data', False)
    random_rollout_data_set_to_goal = variant.get(
        'random_rollout_data_set_to_goal', True)
    random_and_oracle_policy_data = variant.get(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = variant.get(
        'random_and_oracle_policy_data_split', 0)
    policy_file = variant.get('policy_file', None)
    n_random_steps = variant.get('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    save_file_prefix = variant.get('save_file_prefix', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    conditional_vae_dataset = variant.get('conditional_vae_dataset', False)
    use_env_labels = variant.get('use_env_labels', False)
    use_linear_dynamics = variant.get('use_linear_dynamics', False)
    enviorment_dataset = variant.get('enviorment_dataset', False)
    save_trajectories = variant.get('save_trajectories', False)
    # Trajectory-shaped storage is required by the dynamics/conditional modes.
    save_trajectories = (
        save_trajectories or use_linear_dynamics or conditional_vae_dataset)
    tag = variant.get('tag', '')

    assert N % n_random_steps == 0, "Fix N/horizon or dataset generation will fail"

    from multiworld.core.image_env import ImageEnv, unormalize_image
    import rlkit.torch.pytorch_util as ptu
    from rlkit.util.io import load_local_or_remote_file
    from rlkit.data_management.dataset import (
        TrajectoryDataset, ImageObservationDataset,
        InitialObservationDataset, EnvironmentDataset,
        ConditionalDynamicsDataset, InitialObservationNumpyDataset,
        InfiniteBatchLoader, InitialObservationNumpyJitteringDataset)

    info = {}
    use_test_dataset = False
    if dataset_path is not None:
        if isinstance(dataset_path, str):
            dataset = load_local_or_remote_file(
                dataset_path, delete_after_loading=delete_after_loading)
            dataset = dataset.item()
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        elif isinstance(dataset_path, list):
            dataset = concatenate_datasets(dataset_path)
            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
        elif isinstance(dataset_path, dict):
            if isinstance(dataset_path['train'], str):
                dataset = load_local_or_remote_file(
                    dataset_path['train'],
                    delete_after_loading=delete_after_loading)
                dataset = dataset.item()
            elif isinstance(dataset_path['train'], list):
                dataset = concatenate_datasets(dataset_path['train'])

            if isinstance(dataset_path['test'], str):
                test_dataset = load_local_or_remote_file(
                    dataset_path['test'],
                    delete_after_loading=delete_after_loading)
                test_dataset = test_dataset.item()
            elif isinstance(dataset_path['test'], list):
                test_dataset = concatenate_datasets(dataset_path['test'])

            N = dataset['observations'].shape[0] * dataset[
                'observations'].shape[1]
            n_random_steps = dataset['observations'].shape[1]
            use_test_dataset = True
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Cache-file prefix falls back to the env id, then the env class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            init_camera.__name__ if init_camera and hasattr(
                init_camera, '__name__') else '',
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = load_local_or_remote_file(
                filename, delete_after_loading=delete_after_loading)
            if conditional_vae_dataset:
                dataset = dataset.item()
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                # Dataset-specific kwargs win; env_kwargs only fill the gaps.
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                for key, val in env_kwargs.items():
                    if key not in vae_dataset_specific_env_kwargs:
                        vae_dataset_specific_env_kwargs[key] = val
                env = env_class(**vae_dataset_specific_env_kwargs)
            if not isinstance(env, ImageEnv):
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=(
                        non_presampled_goal_img_is_garbage),
                )
            else:
                # An already-wrapped env dictates the image size.
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = (
                    non_presampled_goal_img_is_garbage)
            env.reset()
            info['env'] = env

            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
                # Loop-invariant: number of leading samples taken with random
                # actions before switching to the loaded policy.
                num_random_steps = int(
                    N * random_and_oracle_policy_data_split)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)

            if save_trajectories:
                # `float` replaces the `np.float` alias, which was the same
                # builtin type and no longer exists in current NumPy.
                dataset = {
                    'observations': np.zeros(
                        (N // n_random_steps, n_random_steps,
                         imsize * imsize * num_channels),
                        dtype=np.uint8),
                    'actions': np.zeros(
                        (N // n_random_steps, n_random_steps,
                         env.action_space.shape[0]),
                        dtype=float),
                    'env': np.zeros(
                        (N // n_random_steps,
                         imsize * imsize * num_channels),
                        dtype=np.uint8),
                }
            else:
                dataset = np.zeros(
                    (N, imsize * imsize * num_channels), dtype=np.uint8)
            labels = []
            for i in tqdm(range(N)):
                if random_and_oracle_policy_data:
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            obs = env.step(env.action_space.sample())[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif random_rollout_data:  # ADD DATA WHERE JUST PUCK MOVES
                    if i % n_random_steps == 0:
                        env.reset()
                        policy.reset()
                        env_img = env._get_obs()['image_observation']
                        if random_rollout_data_set_to_goal:
                            env.set_to_goal(env.get_goal())
                    obs = env._get_obs()
                    u = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    env.step(u)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    goal = env.sample_goal()
                    env.set_to_goal(goal)
                    obs = env._get_obs()
                else:
                    env.reset()
                    for _ in range(n_random_steps):
                        obs = env.step(env.action_space.sample())[0]

                img = obs['image_observation']
                if use_env_labels:
                    labels.append(obs['label'])
                if save_trajectories:
                    # NOTE: `u` and `env_img` are only assigned in the
                    # random_rollout_data mode, so trajectory saving relies on
                    # that mode being active.
                    dataset['observations'][
                        i // n_random_steps,
                        i % n_random_steps, :] = unormalize_image(img)
                    dataset['actions'][
                        i // n_random_steps, i % n_random_steps, :] = u
                    dataset['env'][
                        i // n_random_steps, :] = unormalize_image(env_img)
                else:
                    dataset[i, :] = unormalize_image(img)

                if show:
                    # Use num_channels (not a literal 3) so the preview matches
                    # the buffer layout allocated above.
                    img = img.reshape(num_channels, imsize, imsize).transpose()
                    img = img[::-1, :, ::-1]
                    cv2.imshow('img', img)
                    cv2.waitKey(1)
                    # radius = input('waiting...')
            print("done making training data", filename, time.time() - now)
            np.save(filename, dataset)
            #np.save(filename[:-4] + 'labels.npy', np.array(labels))

    info['train_labels'] = []
    info['test_labels'] = []

    dataset = data_filter_fn(dataset)
    if use_linear_dynamics and conditional_vae_dataset:
        # Shuffled trajectory-level split. An earlier sequential split that was
        # overwritten straight away has been removed; it also indexed
        # dataset['env'] outside the try, defeating the fallback below.
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        try:
            train_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][train_i, :, :],
                'actions': dataset['actions'][train_i, :, :],
                'env': dataset['env'][train_i, :]
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][test_i, :, :],
                'actions': dataset['actions'][test_i, :, :],
                'env': dataset['env'][test_i, :]
            })
        except KeyError:
            # Datasets without an 'env' entry: build without it.
            train_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][train_i, :, :],
                'actions': dataset['actions'][train_i, :, :],
            })
            test_dataset = ConditionalDynamicsDataset({
                'observations': dataset['observations'][test_i, :, :],
                'actions': dataset['actions'][test_i, :, :],
            })
    elif use_linear_dynamics:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        train_dataset = TrajectoryDataset({
            'observations': dataset['observations'][:n, :, :],
            'actions': dataset['actions'][:n, :, :]
        })
        test_dataset = TrajectoryDataset({
            'observations': dataset['observations'][n:, :, :],
            'actions': dataset['actions'][n:, :, :]
        })
    elif enviorment_dataset:
        # Split along the time axis: every trajectory contributes to both sets.
        n = int(n_random_steps * test_p)
        train_dataset = EnvironmentDataset({
            'observations': dataset['observations'][:, :n, :],
        })
        test_dataset = EnvironmentDataset({
            'observations': dataset['observations'][:, n:, :],
        })
    elif conditional_vae_dataset:
        num_trajectories = N // n_random_steps
        n = int(num_trajectories * test_p)
        indices = np.arange(num_trajectories)
        np.random.shuffle(indices)
        train_i, test_i = indices[:n], indices[n:]

        if augment_data:
            dataset_class = InitialObservationNumpyJitteringDataset
        else:
            dataset_class = InitialObservationNumpyDataset

        # Without a stored context image, use each trajectory's first frame.
        if 'env' not in dataset:
            dataset['env'] = dataset['observations'][:, 0]
        if use_test_dataset and ('env' not in test_dataset):
            test_dataset['env'] = test_dataset['observations'][:, 0]

        if use_test_dataset:
            train_dataset = dataset_class({
                'observations': dataset['observations'],
                'env': dataset['env']
            })
            test_dataset = dataset_class({
                'observations': test_dataset['observations'],
                'env': test_dataset['env']
            })
        else:
            train_dataset = dataset_class({
                'observations': dataset['observations'][train_i, :, :],
                'env': dataset['env'][train_i, :]
            })
            test_dataset = dataset_class({
                'observations': dataset['observations'][test_i, :, :],
                'env': dataset['env'][test_i, :]
            })

        train_batch_loader_kwargs = variant.get(
            'train_batch_loader_kwargs',
            dict(batch_size=batch_size, num_workers=0, ))
        test_batch_loader_kwargs = variant.get(
            'test_batch_loader_kwargs',
            dict(batch_size=batch_size, num_workers=0, ))

        train_data_loader = data.DataLoader(
            train_dataset, shuffle=True, drop_last=True,
            **train_batch_loader_kwargs)
        test_data_loader = data.DataLoader(
            test_dataset, shuffle=True, drop_last=True,
            **test_batch_loader_kwargs)

        train_dataset = InfiniteBatchLoader(train_data_loader)
        test_dataset = InfiniteBatchLoader(test_data_loader)
    else:
        n = int(N * test_p)
        train_dataset = ImageObservationDataset(dataset[:n, :])
        test_dataset = ImageObservationDataset(dataset[n:, :])
    return train_dataset, test_dataset, info
# Script: step a Sawyer multi-object env with random actions and write every
# image observation to disk as a PNG.
multiworld.register_all_envs()
#env = gym.make('SawyerPickupMultiobj-v0')
#env = gym.make('SawyerPickupWideEnv-v0')
env = gym.make('SawyerMultiObj-v0')
#env = gym.make('SawyerPushNIPS-v0'
env = ImageEnv(
    env,
    imsize=imsize,
    init_camera=sawyer_pusher_camera_upright_v3,
    transpose=True,
    normalize=True,
)
i = 0
env.reset()
for j in range(0, 2000000):
    # Uniform random action in [-0.5, 0.5) per dimension.
    action = np.array(
        [random.random() - .5, random.random() - .5, random.random() - .5])
    obs = env.step(action)[0]['image_observation']
    # Reshape with the size the env was built with rather than a literal 480,
    # so the script keeps working when `imsize` changes. The image is
    # normalized, hence the rescale to 0..255 before writing.
    obs_img = 255 * obs.reshape(3, imsize, imsize).transpose()
    # Reverse the channel axis before handing the image to OpenCV.
    cv2.imwrite('/home/lab/imgs/obs' + str(j) + '.png', obs_img[..., ::-1])
#goal = env.sample_goal()
#obs_img = 255*goal['desired_goal'].reshape(3, 480, 480).transpose()
#cv2.imwrite('a.png', obs_img)
#cv2.imshow('window', obs_img)
#cv2.waitKey()
def generate_vae_dataset(
        env_class,
        N=10000,
        test_p=0.9,
        use_cached=True,
        imsize=84,
        show=False,
        init_camera=sawyer_init_camera_zoomed_in,
        dataset_path=None,
        env_kwargs=None,
        oracle_dataset=False,
        n_random_steps=100,
):
    """Load or generate a flat image dataset and split it into train/test.

    Args:
        env_class: environment constructor, called with ``env_kwargs``.
        N: number of images to generate.
        test_p: fraction of rows placed in the *training* split
            (``dataset[:int(N * test_p)]``).
        use_cached: reuse the ``/tmp`` cache file when it exists.
        imsize: side length passed to ``ImageEnv``.
        show: display each image with OpenCV while generating.
        init_camera: camera setup forwarded to ``ImageEnv``; its ``__name__``
            (empty when unavailable) is part of the cache file name.
        dataset_path: if given, load the dataset from this local/S3 path and
            take ``N`` from its number of rows.
        env_kwargs: keyword arguments for ``env_class`` (``None`` means none).
        oracle_dataset: if true, set the env to a sampled goal for each image
            instead of running random actions.
        n_random_steps: random actions taken after each reset in the
            non-oracle mode.

    Returns:
        ``(train_dataset, test_dataset, info)``; ``info['env']`` is set only
        when the data was generated during this call.
    """
    if env_kwargs is None:
        env_kwargs = {}
    filename = "/tmp/{}_{}_{}_oracle{}.npy".format(
        env_class.__name__,
        str(N),
        # Tolerate init_camera=None instead of raising AttributeError.
        getattr(init_camera, '__name__', ''),
        oracle_dataset,
    )
    info = {}
    if dataset_path is not None:
        filename = local_path_from_s3_or_local_path(dataset_path)
        dataset = np.load(filename)
        N = dataset.shape[0]
    elif use_cached and osp.isfile(filename):
        dataset = np.load(filename)
        print("loaded data from saved file", filename)
    else:
        now = time.time()
        env = env_class(**env_kwargs)
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
        )
        env.reset()
        info['env'] = env
        dataset = np.zeros((N, imsize * imsize * 3))
        for i in range(N):
            if oracle_dataset:
                goal = env.sample_goal()
                env.set_to_goal(goal)
            else:
                env.reset()
                for _ in range(n_random_steps):
                    env.step(env.action_space.sample())
            # One more random step yields the observation that is stored.
            obs = env.step(env.action_space.sample())[0]
            img = obs['image_observation']
            dataset[i, :] = img
            if show:
                # Reshape with the configured imsize rather than a literal 84.
                img = img.reshape(3, imsize, imsize).transpose()
                img = img[::-1, :, ::-1]
                cv2.imshow('img', img)
                cv2.waitKey(1)
                # radius = input('waiting...')
        print("done making training data", filename, time.time() - now)
        np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset = dataset[:n, :]
    test_dataset = dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_vae_dataset(variant):
    """
    Default collector for the VAE training set, used when no pre-train
    dataset generation function is supplied.

    Reads its settings from ``variant``. With ``dataset_path`` set, the data is
    loaded from ``$PJHOME/<dataset_path>``; otherwise a ``/tmp`` cache file is
    reused (when ``use_cached`` is true and it exists) or images are collected
    from the environment and saved there.

    Returns ``(train_dataset, test_dataset, info)`` where the first
    ``int(N * test_p)`` rows are the training split.
    """
    import rlkit.torch.pytorch_util as ptu
    import gym
    import multiworld
    multiworld.register_all_envs()

    print("generating vae dataset with original images")

    def _opt(key, default=None):
        # Every setting is optional; missing keys fall back to `default`.
        return variant.get(key, default)

    env_class = _opt('env_class')
    env_kwargs = _opt('env_kwargs')
    env_id = _opt('env_id')
    N = _opt('N', 10000)
    test_p = _opt('test_p', 0.9)
    use_cached = _opt('use_cached', True)
    imsize = _opt('imsize', 84)
    num_channels = _opt('num_channels', 3)
    show = _opt('show', False)
    init_camera = _opt('init_camera')
    dataset_path = _opt('dataset_path')
    oracle_dataset_using_set_to_goal = _opt(
        'oracle_dataset_using_set_to_goal', False)
    random_rollout_data = _opt('random_rollout_data', False)
    random_and_oracle_policy_data = _opt(
        'random_and_oracle_policy_data', False)
    random_and_oracle_policy_data_split = _opt(
        'random_and_oracle_policy_data_split', 0)
    policy_file = _opt('policy_file')
    n_random_steps = _opt('n_random_steps', 100)
    vae_dataset_specific_env_kwargs = _opt('vae_dataset_specific_env_kwargs')
    save_file_prefix = _opt('save_file_prefix')
    non_presampled_goal_img_is_garbage = _opt(
        'non_presampled_goal_img_is_garbage')
    tag = _opt('tag', '')

    info = {}
    if dataset_path is not None:
        print('load vae training dataset from: ', dataset_path)
        pjhome = os.environ['PJHOME']
        full_path = osp.join(pjhome, dataset_path)
        dataset = np.load(full_path, allow_pickle=True).item()
        if isinstance(dataset, dict):
            dataset = dataset['image_desired_goal']
        dataset = unormalize_image(dataset)
        N = dataset.shape[0]
    else:
        if env_kwargs is None:
            env_kwargs = {}
        # Prefix preference: explicit prefix, then env id, then class name.
        if save_file_prefix is None:
            save_file_prefix = env_id
        if save_file_prefix is None:
            save_file_prefix = env_class.__name__
        camera_name = init_camera.__name__ if init_camera else ''
        filename = "/tmp/{}_N{}_{}_imsize{}_random_oracle_split_{}{}.npy".format(
            save_file_prefix,
            str(N),
            camera_name,
            imsize,
            random_and_oracle_policy_data_split,
            tag,
        )
        if use_cached and osp.isfile(filename):
            dataset = np.load(filename)
            print("loaded data from saved file", filename)
        else:
            now = time.time()

            if env_id is not None:
                import gym
                import multiworld
                multiworld.register_all_envs()
                env = gym.make(env_id)
            else:
                if vae_dataset_specific_env_kwargs is None:
                    vae_dataset_specific_env_kwargs = {}
                # Dataset-specific values take precedence over env_kwargs.
                for name, value in env_kwargs.items():
                    vae_dataset_specific_env_kwargs.setdefault(name, value)
                env = env_class(**vae_dataset_specific_env_kwargs)

            if isinstance(env, ImageEnv):
                imsize = env.imsize
                env.non_presampled_goal_img_is_garbage = (
                    non_presampled_goal_img_is_garbage)
            else:
                env = ImageEnv(
                    env,
                    imsize,
                    init_camera=init_camera,
                    transpose=True,
                    normalize=True,
                    non_presampled_goal_img_is_garbage=(
                        non_presampled_goal_img_is_garbage),
                )
            env.reset()
            info['env'] = env

            if random_and_oracle_policy_data:
                policy_file = load_local_or_remote_file(policy_file)
                policy = policy_file['policy']
                policy.to(ptu.device)
            if random_rollout_data:
                from rlkit.exploration_strategies.ou_strategy import OUStrategy
                policy = OUStrategy(env.action_space)

            flat_size = imsize * imsize * num_channels
            dataset = np.zeros((N, flat_size), dtype=np.uint8)
            for i in range(N):
                if random_and_oracle_policy_data:
                    num_random_steps = int(
                        N * random_and_oracle_policy_data_split)
                    if i < num_random_steps:
                        env.reset()
                        for _ in range(n_random_steps):
                            random_action = env.action_space.sample()
                            obs = env.step(random_action)[0]
                    else:
                        obs = env.reset()
                        policy.reset()
                        for _ in range(n_random_steps):
                            policy_obs = np.hstack((
                                obs['state_observation'],
                                obs['state_desired_goal'],
                            ))
                            action, _ = policy.get_action(policy_obs)
                            obs, _, _, _ = env.step(action)
                elif oracle_dataset_using_set_to_goal:
                    print(i)
                    env.set_to_goal(env.sample_goal())
                    obs = env._get_obs()
                elif random_rollout_data:
                    if i % n_random_steps == 0:
                        rollout_goal = dict(
                            state_desired_goal=env.sample_goal_for_rollout())
                        env.set_to_goal(rollout_goal)
                        policy.reset()
                        # env.reset()
                    noisy_action = policy.get_action_from_raw_action(
                        env.action_space.sample())
                    obs = env.step(noisy_action)[0]
                else:
                    print("using totally random rollouts")
                    for _ in range(n_random_steps):
                        random_action = env.action_space.sample()
                        obs = env.step(random_action)[0]

                # NOTE yufei: this is already normalized image, of detype np.float64.
                img = obs['image_observation']
                dataset[i, :] = unormalize_image(img)

            np.save(filename, dataset)

    n = int(N * test_p)
    train_dataset, test_dataset = dataset[:n, :], dataset[n:, :]
    return train_dataset, test_dataset, info
def generate_uniform_dataset_door(
        num_imgs=1000,
        use_cached_dataset=False,
        init_camera=None,
        imsize=48,
        policy_file=None,
        show=False,
        path_length=100,
        save_file_prefix=None,
        env_id=None,
        tag='',
        dataset_path=None,
):
    """Collect final-frame images from policy rollouts.

    If ``dataset_path`` is given the dataset is loaded from it and returned
    immediately. Otherwise the env ``env_id`` is wrapped in an ``ImageEnv``.
    A cached ``/tmp`` file is returned when ``use_cached_dataset`` is true and
    it exists. Otherwise the policy stored in ``policy_file`` is rolled out
    ``num_imgs`` times for ``path_length`` steps each, and the last image
    observation of every rollout is stored.

    Returns:
        A ``(num_imgs, 3 * env.imsize ** 2)`` uint8 array when generated, or
        whatever the dataset/cache file contains when loaded.
    """
    if dataset_path is not None:
        dataset = load_local_or_remote_file(dataset_path)
        return dataset
    import gym
    from gym.envs import registration
    # trigger registration
    import multiworld.envs.pygame
    import multiworld.envs.mujoco
    env = gym.make(env_id)
    env = ImageEnv(
        env,
        imsize,
        init_camera=init_camera,
        transpose=True,
        normalize=True,
    )
    env.non_presampled_goal_img_is_garbage = True
    if save_file_prefix is None and env_id is not None:
        save_file_prefix = env_id
    filename = "/tmp/{}_N{}_imsize{}uniform_images_{}.npy".format(
        save_file_prefix,
        str(num_imgs),
        env.imsize,
        tag,
    )
    if use_cached_dataset and osp.isfile(filename):
        images = np.load(filename)
        print("Loaded data from {}".format(filename))
        return images

    policy_file = load_local_or_remote_file(policy_file)
    policy = policy_file['policy']
    policy.to(ptu.device)

    print('Sampling Uniform Dataset')
    dataset = np.zeros((num_imgs, 3 * env.imsize**2), dtype=np.uint8)
    for j in range(num_imgs):
        obs = env.reset()
        policy.reset()
        for i in range(path_length):
            policy_obs = np.hstack((
                obs['state_observation'],
                obs['state_desired_goal'],
            ))
            action, _ = policy.get_action(policy_obs)
            obs, _, _, _ = env.step(action)
        img_f = obs['image_observation']
        if show:
            img = obs['image_observation']
            img = img.reshape(3, env.imsize, env.imsize).transpose()
            img = img[::-1, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1)
        print(j)
        dataset[j, :] = unormalize_image(img_f)
    # Do one reset with reset_free forced on, then restore the caller's
    # setting even if the reset raises.
    temp = env.reset_free
    env.reset_free = True
    try:
        env.reset()
    finally:
        env.reset_free = temp
    np.save(filename, dataset)
    print("Saving file to {}".format(filename))
    return dataset
def _write_rgb_and_mask_frames(env, back_sub, out_dir, num_frames):
    """Step ``env`` randomly and write ``rgb_<i>.jpg`` / ``mask_<i>.jpg`` pairs."""
    for i in range(num_frames):
        action = env.action_space.sample()
        next_obs, reward, done, info = env.step(action)
        image = next_obs['observation']
        image = unnormalize_image(image)
        # learningRate=0 applies the frozen background model without
        # updating it.
        fg_mask = back_sub.apply(image, learningRate=0)
        # Scale the mask by its maximum. Skip this when nothing was detected,
        # because dividing by a zero maximum would fill the mask with NaNs.
        peak = fg_mask.max()
        if peak > 0:
            fg_mask = fg_mask / peak
        cv2.imwrite(out_dir + "/rgb_" + str(i) + ".jpg", image)
        cv2.imwrite(out_dir + "/mask_" + str(i) + ".jpg", fg_mask)


def main(training_data_dir, validation_data_dir, test_data_dir, imsize):
    """Generate RGB/foreground-mask image pairs for train, validation and test.

    A MOG2 background subtractor is first fed 10 frames from the
    'SawyerPushHurdlePuckAndRobotInvisible-v0' env. Frames from the
    'SawyerPushHurdlePuckInvisible-v0' env are then segmented against that
    model and written as JPEG pairs: 1000 for training, 200 for validation and
    200 for testing.

    Args:
        training_data_dir: output directory for the training pairs.
        validation_data_dir: output directory for the validation pairs.
        test_data_dir: output directory for the test pairs.
        imsize: image side length passed to ``ImageEnv``.
    """
    for directory in (training_data_dir, validation_data_dir, test_data_dir):
        os.makedirs(directory, exist_ok=True)

    backSub = cv2.createBackgroundSubtractorMOG2(history=10)

    # Registering required multiworld environments.
    multiworld.register_all_envs()

    base_env_background = gym.make('SawyerPushHurdlePuckAndRobotInvisible-v0')
    env_background = ImageEnv(
        base_env_background,
        imsize=imsize,
        init_camera=camera,
        transpose=True,
        normalize=True,
    )
    env_background.reset()

    base_env_hurdle = gym.make('SawyerPushHurdlePuckInvisible-v0')
    env_hurdle = ImageEnv(
        base_env_hurdle,
        imsize=imsize,
        init_camera=camera,
        transpose=True,
        normalize=True,
    )
    env_hurdle.reset()

    # Generating training, validation and test data
    print("Training background subctractor")
    for i in range(10):
        action = env_background.action_space.sample()
        next_obs, reward, done, info = env_background.step(action)
        image = next_obs['observation']
        image = unnormalize_image(image)
        # A negative learningRate lets OpenCV pick the update rate; the
        # returned mask is not needed while the model is being fitted.
        backSub.apply(image, learningRate=-1)

    print("Generating training data")
    _write_rgb_and_mask_frames(env_hurdle, backSub, training_data_dir, 1000)

    print("Generating validation data")
    _write_rgb_and_mask_frames(env_hurdle, backSub, validation_data_dir, 200)

    print("Generating testing data")
    _write_rgb_and_mask_frames(env_hurdle, backSub, test_data_dir, 200)

    print("Completed generating data at:", training_data_dir)
def generate_vae_dataset(variant):
    """Collect a transition dataset and save it to the logger snapshot dir.

    For each of ``N`` samples the env is brought to a state (a sampled goal
    plus ``n_random_steps`` random actions when ``oracle_dataset`` is set,
    otherwise a reset plus ``n_random_steps`` random actions). The current
    observation is recorded, one further random action is taken, and the
    resulting observation is recorded as well.

    The saved dict holds ``obs``, ``actions``, ``next_obs``, ``obs_state`` and
    ``next_obs_state``. ``obs``/``next_obs`` contain uint8 images when
    ``use_images`` is true, and float32 state vectors otherwise.

    ``n_random_steps`` defaults to 3 in oracle mode and 100 otherwise.

    The function writes ``vae_dataset.npy`` into the logger snapshot directory
    and returns ``None``.
    """
    import cv2
    env_class = variant.get('env_class', None)
    env_kwargs = variant.get('env_kwargs', None)
    env_id = variant.get('env_id', None)
    N = variant.get('N', 10000)
    use_images = variant.get('use_images', True)
    imsize = variant.get('imsize', 84)
    show = variant.get('show', False)
    init_camera = variant.get('init_camera', None)
    oracle_dataset = variant.get('oracle_dataset', False)
    if 'n_random_steps' in variant:
        n_random_steps = variant['n_random_steps']
    else:
        n_random_steps = 3 if oracle_dataset else 100
    vae_dataset_specific_env_kwargs = variant.get(
        'vae_dataset_specific_env_kwargs', None)
    non_presampled_goal_img_is_garbage = variant.get(
        'non_presampled_goal_img_is_garbage', None)
    from multiworld.core.image_env import ImageEnv, unormalize_image
    info = {}

    from railrl.core import logger
    logdir = logger.get_snapshot_dir()
    filename = osp.join(logdir, "vae_dataset.npy")

    now = time.time()

    if env_id is not None:
        import gym
        env = gym.make(env_id)
    else:
        # Tolerate a missing env_kwargs entry instead of failing on .items().
        if env_kwargs is None:
            env_kwargs = {}
        if vae_dataset_specific_env_kwargs is None:
            vae_dataset_specific_env_kwargs = {}
        # Dataset-specific kwargs win; env_kwargs only fill the gaps.
        for key, val in env_kwargs.items():
            if key not in vae_dataset_specific_env_kwargs:
                vae_dataset_specific_env_kwargs[key] = val
        env = env_class(**vae_dataset_specific_env_kwargs)
    if not isinstance(env, ImageEnv):
        env = ImageEnv(
            env,
            imsize,
            init_camera=init_camera,
            transpose=True,
            normalize=True,
            non_presampled_goal_img_is_garbage=(
                non_presampled_goal_img_is_garbage),
        )
    else:
        # An already-wrapped env dictates the image size.
        imsize = env.imsize
        env.non_presampled_goal_img_is_garbage = (
            non_presampled_goal_img_is_garbage)
    env.reset()
    info['env'] = env

    if use_images:
        data_size = len(env.observation_space.spaces['image_observation'].low)
        dtype = np.uint8
    else:
        data_size = len(env.observation_space.spaces['state_observation'].low)
        dtype = np.float32

    state_size = len(env.observation_space.spaces['state_observation'].low)

    dataset = {
        'obs': np.zeros((N, data_size), dtype=dtype),
        'actions': np.zeros((N, len(env.action_space.low)), dtype=np.float32),
        'next_obs': np.zeros((N, data_size), dtype=dtype),
        'obs_state': np.zeros((N, state_size), dtype=np.float32),
        'next_obs_state': np.zeros((N, state_size), dtype=np.float32),
    }

    # Report progress roughly 50 times; integer arithmetic keeps the modulo
    # exact and still works when N < 50.
    progress_every = max(N // 50, 1)
    for i in range(N):
        if i % progress_every == 0:
            print(i)
        if oracle_dataset:
            if i % 100 == 0:
                env.reset()
            goal = env.sample_goal()
            env.set_to_goal(goal)
            for _ in range(n_random_steps):
                env.step(env.action_space.sample())
        else:
            env.reset()
            for _ in range(n_random_steps):
                env.step(env.action_space.sample())

        obs = env._get_obs()
        if use_images:
            dataset['obs'][i, :] = unormalize_image(obs['image_observation'])
        else:
            dataset['obs'][i, :] = obs['state_observation']
        dataset['obs_state'][i, :] = obs['state_observation']

        action = env.action_space.sample()
        dataset['actions'][i, :] = action

        obs = env.step(action)[0]
        img = obs['image_observation']
        if use_images:
            dataset['next_obs'][i, :] = unormalize_image(img)
        else:
            dataset['next_obs'][i, :] = obs['state_observation']
        dataset['next_obs_state'][i, :] = obs['state_observation']
        # Preview only when requested through variant['show']. A leftover
        # debug override used to force it on for every sample.
        if show:
            img = img.reshape(3, imsize, imsize).transpose((1, 2, 0))
            img = img[::, :, ::-1]
            cv2.imshow('img', img)
            cv2.waitKey(1000)

    print("keys and shapes:")
    for k, v in dataset.items():
        print(k, v.shape)
    print("done making training data", filename, time.time() - now)
    np.save(filename, dataset)