def test_mnist_single_threaded() -> None:
    config = storage.LFSConfigurations(storage_dir_path="/tmp/")
    lfs_storage = storage.LFSStorage(configurations=config)
    dataset_id = "mnist"
    dataset_version = "1"
    util.cleanup_lfs_storage(
        configurations=config, dataset_id=dataset_id, dataset_version=dataset_version
    )

    @lfs_storage.cacheable(dataset_id=dataset_id, dataset_version=dataset_version)
    def make_dataset() -> dataref.LMDBDataRef:
        return util.make_mnist_test_dataset()  # type: ignore

    # The first call populates the cache; the stream then reads back from it.
    stream_from_cache = make_dataset().stream()
    dataset_from_stream = tensorflow.make_tf_dataset(stream_from_cache)
    original_dataset = util.make_mnist_test_dataset()

    data_samples = util.compare_datasets(original_dataset, dataset_from_stream)
    assert data_samples == 10000
    assert stream_from_cache.length == data_samples

    util.cleanup_lfs_storage(
        configurations=config, dataset_id=dataset_id, dataset_version=dataset_version
    )
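# `util.compare_datasets` is defined elsewhere in the test utilities. A rough
# sketch of the behavior the test above relies on — pairwise-compare the two
# datasets and return the number of samples checked — follows; the real helper
# may differ in details. Assumes TF2 eager execution and that numpy is
# available alongside the module's other imports.
import numpy as np


def compare_datasets_sketch(
    original: tf.data.Dataset, from_stream: tf.data.Dataset
) -> int:
    num_samples = 0
    for original_item, cached_item in zip(original, from_stream):
        # Each element may be a nested structure (e.g. an (image, label)
        # tuple), so compare leaf tensors structure-wise.
        tf.nest.map_structure(
            lambda a, b: np.testing.assert_array_equal(a.numpy(), b.numpy()),
            original_item,
            cached_item,
        )
        num_samples += 1
    return num_samples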
def test_storage_cacheable_single_threaded() -> None:
    original_range_size = 120
    updated_range_size = 126
    dataset_id = "range-dataset"
    dataset_version = "1"
    configurations = create_configurations()
    cache_filepath = get_cache_filepath(configurations, dataset_id, dataset_version)
    if cache_filepath.exists():
        cache_filepath.unlink()
    lfs_storage = storage.LFSStorage(configurations=configurations)

    @lfs_storage.cacheable(dataset_id, dataset_version)
    def make_dataref(range_size: int) -> dataref.LMDBDataRef:
        return tf.data.Dataset.range(range_size)  # type: ignore

    original_data_stream = make_dataref(range_size=original_range_size).stream()
    assert original_data_stream.length == original_range_size
    data_generator = original_data_stream.iterator_fn()
    for idx in range(original_range_size):
        assert idx == next(data_generator)

    # The second call is served from the cache, so the new range_size is
    # ignored and the stream still has the originally cached length.
    updated_data_stream = make_dataref(range_size=updated_range_size).stream()
    assert updated_data_stream.length == original_range_size
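# `create_configurations` and `get_cache_filepath` are module-level test
# helpers. Plausible implementations are sketched below; the storage directory,
# the on-disk cache layout, and the assumption that `LFSConfigurations` exposes
# its `storage_dir_path` are all guesses, not necessarily what the real
# helpers use.
def create_configurations_sketch() -> storage.LFSConfigurations:
    return storage.LFSConfigurations(storage_dir_path="/tmp/")


def get_cache_filepath_sketch(
    configurations: storage.LFSConfigurations, dataset_id: str, dataset_version: str
) -> pathlib.Path:
    # Assumed layout: <storage_dir>/<dataset_id>/<dataset_version>/data.mdb
    # (LMDB's default data file name).
    return (
        pathlib.Path(configurations.storage_dir_path)
        / dataset_id
        / dataset_version
        / "data.mdb"
    )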
def compare_performance_tf_record_dataset(data_dir: pathlib.Path) -> None:
    config = storage.LFSConfigurations(storage_dir_path="/tmp/")
    lfs_storage = storage.LFSStorage(configurations=config)
    dataset_id = "imagenet-train"
    dataset_version = "0"
    training = True
    cleanup_lfs_storage(
        configurations=config, dataset_id=dataset_id, dataset_version=dataset_version
    )

    @lfs_storage.cacheable(dataset_id=dataset_id, dataset_version=dataset_version)
    def make_dataset() -> dataref.LMDBDataRef:
        return make_dataset_from_tf_records(data_dir=data_dir, training=training)  # type: ignore

    cache_creation_start_time = time.time()
    stream_from_cache = make_dataset().stream()
    cache_creation_time = time.time() - cache_creation_start_time
    print(f"Cache creation took: {cache_creation_time} seconds.")

    dataset_from_stream = tensorflow.make_tf_dataset(stream_from_cache)
    cache_read_time, cache_data_items = read_dataset(dataset=dataset_from_stream)
    print(f"Cache read took: {cache_read_time} seconds.")

    original_dataset_read_time, original_data_items = read_dataset(
        dataset=make_dataset_from_tf_records(data_dir=data_dir, training=training)
    )
    print(f"Original read took: {original_dataset_read_time} seconds.")
    assert cache_data_items == original_data_items
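# `read_dataset` is another helper assumed from the surrounding module. A
# minimal sketch consistent with how it is used above — time one full pass
# over the dataset and count its elements — follows; assumes TF2 eager
# execution. (`Tuple` may already be imported at the top of the real module.)
from typing import Tuple


def read_dataset_sketch(dataset: tf.data.Dataset) -> Tuple[float, int]:
    start_time = time.time()
    num_items = 0
    for _ in dataset:
        num_items += 1
    return time.time() - start_time, num_items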
def test_storage_submit() -> None:
    range_size = 10
    dataset_id = "range-dataset"
    dataset_version = "0"
    dataset = tf.data.Dataset.range(range_size)
    configurations = create_configurations()
    cache_filepath = get_cache_filepath(configurations, dataset_id, dataset_version)
    if cache_filepath.exists():
        cache_filepath.unlink()
    lfs_storage = storage.LFSStorage(configurations=configurations)
    lfs_storage.submit(
        data=dataset, dataset_id=dataset_id, dataset_version=dataset_version
    )
    assert cache_filepath.is_file()
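# `submit` writes a dataset into the cache eagerly, while the `cacheable`
# decorator used in the other tests defers the write: the wrapped function
# populates the cache on its first call and later calls are served from it.
# A minimal usage sketch of the decorator form, with illustrative
# dataset_id/dataset_version values:
def example_cacheable_usage() -> None:
    config = storage.LFSConfigurations(storage_dir_path="/tmp/")
    lfs_storage = storage.LFSStorage(configurations=config)

    @lfs_storage.cacheable(dataset_id="range-dataset-example", dataset_version="0")
    def make_example_dataref() -> dataref.LMDBDataRef:
        return tf.data.Dataset.range(10)  # type: ignore

    stream = make_example_dataref().stream()  # first call populates the cache
    assert stream.length == 10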
def _configure_storage(self) -> None:
    session_config = None  # type: Optional[tf.compat.v1.ConfigProto]
    if self._hvd_config.use:
        # For multi-GPU training, we map processes to individual GPUs. TF
        # requires that for each instantiation of `tf.Session`, the process
        # is mapped to the same GPU.
        session_config = tf.compat.v1.ConfigProto()
        session_config.gpu_options.visible_device_list = str(hvd.local_rank())

    scheme = "wss" if self._env.use_tls else "ws"
    rw_coordinator_url = (
        f"{scheme}://{self._env.master_addr}:{self._env.master_port}/ws/data-layer/"
    )
    data_layer_type = self._env.experiment_config.get_data_layer_type()

    if data_layer_type == StorageTypes.SHARED_FS.value:
        local_cache_dir_path = self._env.experiment_config["data_layer"].get(
            "container_storage_path"
        )
        local_cache_path = init_container_storage_path(
            configured_storage_path=local_cache_dir_path
        )
        storage_config = storage.LFSConfigurations(
            storage_dir_path=str(local_cache_path)
        )
        self._storage = storage.LFSStorage(
            storage_config, tensorflow_config=session_config
        )

    elif data_layer_type == StorageTypes.S3.value:
        local_cache_dir_path = self._env.experiment_config["data_layer"].get(
            "local_cache_container_path"
        )
        local_cache_path = init_container_storage_path(
            configured_storage_path=local_cache_dir_path
        )
        storage_config = storage.S3Configurations(
            bucket=self._env.experiment_config["data_layer"]["bucket"],
            bucket_directory_path=self._env.experiment_config["data_layer"][
                "bucket_directory_path"
            ],
            url=rw_coordinator_url,
            local_cache_dir=str(local_cache_path),
            access_key=self._env.experiment_config["data_layer"].get("access_key"),
            secret_key=self._env.experiment_config["data_layer"].get("secret_key"),
            endpoint_url=self._env.experiment_config["data_layer"].get("endpoint_url"),
            coordinator_cert_file=self._env.master_cert_file,
            coordinator_cert_name=self._env.master_cert_name,
        )
        self._storage = storage.S3Storage(
            storage_config, tensorflow_config=session_config
        )

    elif data_layer_type == StorageTypes.GCS.value:
        local_cache_dir_path = self._env.experiment_config["data_layer"].get(
            "local_cache_container_path"
        )
        local_cache_path = init_container_storage_path(
            configured_storage_path=local_cache_dir_path
        )
        storage_config = storage.GCSConfigurations(
            bucket=self._env.experiment_config["data_layer"]["bucket"],
            bucket_directory_path=self._env.experiment_config["data_layer"][
                "bucket_directory_path"
            ],
            url=rw_coordinator_url,
            local_cache_dir=str(local_cache_path),
            coordinator_cert_file=self._env.master_cert_file,
            coordinator_cert_name=self._env.master_cert_name,
        )
        self._storage = storage.GCSStorage(
            storage_config, tensorflow_config=session_config
        )

    else:
        raise AssertionError(
            "Please select a supported data_layer type. Supported types include: "
            f"{[i.value for i in StorageTypes]}"
        )
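# Whichever branch runs above, `self._storage` ends up exposing the same
# `cacheable` interface exercised by the tests earlier, so callers are
# backend-agnostic (shared_fs, S3, or GCS). A hypothetical caller is sketched
# below; the function, its dataset_id/dataset_version values, and the use of
# `LMDBDataRef` as the return type are illustrative, not part of this module.
def make_cached_dataset(configured_storage) -> dataref.LMDBDataRef:
    @configured_storage.cacheable(dataset_id="my-dataset", dataset_version="1")
    def make_dataset() -> dataref.LMDBDataRef:
        return tf.data.Dataset.range(100)  # type: ignore

    return make_dataset()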