def __init__(self,
             extract_dir=None,
             manual_dir=None,
             download_mode=None,
             compute_stats=True,
             max_examples_per_split=None):
    """Builds a `DownloadConfig` holding download/preparation options.

    Args:
        extract_dir: `str`, where extracted files are placed. Defaults to
            "<download_dir>/extracted".
        manual_dir: `str`, read-only location of data that was downloaded or
            extracted by hand. Defaults to "<download_dir>/manual".
        download_mode: `tfds.GenerateMode`, policy for downloads or data
            that already exist. Any falsy value selects
            `REUSE_DATASET_IF_EXISTS`, which reuses both the downloads and
            the data when present.
        compute_stats: `bool`, whether statistics are computed over the
            generated data.
        max_examples_per_split: `int`, optional cap on the number of
            examples written into each split.
    """
    self.extract_dir = extract_dir
    self.manual_dir = manual_dir

    # A falsy mode (None, "") means "use the default"; the constructor call
    # then normalizes whatever was given into a `GenerateMode` member.
    mode_cls = util.GenerateMode
    if download_mode:
        self.download_mode = mode_cls(download_mode)
    else:
        self.download_mode = mode_cls(mode_cls.REUSE_DATASET_IF_EXISTS)

    self.compute_stats = compute_stats
    self.max_examples_per_split = max_examples_per_split
def __init__(
    self,
    extract_dir=None,
    manual_dir=None,
    download_mode=None,
    compute_stats=None,
    max_examples_per_split=None,
    register_checksums=False,
    force_checksums_validation=False,
    beam_runner=None,
    beam_options=None,
    try_download_gcs=True,
    verify_ssl=True,
):
    """Constructs a `DownloadConfig`.

    Args:
        extract_dir: `str`, directory where extracted files are stored.
            Defaults to "<download_dir>/extracted".
        manual_dir: `str`, read-only directory where manually
            downloaded/extracted data is stored. Defaults to
            `<download_dir>/manual`.
        download_mode: `tfds.GenerateMode`, how to deal with downloads or
            data that already exists. Defaults to `REUSE_DATASET_IF_EXISTS`,
            which will reuse both downloads and data if it already exists.
        compute_stats: `ComputeStatsMode` (or a value convertible to one),
            whether to compute statistics over the generated data. Defaults
            to `SKIP`.
        max_examples_per_split: `int`, optional max number of examples to
            write into each split (used for testing).
        register_checksums: `bool`, defaults to False. If True, checksums of
            downloaded files are recorded.
        force_checksums_validation: `bool`, defaults to False. If True,
            raises an error if a URL does not have checksums.
        beam_runner: Runner to pass to `beam.Pipeline`, only used for
            datasets based on Beam for the generation.
        beam_options: `PipelineOptions` to pass to `beam.Pipeline`, only
            used for datasets based on Beam for the generation.
        try_download_gcs: `bool`, defaults to True. If True, prepared
            dataset will be downloaded from GCS, when available. If False,
            dataset will be downloaded and prepared from scratch.
        verify_ssl: `bool`, defaults to True. If True, will verify
            certificate when downloading dataset.
    """
    self.extract_dir = extract_dir
    self.manual_dir = manual_dir
    # Falsy values (None, "") fall back to the default; the enum call then
    # converts a plain value into the corresponding enum member.
    self.download_mode = util.GenerateMode(
        download_mode or util.GenerateMode.REUSE_DATASET_IF_EXISTS)
    # Statistics are skipped unless the caller explicitly requests a mode.
    self.compute_stats = util.ComputeStatsMode(
        compute_stats or util.ComputeStatsMode.SKIP)
    self.max_examples_per_split = max_examples_per_split
    self.register_checksums = register_checksums
    self.force_checksums_validation = force_checksums_validation
    self.beam_runner = beam_runner
    self.beam_options = beam_options
    self.try_download_gcs = try_download_gcs
    self.verify_ssl = verify_ssl
def __init__(self, cache_dir=None, mode=None):
    """Download manager constructor.

    Args:
        cache_dir (str): Cache directory where all downloads, extractions
            and other artifacts are stored. A leading "~" is expanded to the
            user's home directory. When omitted or falsy,
            `constants.CACHE_DIR` is used.
        mode (GenerateMode): Mode to FORCE_REDOWNLOAD, REUSE_CACHE_IF_EXISTS
            or REUSE_DATASET_IF_EXISTS. Defaults to REUSE_DATASET_IF_EXISTS.
    """
    # `cache_dir` is optional: the docstring advertises a default and the
    # fallback below already handled None, so the signature now allows
    # omitting it (existing positional/keyword callers are unaffected).
    self._cache_dir = os.path.expanduser(cache_dir or constants.CACHE_DIR)
    self._backend = local_backend.LocalBackend()

    # The generation mode indicates whether we re-use the cached download or
    # force re-downloading data.
    mode = mode or util.GenerateMode.REUSE_DATASET_IF_EXISTS
    self._mode = util.GenerateMode(mode)  # str to Enum

    # Create the root directory if it does not exist yet.
    gfile.MakeDirs(self._cache_dir)
def __init__(self,
             extract_dir=None,
             manual_dir=None,
             download_mode=None,
             compute_stats=None,
             max_examples_per_split=None,
             register_checksums=False,
             beam_runner=None,
             beam_options=None):
    """Builds a `DownloadConfig` holding download/preparation options.

    Args:
        extract_dir: `str`, where extracted files are placed. Defaults to
            "<download_dir>/extracted".
        manual_dir: `str`, read-only location of data that was downloaded or
            extracted by hand. Defaults to "<download_dir>/manual".
        download_mode: `tfds.GenerateMode`, policy for downloads or data
            that already exist. Any falsy value selects
            `REUSE_DATASET_IF_EXISTS`, which reuses both the downloads and
            the data when present.
        compute_stats: `ComputeStatsMode` (or a value convertible to one),
            whether statistics are computed over the generated data. Any
            falsy value selects `AUTO`.
        max_examples_per_split: `int`, optional cap on the number of
            examples written into each split.
        register_checksums: `bool`, False by default. When True, checksums
            of downloaded files are recorded.
        beam_runner: Runner handed to `beam.Pipeline`; only relevant for
            datasets generated with Beam.
        beam_options: `PipelineOptions` handed to `beam.Pipeline`; only
            relevant for datasets generated with Beam.
    """
    self.extract_dir = extract_dir
    self.manual_dir = manual_dir

    # Falsy inputs mean "use the default"; the enum constructors normalize
    # whatever was supplied into the matching enum member.
    mode_cls = util.GenerateMode
    if download_mode:
        self.download_mode = mode_cls(download_mode)
    else:
        self.download_mode = mode_cls(mode_cls.REUSE_DATASET_IF_EXISTS)

    stats_cls = util.ComputeStatsMode
    if compute_stats:
        self.compute_stats = stats_cls(compute_stats)
    else:
        self.compute_stats = stats_cls(stats_cls.AUTO)

    self.max_examples_per_split = max_examples_per_split
    self.register_checksums = register_checksums
    self.beam_runner = beam_runner
    self.beam_options = beam_options