Пример #1
0
    def __init__(
        self,
        extract_dir=None,
        manual_dir=None,
        download_mode=None,
        compute_stats=True,
        max_examples_per_split=None,
    ):
        """Builds a `DownloadConfig` holding download/preparation options.

        Args:
          extract_dir: `str`, directory in which extracted files are kept.
            Stored as given; the documented default location is
            "<download_dir>/extracted" (not resolved in this constructor).
          manual_dir: `str`, read-only directory containing manually
            downloaded/extracted data. Stored as given; the documented default
            location is "<download_dir>/manual" (not resolved here).
          download_mode: `tfds.GenerateMode`, policy for downloads or data that
            already exist. Any falsy value selects `REUSE_DATASET_IF_EXISTS`.
            The value is normalized through `util.GenerateMode(...)`.
          compute_stats: `bool`, whether statistics should be computed over the
            generated data.
          max_examples_per_split: `int`, optional cap on the number of examples
            written into each split.
        """
        # Directory overrides are stored untouched (None stays None).
        self.extract_dir = extract_dir
        self.manual_dir = manual_dir

        # Fall back to the default policy for any falsy mode, then normalize
        # the result to a `util.GenerateMode` member.
        if not download_mode:
            download_mode = util.GenerateMode.REUSE_DATASET_IF_EXISTS
        self.download_mode = util.GenerateMode(download_mode)

        self.compute_stats = compute_stats
        self.max_examples_per_split = max_examples_per_split
Пример #2
0
    def __init__(
        self,
        extract_dir=None,
        manual_dir=None,
        download_mode=None,
        compute_stats=None,
        max_examples_per_split=None,
        register_checksums=False,
        force_checksums_validation=False,
        beam_runner=None,
        beam_options=None,
        try_download_gcs=True,
        verify_ssl=True,
    ):
        """Constructs a `DownloadConfig`.

        Args:
          extract_dir: `str`, directory where extracted files are stored.
            Defaults to "<download_dir>/extracted".
          manual_dir: `str`, read-only directory where manually
            downloaded/extracted data is stored. Defaults to
            `<download_dir>/manual`.
          download_mode: `tfds.GenerateMode`, how to deal with downloads or
            data that already exists. Defaults to `REUSE_DATASET_IF_EXISTS`,
            which will reuse both downloads and data if it already exists.
          compute_stats: `tfds.download.ComputeStats`, whether to compute
            statistics over the generated data. Defaults to `SKIP` (any falsy
            value, including `None`, selects `SKIP`).
          max_examples_per_split: `int`, optional max number of examples to
            write into each split (used for testing).
          register_checksums: `bool`, defaults to False. If True, checksums of
            downloaded files are recorded.
          force_checksums_validation: `bool`, defaults to False. If True,
            raises an error if a URL does not have checksums.
          beam_runner: Runner to pass to `beam.Pipeline`, only used for
            datasets based on Beam for the generation.
          beam_options: `PipelineOptions` to pass to `beam.Pipeline`, only used
            for datasets based on Beam for the generation.
          try_download_gcs: `bool`, defaults to True. If True, prepared dataset
            will be downloaded from GCS, when available. If False, dataset will
            be downloaded and prepared from scratch.
          verify_ssl: `bool`, defaults to True. If True, will verify
            certificate when downloading dataset.
        """
        self.extract_dir = extract_dir
        self.manual_dir = manual_dir
        # Falsy values fall back to the default member; the enum constructor
        # then normalizes whatever was passed to an enum instance.
        self.download_mode = util.GenerateMode(
            download_mode or util.GenerateMode.REUSE_DATASET_IF_EXISTS)
        # NOTE: the effective default here is SKIP, not AUTO.
        self.compute_stats = util.ComputeStatsMode(
            compute_stats or util.ComputeStatsMode.SKIP)
        self.max_examples_per_split = max_examples_per_split
        self.register_checksums = register_checksums
        self.force_checksums_validation = force_checksums_validation
        self.beam_runner = beam_runner
        self.beam_options = beam_options
        self.try_download_gcs = try_download_gcs
        self.verify_ssl = verify_ssl
Пример #3
0
  def __init__(self, cache_dir, mode=None):
    """Sets up the download manager and ensures its cache directory exists.

    Args:
      cache_dir (str): Root directory for downloads, extractions and other
        artifacts. A falsy value falls back to `constants.CACHE_DIR`; a
        leading `~` is expanded to the user's home directory.
      mode (GenerateMode): One of FORCE_REDOWNLOAD, REUSE_CACHE_IF_EXISTS or
        REUSE_DATASET_IF_EXISTS. A falsy value selects
        REUSE_DATASET_IF_EXISTS.
    """
    # Resolve the cache root, expanding `~` in whichever path was selected.
    cache_root = cache_dir if cache_dir else constants.CACHE_DIR
    self._cache_dir = os.path.expanduser(cache_root)
    self._backend = local_backend.LocalBackend()

    # The generation mode decides whether cached downloads are reused or the
    # data is re-downloaded; string values are converted to the enum here.
    if not mode:
      mode = util.GenerateMode.REUSE_DATASET_IF_EXISTS
    self._mode = util.GenerateMode(mode)

    # Make sure the root directory exists before anything is written into it.
    gfile.MakeDirs(self._cache_dir)
Пример #4
0
  def __init__(
      self,
      extract_dir=None,
      manual_dir=None,
      download_mode=None,
      compute_stats=None,
      max_examples_per_split=None,
      register_checksums=False,
      beam_runner=None,
      beam_options=None):
    """Builds a `DownloadConfig` holding download/preparation options.

    Args:
      extract_dir: `str`, directory in which extracted files are kept. Stored
        as given; the documented default location is
        "<download_dir>/extracted" (not resolved in this constructor).
      manual_dir: `str`, read-only directory containing manually
        downloaded/extracted data. Stored as given; the documented default
        location is "<download_dir>/manual" (not resolved here).
      download_mode: `tfds.GenerateMode`, policy for downloads or data that
        already exist. Any falsy value selects `REUSE_DATASET_IF_EXISTS`.
      compute_stats: `tfds.download.ComputeStats`, whether statistics are
        computed over the generated data. Any falsy value selects `AUTO`.
      max_examples_per_split: `int`, optional cap on the number of examples
        written into each split.
      register_checksums: `bool`, defaults to False. If True, checksums of
        downloaded files are recorded.
      beam_runner: Runner handed to `beam.Pipeline`; only relevant for
        datasets whose generation is based on Beam.
      beam_options: `PipelineOptions` handed to `beam.Pipeline`; only relevant
        for datasets whose generation is based on Beam.
    """
    # Directory overrides are stored untouched (None stays None).
    self.extract_dir = extract_dir
    self.manual_dir = manual_dir

    # Fall back to the default policy for any falsy mode, then normalize the
    # result to a `util.GenerateMode` member.
    if not download_mode:
      download_mode = util.GenerateMode.REUSE_DATASET_IF_EXISTS
    self.download_mode = util.GenerateMode(download_mode)

    # Same pattern for the statistics mode, with AUTO as the fallback.
    if not compute_stats:
      compute_stats = util.ComputeStatsMode.AUTO
    self.compute_stats = util.ComputeStatsMode(compute_stats)

    self.max_examples_per_split = max_examples_per_split
    self.register_checksums = register_checksums
    self.beam_runner = beam_runner
    self.beam_options = beam_options