Exemplo n.º 1
0
def _load_register_for_paths(
    namespace: str,
    paths: List[epath.Path],
) -> List[register_base.BaseRegister]:
  """Returns a list of registers for the given paths."""
  code_paths = []
  data_paths = []
  for path in paths:
    if os.fspath(path).startswith('github'):
      code_paths.append(path)
    else:
      data_paths.append(path)

  if code_paths and data_paths:
    raise RuntimeError(f'Both a path containing code ({code_paths}) and '
                       f'a path containing data ({data_paths} are specified. '
                       'This is not supported')

  registers = []
  if data_paths:
    register = register_path.DataDirRegister(
        namespace_to_data_dirs={namespace: data_paths})
    registers.append(register)
  if code_paths:
    registers.append(
        register_package.PackageRegister(
            path=gcs_utils.GCS_COMMUNITY_INDEX_PATH))
  return registers
def dummy_register():
  """Yields a `PackageRegister` reading a temporary dummy index file.

  The index file and the mocked cache directory both live inside a temporary
  directory that is removed once the generator is finalized.
  """

  with tempfile.TemporaryDirectory() as tmp_dir:
    root_dir = pathlib.Path(tmp_dir)
    dataset_file = utils.tfds_path() / 'testing/dummy_dataset/dummy_dataset.py'

    # Package made of a single file (no checksums): built from the file path.
    single_source = dataset_sources.DatasetSource.from_json(
        os.fspath(dataset_file))

    # Package made of several files (with checksums): built from a `dict`.
    multi_spec = {
        'root_path': os.fspath(dataset_file.parent),
        'filenames': ['checksums.tsv', 'dummy_dataset.py'],
    }
    multi_source = dataset_sources.DatasetSource.from_json(multi_spec)
    # Serialize the `dict` form to a `str` so it can be embedded in the index.
    multi_json = json.dumps(multi_source.to_json())
    single_json = single_source.to_json()

    # Remote index content, one entry per line. `multi_json` is embedded
    # without surrounding `"` so that it is parsed back as a `dict`.
    index_text = textwrap.dedent(
        f"""\
        {{"name": "kaggle:dummy_dataset", "source": "{single_json}"}}
        {{"name": "kaggle:ds1", "source": "{single_json}"}}
        {{"name": "mlds:dummy_dataset", "source": {multi_json}}}
        """
    )
    index_file = root_dir / 'dummy-community-datasets.toml'
    index_file.write_text(index_text)

    cache_dir = root_dir / 'cache'
    with mock_cache_path(cache_dir):
      yield register_package.PackageRegister(path=index_file)
Exemplo n.º 3
0
def dummy_register():
    """Yields a `PackageRegister` reading a temporary dummy index file.

    The index file and the mocked cache directory both live inside a temporary
    directory that is removed once the generator is finalized.
    """

    with tempfile.TemporaryDirectory() as tmp_dir:
        root_dir = pathlib.Path(tmp_dir)

        # Every index entry points at the same dummy dataset directory.
        dataset_dir = os.fspath(utils.tfds_path() / 'testing/dummy_dataset')

        # Remote index content, one entry per line.
        index_text = textwrap.dedent(f"""\
        {{"name": "kaggle:ds0", "source": "{dataset_dir}"}}
        {{"name": "kaggle:ds1", "source": "{dataset_dir}"}}
        {{"name": "mlds:ds0", "source": "{dataset_dir}"}}
        """)
        index_file = root_dir / 'dummy-community-datasets.toml'
        index_file.write_text(index_text)

        cache_dir = root_dir / 'cache'
        with mock_cache_path(cache_dir):
            yield register_package.PackageRegister(path=index_file)