Пример #1
0
    def _test_checksums(self):
        # If no call to `dl_manager.download`, then no need to check url presence.
        if not self._download_urls:
            return

        err_msg = ("If you are developping outside TFDS and want to opt-out, "
                   "please add `SKIP_CHECKSUMS = True` to the "
                   "`DatasetBuilderTestCase`")

        url_infos = self.DATASET_CLASS.url_infos
        if url_infos is None:
            filepath = os.path.join(checksums._get_path(self.builder.name))  # pylint: disable=protected-access
            with utils.try_reraise(suffix=err_msg):
                url_infos = checksums._get_url_infos(filepath)  # pylint: disable=protected-access
        else:
            # TODO(tfds): Improve doc for dataset-as-folder (and remove
            # try_reraise above)
            filepath = str(self.DATASET_CLASS.code_path.parent /
                           "checksums.tsv")

        missing_urls = self._download_urls - set(url_infos.keys())
        self.assertEmpty(
            missing_urls, "Some urls checksums are missing at: {} "
            "Did you forget to record checksums with `--register_checksums` ? "
            "See instructions at: "
            "https://www.tensorflow.org/datasets/add_dataset#2_run_download_and_prepare_locally"
            "\n{}".format(filepath, err_msg))
Пример #2
0
    def from_spec(cls, spec: SplitArg) -> 'AbstractSplit':
        """Builds a split object from a split specification.

        Args:
          spec: Either an `AbstractSplit` (returned unchanged) or a value whose
            `str()` form lists split(s) with optional slice(s). A slice can be
            given with absolute numbers (int) or percentages (int). E.g.
              `test`: test split.
              `test + validation`: test split + validation split.
              `test[10:]`: test split, minus its first 10 records.
              `test[:10%]`: first 10% records of test split.
              `test[:-5%]+train[40%:60%]`: first 95% of test + middle 20% of
                train.

        Returns:
          The split instance: `spec` itself when it already is an
          `AbstractSplit`, otherwise the sum (`+`) of the instructions parsed
          from each part of the spec.

        Raises:
          ValueError: If splitting the spec yields no part at all.
        """
        if isinstance(spec, AbstractSplit):
            return spec

        # `Split` instances are accepted too, hence the explicit conversion.
        spec = str(spec)

        parts = _ADDITION_SEP_RE.split(spec)
        if not parts:
            raise ValueError(f'No instructions could be built out of {spec!r}')

        parse_error_prefix = (f'Error parsing split {spec!r}. See format at: '
                              'https://www.tensorflow.org/datasets/splits\n')
        with utils.try_reraise(parse_error_prefix):
            instructions = list(map(_str_to_relative_instruction, parts))

        # Fold every parsed instruction into a single split (_SplitAll).
        return functools.reduce(operator.add, instructions)
Пример #3
0
    def extract_features(
        self,
        features: features_lib.FeatureConnector,
    ) -> features_lib.FeatureConnector:
        """Extracts the sub-set of `features` described by `self._feature_specs`.

        Args:
          features: Features from which the sub-set is extracted.

        Returns:
          features_subset: The result of `_extract_features` applied to
            `features` and the normalized specs.
        """
        mismatch_prefix = (
            'Provided PartialDecoding specs does not match actual features: ')
        with utils.try_reraise(mismatch_prefix):
            # Step 1: turn the non-feature parts of the specs into features.
            normalized_specs = _normalize_feature_item(
                feature=features,
                expected_feature=self._feature_specs,
            )
            # Step 2: keep the intersection of `features` and the specs.
            wrapped_specs = features_dict.to_feature(normalized_specs)
            return _extract_features(
                feature=features,
                expected_feature=wrapped_specs,
            )
Пример #4
0
def _extract_feature_item(
    feature: features_lib.FeaturesDict,
    expected_key: str,
    expected_value: features_lib.FeatureConnector,
    fn: Callable[..., Any],
) -> features_lib.FeatureConnector:
    """Applies `fn` to the sub-feature stored under `expected_key`.

    Equivalent to `fn(feature=feature[expected_key],
    expected_feature=expected_value)`, run inside `utils.try_reraise` with a
    message prefix naming the key.

    Args:
      feature: The `FeaturesDict` that should contain `expected_key`.
      expected_key: Key looked up in `feature`.
      expected_value: Forwarded to `fn` as `expected_feature`.
      fn: Callable invoked with the `feature` and `expected_feature` keywords.

    Returns:
      Whatever `fn` returns.

    Raises:
      ValueError: If `expected_key` is not in `feature`.
    """
    assert isinstance(feature, features_lib.FeaturesDict)

    key_is_missing = expected_key not in feature
    if key_is_missing:
        raise ValueError(f'Missing expected feature {expected_key!r}.')

    with utils.try_reraise(f'In {expected_key!r}: '):
        sub_feature = feature[expected_key]
        return fn(feature=sub_feature, expected_feature=expected_value)
Пример #5
0
    def _test_checksums(self):
        # If no call to `dl_manager.download`, then no need to check url presence.
        if not self._download_urls:
            return

        err_msg = ("If you are developping outside TFDS and want to opt-out, "
                   "please add `SKIP_CHECKSUMS = True` to the "
                   "`DatasetBuilderTestCase`")

        with utils.try_reraise(suffix=err_msg):
            filepath = os.path.join(checksums._get_path(self.builder.name))  # pylint: disable=protected-access
            sizes_checksums = checksums._get_sizes_checksums(filepath)  # pylint: disable=protected-access
            urls = sizes_checksums.keys()

        missing_urls = self._download_urls - set(urls)
        self.assertEmpty(
            missing_urls, "Some urls checksums are missing at: {} "
            "Did you forgot to record checksums with `--register_checksums` ? "
            "See instructions at: "
            "https://www.tensorflow.org/datasets/add_dataset#2_run_download_and_prepare_locally"
            "\n{}".format(filepath, err_msg))
Пример #6
0
 def serialize_single_field(k, example_data, tensor_info):
     """Converts one feature value with `_item_to_tf_feature`.

     The call runs inside `utils.try_reraise`, whose message names the feature
     being processed.

     Args:
       k: Feature identifier; only used in the error message.
       example_data: Value forwarded to `_item_to_tf_feature`.
       tensor_info: Forwarded to `_item_to_tf_feature` and shown in the error
         message.

     Returns:
       The result of `_item_to_tf_feature(example_data, tensor_info)`.
     """
     error_prefix = "Error while serializing feature {} ({}): ".format(
         k, tensor_info)
     with utils.try_reraise(error_prefix):
         return _item_to_tf_feature(example_data, tensor_info)
Пример #7
0
 def build_single_spec(k, v):
     """Builds the example spec for one feature with `_to_tf_example_spec`.

     The call runs inside `utils.try_reraise`, whose message names the feature
     being processed.

     Args:
       k: Feature identifier; only used in the error message.
       v: Value forwarded to `_to_tf_example_spec` and shown in the error
         message.

     Returns:
       The result of `_to_tf_example_spec(v)`.
     """
     error_prefix = "Specification error for feature {} ({}): ".format(k, v)
     with utils.try_reraise(error_prefix):
         return _to_tf_example_spec(v)
 def run_with_reraise(fn, k, example_data, tensor_info):
     """Calls `fn(example_data, tensor_info)` inside `utils.try_reraise`.

     Args:
       fn: Callable invoked with `example_data` and `tensor_info`.
       k: Feature identifier; only used in the error message.
       example_data: First positional argument for `fn`.
       tensor_info: Second positional argument for `fn`; also shown in the
         error message.

     Returns:
       Whatever `fn` returns.
     """
     error_prefix = "Error while serializing feature {} ({}): ".format(
         k, tensor_info)
     with utils.try_reraise(error_prefix):
         return fn(example_data, tensor_info)
Пример #9
0
 def build_single_spec(k, v):
     """Builds the example spec for one feature with `_to_tf_example_spec`.

     The call runs inside `utils.try_reraise`, whose message shows the repr of
     the feature identifier and the spec value.

     Args:
       k: Feature identifier; only used in the error message.
       v: Value forwarded to `_to_tf_example_spec` and shown in the error
         message.

     Returns:
       The result of `_to_tf_example_spec(v)`.
     """
     error_prefix = f"Specification error for feature {k!r} ({v}): "
     with utils.try_reraise(error_prefix):
         return _to_tf_example_spec(v)