Example #1
    def _as_dataset(self,
                    split=splits_lib.Split.TRAIN,
                    decoders=None,
                    shuffle_files=False):

        if self.version.implements(utils.Experiment.S3):
            dataset = self._tfrecords_reader.read(self.name, split,
                                                  self.info.splits.values(),
                                                  shuffle_files)
        else:
            # Resolve the named split tree into concrete splits
            read_instruction = split.get_read_instruction(self.info.splits)
            # Extract the list of SlicedSplitInfo objects containing the splits
            # to use and their associated slice
            list_sliced_split_info = read_instruction.get_list_sliced_split_info()
            # Resolve the SlicedSplitInfo objects into a list of
            # {'filepath': 'path/to/data-00032-00100', 'mask': [True, False, ...]}
            instruction_dicts = self._slice_split_info_to_instruction_dicts(
                list_sliced_split_info)

            # Load the dataset
            dataset = dataset_utils.build_dataset(
                instruction_dicts=instruction_dicts,
                dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
                shuffle_files=shuffle_files,
            )

        decode_fn = functools.partial(self.info.features.decode_example,
                                      decoders=decoders)
        dataset = dataset.map(decode_fn,
                              num_parallel_calls=tf.data.experimental.AUTOTUNE)
        return dataset
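
The non-S3 branch hands the instruction dicts to dataset_utils.build_dataset. Below is a minimal sketch of what that helper plausibly does with them, assuming each mask holds one boolean per record in its file; build_dataset_sketch and its signature are illustrative, not the real tfds API.

import random

import tensorflow as tf

def build_dataset_sketch(instruction_dicts, dataset_from_file_fn,
                         shuffle_files=False):
    # Illustrative only: read each file, keep the records whose position is
    # marked True in that file's mask, then chain the per-file datasets.
    instruction_dicts = list(instruction_dicts)
    if shuffle_files:
        random.shuffle(instruction_dicts)  # shuffles file order, not records
    datasets = []
    for instruction in instruction_dicts:
        ds = dataset_from_file_fn(instruction['filepath'])
        # Assumption: len(mask) equals the number of records in the file.
        mask = tf.constant(instruction['mask'])
        # Pair each record with its index, keep the positions the mask marks
        # True, then drop the index again.
        ds = ds.enumerate()
        ds = ds.filter(lambda idx, rec, m=mask: m[idx])
        ds = ds.map(lambda idx, rec: rec)
        datasets.append(ds)
    dataset = datasets[0]
    for ds in datasets[1:]:
        dataset = dataset.concatenate(ds)
    return dataset

Note that shuffling at this level only reorders files; record-level shuffling is left to the caller's own pipeline.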
Example #2
 def _as_dataset(self, split=Split.TRAIN, shuffle_files=None):
   return dataset_utils.build_dataset(
       filepattern=self._split_files(num_shards=None, split=split).filepattern,
       dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
       process_fn=self._preprocess,
       shuffle_files=(
           split == Split.TRAIN if shuffle_files is None else shuffle_files))
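
Examples #2 through #4 all resolve the shuffle default the same way: shuffle only when reading the training split, unless the caller overrides it. Pulled out on its own, with a stand-in Split enum (both names here are illustrative, not the tfds classes):

import enum

class Split(enum.Enum):  # stand-in for the real Split type
    TRAIN = 'train'
    TEST = 'test'

def resolve_shuffle(split, shuffle_files=None):
    # None means "use the default": shuffle iff we are reading TRAIN.
    return split == Split.TRAIN if shuffle_files is None else shuffle_files

assert resolve_shuffle(Split.TRAIN) is True
assert resolve_shuffle(Split.TEST) is False
assert resolve_shuffle(Split.TEST, shuffle_files=True) is True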
Example #3
 def _as_dataset(self, split=Split.TRAIN, shuffle_files=None):
   tf_data = dataset_utils.build_dataset(
       filepattern=self._split_files(num_shards=None, split=split).filepattern,
       dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
       shuffle_files=(
           split == Split.TRAIN if shuffle_files is None else shuffle_files))
   tf_data = tf_data.map(self.info.specs.decode_sample)
   return tf_data
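
The extra map here is the decode step that Example #1 later parallelizes. If you were adapting this snippet, the usual tf.data performance additions would look like the following sketch (not part of the original code):

    tf_data = tf_data.map(self.info.specs.decode_sample,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
    tf_data = tf_data.prefetch(tf.data.experimental.AUTOTUNE)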
Example #4
    def _as_dataset(self, split=splits.Split.TRAIN, shuffle_files=None):
        # Automatically activate shuffling if training
        should_shuffle = shuffle_files
        if shuffle_files is None:
            should_shuffle = split == splits.Split.TRAIN

        # Compute filenames from the given split
        # TODO(epot): Implement synthetic splits
        filenames = self._build_split_filenames(
            data_dir=self._data_dir,
            split_info_list=[self.info.splits[split]],
        )

        # Load the dataset
        tf_data = dataset_utils.build_dataset(
            filepattern=filenames,
            dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
            shuffle_files=should_shuffle,
        )
        tf_data = tf_data.map(self.info.specs.decode_sample)
        return tf_data
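
Example #4 relies on _build_split_filenames to turn split metadata into concrete shard paths. A hypothetical reconstruction, assuming the conventional <name>-<split>.<suffix>-XXXXX-of-XXXXX shard layout; the function name, its arguments, and the exact pattern are assumptions for illustration:

import os

def build_split_filenames_sketch(data_dir, name, split_info_list,
                                 suffix='tfrecord'):
    # Hypothetical: one entry per shard, following the usual
    # <name>-<split>.<suffix>-00000-of-0000N naming convention.
    filenames = []
    for split_info in split_info_list:
        for shard in range(split_info.num_shards):
            filenames.append(os.path.join(
                data_dir,
                '%s-%s.%s-%05d-of-%05d' % (
                    name, split_info.name, suffix,
                    shard, split_info.num_shards)))
    return filenames

With split info objects exposing name and num_shards, this yields one path per shard, matching the list the snippet above passes to build_dataset as filepattern.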