Example #1
  def _generate_examples(self, archive, validation_labels=None):
    """Generate corrupted imagenet validation data.

    Apply corruptions to the raw images according to self.corruption_type.

    Args:
      archive: an iterator for the raw dataset.
      validation_labels: a dict that maps the file names to imagenet labels.

    Yields:
      A dictionary with the file name, an image file object, and the label of
      each imagenet validation example.
    """
    # Get the current random seeds.
    numpy_st0 = np.random.get_state()
    # Set new random seeds.
    np.random.seed(135)
    logging.warning('Overwriting cv2 RNG seed.')
    tfds.core.lazy_imports.cv2.setRNGSeed(357)

    gen_fn = super(Imagenet2012Corrupted, self)._generate_examples
    for key, example in gen_fn(archive, validation_labels):
      with tf.Graph().as_default():
        tf_img = _decode_and_center_crop(example['image'].read())
        image_np = tfds.as_numpy(tf_img)
      example['image'] = self._get_corrupted_example(image_np)

      yield key, example
    # Reset the seeds back to their original values.
    np.random.set_state(numpy_st0)
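The save/reseed/restore pattern above generalizes; a minimal sketch as a context manager, assuming only NumPy's global RNG needs preserving (the preserve_numpy_seed name is ours):

import contextlib

import numpy as np


@contextlib.contextmanager
def preserve_numpy_seed(seed):
    # Save the global NumPy RNG state, reseed deterministically, and
    # restore the original state when the block exits.
    state = np.random.get_state()
    np.random.seed(seed)
    try:
        yield
    finally:
        np.random.set_state(state)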
Example #2
    def _generate_examples(self, path, split):
        """Yields examples."""
        if split == "adversarial":
            key = "adv-image"
        elif split == "clean":
            key = "clean-image"
        else:
            raise ValueError(f"split {split} not in ('adversarial', 'clean')")

        def _parse(serialized_example, key):
            ds_features = {
                "height": tf.io.FixedLenFeature([], tf.int64),
                "width": tf.io.FixedLenFeature([], tf.int64),
                "label": tf.io.FixedLenFeature([], tf.int64),
                "adv-image": tf.io.FixedLenFeature([], tf.string),
                "clean-image": tf.io.FixedLenFeature([], tf.string),
            }
            example = tf.io.parse_single_example(serialized_example,
                                                 ds_features)

            img = tf.io.decode_raw(example[key], tf.float32)
            # Pixel values are whole numbers in [0.0, 255.0] stored as floats,
            # so casting to uint8 is lossless.
            img = tf.cast(img, tf.uint8)
            img = tf.reshape(img, (example["height"], example["width"], 3))
            return img, example["label"]

        ds = tf.data.TFRecordDataset(filenames=[path])
        ds = ds.map(lambda x: _parse(x, key))
        ds = ds.batch(1)
        default_graph = tf.compat.v1.keras.backend.get_session().graph
        ds = tfds.as_numpy(ds, graph=default_graph)
        for i, (img, label) in enumerate(ds):
            yield str(i), {"image": img[0], "label": label[0]}
Example #3
 def _generate_examples(self, extracted_dir, file_path):
     with tf.Graph().as_default():
         # tf.contrib was removed in TensorFlow 2.x; this snippet requires TF 1.x.
         dataset = tf.contrib.data.LMDBDataset(
             os.path.join(extracted_dir, file_path, "data.mdb"))
         for i, (_, jpeg_image) in enumerate(tfds.as_numpy(dataset)):
             record = {"image": io.BytesIO(jpeg_image)}
             yield i, record
Example #4
 def _generate_examples(self, extracted_dir, file_path):
     with tf.Graph().as_default():
         path = os.path.join(extracted_dir, file_path, "data.mdb")
         dataset = _make_lmdb_dataset(path)
         for i, (_, jpeg_image) in enumerate(tfds.as_numpy(dataset)):
             record = {"image": io.BytesIO(jpeg_image)}
             yield i, record
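_make_lmdb_dataset is not shown in this excerpt. One plausible reconstruction, assuming the lmdb Python package and the (key, value) record layout of the old tf.contrib.data.LMDBDataset:

import lmdb
import tensorflow as tf


def _make_lmdb_dataset(path):
    """Hypothetical helper: streams (key, value) pairs from an LMDB file."""

    def _iter_records():
        # subdir=False because `path` points at the data.mdb file itself.
        env = lmdb.open(path, readonly=True, lock=False, subdir=False)
        with env.begin() as txn:
            for key, value in txn.cursor():
                yield key, value
        env.close()

    return tf.data.Dataset.from_generator(
        _iter_records, output_types=(tf.string, tf.string))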
Example #5
    def _generate_examples(self, path, is_training):
        """Yields examples."""
        cycle_len = 10 if is_training else 1
        dataset = tf.data.Dataset.list_files(path)
        dataset = dataset.interleave(tf.data.TFRecordDataset,
                                     cycle_length=cycle_len)
        dataset = dataset.map(self.info.features.deserialize_example,
                              num_parallel_calls=tf.data.experimental.AUTOTUNE)

        dataset = tfds.as_numpy(dataset)
        for example in dataset:
            yield example[_EXAMPLE_NAME], example
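deserialize_example parses each record with the builder's own feature spec. Recent TFDS versions also expose the inverse, serialize_example, so the round trip can be sketched as follows (assuming a builder whose features hold a single 'value' field):

features = builder.info.features  # `builder`: any constructed DatasetBuilder
serialized = features.serialize_example({'value': 1})
assert features.deserialize_example(serialized)['value'] == 1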
Example #6
 def generate_examples_one_file(
         self, path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
     """Yields examples from one file."""
     # Dataset of tf.Examples containing full episodes.
     example_ds = tf.data.TFRecordDataset(filenames=str(path),
                                          compression_type='GZIP')
     # Dataset of episodes, each represented as a dataset of steps.
     episode_ds = example_ds.map(
         self.tf_example_to_step_ds,
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
     episode_ds = tfds.as_numpy(episode_ds)
     for e in episode_ds:
         yield self.get_episode_id(e), e
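tf_example_to_step_ds is defined elsewhere in the builder. In RLDS-style datasets its general shape is to parse one serialized episode and re-expose the steps as a nested dataset; a rough sketch with an assumed feature layout:

import tensorflow as tf


def tf_example_to_step_ds(serialized_example):
    # Assumed layout: each tf.Example stores a whole episode as
    # variable-length per-step sequences.
    features = {
        'observation': tf.io.FixedLenSequenceFeature(
            [], tf.string, allow_missing=True),
        'action': tf.io.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True),
        'reward': tf.io.FixedLenSequenceFeature(
            [], tf.float32, allow_missing=True),
    }
    episode = tf.io.parse_single_example(serialized_example, features)
    # Nested dataset: one element per step of the episode.
    return {'steps': tf.data.Dataset.from_tensor_slices(episode)}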
Example #7
def set_vocab():
    g_vars.df = g_vars.df.map(
        lambda x: tf.strings.unicode_split(x['text'], 'UTF-8'))

    iter_df = iter(g_vars.df)
    g_vars.vocabulary = set([])

    for review in iter_df:
        temp_vocab = sorted(set(tfds.as_numpy(review)))
        g_vars.vocabulary.update(temp_vocab)

    g_vars.vocabulary = sorted(g_vars.vocabulary)
    g_vars.char2idx = {u: i for i, u in enumerate(g_vars.vocabulary)}
    g_vars.idx2char = np.array(g_vars.vocabulary)
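Once set_vocab has run, char2idx and idx2char form an encode/decode pair, e.g.:

# Round trip: every character index decodes back to the same character.
sample = g_vars.vocabulary[:5]
encoded = [g_vars.char2idx[c] for c in sample]
assert list(g_vars.idx2char[encoded]) == sample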
Example #8
 def _generate_examples_one_file(
         path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
     """Yields examples from one file."""
     # Dataset of tf.Examples containing full episodes.
     example_ds = tf.data.TFRecordDataset(filenames=str(path),
                                          compression_type='GZIP')
     # Dataset of episodes, each represented as a dataset of steps.
     # `self` is captured from the enclosing builder method's scope in the
     # original source.
     episode_ds = example_ds.map(
         self.tf_example_to_step_ds,
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
     episode_ds = tfds.as_numpy(episode_ds)
     for e in episode_ds:
         # The key of the episode is converted to string because int64 is not
         # supported as key.
         yield str(e['episode_id']), e
Example #9
 def _generate_examples_one_file(
         path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
     """Yields examples from one file."""
     counter = 0
     key_prefix = os.path.basename(path)
     # Dataset of tf.Examples containing full episodes.
     example_ds = tf.data.TFRecordDataset(filenames=str(path))
     # Dataset of episodes, each represented as a dataset of steps.
     # `self` and `feature_description` are captured from the enclosing
     # builder method's scope in the original source.
     episode_ds = example_ds.map(
         functools.partial(self.tf_example_to_step_ds,
                           feature_description=feature_description),
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
     episode_ds = tfds.as_numpy(episode_ds)
     for e in episode_ds:
         episode_id = counter
         yield f'{key_prefix}/{episode_id}', e
         counter += 1
Example #10
def _generate_images_and_annotations(tf_record_file):
    """Yields the images and annotations from a given file.

    Args:
      tf_record_file: path to a .tfrecord file.

    Yields:
      Waymo images and annotations.
    """
    # Go through all frames in the file.
    dataset = tf.data.TFRecordDataset(tf_record_file, compression_type="")
    for data in tfds.as_numpy(dataset):
        frame = open_dataset.Frame()
        frame.ParseFromString(bytearray(data))  # pytype: disable=wrong-arg-types

        image_and_annotation = {
            "context": {
                "name": frame.context.name
            },
            "timestamp_micros": frame.timestamp_micros
        }

        camera_calibration = {
            calibration.name: calibration
            for calibration in frame.context.camera_calibrations
        }
        camera_labels = {label.name: label for label in frame.camera_labels}

        # Go through all 5 camera images in the frame
        for frame_image in frame.images:
            labels = None
            if frame_image.name in camera_labels:
                image_height = camera_calibration[frame_image.name].height
                image_width = camera_calibration[frame_image.name].width
                labels = _convert_labels(camera_labels[frame_image.name],
                                         image_width, image_height)

            camera_name = open_dataset.CameraName.Name.Name(frame_image.name)
            image_and_annotation["camera_" + camera_name] = {
                "image": frame_image.image,
                "labels": labels
            }

        yield image_and_annotation
Example #11
def string_and_vectorized(df):
    """Maps every character in the dataset to its vocabulary index."""
    vector = np.array([], dtype='int64')
    shakespeare = []
    length = len(list(df))
    for current, example in enumerate(tfds.as_numpy(df), start=1):
        if current % 10 == 0:
            print("loop {0} of {1}".format(current, length))
        # Look up the vocabulary index of each character in the example.
        temp = [char2idx[i] for i in example]
        shakespeare = example  # keeps only the last example seen
        vector = np.append(vector, temp).astype('int64')

    return vector, shakespeare
Example #12
  def _generate_examples_one_file(
      self, path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
    """Yields examples from one file."""
    # Dataset of tf.Examples containing full episodes.
    example_ds = tf.data.TFRecordDataset(filenames=str(path))

    example_features = _NAME_TO_FEATURES_ENCODE[self.builder_config.name]
    example_specs = example_features.get_serialized_info()
    parser = tfds.core.example_parser.ExampleParser(example_specs)

    parsed_examples = example_ds.map(parser.parse_example)
    decoded_examples = parsed_examples.map(example_features.decode_example)

    for index, example in enumerate(tfds.as_numpy(decoded_examples)):
      if self.builder_config.name == 'rlds':
        id_key = 'episode_id'
      else:
        id_key = 'task_code'
      # Note: Python's str hash is salted per process, so hash(path) is not
      # stable across runs; a hashlib digest would give deterministic keys.
      example_id = str(index) + str(example[id_key]) + str(hash(path))
      yield example_id, example
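A deterministic variant of that key, sketched with hashlib (naming is ours):

import hashlib

path_digest = hashlib.sha256(str(path).encode('utf-8')).hexdigest()[:8]
example_id = f'{index}_{example[id_key]}_{path_digest}'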
Example #13
    def _generate_examples(self, path):
        """Yields examples."""

        clean_key = "clean"
        adversarial_key = "adversarial"

        def _parse(serialized_example):
            ds_features = {
                "height": tf.io.FixedLenFeature([], tf.int64),
                "width": tf.io.FixedLenFeature([], tf.int64),
                "label": tf.io.FixedLenFeature([], tf.int64),
                "adv-image": tf.io.FixedLenFeature([], tf.string),
                "clean-image": tf.io.FixedLenFeature([], tf.string),
            }
            example = tf.io.parse_single_example(serialized_example,
                                                 ds_features)

            img_clean = tf.io.decode_raw(example["clean-image"], tf.float32)
            img_adv = tf.io.decode_raw(example["adv-image"], tf.float32)
            # Pixel values are whole numbers in [0.0, 255.0] stored as floats,
            # so casting to uint8 is lossless.
            img_clean = tf.cast(img_clean, tf.uint8)
            img_clean = tf.reshape(img_clean,
                                   (example["height"], example["width"], 3))
            img_adv = tf.cast(img_adv, tf.uint8)
            img_adv = tf.reshape(img_adv,
                                 (example["height"], example["width"], 3))
            return {
                clean_key: img_clean,
                adversarial_key: img_adv
            }, example["label"]

        ds = tf.data.TFRecordDataset(filenames=[path])
        ds = ds.map(_parse)
        default_graph = tf.compat.v1.keras.backend.get_session().graph
        ds = tfds.as_numpy(ds, graph=default_graph)

        for i, (img, label) in enumerate(ds):
            yield str(i), {
                "images": img,
                "label": label,
            }
Example #14
    def _generate_examples_validation(self, archive, labels):
        """Generate corrupted imagenet validation data.

    Apply corruptions to the raw images according to self.corruption_type.

    Args:
      archive: an iterator for the raw dataset.
      labels: a dictionary that maps the file names to imagenet labels.

    Yields:
      dictionary with the file name, an image file objective, and label of each
      imagenet validation data.
    """
        # Get the current random seeds.
        numpy_st0 = np.random.get_state()
        # Set new random seeds.
        np.random.seed(135)
        logging.warning('Overwriting cv2 RNG seed.')
        tfds.core.lazy_imports.cv2.setRNGSeed(357)

        gen_fn = super(Imagenet2012Corrupted,
                       self)._generate_examples_validation
        for example in gen_fn(archive, labels):

            if self.version.implements(tfds.core.Experiment.S3):
                key, example = example  # Unpack S3 key

            with tf.Graph().as_default():
                tf_img = tf.image.decode_jpeg(example['image'].read(),
                                              channels=3)
                image_np = tfds.as_numpy(tf_img)
            example['image'] = self._get_corrupted_example(image_np)

            if self.version.implements(tfds.core.Experiment.S3):
                yield key, example
            else:
                yield example
        # Reset the seeds back to their original values.
        np.random.set_state(numpy_st0)
Example #15
  def get_features_dict(self):
    # Loads the features dynamically.
    file_paths = rlu_common.get_files(
        prefix=self.get_file_prefix(), num_shards=self.num_shards())

    # Take one item to get the output types and shapes.
    example_item = None
    iterator = tfds.as_numpy(tf.data.TFRecordDataset(file_paths[:1]))
    for example_item in iterator:
      break
    if example_item is None:
      raise ValueError('Empty dataset')

    feature_description = tf_example_to_feature_description(example_item)
    feature_description = tree_deflatten_with_delimiter(feature_description)
    return tfds.features.FeaturesDict({
        'steps':
            tfds.features.Dataset({
                'observation':
                    tf_feature_to_tfds_feature(
                        feature_description['observation']),
                'action':
                    tf_feature_to_tfds_feature(feature_description['action']),
                'reward':
                    tf_feature_to_tfds_feature(feature_description['reward']),
                'is_terminal':
                    tf.bool,
                'is_first':
                    tf.bool,
                'is_last':
                    tf.bool,
                'discount':
                    tf_feature_to_tfds_feature(feature_description['discount']),
            }),
        'episode_return':
            tf.float32,
    })
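tf_example_to_feature_description is not shown here; a plausible reconstruction that inspects one serialized tf.train.Example and derives a variable-length feature spec per field (the exact behavior is an assumption):

import tensorflow as tf


def tf_example_to_feature_description(serialized):
    example = tf.train.Example.FromString(serialized)
    description = {}
    for name, feature in example.features.feature.items():
        dtype = {
            'bytes_list': tf.string,
            'int64_list': tf.int64,
            'float_list': tf.float32,
        }[feature.WhichOneof('kind')]
        description[name] = tf.io.FixedLenSequenceFeature(
            [], dtype, allow_missing=True)
    return description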
Example #16
 def get_sorted_data(builder, split):
     splits = builder.as_dataset()
     data = list(tfds.as_numpy(splits[split]))
     return sorted(data, key=lambda x: x['path'])
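A usage sketch (the dataset name is a placeholder, and the examples are assumed to carry the 'path' key the sort relies on):

import tensorflow_datasets as tfds

builder = tfds.builder('my_dataset')  # hypothetical registered dataset
builder.download_and_prepare()
train_examples = get_sorted_data(builder, 'train')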
Example #17
def to_numpy(obj, K=None, device=None):
    """
    Convert input to numpy

    :param obj: Any input that can be converted to numpy (raises error otherwise)
    :type obj: ```Any```

    :param K: backend engine, e.g., `np` or `tf`; defaults to `np`
    :type K: ```Literal['np', 'tf']```

    :param device: The (optional) Device to which x should be transferred.
      If given, then the result is committed to the device.
      If the device parameter is None, then this operation behaves like the identity function
      if the operand is on any device already, otherwise it transfers the data to the default device, uncommitted.
    :type device: ```Optional[Device]```

    :return: numpy type, probably np.ndarray
    :rtype: ```np.ndarray```
    """
    module_name = "numpy" if K is None else K.__name__

    if obj is None:
        return None if K is None else K.nan
    elif type(obj).__module__ == module_name:
        return obj
    elif hasattr(obj, "as_numpy"):
        return obj.as_numpy()
    elif hasattr(obj, "numpy"):
        return obj.numpy()
    elif isinstance(obj, dict) and "image" in obj and "label" in obj:
        if module_name == "jax.numpy":

            def __to_numpy(o, _K=None):
                """
                Convert input to a DeviceArray

                :param o: An object with a `numpy` method
                :type o: ```Any```

                :param _K: backend engine, e.g., `np` or `tf`; defaults to `np`
                :type _K: ```Literal['np', 'tf']```

                :return: The array on the device
                :rtype: ```DeviceArray```
                """
                import jax

                return jax.device_put(o.numpy(), device=device)

        else:
            __to_numpy = _to_numpy  # module-level helper, not shown in this excerpt

        return {
            "image": __to_numpy(obj["image"], K),
            "label": __to_numpy(obj["label"], K),
        }
    elif type(obj).__name__ == "PrefetchDataset":
        # ^`isinstance` said `arg 2 must be a type or tuple of types`
        import tensorflow_datasets as tfds

        return tfds.as_numpy(obj)

    raise TypeError("Unable to convert {!r} to numpy".format(type(obj)))
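A few calls showing which branch each input takes (values chosen for illustration):

import numpy as np
import tensorflow as tf

to_numpy(np.arange(3))         # already numpy: returned unchanged
to_numpy(tf.constant([1, 2]))  # has .numpy(): converted eagerly
to_numpy(None)                 # None stays None (or K.nan when K is given)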
Example #18
 def values(self, split):
     return [
         int(v["value"])
         for v in tfds.as_numpy(self.as_dataset(split=split))
     ]
Example #19
import tensorflow as tf
import tensorflow_datasets.public_api as tfds
from tqdm import tqdm
import numpy as np
import os

df = tfds.load(name="imdb_reviews", split='train[:5%]')
df = df.map(lambda x: tf.strings.unicode_split(x['text'], 'UTF-8'))
iter_df = iter(df)
vocabulary = set([])

for review in iter_df:
    temp_vocab = sorted(set(tfds.as_numpy(review)))
    vocabulary.update(temp_vocab)

vocabulary = sorted(vocabulary)
char2idx = {u: i for i, u in enumerate(vocabulary)}
idx2char = np.array(vocabulary)

model = tf.keras.models.load_model('models/imdb')


def generate_text(model, start_string, generation_length=2000):
    # Evaluation step: generate text using the learned RNN model.

    input_eval = [char2idx[bytes(i, encoding='utf8')] for i in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Empty string to store our results
    text_generated = []