Example #1
  def _generate_examples(self, archive, validation_labels=None):
    """Generate corrupted imagenet validation data.

    Apply corruptions to the raw images according to self.corruption_type.

    Args:
      archive: an iterator for the raw dataset.
      validation_labels: a dict that maps the file names to imagenet labels.

    Yields:
      A dictionary with the file name, an image file object, and the label of
      each imagenet validation example.
    """
    # Get the current random seeds.
    numpy_st0 = np.random.get_state()
    # Set new random seeds.
    np.random.seed(135)
    logging.warning('Overwriting cv2 RNG seed.')
    tfds.core.lazy_imports.cv2.setRNGSeed(357)

    gen_fn = super(Imagenet2012Corrupted, self)._generate_examples
    for key, example in gen_fn(archive, validation_labels):
      with tf.Graph().as_default():
        tf_img = _decode_and_center_crop(example['image'].read())
        image_np = tfds.as_numpy(tf_img)
      example['image'] = self._get_corrupted_example(image_np)

      yield key, example
    # Reset the seeds back to their original values.
    np.random.set_state(numpy_st0)
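The save/reseed/restore pattern above generalizes; a minimal sketch as a context manager, assuming only NumPy's global RNG needs preserving (the preserve_numpy_seed name is ours):

import contextlib

import numpy as np


@contextlib.contextmanager
def preserve_numpy_seed(seed):
    # Save the global NumPy RNG state, reseed deterministically, and
    # restore the original state when the block exits.
    state = np.random.get_state()
    np.random.seed(seed)
    try:
        yield
    finally:
        np.random.set_state(state)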
Example #2
    def _generate_examples(self, path, split):
        """Yields examples."""
        if split == "adversarial":
            key = "adv-image"
        elif split == "clean":
            key = "clean-image"
        else:
            raise ValueError(f"split {split} not in ('adversarial', 'clean')")

        def _parse(serialized_example, key):
            ds_features = {
                "height": tf.io.FixedLenFeature([], tf.int64),
                "width": tf.io.FixedLenFeature([], tf.int64),
                "label": tf.io.FixedLenFeature([], tf.int64),
                "adv-image": tf.io.FixedLenFeature([], tf.string),
                "clean-image": tf.io.FixedLenFeature([], tf.string),
            }
            example = tf.io.parse_single_example(serialized_example,
                                                 ds_features)

            img = tf.io.decode_raw(example[key], tf.float32)
            # Pixel values are whole numbers in [0.0, 255.0] stored as floats,
            # so casting to uint8 is lossless.
            img = tf.cast(img, tf.uint8)
            img = tf.reshape(img, (example["height"], example["width"], 3))
            return img, example["label"]

        ds = tf.data.TFRecordDataset(filenames=[path])
        ds = ds.map(lambda x: _parse(x, key))
        ds = ds.batch(1)
        default_graph = tf.compat.v1.keras.backend.get_session().graph
        ds = tfds.as_numpy(ds, graph=default_graph)
        for i, (img, label) in enumerate(ds):
            yield str(i), {"image": img[0], "label": label[0]}
Example #3
 def _generate_examples(self, extracted_dir, file_path):
     with tf.Graph().as_default():
         # tf.contrib was removed in TensorFlow 2.x; this snippet requires TF 1.x.
         dataset = tf.contrib.data.LMDBDataset(
             os.path.join(extracted_dir, file_path, "data.mdb"))
         for i, (_, jpeg_image) in enumerate(tfds.as_numpy(dataset)):
             record = {"image": io.BytesIO(jpeg_image)}
             yield i, record
Example #4
 def _generate_examples(self, extracted_dir, file_path):
     with tf.Graph().as_default():
         path = os.path.join(extracted_dir, file_path, "data.mdb")
         dataset = _make_lmdb_dataset(path)
         for i, (_, jpeg_image) in enumerate(tfds.as_numpy(dataset)):
             record = {"image": io.BytesIO(jpeg_image)}
             yield i, record
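_make_lmdb_dataset is not shown in this excerpt. One plausible reconstruction, assuming the lmdb Python package and the (key, value) record layout of the old tf.contrib.data.LMDBDataset:

import lmdb
import tensorflow as tf


def _make_lmdb_dataset(path):
    """Hypothetical helper: streams (key, value) pairs from an LMDB file."""

    def _iter_records():
        # subdir=False because `path` points at the data.mdb file itself.
        env = lmdb.open(path, readonly=True, lock=False, subdir=False)
        with env.begin() as txn:
            for key, value in txn.cursor():
                yield key, value
        env.close()

    return tf.data.Dataset.from_generator(
        _iter_records, output_types=(tf.string, tf.string))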
Example #5
    def _generate_examples(self, path, is_training):
        """Yields examples."""
        cycle_len = 10 if is_training else 1
        dataset = tf.data.Dataset.list_files(path)
        dataset = dataset.interleave(tf.data.TFRecordDataset,
                                     cycle_length=cycle_len)
        dataset = dataset.map(self.info.features.deserialize_example,
                              num_parallel_calls=tf.data.experimental.AUTOTUNE)

        dataset = tfds.as_numpy(dataset)
        for example in dataset:
            yield example[_EXAMPLE_NAME], example
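deserialize_example parses each record with the builder's own feature spec. Recent TFDS versions also expose the inverse, serialize_example, so the round trip can be sketched as follows (assuming a builder whose features hold a single 'value' field):

features = builder.info.features  # `builder`: any constructed DatasetBuilder
serialized = features.serialize_example({'value': 1})
assert features.deserialize_example(serialized)['value'] == 1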
Example #6
 def generate_examples_one_file(
         self, path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
     """Yields examples from one file."""
     # Dataset of tf.Examples containing full episodes.
     example_ds = tf.data.TFRecordDataset(filenames=str(path),
                                          compression_type='GZIP')
     # Dataset of episodes, each represented as a dataset of steps.
     episode_ds = example_ds.map(
         self.tf_example_to_step_ds,
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
     episode_ds = tfds.as_numpy(episode_ds)
     for e in episode_ds:
         yield self.get_episode_id(e), e
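tf_example_to_step_ds is defined elsewhere in the builder. In RLDS-style datasets its general shape is to parse one serialized episode and re-expose the steps as a nested dataset; a rough sketch with an assumed feature layout:

import tensorflow as tf


def tf_example_to_step_ds(serialized_example):
    # Assumed layout: each tf.Example stores a whole episode as
    # variable-length per-step sequences.
    features = {
        'observation': tf.io.FixedLenSequenceFeature(
            [], tf.string, allow_missing=True),
        'action': tf.io.FixedLenSequenceFeature(
            [], tf.int64, allow_missing=True),
        'reward': tf.io.FixedLenSequenceFeature(
            [], tf.float32, allow_missing=True),
    }
    episode = tf.io.parse_single_example(serialized_example, features)
    # Nested dataset: one element per step of the episode.
    return {'steps': tf.data.Dataset.from_tensor_slices(episode)}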
Example #7
def set_vocab():
    g_vars.df = g_vars.df.map(
        lambda x: tf.strings.unicode_split(x['text'], 'UTF-8'))

    iter_df = iter(g_vars.df)
    g_vars.vocabulary = set([])

    for review in iter_df:
        temp_vocab = sorted(set(tfds.as_numpy(review)))
        g_vars.vocabulary.update(temp_vocab)

    g_vars.vocabulary = sorted(g_vars.vocabulary)
    g_vars.char2idx = {u: i for i, u in enumerate(g_vars.vocabulary)}
    g_vars.idx2char = np.array(g_vars.vocabulary)
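Once set_vocab has run, char2idx and idx2char form an encode/decode pair, e.g.:

# Round trip: every character index decodes back to the same character.
sample = g_vars.vocabulary[:5]
encoded = [g_vars.char2idx[c] for c in sample]
assert list(g_vars.idx2char[encoded]) == sample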
Example #8
 def _generate_examples_one_file(
         path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
     """Yields examples from one file."""
     # Dataset of tf.Examples containing full episodes.
     example_ds = tf.data.TFRecordDataset(filenames=str(path),
                                          compression_type='GZIP')
     # Dataset of episodes, each represented as a dataset of steps.
     # `self` is captured from the enclosing builder method's scope in the
     # original source.
     episode_ds = example_ds.map(
         self.tf_example_to_step_ds,
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
     episode_ds = tfds.as_numpy(episode_ds)
     for e in episode_ds:
         # The key of the episode is converted to string because int64 is not
         # supported as key.
         yield str(e['episode_id']), e
Example #9
 def _generate_examples_one_file(
         path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
     """Yields examples from one file."""
     counter = 0
     key_prefix = os.path.basename(path)
     # Dataset of tf.Examples containing full episodes.
     example_ds = tf.data.TFRecordDataset(filenames=str(path))
     # Dataset of episodes, each represented as a dataset of steps.
     # `self` and `feature_description` are captured from the enclosing
     # builder method's scope in the original source.
     episode_ds = example_ds.map(
         functools.partial(self.tf_example_to_step_ds,
                           feature_description=feature_description),
         num_parallel_calls=tf.data.experimental.AUTOTUNE)
     episode_ds = tfds.as_numpy(episode_ds)
     for e in episode_ds:
         episode_id = counter
         yield f'{key_prefix}/{episode_id}', e
         counter += 1
Example #10
def _generate_images_and_annotations(tf_record_file):
    """Yields the images and annotations from a given file.

    Args:
      tf_record_file: path to a .tfrecord file.

    Yields:
      Waymo images and annotations.
    """
    # Go through all frames in the file.
    dataset = tf.data.TFRecordDataset(tf_record_file, compression_type="")
    for data in tfds.as_numpy(dataset):
        frame = open_dataset.Frame()
        frame.ParseFromString(bytearray(data))  # pytype: disable=wrong-arg-types

        image_and_annotation = {
            "context": {
                "name": frame.context.name
            },
            "timestamp_micros": frame.timestamp_micros
        }

        camera_calibration = {
            calibration.name: calibration
            for calibration in frame.context.camera_calibrations
        }
        camera_labels = {label.name: label for label in frame.camera_labels}

        # Go through all 5 camera images in the frame
        for frame_image in frame.images:
            labels = None
            if frame_image.name in camera_labels:
                image_height = camera_calibration[frame_image.name].height
                image_width = camera_calibration[frame_image.name].width
                labels = _convert_labels(camera_labels[frame_image.name],
                                         image_width, image_height)

            camera_name = open_dataset.CameraName.Name.Name(frame_image.name)
            image_and_annotation["camera_" + camera_name] = {
                "image": frame_image.image,
                "labels": labels
            }

        yield image_and_annotation
Example #11
def string_and_vectorized(df):
    """Maps every character in the dataset to its vocabulary index."""
    vector = np.array([], dtype='int64')
    shakespeare = []
    length = len(list(df))
    for current, example in enumerate(tfds.as_numpy(df), start=1):
        if current % 10 == 0:
            print("loop {0} of {1}".format(current, length))
        # Look up the vocabulary index of each character in the example.
        temp = [char2idx[i] for i in example]
        shakespeare = example  # keeps only the last example seen
        vector = np.append(vector, temp).astype('int64')

    return vector, shakespeare
Example #12
  def _generate_examples_one_file(
      self, path) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
    """Yields examples from one file."""
    # Dataset of tf.Examples containing full episodes.
    example_ds = tf.data.TFRecordDataset(filenames=str(path))

    example_features = _NAME_TO_FEATURES_ENCODE[self.builder_config.name]
    example_specs = example_features.get_serialized_info()
    parser = tfds.core.example_parser.ExampleParser(example_specs)

    parsed_examples = example_ds.map(parser.parse_example)
    decoded_examples = parsed_examples.map(example_features.decode_example)

    for index, example in enumerate(tfds.as_numpy(decoded_examples)):
      if self.builder_config.name == 'rlds':
        id_key = 'episode_id'
      else:
        id_key = 'task_code'
      # Note: Python's str hash is salted per process, so hash(path) is not
      # stable across runs; a hashlib digest would give deterministic keys.
      example_id = str(index) + str(example[id_key]) + str(hash(path))
      yield example_id, example
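A deterministic variant of that key, sketched with hashlib (naming is ours):

import hashlib

path_digest = hashlib.sha256(str(path).encode('utf-8')).hexdigest()[:8]
example_id = f'{index}_{example[id_key]}_{path_digest}'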
Example #13
    def _generate_examples(self, path):
        """Yields examples."""

        clean_key = "clean"
        adversarial_key = "adversarial"

        def _parse(serialized_example):
            ds_features = {
                "height": tf.io.FixedLenFeature([], tf.int64),
                "width": tf.io.FixedLenFeature([], tf.int64),
                "label": tf.io.FixedLenFeature([], tf.int64),
                "adv-image": tf.io.FixedLenFeature([], tf.string),
                "clean-image": tf.io.FixedLenFeature([], tf.string),
            }
            example = tf.io.parse_single_example(serialized_example,
                                                 ds_features)

            img_clean = tf.io.decode_raw(example["clean-image"], tf.float32)
            img_adv = tf.io.decode_raw(example["adv-image"], tf.float32)
            # Pixel values are whole numbers in [0.0, 255.0] stored as floats,
            # so casting to uint8 is lossless.
            img_clean = tf.cast(img_clean, tf.uint8)
            img_clean = tf.reshape(img_clean,
                                   (example["height"], example["width"], 3))
            img_adv = tf.cast(img_adv, tf.uint8)
            img_adv = tf.reshape(img_adv,
                                 (example["height"], example["width"], 3))
            return {
                clean_key: img_clean,
                adversarial_key: img_adv
            }, example["label"]

        ds = tf.data.TFRecordDataset(filenames=[path])
        ds = ds.map(_parse)
        default_graph = tf.compat.v1.keras.backend.get_session().graph
        ds = tfds.as_numpy(ds, graph=default_graph)

        for i, (img, label) in enumerate(ds):
            yield str(i), {
                "images": img,
                "label": label,
            }
Example #14
    def _generate_examples_validation(self, archive, labels):
        """Generate corrupted imagenet validation data.

    Apply corruptions to the raw images according to self.corruption_type.

    Args:
      archive: an iterator for the raw dataset.
      labels: a dictionary that maps the file names to imagenet labels.

    Yields:
      dictionary with the file name, an image file objective, and label of each
      imagenet validation data.
    """
        # Get the current random seeds.
        numpy_st0 = np.random.get_state()
        # Set new random seeds.
        np.random.seed(135)
        logging.warning('Overwriting cv2 RNG seed.')
        tfds.core.lazy_imports.cv2.setRNGSeed(357)

        gen_fn = super(Imagenet2012Corrupted,
                       self)._generate_examples_validation
        for example in gen_fn(archive, labels):

            if self.version.implements(tfds.core.Experiment.S3):
                key, example = example  # Unpack S3 key

            with tf.Graph().as_default():
                tf_img = tf.image.decode_jpeg(example['image'].read(),
                                              channels=3)
                image_np = tfds.as_numpy(tf_img)
            example['image'] = self._get_corrupted_example(image_np)

            if self.version.implements(tfds.core.Experiment.S3):
                yield key, example
            else:
                yield example
        # Reset the seeds back to their original values.
        np.random.set_state(numpy_st0)
Example #15
  def get_features_dict(self):
    # Loads the features dynamically.
    file_paths = rlu_common.get_files(
        prefix=self.get_file_prefix(), num_shards=self.num_shards())

    # Take one item to get the output types and shapes.
    example_item = None
    iterator = tfds.as_numpy(tf.data.TFRecordDataset(file_paths[:1]))
    for example_item in iterator:
      break
    if example_item is None:
      raise ValueError('Empty dataset')

    feature_description = tf_example_to_feature_description(example_item)
    feature_description = tree_deflatten_with_delimiter(feature_description)
    return tfds.features.FeaturesDict({
        'steps':
            tfds.features.Dataset({
                'observation':
                    tf_feature_to_tfds_feature(
                        feature_description['observation']),
                'action':
                    tf_feature_to_tfds_feature(feature_description['action']),
                'reward':
                    tf_feature_to_tfds_feature(feature_description['reward']),
                'is_terminal':
                    tf.bool,
                'is_first':
                    tf.bool,
                'is_last':
                    tf.bool,
                'discount':
                    tf_feature_to_tfds_feature(feature_description['discount']),
            }),
        'episode_return':
            tf.float32,
    })
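tf_example_to_feature_description is not shown here; a plausible reconstruction that inspects one serialized tf.train.Example and derives a variable-length feature spec per field (the exact behavior is an assumption):

import tensorflow as tf


def tf_example_to_feature_description(serialized):
    example = tf.train.Example.FromString(serialized)
    description = {}
    for name, feature in example.features.feature.items():
        dtype = {
            'bytes_list': tf.string,
            'int64_list': tf.int64,
            'float_list': tf.float32,
        }[feature.WhichOneof('kind')]
        description[name] = tf.io.FixedLenSequenceFeature(
            [], dtype, allow_missing=True)
    return description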
Example #16
 def get_sorted_data(builder, split):
     splits = builder.as_dataset()
     data = list(tfds.as_numpy(splits[split]))
     return sorted(data, key=lambda x: x['path'])
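A usage sketch (the dataset name is a placeholder, and the examples are assumed to carry the 'path' key the sort relies on):

import tensorflow_datasets as tfds

builder = tfds.builder('my_dataset')  # hypothetical registered dataset
builder.download_and_prepare()
train_examples = get_sorted_data(builder, 'train')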
Example #17
def to_numpy(obj, K=None, device=None):
    """
    Convert input to numpy

    :param obj: Any input that can be converted to numpy (raises error otherwise)
    :type obj: ```Any```

    :param K: backend engine, e.g., `np` or `tf`; defaults to `np`
    :type K: ```Literal['np', 'tf']```

    :param device: The (optional) Device to which x should be transferred.
      If given, then the result is committed to the device.
      If the device parameter is None, then this operation behaves like the identity function
      if the operand is on any device already, otherwise it transfers the data to the default device, uncommitted.
    :type device: ```Optional[Device]```

    :return: numpy type, probably np.ndarray
    :rtype: ```np.ndarray```
    """
    module_name = "numpy" if K is None else K.__name__

    if obj is None:
        return None if K is None else K.nan
    elif type(obj).__module__ == module_name:
        return obj
    elif hasattr(obj, "as_numpy"):
        return obj.as_numpy()
    elif hasattr(obj, "numpy"):
        return obj.numpy()
    elif isinstance(obj, dict) and "image" in obj and "label" in obj:
        if module_name == "jax.numpy":

            def __to_numpy(o, _K=None):
                """
                Convert input to a DeviceArray

                :param o: An object with a `numpy` method
                :type o: ```Any```

                :param _K: backend engine, e.g., `np` or `tf`; defaults to `np`
                :type _K: ```Literal['np', 'tf']```

                :return: The array on the device
                :rtype: ```DeviceArray```
                """
                import jax

                return jax.device_put(o.numpy(), device=device)

        else:
            __to_numpy = _to_numpy  # module-level helper, not shown in this excerpt

        return {
            "image": __to_numpy(obj["image"], K),
            "label": __to_numpy(obj["label"], K),
        }
    elif type(obj).__name__ == "PrefetchDataset":
        # ^`isinstance` said `arg 2 must be a type or tuple of types`
        import tensorflow_datasets as tfds

        return tfds.as_numpy(obj)

    raise TypeError("Unable to convert {!r} to numpy".format(type(obj)))
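A few calls showing which branch each input takes (values chosen for illustration):

import numpy as np
import tensorflow as tf

to_numpy(np.arange(3))         # already numpy: returned unchanged
to_numpy(tf.constant([1, 2]))  # has .numpy(): converted eagerly
to_numpy(None)                 # None stays None (or K.nan when K is given)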
Example #18
 def values(self, split):
     return [
         int(v["value"])
         for v in tfds.as_numpy(self.as_dataset(split=split))
     ]
Example #19
import tensorflow as tf
import tensorflow_datasets.public_api as tfds
from tqdm import tqdm
import numpy as np
import os

df = tfds.load(name="imdb_reviews", split='train[:5%]')
df = df.map(lambda x: tf.strings.unicode_split(x['text'], 'UTF-8'))
iter_df = iter(df)
vocabulary = set([])

for review in iter_df:
    temp_vocab = sorted(set(tfds.as_numpy(review)))
    vocabulary.update(temp_vocab)

vocabulary = sorted(vocabulary)
char2idx = {u: i for i, u in enumerate(vocabulary)}
idx2char = np.array(vocabulary)

model = tf.keras.models.load_model('models/imdb')


def generate_text(model, start_string, generation_length=2000):
    # Evaluation step: generate text using the learned RNN model.

    input_eval = [char2idx[bytes(i, encoding='utf8')] for i in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Empty string to store our results
    text_generated = []