Exemplo n.º 1
0
def combine_batches(path):
    """Assemble CIFAR-10 train/test arrays from the on-disk batch files.

    Code based on:
    https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/keras/
        datasets/cifar10.py

    Arguments:
        path: the ``cifar-10-batches-py`` directory containing
            ``data_batch_1`` .. ``data_batch_5`` and ``test_batch``.

    Returns:
        Tuple ``(x_train, y_train), (x_test, y_test)``. ``x_train`` is a uint8
        array of 50000 images; both label arrays are reshaped to
        ``(num_samples, 1)``. Image arrays are transposed to channels-last
        when ``K.image_data_format()`` reports ``'channels_last'``.
    """
    per_batch = 10000
    batch_count = 5
    total = per_batch * batch_count

    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total,), dtype='uint8')

    # Each training batch fills its own contiguous slice of the arrays.
    for batch_no in range(1, batch_count + 1):
        start, stop = (batch_no - 1) * per_batch, batch_no * per_batch
        batch_file = os.path.join(path, 'data_batch_' + str(batch_no))
        images, labels = load_batch(batch_file)
        x_train[start:stop, :, :, :] = images
        y_train[start:stop] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        # (N, C, H, W) -> (N, H, W, C)
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
def load_data(label_mode='fine'):
  """Fetch (via `get_file`) and load the CIFAR100 dataset.

  Arguments:
      label_mode: one of "fine", "coarse"; selects which label key
        (`fine_labels` or `coarse_labels`) is read from the batches.

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
      Label arrays are reshaped to `(num_samples, 1)`; images are transposed
      to channels-last when the backend is configured that way.

  Raises:
      ValueError: in case of invalid `label_mode`.
  """
  if label_mode not in ('fine', 'coarse'):
    raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

  root = get_file(
      'cifar-100-python',
      origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
      untar=True)

  # Both splits are read with the same label key.
  label_key = label_mode + '_labels'
  x_train, y_train = load_batch(os.path.join(root, 'train'),
                                label_key=label_key)
  x_test, y_test = load_batch(os.path.join(root, 'test'),
                              label_key=label_key)

  # Labels become column vectors.
  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  return (x_train, y_train), (x_test, y_test)
Exemplo n.º 3
0
def get_data(
    data_path: str
) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]:
    """Read the CIFAR-10 batch files under ``data_path`` into arrays.

    Args:
        data_path: directory containing ``data_batch_1`` .. ``data_batch_5``
            and ``test_batch``.

    Returns:
        ``(train_data, train_labels), (test_data, test_labels)``. Labels are
        reshaped to ``(num_samples, 1)``; image arrays are transposed to
        channels-last when the Keras backend reports ``"channels_last"``.
    """
    batch_len = 10000
    num_train_samples = 50000

    train_data = np.empty((num_train_samples, 3, 32, 32), dtype="uint8")
    train_labels = np.empty((num_train_samples, ), dtype="uint8")

    # Batch k (1-based) occupies rows [(k - 1) * 10000, k * 10000).
    offsets = range(0, num_train_samples, batch_len)
    for batch_no, offset in enumerate(offsets, start=1):
        batch_file = os.path.join(data_path, "data_batch_" + str(batch_no))
        images, labels = load_batch(batch_file)
        train_data[offset:offset + batch_len, :, :, :] = images
        train_labels[offset:offset + batch_len] = labels

    test_data, test_labels = load_batch(os.path.join(data_path, "test_batch"))

    # Labels become column vectors.
    train_labels = np.reshape(train_labels, (len(train_labels), 1))
    test_labels = np.reshape(test_labels, (len(test_labels), 1))

    channels_last = keras.backend.image_data_format() == "channels_last"
    if channels_last:
        train_data = train_data.transpose(0, 2, 3, 1)
        test_data = test_data.transpose(0, 2, 3, 1)

    return (train_data, train_labels), (test_data, test_labels)
Exemplo n.º 4
0
def load_cifar10_data(path='cifar-10-batches-py'):
    """Load CIFAR-10 from local batch files, always in channels-last order.

    Args:
        path: directory containing ``data_batch_1`` .. ``data_batch_5`` and
            ``test_batch``.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Images are transposed with
        ``(0, 2, 3, 1)`` unconditionally, labels are reshaped to
        ``(num_samples, 1)``, and the test arrays are cast to the dtypes of
        the training arrays.
    """
    from tensorflow.python.keras.datasets.cifar import load_batch

    rows_per_batch = 10000
    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')

    for batch_no in range(1, 6):
        first = (batch_no - 1) * rows_per_batch
        last = batch_no * rows_per_batch
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[first:last, :, :, :] = images
        y_train[first:last] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors; images go (N, C, H, W) -> (N, H, W, C).
    y_train = np.reshape(y_train, (len(y_train), 1))
    x_train = x_train.transpose(0, 2, 3, 1)

    # Test arrays take the dtypes of the training arrays.
    y_test = np.reshape(y_test, (len(y_test), 1)).astype(y_train.dtype)
    x_test = x_test.transpose(0, 2, 3, 1).astype(x_train.dtype)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 5
0
def load_data():
    """Loads the CIFAR10 dataset, caching the download under ``'.'``.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed to
        channels-last when the backend is configured that way.
    """
    path = get_file(
        'cifar-10-batches-py',
        origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
        untar=True,
        cache_dir='.')

    chunk = 10000
    x_train = np.empty((5 * chunk, 3, 32, 32), dtype='uint8')
    y_train = np.empty((5 * chunk, ), dtype='uint8')

    # Five training batches, each written into its own slice.
    for k in range(5):
        rows = slice(k * chunk, (k + 1) * chunk)
        batch_file = os.path.join(path, 'data_batch_' + str(k + 1))
        x_train[rows, :, :, :], y_train[rows] = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        axes = (0, 2, 3, 1)
        x_train = x_train.transpose(*axes)
        x_test = x_test.transpose(*axes)

    return (x_train, y_train), (x_test, y_test)
def load_data():
    """Load CIFAR-10 from ``cifar-10-batches-py`` in the working directory.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Labels are reshaped to
        ``(num_samples, 1)``, images are transposed to channels-last when the
        backend is configured that way, and the test arrays are cast to the
        dtypes of the training arrays.
    """
    path = os.getcwd() + '/cifar-10-batches-py'

    batch_rows = 10000
    num_train_samples = 50000
    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')

    for batch_no in range(1, 6):
        begin = (batch_no - 1) * batch_rows
        end = begin + batch_rows
        batch_x, batch_y = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[begin:end, :, :, :] = batch_x
        y_train[begin:end] = batch_y

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Test arrays take the dtypes of the training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 7
0
def data():
    """Load CIFAR-10 from ``cifar-10/cifar-10-batches-py`` with int32 labels.

    Based on:
    https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/datasets/cifar10.py

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Labels are reshaped to
        ``(num_samples, 1)`` and cast to ``np.int32``; images are transposed
        to channels-last when the backend is configured that way.
    """
    path = 'cifar-10/cifar-10-batches-py'

    rows_per_file = 10000
    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')

    for file_no in range(1, 6):
        rows = slice((file_no - 1) * rows_per_file, file_no * rows_per_file)
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(file_no)))
        x_train[rows, :, :, :] = images
        y_train[rows] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Column-vector labels, widened to int32.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_train = y_train.astype(np.int32)
    y_test = np.reshape(y_test, (len(y_test), 1))
    y_test = y_test.astype(np.int32)

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 8
0
def load_data(path="TensorFlow/data/mnist.npz"):
    """Load MNIST from an ``.npz`` archive or CIFAR-10 from a batch directory.

    The loader is chosen from ``path`` itself: a path ending in ``.npz`` is
    read as an archive with the keys ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``; any other path is treated as a CIFAR-10 batch directory
    containing ``data_batch_1`` .. ``data_batch_5`` and ``test_batch``.
    Previously only the two hard-coded default locations were recognised and
    every other value silently returned ``None``.

    Args:
        path: ``.npz`` archive path or CIFAR-10 batch directory.

    Returns:
        Tuple of Numpy arrays: ``(x_train, y_train), (x_test, y_test)``.
        For CIFAR-10 the labels are reshaped to ``(num_samples, 1)``, images
        are transposed to channels-last when the backend is configured that
        way, and the test arrays are cast to the training dtypes.
    """
    if os.fspath(path).endswith('.npz'):
        # The context manager closes the archive even if a key is missing.
        with np.load(path) as archive:
            x_train, y_train = archive['x_train'], archive['y_train']
            x_test, y_test = archive['x_test'], archive['y_test']
        return (x_train, y_train), (x_test, y_test)

    batch_rows = 10000
    num_train_samples = 50000
    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples,), dtype='uint8')

    # Each training batch fills its own contiguous slice of the arrays.
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        rows = slice((i - 1) * batch_rows, i * batch_rows)
        x_train[rows, :, :, :], y_train[rows] = load_batch(fpath)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Test arrays take the dtypes of the training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 9
0
def load_data():
  """Loads CIFAR10 from ``/mnt/bb/$USER/cifar-10-batches-py``.

  The directory is built from the ``USER`` environment variable; nothing is
  downloaded. Images are returned as loaded, without any transpose.

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
      Labels are reshaped to `(num_samples, 1)`.
  """
  from tensorflow.python.keras.datasets.cifar import load_batch
  from tensorflow.python.keras import backend as K

  dirname = 'cifar-10-batches-py'
  path = '/mnt/bb/{}/{}'.format(os.environ['USER'], dirname)

  per_file = 10000
  num_train_samples = 50000
  x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
  y_train = np.empty((num_train_samples,), dtype='uint8')

  for file_no in range(1, 6):
    lo = (file_no - 1) * per_file
    hi = file_no * per_file
    images, labels = load_batch(
        os.path.join(path, 'data_batch_' + str(file_no)))
    x_train[lo:hi, :, :, :] = images
    y_train[lo:hi] = labels

  x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

  # Labels become column vectors.
  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  # The channels-last transpose is intentionally not applied here.
  return (x_train, y_train), (x_test, y_test)
Exemplo n.º 10
0
def load_data():
  """Fetches (via `get_file`) and loads the CIFAR10 dataset.

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
      Labels are reshaped to `(num_samples, 1)`; images are transposed to
      channels-last when the backend is configured that way.
  """
  path = get_file(
      'cifar-10-batches-py',
      origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
      untar=True)

  batch_rows = 10000
  n_batches = 5
  x_train = np.empty((n_batches * batch_rows, 3, 32, 32), dtype='uint8')
  y_train = np.empty((n_batches * batch_rows,), dtype='uint8')

  # Each training batch fills its own contiguous slice of the arrays.
  for number in range(1, n_batches + 1):
    rows = slice((number - 1) * batch_rows, number * batch_rows)
    batch_x, batch_y = load_batch(
        os.path.join(path, 'data_batch_' + str(number)))
    x_train[rows, :, :, :] = batch_x
    y_train[rows] = batch_y

  x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  return (x_train, y_train), (x_test, y_test)
Exemplo n.º 11
0
def load_data(path='/cifar10/cifar-10-batches-py'):
    """Loads the CIFAR10 dataset from a local batch directory.

    Args:
        path: directory containing ``data_batch_1`` .. ``data_batch_5`` and
            ``test_batch``.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed to
        channels-last when the backend is configured that way.
    """
    size = 10000
    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')

    for position, start in enumerate(range(0, num_train_samples, size)):
        # Batch files are numbered from 1.
        batch_file = os.path.join(path, 'data_batch_' + str(position + 1))
        images, labels = load_batch(batch_file)
        x_train[start:start + size, :, :, :] = images
        y_train[start:start + size] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 12
0
def load_cifar100(split, path=None):
    """Return one split of CIFAR-100 (fine labels) as (image, label) pairs.

    Args:
        split: case-insensitive split name. ``'test'`` reads the test file.
            Any other value reads the training file and shuffles its indices
            with a fixed seed; ``'train'`` takes the first 45000 shuffled
            indices and every other value takes the remainder.
        path: dataset directory; when ``None`` it is obtained through
            ``get_file`` with a cache under ``~/.capslayer``.

    Returns:
        A ``zip`` of float32 images flattened to 3072 values (after a
        channels-last transpose) and int32 labels.

    Note:
        Reseeds NumPy's global random state with 201808 for non-test splits.
    """
    if path is None:
        cache_path = os.path.join(os.path.expanduser('~'), ".capslayer")
        path = get_file('cifar-100-python',
                        cache_dir=cache_path,
                        file_hash=md5sum,
                        origin=URL,
                        untar=True)

    split = split.lower()
    if split == 'test':
        images, labels = load_batch(os.path.join(path, 'test'),
                                    label_key='fine_labels')
    else:
        images, labels = load_batch(os.path.join(path, 'train'),
                                    label_key='fine_labels')

        # Fixed-seed permutation keeps the train/held-out partition stable.
        order = np.arange(len(images))
        np.random.seed(201808)
        np.random.shuffle(order)

        if split == "train":
            chosen = order[:45000]
        else:
            chosen = order[45000:]

        images = images[chosen]
        labels = np.reshape(labels, (-1, ))[chosen]

    # (N, C, H, W) -> (N, H, W, C) -> (N, 3072)
    flat_images = np.reshape(images.transpose(0, 2, 3, 1), (-1, 3072))
    flat_images = flat_images.astype(np.float32)
    flat_labels = np.reshape(labels, (-1, )).astype(np.int32)

    return zip(flat_images, flat_labels)
Exemplo n.º 13
0
def read_cifar_100_data(path_data, subdir, label_mode='fine', STANDARDIZE_BOOL = True):
    """Loads [CIFAR100 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).
    This is a dataset of 50,000 32x32 color training images and
    10,000 test images, labeled over 100 fine-grained classes that are
    grouped into 20 coarse-grained classes. See more info at the
    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).
    Arguments:
      path_data: passed to `get_file` as `cache_dir`.
      subdir: passed to `get_file` as `cache_subdir`.
      label_mode: one of "fine", "coarse". If it is "fine" the category labels
      are the fine-grained labels, if it is "coarse" the output labels are the
      coarse-grained superclasses.
      STANDARDIZE_BOOL: when true, subtract the per-channel mean and divide by
      the per-channel standard deviation, both computed on the training
      images only.
    Returns:
      Dict with keys 'train' and 'test', each a dict with:
      'input': float32 images scaled by 1/255 (and standardized if
        requested), in channels-first or channels-last order following
        `K.image_data_format()`.
      'label': the output of `dense_to_one_hot(..., num_classes=100)`.
    Raises:
      ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ['fine', 'coarse']:
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    dirname = 'cifar-100-python'
    origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    path = get_file(
        dirname,
        origin=origin,
        untar=True,
        file_hash='85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7',
        cache_dir=path_data,
        cache_subdir=subdir)

    label_key = label_mode + '_labels'
    x_train, y_train = load_batch(os.path.join(path, 'train'),
                                  label_key=label_key)
    x_test, y_test = load_batch(os.path.join(path, 'test'),
                                label_key=label_key)

    # Labels become column vectors.
    y_train = onp.reshape(y_train, (len(y_train), 1))
    y_test = onp.reshape(y_test, (len(y_test), 1))

    channels_last = K.image_data_format() == 'channels_last'
    if channels_last:
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Pre-processing (normalize)
    train_images = onp.divide(x_train, 255, dtype=onp.float32)
    test_images = onp.divide(x_test, 255, dtype=onp.float32)

    # NOTE(review): num_classes stays 100 even for label_mode='coarse'
    # (20 superclasses) — confirm this is what callers expect.
    train_labels = dense_to_one_hot(y_train, num_classes=100)
    test_labels = dense_to_one_hot(y_test, num_classes=100)

    if STANDARDIZE_BOOL:
        # Reduce over every axis except the channel axis. The original always
        # used (0, 1, 2), which is per-channel only for channels-last data.
        stat_axes = (0, 1, 2) if channels_last else (0, 2, 3)
        channel_mean = onp.mean(train_images, axis=stat_axes, dtype=onp.float32, keepdims=True)
        channel_std = onp.std(train_images, axis=stat_axes, dtype=onp.float32, keepdims=True)
        # Test images use the training statistics.
        train_images = (train_images - channel_mean) / channel_std
        test_images = (test_images - channel_mean) / channel_std

    dataset = {
        'train': {'input': train_images, 'label': train_labels},
        'test': {'input': test_images, 'label': test_labels}}

    return dataset
Exemplo n.º 14
0
def prep_data():
  """Probes one local image, then loads the CIFAR10 dataset.

  The image probe (``1000_left.jpeg``) only prints diagnostics; its array is
  not part of the return value.

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
  """
  print("##data_prep called...")

  # --- single-image probe -------------------------------------------------
  config = cutil.Config(configid="myConfId",cdir="../../")
  img1 = myimg.myImg(imageid="xx",config=config,ekey='x123',path="1000_left.jpeg")
  img1.printImageProp()

  train_samples = 1
  w, h = img1.getImageDim()
  x_train1 = np.empty((train_samples, w, h, 3), dtype='uint8')
  shape_msg = " x_train1 size [{}]"
  print(shape_msg.format(x_train1.shape))
  x_train1[0, :, :, :] = img1.getImage()
  print(shape_msg.format(x_train1.shape))

  # --- CIFAR-10 ------------------------------------------------------------
  dirname = 'cifar-10-batches-py'
  # Local archive is used in place of the upstream
  # https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz download.
  origin = 'file://Users/pankaj.petkar/.keras/datasets/cifar-10-batches-py.tar.gz'
  path = get_file(dirname, origin=origin, untar=True)

  rows_per_batch = 10000
  num_train_samples = 50000

  x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
  y_train = np.empty((num_train_samples,), dtype='uint8')

  for batch_no in range(1, 6):
    rows = slice((batch_no - 1) * rows_per_batch, batch_no * rows_per_batch)
    images, labels = load_batch(
        os.path.join(path, 'data_batch_' + str(batch_no)))
    x_train[rows, :, :, :] = images
    y_train[rows] = labels

  # Shapes before the label reshape and optional transpose.
  print(x_train.shape)
  print(y_train.shape)

  x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  # Shapes as returned.
  print(x_train.shape)
  print(y_train.shape)

  return (x_train, y_train), (x_test, y_test)
Exemplo n.º 15
0
def load_data(filename):
    """Loads [CIFAR10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).

    This is a dataset of 50,000 32x32 color training images and 10,000 test
    images, labeled over 10 categories. See more info at the
    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

    Args:
        filename: path to a local CIFAR-10 tar archive. It is extracted into
            the current directory and the batches are read from
            `./cifar-10-batches-py`. When `None`, the archive is fetched via
            `get_file` instead.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

        **x_train, x_test**: uint8 arrays of RGB image data with shape
          `(num_samples, 3, 32, 32)` if `tf.keras.backend.image_data_format()`
          is `'channels_first'`, or `(num_samples, 32, 32, 3)` if the data
          format is `'channels_last'`.

        **y_train, y_test**: uint8 arrays of category labels
          (integers in range 0-9) each with shape (num_samples, 1).
    """
    dirname = 'cifar-10-batches-py'
    origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'

    if filename is not None:
        # The original used the builtin open(), whose file object has no
        # extractall(); the archive has to be opened through tarfile.
        import tarfile
        with tarfile.open(filename) as archive:
            if hasattr(tarfile, 'data_filter'):
                # Extraction filters reject members that would escape the
                # destination directory (available on patched Pythons).
                archive.extractall(path='./', filter='data')
            else:
                # NOTE(review): no extraction filter on this Python; only
                # pass archives from a trusted source.
                archive.extractall(path='./')
        path = dirname
    else:
        path = get_file(
            dirname,
            origin=origin,
            untar=True,
            file_hash=
            '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce')

    batch_rows = 10000
    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')

    # Each training batch fills its own contiguous slice of the arrays.
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        rows = slice((i - 1) * batch_rows, i * batch_rows)
        x_train[rows, :, :, :], y_train[rows] = load_batch(fpath)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Test arrays take the dtypes of the training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 16
0
    def load_data(self, rank):
        """Loads the CIFAR10 dataset selected through ``self.FLAGS``.

        Reads the batch files under ``self.FLAGS.DATA_PATH`` when
        ``self.FLAGS.DATA_NAME`` is ``'CIFAR-10'``. Images are converted to
        float32 and divided by 255; the per-pixel training mean is subtracted
        from both splits when ``self.FLAGS.subtract_pixel_mean`` is set.
        Labels are returned as int32 arrays of shape ``(num_samples, 1)``.

        Args:
            rank: not used by this method.

        Returns:
            Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`,
            or ``None`` (after printing an error) for any other dataset name.
        """
        if self.FLAGS.DATA_NAME != 'CIFAR-10':
            print("ERROR: The dataset is not Available...!")
            return

        path = self.FLAGS.DATA_PATH
        print(path)

        batch_rows = 10000
        num_train_samples = 50000
        x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
        y_train = np.empty((num_train_samples, ), dtype='uint8')

        for batch_no in range(1, 6):
            rows = slice((batch_no - 1) * batch_rows, batch_no * batch_rows)
            images, labels = load_batch(
                os.path.join(path, 'data_batch_' + str(batch_no)))
            x_train[rows, :, :, :] = images
            y_train[rows] = labels

        x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

        # Labels become column vectors.
        y_train = np.reshape(y_train, (len(y_train), 1))
        y_test = np.reshape(y_test, (len(y_test), 1))

        if K.image_data_format() == 'channels_last':
            x_train = x_train.transpose(0, 2, 3, 1)
            x_test = x_test.transpose(0, 2, 3, 1)

        # Test arrays take the training dtypes before the float conversion.
        x_test = x_test.astype(x_train.dtype)
        y_test = y_test.astype(y_train.dtype)

        # Scale pixels by 1/255 as float32.
        x_train = x_train.astype("float32")
        x_test = x_test.astype("float32")
        x_train /= 255
        x_test /= 255

        if self.FLAGS.subtract_pixel_mean:
            # The mean comes from the training images only.
            x_train_mean = np.mean(x_train, axis=0)
            x_train -= x_train_mean
            x_test -= x_train_mean

        y_train = np.int32(y_train)
        y_test = np.int32(y_test)

        return (x_train, y_train), (x_test, y_test)
Exemplo n.º 17
0
def load_data(label_mode='fine'):
  """Loads [CIFAR100 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).

  This is a dataset of 50,000 32x32 color training images and
  10,000 test images, labeled over 100 fine-grained classes that are
  grouped into 20 coarse-grained classes. See more info at the
  [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

  Args:
      label_mode: one of "fine", "coarse". "fine" selects the fine-grained
      category labels, "coarse" selects the coarse-grained superclasses.

  Returns:
      Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

      **x_train, x_test**: uint8 arrays of RGB image data with shape
        `(num_samples, 3, 32, 32)` if `tf.keras.backend.image_data_format()` is
        `'channels_first'`, or `(num_samples, 32, 32, 3)` if the data format
        is `'channels_last'`.

      **y_train, y_test**: uint8 arrays of category labels with shape
        (num_samples, 1).

  Raises:
      ValueError: in case of invalid `label_mode`.
  """
  if label_mode not in ('fine', 'coarse'):
    raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

  archive_root = get_file(
      'cifar-100-python',
      origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
      untar=True,
      file_hash=
      '85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7')

  # Both splits are read with the same label key.
  key = label_mode + '_labels'
  x_train, y_train = load_batch(os.path.join(archive_root, 'train'),
                                label_key=key)
  x_test, y_test = load_batch(os.path.join(archive_root, 'test'),
                              label_key=key)

  # Labels become column vectors.
  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  return (x_train, y_train), (x_test, y_test)
Exemplo n.º 18
0
def get_cifar10():
    """Load CIFAR-10 from ``../../data/cifar-10-batches-py`` next to this file.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Labels are reshaped to
        ``(num_samples, 1)``, images are transposed to channels-last when the
        backend is configured that way, and the test arrays are cast to the
        dtypes of the training arrays.
    """
    filedir = os.path.dirname(__file__)
    path = os.path.join(filedir, "../../data/", "cifar-10-batches-py")

    batch_rows = 10000
    num_train_samples = 50000
    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples,), dtype='uint8')

    for batch_no in range(1, 6):
        first = (batch_no - 1) * batch_rows
        last = batch_no * batch_rows
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[first:last, :, :, :] = images
        y_train[first:last] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Test arrays take the dtypes of the training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 19
0
def read_cifar_10_data(path_data, subdir, STANDARDIZE_BOOL = True):
    """Load CIFAR-10 as normalized float32 images with one-hot labels.

    Arguments:
      path_data: passed to `get_file` as `cache_dir`.
      subdir: passed to `get_file` as `cache_subdir`.
      STANDARDIZE_BOOL: when true, subtract the per-channel mean and divide by
        the per-channel standard deviation, both computed on the training
        images only.
    Returns:
      Dict with keys 'train' and 'test', each a dict with:
      'input': float32 images scaled by 1/255 (and standardized if
        requested), in channels-first or channels-last order following
        `K.image_data_format()`.
      'label': the output of `dense_to_one_hot(..., num_classes=10)`.
    """
    dirname = 'cifar-10-batches-py'
    origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    path = get_file(dirname, origin=origin, untar=True,
                    file_hash='6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce', cache_dir=path_data,
                    cache_subdir=subdir)

    batch_rows = 10000
    num_train_samples = 50000

    x_train = onp.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = onp.empty((num_train_samples,), dtype='uint8')

    # Each training batch fills its own contiguous slice of the arrays.
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        rows = slice((i - 1) * batch_rows, i * batch_rows)
        x_train[rows, :, :, :], y_train[rows] = load_batch(fpath)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors.
    y_train = onp.reshape(y_train, (len(y_train), 1))
    y_test = onp.reshape(y_test, (len(y_test), 1))

    channels_last = K.image_data_format() == 'channels_last'
    if channels_last:
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Test arrays take the dtypes of the training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    # Pre-processing (normalize)
    train_images = onp.divide(x_train, 255, dtype=onp.float32)
    test_images = onp.divide(x_test, 255, dtype=onp.float32)

    train_labels = dense_to_one_hot(y_train, num_classes=10)
    test_labels = dense_to_one_hot(y_test, num_classes=10)

    if STANDARDIZE_BOOL:
        # Reduce over every axis except the channel axis. The original always
        # used (0, 1, 2), which is per-channel only for channels-last data.
        stat_axes = (0, 1, 2) if channels_last else (0, 2, 3)
        channel_mean = onp.mean(train_images, axis=stat_axes, dtype=onp.float32, keepdims=True)
        channel_std = onp.std(train_images, axis=stat_axes, dtype=onp.float32, keepdims=True)
        # Test images use the training statistics.
        train_images = (train_images - channel_mean) / channel_std
        test_images = (test_images - channel_mean) / channel_std

    dataset = {
        'train': {'input': train_images, 'label': train_labels},
        'test': {'input': test_images, 'label': test_labels},
    }
    return dataset
Exemplo n.º 20
0
def cifar10(train=True, data_path='cifar10'):
  """Returns one split of CIFAR-10 read from local batch files.

  Both splits are always read; images are transposed to channels-last
  unconditionally, converted to float32 and divided by 255, and labels are
  int32 arrays of shape (num_samples, 1). The test image shape is printed.

  Args:
    train: if true use training data, else use testing data
    data_path: directory containing `data_batch_1` .. `data_batch_5` and
      `test_batch`

  Returns:
    dataset: `Dataset(imgs, labels)` for the selected split
  """
  per_batch = 10000
  num_train_samples = 50000

  x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
  y_train = np.empty((num_train_samples,), dtype='uint8')

  for batch_no in range(1, 6):
    rows = slice((batch_no - 1) * per_batch, batch_no * per_batch)
    images, labels = load_batch(
        os.path.join(data_path, 'data_batch_' + str(batch_no)))
    x_train[rows, :, :, :] = images
    y_train[rows] = labels

  x_test, y_test = load_batch(os.path.join(data_path, 'test_batch'))

  # Labels become column vectors.
  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  # Always channels-last here, regardless of the backend setting.
  x_train = x_train.transpose(0, 2, 3, 1)
  x_test = x_test.transpose(0, 2, 3, 1)
  print ("image_shape!!!!!!")
  print (x_test.shape)

  # Test arrays first take the training dtypes.
  x_test = x_test.astype(x_train.dtype)
  y_test = y_test.astype(y_train.dtype)

  # Pixels scaled by 1/255 as float32; labels as int32.
  x_train = x_train.astype(np.float32) / 255.
  x_test = x_test.astype(np.float32) / 255.
  y_train = y_train.astype(np.int32)
  y_test = y_test.astype(np.int32)

  if train:
    return Dataset(x_train, y_train)
  return Dataset(x_test, y_test)
Exemplo n.º 21
0
def load_data(dataset_base_path, train=True, label_mode="fine"):
    """Loads one split of the CIFAR100 dataset.

    Parameters
    ----------
    dataset_base_path: str
        Path to create dataset dir, a recommended choice is project root dir
    train: bool
        flag, return training set or test set
    label_mode: str
        one of "fine", "coarse".

    Returns
    -------
    Tuple `(x, y)` for the requested split. `y` is passed through
    `np.asarray` and not reshaped; `x` is transposed to channels-last when
    the backend is configured that way.

    Raises
    ------
    ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ('fine', 'coarse'):
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    path = get_file('cifar-100-python',
                    origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
                    untar=True,
                    cache_dir=dataset_base_path)

    # The two splits differ only in the file that is read.
    split_file = 'train' if train else 'test'
    images, labels = load_batch(os.path.join(path, split_file),
                                label_key=label_mode + '_labels')
    labels = np.asarray(labels)

    if K.image_data_format() == 'channels_last':
        images = images.transpose((0, 2, 3, 1))

    return images, labels
Exemplo n.º 22
0
def load_cifar_data():
    """Load CIFAR-10 from ``datasets/cifar-10-batches-py``, fetching if absent.

    When the relative directory does not exist, ``get_file`` is called with
    the current working directory as ``cache_dir``. The batches are then read
    from the relative directory in either case.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Labels are reshaped to
        ``(num_samples, 1)``, images are transposed to channels-last when the
        backend is configured that way, and the test arrays are cast to the
        dtypes of the training arrays.
    """
    dirname = 'cifar-10-batches-py'
    path = "datasets/" + dirname

    if not os.path.exists(path):
        print("Local data not found. Retrieving from source.")
        get_file(
            dirname,
            origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
            untar=True,
            file_hash=
            '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce',
            cache_dir=os.getcwd())
    else:
        print("Local data found.")

    batch_rows = 10000
    num_train_samples = 50000

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')

    for batch_no in range(1, 6):
        rows = slice((batch_no - 1) * batch_rows, batch_no * batch_rows)
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[rows, :, :, :] = images
        y_train[rows] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Test arrays take the dtypes of the training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 23
0
def load_data(dataset_base_path, train=True):
    """Loads one split of the CIFAR10 dataset.

    Parameters
    ----------
    dataset_base_path: str
        Path to create dataset dir, a recommended choice is project root dir
    train: bool
        flag, return training set or test set

    Returns:
        Tuple `(x, y)` for the requested split. `y` is passed through
        `np.asarray` and not reshaped; `x` is transposed to channels-last
        when the backend is configured that way.
    """
    path = get_file('cifar-10-batches-py',
                    origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
                    untar=True,
                    cache_dir=dataset_base_path)

    if not train:
        # The test split lives in a single file.
        x_test, y_test = load_batch(os.path.join(path, 'test_batch'))
        y_test = np.asarray(y_test)
        if K.image_data_format() == 'channels_last':
            x_test = x_test.transpose((0, 2, 3, 1))
        return x_test, y_test

    # The training split is spread over five batch files.
    batch_rows = 10000
    num_train_samples = 50000
    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')
    for batch_no in range(1, 6):
        rows = slice((batch_no - 1) * batch_rows, batch_no * batch_rows)
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[rows, :, :, :] = images
        y_train[rows] = labels

    y_train = np.asarray(y_train)
    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose((0, 2, 3, 1))
    return x_train, y_train
Exemplo n.º 24
0
def load_cifar10(path, split):
    """Return an iterator of (image, label) pairs for one CIFAR-10 split.

    `split` is compared case-insensitively: 'eval' reads `test_batch`,
    any other value reads the five `data_batch_<i>` training files found
    under `<path>/cifar-10-batches-py`.

    Each image is transposed with axes (0, 2, 3, 1), flattened to 3072
    float32 values; labels are flattened and cast to int32.
    """
    data_dir = os.path.join(path, 'cifar-10-batches-py')

    if split.lower() == 'eval':
        images, labels = load_batch(os.path.join(data_dir, 'test_batch'))
    else:
        per_file = 10000
        images = np.empty((50000, 3, 32, 32), dtype='uint8')
        labels = np.empty((50000, ), dtype='uint8')

        for batch_no in range(1, 6):
            lo = (batch_no - 1) * per_file
            hi = batch_no * per_file
            batch_file = os.path.join(data_dir, 'data_batch_' + str(batch_no))
            batch_images, batch_labels = load_batch(batch_file)
            images[lo:hi, :, :, :] = batch_images
            labels[lo:hi] = batch_labels

    channels_last = images.transpose(0, 2, 3, 1)
    flat_images = np.reshape(channels_last, (-1, 3072)).astype(np.float32)
    flat_labels = np.reshape(labels, (-1, )).astype(np.int32)

    return zip(flat_images, flat_labels)
Exemplo n.º 25
0
def cifar10_load_data(path):
    """Read CIFAR10 from an already-extracted batch directory.

    `path` must contain `data_batch_1` .. `data_batch_5` and `test_batch`;
    nothing is downloaded here.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to column vectors, images are transposed with
        axes (0, 2, 3, 1) when the backend reports `'channels_last'`, and
        the test arrays are cast to the dtypes of the training arrays.
    """
    per_file = 10000
    total = 50000

    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    for batch_no in range(1, 6):
        rows = slice((batch_no - 1) * per_file, batch_no * per_file)
        batch_file = os.path.join(path, 'data_batch_' + str(batch_no))
        x_train[rows, :, :, :], y_train[rows] = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become (N, 1) column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        axes = (0, 2, 3, 1)
        x_train = x_train.transpose(*axes)
        x_test = x_test.transpose(*axes)

    # Keep both splits on the same dtypes.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    train_pair = (x_train, y_train)
    test_pair = (x_test, y_test)
    return train_pair, test_pair
    def load_data(dirname):
        """Read the `train` and `test` batch files found in `dirname`.

        The label field is "fine_labels" when the enclosing-scope flag
        `is_fine_label` is truthy, otherwise "coarse_labels".

        Returns:
            Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`,
            with labels reshaped to column vectors and images transposed
            with axes (0, 2, 3, 1) under `'channels_last'`.
        """
        train_path = os.path.join(dirname, 'train')
        test_path = os.path.join(dirname, 'test')

        if is_fine_label:
            label_key = "fine_labels"
        else:
            label_key = "coarse_labels"

        train_images, train_labels = load_batch(train_path,
                                                label_key=label_key)
        test_images, test_labels = load_batch(test_path, label_key=label_key)

        train_labels = np.reshape(train_labels, (len(train_labels), 1))
        test_labels = np.reshape(test_labels, (len(test_labels), 1))

        if K.image_data_format() == 'channels_last':
            train_images = train_images.transpose(0, 2, 3, 1)
            test_images = test_images.transpose(0, 2, 3, 1)

        return (train_images, train_labels), (test_images, test_labels)
Exemplo n.º 27
0
def load_data(label_mode='fine', data_directory=None):
    """Fetch CIFAR100 and return its train/test arrays. Reference: https://github.com/tensorflow/tensorflow/blob/v2.0.0/tensorflow/python/keras/datasets/cifar100.py
    Arguments:
        label_mode: "fine" or "coarse"; selects the `<label_mode>_labels`
            field read from each batch file.
        data_directory: forwarded to `get_file` as `cache_dir`.
    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    Raises:
        ValueError: when `label_mode` is neither "fine" nor "coarse".
    """
    if label_mode not in ('fine', 'coarse'):
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    archive_dir = get_file(
        'cifar-100-python',
        origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
        untar=True,
        file_hash=('85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b8'
                   '55ba677a7'),
        cache_dir=data_directory)

    train_file = os.path.join(archive_dir, 'train')
    x_train, y_train = load_batch(train_file,
                                  label_key=label_mode + '_labels')

    test_file = os.path.join(archive_dir, 'test')
    x_test, y_test = load_batch(test_file, label_key=label_mode + '_labels')

    # Labels become (N, 1) column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        axes = (0, 2, 3, 1)
        x_train = x_train.transpose(*axes)
        x_test = x_test.transpose(*axes)

    return (x_train, y_train), (x_test, y_test)
Exemplo n.º 28
0
def load_cifar10(split, path=None):
    """Return an iterator of (image, label) pairs for one CIFAR-10 split.

    When `path` is None the archive is fetched with `get_file` into
    `~/.capslayer`. `split` is compared case-insensitively:

    * 'test' reads `test_batch`;
    * 'train' takes the first 45000 entries of a fixed shuffle of the
      50000 training samples;
    * any other value takes the remaining 5000 entries of that shuffle.

    NOTE: the shuffle reseeds NumPy's global RNG with 201808.

    Images are transposed with axes (0, 2, 3, 1) and flattened to 3072
    float32 values; labels are flattened and cast to int32.
    """
    if path is None:
        cache_path = os.path.join(os.path.expanduser('~'), ".capslayer")
        path = get_file('cifar-10-batches-py',
                        cache_dir=cache_path,
                        file_hash=md5sum,
                        origin=URL,
                        untar=True)

    split = split.lower()
    if split == 'test':
        images, labels = load_batch(os.path.join(path, 'test_batch'))
    else:
        per_file = 10000
        images = np.empty((50000, 3, 32, 32), dtype='uint8')
        labels = np.empty((50000, ), dtype='uint8')

        for batch_no in range(1, 6):
            rows = slice((batch_no - 1) * per_file, batch_no * per_file)
            batch_file = os.path.join(path, 'data_batch_' + str(batch_no))
            images[rows, :, :, :], labels[rows] = load_batch(batch_file)

        # Fixed seed so the train/validation partition is reproducible.
        order = np.arange(len(images))
        np.random.seed(201808)
        np.random.shuffle(order)

        if split == "train":
            chosen = order[:45000]
        else:
            chosen = order[45000:]
        images = images[chosen]
        labels = labels[chosen]

    channels_last = images.transpose(0, 2, 3, 1)
    flat_images = np.reshape(channels_last, (-1, 3072)).astype(np.float32)
    flat_labels = np.reshape(labels, (-1, )).astype(np.int32)

    return zip(flat_images, flat_labels)
Exemplo n.º 29
0
def load_data():
  """Loads the CIFAR10 dataset.

  This is a dataset of 50,000 32x32 color training images and 10,000 test
  images, labeled over 10 categories. See more info at the
  [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

  The classes are:

  | Label | Description |
  |:-----:|-------------|
  |   0   | airplane    |
  |   1   | automobile  |
  |   2   | bird        |
  |   3   | cat         |
  |   4   | deer        |
  |   5   | dog         |
  |   6   | frog        |
  |   7   | horse       |
  |   8   | ship        |
  |   9   | truck       |

  Returns:
    Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`.

  **x_train**: uint8 NumPy array of color (3-channel) image data with shape
    `(50000, 32, 32, 3)` when `K.image_data_format()` is `'channels_last'`,
    otherwise `(50000, 3, 32, 32)`, containing the training data. Pixel
    values range from 0 to 255.

  **y_train**: uint8 NumPy array of labels (integers in range 0-9)
    with shape `(50000, 1)` for the training data.

  **x_test**: uint8 NumPy array of color (3-channel) image data with shape
    `(10000, 32, 32, 3)` when `K.image_data_format()` is `'channels_last'`,
    otherwise `(10000, 3, 32, 32)`, containing the test data. Pixel values
    range from 0 to 255.

  **y_test**: uint8 NumPy array of labels (integers in range 0-9)
    with shape `(10000, 1)` for the test data.

  Example (shapes shown assume the `'channels_last'` image data format):

  ```python
  (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
  assert x_train.shape == (50000, 32, 32, 3)
  assert x_test.shape == (10000, 32, 32, 3)
  assert y_train.shape == (50000, 1)
  assert y_test.shape == (10000, 1)
  ```
  """
  dirname = 'cifar-10-batches-py'
  origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
  path = get_file(
      dirname,
      origin=origin,
      untar=True,
      file_hash=
      '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce')

  num_train_samples = 50000
  batch_size = 10000  # samples stored in each data_batch_<i> file

  x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
  y_train = np.empty((num_train_samples,), dtype='uint8')

  for i in range(1, 6):
    fpath = os.path.join(path, 'data_batch_' + str(i))
    start = (i - 1) * batch_size
    stop = i * batch_size
    # Fill the i-th slice of the preallocated arrays in place.
    (x_train[start:stop, :, :, :],
     y_train[start:stop]) = load_batch(fpath)

  fpath = os.path.join(path, 'test_batch')
  x_test, y_test = load_batch(fpath)

  # Labels become (N, 1) column vectors.
  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  # Keep the test arrays on the same dtypes as the training arrays.
  x_test = x_test.astype(x_train.dtype)
  y_test = y_test.astype(y_train.dtype)

  return (x_train, y_train), (x_test, y_test)
Exemplo n.º 30
0
)

# Script fragment: assemble the CIFAR-10 training arrays from the five batch
# files, load the test batch, and print types/shapes along the way.
# NOTE(review): `path` is bound above this fragment -- presumably the
# extracted dataset directory; confirm against the truncated statement.
num_train_samples = 50000

# empty(shape[, dtype, order])
# Preallocate the destination arrays; each loop iteration fills one slice.
x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
y_train = np.empty((num_train_samples, ), dtype='uint8')

for i in range(1, 6):
    # os.path.join: if any component is an absolute path, every component
    # before it is discarded and joining restarts from that absolute one.
    # load_batch (defined elsewhere) reportedly returns -- TODO confirm:
    # data = data.reshape(data.shape[0], 3, 32, 32)
    # labels = d[label_key]
    fpath = os.path.join(path, 'data_batch_' + str(i))
    # Batch i lands in rows [(i - 1) * 10000, i * 10000).
    (x_train[(i - 1) * 10000:i * 10000, :, :, :],
     y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath)
print('x_train.type:', type(x_train))
print('y_train.type:', type(y_train))
print('x_train.shape:', x_train.shape)
print('y_train.shape:', np.shape(y_train))

fpath = os.path.join(path, 'test_batch')
x_test, y_test = load_batch(fpath)
print('x_test.shape:', x_test.shape)
# np.shape is used because y_test has not been converted/reshaped yet.
print('y_test.shape:', np.shape(y_test))

# Reshape both label sets to (N, 1) column vectors.
y_train = np.reshape(y_train, (len(y_train), 1))
y_test = np.reshape(y_test, (len(y_test), 1))
print('y_train.shape:', y_train.shape)
print('y_test.shape:', y_test.shape)
Exemplo n.º 31
0
def load_data(label_mode='fine'):
  """Loads the CIFAR100 dataset.

  This is a dataset of 50,000 32x32 color training images and
  10,000 test images, labeled over 100 fine-grained classes that are
  grouped into 20 coarse-grained classes. See more info at the
  [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

  Args:
    label_mode: one of "fine", "coarse". If it is "fine" the category labels
      are the fine-grained labels, if it is "coarse" the output labels are the
      coarse-grained superclasses.

  Returns:
    Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`.

  **x_train**: NumPy array of color (3-channel) image data with shape
    `(50000, 32, 32, 3)` when `K.image_data_format()` is `'channels_last'`,
    otherwise `(50000, 3, 32, 32)`, containing the training data. Pixel
    values range from 0 to 255.

  **y_train**: NumPy array of labels (integers in range 0-99 for "fine",
    0-19 for "coarse") with shape `(50000, 1)` for the training data.

  **x_test**: NumPy array of color (3-channel) image data with shape
    `(10000, 32, 32, 3)` when `K.image_data_format()` is `'channels_last'`,
    otherwise `(10000, 3, 32, 32)`, containing the test data. Pixel values
    range from 0 to 255.

  **y_test**: NumPy array of labels (integers in range 0-99 for "fine",
    0-19 for "coarse") with shape `(10000, 1)` for the test data.

  Raises:
    ValueError: in case of invalid `label_mode`.

  Example (shapes shown assume the `'channels_last'` image data format):

  ```python
  (x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()
  assert x_train.shape == (50000, 32, 32, 3)
  assert x_test.shape == (10000, 32, 32, 3)
  assert y_train.shape == (50000, 1)
  assert y_test.shape == (10000, 1)
  ```
  """
  if label_mode not in ['fine', 'coarse']:
    raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

  dirname = 'cifar-100-python'
  origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
  path = get_file(
      dirname,
      origin=origin,
      untar=True,
      file_hash=
      '85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7')

  # Both batch files carry the labels under '<label_mode>_labels'.
  label_key = label_mode + '_labels'

  fpath = os.path.join(path, 'train')
  x_train, y_train = load_batch(fpath, label_key=label_key)

  fpath = os.path.join(path, 'test')
  x_test, y_test = load_batch(fpath, label_key=label_key)

  # Labels become (N, 1) column vectors.
  y_train = np.reshape(y_train, (len(y_train), 1))
  y_test = np.reshape(y_test, (len(y_test), 1))

  if K.image_data_format() == 'channels_last':
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)

  return (x_train, y_train), (x_test, y_test)