Example #1
def read_data_sets(cnt,
                   limit,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        return base.Datasets(train=train)

    train_images = extract_images('/tf/data/images.dat', cnt)
    train_labels = extract_labels('/tf/data/labels.dat',
                                  cnt,
                                  one_hot=one_hot,
                                  num_classes=limit)

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train)
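A hedged call sketch for this variant: cnt is forwarded to extract_images and extract_labels (apparently capping how many examples are read from the fixed /tf/data paths), and limit is passed through as num_classes. The num_examples attribute below is assumed from the classic TensorFlow MNIST DataSet class, which is not shown here.

# Sketch only; assumes /tf/data/images.dat and /tf/data/labels.dat exist and
# that DataSet exposes num_examples like the TF MNIST tutorial class.
data = read_data_sets(cnt=1000, limit=10, one_hot=True)
print(data.train.num_examples)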
Example #2
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    local_file = maybe_download(TRAIN_IMAGES, train_dir)
    with open(local_file, 'rb') as f:
        train_images = extract_images2(f)

    local_file = maybe_download(TRAIN_LABELS, train_dir)
    with open(local_file, 'rb') as f:
        train_labels = extract_labels2(f, one_hot=one_hot)

    local_file = maybe_download(TEST_IMAGES, train_dir)
    with open(local_file, 'rb') as f:
        test_images = extract_images2(f)

    local_file = maybe_download(TEST_LABELS, train_dir)
    with open(local_file, 'rb') as f:
        test_labels = extract_labels2(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
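Since this variant mirrors the classic TensorFlow MNIST reader, a minimal usage sketch could look as follows. The next_batch method is assumed from the standard DataSet class, and the download directory is an arbitrary placeholder.

# Sketch only; assumes maybe_download can fetch the four MNIST archives into
# the given directory and that DataSet provides next_batch as in the TF
# MNIST tutorial.
data = read_data_sets('/tmp/mnist_data', one_hot=True, validation_size=5000)
images, labels = data.train.next_batch(100)
print(images.shape, labels.shape)  # (100, 784) and (100, 10) when reshape=True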
Example #3
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    VALIDATION_SIZE = 5000

    local_file = base.maybe_download(TRAIN_IMAGES, train_dir)
    train_images = extract_images(local_file)

    local_file = base.maybe_download(TRAIN_LABELS, train_dir)
    train_labels = extract_labels(local_file, one_hot=one_hot)

    local_file = base.maybe_download(TEST_IMAGES, train_dir)
    test_images = extract_images(local_file)

    local_file = base.maybe_download(TEST_LABELS, train_dir)
    test_labels = extract_labels(local_file, one_hot=one_hot)

    validation_images = train_images[:VALIDATION_SIZE]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_images = train_images[VALIDATION_SIZE:]
    train_labels = train_labels[VALIDATION_SIZE:]

    train = DataSet(train_images, train_labels, dtype=dtype)
    validation = DataSet(validation_images, validation_labels, dtype=dtype)
    test = DataSet(test_images, test_labels, dtype=dtype)

    return base.Datasets(train=train, validation=validation, test=test)
Example #4
def read_data_set_one_image(img,
                            one_hot=False,
                            reshape=True,
                            dtype=dtypes.float32,
                            seed=None):
    test_images, test_labels = process_image(img, one_hot=one_hot)
    test = DataSet(
        test_images, test_labels, dtype=dtype, reshape=reshape, seed=seed)

    # The single-image DataSet is reused for all three splits.
    return base.Datasets(test=test, train=test, validation=test)
Example #5
def read_datasets():
    data_dir = "/s/red/a/nobackup/cwc/palm_recognition/dataset/"
    print "Loading data ", data_dir

    X_train = np.load(os.path.join(data_dir, "X_train.npy"))
    y_train = np.load(os.path.join(data_dir, "y_train.npy"))

    X_validation = np.load(os.path.join(data_dir, "X_validation.npy"))
    y_validation = np.load(os.path.join(data_dir, "y_validation.npy"))

    print(X_train.shape, y_train.shape, X_validation.shape, y_validation.shape)

    train = DataSet(X_train, y_train)
    validation = DataSet(X_validation, y_validation)
    print "Loading done"
    return base.Datasets(train=train, validation=validation, test=None)
Example #6
def read_data_set(train_dir, validation_dir):
    dirs = []
    for file in os.listdir(train_dir):
        file_path = os.path.join(train_dir, file)
        if os.path.isdir(file_path):
            dirs.append(file_path + '/')
    # Images are 60x60 grayscale; use shape (0, 60, 60, 3) for RGB data instead.
    train_images = np.ndarray((0, 60, 60), dtype=np.uint8)
    train_labels = np.ndarray((0, 1), dtype=np.uint8)
    for d in dirs:
        image_data = extract_images(d)
        image_labels = extract_labels(d)
        train_images = np.vstack((train_images, image_data))
        train_labels = np.vstack((train_labels, image_labels))

    dirs = []
    for file in os.listdir(validation_dir):
        file_path = os.path.join(validation_dir, file)
        if os.path.isdir(file_path):
            dirs.append(file_path + '/')
    validation_images = np.ndarray((0, 60, 60), dtype=np.uint8)
    validation_labels = np.ndarray((0, 1), dtype=np.uint8)
    # Iterate over the validation directories themselves; the original reused
    # num_dirs from the training loop, which breaks when the counts differ.
    for d in dirs:
        image_data = extract_images(d)
        image_labels = extract_labels(d)
        validation_images = np.vstack((validation_images, image_data))
        validation_labels = np.vstack((validation_labels, image_labels))

    train = DataSet(train_images, train_labels)
    validation = DataSet(validation_images, validation_labels)
    return base.Datasets(train=train, validation=validation)
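This reader relies on a directory layout it never states: each immediate subdirectory of train_dir and validation_dir must hold one chunk that extract_images and extract_labels can read, yielding (N, 60, 60) uint8 images and (N, 1) uint8 labels. A hedged call sketch under that assumption:

# Sketch only; the paths are placeholders, and the .images attribute is
# assumed from the standard TF MNIST DataSet class.
data = read_data_set('/data/train/', '/data/validation/')
print(data.train.images.shape, data.validation.images.shape)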
Example #7
def read_data_sets(one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=500,
                   seed=None):
    train_images, train_labels = process_images(1, one_hot=one_hot)
    validation_images, validation_labels = process_images(2, one_hot=one_hot)
    test_images, test_labels = process_images(3, one_hot=one_hot)
    print('train/validation/test sizes:',
          len(train_images), len(validation_images), len(test_images))

    train = DataSet(
        train_images, train_labels, dtype=dtype, reshape=reshape, seed=seed)
    validation = DataSet(
        validation_images,
        validation_labels,
        dtype=dtype,
        reshape=reshape,
        seed=seed)
    test = DataSet(
        test_images, test_labels, dtype=dtype, reshape=reshape, seed=seed)

    return base.Datasets(train=train, validation=validation, test=test)
Example #8
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=tf.float32,
                   reshape=True,
                   validation_size=5000,
                   seed=None,
                   source_url=DEFAULT_SOURCE_URL):
  if fake_data:

    def fake():
      return DataSet(
          [], [], fake_data=True, one_hot=one_hot, dtype=dtype, seed=seed)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)

  if not source_url:  # empty string check
    source_url = DEFAULT_SOURCE_URL

  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
  TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

  local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                   source_url + TRAIN_IMAGES)
  with tf.gfile.Open(local_file, 'rb') as f:
    train_images = extract_images(f)

  local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                   source_url + TRAIN_LABELS)
  with tf.gfile.Open(local_file, 'rb') as f:
    train_labels = extract_labels(f, one_hot=one_hot)

  local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                   source_url + TEST_IMAGES)
  with tf.gfile.Open(local_file, 'rb') as f:
    test_images = extract_images(f)

  local_file = base.maybe_download(TEST_LABELS, train_dir,
                                   source_url + TEST_LABELS)
  with tf.gfile.Open(local_file, 'rb') as f:
    test_labels = extract_labels(f, one_hot=one_hot)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError('Validation size should be between 0 and {}. Received: {}.'
                     .format(len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  options = dict(dtype=dtype, reshape=reshape, seed=seed)

  train = DataSet(train_images, train_labels, **options)
  validation = DataSet(validation_images, validation_labels, **options)
  test = DataSet(test_images, test_labels, **options)

  return base.Datasets(train=train, validation=validation, test=test)
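This version closely follows the canonical tensorflow.contrib.learn MNIST reader, down to the options dict that keeps the three DataSet constructions consistent. A hedged call sketch; the mirror URL below is a commonly used value, not one taken from the snippet:

# Sketch only; the CVDF mirror is an assumption, substitute your own
# source_url if it is unavailable.
mnist = read_data_sets('/tmp/mnist',
                       one_hot=True,
                       source_url='https://storage.googleapis.com/cvdf-datasets/mnist/')
print(mnist.train.images.shape)  # (55000, 784) with the default validation_size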
Example #9
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)
    normal_const = 20502  # constant used to normalize the input values

    # Load the training dataset.
    train_values = []  # training dataset values
    train_labels = []  # training dataset labels
    for file in glob.glob(train_dir + "/*type1*"):
        with gzip.open(file) as f:
            a = pickle.load(f)
        for i in range(len(a[0])):
            train_values.append(a[0][i] / normal_const)
            train_labels.append(a[1][i])
    train_values = numpy.array(train_values)
    train_labels = numpy.array(train_labels, dtype=numpy.uint16)
    train_labels = train_labels - 1  # shift labels to [0, NUM_CLASSES)
    train = DataSet(train_values, train_labels, dtype=dtype)
    print("Number of training examples: %d" % len(train_values))

    # Load the validation dataset.
    validation_values = []  # validation dataset values
    validation_labels = []  # validation dataset labels
    for file in glob.glob(train_dir + "/*type2*"):
        with gzip.open(file) as f:
            a = pickle.load(f)
        for i in range(len(a[0])):
            validation_values.append(a[0][i] / normal_const)
            validation_labels.append(a[1][i])
    validation_values = numpy.array(validation_values)
    validation_labels = numpy.array(validation_labels, dtype=numpy.uint16)
    validation_labels = validation_labels - 1  # shift labels to [0, NUM_CLASSES)
    validation = DataSet(validation_values, validation_labels, dtype=dtype)
    print("Number of validation examples: %d" % len(validation_values))

    # Load the test dataset.
    test_values = []  # test dataset values
    test_labels = []  # test dataset labels
    for file in glob.glob(train_dir + "/*type3*"):
        with gzip.open(file) as f:
            a = pickle.load(f)
        for i in range(len(a[0])):
            test_values.append(a[0][i] / normal_const)
            test_labels.append(a[1][i])
    test_values = numpy.array(test_values)
    test_labels = numpy.array(test_labels, dtype=numpy.uint16)
    test_labels = test_labels - 1  # shift labels to [0, NUM_CLASSES)
    test = DataSet(test_values, test_labels, dtype=dtype)
    print("Number of test examples: %d" % len(test_values))
    return base.Datasets(train=train, validation=validation, test=test)
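The loader above implies an on-disk format it never documents: every *type1*/*type2*/*type3* file is a gzip-compressed pickle of a (values, labels) pair with 1-based integer labels. A minimal sketch that writes a compatible file, purely to illustrate the assumed format:

# Sketch of the assumed format; the file name and array sizes are
# illustrative only.
import gzip
import pickle
import numpy

values = numpy.random.rand(10, 128) * 20502   # scale matches normal_const above
labels = numpy.arange(1, 11, dtype=numpy.uint16)  # 1-based, as the loader expects
with gzip.open('/tmp/sample_type1.gz', 'wb') as f:
    pickle.dump((values, labels), f)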
Example #10
def read_data_sets(cnt, path, dtype=dtypes.float32, reshape=True):
    train_images = extract_images(path, cnt)

    train = DataSet(train_images, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train)
Example #11
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
  if fake_data:
    def fake():
      return DataSet([], [], [], fake_data=True, one_hot=one_hot, dtype=dtype)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)

  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
  TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

  local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                   SOURCE_URL + TRAIN_IMAGES)
  with open(local_file, 'rb') as f:
    train_images = extract_images(f)

  local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                   SOURCE_URL + TRAIN_LABELS)
  with open(local_file, 'rb') as f:
    train_labels = extract_labels(f, one_hot=one_hot)

  TRAIN_FILEPATH = '/Users/billvarcho/Documents/Research/MNIST/train/DATA/'
  VALIDATION_FILEPATH = '/Users/billvarcho/Documents/Research/MNIST/validation/DATA/'

  local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                   SOURCE_URL + TEST_IMAGES)
  with open(local_file, 'rb') as f:
    test_images = extract_images(f)

  local_file = base.maybe_download(TEST_LABELS, train_dir,
                                   SOURCE_URL + TEST_LABELS)
  with open(local_file, 'rb') as f:
    test_labels = extract_labels(f, one_hot=one_hot)

  TEST_FILEPATH = '/Users/billvarcho/Documents/Research/MNIST/test/DATA/'
  test_barcodes = extract_barcodes(TEST_FILEPATH)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'
        .format(len(train_images), validation_size))


  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  validation_barcodes = extract_barcodes(VALIDATION_FILEPATH)
  # TODO: slice the validation barcodes from the training barcodes instead,
  # e.g. validation_barcodes = train_barcodes[:validation_size]

  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]
  train_barcodes = extract_barcodes(TRAIN_FILEPATH)

  train = DataSet(train_images, train_labels, train_barcodes,
                  dtype=dtype, reshape=reshape)
  validation = DataSet(validation_images,
                       validation_labels,
                       validation_barcodes,
                       dtype=dtype,
                       reshape=reshape)
  test = DataSet(test_images, test_labels, test_barcodes,
                 dtype=dtype, reshape=reshape)

  return base.Datasets(train=train, validation=validation, test=test)