def read_data_sets(cnt, limit, fake_data=False, one_hot=False,
                   dtype=dtypes.float32, reshape=True):
    if fake_data:
        def fake():
            return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)
        train = fake()
        return base.Datasets(train=train, validation=None, test=None)
    train_images = extract_images('/tf/data/images.dat', cnt)
    train_labels = extract_labels('/tf/data/labels.dat', cnt,
                                  one_hot=one_hot, num_classes=limit)
    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    # Fill the unused namedtuple fields explicitly so the Datasets call is valid.
    return base.Datasets(train=train, validation=None, test=None)
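# Usage sketch for the loader above, hedged: it takes on faith the hard-coded
# '/tf/data/*.dat' paths and the module-local extract_images/extract_labels
# helpers; `cnt` caps how many examples are read and `limit` is the number of
# label classes.
#
#   data = read_data_sets(cnt=1000, limit=10, one_hot=True)
#   print(data.train.images.shape, data.train.labels.shape)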
def read_data_sets(train_dir, fake_data=False, one_hot=False,
                   dtype=dtypes.float32, reshape=True, validation_size=5000):
    if fake_data:
        def fake():
            return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)
        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    local_file = maybe_download(TRAIN_IMAGES, train_dir)
    with open(local_file, 'rb') as f:
        train_images = extract_images2(f)
    local_file = maybe_download(TRAIN_LABELS, train_dir)
    with open(local_file, 'rb') as f:
        train_labels = extract_labels2(f, one_hot=one_hot)
    local_file = maybe_download(TEST_IMAGES, train_dir)
    with open(local_file, 'rb') as f:
        test_images = extract_images2(f)
    local_file = maybe_download(TEST_LABELS, train_dir)
    with open(local_file, 'rb') as f:
        test_labels = extract_labels2(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    # Carve the validation split off the front of the training data.
    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images, validation_labels,
                         dtype=dtype, reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)
    return base.Datasets(train=train, validation=validation, test=test)
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32):
    if fake_data:
        def fake():
            return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)
        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    VALIDATION_SIZE = 5000

    local_file = base.maybe_download(TRAIN_IMAGES, train_dir)
    train_images = extract_images(local_file)
    local_file = base.maybe_download(TRAIN_LABELS, train_dir)
    train_labels = extract_labels(local_file, one_hot=one_hot)
    local_file = base.maybe_download(TEST_IMAGES, train_dir)
    test_images = extract_images(local_file)
    local_file = base.maybe_download(TEST_LABELS, train_dir)
    test_labels = extract_labels(local_file, one_hot=one_hot)

    # Carve a fixed-size validation split off the front of the training data.
    validation_images = train_images[:VALIDATION_SIZE]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_images = train_images[VALIDATION_SIZE:]
    train_labels = train_labels[VALIDATION_SIZE:]

    train = DataSet(train_images, train_labels, dtype=dtype)
    validation = DataSet(validation_images, validation_labels, dtype=dtype)
    test = DataSet(test_images, test_labels, dtype=dtype)
    return base.Datasets(train=train, validation=validation, test=test)
def read_data_set_one_image(img, one_hot=False, reshape=True,
                            dtype=dtypes.float32, seed=None):
    test_images, test_labels = process_image(img, one_hot=one_hot)
    test = DataSet(test_images, test_labels, dtype=dtype,
                   reshape=reshape, seed=seed)
    # The single-image DataSet is reused for all three splits.
    return base.Datasets(test=test, train=test, validation=test)
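# Minimal usage sketch for the single-image reader above. It assumes
# process_image turns one image into parallel (images, labels) arrays of
# length 1; `session`, `logits`, and `x` below are illustrative names for an
# already-built TF 1.x evaluation graph, not part of this module.
#
#   datasets = read_data_set_one_image(img)
#   prediction = session.run(logits, feed_dict={x: datasets.test.images})
#
# Returning the same DataSet for all three splits keeps the Datasets tuple
# well-formed for code that expects train/validation/test to all exist.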
def read_datasets():
    data_dir = "/s/red/a/nobackup/cwc/palm_recognition/dataset/"
    print("Loading data", data_dir)
    X_train = np.load(os.path.join(data_dir, "X_train.npy"))
    y_train = np.load(os.path.join(data_dir, "y_train.npy"))
    X_validation = np.load(os.path.join(data_dir, "X_validation.npy"))
    y_validation = np.load(os.path.join(data_dir, "y_validation.npy"))
    print(X_train.shape, y_train.shape, X_validation.shape, y_validation.shape)
    train = DataSet(X_train, y_train)
    validation = DataSet(X_validation, y_validation)
    print("Loading done")
    return base.Datasets(train=train, validation=validation, test=None)
def read_data_set(train_dir, validation_dir):
    # Collect the per-class subdirectories of the training directory.
    dirs = []
    for file in os.listdir(train_dir):
        file_path = os.path.join(train_dir, file)
        if os.path.isdir(file_path):
            dirs.append(file_path + '/')
    num_dirs = len(dirs)
    # train_images = np.ndarray((0, 60, 60, 3), dtype=np.uint8)
    train_images = np.ndarray((0, 60, 60), dtype=np.uint8)
    train_labels = np.ndarray((0, 1), dtype=np.uint8)
    for i in range(num_dirs):
        image_data = extract_images(str(dirs[i]))
        image_labels = extract_labels(str(dirs[i]))
        train_images = np.vstack((train_images, image_data))
        train_labels = np.vstack((train_labels, image_labels))

    # Collect the per-class subdirectories of the validation directory.
    dirs = []
    for file in os.listdir(validation_dir):
        file_path = os.path.join(validation_dir, file)
        if os.path.isdir(file_path):
            dirs.append(file_path + '/')
    num_dirs = len(dirs)  # count the validation directories, not the training ones
    # validation_images = np.ndarray((0, 60, 60, 3), dtype=np.uint8)
    validation_images = np.ndarray((0, 60, 60), dtype=np.uint8)
    validation_labels = np.ndarray((0, 1), dtype=np.uint8)
    for i in range(num_dirs):
        image_data = extract_images(str(dirs[i]))
        image_labels = extract_labels(str(dirs[i]))
        validation_images = np.vstack((validation_images, image_data))
        validation_labels = np.vstack((validation_labels, image_labels))

    train = DataSet(train_images, train_labels)
    validation = DataSet(validation_images, validation_labels)
    return base.Datasets(train=train, validation=validation, test=None)
def read_data_sets(one_hot=False, dtype=dtypes.float32, reshape=True,
                   validation_size=500, seed=None):
    # Splits are produced directly by process_images; validation_size is unused.
    train_images, train_labels = process_images(1, one_hot=one_hot)
    validation_images, validation_labels = process_images(2, one_hot=one_hot)
    test_images, test_labels = process_images(3, one_hot=one_hot)
    print('train/validation/test sizes:', len(train_images),
          len(validation_images), len(test_images))
    train = DataSet(train_images, train_labels, dtype=dtype,
                    reshape=reshape, seed=seed)
    validation = DataSet(validation_images, validation_labels, dtype=dtype,
                         reshape=reshape, seed=seed)
    test = DataSet(test_images, test_labels, dtype=dtype,
                   reshape=reshape, seed=seed)
    return base.Datasets(train=train, validation=validation, test=test)
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32,
                   reshape=True, validation_size=5000, seed=None,
                   source_url=DEFAULT_SOURCE_URL):
    if fake_data:
        def fake():
            return DataSet([], [], fake_data=True, one_hot=one_hot,
                           dtype=dtype, seed=seed)
        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    if not source_url:  # empty string check
        source_url = DEFAULT_SOURCE_URL

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                     source_url + TRAIN_IMAGES)
    with tf.gfile.Open(local_file, 'rb') as f:
        train_images = extract_images(f)
    local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                     source_url + TRAIN_LABELS)
    with tf.gfile.Open(local_file, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)
    local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                     source_url + TEST_IMAGES)
    with tf.gfile.Open(local_file, 'rb') as f:
        test_images = extract_images(f)
    local_file = base.maybe_download(TEST_LABELS, train_dir,
                                     source_url + TEST_LABELS)
    with tf.gfile.Open(local_file, 'rb') as f:
        test_labels = extract_labels(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'
            .format(len(train_images), validation_size))

    # Carve the validation split off the front of the training data.
    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)
    train = DataSet(train_images, train_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    test = DataSet(test_images, test_labels, **options)
    return base.Datasets(train=train, validation=validation, test=test)
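# End-to-end usage sketch for the canonical loader above. It assumes a TF 1.x
# environment where the surrounding module provides the standard MNIST DataSet
# (with next_batch, images, labels, and num_examples) and that
# DEFAULT_SOURCE_URL points at a reachable MNIST mirror.
#
#   data = read_data_sets('/tmp/mnist_data', one_hot=True)
#   batch_images, batch_labels = data.train.next_batch(128)
#   print(data.train.num_examples, data.validation.num_examples,
#         data.test.num_examples)  # 55000 5000 10000 for standard MNIST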
def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=dtypes.float32):
    if fake_data:
        def fake():
            return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)
        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    normal_const = 20502  # constant used to normalize the input values

    # Load the training dataset.
    train_values = []
    train_labels = []
    for file in glob.glob(train_dir + "/*type1*"):
        with gzip.open(file) as f:
            a = pickle.load(f)
        for i in range(0, len(a[0])):
            train_values.append(a[0][i] / normal_const)
            train_labels.append(a[1][i])
    train_values = numpy.array(train_values)
    train_labels = numpy.array(train_labels, numpy.uint16)
    train_labels = train_labels - 1  # classes in [0, NUM_CLASSES)
    train = DataSet(train_values, train_labels, dtype=dtype)
    print("Number of training examples: %d" % len(train_values))

    # Load the validation dataset.
    validation_values = []
    validation_labels = []
    for file in glob.glob(train_dir + "/*type2*"):
        with gzip.open(file) as f:
            a = pickle.load(f)
        for i in range(0, len(a[0])):
            validation_values.append(a[0][i] / normal_const)
            validation_labels.append(a[1][i])
    validation_values = numpy.array(validation_values)
    validation_labels = numpy.array(validation_labels, dtype=numpy.uint16)
    validation_labels = validation_labels - 1  # classes in [0, NUM_CLASSES)
    validation = DataSet(validation_values, validation_labels, dtype=dtype)
    print("Number of validation examples: %d" % len(validation_values))

    # Load the test dataset.
    test_values = []
    test_labels = []
    for file in glob.glob(train_dir + "/*type3*"):
        with gzip.open(file) as f:
            a = pickle.load(f)
        for i in range(0, len(a[0])):
            test_values.append(a[0][i] / normal_const)
            test_labels.append(a[1][i])
    test_values = numpy.array(test_values)
    test_labels = numpy.array(test_labels, dtype=numpy.uint16)
    test_labels = test_labels - 1  # classes in [0, NUM_CLASSES)
    test = DataSet(test_values, test_labels, dtype=dtype)
    print("Number of test examples: %d" % len(test_values))

    return base.Datasets(train=train, validation=validation, test=test)
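# Sketch of producing an input file the pickle-based loader above can read.
# The layout is inferred from the a[0][i] / a[1][i] indexing: each gzipped
# pickle holds a 2-tuple (values, labels) of parallel sequences, with 1-based
# labels that the loader shifts to 0-based. The file name and array contents
# below are illustrative only.
import gzip
import pickle

import numpy as np

values = np.random.rand(10, 64) * 20502      # ten raw feature vectors
labels = np.arange(1, 11, dtype=np.uint16)   # 1-based class labels
with gzip.open('/tmp/sample_type1_data.gz', 'wb') as f:  # matches the *type1* glob
    pickle.dump((values, labels), f)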
def read_data_sets(cnt, path, dtype=dtypes.float32, reshape=True):
    train_images = extract_images(path, cnt)
    train = DataSet(train_images, dtype=dtype, reshape=reshape)
    # Fill the unused namedtuple fields explicitly so the Datasets call is valid.
    return base.Datasets(train=train, validation=None, test=None)
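# Usage sketch for the labels-free variant above. Hedged assumptions: it
# presupposes a DataSet implementation that accepts images without labels
# (unlike the stock TF 1.x class) and an extract_images(path, cnt) helper that
# reads `cnt` images from `path`.
#
#   data = read_data_sets(cnt=500, path='/tmp/images.dat')
#   print(data.train.images.shape)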
def read_data_sets(train_dir, fake_data=False, one_hot=False,
                   dtype=dtypes.float32, reshape=True, validation_size=5000):
    if fake_data:
        def fake():
            return DataSet([], [], [], fake_data=True, one_hot=one_hot, dtype=dtype)
        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    TRAIN_FILEPATH = '/Users/billvarcho/Documents/Research/MNIST/train/DATA/'
    VALIDATION_FILEPATH = '/Users/billvarcho/Documents/Research/MNIST/validation/DATA/'
    TEST_FILEPATH = '/Users/billvarcho/Documents/Research/MNIST/test/DATA/'

    local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                     SOURCE_URL + TRAIN_IMAGES)
    with open(local_file, 'rb') as f:
        train_images = extract_images(f)
    local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                     SOURCE_URL + TRAIN_LABELS)
    with open(local_file, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)
    local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                     SOURCE_URL + TEST_IMAGES)
    with open(local_file, 'rb') as f:
        test_images = extract_images(f)
    local_file = base.maybe_download(TEST_LABELS, train_dir,
                                     SOURCE_URL + TEST_LABELS)
    with open(local_file, 'rb') as f:
        test_labels = extract_labels(f, one_hot=one_hot)

    test_barcodes = extract_barcodes(TEST_FILEPATH)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'
            .format(len(train_images), validation_size))

    # Carve the validation split off the front of the training data; barcodes
    # for each split come from their own directories.
    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    validation_barcodes = extract_barcodes(VALIDATION_FILEPATH)
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]
    train_barcodes = extract_barcodes(TRAIN_FILEPATH)

    train = DataSet(train_images, train_labels, train_barcodes,
                    dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images, validation_labels,
                         validation_barcodes, dtype=dtype, reshape=reshape)
    test = DataSet(test_images, test_labels, test_barcodes,
                   dtype=dtype, reshape=reshape)
    return base.Datasets(train=train, validation=validation, test=test)