def load(size=64, want_mean=False, want_dense=False):
    """Load the small-NORB train/test splits (left camera only).

    Images are centrally cropped from 96x96 to 64x64 and, for other
    sizes, resized bilinearly.

    :param size: output image side length (64 uses the raw crop).
    :param want_mean: scale to [0,1], then standardize with the
        training mean/std.
    :param want_dense: return flat (N, size**2) rows instead of NCHW.
    :return: X, T (one-hot, 5 classes), X_test, T_test,
        T_train_labels, T_labels
    """
    def _read_images(path):
        # NORB .mat files are raw binary with a 24-byte header.
        # BUG FIX: the original opened them in text mode ('r') and
        # never closed the handles.
        f = open(path, 'rb')
        raw = f.read()
        f.close()
        data = np.fromstring(raw[24:], dtype='uint8')
        return data.reshape(24300, 2, 96, 96).astype("float32")

    def _read_labels(path):
        # Category files have a 20-byte header followed by uint32 labels.
        f = open(path, 'rb')
        raw = f.read()
        f.close()
        return np.fromstring(raw[20:], dtype='uint32')

    def _crop_resize(data):
        # Central 64x64 crop of the left (index 0) camera image,
        # optionally resized to (size, size).
        if size == 64:
            return data[:, 0, 16:80, 16:80].reshape(24300, 64**2)
        out = np.zeros((24300, size**2))
        for n in range(24300):
            out[n] = scipy.misc.imresize(
                data[n, 0, 16:80, 16:80], (size, size), 'bilinear').flatten()
        return out

    base = nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/NORB/'
    X = _crop_resize(_read_images(
        base + 'smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat'))
    T_train_labels = _read_labels(
        base + 'smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat')
    T = np.zeros((24300, 5))
    for n in range(24300):
        T[n, T_train_labels[n]] = 1
    X_test = _crop_resize(_read_images(
        base + 'smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat'))
    T_labels = _read_labels(
        base + 'smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat')
    T_test = np.zeros((24300, 5))
    for n in range(24300):
        T_test[n, T_labels[n]] = 1
    if want_mean:
        # Standardize both splits with the training statistics only.
        X = X / 255.0
        X_test = X_test / 255.0
        X_mean = X.mean(0)
        X_std = X.std(0)
        X = (X - X_mean) / X_std
        X_test = (X_test - X_mean) / X_std
    if not want_dense:
        X = X.reshape(24300, 1, size, size)
        X_test = X_test.reshape(24300, 1, size, size)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load_pylearn2(want_dense=False):
    """Load the pylearn2/mirzamom SVHN HDF5 split (train + test).

    :param want_dense: flatten images to (N, 3072) rows.
    :return: X, T, X_test, T_test, T_train_labels, T_labels; the label
        vectors are None because this file only carries the y targets.
    """
    f_train = open_file(
        nn.nas_address() +
        "/PSI-Share-no-backup/Ali/Dataset/SVHN/mirzamom/splitted_train_32x32.h5",
        mode="r")
    X = np.array(f_train.root.Data.X).reshape(-1, 3, 32, 32)[:70000, :, :, :]
    T_train_labels = None
    # NOTE(review): X keeps 70000 rows but only 10000 targets are sliced
    # here -- looks like a mismatch with the [:70000] above; confirm
    # against the HDF5 layout before relying on T.
    T = f_train.root.Data.y[:10000]
    f_test = open_file(
        nn.nas_address() +
        "/PSI-Share-no-backup/Ali/Dataset/SVHN/mirzamom/test_32x32.h5",
        mode="r")
    X_test = np.array(f_test.root.Data.X).reshape(-1, 3, 32, 32)[:10000, :, :, :]
    T_labels = None
    T_test = f_test.root.Data.y[:10000]
    if want_dense:
        X = X.reshape(70000, 3072)
        X_test = X_test.reshape(10000, 3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load_extra(want_mean = False,want_dense=False): X_total = np.zeros((600000,3,32,32)) T_train_labels_total = np.zeros(600000) T_total = np.zeros((600000,10)) f = scipy.io.loadmat(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/SVHN/train_32x32.mat") X = f['X'].astype("float64") X = np.swapaxes(X,0,3) X = np.swapaxes(X,1,2) X = np.swapaxes(X,2,3) X_total[:70000,:,:,:] = X[:70000,:,:,:] T_train_labels_total[:70000] = f['y'].ravel()[:70000]%10 print "Train Loaded." f = scipy.io.loadmat(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/SVHN/extra_32x32.mat") X = f['X'].astype("float32") X = np.swapaxes(X,0,3) X = np.swapaxes(X,1,2) X = np.swapaxes(X,2,3) X_total[70000:600000,:,:,:] = X[:530000,:,:,:] T_train_labels_total[70000:600000] = f['y'].ravel()[:530000]%10 print "Extra Loaded." T_total = np.zeros((600000,10)) for i in range(600000): T_total[i,T_train_labels_total[i]]= 1 f = scipy.io.loadmat(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/SVHN/test_32x32.mat") X_test = f['X'].astype("float32") X_test = np.swapaxes(X_test,0,3) X_test = np.swapaxes(X_test,1,2) X_test = np.swapaxes(X_test,2,3) X_test = X_test[:20000,:,:,:] T_labels = f['y'].ravel()[:20000]%10 T_test = np.zeros((20000,10)) for i in range(20000): T_test[i,T_labels[i]]= 1 print "Test Loaded." if want_mean: X_mean= X_total.mean(0) X_std = X_total.std(0) X_total = (X_total-X_mean)/X_std X_test = (X_test-X_mean)/X_std if want_dense: X_total = X_total.reshape(600000,3072) X_test = X_test.reshape(20000,3072) return X_total,T_total,X_test,T_test,T_train_labels_total,T_labels
def load(backend="numpy", binary=False, want_dense=False):
    """Load MNIST train (60k) and test (10k) sets from the IDX files.

    :param backend: "numpy" wraps with nn.array, "gnumpy" with nn.garray.
    :param binary: threshold pixels at 0.5 to {0, 1}.
    :param want_dense: keep flat (N, 784) rows instead of (N, 1, 28, 28).
    :return: X, T (one-hot), X_test, T_test, T_train_labels, T_labels
    """
    def _read(path, header):
        # IDX files are binary. BUG FIX: the original opened them in
        # text mode ('r') and never closed the handles.
        f = open(path, 'rb')
        raw = f.read()
        f.close()
        return np.fromstring(raw[header:], dtype='uint8')

    base = nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/MNIST/'
    s = 60000
    # 16-byte header on image files, 8-byte header on label files.
    X = np.reshape(_read(base + 'train-images.idx3-ubyte', 16), (s, 784)) / 255.0
    T_train_labels = _read(base + 'train-labels.idx1-ubyte', 8)
    T = np.zeros((s, 10))
    for n in range(s):
        T[n, T_train_labels[n]] = 1
    s_test = 10000
    X_test = np.reshape(_read(base + 't10k-images.idx3-ubyte', 16), (s_test, 784)) / 255.0
    T_labels = _read(base + 't10k-labels.idx1-ubyte', 8)
    T_labels = T_labels.astype("float32")
    T_test = np.zeros((s_test, 10))
    for n in range(s_test):
        T_test[n, T_labels[n]] = 1
    if binary:
        # pixels exactly equal to .5 are left untouched (as before)
        X[X > .5] = 1
        X[X < .5] = 0
        X_test[X_test > .5] = 1.0
        X_test[X_test < .5] = 0.0
    if want_dense == False:
        X = X.reshape(60000, 1, 28, 28)
        X_test = X_test.reshape(10000, 1, 28, 28)
    if backend == "numpy":
        X = nn.array(X)
        T = nn.array(T)
        X_test = nn.array(X_test)
        T_test = nn.array(T_test)
        T_train_labels = nn.array(T_train_labels)
        T_labels = nn.array(T_labels)
    if backend == "gnumpy":
        X = nn.garray(X)
        T = nn.garray(T)
        X_test = nn.garray(X_test)
        T_test = nn.garray(T_test)
        T_train_labels = nn.garray(T_train_labels)
        T_labels = nn.garray(T_labels)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load(backend="numpy", bias=None, raw=False):
    """Load the Frey faces, contrast-normalized and optionally whitened.

    :param backend: "numpy" returns an ndarray, anything else nn.garray.
    :param bias: if truthy (and not raw), whiten with eigenvalue bias.
    :param raw: skip per-image std division (mean is still removed).
    :return: images of shape (1965, 1, 20, 20) after cropping.
    """
    f = scipy.io.loadmat(
        nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/Frey/frey_rawface.mat")
    X = f['ff'].T
    # per-image mean (rows are images after the transpose)
    X_mean = X.mean(axis=1)
    if not raw:
        # +10 regularizes near-constant images before dividing
        X_std = (X.var(1) + 10)**.5
    if not raw:
        X = (X - X_mean[:, np.newaxis]) / X_std[:, np.newaxis]
    else:
        X = (X - X_mean[:, np.newaxis])
    # M = X.mean(axis=0)
    # X -= M

    def cov(X):
        # NOTE(review): `X -= X_mean` mutates the caller's array in
        # place, so cov(X) below also centers the outer X before the
        # whitening projection -- the whitening step appears to rely on
        # this side effect; confirm before refactoring.
        X_mean = X.mean(axis=0)
        X -= X_mean
        return np.dot(X.T, X) / (1.0 * X.shape[0] - 1)
    if not raw and bias:
        sigma = cov(X)
        u, s, v = np.linalg.svd(sigma)
        # presumably a ZCA-style whitening filter: u diag(1/sqrt(s+bias)) u^T
        P = np.dot(np.dot(u, np.diag(np.sqrt(1. / (s + bias)))), u.T)
        X = np.dot(X, P)
    X = X.reshape(1965, 1, 28, 20)
    # crop rows 3..22 -> 20x20 faces
    X = X[:, :, 3:-5, :]
    if backend == "numpy":
        return X
    else:
        return nn.garray(X)
def load(self, X, T, train_range_id, T_labels=None, offset_x=0, offset_y=0):
    """Fill X and T in place from CIFAR-10 pickle batch `train_range_id`.

    Applies optional whitening (self.bias), a random horizontal flip per
    image, and a crop of self.crop_size at (offset_x, offset_y).

    :param X: output array, (10000, 3, crop_size, crop_size), written in place.
    :param T: output one-hot array (10000, 10), written in place.
    :param T_labels: optional output vector of integer labels.
    """
    T[:] = 0
    fo = open(nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/CIFAR10/batches/data_batch_' + str(train_range_id), 'rb')
    batch = cPickle.load(fo)  # renamed from `dict` (shadowed the builtin)
    fo.close()
    temp = batch['data'].T
    if self.bias:
        # contrast-normalize, then whiten with the precomputed M/P
        # (see __init__); +10 regularizes the variance.
        temp_mean = temp.mean(axis=1)
        temp_std = (temp.var(1) + 10)**.5
        temp = (temp - temp_mean[:, np.newaxis]) / temp_std[:, np.newaxis]
        temp -= self.M
        temp = np.dot(temp, self.P)
    temp = temp.reshape(10000, 3, 32, 32)
    for i in xrange(10000):
        # random horizontal flip, channel by channel
        if np.random.rand() > .5:
            for j in xrange(3):
                temp[i, j, :, :] = np.fliplr(temp[i, j, :, :])
    X[:] = temp[:, :, offset_x:self.crop_size + offset_x,
                offset_y:self.crop_size + offset_y]
    # BUG FIX: `if T_labels:` is ambiguous for numpy arrays (raises
    # ValueError); test identity against None instead.
    if T_labels is not None:
        T_labels[:] = batch['labels']
    for i in range(10000):
        T[i, batch['labels'][i]] = 1
def load(want_mean=False,want_dense=False): f = scipy.io.loadmat(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/Toronto-Face/TFD_ranzato_48x48.mat") labs_ex = f['labs_ex'] # print labs_ex.shape folds = f['folds'] # print folds.shape labs_id = f['labs_id'] # print labs_id.shape X = f['images'] X = X.reshape(-1,1,48,48).astype("float32") print X.shape # print X.max() if want_mean: X_mean= X.mean(0) X_std = X.std(0) X = (X-X_mean)/X_std # X_test = (X_test-X_mean)/X_std if want_dense: X = X.reshape(-1,2304) # print X.max() return X
def load_torch(want_dense=False, want_bw=False):
    """Load the torch-preprocessed SVHN (rgb_21) HDF5 train/test sets.

    Note: want_bw is accepted for API compatibility but has no effect.

    :return: X, T (one-hot), X_test, T_test, T_train_labels, T_labels
    """
    h5 = h5py.File(nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/SVHN/torch/svhn_rgb_21.h5', 'r')
    X = np.array(h5['X'])[:70000]
    # digit '0' is stored as 10, hence the %10
    T_train_labels = np.array(h5['T_train_labels'])[:70000] % 10
    T = np.zeros((70000, 10))
    for row in range(70000):
        T[row, T_train_labels[row]] = 1
    X_test = np.array(h5['X_test'])[:10000]
    T_labels = np.array(h5['T_labels'])[:10000] % 10
    T_test = np.zeros((10000, 10))
    for row in range(10000):
        T_test[row, T_labels[row]] = 1
    if want_dense:
        X = X.reshape(70000, 3072)
        X_test = X_test.reshape(10000, 3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load(self, X, T, train_range_id, T_labels=None):
    """Fill X and T in place from SVHN .npz batch `train_range_id`.

    Applies either whitening (self.bias) or mean/std normalization
    (self.mean) to the 3072 images of the batch.

    :param X: output array (3072, 3, 32, 32), written in place.
    :param T: output one-hot array (3072, 10), written in place.
    :param T_labels: optional output vector of integer labels.
    """
    T[:] = 0
    fo = np.load(
        nn.nas_address() +
        '/PSI-Share-no-backup/Ali/Dataset/SVHN/batches/data_batch_' +
        str(train_range_id) + '.npz')
    if self.bias:
        # contrast-normalize each row, then whiten with precomputed M/P
        temp = fo['X'].reshape(3072, 32 * 32 * 3)
        temp_mean = temp.mean(axis=1)
        temp_std = (temp.var(1) + 10)**.5
        temp = (temp - temp_mean[:, np.newaxis]) / temp_std[:, np.newaxis]
        temp -= self.M
        temp = np.dot(temp, self.P)
        X[:] = temp.reshape(3072, 3, 32, 32)
    elif self.mean:
        X[:] = (fo['X'] - self.M) / self.std
    else:
        X[:] = fo['X']
    for i in range(3072):
        # digit '0' is stored as 10, hence the %10
        T[i, fo['T'][i] % 10] = 1
    # BUG FIX: `T_labels != None` compares elementwise on numpy arrays
    # and its truth value is ambiguous; use an identity test.
    if T_labels is not None:
        T_labels[:] = fo['T'] % 10
def load(backend="numpy", bias=None, raw=False):
    """Load the Frey faces, contrast-normalized and optionally whitened.

    Duplicate of the formatted Frey loader above; kept in sync.

    :param backend: "numpy" returns an ndarray, anything else nn.garray.
    :param bias: if truthy (and not raw), whiten with eigenvalue bias.
    :param raw: skip per-image std division (mean is still removed).
    :return: images of shape (1965, 1, 20, 20) after cropping.
    """
    f = scipy.io.loadmat(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/Frey/frey_rawface.mat")
    X = f['ff'].T
    # per-image mean (rows are images after the transpose)
    X_mean = X.mean(axis=1)
    if not raw:
        # +10 regularizes near-constant images before dividing
        X_std = (X.var(1) + 10)**.5
    if not raw:
        X = (X - X_mean[:, np.newaxis]) / X_std[:, np.newaxis]
    else:
        X = (X - X_mean[:, np.newaxis])
    # M = X.mean(axis=0)
    # X -= M

    def cov(X):
        # NOTE(review): `X -= X_mean` mutates the caller's array in
        # place, so cov(X) also centers the outer X before the
        # projection below -- side effect apparently relied upon.
        X_mean = X.mean(axis=0)
        X -= X_mean
        return np.dot(X.T, X) / (1.0 * X.shape[0] - 1)
    if not raw and bias:
        sigma = cov(X)
        u, s, v = np.linalg.svd(sigma)
        # presumably ZCA-style whitening: u diag(1/sqrt(s+bias)) u^T
        P = np.dot(np.dot(u, np.diag(np.sqrt(1. / (s + bias)))), u.T)
        X = np.dot(X, P)
    X = X.reshape(1965, 1, 28, 20)
    # crop rows 3..22 -> 20x20 faces
    X = X[:, :, 3:-5, :]
    if backend == "numpy":
        return X
    else:
        return nn.garray(X)
def load_torch(want_dense=False, want_bw=False):
    """Load torch-preprocessed SVHN (rgb_21) from HDF5.

    want_bw is accepted for API compatibility but unused.

    :return: X, T (one-hot), X_test, T_test, T_train_labels, T_labels
    """
    path = nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/SVHN/torch/svhn_rgb_21.h5'
    myFile = h5py.File(path, 'r')

    def _one_hot(labels, count):
        # expand integer labels into a (count, 10) indicator matrix
        out = np.zeros((count, 10))
        for i in range(count):
            out[i, labels[i]] = 1
        return out

    X = np.array(myFile['X'])[:70000, :, :, :]
    T_train_labels = np.array(myFile['T_train_labels'])[:70000] % 10
    T = _one_hot(T_train_labels, 70000)
    X_test = np.array(myFile['X_test'])[:10000, :, :, :]
    T_labels = np.array(myFile['T_labels'])[:10000] % 10
    T_test = _one_hot(T_labels, 10000)
    if want_dense:
        X = X.reshape(70000, 3072)
        X_test = X_test.reshape(10000, 3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def __init__(self,train_range=None,test_range=None,mini_batch = None,crop_size = 24,want_auto = False,bias=None): self.bias = bias if bias: # print nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_"+str(bias) try: f = np.load(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_"+str(bias)+".npz") self.M = f['M']; self.P = f['P']; except: # print "hello" _,_,_,_,_,_,self.M,self.P = load_cifar10_adam(backend="numpy",bias = bias) print "M and P saved to NAS." np.savez(nn.nas_address+"/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_.1",M=M,P=P) # self.want_whiten = want_whiten self.num_threads = 5 self.want_auto = want_auto self.start = True self.switch = 0 self.train_range = train_range self.test_range = test_range assert train_range!=None self.train_range_id = self.train_range[0] self.test_range_id = self.test_range[0] self.mini_batch = mini_batch self.crop_size = crop_size self.crop_offset = (32-crop_size) shared_array_base_X0 = multiprocessing.Array(ctypes.c_double, 3*self.crop_size*self.crop_size*10000) shared_array_X0 = np.ctypeslib.as_array(shared_array_base_X0.get_obj()) self.X0 = shared_array_X0.reshape(10000,3,self.crop_size,self.crop_size) shared_array_base_X1 = multiprocessing.Array(ctypes.c_double, 3*self.crop_size*self.crop_size*10000) shared_array_X1 = np.ctypeslib.as_array(shared_array_base_X1.get_obj()) self.X1 = shared_array_X1.reshape(10000,3,self.crop_size,self.crop_size) shared_array_base_T0 = multiprocessing.Array(ctypes.c_double, 10000*10) shared_array_T0 = np.ctypeslib.as_array(shared_array_base_T0.get_obj()) self.T0 = shared_array_T0.reshape(10000,10) shared_array_base_T1 = multiprocessing.Array(ctypes.c_double, 10000*10) shared_array_T1 = np.ctypeslib.as_array(shared_array_base_T1.get_obj()) self.T1 = shared_array_T1.reshape(10000,10) if test_range!=None: shared_array_base_X_test = multiprocessing.Array(ctypes.c_double, 3*self.crop_size*self.crop_size*10000) shared_array_X_test = 
np.ctypeslib.as_array(shared_array_base_X_test.get_obj()) self.X_test = shared_array_X_test.reshape(10000,3,self.crop_size,self.crop_size) assert self.X_test.base.base is shared_array_base_X_test.get_obj() shared_array_base_T_test = multiprocessing.Array(ctypes.c_double, 10000*10000) shared_array_T_test = np.ctypeslib.as_array(shared_array_base_T_test.get_obj()) self.T_test = shared_array_T_test.reshape(10000,10000) assert self.T_test.base.base is shared_array_base_T_test.get_obj() shared_array_base_T_labels_test = multiprocessing.Array(ctypes.c_double, 10000) shared_array_T_labels_test = np.ctypeslib.as_array(shared_array_base_T_labels_test.get_obj()) self.T_labels_test = shared_array_T_labels_test.reshape(10000) assert self.T_labels_test.base.base is shared_array_base_T_labels_test.get_obj()
def load(want_mean=False, want_dense=False):
    """Load the Toronto Face Dataset (ranzato 48x48) images.

    :param want_mean: standardize per pixel over the whole set.
    :param want_dense: flatten to (N, 2304) rows.
    :return: float32 image array, NCHW unless want_dense.
    """
    f = scipy.io.loadmat(
        nn.nas_address() +
        "/PSI-Share-no-backup/Ali/Dataset/Toronto-Face/TFD_ranzato_48x48.mat"
    )
    # metadata tables are read but not returned (currently unused)
    labs_ex = f['labs_ex']
    # print labs_ex.shape
    folds = f['folds']
    # print folds.shape
    labs_id = f['labs_id']
    # print labs_id.shape
    X = f['images']
    X = X.reshape(-1, 1, 48, 48).astype("float32")
    print X.shape
    # print X.max()
    if want_mean:
        # per-pixel standardization; there is no separate test split here
        X_mean = X.mean(0)
        X_std = X.std(0)
        X = (X - X_mean) / X_std
        # X_test = (X_test-X_mean)/X_std
    if want_dense:
        X = X.reshape(-1, 2304)
    # print X.max()
    return X
def load(want_mean=False, want_dense=False):
    """Load SVHN train (first 70k) and test (first 20k) sets.

    Labels are taken modulo 10 so digit '0' (stored as 10) maps to 0.

    :param want_mean: standardize both splits with training mean/std.
    :param want_dense: flatten images to rows of 3072 values.
    :return: X, T (one-hot), X_test, T_test, T_train_labels, T_labels
    """
    def _to_nchw(arr):
        # (H, W, C, N) -> (N, C, H, W), one axis swap at a time
        arr = np.swapaxes(arr, 0, 3)
        arr = np.swapaxes(arr, 1, 2)
        return np.swapaxes(arr, 2, 3)

    train = scipy.io.loadmat(
        nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/train_32x32.mat")
    X = _to_nchw(train['X'].astype("float64"))[:70000, :, :, :]
    T_train_labels = train['y'].ravel()[:70000] % 10
    T = np.zeros((70000, 10))
    for i in range(70000):
        T[i, T_train_labels[i]] = 1
    test = scipy.io.loadmat(
        nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/test_32x32.mat")
    X_test = _to_nchw(test['X'].astype("float64"))[:20000, :, :, :]
    T_labels = test['y'].ravel()[:20000] % 10
    T_test = np.zeros((20000, 10))
    for i in range(20000):
        T_test[i, T_labels[i]] = 1
    if want_mean:
        # standardize both splits with the training statistics
        X_mean = X.mean(0)
        X_std = X.std(0)
        X = (X - X_mean) / X_std
        X_test = (X_test - X_mean) / X_std
    if want_dense:
        X = X.reshape(70000, 3072)
        X_test = X_test.reshape(20000, 3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load_pylearn2(want_dense=False):
    """Load the pylearn2/mirzamom SVHN HDF5 split (duplicate loader).

    :param want_dense: flatten images to (N, 3072) rows.
    :return: X, T, X_test, T_test, T_train_labels, T_labels; the label
        vectors are None because this file only carries the y targets.
    """
    f_train = open_file(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/mirzamom/splitted_train_32x32.h5", mode="r")
    X = np.array(f_train.root.Data.X).reshape(-1, 3, 32, 32)[:70000, :, :, :]
    T_train_labels = None
    # NOTE(review): 70000 images but only 10000 targets sliced -- looks
    # like a mismatch; confirm against the HDF5 layout.
    T = f_train.root.Data.y[:10000]
    f_test = open_file(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/mirzamom/test_32x32.h5", mode="r")
    X_test = np.array(f_test.root.Data.X).reshape(-1, 3, 32, 32)[:10000, :, :, :]
    T_labels = None
    T_test = f_test.root.Data.y[:10000]
    if want_dense:
        X = X.reshape(70000, 3072)
        X_test = X_test.reshape(10000, 3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load(want_mean=False, want_dense=False):
    """Load SVHN train (first 70k) and test (first 20k) splits.

    Labels are taken %10 so digit '0' (stored as 10) becomes class 0.

    :param want_mean: standardize both splits with training mean/std.
    :param want_dense: flatten images to (N, 3072) rows.
    :return: X, T (one-hot), X_test, T_test, T_train_labels, T_labels
    """
    f = scipy.io.loadmat(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/train_32x32.mat")
    X = f['X'].astype("float64")
    # (H, W, C, N) -> (N, C, H, W)
    X = np.swapaxes(X, 0, 3)
    X = np.swapaxes(X, 1, 2)
    X = np.swapaxes(X, 2, 3)
    X = X[:70000, :, :, :]
    T_train_labels = f['y'].ravel()[:70000] % 10
    T = np.zeros((70000, 10))
    for i in range(70000):
        T[i, T_train_labels[i]] = 1
    f = scipy.io.loadmat(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/test_32x32.mat")
    X_test = f['X'].astype("float64")
    X_test = np.swapaxes(X_test, 0, 3)
    X_test = np.swapaxes(X_test, 1, 2)
    X_test = np.swapaxes(X_test, 2, 3)
    # print X_test.shape
    X_test = X_test[:20000, :, :, :]
    T_labels = f['y'].ravel()[:20000] % 10
    T_test = np.zeros((20000, 10))
    for i in range(20000):
        T_test[i, T_labels[i]] = 1
    if want_mean:
        # standardize both splits with the training statistics
        X_mean = X.mean(0)
        X_std = X.std(0)
        X = (X - X_mean) / X_std
        X_test = (X_test - X_mean) / X_std
    if want_dense:
        X = X.reshape(70000, 3072)
        X_test = X_test.reshape(20000, 3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load_file_svhn_contrast_extra():
    """Open the contrast-normalized SVHN 'extra' HDF5 file.

    The returned objects are lazy h5py datasets, not numpy arrays, and
    remain valid only while the file stays open -- the handle is
    therefore deliberately not closed here.

    :return: X, T, X_test, T_test, T_train_labels, T_labels datasets.
    """
    myFile = h5py.File(nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/SVHN/extra_contrast21.01_new.h5', 'r')
    X = myFile['X']
    T = myFile['T']
    X_test = myFile['X_test']
    T_test = myFile['T_test']
    T_train_labels = myFile['T_train_labels']
    T_labels = myFile['T_labels']
    # if want_dense:
    #     X = X_cn.reshape(600000,3072)
    #     X_test = X_test_cn.reshape(10000,3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load_file_svhn_contrast_extra():
    """Open the contrast-normalized SVHN 'extra' HDF5 file (duplicate).

    Returns lazy h5py datasets that are only valid while the file is
    open, so the handle is deliberately left open.

    :return: X, T, X_test, T_test, T_train_labels, T_labels datasets.
    """
    myFile = h5py.File(
        nn.nas_address() +
        '/PSI-Share-no-backup/Ali/Dataset/SVHN/extra_contrast21.01_new.h5',
        'r')
    X = myFile['X']
    T = myFile['T']
    X_test = myFile['X_test']
    T_test = myFile['T_test']
    T_train_labels = myFile['T_train_labels']
    T_labels = myFile['T_labels']
    # if want_dense:
    #     X = X_cn.reshape(600000,3072)
    #     X_test = X_test_cn.reshape(10000,3072)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load_extra_torch(): myFile = h5py.File( nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/SVHN/torch/svhn_extra_rgb_13.h5', 'r') # myFile = h5py.File('svhn_old.h5', 'r') X = np.array(myFile['X']) # temp = X[10000:10900,:,:,:] # nn.show_images(temp,(30,30)); plt.show() T_train_labels = np.array(myFile['T_train_labels']) T_train_labels = T_train_labels % 10 # print T_train_labels[100000:1000010] print "dataset loaded" T = np.zeros((600000, 10)) for i in range(600000): # if i%10000==0: # print i,T_train_labels[i:i+10] T[i, T_train_labels[i]] = 1 X_test = np.array(myFile['X_test'])[:10000, :, :, :] T_labels = np.array(myFile['T_labels'])[:10000] T_labels = T_labels % 10 T_test = np.zeros((10000, 10)) for i in range(10000): T_test[i, T_labels[i]] = 1 # if want_bw: # X = X[:,:1,:,:].reshape(70000,1024) # X_test = X_test[:,:1,:,:].reshape(70000,1024) # return X,T,X_test,T_test,T_train_labels,T_labels # if want_dense: # X = X.reshape(70000,3072) # X_test = X_test.reshape(10000,3072) return X, T, X_test, T_test, T_train_labels, T_labels
def load(self, X, T, train_range_id, T_labels=None):
    """Fill X and T in place from SVHN .npz batch `train_range_id`.

    Applies either whitening (self.bias) or mean/std normalization
    (self.mean) to the 3072 images in the batch.

    :param X: output array (3072, 3, 32, 32), written in place.
    :param T: output one-hot array (3072, 10), written in place.
    :param T_labels: optional output vector of integer labels.
    """
    T[:] = 0
    fo = np.load(nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/SVHN/batches/data_batch_' + str(train_range_id) + '.npz')
    if self.bias:
        # contrast-normalize each row, then whiten with precomputed M/P
        temp = fo['X'].reshape(3072, 32 * 32 * 3)
        temp_mean = temp.mean(axis=1)
        temp_std = (temp.var(1) + 10)**.5
        temp = (temp - temp_mean[:, np.newaxis]) / temp_std[:, np.newaxis]
        temp -= self.M
        temp = np.dot(temp, self.P)
        X[:] = temp.reshape(3072, 3, 32, 32)
    elif self.mean:
        X[:] = (fo['X'] - self.M) / self.std
    else:
        X[:] = fo['X']
    for i in range(3072):
        # digit '0' is stored as 10, hence the %10
        T[i, fo['T'][i] % 10] = 1
    # BUG FIX: `T_labels != None` compares elementwise on numpy arrays
    # and its truth value is ambiguous; use an identity test.
    if T_labels is not None:
        T_labels[:] = fo['T'] % 10
def load_extra_torch(): myFile = h5py.File(nn.nas_address()+'/PSI-Share-no-backup/Ali/Dataset/SVHN/torch/svhn_extra_rgb_13.h5', 'r') # myFile = h5py.File('svhn_old.h5', 'r') X = np.array(myFile['X']) # temp = X[10000:10900,:,:,:] # nn.show_images(temp,(30,30)); plt.show() T_train_labels = np.array(myFile['T_train_labels']) T_train_labels = T_train_labels%10 # print T_train_labels[100000:1000010] print "dataset loaded" T = np.zeros((600000,10)) for i in range(600000): # if i%10000==0: # print i,T_train_labels[i:i+10] T[i,T_train_labels[i]]= 1 X_test = np.array(myFile['X_test'])[:10000,:,:,:] T_labels = np.array(myFile['T_labels'])[:10000] T_labels = T_labels%10 T_test = np.zeros((10000,10)) for i in range(10000): T_test[i,T_labels[i]]= 1 # if want_bw: # X = X[:,:1,:,:].reshape(70000,1024) # X_test = X_test[:,:1,:,:].reshape(70000,1024) # return X,T,X_test,T_test,T_train_labels,T_labels # if want_dense: # X = X.reshape(70000,3072) # X_test = X_test.reshape(10000,3072) return X,T,X_test,T_test,T_train_labels,T_labels
def load(self, X, T, train_range_id, T_labels=None, offset_x=0, offset_y=0):
    """Fill X and T in place from CIFAR-10 pickle batch `train_range_id`.

    Applies optional whitening (self.bias), a random horizontal flip per
    image, and a crop of self.crop_size at (offset_x, offset_y).

    :param X: output array, (10000, 3, crop_size, crop_size), written in place.
    :param T: output one-hot array (10000, 10), written in place.
    :param T_labels: optional output vector of integer labels.
    """
    T[:] = 0
    fo = open(
        nn.nas_address() +
        '/PSI-Share-no-backup/Ali/Dataset/CIFAR10/batches/data_batch_' +
        str(train_range_id), 'rb')
    batch = cPickle.load(fo)  # renamed from `dict` (shadowed the builtin)
    fo.close()
    temp = batch['data'].T
    if self.bias:
        # contrast-normalize, then whiten with the precomputed M/P
        # (see __init__); +10 regularizes the variance.
        temp_mean = temp.mean(axis=1)
        temp_std = (temp.var(1) + 10)**.5
        temp = (temp - temp_mean[:, np.newaxis]) / temp_std[:, np.newaxis]
        temp -= self.M
        temp = np.dot(temp, self.P)
    temp = temp.reshape(10000, 3, 32, 32)
    for i in xrange(10000):
        # random horizontal flip, channel by channel
        if np.random.rand() > .5:
            for j in xrange(3):
                temp[i, j, :, :] = np.fliplr(temp[i, j, :, :])
    X[:] = temp[:, :, offset_x:self.crop_size + offset_x,
                offset_y:self.crop_size + offset_y]
    # BUG FIX: `if T_labels:` is ambiguous for numpy arrays (raises
    # ValueError); test identity against None instead.
    if T_labels is not None:
        T_labels[:] = batch['labels']
    for i in range(10000):
        T[i, batch['labels'][i]] = 1
def load_norb(size=64, mode="single", want_dense=False):
    """Load small-NORB with both stereo cameras arranged per `mode`.

    mode:
      "single"    -- left camera only, rows of size**2.
      "parallel"  -- both cameras concatenated per row, 2*size**2.
      "serial"    -- cameras stacked as extra rows (48600 rows).
      "binocular" -- both cameras side by side in image space.

    :param size: output image side length.
    :param want_dense: keep flat rows instead of (N, 1, size, size).
    :return: X, T (one-hot, 5 classes), X_test, T_test,
        T_train_labels, T_labels, with X scaled to [0, 1].
    """
    rnd_permute = np.arange(24300)  # identity permutation (shuffling disabled)

    def _labels(path):
        # category file: 20-byte header then uint32 labels; binary mode,
        # handle closed (the original used 'r' and leaked it).
        f = open(path, 'rb')
        raw = f.read()
        f.close()
        return np.fromstring(raw[20:], dtype='uint32')

    def _images(path):
        # image file: 24-byte header then uint8 stereo pairs
        f = open(path, 'rb')
        raw = f.read()
        f.close()
        return np.fromstring(raw[24:], dtype='uint8').reshape(24300, 2, 96, 96)

    def _one_hot(labels):
        out = np.zeros((labels.shape[0], 5))
        for n in range(labels.shape[0]):
            out[n, labels[n]] = 1
        return out

    def _build(data_):
        # Crop each camera to the central 64x64, resize to `size`, and
        # arrange the two cameras according to `mode`.
        # BUG FIX (inherited): the original test-split "parallel" branch
        # referenced an undefined name `resize`; `size` is used here for
        # both splits.
        left = data_[:, 0, 16:80, 16:80]
        right = data_[:, 1, 16:80, 16:80]
        if mode == "single":
            out = np.zeros((24300, size**2))
            for n in range(24300):
                out[n] = scipy.misc.imresize(
                    left[n, :, :], (size, size), 'bilinear').flatten()
        elif mode == "parallel":
            out = np.zeros((24300, 2 * size**2))
            for n in range(24300):
                out[n][:size**2] = scipy.misc.imresize(
                    left[n, :, :], (size, size), 'bilinear').flatten()
                out[n][-size**2:] = scipy.misc.imresize(
                    right[n, :, :], (size, size), 'bilinear').flatten()
        elif mode == "serial":
            out = np.zeros((48600, size**2))
            for n in range(24300):
                out[n] = scipy.misc.imresize(
                    left[n, :, :], (size, size), 'bilinear').flatten()
                out[n + 24300] = scipy.misc.imresize(
                    right[n, :, :], (size, size), 'bilinear').flatten()
        elif mode == "binocular":
            out = np.zeros((24300, 2 * size**2))
            for n in range(24300):
                a = scipy.misc.imresize(left[n, :, :], (size, size), 'bilinear')
                b = scipy.misc.imresize(right[n, :, :], (size, size), 'bilinear')
                out[n] = np.concatenate((a, b), axis=1).ravel()
        return out

    base = nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/NORB/'
    T_train_labels = _labels(
        base + 'smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat')[rnd_permute]
    if mode == "serial":
        # BUG FIX: axis=1 is invalid for 1-D label vectors; both camera
        # views share the label, so duplicate along axis 0.
        T_train_labels = np.concatenate((T_train_labels, T_train_labels), axis=0)
    T = _one_hot(T_train_labels)
    X = _build(_images(base + 'smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat'))

    T_labels = _labels(base + 'smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat')
    if mode == "serial":
        T_labels = np.concatenate((T_labels, T_labels), axis=0)
    T_test = _one_hot(T_labels)
    X_test = _build(_images(base + 'smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat'))

    X = X.astype("float32") / 255.0
    X_test = X_test.astype("float32") / 255.0
    if not want_dense:
        X = X.reshape(-1, 1, size, size)
        X_test = X_test.reshape(-1, 1, size, size)
    return X, T, X_test, T_test, T_train_labels, T_labels
def __init__(self, train_range=None, test_range=None, mini_batch=None, crop_size=24, want_auto=False, bias=None): self.bias = bias if bias: # print nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_"+str(bias) try: f = np.load( nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_" + str(bias) + ".npz") self.M = f['M'] self.P = f['P'] except: # print "hello" _, _, _, _, _, _, self.M, self.P = load_cifar10_adam( backend="numpy", bias=bias) print "M and P saved to NAS." np.savez(nn.nas_address + "/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_.1", M=M, P=P) # self.want_whiten = want_whiten self.num_threads = 5 self.want_auto = want_auto self.start = True self.switch = 0 self.train_range = train_range self.test_range = test_range assert train_range != None self.train_range_id = self.train_range[0] self.test_range_id = self.test_range[0] self.mini_batch = mini_batch self.crop_size = crop_size self.crop_offset = (32 - crop_size) shared_array_base_X0 = multiprocessing.Array( ctypes.c_double, 3 * self.crop_size * self.crop_size * 10000) shared_array_X0 = np.ctypeslib.as_array( shared_array_base_X0.get_obj()) self.X0 = shared_array_X0.reshape(10000, 3, self.crop_size, self.crop_size) shared_array_base_X1 = multiprocessing.Array( ctypes.c_double, 3 * self.crop_size * self.crop_size * 10000) shared_array_X1 = np.ctypeslib.as_array( shared_array_base_X1.get_obj()) self.X1 = shared_array_X1.reshape(10000, 3, self.crop_size, self.crop_size) shared_array_base_T0 = multiprocessing.Array( ctypes.c_double, 10000 * 10) shared_array_T0 = np.ctypeslib.as_array( shared_array_base_T0.get_obj()) self.T0 = shared_array_T0.reshape(10000, 10) shared_array_base_T1 = multiprocessing.Array( ctypes.c_double, 10000 * 10) shared_array_T1 = np.ctypeslib.as_array( shared_array_base_T1.get_obj()) self.T1 = shared_array_T1.reshape(10000, 10) if test_range != None: shared_array_base_X_test = multiprocessing.Array( ctypes.c_double, 3 * self.crop_size * self.crop_size * 10000) 
shared_array_X_test = np.ctypeslib.as_array( shared_array_base_X_test.get_obj()) self.X_test = shared_array_X_test.reshape( 10000, 3, self.crop_size, self.crop_size) assert self.X_test.base.base is shared_array_base_X_test.get_obj( ) shared_array_base_T_test = multiprocessing.Array( ctypes.c_double, 10000 * 10000) shared_array_T_test = np.ctypeslib.as_array( shared_array_base_T_test.get_obj()) self.T_test = shared_array_T_test.reshape(10000, 10000) assert self.T_test.base.base is shared_array_base_T_test.get_obj( ) shared_array_base_T_labels_test = multiprocessing.Array( ctypes.c_double, 10000) shared_array_T_labels_test = np.ctypeslib.as_array( shared_array_base_T_labels_test.get_obj()) self.T_labels_test = shared_array_T_labels_test.reshape(10000) assert self.T_labels_test.base.base is shared_array_base_T_labels_test.get_obj( )
def load():
    """Load the 10 whitened 512x512 natural images from IMAGES.mat.

    :return: array of shape (10, 1, 512, 512).
    """
    f = scipy.io.loadmat(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/Natural/IMAGES.mat")
    X = f["IMAGES"]
    # MATLAB stores (512, 512, 10); flatten pixels, move images to axis 0
    X = X.reshape(512 ** 2, 10).T.reshape(10, 1, 512, 512)
    print X.max()
    return X
def load(backend="numpy", want_mean=True, want_dense=False):
    """Load CIFAR-10 from the python 'batches' pickles.

    :param backend: "numpy" keeps ndarrays, "gnumpy" wraps in gp.garray.
    :param want_mean: subtract the dataset mean (from batches.meta) and
        scale to [0, 1].
    :param want_dense: keep flat (N, 3072) rows instead of NCHW.
    :return: X, T (one-hot), X_test, T_test, T_train_labels, T_labels
    """
    work_address = os.environ["WORK"]  # kept: original read this env var
    X = np.zeros((50000, 3072))
    T = np.zeros((50000, 10))
    T_train_labels = np.zeros(50000)
    X_test = np.zeros((10000, 3072))
    T_test = np.zeros((10000, 10))
    T_labels = np.zeros(10000)

    def _read_batch(index):
        # one pickled batch: a dict with 'data' and 'labels'
        fo = open(
            nn.nas_address() +
            '/PSI-Share-no-backup/Ali/Dataset/CIFAR10/batches/data_batch_' +
            str(index), 'rb')
        batch = cPickle.load(fo)
        fo.close()
        return batch

    # Batches 1-5 are the training split; the original repeated this
    # block five times verbatim.
    for b in range(5):
        batch = _read_batch(b + 1)
        X[b * 10000:(b + 1) * 10000] = batch['data'].T
        T_train_labels[b * 10000:(b + 1) * 10000] = batch['labels']
        for i in range(10000):
            T[i + b * 10000, batch['labels'][i]] = 1
    # batch 6 is the test split
    batch = _read_batch(6)
    X_test[:10000] = batch['data'].T
    T_labels[:10000] = batch['labels']
    for i in range(10000):
        T_test[i, batch['labels'][i]] = 1
    if want_mean:
        fo = open(
            nn.nas_address() +
            '/PSI-Share-no-backup/Ali/Dataset/CIFAR10/batches/batches.meta',
            'rb')
        meta = cPickle.load(fo)
        fo.close()
        X_mean = meta['data_mean']
        X -= X_mean.T
        X_test -= X_mean.T
        X = X / 255.0
        X_test = X_test / 255.0
    if not want_dense:
        X = X.reshape(50000, 3, 32, 32)
        X_test = X_test.reshape(10000, 3, 32, 32)
    if backend == "numpy":
        X = np.array(X)
        T = np.array(T)
        X_test = np.array(X_test)
        T_test = np.array(T_test)
        T_train_labels = np.array(T_train_labels)
        T_labels = np.array(T_labels)
    if backend == "gnumpy":
        X = gp.garray(X)
        T = gp.garray(T)
        X_test = gp.garray(X_test)
        T_test = gp.garray(T_test)
        T_train_labels = gp.garray(T_train_labels)
        T_labels = gp.garray(T_labels)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load_extra(want_mean=False, want_dense=False): X_total = np.zeros((600000, 3, 32, 32)) T_train_labels_total = np.zeros(600000) T_total = np.zeros((600000, 10)) f = scipy.io.loadmat( nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/train_32x32.mat") X = f['X'].astype("float64") X = np.swapaxes(X, 0, 3) X = np.swapaxes(X, 1, 2) X = np.swapaxes(X, 2, 3) X_total[:70000, :, :, :] = X[:70000, :, :, :] T_train_labels_total[:70000] = f['y'].ravel()[:70000] % 10 print "Train Loaded." f = scipy.io.loadmat( nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/extra_32x32.mat") X = f['X'].astype("float32") X = np.swapaxes(X, 0, 3) X = np.swapaxes(X, 1, 2) X = np.swapaxes(X, 2, 3) X_total[70000:600000, :, :, :] = X[:530000, :, :, :] T_train_labels_total[70000:600000] = f['y'].ravel()[:530000] % 10 print "Extra Loaded." T_total = np.zeros((600000, 10)) for i in range(600000): T_total[i, T_train_labels_total[i]] = 1 f = scipy.io.loadmat( nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/test_32x32.mat") X_test = f['X'].astype("float32") X_test = np.swapaxes(X_test, 0, 3) X_test = np.swapaxes(X_test, 1, 2) X_test = np.swapaxes(X_test, 2, 3) X_test = X_test[:20000, :, :, :] T_labels = f['y'].ravel()[:20000] % 10 T_test = np.zeros((20000, 10)) for i in range(20000): T_test[i, T_labels[i]] = 1 print "Test Loaded." if want_mean: X_mean = X_total.mean(0) X_std = X_total.std(0) X_total = (X_total - X_mean) / X_std X_test = (X_test - X_mean) / X_std if want_dense: X_total = X_total.reshape(600000, 3072) X_test = X_test.reshape(20000, 3072) return X_total, T_total, X_test, T_test, T_train_labels_total, T_labels
def __init__(self, train_range=None, test_range=None, mini_batch=None, want_auto=False, bias=None, mean=False):
    """Set up an SVHN mini-batch provider backed by shared-memory buffers.

    train_range/test_range: (start, end) batch-id ranges; train_range is
    required.  mini_batch: mini-batch size used by the training loop.
    bias: if set, load (or compute and cache) ZCA-style M/P matrices from NAS.
    mean: if set instead, load precomputed dataset mean/std from NAS.
    want_auto: stored flag (autoencoder mode, presumably — used elsewhere).

    Allocates double-buffered shared arrays (X0/T0 and X1/T1) of 3072 samples
    each so a background process can fill one buffer while the other is
    consumed — confirm against the loader methods of this class.
    """
    self.bias = bias
    self.mean = mean
    if bias:
        # print nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_"+str(bias)
        try:
            # Cached whitening matrices for this bias value.
            f = np.load(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/MP_" + str(bias) + ".npz")
            self.M = f['M']
            self.P = f['P']
        except:
            # NOTE(review): bare except also hides real I/O errors; any
            # failure triggers a full recompute via svhn_MP.
            print "Started computing M and P for bias=", bias
            self.M, self.P = svhn_MP(bias=bias)
            print "M and P saved to NAS for bias=", bias
            np.savez(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/MP_" + str(bias), M=self.M, P=self.P)
    elif mean:
        # Precomputed per-pixel mean/std for standardization.
        f = np.load(nn.nas_address() + "/PSI-Share-no-backup/Ali/Dataset/SVHN/mean.npz")
        self.M = f['svhn_mean']
        self.std = f['svhn_std']
    self.want_auto = want_auto
    self.start = True
    self.switch = 0  # selects which of the two shared buffers is active
    self.train_range = train_range
    self.test_range = test_range
    assert train_range != None
    self.train_range_id = self.train_range[0]
    # NOTE(review): test_range[0] is read unconditionally, so passing
    # test_range=None raises TypeError here despite the guard below — confirm
    # callers always supply test_range.
    self.test_range_id = self.test_range[0]
    self.mini_batch = mini_batch
    # Shared-memory training buffers: 3072 images of 3x32x32 doubles each.
    # np.ctypeslib.as_array wraps the ctypes buffer without copying.
    shared_array_base_X0 = multiprocessing.Array(
        ctypes.c_double, 3 * 32 * 32 * 3072)
    shared_array_X0 = np.ctypeslib.as_array(
        shared_array_base_X0.get_obj())
    self.X0 = shared_array_X0.reshape(3072, 3, 32, 32)
    shared_array_base_X1 = multiprocessing.Array(
        ctypes.c_double, 3 * 32 * 32 * 3072)
    shared_array_X1 = np.ctypeslib.as_array(
        shared_array_base_X1.get_obj())
    self.X1 = shared_array_X1.reshape(3072, 3, 32, 32)
    # Matching one-hot target buffers (10 classes).
    shared_array_base_T0 = multiprocessing.Array(
        ctypes.c_double, 3072 * 10)
    shared_array_T0 = np.ctypeslib.as_array(
        shared_array_base_T0.get_obj())
    self.T0 = shared_array_T0.reshape(3072, 10)
    shared_array_base_T1 = multiprocessing.Array(
        ctypes.c_double, 3072 * 10)
    shared_array_T1 = np.ctypeslib.as_array(
        shared_array_base_T1.get_obj())
    self.T1 = shared_array_T1.reshape(3072, 10)
    if test_range != None:
        # Single (not double-buffered) shared test-set buffers.  The asserts
        # verify the numpy views still alias the shared ctypes memory.
        shared_array_base_X_test = multiprocessing.Array(
            ctypes.c_double, 3 * 32 * 32 * 3072)
        shared_array_X_test = np.ctypeslib.as_array(
            shared_array_base_X_test.get_obj())
        self.X_test = shared_array_X_test.reshape(3072, 3, 32, 32)
        assert self.X_test.base.base is shared_array_base_X_test.get_obj(
        )
        shared_array_base_T_test = multiprocessing.Array(
            ctypes.c_double, 3072 * 10)
        shared_array_T_test = np.ctypeslib.as_array(
            shared_array_base_T_test.get_obj())
        self.T_test = shared_array_T_test.reshape(3072, 10)
        assert self.T_test.base.base is shared_array_base_T_test.get_obj(
        )
        shared_array_base_T_labels_test = multiprocessing.Array(
            ctypes.c_double, 3072)
        shared_array_T_labels_test = np.ctypeslib.as_array(
            shared_array_base_T_labels_test.get_obj())
        self.T_labels_test = shared_array_T_labels_test.reshape(3072)
        assert self.T_labels_test.base.base is shared_array_base_T_labels_test.get_obj(
        )
def __init__(self,train_range=None,test_range=None,mini_batch = None,want_auto = False,bias=None,mean=False):
    """Set up an SVHN mini-batch provider backed by shared-memory buffers.

    NOTE(review): this appears to be an unformatted duplicate of the
    autopep8-formatted __init__ above — confirm one copy can be deleted.

    train_range/test_range: (start, end) batch-id ranges; train_range is
    required.  mini_batch: mini-batch size used by the training loop.
    bias: load (or compute and cache) M/P matrices from NAS.
    mean: load precomputed dataset mean/std from NAS instead.
    """
    self.bias = bias
    self.mean = mean
    if bias:
        # print nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/CIFAR10/MP_"+str(bias)
        try:
            # Cached whitening matrices for this bias value.
            f = np.load(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/SVHN/MP_"+str(bias)+".npz")
            self.M = f['M'];
            self.P = f['P'];
        except:
            # NOTE(review): bare except — any failure (not just a missing
            # cache file) silently triggers a recompute.
            print "Started computing M and P for bias=",bias
            self.M,self.P = svhn_MP(bias = bias)
            print "M and P saved to NAS for bias=",bias
            np.savez(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/SVHN/MP_"+str(bias),M=self.M,P=self.P)
    elif mean:
        # Precomputed per-pixel mean/std for standardization.
        f = np.load(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/SVHN/mean.npz")
        self.M = f['svhn_mean']
        self.std = f['svhn_std']
    self.want_auto = want_auto
    self.start = True
    self.switch = 0  # selects which of the two shared buffers is active
    self.train_range = train_range
    self.test_range = test_range
    assert train_range!=None
    self.train_range_id = self.train_range[0]
    # NOTE(review): test_range[0] is read unconditionally — test_range=None
    # raises TypeError here despite the guard below.
    self.test_range_id = self.test_range[0]
    self.mini_batch = mini_batch
    # Double-buffered shared-memory training arrays: 3072 images of 3x32x32
    # doubles, wrapped as numpy views without copying.
    shared_array_base_X0 = multiprocessing.Array(ctypes.c_double, 3*32*32*3072)
    shared_array_X0 = np.ctypeslib.as_array(shared_array_base_X0.get_obj())
    self.X0 = shared_array_X0.reshape(3072,3,32,32)
    shared_array_base_X1 = multiprocessing.Array(ctypes.c_double, 3*32*32*3072)
    shared_array_X1 = np.ctypeslib.as_array(shared_array_base_X1.get_obj())
    self.X1 = shared_array_X1.reshape(3072,3,32,32)
    # Matching one-hot target buffers (10 classes).
    shared_array_base_T0 = multiprocessing.Array(ctypes.c_double, 3072*10)
    shared_array_T0 = np.ctypeslib.as_array(shared_array_base_T0.get_obj())
    self.T0 = shared_array_T0.reshape(3072,10)
    shared_array_base_T1 = multiprocessing.Array(ctypes.c_double, 3072*10)
    shared_array_T1 = np.ctypeslib.as_array(shared_array_base_T1.get_obj())
    self.T1 = shared_array_T1.reshape(3072,10)
    if test_range!=None:
        # Single shared test-set buffers; asserts verify the numpy views
        # still alias the shared ctypes memory.
        shared_array_base_X_test = multiprocessing.Array(ctypes.c_double, 3*32*32*3072)
        shared_array_X_test = np.ctypeslib.as_array(shared_array_base_X_test.get_obj())
        self.X_test = shared_array_X_test.reshape(3072,3,32,32)
        assert self.X_test.base.base is shared_array_base_X_test.get_obj()
        shared_array_base_T_test = multiprocessing.Array(ctypes.c_double, 3072*10)
        shared_array_T_test = np.ctypeslib.as_array(shared_array_base_T_test.get_obj())
        self.T_test = shared_array_T_test.reshape(3072,10)
        assert self.T_test.base.base is shared_array_base_T_test.get_obj()
        shared_array_base_T_labels_test = multiprocessing.Array(ctypes.c_double, 3072)
        shared_array_T_labels_test = np.ctypeslib.as_array(shared_array_base_T_labels_test.get_obj())
        self.T_labels_test = shared_array_T_labels_test.reshape(3072)
        assert self.T_labels_test.base.base is shared_array_base_T_labels_test.get_obj()
def load():
    """Load the 10 natural images from IMAGES.mat as a (10, 1, 512, 512) array.

    The .mat file stores the stack as (512, 512, 10); the reshape/transpose
    pair moves the image index to the leading axis.  Prints the maximum pixel
    value as a quick sanity check.
    """
    mat = scipy.io.loadmat(nn.nas_address()+"/PSI-Share-no-backup/Ali/Dataset/Natural/IMAGES.mat")
    images = mat['IMAGES'].reshape(512**2, 10).T.reshape(10, 1, 512, 512)
    print(images.max())
    return images
def load(backend="numpy", want_mean=True, want_dense=False):
    """Load CIFAR-10 from the pickled batch files on NAS.

    backend: "numpy" or "gnumpy" — controls the array type of the returned
    values.  want_mean: subtract the dataset mean (from batches.meta) and
    scale to [0, 1].  want_dense: return flat (N, 3072) images instead of
    (N, 3, 32, 32).

    Returns (X, T, X_test, T_test, T_train_labels, T_labels) with one-hot
    targets T/T_test over 10 classes.
    """
    X = np.zeros((50000, 3072))
    T = np.zeros((50000, 10))
    T_train_labels = np.zeros(50000)
    X_test = np.zeros((10000, 3072))
    T_test = np.zeros((10000, 10))
    T_labels = np.zeros(10000)
    base = nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/CIFAR10/batches/'
    # Five training batches of 10000 images each; batches store data as
    # (3072, 10000), hence the transpose.
    for b in range(5):
        fo = open(base + 'data_batch_' + str(b + 1), 'rb')
        batch = cPickle.load(fo)
        fo.close()
        lo = 10000 * b
        X[lo:lo + 10000] = batch['data'].T
        T_train_labels[lo:lo + 10000] = batch['labels']
        for i in range(10000):
            T[lo + i, batch['labels'][i]] = 1
    # Batch 6 serves as the test set in this layout.
    fo = open(base + 'data_batch_6', 'rb')
    batch = cPickle.load(fo)
    fo.close()
    X_test[:10000] = batch['data'].T
    T_labels[:10000] = batch['labels']
    for i in range(10000):
        T_test[i, batch['labels'][i]] = 1
    if want_mean:
        fo = open(base + 'batches.meta', 'rb')
        meta = cPickle.load(fo)
        fo.close()
        X_mean = meta['data_mean']
        X -= X_mean.T
        X_test -= X_mean.T
        X = X / 255.0
        X_test = X_test / 255.0
    if not want_dense:
        X = X.reshape(50000, 3, 32, 32)
        X_test = X_test.reshape(10000, 3, 32, 32)
    if backend == "numpy":
        X = np.array(X)
        T = np.array(T)
        X_test = np.array(X_test)
        T_test = np.array(T_test)
        T_train_labels = np.array(T_train_labels)
        T_labels = np.array(T_labels)
    if backend == "gnumpy":
        X = gp.garray(X)
        T = gp.garray(T)
        X_test = gp.garray(X_test)
        T_test = gp.garray(T_test)
        T_train_labels = gp.garray(T_train_labels)
        T_labels = gp.garray(T_labels)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load(backend="numpy", binary=False, want_dense=False):
    """Load MNIST from the raw IDX files on NAS.

    backend: "numpy" or "gnumpy" — controls the array type of the returned
    values.  binary: threshold pixels at 0.5 to {0, 1}.  want_dense: return
    flat (N, 784) images instead of (N, 1, 28, 28).

    Returns (X, T, X_test, T_test, T_train_labels, T_labels) with pixel
    values scaled to [0, 1] and one-hot targets over 10 classes.
    """
    path = nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/MNIST/'
    s = 60000
    # IDX format: 16-byte header before image data, 8-byte header before
    # labels.  Files are binary, so open with 'rb' and close via `with`.
    with open(path + 'train-images.idx3-ubyte', 'rb') as fh:
        raw = fh.read()
    X = np.frombuffer(raw[16:], dtype='uint8').reshape(s, 784) / 255.0
    with open(path + 'train-labels.idx1-ubyte', 'rb') as fh:
        raw = fh.read()
    # .copy() because frombuffer yields a read-only view of the bytes.
    T_train_labels = np.frombuffer(raw[8:], dtype='uint8').copy()
    T = np.zeros((s, 10))
    T[np.arange(s), T_train_labels] = 1
    s_test = 10000
    with open(path + 't10k-images.idx3-ubyte', 'rb') as fh:
        raw = fh.read()
    X_test = np.frombuffer(raw[16:], dtype='uint8').reshape(s_test, 784) / 255.0
    with open(path + 't10k-labels.idx1-ubyte', 'rb') as fh:
        raw = fh.read()
    T_labels = np.frombuffer(raw[8:], dtype='uint8').astype("float32")
    T_test = np.zeros((s_test, 10))
    # Labels are float32 (kept for interface compat); cast for indexing.
    T_test[np.arange(s_test), T_labels.astype(int)] = 1
    if binary:
        # Pixel values are k/255, so none equals exactly 0.5; the two masks
        # together cover every pixel.
        X[X > .5] = 1
        X[X < .5] = 0
        X_test[X_test > .5] = 1.0
        X_test[X_test < .5] = 0.0
    if want_dense == False:
        X = X.reshape(60000, 1, 28, 28)
        X_test = X_test.reshape(10000, 1, 28, 28)
    if backend == "numpy":
        X = nn.array(X)
        T = nn.array(T)
        X_test = nn.array(X_test)
        T_test = nn.array(T_test)
        T_train_labels = nn.array(T_train_labels)
        T_labels = nn.array(T_labels)
    if backend == "gnumpy":
        X = nn.garray(X)
        T = nn.garray(T)
        X_test = nn.garray(X_test)
        T_test = nn.garray(T_test)
        T_train_labels = nn.garray(T_train_labels)
        T_labels = nn.garray(T_labels)
    return X, T, X_test, T_test, T_train_labels, T_labels
def load(size=64, want_mean=False, want_dense=False, serial=True):
    """Load small NORB, optionally using both stereo cameras.

    size: output side length; each 96x96 image is center-cropped to 64x64 and
    resized to (size, size).  serial=True appends the second camera's images,
    giving 48600 samples per split (24300 otherwise).  want_mean: standardize
    with training-set per-pixel mean/std.  want_dense: keep flat (N, size**2)
    rows instead of (N, 1, size, size).

    Returns (X, T, X_test, T_test, T_train_labels, T_labels) with one-hot
    targets over 5 categories.

    Fixes over the previous revision: the label arrays were used before being
    created (NameError), and the final reshape hard-coded 24300 rows, which
    broke serial=True.
    """
    n = 48600 if serial else 24300
    base = nn.nas_address() + '/PSI-Share-no-backup/Ali/Dataset/NORB/'

    # --- training images (24-byte header, uint8 pixels, 2 cameras) ---
    X = np.zeros((n, size ** 2))
    with open(base + 'smallnorb-5x46789x9x18x6x2x96x96-training-dat.mat', 'rb') as fh:
        raw = fh.read()
    data = np.frombuffer(raw[24:], dtype='uint8').reshape(24300, 2, 96, 96).astype("float32")
    for i in range(24300):
        X[i] = scipy.misc.imresize(data[i, 0, 16:80, 16:80], (size, size), 'bilinear').flatten()
    if serial:
        for i in range(24300):
            X[i + 24300] = scipy.misc.imresize(data[i, 1, 16:80, 16:80], (size, size), 'bilinear').flatten()

    # --- training labels (20-byte header, uint32); the second camera shares
    # the same category labels ---
    with open(base + 'smallnorb-5x46789x9x18x6x2x96x96-training-cat.mat', 'rb') as fh:
        raw = fh.read()
    cats = np.frombuffer(raw[20:], dtype='uint32')
    T_train_labels = np.zeros(n, dtype=int)
    T_train_labels[:24300] = cats
    if serial:
        T_train_labels[24300:] = cats
    T = np.zeros((n, 5))
    T[np.arange(n), T_train_labels] = 1

    # --- test images and labels, same layout ---
    X_test = np.zeros((n, size ** 2))
    with open(base + 'smallnorb-5x01235x9x18x6x2x96x96-testing-dat.mat', 'rb') as fh:
        raw = fh.read()
    data = np.frombuffer(raw[24:], dtype='uint8').reshape(24300, 2, 96, 96).astype("float32")
    for i in range(24300):
        X_test[i] = scipy.misc.imresize(data[i, 0, 16:80, 16:80], (size, size), 'bilinear').flatten()
    if serial:
        for i in range(24300):
            X_test[i + 24300] = scipy.misc.imresize(data[i, 1, 16:80, 16:80], (size, size), 'bilinear').flatten()
    with open(base + 'smallnorb-5x01235x9x18x6x2x96x96-testing-cat.mat', 'rb') as fh:
        raw = fh.read()
    cats = np.frombuffer(raw[20:], dtype='uint32')
    T_labels = np.zeros(n, dtype=int)
    T_labels[:24300] = cats
    if serial:
        T_labels[24300:] = cats
    T_test = np.zeros((n, 5))
    T_test[np.arange(n), T_labels] = 1

    if want_mean:
        # Standardize with training-set statistics.
        X_mean = X.mean(0)
        X_std = X.std(0)
        X = (X - X_mean) / X_std
        X_test = (X_test - X_mean) / X_std
    if not want_dense:
        X = X.reshape(n, 1, size, size)
        X_test = X_test.reshape(n, 1, size, size)
    return X, T, X_test, T_test, T_train_labels, T_labels