def pickle_load(filename='result', jobid='', sliceno=None, verbose=False, default=None, encoding='bytes'):
	"""Load and return a pickled object.

	The path is resolved through full_filename() with a '.pickle' suffix.
	If the resolved filename is empty or the file cannot be read (IOError)
	and *default* is not None, *default* is returned instead of raising.
	*encoding* is forwarded to pickle.load on Python 3 only.
	"""
	filename = full_filename(filename, '.pickle', sliceno, jobid)
	if not filename and default is not None:
		return default
	if verbose:
		print('Pickle load "%s" ... ' % (filename, ), end='')
		t0 = time.time()
	try:
		with status('Loading ' + filename):
			with open(filename, 'rb') as fh:
				# the encoding keyword only exists on the py3 pickle module
				result = pickle.load(fh, encoding=encoding) if PY3 else pickle.load(fh)
	except IOError:
		if default is None:
			raise
		return default
	if verbose:
		print('done (%f seconds).' % (time.time() - t0, ))
	return result
def load_svhn(
        dataset=_get_datafolder_path()+'/svhn/',
        normalize=True,
        dequantify=True,
        extra=False):
    '''
    Load the SVHN dataset, downloading it on first use.
    :param dataset: folder holding the cached cpkl files
    :param normalize: scale x data by the training-set maximum
    :param dequantify: Add uniform noise to dequantify the data following
        Uria et. al 2013
        "RNADE: The real-valued neural autoregressive density-estimator"
    :param extra: include extra svhn samples
    :return: train_x, train_y, test_x, test_y
    '''

    def _ensure(path, want_extra):
        # Download the archive if *path* is missing, creating the folder first.
        if not os.path.isfile(path):
            folder = os.path.dirname(dataset + 'svhn_train.cpkl')
            if not os.path.exists(folder):
                os.makedirs(folder)
            _download_svhn(dataset, extra=want_extra)

    def _read(path):
        with open(path, 'rb') as fh:
            return cPkl.load(fh)

    _ensure(dataset + 'svhn_train.cpkl', False)
    train_x, train_y = _read(dataset + 'svhn_train.cpkl')
    test_x, test_y = _read(dataset + 'svhn_test.cpkl')

    if extra:
        _ensure(dataset + 'svhn_extra.cpkl', True)
        extra_x, extra_y = _read(dataset + 'svhn_extra.cpkl')
        train_x = np.concatenate([train_x, extra_x])
        train_y = np.concatenate([train_y, extra_y])

    train_x = train_x.astype('float32')
    test_x = test_x.astype('float32')
    train_y = train_y.astype('int32')
    test_y = test_y.astype('int32')

    if dequantify:
        train_x += np.random.uniform(0, 1, size=train_x.shape).astype('float32')
        test_x += np.random.uniform(0, 1, size=test_x.shape).astype('float32')
    if normalize:
        normalizer = train_x.max().astype('float32')
        train_x = train_x / normalizer
        test_x = test_x / normalizer
    return train_x, train_y, test_x, test_y
def load_cifar10(
        dataset=_get_datafolder_path()+'/cifar10/cifar-10-python.tar.gz',
        normalize=True,
        dequantify=True):
    '''
    Loads the cifar10 dataset, downloading and extracting it on first use.
    :param dataset: path to dataset file
    :param normalize: normalize the x data to the range [0,1]
    :param dequantify: Add uniform noise to dequantify the data following
        Uria et. al 2013
        "RNADE: The real-valued neural autoregressive density-estimator"
    :return: train and test data as (50000/10000, 32, 32, 3) float32 arrays
    '''
    datasetfolder = os.path.dirname(dataset)
    batch_folder = datasetfolder + '/cifar-10-batches-py/'
    if not os.path.isfile(dataset):
        if not os.path.exists(datasetfolder):
            os.makedirs(datasetfolder)
        _download_cifar10(dataset)
    if not os.path.isfile(batch_folder + 'data_batch_5'):
        # NOTE(review): extractall on a downloaded archive is path-traversal
        # prone; acceptable only because the download URL is project-controlled.
        with tarfile.open(dataset) as tar:
            tar.extractall(os.path.dirname(dataset))
    train_x, train_y = [], []
    for i in ['1', '2', '3', '4', '5']:
        # BUG FIX: pickled batches must be opened in binary mode ('rb');
        # text mode fails on Python 3 and corrupts the stream on Windows.
        with open(batch_folder + 'data_batch_' + i, 'rb') as f:
            data = cPkl.load(f)
        train_x += [data['data']]
        train_y += [data['labels']]
    train_x = np.concatenate(train_x)
    train_y = np.concatenate(train_y)
    with open(batch_folder + 'test_batch', 'rb') as f:
        data = cPkl.load(f)
    test_x = data['data']
    test_y = np.asarray(data['labels'])
    train_x = train_x.astype('float32')
    test_x = test_x.astype('float32')
    if dequantify:
        train_x += np.random.uniform(0, 1, size=train_x.shape).astype('float32')
        test_x += np.random.uniform(0, 1, size=test_x.shape).astype('float32')
    if normalize:
        normalizer = train_x.max().astype('float32')
        train_x = train_x / normalizer
        test_x = test_x / normalizer
    # reorder from NCHW storage to NHWC images
    train_x = train_x.reshape((50000, 3, 32, 32)).transpose(0, 2, 3, 1)
    test_x = test_x.reshape((10000, 3, 32, 32)).transpose(0, 2, 3, 1)
    return train_x.astype('float32'), train_y, test_x.astype('float32'), test_y
def load_norb_small(
        dataset=_get_datafolder_path()+'/norb_small/norbsmall32x32.cpkl',
        dequantify=True,
        normalize=True):
    '''
    Loads the small NORB dataset (32x32), downloading it on first use.
    :param dataset: path to dataset file
    :param dequantify: Add uniform noise to dequantify the data
    :param normalize: scale x data by the training-set maximum
    :return: train_x, train_t, test_x, test_t
    '''
    if not os.path.isfile(dataset):
        datasetfolder = os.path.dirname(dataset)
        if not os.path.exists(datasetfolder):
            os.makedirs(datasetfolder)
        _download_norb_small(datasetfolder)
    # BUG FIX: the pickle must be opened in binary mode ('rb'); text mode
    # fails on Python 3 and corrupts the stream on Windows.
    with open(dataset, 'rb') as f:
        train_x, train_t, test_x, test_t = cPkl.load(f)
    if dequantify:
        train_x += np.random.uniform(0, 1, size=train_x.shape).astype('float32')
        test_x += np.random.uniform(0, 1, size=test_x.shape).astype('float32')
    if normalize:
        normalizer = train_x.max().astype('float32')
        train_x = train_x / normalizer
        test_x = test_x / normalizer
    return train_x, train_t, test_x, test_t
def _get(self, path, timeout):
    """Return the value cached at *path*, or None if absent or expired.

    The file stores a pickled (created_time, value) pair. If *timeout* is
    None, self.timeout is used; a timeout <= 0 disables expiry. Expired
    entries are deleted and None is returned. Access is serialized via
    self.lock and a per-file lock from self._lock_file.
    """
    if not os.path.exists(path):
        # no record
        return None
    self.lock.acquire()
    try:
        # acquire the per-file lock before touching the record
        f_lock = self._lock_file(path, False)
        try:
            # BUG FIX: read inside a context manager so the handle is
            # closed even if unpickling fails
            with open(path, 'rb') as datafile:
                created_time, value = pickle.load(datafile)
            # check if value is expired
            if timeout is None:
                timeout = self.timeout
            if timeout > 0 and (time.time() - created_time) >= timeout:
                # expired! delete from cache
                value = None
                self._delete_file(path)
            return value
        finally:
            # BUG FIX: the original only unlocked on the success path, so
            # any exception (corrupt pickle, I/O error) leaked the file lock
            self._unlock_file(f_lock)
    finally:
        self.lock.release()
def load_omniglot(dataset=_get_datafolder_path()+'/omniglot'):
    '''
    Loads the Omniglot dataset, downloading it on first use.
    :param dataset: folder holding omniglot.cpkl
    :return: train and test data as float32 arrays
    '''
    datafile = dataset + '/omniglot.cpkl'
    # BUG FIX: previously only the *directory* was checked, so a pre-existing
    # empty folder skipped the download and then crashed on open(); check for
    # the pickle itself, consistent with the other loaders in this file.
    if not os.path.isfile(datafile):
        if not os.path.exists(dataset):
            os.makedirs(dataset)
        _download_omniglot(dataset)
    with open(datafile, 'rb') as f:
        train, test = cPkl.load(f)
    train = train.astype('float32')
    test = test.astype('float32')
    return train, test
def load_lfw(
        dataset=_get_datafolder_path()+'/lfw/lfw',
        normalize=True,
        dequantify=True,
        size=0.25):
    '''
    Load the LFW dataset at the given rescaling factor, downloading the
    data and the fixed split file on first use.
    :param dataset: base path; the actual file is "<dataset>_<size>.cpkl"
    :param normalize: scale the data by its maximum value
    :param dequantify: Add uniform noise to dequantify the data following
        Uria et. al 2013
        "RNADE: The real-valued neural autoregressive density-estimator"
    :param size: rescaling factor
    :return: the image data as a float32 array
    '''
    dataset = "%s_%0.2f.cpkl" % (dataset, size)
    datasetfolder = os.path.dirname(dataset)
    if not os.path.isfile(dataset):
        if not os.path.exists(datasetfolder):
            os.makedirs(datasetfolder)
        _download_lwf(dataset, size)
    split_path = datasetfolder + '/fixed_split.pkl'
    if not os.path.isfile(split_path):
        urllib.urlretrieve('https://raw.githubusercontent.com/casperkaae/'
                           'extra_parmesan/master/data_splits/'
                           'lfw_fixed_split.pkl', split_path)
    with gzip.open(dataset, 'rb') as fh:
        data = cPkl.load(fh)[0].astype('float32')
    if dequantify:
        data = data + np.random.uniform(0, 1, size=data.shape).astype('float32')
    if normalize:
        normalizer = data.max().astype('float32')
        data = data / normalizer
    return data
def _unpickle(f):
    """Read and return the single pickled object stored in file *f*."""
    with open(f, 'rb') as fh:
        return cPkl.load(fh)