Example #1
def pickle_load(filename='result',
                jobid='',
                sliceno=None,
                verbose=False,
                default=None,
                encoding='bytes'):
    filename = full_filename(filename, '.pickle', sliceno, jobid)
    if not filename and default is not None:
        return default
    if verbose:
        print('Pickle load "%s" ... ' % (filename, ), end='')
        t0 = time.time()
    try:
        with status('Loading ' + filename):
            with open(filename, 'rb') as fh:
                if PY3:
                    ret = pickle.load(fh, encoding=encoding)
                else:
                    ret = pickle.load(fh)
    except IOError:
        if default is not None:
            return default
        raise
    if verbose:
        print('done (%f seconds).' % (time.time() - t0, ))
    return ret
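
A minimal usage sketch; the job id, slice number, and default below are illustrative, and full_filename/status are assumed to come from the surrounding module:

# Hypothetical call: load slice 0 of a job's pickled result,
# falling back to an empty dict if the file is missing.
result = pickle_load('result', jobid='test_job-0', sliceno=0, default={})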
Example #2
def load_svhn(
        dataset=_get_datafolder_path()+'/svhn/',
        normalize=True,
        dequantify=True,
        extra=False):
    '''
    Loads the SVHN dataset
    :param dataset: path to dataset folder
    :param normalize: normalize the x data to the range [0,1]
    :param dequantify: Add uniform noise to dequantify the data following
        Uria et al. 2013
        "RNADE: The real-valued neural autoregressive density-estimator"
    :param extra: include the extra SVHN samples
    :return: train and test data
    '''

    if not os.path.isfile(dataset + 'svhn_train.cpkl'):
        datasetfolder = os.path.dirname(dataset + 'svhn_train.cpkl')
        if not os.path.exists(datasetfolder):
            os.makedirs(datasetfolder)
        _download_svhn(dataset, extra=False)

    with open(dataset + 'svhn_train.cpkl', 'rb') as f:
        train_x, train_y = cPkl.load(f)
    with open(dataset + 'svhn_test.cpkl', 'rb') as f:
        test_x, test_y = cPkl.load(f)

    if extra:
        if not os.path.isfile(dataset + 'svhn_extra.cpkl'):
            datasetfolder = os.path.dirname(dataset + 'svhn_extra.cpkl')
            if not os.path.exists(datasetfolder):
                os.makedirs(datasetfolder)
            _download_svhn(dataset, extra=True)

        with open(dataset + 'svhn_extra.cpkl', 'rb') as f:
            extra_x, extra_y = cPkl.load(f)
        train_x = np.concatenate([train_x, extra_x])
        train_y = np.concatenate([train_y, extra_y])

    train_x = train_x.astype('float32')
    test_x = test_x.astype('float32')
    train_y = train_y.astype('int32')
    test_y = test_y.astype('int32')

    if dequantify:
        train_x += np.random.uniform(0, 1, size=train_x.shape).astype('float32')
        test_x += np.random.uniform(0, 1, size=test_x.shape).astype('float32')

    if normalize:
        normalizer = train_x.max().astype('float32')
        train_x = train_x / normalizer
        test_x = test_x / normalizer

    return train_x, train_y, test_x, test_y
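
A short usage sketch, assuming _download_svhn and the data folder helpers exist as above:

# Hypothetical call: with dequantify and normalize on, pixel values
# end up in [0, 1] (the train max is exactly 1.0 after normalization).
train_x, train_y, test_x, test_y = load_svhn(normalize=True, dequantify=True)
print(train_x.dtype, train_x.min(), train_x.max())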
Example #3
def load_cifar10(
        dataset=_get_datafolder_path()+'/cifar10/cifar-10-python.tar.gz',
        normalize=True,
        dequantify=True):
    '''
    Loads the cifar10 dataset
    :param dataset: path to dataset file
    :param normalize: normalize the x data to the range [0,1]
    :param dequantify: Add uniform noise to dequantify the data following
        Uria et al. 2013
        "RNADE: The real-valued neural autoregressive density-estimator"
    :return: train and test data
    '''
    datasetfolder = os.path.dirname(dataset)
    batch_folder = datasetfolder + '/cifar-10-batches-py/'
    if not os.path.isfile(dataset):
        if not os.path.exists(datasetfolder):
            os.makedirs(datasetfolder)
        _download_cifar10(dataset)

    if not os.path.isfile(batch_folder + 'data_batch_5'):
        with tarfile.open(dataset) as tar:
            tar.extractall(os.path.dirname(dataset))

    train_x, train_y = [], []
    for i in ['1', '2', '3', '4', '5']:
        with open(batch_folder + 'data_batch_' + i, 'rb') as f:
            data = cPkl.load(f)
            train_x += [data['data']]
            train_y += [data['labels']]
    train_x = np.concatenate(train_x)
    train_y = np.concatenate(train_y)

    with open(batch_folder + 'test_batch', 'rb') as f:
        data = cPkl.load(f)
        test_x = data['data']
        test_y = np.asarray(data['labels'])

    train_x = train_x.astype('float32')
    test_x = test_x.astype('float32')
    if dequantify:
        train_x += np.random.uniform(0, 1, size=train_x.shape).astype('float32')
        test_x += np.random.uniform(0, 1, size=test_x.shape).astype('float32')
    if normalize:
        normalizer = train_x.max().astype('float32')
        train_x = train_x / normalizer
        test_x = test_x / normalizer

    train_x = train_x.reshape((50000, 3, 32, 32)).transpose(0, 2, 3, 1)
    test_x = test_x.reshape((10000, 3, 32, 32)).transpose(0, 2, 3, 1)

    return train_x.astype('float32'), train_y, test_x.astype('float32'), test_y
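
Usage sketch; the output shapes follow directly from the final reshape/transpose above:

# Hypothetical call: arrays come back in NHWC layout.
train_x, train_y, test_x, test_y = load_cifar10()
assert train_x.shape == (50000, 32, 32, 3)
assert test_x.shape == (10000, 32, 32, 3)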
Example #4
def load_norb_small(
        dataset=_get_datafolder_path()+'/norb_small/norbsmall32x32.cpkl',
        dequantify=True,
        normalize=True ):
    '''
    Loads the small NORB dataset
    :param dataset: path to dataset file
    :param dequantify: Add uniform noise to dequantify the data following
        Uria et al. 2013
        "RNADE: The real-valued neural autoregressive density-estimator"
    :param normalize: normalize the x data to the range [0,1]
    :return: train and test data
    '''
    if not os.path.isfile(dataset):
        datasetfolder = os.path.dirname(dataset)
        if not os.path.exists(datasetfolder):
            os.makedirs(datasetfolder)
        _download_norb_small(datasetfolder)

    with open(dataset, 'rb') as f:
        train_x, train_t, test_x, test_t = cPkl.load(f)

    if dequantify:
        train_x += np.random.uniform(0, 1, size=train_x.shape).astype('float32')
        test_x += np.random.uniform(0, 1, size=test_x.shape).astype('float32')
    if normalize:
        normalizer = train_x.max().astype('float32')
        train_x = train_x / normalizer
        test_x = test_x / normalizer

    return train_x, train_t, test_x, test_t
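
Usage sketch, assuming _download_norb_small is available:

# Hypothetical call: loads the 32x32 small-NORB arrays, scaled to [0, 1].
train_x, train_t, test_x, test_t = load_norb_small()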
Example #5
    def _get(self, path, timeout):
        if not os.path.exists(path):
            # no record
            return None
        self.lock.acquire()
        try:
            # acquire the per-file lock and read the pickled
            # (created_time, value) pair
            f_lock = self._lock_file(path, False)
            try:
                with open(path, 'rb') as datafile:
                    created_time, value = pickle.load(datafile)

                # check if the value is expired
                if timeout is None:
                    timeout = self.timeout
                if timeout > 0 and (time.time() - created_time) >= timeout:
                    # expired! delete from cache
                    value = None
                    self._delete_file(path)
            finally:
                # release the file lock even if loading or deleting raises
                self._unlock_file(f_lock)
            return value
        finally:
            self.lock.release()
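
A hypothetical read sketch; cache, the path, and compute_and_store below are illustrative, only _get comes from the class above:

value = cache._get('/tmp/cache/some_key', timeout=60)
if value is None:
    # miss or expired entry; note that a legitimately cached None is
    # indistinguishable from a miss with this API
    value = compute_and_store()  # hypothetical recompute-and-write helper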
Example #6
def load_omniglot(dataset=_get_datafolder_path()+'/omniglot'):
    '''
    Loads the Omniglot dataset
    :param dataset: path to dataset folder
    :return: train and test data
    '''
    if not os.path.exists(dataset):
        os.makedirs(dataset)
        _download_omniglot(dataset)

    with open(dataset+'/omniglot.cpkl', 'rb') as f:
        train, test = cPkl.load(f)

    train = train.astype('float32')
    test = test.astype('float32')

    return train, test
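
Usage sketch, assuming _download_omniglot and the pickle layout above:

# Hypothetical call: both splits are float32 after the casts above.
train, test = load_omniglot()
print(train.dtype, test.dtype)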
Example #7
def load_lfw(
        dataset=_get_datafolder_path()+'/lfw/lfw',
        normalize=True,
        dequantify=True,
        size=0.25):
    '''
    Loads the LFW dataset
    :param dataset: path prefix for the dataset file (the rescaling
        factor and extension are appended below)
    :param normalize: normalize the x data to the range [0,1]
    :param dequantify: Add uniform noise to dequantify the data following
        Uria et al. 2013
        "RNADE: The real-valued neural autoregressive density-estimator"
    :param size: rescaling factor
    :return: the image data
    '''

    dataset="%s_%0.2f.cpkl"%(dataset,size)
    datasetfolder = os.path.dirname(dataset)
    if not os.path.isfile(dataset):
        if not os.path.exists(datasetfolder):
            os.makedirs(datasetfolder)
        _download_lwf(dataset,size)

    if not os.path.isfile(datasetfolder + '/fixed_split.pkl'):
        urllib.urlretrieve('https://raw.githubusercontent.com/casperkaae/'
                           'extra_parmesan/master/data_splits/'
                           'lfw_fixed_split.pkl',
                           datasetfolder + '/fixed_split.pkl')

    with gzip.open(dataset, 'rb') as f:
        data = cPkl.load(f)[0].astype('float32')
    if dequantify:
        data = data + np.random.uniform(0, 1, size=data.shape).astype('float32')
    if normalize:
        normalizer = data.max().astype('float32')
        data = data / normalizer
    return data
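
Usage sketch; note that, unlike the loaders above, this returns a single array, and the downloaded fixed_split.pkl is fetched but not applied here:

# Hypothetical call: LFW images rescaled by 0.25, dequantified, in [0, 1].
data = load_lfw(size=0.25)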
Example #8
def _unpickle(f):
    # helper: open a pickled file, load its contents, and return them
    with open(f, 'rb') as fo:
        return cPkl.load(fo)
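
A one-line usage sketch (the path is illustrative, matching the CIFAR-10 batch files from Example #3):

# Hypothetical call: returns the pickled dict for one batch
# (keys like 'data' and 'labels').
batch = _unpickle('cifar-10-batches-py/data_batch_1')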