Пример #1
0
def get_data(height=256,
             width=256,
             n_image=200,
             patch_size=(12, 12),
             datapath='database/',
             name_database='serre07_distractors',
             max_patches=1024,
             seed=None,
             patch_norm=True,
             verbose=0,
             data_cache='/tmp/data_cache',
             matname=None):
    """
    Extract a matrix of whitened image patches, optionally cached on disk.

    With ``matname=None`` the patches are extracted directly: every image
    listed by SLIP for ``name_database`` is cropped, whitened, cut into
    patches, and the flattened patches of all images are stacked row-wise.

    With a ``matname``, the result is cached as
    ``<data_cache>/<matname>_data.npy``. If that file exists it is loaded;
    otherwise the data is extracted and saved, while a
    ``<data_cache>/<matname>_data_lock`` file marks the extraction as being
    in progress.

    Parameters
    ----------
    height, width :
        Passed to SLIP as ``N_X`` and ``N_Y`` respectively.
    n_image :
        Passed to SLIP as ``N_image``.
    patch_size :
        Patch size handed to ``slip.extract_patches_2d``.
    datapath :
        Passed to SLIP as ``datapath``.
    name_database :
        Name of the image database handed to ``slip.make_imagelist`` and
        ``slip.patch``.
    max_patches :
        Requested number of patches per image (converted with ``int``).
    seed :
        Passed to SLIP as ``seed``.
    patch_norm :
        If true, each column of the per-image patch matrix is divided by its
        standard deviation (after centering).
    verbose :
        If true, progress and timing information is written to stdout.
    data_cache :
        Directory holding the cache and lock files (only used when
        ``matname`` is given).
    matname :
        Base name of the cache entry, or ``None`` to bypass the cache.

    Returns
    -------
    numpy.ndarray or str
        A 2-D array with one flattened patch per row, or the string
        ``'lock'`` when a lock file exists for a cache entry that has not
        been written yet.

    Raises
    ------
    ValueError
        If SLIP returns an empty image list, so that no patch is extracted.
    """
    import os

    if matname is not None:
        cache_stem = os.path.join(data_cache, matname) + '_data'
        cache_file = cache_stem + '.npy'
        lock_file = cache_stem + '_lock'

        if os.path.isfile(cache_file):
            if verbose:
                print("loading the data called : {0}".format(cache_stem))
            # Original note (French): "only once, mp here" -- presumably
            # about multiprocessing; intent unclear from this function.
            return np.load(cache_file)

        if os.path.isfile(lock_file):
            print('the data extraction is locked', cache_stem)
            return 'lock'

        # Make sure the cache directory exists *before* the expensive
        # extraction, so that saving the result cannot fail on a missing
        # directory afterwards.
        if data_cache:
            os.makedirs(data_cache, exist_ok=True)

        # NOTE(review): the isfile() check above followed by touch() is not
        # atomic; two processes starting at the same instant may both
        # extract. `touch` is expected to be defined elsewhere in this file.
        touch(lock_file)
        try:
            if verbose:
                print('No cache found {}: Extracting data...'.format(
                    cache_stem),
                      end=' ')
                print(datapath)
            data = get_data(height=height,
                            width=width,
                            n_image=n_image,
                            patch_size=patch_size,
                            datapath=datapath,
                            name_database=name_database,
                            max_patches=max_patches,
                            seed=seed,
                            patch_norm=patch_norm,
                            verbose=verbose,
                            matname=None)
            np.save(cache_file, data)
        finally:
            # Always release the lock, even when the extraction failed.
            try:
                os.remove(lock_file)
            except OSError:
                print('Coud not remove ', cache_stem)
        return data

    # ---- direct extraction (no cache) -------------------------------------
    import sys
    import time
    from SLIP import Image

    slip = Image({
        'N_X': height,
        'N_Y': width,
        'white_n_learning': 0,
        'seed': seed,
        'white_N': .07,
        'white_N_0': .0,  # olshausen = 0.
        'white_f_0': .4,  # olshausen = 0.2
        'white_alpha': 1.4,
        'white_steepness': 4.,
        'datapath': datapath,
        'do_mask': True,
        'N_image': n_image
    })

    if verbose:
        sys.stdout.write('Extracting data...')
        sys.stdout.flush()
        t0 = time.time()

    # Collect the per-image patch matrices and stack them once at the end:
    # this avoids re-copying the growing matrix for every image and lets a
    # genuine stacking error (e.g. mismatching patch sizes) surface instead
    # of being silently swallowed.
    per_image = []
    imagelist = slip.make_imagelist(name_database=name_database)
    for filename, croparea in imagelist:
        # crop, then whiten
        image, _, _ = slip.patch(name_database,
                                 filename=filename,
                                 croparea=croparea,
                                 center=False)
        image = slip.whitening(image)
        # extract the patches and flatten each one into a row
        patches = slip.extract_patches_2d(image,
                                          patch_size,
                                          N_patches=int(max_patches))
        patches = patches.reshape(patches.shape[0], -1)
        # center (and optionally scale) every column over this image's
        # patches; NOTE(review): a column with zero standard deviation
        # yields inf/nan here -- confirm this cannot happen upstream.
        patches -= np.mean(patches, axis=0)
        if patch_norm:
            patches /= np.std(patches, axis=0)
        per_image.append(patches)
        if verbose:
            sys.stdout.write(filename + ", ")
            sys.stdout.flush()

    if not per_image:
        raise ValueError(
            'no image found in database {!r} (datapath={!r}): '
            'no patch could be extracted'.format(name_database, datapath))

    data = np.vstack(per_image)

    if verbose:
        dt = time.time() - t0
        sys.stdout.write("\n")
        sys.stdout.write("Data is of shape : " + str(data.shape))
        sys.stdout.write(' - done in %.2fs.' % dt)
        sys.stdout.flush()
    return data