示例#1
0
def get_processed_dataset():
    """Return ``(trainset, testset)`` CIFAR-10 datasets run through the
    patch-extraction / GCN / PCA / patch-pair preprocessing pipeline.

    If both cached pickles exist and ``new_params`` is falsy, the cached
    datasets are loaded; otherwise the datasets are rebuilt from raw
    CIFAR-10, preprocessed, and cached to disk.

    Relies on module-level configuration names: ``new_params``,
    ``patch_shape``, ``patches_per_image``, ``num_components``,
    ``keep_var_fraction``, ``train_size``, ``input_width``, ``start``
    and ``stop``.
    """
    train_path = 'pp_cifar10_train.pkl'
    test_path = 'pp_cifar10_test.pkl'

    if os.path.exists(train_path) and os.path.exists(
            test_path) and not new_params:
        print('loading preprocessed data')
        trainset = serial.load(train_path)
        testset = serial.load(test_path)

    else:
        print('loading raw data...')

        pipeline = preprocessing.Pipeline()
        pipeline.items.append(
            preprocessing.ExtractPatchesWithPosition(
                patch_shape=patch_shape, patches_per_image=patches_per_image))
        pipeline.items.append(
            preprocessing.GlobalContrastNormalization(sqrt_bias=10.,
                                                      use_std=True))
        pipeline.items.append(
            preprocessing.PCA(num_components=num_components,
                              keep_var_fraction=keep_var_fraction))
        pipeline.items.append(
            preprocessing.ExtractPatchPairs(
                patches_per_image=patches_per_image,
                num_images=train_size,
                input_width=input_width))

        trainset = cifar10.CIFAR10(which_set="train", start=start, stop=stop)
        testset = cifar10.CIFAR10(which_set="test")

        trainset.preprocessor = pipeline

        # can_fit=True lets data-driven steps (GCN stats, PCA basis) fit
        # themselves on the training data.
        trainset.apply_preprocessor(preprocessor=pipeline, can_fit=True)

        # NOTE(review): testset is saved WITHOUT the preprocessor applied
        # (no apply_preprocessor(..., can_fit=False) call) -- confirm this
        # is intentional before relying on the cached test pickle.

        # Set yaml_src BEFORE pickling so the cached files carry it; it is
        # used for visualizing weights after training is done. (Previously
        # it was assigned after serial.save, so reloaded datasets lacked it.)
        trainset.yaml_src = '!pkl: "%s"' % train_path
        testset.yaml_src = '!pkl: "%s"' % test_path

        # the pkl-ing is having issues, the dataset is maybe too big.
        serial.save(train_path, trainset)
        serial.save(test_path, testset)

    return trainset, testset
示例#2
0
    # --- preprocessing hyperparameters ------------------------------------
    patch_width = 5
    num_channels = 3
    # Full patch dimensionality would be patch_width**2 * num_channels (75);
    # a reduced number of PCA components is kept instead.
    #num_components = patch_width**2 * num_channels
    num_components = 20
    # NOTE(review): 1e4 is not a fraction in [0, 1] -- presumably a
    # "keep everything" sentinel; confirm against preprocessing.PCA.
    keep_var_fraction = 1e4
    patches_per_image = 4

    # We'd like to do several operations on them, so we'll set up a pipeline
    # to do so.
    pipeline = preprocessing.Pipeline()

    # First we want to pull out small patches of the images, since it's easier
    # to train an RBM on these
    pipeline.items.append(
        preprocessing.ExtractPatchesWithPosition(
            patch_shape=(patch_width, patch_width),
            patches_per_image=patches_per_image))

    # Next we contrast normalize the patches. The default arguments use the
    # same "regularization" parameters as those used in Adam Coates, Honglak
    # Lee, and Andrew Ng's paper "An Analysis of Single-Layer Networks in
    # Unsupervised Feature Learning"
    pipeline.items.append(
        preprocessing.GlobalContrastNormalization(sqrt_bias=10.,
                                                  use_std=True))

    # Reduce dimensionality of the patches with PCA.
    pipeline.items.append(
        preprocessing.PCA(num_components=num_components,
                          keep_var_fraction=keep_var_fraction))

    # Pair up patches drawn from the same image. (Fixed: the original call
    # was missing its closing parenthesis, a syntax error.)
    pipeline.items.append(
        preprocessing.ExtractPatchPairs(patches_per_image=patches_per_image,
                                        num_images=num_images))
示例#3
0
# We'll need the preprocessing module to preprocess the dataset
from pylearn2.datasets import preprocessing

if __name__ == "__main__":
    # Our raw training set is 32x32 color images
    train = cifar10.CIFAR10(which_set="train")

    # We'd like to do several operations on them, so we'll set up a pipeline to
    # do so.
    pipeline = preprocessing.Pipeline()

    # First we want to pull out small patches of the images, since it's easier
    # to train an RBM on these
    pipeline.items.append(
        preprocessing.ExtractPatchesWithPosition(patch_shape=(8, 8),
                                                 patches_per_image=3))

    # Next we contrast normalize the patches. The default arguments use the
    # same "regularization" parameters as those used in Adam Coates, Honglak
    # Lee, and Andrew Ng's paper "An Analysis of Single-Layer Networks in
    # Unsupervised Feature Learning"
    pipeline.items.append(
        preprocessing.GlobalContrastNormalization(sqrt_bias=10., use_std=True))

    # Finally, reduce dimensionality with PCA, keeping enough components to
    # retain 99% of the variance. (An earlier comment here said "ZCA", but
    # the code applies preprocessing.PCA, not ZCA whitening.)
    pipeline.items.append(preprocessing.PCA(keep_var_fraction=.99))

    # Here we apply the preprocessing pipeline to the dataset. The can_fit
    # argument indicates that data-driven preprocessing steps (such as the ZCA