Пример #1
0
def compute_caltech_features():
    """Train a feature pipeline on the Caltech images and dump its outputs.

    The pipeline is a ``ConvLayer`` made of a dense-SIFT extractor
    (``FLAGS.sift_size`` / ``FLAGS.sift_stride``), an LLC encoder whose
    dictionary is trained by k-means (``FLAGS.dict_size`` centers), and a
    3-level max pyramid pooler.  After training, three artifacts are written
    to ``FLAGS.feature_dir``:

    * the pickled pipeline, ``FLAGS.model_file`` (root process only);
    * the extracted features, ``FLAGS.feature_file``;
    * the dataset labels, ``FLAGS.label_file``.
    """
    caltech = datasets.TwoLayerDataset(FLAGS.root, ['jpg'], max_size=300)
    conv = pipeline.ConvLayer([
        dsift.DsiftExtractor(FLAGS.sift_size, FLAGS.sift_stride),
        pipeline.LLCEncoder({'k': FLAGS.llc_k},
                            trainer=pipeline.KmeansTrainer(
                                {'k': FLAGS.dict_size})),
        pipeline.PyramidPooler({
            'level': 3,
            'method': 'max'
        })
    ])
    # 400000 is presumably the number of training samples -- TODO confirm
    # against ConvLayer.train.
    conv.train(caltech, 400000)
    feat = conv.process_dataset(caltech, as_2d=True)

    mpi.mkdir(FLAGS.feature_dir)
    if mpi.is_root():
        model_path = os.path.join(FLAGS.feature_dir, FLAGS.model_file)
        # Pickle streams are binary data: always write them in binary mode so
        # the file is valid regardless of platform or pickle protocol.
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)

    mpi.dump_matrix_multi(feat,
                          os.path.join(FLAGS.feature_dir, FLAGS.feature_file))
    mpi.dump_matrix_multi(caltech.labels(),
                          os.path.join(FLAGS.feature_dir, FLAGS.label_file))
Пример #2
0
 def dump(self, target_folder):
     """Write every image of this dataset under ``target_folder``.

     Each image is saved at ``target_folder/<raw name>``; the directory
     part of the raw name is created first so nested names work.
     """
     mpi.mkdir(target_folder)
     for image_index in range(self.size()):
         relative_name = self._raw_name[image_index]
         # Make sure the sub-directory of this image exists before saving.
         sub_directory = os.path.join(target_folder,
                                      os.path.dirname(relative_name))
         mpi.mkdir(sub_directory)
         destination = os.path.join(target_folder, relative_name)
         misc.imsave(destination, self._read(image_index))
Пример #3
0
 def dump(self, target_folder):
     """Save all images held by this object into ``target_folder``.

     The raw name of each image is used as its path relative to
     ``target_folder``; intermediate directories are created as needed.
     """
     mpi.mkdir(target_folder)
     total = self.size()
     for position in range(total):
         raw_name = self._raw_name[position]
         parent = os.path.join(target_folder, os.path.dirname(raw_name))
         mpi.mkdir(parent)
         misc.imsave(os.path.join(target_folder, raw_name),
                     self._read(position))
Пример #4
0
def cifar_demo():
    """Run an end-to-end classification demo on CIFAR.

    Steps, in order:

    1. Load the CIFAR training and test sets from ``FLAGS.root``.
    2. Train a ``ConvLayer`` pipeline: 6x6 patches, mean/variance
       normalization, a ZCA-trained linear encoder, an OMP-trained
       threshold encoder (``k=800``) and 2x2 average pooling.
    3. Pickle the pipeline into ``FLAGS.output_dir`` (root process only) and
       load it back from disk.
    4. Extract and dump the train/test features, standardize both with the
       training mean/std, and fit a one-vs-all L2 SVM.
    5. Pickle the SVM parameters (root only) and log both accuracies.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder(
            {}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),  # does whitening
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': True},
            trainer=pipeline.OMPTrainer(
                {'k': 800, 'max_iter': 100})),  # does encoding
        pipeline.SpatialPooler(
            {'grid': (2, 2), 'method': 'ave'}),  # average pool
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 50000)
    logging.info('Dumping the pipeline...')
    model_path = os.path.join(FLAGS.output_dir, FLAGS.model_file)
    if mpi.is_root():
        # Binary mode: pickle streams are bytes.  The ``with`` statement
        # closes the file, so no explicit close() is needed.
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)
    # NOTE(review): every process reads the file right after the root writes
    # it and no synchronization is visible here -- confirm that non-root
    # ranks cannot reach this point before the dump has completed.
    with open(model_path, 'rb') as fid:
        conv = pickle.load(fid)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(Xtrain,
                          os.path.join(FLAGS.output_dir,
                                       FLAGS.feature_file + '_train'))
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from recent NumPy releases; ``int`` is equivalent everywhere.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(Xtest,
                          os.path.join(FLAGS.output_dir,
                                       FLAGS.feature_file + '_test'))
    Ytest = cifar_test.labels().astype(int)

    # normalization: the test set reuses the statistics of the training set
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.01)
    if mpi.is_root():
        svm_path = os.path.join(FLAGS.output_dir, FLAGS.svm_file)
        with open(svm_path, 'wb') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)
    # Accuracy = fraction of samples whose highest-scoring class is the label.
    accu = (np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1))
            / float(len(Ytrain)))
    accu_test = (np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1))
                 / float(len(Ytest)))

    logging.info('Training accuracy: %f' % accu)
    logging.info('Testing accuracy: %f' % accu_test)
def cifar_demo():
    """Run an end-to-end classification demo on CIFAR.

    Loads the CIFAR train/test sets from ``FLAGS.root``, trains a
    ``ConvLayer`` pipeline (6x6 patches, mean/variance normalization,
    ZCA-trained linear encoder, OMP-trained threshold encoder with
    ``k=1600``, 4x4 max pooling), pickles the pipeline (root process only),
    dumps the extracted features, standardizes them with the training
    mean/std, fits a one-vs-all L2 SVM, pickles its parameters (root only)
    and logs the training and testing accuracy.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info("Loading cifar data...")
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    # try: use sub images
    # cifar = datasets.SubImageSet(cifar, [28,28], 1)
    # cifar_test = datasets.CenterRegionSet(cifar_test, [28,28])

    conv = pipeline.ConvLayer(
        [
            pipeline.PatchExtractor([6, 6], 1),  # extracts patches
            pipeline.MeanvarNormalizer({"reg": 10}),  # normalizes the patches
            pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({"reg": 0.1})),  # does whitening
            pipeline.ThresholdEncoder(
                {"alpha": 0.25, "twoside": True}, trainer=pipeline.OMPTrainer({"k": 1600, "max_iter": 100})
            ),  # does encoding
            pipeline.SpatialPooler({"grid": (4, 4), "method": "max"}),  # max pool over a 4x4 grid
        ]
    )
    logging.info("Training the pipeline...")
    conv.train(cifar, 400000)
    logging.info("Dumping the pipeline...")
    if mpi.is_root():
        # Binary mode: pickle streams are bytes.  ``with`` closes the file.
        with open(os.path.join(FLAGS.output_dir, FLAGS.model_file), "wb") as fid:
            pickle.dump(conv, fid)
    logging.info("Extracting features...")
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(Xtrain, os.path.join(FLAGS.output_dir, FLAGS.feature_file + "_train"))
    # ``np.int`` was only an alias of the builtin ``int`` (removed from
    # recent NumPy releases); ``int`` is equivalent everywhere.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(Xtest, os.path.join(FLAGS.output_dir, FLAGS.feature_file + "_test"))
    Ytest = cifar_test.labels().astype(int)
    # normalization: the test set reuses the statistics of the training set
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.005)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), "wb") as fid:
            pickle.dump({"m": m, "std": std, "w": w, "b": b}, fid)
    # Accuracy = fraction of samples whose highest-scoring class is the label.
    accu = np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1)) / float(len(Ytrain))
    accu_test = np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1)) / float(len(Ytest))

    logging.info("Training accuracy: %f" % accu)
    logging.info("Testing accuracy: %f" % accu_test)
def compute_caltech_features():
    """Train the Caltech feature pipeline and write model, features, labels.

    A ``ConvLayer`` (dense SIFT -> LLC encoding with a k-means dictionary ->
    3-level max pyramid pooling) is trained on the ``jpg`` dataset found
    under ``FLAGS.root``.  The pickled pipeline (root process only), the
    extracted features and the labels are then stored in
    ``FLAGS.feature_dir`` under ``FLAGS.model_file``, ``FLAGS.feature_file``
    and ``FLAGS.label_file`` respectively.
    """
    caltech = datasets.TwoLayerDataset(FLAGS.root, ["jpg"], max_size=300)
    conv = pipeline.ConvLayer(
        [
            dsift.DsiftExtractor(FLAGS.sift_size, FLAGS.sift_stride),
            pipeline.LLCEncoder({"k": FLAGS.llc_k}, trainer=pipeline.KmeansTrainer({"k": FLAGS.dict_size})),
            pipeline.PyramidPooler({"level": 3, "method": "max"}),
        ]
    )
    conv.train(caltech, 400000)
    feat = conv.process_dataset(caltech, as_2d=True)

    mpi.mkdir(FLAGS.feature_dir)
    if mpi.is_root():
        # Pickle data is binary, so the target file must be opened with "wb".
        with open(os.path.join(FLAGS.feature_dir, FLAGS.model_file), "wb") as fid:
            pickle.dump(conv, fid)

    mpi.dump_matrix_multi(feat, os.path.join(FLAGS.feature_dir, FLAGS.feature_file))
    mpi.dump_matrix_multi(caltech.labels(), os.path.join(FLAGS.feature_dir, FLAGS.label_file))
def cifar_demo():
    """Run a CIFAR classification demo with a configurable pipeline.

    The whitening trainer is selected by ``FLAGS.trainer`` (``"pink"`` picks
    ``pinker.SpatialPinkTrainer``, anything else ``pipeline.ZcaTrainer``).
    Patch size, pooling grid and pooling method come from ``FLAGS.patch``,
    ``FLAGS.grid`` and ``FLAGS.method``.  The trained pipeline and the SVM
    parameters are pickled into ``FLAGS.output_dir`` by the root process,
    and the training/testing accuracies are logged.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer(
            {'size': (FLAGS.patch, FLAGS.patch), 'reg': 0.1})
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder(
            {'alpha': 0.0, 'twoside': False},
            trainer=pipeline.OMPTrainer({'k': 100, 'max_iter': 100})),
        # pools with the method named by FLAGS.method
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid), 'method': FLAGS.method}),
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)
    logging.info('Dumping the pipeline...')
    if mpi.is_root():
        # Binary mode: pickle streams are bytes.  ``with`` closes the file.
        with open(os.path.join(FLAGS.output_dir, FLAGS.model_file), 'wb') as fid:
            pickle.dump(conv, fid)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    # ``np.int`` was only an alias of the builtin ``int`` (removed from
    # recent NumPy releases); ``int`` is equivalent everywhere.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    Ytest = cifar_test.labels().astype(int)

    # normalization
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    # Disabled experiment (previously kept as a bare string literal):
    # covmat = mathutil.mpi_cov(Xtrain)
    # eigval, eigvec = np.linalg.eigh(covmat)
    # U = eigvec[:,-400:] * np.sqrt(eigval[-400:])
    # logging.info("Dump oriol")
    # mpi.root_pickle((eigval, eigvec), 'cifar_dump_oriol.pickle')
    # Xtrain = np.dot(Xtrain, U)
    # Xtest = np.dot(Xtest, U)

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.002,
                                     fminargs={'maxfun': 4000})
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), 'wb') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)

    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
Пример #8
0
 def testMkdir(self):
     """``mpi.mkdir`` must leave the test directory present on disk."""
     mpi.mkdir(_MPI_TEST_DIR)
     directory_exists = os.path.exists(_MPI_TEST_DIR)
     self.assertTrue(directory_exists)
import cPickle as pickle
from matplotlib import pyplot
from iceberk import visualize, mpi
from scipy import misc
import numpy as np

import matplotlib
# Configure matplotlib's PostScript/PDF font handling and TeX text rendering.
matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
matplotlib.rcParams['text.usetex'] = True

# Output directory for the images written below.
mpi.mkdir('distribution')

pyplot.ion()

# Read the pickle through a context manager so the handle is closed
# deterministically, and in binary mode because pickle streams are bytes.
with open('distribution_before_after_pooling.pickle', 'rb') as fid:
    dictionary, before_pooling, after_pooling = pickle.load(fid)
# Shift every column of the pooled data so that its minimum is zero.
after_pooling -= after_pooling.min(0)

# Correlation matrices are computed on the full data, before subsampling.
corr_before = np.corrcoef(before_pooling.T)
corr_after = np.corrcoef(after_pooling.T)

# do random sampling for visualization (1000 rows, drawn with replacement)
before_pooling = before_pooling[np.random.randint(before_pooling.shape[0], size=1000)]
after_pooling = after_pooling[np.random.randint(after_pooling.shape[0], size=1000)]

vis = visualize.PatchVisualizer()
im = vis.show_multiple(dictionary)
# Save the four 8x8 corners of the rendered dictionary image.
misc.imsave('distribution/1.png', im[:8,:8])
misc.imsave('distribution/2.png', im[:8,-8:])
misc.imsave('distribution/3.png', im[-8:,:8])
misc.imsave('distribution/4.png', im[-8:,-8:])
Пример #10
0
def cifar_demo():
    """Run a CIFAR demo with feature-dimension reduction before the SVM.

    A ``ConvLayer`` pipeline is trained with ``FLAGS.fromdim`` dictionary
    entries, the pooled features are flattened so that the last axis is the
    feature axis, standardized, and reduced to ``FLAGS.todim`` dimensions by
    selecting a subset of features with ``code_ap.code_af`` and projecting
    through a matrix derived from the feature covariance.  The reduced
    features are reshaped to one row per image, a one-vs-all L2 SVM is
    trained, and both accuracies are logged.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer({
            'size': (FLAGS.patch, FLAGS.patch),
            'reg': 0.1
        })
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch],
                                1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder({
            'alpha': 0.0,
            'twoside': False
        },
                                  trainer=pipeline.OMPTrainer({
                                      'k': FLAGS.fromdim,
                                      'max_iter': 100
                                  })),
        pipeline.SpatialPooler({
            'grid': (FLAGS.grid, FLAGS.grid),
            'method': FLAGS.method
        })  # pools with the method named by FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # ``np.int`` was only an alias of the builtin ``int`` (removed from
    # recent NumPy releases); ``int`` is equivalent everywhere.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse all leading axes so each row is one feature vector.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)
    if False:
        # directly do dimensionality reduction (disabled alternative)
        eigval, eigvec = np.linalg.eigh(covmat)
        U = eigvec[:, -FLAGS.todim:]
        Xtrain = np.dot(Xtrain, U)
        Xtest = np.dot(Xtest, U)
    else:
        # do subsampling
        import code_ap
        temp = code_ap.code_af(Xtrain, FLAGS.todim)
        sel = temp[0]
        # Broadcast so every process uses the same selection.
        sel = mpi.COMM.bcast(sel)
        Cpred = covmat[sel]
        Csel = Cpred[:, sel]
        W = np.linalg.solve(Csel, Cpred)
        # perform svd
        U, D, _ = np.linalg.svd(W, full_matrices=0)
        U *= D
        Xtrain = np.dot(Xtrain[:, sel], U)
        Xtest = np.dot(Xtest[:, sel], U)
    # Back to one row per image.  Floor division keeps the new shape
    # integral; true division would hand ``resize`` a float.
    Xtrain.resize(Ytrain.shape[0], Xtrain.size // Ytrain.shape[0])
    Xtest.resize(Ytest.shape[0], Xtest.size // Ytest.shape[0])

    # Disabled experiment (previously kept as a bare string literal):
    # # This part is used to do post-pooling over all features nystrom subsampling
    # # normalization
    # Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
    # Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))
    # m, std = classifier.feature_meanstd(Xtrain, reg = 0.01)
    # # to match Adam Coates' pipeline
    # Xtrain -= m
    # Xtrain /= std
    # Xtest -= m
    # Xtest /= std
    #
    # covmat = mathutil.mpi_cov(Xtrain)
    # eigval, eigvec = np.linalg.eigh(covmat)
    # U = eigvec[:, -(200*FLAGS.grid*FLAGS.grid):]
    # #U = eigvec[:,-400:] * np.sqrt(eigval[-400:])
    # Xtrain = np.dot(Xtrain, U)
    # Xtest = np.dot(Xtest, U)

    w, b = classifier.l2svm_onevsall(Xtrain,
                                     Ytrain,
                                     0.002,
                                     fminargs={
                                         'disp': 0,
                                         'maxfun': 1000
                                     })
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
Пример #11
0
     pipeline.MeanvarNormalizer({'reg': 0.01}),
     pipeline.LinearEncoder({},
                trainer = pipeline.ZcaTrainer({'reg': 0.01})),
     pipeline.ReLUEncoder({'twoside': False},
                trainer = pipeline.NormalizedKmeansTrainer({'k': 1600, 'max_iter': 100})),
     pipeline.PyramidPooler({'level': 3, 'method': 'max'})
    ], prev = CONV)

CONV_SPM_GAMMA = 0.01

logging.debug('Loading data...')
# Second positional argument: True for the training split, False for the
# test split (presumably an ``is_training`` flag -- TODO confirm).
train_data = visiondata.CUBDataset(ROOT, True, crop = CROP, subset = SUBSET,
                                  target_size = TARGET_SIZE, prefetch = True)
test_data = visiondata.CUBDataset(ROOT, False, crop = CROP, subset = SUBSET,
                                 target_size = TARGET_SIZE, prefetch = True)
mpi.mkdir(CONVOLUTION_OUTPUT)
if MIRRORED:
    train_data = datasets.MirrorSet(train_data)
    # note that we do not mirror test data.
logging.debug('Training convolutional NN...')
# CONV2 is built on top of CONV (``prev = CONV``), so CONV is trained first.
CONV.train(train_data, 400000, exhaustive = True)
CONV2.train(train_data, 400000, exhaustive = True)

mpi.root_pickle(CONV2, CONVOLUTION_FILE)
Xtrain = CONV2.process_dataset(train_data)
# ``np.int`` was only an alias of the builtin ``int`` (removed from recent
# NumPy releases); ``int`` is equivalent everywhere.
Ytrain = train_data.labels().astype(int)
Xtest = CONV2.process_dataset(test_data)
Ytest = test_data.labels().astype(int)

# normalization
m, std = classifier.feature_meanstd(Xtrain, reg = 0.01)
Пример #12
0
CONV_SPM_GAMMA = 0.01

logging.debug('Loading data...')
# The second positional argument is True for the training split and False
# for the test split (presumably an ``is_training`` flag -- TODO confirm).
train_data = visiondata.CUBDataset(ROOT,
                                   True,
                                   crop=CROP,
                                   subset=SUBSET,
                                   target_size=TARGET_SIZE,
                                   prefetch=True)
test_data = visiondata.CUBDataset(ROOT,
                                  False,
                                  crop=CROP,
                                  subset=SUBSET,
                                  target_size=TARGET_SIZE,
                                  prefetch=True)
mpi.mkdir(CONVOLUTION_OUTPUT)
if MIRRORED:
    train_data = datasets.MirrorSet(train_data)
    # note that we do not mirror test data.
logging.debug('Training convolutional NN...')
CONV.train(train_data, 400000, exhaustive=True)
CONV2.train(train_data, 400000, exhaustive=True)

mpi.root_pickle(CONV2, CONVOLUTION_FILE)
Xtrain = CONV2.process_dataset(train_data)
# ``np.int`` was only an alias of the builtin ``int`` (removed from recent
# NumPy releases); ``int`` is equivalent everywhere.
Ytrain = train_data.labels().astype(int)
Xtest = CONV2.process_dataset(test_data)
Ytest = test_data.labels().astype(int)

# normalization
m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
Пример #13
0
import cPickle as pickle
from iceberk import visualize, mpi
from scipy import misc
import numpy as np

import matplotlib
from matplotlib import pyplot

# Configure matplotlib's PostScript/PDF font handling and TeX text rendering.
matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
matplotlib.rcParams['text.usetex'] = True

mpi.mkdir('centroids')
mpi.mkdir('distribution')

# Load through a context manager (no leaked handle) and in binary mode
# (pickle streams are bytes).
with open('cvpr_exemplar_centroids.pickle', 'rb') as fid:
    dictionary, ap_result = pickle.load(fid)
vis = visualize.PatchVisualizer()
im = vis.show_multiple(dictionary, bg_func=np.max)
misc.imsave('centroids/dictionary.png', im)

# Render only the dictionary entries indexed by the first element of
# ap_result.
im = vis.show_multiple(dictionary[ap_result[0]], bg_func=np.max)
misc.imsave('centroids/dictionary_major.png', im)

with open('cvpr_exemplar_centroids_covmat_eigvals.pickle', 'rb') as fid:
    eigval, eigval_rec, eigval_random = pickle.load(fid)
# Sort every spectrum in descending order.
eigval = np.sort(eigval)[::-1]
eigval_rec = np.sort(eigval_rec)[::-1]
eigval_random = np.sort(eigval_random)[::-1]
fig = pyplot.figure()
pyplot.plot(np.log(eigval[:600]), 'g-', lw=2)
pyplot.plot(np.log(eigval[:256]), 'b-.', lw=2)
Пример #14
0
from jiayq.experiments.feature_selection import pcfs
import logging
import numpy as np
import os

# Location of the STL-10 data.
stl_folder = '/u/vis/x1/common/STL_10/stl10_matlab'

# Files written/read by this script, all under the same output directory.
covmat_file = '/u/vis/ttmp/jiayq/stl/covmat.npy'
order_file = '/u/vis/ttmp/jiayq/stl/order.npy'
model_file_second = '/u/vis/ttmp/jiayq/stl/conv_second.pickle'
model_file_first = '/u/vis/ttmp/jiayq/stl/conv.pickle'
NUM_REDUCED_DICT = 64

# Only the root process gets DEBUG-level logging configured.
root_process = mpi.is_root()
if root_process:
    logging.basicConfig(level=logging.DEBUG)

mpi.mkdir('/u/vis/ttmp/jiayq/stl/')

logging.info("Loading stl dataset...")
stl = visiondata.STL10Dataset(stl_folder, 'unlabeled')

################################################################################
# Train the first layer 
################################################################################
if os.path.exists(model_file_first):
    logging.info("skipping the first layer training...")
    conv = pickle.load(open(model_file_first,'r'))
else:
    logging.info("Setting up the convolutional layer...")
    conv = pipeline.ConvLayer([
            pipeline.PatchExtractor([5, 5], 1),
            pipeline.MeanvarNormalizer({'reg': 10}),
import cPickle as pickle
from iceberk import visualize, mpi
from scipy import misc
import numpy as np

import matplotlib
from matplotlib import pyplot

# Configure matplotlib's PostScript/PDF font handling and TeX text rendering.
matplotlib.rcParams["ps.useafm"] = True
matplotlib.rcParams["pdf.use14corefonts"] = True
matplotlib.rcParams["text.usetex"] = True

mpi.mkdir("centroids")
mpi.mkdir("distribution")

# Use a context manager so the handle is closed, and binary mode because
# pickle streams are bytes.
with open("cvpr_exemplar_centroids.pickle", "rb") as fid:
    dictionary, ap_result = pickle.load(fid)
vis = visualize.PatchVisualizer()
im = vis.show_multiple(dictionary, bg_func=np.max)
misc.imsave("centroids/dictionary.png", im)

# Render only the dictionary entries indexed by the first element of
# ap_result.
im = vis.show_multiple(dictionary[ap_result[0]], bg_func=np.max)
misc.imsave("centroids/dictionary_major.png", im)

with open("cvpr_exemplar_centroids_covmat_eigvals.pickle", "rb") as fid:
    eigval, eigval_rec, eigval_random = pickle.load(fid)
# Sort every spectrum in descending order before plotting its log.
eigval = np.sort(eigval)[::-1]
eigval_rec = np.sort(eigval_rec)[::-1]
eigval_random = np.sort(eigval_random)[::-1]
fig = pyplot.figure()
pyplot.plot(np.log(eigval[:600]), "g-", lw=2)
pyplot.plot(np.log(eigval[:256]), "b-.", lw=2)
pyplot.plot(np.log(eigval_rec[:256]), "r--", lw=2)
def cifar_demo():
    """Run a CIFAR demo with feature-dimension reduction before the SVM.

    Trains a ``ConvLayer`` pipeline with ``FLAGS.fromdim`` dictionary
    entries, flattens the pooled features so the last axis is the feature
    axis, standardizes them, and reduces them to ``FLAGS.todim`` dimensions:
    a subset of features is chosen by ``code_ap.code_af`` and projected
    through a matrix derived from the feature covariance.  The result is
    reshaped to one row per image, a one-vs-all L2 SVM is trained, and the
    training/testing accuracies are logged.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer(
            {'size': (FLAGS.patch, FLAGS.patch), 'reg': 0.1})
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder(
            {'alpha': 0.0, 'twoside': False},
            trainer=pipeline.OMPTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        # pools with the method named by FLAGS.method
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid), 'method': FLAGS.method}),
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # ``np.int`` was only an alias of the builtin ``int`` (removed from
    # recent NumPy releases); ``int`` is equivalent everywhere.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse all leading axes so each row is one feature vector.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)
    if False:
        # directly do dimensionality reduction (disabled alternative)
        eigval, eigvec = np.linalg.eigh(covmat)
        U = eigvec[:, -FLAGS.todim:]
        Xtrain = np.dot(Xtrain, U)
        Xtest = np.dot(Xtest, U)
    else:
        # do subsampling
        import code_ap
        temp = code_ap.code_af(Xtrain, FLAGS.todim)
        sel = temp[0]
        # Broadcast so every process uses the same selection.
        sel = mpi.COMM.bcast(sel)
        Cpred = covmat[sel]
        Csel = Cpred[:, sel]
        W = np.linalg.solve(Csel, Cpred)
        # perform svd
        U, D, _ = np.linalg.svd(W, full_matrices=0)
        U *= D
        Xtrain = np.dot(Xtrain[:, sel], U)
        Xtest = np.dot(Xtest[:, sel], U)
    # Back to one row per image.  Floor division keeps the new shape
    # integral; true division would hand ``resize`` a float.
    Xtrain.resize(Ytrain.shape[0], Xtrain.size // Ytrain.shape[0])
    Xtest.resize(Ytest.shape[0], Xtest.size // Ytest.shape[0])

    # Disabled experiment (previously kept as a bare string literal):
    # # This part is used to do post-pooling over all features nystrom subsampling
    # # normalization
    # Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
    # Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))
    # m, std = classifier.feature_meanstd(Xtrain, reg = 0.01)
    # # to match Adam Coates' pipeline
    # Xtrain -= m
    # Xtrain /= std
    # Xtest -= m
    # Xtest /= std
    #
    # covmat = mathutil.mpi_cov(Xtrain)
    # eigval, eigvec = np.linalg.eigh(covmat)
    # U = eigvec[:, -(200*FLAGS.grid*FLAGS.grid):]
    # #U = eigvec[:,-400:] * np.sqrt(eigval[-400:])
    # Xtrain = np.dot(Xtrain, U)
    # Xtest = np.dot(Xtest, U)

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.002,
                                     fminargs={'disp': 0, 'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
Пример #17
0
 def testMkdir(self):
     """After ``mpi.mkdir`` the test directory has to exist."""
     mpi.mkdir(_MPI_TEST_DIR)
     created = os.path.exists(_MPI_TEST_DIR)
     self.assertTrue(created)
def cifar_demo():
    """Sweep the reduced feature dimension on CIFAR and log SVM accuracy.

    A ``ConvLayer`` pipeline with ``FLAGS.fromdim`` dictionary entries is
    trained, the pooled features are flattened, mean-centered per row and
    standardized per feature.  Starting from ``FLAGS.fromdim`` the target
    dimension is halved while it is at least 100; for each value the
    features are reduced (eigenvectors of the covariance when
    ``FLAGS.svd == 1``, otherwise a feature subset chosen by
    ``code_ap.code_af``), a one-vs-all L2 SVM is trained and the
    training/testing accuracies are logged.  The first iteration uses the
    unreduced features.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer(
                {'k': FLAGS.fromdim, 'max_iter': 100})),
        # pools with the method named by FLAGS.method
        pipeline.SpatialPooler(
            {'grid': (FLAGS.grid, FLAGS.grid), 'method': FLAGS.method}),
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # ``np.int`` was only an alias of the builtin ``int`` (removed from
    # recent NumPy releases); ``int`` is equivalent everywhere.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # collapse all leading axes so each row is one feature vector.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # not only do we remove the mean of each feature, we remove the mean of
    # each data point similar to contrast normalization
    Xtrain -= Xtrain.mean(axis=1)[:, np.newaxis]
    Xtest -= Xtest.mean(axis=1)[:, np.newaxis]

    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # The eigendecomposition does not depend on current_dim: do it once.
        eigval, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                temp = code_ap.code_af(Xtrain, current_dim)
                logging.info("selected %d dims" % len(temp[0]))
                sel = temp[0]
                # Broadcast so every process uses the same selection.
                sel = mpi.COMM.bcast(sel)
                Cpred = covmat[sel]
                Csel = Cpred[:, sel]
                W = np.linalg.solve(Csel, Cpred)
                # perform svd
                U, D, _ = np.linalg.svd(W, full_matrices=0)
                U *= D
                Xtrain_red = np.dot(Xtrain[:, sel], U)
                Xtest_red = np.dot(Xtest[:, sel], U)
        else:
            # First pass: no reduction.  Work on copies because the resize
            # below is done in place.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()
        # Back to one row per image.  Floor division keeps the new shape
        # integral; true division would hand ``resize`` a float.
        Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size // Ytrain.shape[0])
        Xtest_red.resize(Ytest.shape[0], Xtest_red.size // Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red, Ytrain, 0.005,
                                         fminargs={'disp': 0, 'maxfun': 1000})
        accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' % (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' % (FLAGS.fromdim, current_dim, accu_test))
        # Floor division keeps current_dim an integer so it stays usable as
        # a slice bound and with the '%d' format.
        current_dim //= 2
Пример #19
0
gflags.DEFINE_string("model", "", "The model file")
gflags.DEFINE_string("folder", "", "The input folder that contains the data")
gflags.DEFINE_string("output", "", "The output folder")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

# Both flags default to the empty string; refuse to run without them.
# Passing a string to sys.exit() prints it to stderr and exits with status 1.
if FLAGS.folder == "" or FLAGS.model == "":
    sys.exit("Both --folder and --model must be specified.")

# The model file must provide the weights 'w' and the bias 'b'.
model = np.load(FLAGS.model)
w = model['w']
b = model['b']

if not os.path.exists(FLAGS.output):
    mpi.mkdir(FLAGS.output)

# Sort so that every process sees the files in the same order.
files = glob.glob(os.path.join(FLAGS.folder, '*.mat'))
files.sort()

# Each process handles every mpi.SIZE-th file, starting at its own rank.
for i in range(mpi.RANK, len(files), mpi.SIZE):
    filename = files[i]
    # Single-argument parenthesized form: valid as both the Python 2 print
    # statement and the Python 3 print function.
    print('%d / %d: %s' % (i, len(files), filename))
    # Context managers close the HDF5 files even if a step raises.
    with h5py.File(filename, 'r') as fid:
        pred = np.dot(fid['features'], w)
    pred += b
    output_path = os.path.join(FLAGS.output, os.path.basename(filename))
    with h5py.File(output_path, 'w') as fidout:
        fidout['pred'] = pred
Пример #20
0
from jiayq.experiments.feature_selection import pcfs
import logging
import numpy as np
import os

# Where the STL-10 data lives.
stl_folder = '/u/vis/x1/common/STL_10/stl10_matlab'

# Artifacts of this script; they all share one output directory.
NUM_REDUCED_DICT = 64
covmat_file = '/u/vis/ttmp/jiayq/stl/covmat.npy'
order_file = '/u/vis/ttmp/jiayq/stl/order.npy'
model_file_first = '/u/vis/ttmp/jiayq/stl/conv.pickle'
model_file_second = '/u/vis/ttmp/jiayq/stl/conv_second.pickle'

# DEBUG-level logging is configured on the root process only.
running_as_root = mpi.is_root()
if running_as_root:
    logging.basicConfig(level=logging.DEBUG)

mpi.mkdir('/u/vis/ttmp/jiayq/stl/')

logging.info("Loading stl dataset...")
stl = visiondata.STL10Dataset(stl_folder, 'unlabeled')

################################################################################
# Train the first layer
################################################################################
if os.path.exists(model_file_first):
    logging.info("skipping the first layer training...")
    conv = pickle.load(open(model_file_first, 'r'))
else:
    logging.info("Setting up the convolutional layer...")
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([5, 5], 1),
        pipeline.MeanvarNormalizer({'reg': 10}),
Пример #21
0
def cifar_demo():
    """Run an end-to-end classification demo on CIFAR.

    Loads the CIFAR train/test sets from ``FLAGS.root``; trains a
    ``ConvLayer`` pipeline (6x6 patches, mean/variance normalization,
    ZCA-trained linear encoder, OMP-trained threshold encoder with
    ``k=800``, 2x2 average pooling); pickles the pipeline into
    ``FLAGS.output_dir`` (root process only) and reloads it from disk;
    dumps the extracted features; standardizes them with the training
    mean/std; fits a one-vs-all L2 SVM; pickles the SVM parameters (root
    only) and logs the training and testing accuracy.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer(
            {'reg': 0.1})),  # Does whitening
        pipeline.ThresholdEncoder({
            'alpha': 0.25,
            'twoside': True
        },
                                  trainer=pipeline.OMPTrainer({
                                      'k': 800,
                                      'max_iter': 100
                                  })),  # does encoding
        pipeline.SpatialPooler({
            'grid': (2, 2),
            'method': 'ave'
        })  # average pool
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 50000)
    logging.info('Dumping the pipeline...')
    model_path = os.path.join(FLAGS.output_dir, FLAGS.model_file)
    if mpi.is_root():
        # Binary mode: pickle streams are bytes.  The ``with`` statement
        # closes the file, so no explicit close() is needed.
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)
    # NOTE(review): every process reads the file right after the root writes
    # it and no synchronization is visible here -- confirm that non-root
    # ranks cannot reach this point before the dump has completed.
    with open(model_path, 'rb') as fid:
        conv = pickle.load(fid)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(
        Xtrain, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_train'))
    # ``np.int`` was only an alias of the builtin ``int`` (removed from
    # recent NumPy releases); ``int`` is equivalent everywhere.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(
        Xtest, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_test'))
    Ytest = cifar_test.labels().astype(int)

    # normalization: the test set reuses the statistics of the training set
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.01)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), 'wb') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)
    # Accuracy = fraction of samples whose highest-scoring class is the label.
    accu = (np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1))
            / float(len(Ytrain)))
    accu_test = (np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1))
                 / float(len(Ytest)))

    logging.info('Training accuracy: %f' % accu)
    logging.info('Testing accuracy: %f' % accu_test)
Пример #22
0
def cifar_demo():
    """Performs a demo classification on cifar with reduced feature dimensions.

    The steps, in order:

    1. Load the CIFAR training and testing sets from ``FLAGS.root``.
    2. Train a convolutional pipeline (patch extraction, mean/variance
       normalization, ZCA-trained linear encoding, threshold encoding with a
       ``FLAGS.fromdim``-sized dictionary, spatial pooling).
    3. Extract features, flatten them so the last axis is the feature axis,
       and standardize both sets with the training mean/std.
    4. Starting from ``FLAGS.fromdim`` and halving while the dimension is at
       least 100, reduce the feature dimension (skipped on the first pass),
       train a one-vs-all L2 SVM and log training/testing accuracy.

    The reduction uses the eigenvectors of the training covariance when
    ``FLAGS.svd == 1``; otherwise it uses the indices selected by
    ``code_ap.code_af``.

    Nothing is returned; results are reported through ``logging``.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder(
            {'alpha': 0.25, 'twoside': False},
            trainer=pipeline.NormalizedKmeansTrainer({
                'k': FLAGS.fromdim,
                'max_iter': 100,
            })),
        # Pooling grid and method are both taken from the command-line flags.
        pipeline.SpatialPooler({
            'grid': (FLAGS.grid, FLAGS.grid),
            'method': FLAGS.method,
        }),
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from recent NumPy releases, so the builtin is used directly.
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)
    num_train = Ytrain.shape[0]
    num_test = Ytest.shape[0]

    # before we do feature computation, try to do dimensionality reduction:
    # collapse every leading axis so each row is one feature vector along
    # the last axis.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # Standardize both sets with statistics computed on the training set.
    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # The decomposition does not depend on current_dim, so do it once.
        eigval, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction: eigh returns the
                # eigenvalues in ascending order, so the trailing columns
                # belong to the largest ones.
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling; only the first element returned by
                # code_af is used, as the selected feature indices.
                temp = code_ap.code_af(Xtrain, current_dim)
                logging.info('selected %d dims', len(temp[0]))
                # presumably keeps the selection identical on every MPI
                # rank -- TODO confirm against the mpi module.
                sel = mpi.COMM.bcast(temp[0])
                Cpred = covmat[sel]
                Csel = Cpred[:, sel]
                W = np.linalg.solve(Csel, Cpred)
                # perform svd
                U, D, _ = np.linalg.svd(W, full_matrices=False)
                U *= D
                Xtrain_red = np.dot(Xtrain[:, sel], U)
                Xtest_red = np.dot(Xtest[:, sel], U)
        else:
            # First pass: use the full features. Copy them so the in-place
            # resize below leaves Xtrain / Xtest untouched for later passes.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()

        # Regroup the rows so there is exactly one row per labelled image.
        # Floor division keeps the new dimension an integer on Python 3,
        # where `/` would hand a float to resize().
        Xtrain_red.resize(num_train, Xtrain_red.size // num_train)
        Xtest_red.resize(num_test, Xtest_red.size // num_test)

        w, b = classifier.l2svm_onevsall(Xtrain_red,
                                         Ytrain,
                                         0.005,
                                         fminargs={
                                             'disp': 0,
                                             'maxfun': 1000
                                         })
        accu_train = classifier.Evaluator.accuracy(Ytrain,
                                                   np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest,
                                                  np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f',
                     FLAGS.fromdim, current_dim, accu_train)
        logging.info('%d - %d, Testing accuracy: %f',
                     FLAGS.fromdim, current_dim, accu_test)
        # Floor division keeps current_dim an integer so it stays usable as
        # a slice bound on the next pass.
        current_dim //= 2