示例#1
0
def compute_caltech_features():
    """Train a DSIFT/LLC/pyramid-pooling pipeline and dump its features.

    Loads the dataset rooted at ``FLAGS.root`` (``jpg`` files), trains the
    pipeline on 400000 samples, and processes the whole dataset into a 2-D
    feature matrix.  The trained pipeline is pickled by the root process
    only, while the features and labels are written through
    ``mpi.dump_matrix_multi``.

    Reads ``FLAGS.root``, ``FLAGS.sift_size``, ``FLAGS.sift_stride``,
    ``FLAGS.llc_k``, ``FLAGS.dict_size``, ``FLAGS.feature_dir``,
    ``FLAGS.model_file``, ``FLAGS.feature_file`` and ``FLAGS.label_file``.
    """
    caltech = datasets.TwoLayerDataset(FLAGS.root, ['jpg'], max_size=300)
    conv = pipeline.ConvLayer([
        dsift.DsiftExtractor(FLAGS.sift_size, FLAGS.sift_stride),
        pipeline.LLCEncoder({'k': FLAGS.llc_k},
                            trainer=pipeline.KmeansTrainer(
                                {'k': FLAGS.dict_size})),
        pipeline.PyramidPooler({
            'level': 3,
            'method': 'max'
        })
    ])
    conv.train(caltech, 400000)
    feat = conv.process_dataset(caltech, as_2d=True)

    mpi.mkdir(FLAGS.feature_dir)
    if mpi.is_root():
        model_path = os.path.join(FLAGS.feature_dir, FLAGS.model_file)
        # Pickle streams are bytes: the file must be opened in binary mode,
        # otherwise the dump fails on Python 3 and can be corrupted by
        # newline translation on platforms that distinguish text files.
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)

    mpi.dump_matrix_multi(feat,
                          os.path.join(FLAGS.feature_dir, FLAGS.feature_file))
    mpi.dump_matrix_multi(caltech.labels(),
                          os.path.join(FLAGS.feature_dir, FLAGS.label_file))
示例#2
0
# Shift Xtrain by m in place (presumably restoring a mean that was removed
# earlier in the script -- TODO confirm against the code above this excerpt).
Xtrain += m

# Within-cluster pairs, post-pooling: for every column i, look up its cluster
# label in ap_result[1] and that cluster's centroid column in ap_result[0];
# columns that are not their own centroid contribute the two-column slice
# (column i, centroid column) to the sampler.
sampler = mathutil.ReservoirSampler(2000)
for i in range(covmat.shape[0]):
    label = ap_result[1][i]
    centroid_id = ap_result[0][label]
    if centroid_id != i:
        sampler.consider(Xtrain[:, [i, centroid_id]])
mpi.dump_matrix(sampler.get(), 'cvpr_exemplar_centroids_distribution_within_cluster_postpooling.npy')

# Between-cluster pairs, post-pooling: every unordered pair (i < j) of entries
# of ap_result[0] contributes the corresponding two columns of Xtrain.
sampler = mathutil.ReservoirSampler(2000)
for i in range(len(ap_result[0])):
    for j in range(i+1, len(ap_result[0])):
        sampler.consider(Xtrain[:,[ap_result[0][i],ap_result[0][j]]])
mpi.dump_matrix(sampler.get(), 'cvpr_exemplar_centroids_distribution_between_cluster_postpooling.npy')

# Drop the references to the large intermediates before drawing the
# 200000-row sample below.
del Xtrain
del Cpred
del Csel
del Crecon
del sampler

# Within-cluster pairs, pre-pooling: the same pairing as the first loop, but
# on samples drawn from the pipeline with its last layer removed (conv[:-1]).
sampler = mathutil.ReservoirSampler(2000)
temp = pipeline.ConvLayer(conv[:-1]).sample(cifar, 200000, True)
for i in range(covmat.shape[0]):
    label = ap_result[1][i]
    centroid_id = ap_result[0][label]
    if centroid_id != i:
        sampler.consider(temp[:, [i, centroid_id]])
mpi.dump_matrix(sampler.get(), 'cvpr_exemplar_centroids_distribution_within_cluster_prepooling.npy')

示例#3
0
def cifar_demo():
    """Run a CIFAR classification demo with feature-dimension reduction.

    Trains a patch-based pipeline on the CIFAR training split, extracts
    features for both splits, standardizes them with the training mean/std,
    reduces the last (feature) axis to ``FLAGS.todim`` dimensions, trains a
    one-vs-all L2 SVM and logs the training and testing accuracy.

    Reads ``FLAGS.output_dir``, ``FLAGS.root``, ``FLAGS.trainer``,
    ``FLAGS.patch``, ``FLAGS.fromdim``, ``FLAGS.grid``, ``FLAGS.method`` and
    ``FLAGS.todim``.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    if FLAGS.trainer == "pink":
        trainer = pinker.SpatialPinkTrainer({
            'size': (FLAGS.patch, FLAGS.patch),
            'reg': 0.1
        })
    else:
        trainer = pipeline.ZcaTrainer({'reg': 0.1})

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch],
                                1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=trainer),
        pipeline.ThresholdEncoder({
            'alpha': 0.0,
            'twoside': False
        },
                                  trainer=pipeline.OMPTrainer({
                                      'k': FLAGS.fromdim,
                                      'max_iter': 100
                                  })),
        pipeline.SpatialPooler({
            'grid': (FLAGS.grid, FLAGS.grid),
            'method': FLAGS.method
        })  # pooling method is taken from FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; astype(int) is the exact equivalent.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # flatten every leading axis so that each row is one feature vector.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # Standardize both splits with the statistics of the training split.
    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)
    # Manual switch kept from the original: flip to True to use the
    # eigen-decomposition variant instead of feature subsampling.
    if False:
        # directly do dimensionality reduction
        eigval, eigvec = np.linalg.eigh(covmat)
        U = eigvec[:, -FLAGS.todim:]
        Xtrain = np.dot(Xtrain, U)
        Xtest = np.dot(Xtest, U)
    else:
        # do subsampling
        import code_ap
        temp = code_ap.code_af(Xtrain, FLAGS.todim)
        sel = temp[0]
        # Every rank must use the same selection.
        sel = mpi.COMM.bcast(sel)
        Cpred = covmat[sel]
        Csel = Cpred[:, sel]
        W = np.linalg.solve(Csel, Cpred)
        # perform svd
        U, D, _ = np.linalg.svd(W, full_matrices=0)
        U *= D
        Xtrain = np.dot(Xtrain[:, sel], U)
        Xtest = np.dot(Xtest[:, sel], U)
    # Fold the rows back into one row per labelled item.  Floor division
    # keeps the column count an int under Python 3 (or
    # ``from __future__ import division``); plain ``/`` would hand resize()
    # a float.
    Xtrain.resize(Ytrain.shape[0], Xtrain.size // Ytrain.shape[0])
    Xtest.resize(Ytest.shape[0], Xtest.size // Ytest.shape[0])
    # The string literal below is a disabled alternative (post-pooling
    # reduction over all features); it is never executed.
    """
    # This part is used to do post-pooling over all features nystrom subsampling
    # normalization
    Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
    Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))
    m, std = classifier.feature_meanstd(Xtrain, reg = 0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    
    covmat = mathutil.mpi_cov(Xtrain)
    eigval, eigvec = np.linalg.eigh(covmat)
    U = eigvec[:, -(200*FLAGS.grid*FLAGS.grid):]
    #U = eigvec[:,-400:] * np.sqrt(eigval[-400:])
    Xtrain = np.dot(Xtrain, U)
    Xtest = np.dot(Xtest, U)
    """

    w, b = classifier.l2svm_onevsall(Xtrain,
                                     Ytrain,
                                     0.002,
                                     fminargs={
                                         'disp': 0,
                                         'maxfun': 1000
                                     })
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
import treereg

########
# Settings
########

# Hard-coded filesystem locations; the names suggest the CUB_200_2011 dataset
# root and a feature directory -- confirm against where they are used.
ROOT = "/u/vis/x1/common/CUB_200_2011/"
FEATDIR = "/u/vis/ttmp/jiayq/birds/"
# Data-loading options.  NOTE(review): the exact meaning of CROP (1.2) is
# defined by the dataset class that consumes it, which is outside this
# excerpt -- confirm there.
CROP = 1.2
MIRRORED = True
TARGET_SIZE = [128,128]
# Feature-extraction pipeline: 5x5 patch extraction, mean/variance
# normalization, a linear encoder trained with ZcaTrainer, a threshold
# encoder whose dictionary (k=1024) is trained with OMPTrainer, and 3-level
# max pyramid pooling.
CONV = pipeline.ConvLayer(
    [pipeline.PatchExtractor([5,5], 1),
     pipeline.MeanvarNormalizer({'reg': 10}),
     pipeline.LinearEncoder({},
                trainer = pipeline.ZcaTrainer({'reg': 0.1})),
     pipeline.ThresholdEncoder({},
                trainer = pipeline.OMPTrainer({'k':1024, 'max_iter':100})),
     pipeline.PyramidPooler({'level': 3, 'method': 'max'})
    ], fixed_size = True)
# Disabled regularization settings kept for reference.
#FLAT_REG = [0.0001] # tuned parameter
#HIER_REG = [0.01] # tuned parameter
#TREE_REG = [0.0001]

########
# Main script
########
# Command-line switches selecting which stages of the script run; both
# default to False.
gflags.DEFINE_bool("extract", False, 
                   "If set, train the feature extraction pipeline.")
gflags.DEFINE_bool("svm", False,
                   "If set, perform svm classification.")
示例#5
0
# Path of a pickle file for the convolution pipeline (how it is read or
# written is outside this excerpt).
CONVOLUTION_FILE = '/u/vis/ttmp/jiayq/birds/conv/convolution.pickle'

# Data-loading options; their exact semantics are defined by the dataset
# class that consumes them, which is not visible here.
CROP = 1.5
MIRRORED = True
SUBSET = None
TARGET_SIZE = [128, 128]

# First-layer pipeline: 5x5 patch extraction, mean/variance normalization,
# a linear encoder trained with ZcaTrainer, a one-sided ReLU encoder whose
# dictionary (k=200) is trained with NormalizedKmeansTrainer, and max
# pooling through FixedSizePooler.
CONV = pipeline.ConvLayer([
    pipeline.PatchExtractor([5, 5], 1),
    pipeline.MeanvarNormalizer({'reg': 10}),
    pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
    pipeline.ReLUEncoder({'twoside': False},
                         trainer=pipeline.NormalizedKmeansTrainer({
                             'k':
                             200,
                             'max_iter':
                             100
                         })),
    #pipeline.PyramidPooler({'level': 2, 'method': 'ave'})
    # NOTE(review): 'size' is the string '5', not the integer 5 -- confirm
    # that FixedSizePooler accepts or converts a string here.
    pipeline.FixedSizePooler({
        'size': '5',
        'method': 'max'
    })
])
CONV2 = pipeline.ConvLayer([
    pipeline.PatchExtractor([4, 4], 1),
    pipeline.MeanvarNormalizer({'reg': 0.01}),
    pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.01})),
    pipeline.ReLUEncoder({'twoside': False},
                         trainer=pipeline.NormalizedKmeansTrainer(
                             {
示例#6
0
        is_training=False)

# Reuse a previously trained first layer when a cached pickle can be loaded;
# otherwise build the pipeline, train it and cache it.
try:
    # Binary mode is required for pickle data, and the context manager closes
    # the handle (the original leaked it).
    # NOTE(review): unpickling executes arbitrary code -- only load cache
    # files this script produced itself.
    with open('cifar_conv.pickle', 'rb') as fid:
        conv = pickle.load(fid)
    logging.info('Skipping first layer training')
# Any failure to load (missing file, unreadable pickle, ...) deliberately
# falls back to retraining.  The Python-2-only ``except Exception, e`` form
# is dropped: the bound name is unused in the visible code, and this
# spelling parses on both Python 2 and 3.
except Exception:
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer(
            {'reg': 0.1})),  # Does whitening
        pipeline.ThresholdEncoder({
            'alpha': 0.0,
            'twoside': False
        },
                                  trainer=pipeline.NormalizedKmeansTrainer({
                                      'k':
                                      1600,
                                      'max_iter':
                                      100
                                  })),  # does encoding
        pipeline.SpatialPooler({
            'grid': (2, 2),
            'method': 'ave'
        })
    ])

    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)
    mpi.root_pickle(conv, 'cifar_conv.pickle')

# do pruning
def bird_demo():
    """Train and evaluate a one-vs-all L2 SVM on pooled CUB bird features.

    Loads the training and testing splits of the CUB dataset (optionally
    wrapping the training split in ``datasets.MirrorSet``), trains a
    fixed-size patch pipeline on the training split, standardizes the
    extracted features with the training mean/std, trains the classifier and
    logs both accuracies.  Finally the root process pickles
    ``(m, std, w, b, conv[-2].dictionary)`` to ``debug_features.pickle``.

    Reads ``FLAGS.root``, ``FLAGS.crop``, ``FLAGS.version``,
    ``FLAGS.mirrored``, ``FLAGS.patch`` and ``FLAGS.k`` as well as the
    module-level ``TARGET_SIZE``.
    """
    logging.info('Loading data...')
    bird = visiondata.CUBDataset(FLAGS.root,
                                 is_training=True,
                                 crop=FLAGS.crop,
                                 version=FLAGS.version,
                                 prefetch=True,
                                 target_size=TARGET_SIZE)
    bird_test = visiondata.CUBDataset(FLAGS.root,
                                      is_training=False,
                                      crop=FLAGS.crop,
                                      version=FLAGS.version,
                                      prefetch=True,
                                      target_size=TARGET_SIZE)
    # Only the training split is wrapped; the test split is used as loaded.
    if FLAGS.mirrored:
        bird = datasets.MirrorSet(bird)
    conv = pipeline.ConvLayer(
        [
            pipeline.PatchExtractor([FLAGS.patch, FLAGS.patch],
                                    1),  # extracts patches
            pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
            pipeline.LinearEncoder({},
                                   trainer=pipeline.ZcaTrainer({'reg': 0.1})),
            pipeline.ThresholdEncoder({
                'alpha': 0.25,
                'twoside': True
            },
                                      trainer=pipeline.OMPTrainer(
                                          {
                                              'k': FLAGS.k,
                                              'max_iter': 100
                                          })),
            pipeline.SpatialPooler({
                'grid': 4,
                'method': 'max'
            })
        ],
        fixed_size=True)
    logging.info('Training the pipeline...')
    conv.train(bird, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(bird, as_2d=True)
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; astype(int) is the exact equivalent.
    Ytrain = bird.labels().astype(int)
    Xtest = conv.process_dataset(bird_test, as_2d=True)
    Ytest = bird_test.labels().astype(int)

    # normalization: both splits use the training statistics
    m, std = classifier.feature_meanstd(Xtrain, reg=0.01)
    # to match Adam Coates' pipeline
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain,
                                     Ytrain,
                                     0.005,
                                     fminargs={'maxfun': 1000})
    accu_train = classifier.Evaluator.accuracy(Ytrain, np.dot(Xtrain, w) + b)
    accu_test = classifier.Evaluator.accuracy(Ytest, np.dot(Xtest, w) + b)
    logging.info('Training accuracy: %f' % accu_train)
    logging.info('Testing accuracy: %f' % accu_test)
    # conv[-2] is the ThresholdEncoder layer built above; its dictionary is
    # saved together with the normalization and classifier parameters.
    mpi.root_pickle((m, std, w, b, conv[-2].dictionary),
                    'debug_features.pickle')
示例#8
0
                                  False,
                                  crop=CROP,
                                  subset=SUBSET,
                                  target_size=TARGET_SIZE,
                                  prefetch=True)

# Feature pipeline: 6x6 patches, mean/variance normalization, a linear
# encoder trained with ZcaTrainer, a one-sided threshold encoder whose
# dictionary (FLAGS.fromdim atoms) is trained with NormalizedKmeansTrainer,
# and spatial pooling on a FLAGS.grid x FLAGS.grid grid.
conv = pipeline.ConvLayer([
    pipeline.PatchExtractor([6, 6], 1),  # extracts patches
    pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
    pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
    pipeline.ThresholdEncoder({
        'alpha': 0.25,
        'twoside': False
    },
                              trainer=pipeline.NormalizedKmeansTrainer({
                                  'k':
                                  FLAGS.fromdim,
                                  'max_iter':
                                  100
                              })),
    pipeline.SpatialPooler({
        'grid': (FLAGS.grid, FLAGS.grid),
        'method': FLAGS.method
    })  # pooling method is taken from FLAGS.method
])
logging.info("Total images: %d " % train_data.size_total())
# Single pre-formatted argument: this parses on Python 2 and 3 alike and
# prints exactly what the old statement ``print "local images: ", x`` did
# (the literal's trailing space plus the separator space give two spaces).
print("local images:  %s" % (train_data.size(),))

logging.info('Training the pipeline...')
conv.train(train_data, 400000, exhaustive=True)
示例#9
0
def cifar_demo():
    """Performs a demo classification on cifar

    Trains a patch pipeline on the CIFAR training split, pickles it to
    ``FLAGS.output_dir/FLAGS.model_file`` and loads it back, dumps the
    extracted train/test feature matrices, standardizes them with the
    training mean/std, trains a one-vs-all L2 SVM, pickles its parameters to
    ``FLAGS.svm_file`` (root process only) and logs both accuracies.

    Reads ``FLAGS.output_dir``, ``FLAGS.root``, ``FLAGS.model_file``,
    ``FLAGS.feature_file`` and ``FLAGS.svm_file``.
    """
    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer(
            {'reg': 0.1})),  # Does whitening
        pipeline.ThresholdEncoder({
            'alpha': 0.25,
            'twoside': True
        },
                                  trainer=pipeline.OMPTrainer({
                                      'k': 800,
                                      'max_iter': 100
                                  })),  # does encoding
        pipeline.SpatialPooler({
            'grid': (2, 2),
            'method': 'ave'
        })  # average pool
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 50000)
    logging.info('Dumping the pipeline...')
    model_path = os.path.join(FLAGS.output_dir, FLAGS.model_file)
    if mpi.is_root():
        # Pickle data is bytes, so the file is opened in binary mode; the
        # ``with`` block closes it, no explicit close() is needed.
        with open(model_path, 'wb') as fid:
            pickle.dump(conv, fid)
    # Every rank reads the file back below, so wait until the root has
    # finished writing it; without this the other ranks can hit a missing or
    # half-written file.
    mpi.barrier()
    with open(model_path, 'rb') as fid:
        conv = pickle.load(fid)
    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=True)
    mpi.dump_matrix_multi(
        Xtrain, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_train'))
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; astype(int) is the exact equivalent.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=True)
    mpi.dump_matrix_multi(
        Xtest, os.path.join(FLAGS.output_dir, FLAGS.feature_file + '_test'))
    Ytest = cifar_test.labels().astype(int)

    # normalization: both splits use the training statistics
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    w, b = classifier.l2svm_onevsall(Xtrain, Ytrain, 0.01)
    if mpi.is_root():
        with open(os.path.join(FLAGS.output_dir, FLAGS.svm_file), 'wb') as fid:
            pickle.dump({'m': m, 'std': std, 'w': w, 'b': b}, fid)
    # Accuracy is the fraction of items whose highest-scoring class equals
    # the label.
    accu = np.sum(Ytrain == (np.dot(Xtrain, w) + b).argmax(axis=1)) \
            / float(len(Ytrain))
    accu_test = np.sum(Ytest == (np.dot(Xtest, w) + b).argmax(axis=1)) \
            / float(len(Ytest))

    logging.info('Training accuracy: %f' % accu)
    logging.info('Testing accuracy: %f' % accu_test)
示例#10
0
def stl_demo():
    """Performs a demo classification on stl

    Trains a patch pipeline on the 'unlabeled' STL-10 split, extracts
    features for the unlabeled, train and test splits, and standardizes all
    of them with the mean/std of the unlabeled features.  It then repeatedly
    halves the feature dimension, starting at ``FLAGS.fromdim`` and stopping
    below 100, training a one-vs-all L2 SVM at every size and logging the
    training and testing accuracy.

    The reduction uses the eigenvectors of the unlabeled covariance when
    ``FLAGS.svd == 1`` and feature subsampling via ``code_ap.code_af``
    otherwise.  Reads ``FLAGS.root``, ``FLAGS.fromdim``, ``FLAGS.grid``,
    ``FLAGS.method`` and ``FLAGS.svd``.
    """
    logging.info('Loading stl data...')
    stl = visiondata.STL10Dataset(FLAGS.root, 'unlabeled', target_size=32)
    stl_train = visiondata.STL10Dataset(FLAGS.root, 'train', target_size=32)
    stl_test = visiondata.STL10Dataset(FLAGS.root, 'test', target_size=32)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder({
            'alpha': 0.25,
            'twoside': False
        },
                                  trainer=pipeline.NormalizedKmeansTrainer({
                                      'k':
                                      FLAGS.fromdim,
                                      'max_iter':
                                      100
                                  })),
        pipeline.SpatialPooler({
            'grid': (FLAGS.grid, FLAGS.grid),
            'method': FLAGS.method
        })  # pooling method is taken from FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(stl, 400000, exhaustive=True)

    logging.info('Extracting features...')
    X = conv.process_dataset(stl, as_2d=False)
    Xtrain = conv.process_dataset(stl_train, as_2d=False)
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; astype(int) is the exact equivalent.
    Ytrain = stl_train.labels().astype(int)
    Xtest = conv.process_dataset(stl_test, as_2d=False)
    Ytest = stl_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # flatten every leading axis so that each row is one feature vector.
    X.resize(np.prod(X.shape[:-1]), X.shape[-1])
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # All three sets are standardized with the unlabeled statistics.
    m, std = classifier.feature_meanstd(X, 0.01)
    X -= m
    X /= std
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(X)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # Decompose once; each iteration slices the trailing eigenvectors.
        eigval, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                temp = code_ap.code_af(X, current_dim, tol=current_dim * 0.01)
                logging.info("selected %d dims" % len(temp[0]))
                sel = temp[0]
                # Every rank must use the same selection.
                sel = mpi.COMM.bcast(sel)
                Cpred = covmat[sel]
                Csel = Cpred[:, sel]
                W = np.linalg.solve(Csel, Cpred)
                # perform svd
                U, D, _ = np.linalg.svd(W, full_matrices=0)
                U *= D
                Xtrain_red = np.dot(Xtrain[:, sel], U)
                Xtest_red = np.dot(Xtest[:, sel], U)
        else:
            # First pass: full dimensionality.  Work on copies so the
            # in-place resize below leaves Xtrain/Xtest untouched.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()
        # Fold the rows back into one row per labelled item.  Floor division
        # keeps the column count an int under Python 3; plain ``/`` would
        # hand resize() a float.
        Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size // Ytrain.shape[0])
        Xtest_red.resize(Ytest.shape[0], Xtest_red.size // Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red,
                                         Ytrain,
                                         0.005,
                                         fminargs={
                                             'disp': 0,
                                             'maxfun': 1000
                                         })
        accu_train = classifier.Evaluator.accuracy(Ytrain,
                                                   np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest,
                                                  np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_test))
        # Integer halving: a float here would break the eigenvector slice
        # and the '%d' formatting on Python 3.
        current_dim //= 2
示例#11
0
                                 target_size=[256, 256])
    regions = pipeline.PatchExtractor([25, 25], 1).sample(bird, 100000)
    regions.resize((regions.shape[0], ) + (25, 25, 3))
    regions_data = datasets.NdarraySet(regions)
    try:
        conv = pickle.load(open('conv.pickle'))
    except IOError:
        logging.info("Training the feature extraction pipeline...")
        conv = pipeline.ConvLayer([
                pipeline.PatchExtractor([5, 5], 1), # extracts patches
                pipeline.MeanvarNormalizer({'reg': 10}), # normalizes the patches
                pipeline.LinearEncoder({},
                        trainer = pipeline.ZcaTrainer({'reg': 0.1})),
                #pipeline.SpatialMeanNormalizer({'channels': 3}),
                pipeline.ThresholdEncoder({'alpha': 0.25, 'twoside': False},
                        trainer = pipeline.OMPTrainer(
                                {'k': 3200, 'max_iter':100})),
                pipeline.KernelPooler(\
                        {'kernel': pipeline.KernelPooler.kernel_uniform(15),
                         'method': 'max',
                         'stride': 1})
                ],
                fixed_size = True)
        conv.train(regions_data, 400000)
        mpi.root_pickle(conv, "conv.pickle")
    # so let's get the regions' features after pooling.
    regions_pooled = conv.process_dataset(regions_data)
    mpi.dump_matrix_multi(regions_pooled,
                          '/tscratch/tmp/jiayq/pooled_lda/regions_pooled')

# Log the shape of regions_pooled with its first axis dropped, i.e. the shape
# of a single pooled item.
logging.info("Feature shape:" + str(regions_pooled.shape[1:]))
示例#12
0
# Train the first layer
################################################################################
# Load the cached first layer when its model file exists; otherwise train it,
# let the root process cache it, and synchronize all ranks.
if os.path.exists(model_file_first):
    logging.info("skipping the first layer training...")
    # Pickle data is bytes, so read in binary mode; the context manager
    # closes the handle (the original leaked it).
    with open(model_file_first, 'rb') as fid:
        conv = pickle.load(fid)
else:
    logging.info("Setting up the convolutional layer...")
    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([5, 5], 1),
        pipeline.MeanvarNormalizer({'reg': 10}),
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.01})),
        pipeline.ThresholdEncoder({
            'alpha': 0.0,
            'twoside': False
        },
                                  trainer=pipeline.OMPTrainer({'k': 1600})),
        pipeline.KernelPooler({
            'kernel':
            pipeline.KernelPooler.kernel_gaussian(10, 5),
            'stride':
            5,
            'method':
            'ave'
        })
    ])
    conv.train(stl, 400000)
    if mpi.is_root():
        # Binary mode for the same reason as above; ``with`` also guarantees
        # the file is closed if pickling raises.
        with open(model_file_first, 'wb') as fid:
            pickle.dump(conv, fid)
    # Hold every rank until the root has finished writing the model file.
    mpi.barrier()
def cifar_demo():
    """Performs a demo classification on cifar

    Trains a patch pipeline on the CIFAR training split, extracts features
    for both splits and standardizes them with the training mean/std.  It
    then repeatedly halves the feature dimension, starting at
    ``FLAGS.fromdim`` and stopping below 100, training a one-vs-all L2 SVM at
    every size and logging the training and testing accuracy.

    The reduction projects onto the trailing eigenvectors of the training
    covariance when ``FLAGS.svd == 1``; otherwise it keeps the feature
    columns selected by ``code_ap.code_af``.  Reads ``FLAGS.output_dir``,
    ``FLAGS.root``, ``FLAGS.fromdim``, ``FLAGS.grid``, ``FLAGS.method`` and
    ``FLAGS.svd``.
    """

    mpi.mkdir(FLAGS.output_dir)
    logging.info('Loading cifar data...')
    cifar = visiondata.CifarDataset(FLAGS.root, is_training=True)
    cifar_test = visiondata.CifarDataset(FLAGS.root, is_training=False)

    conv = pipeline.ConvLayer([
        pipeline.PatchExtractor([6, 6], 1),  # extracts patches
        pipeline.MeanvarNormalizer({'reg': 10}),  # normalizes the patches
        pipeline.LinearEncoder({}, trainer=pipeline.ZcaTrainer({'reg': 0.1})),
        pipeline.ThresholdEncoder({
            'alpha': 0.25,
            'twoside': False
        },
                                  trainer=pipeline.NormalizedKmeansTrainer({
                                      'k':
                                      FLAGS.fromdim,
                                      'max_iter':
                                      100
                                  })),
        pipeline.SpatialPooler({
            'grid': (FLAGS.grid, FLAGS.grid),
            'method': FLAGS.method
        })  # pooling method is taken from FLAGS.method
    ])
    logging.info('Training the pipeline...')
    conv.train(cifar, 400000, exhaustive=True)

    logging.info('Extracting features...')
    Xtrain = conv.process_dataset(cifar, as_2d=False)
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; astype(int) is the exact equivalent.
    Ytrain = cifar.labels().astype(int)
    Xtest = conv.process_dataset(cifar_test, as_2d=False)
    Ytest = cifar_test.labels().astype(int)

    # before we do feature computation, try to do dimensionality reduction:
    # flatten every leading axis so that each row is one feature vector.
    Xtrain.resize(np.prod(Xtrain.shape[:-1]), Xtrain.shape[-1])
    Xtest.resize(np.prod(Xtest.shape[:-1]), Xtest.shape[-1])

    # Standardize both splits with the statistics of the training split.
    m, std = classifier.feature_meanstd(Xtrain, 0.01)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std

    covmat = mathutil.mpi_cov(Xtrain)

    current_dim = FLAGS.fromdim
    if FLAGS.svd == 1:
        # Decompose once; each iteration slices the trailing eigenvectors.
        eigval, eigvec = np.linalg.eigh(covmat)
    while current_dim >= 100:
        if current_dim < FLAGS.fromdim:
            if FLAGS.svd == 1:
                # directly do dimensionality reduction
                U = eigvec[:, -current_dim:]
                Xtrain_red = np.dot(Xtrain, U)
                Xtest_red = np.dot(Xtest, U)
            else:
                # do subsampling
                temp = code_ap.code_af(Xtrain, current_dim)
                logging.info("selected %d dims" % len(temp[0]))
                sel = temp[0]
                # Fancy indexing already copies; ascontiguousarray makes sure
                # the result can be resized in place below.
                Xtrain_red = np.ascontiguousarray(Xtrain[:, sel])
                Xtest_red = np.ascontiguousarray(Xtest[:, sel])
        else:
            # First pass: full dimensionality.  Work on copies so the
            # in-place resize below leaves Xtrain/Xtest untouched.
            Xtrain_red = Xtrain.copy()
            Xtest_red = Xtest.copy()
        # Fold the rows back into one row per labelled item.  Floor division
        # keeps the column count an int under Python 3; plain ``/`` would
        # hand resize() a float.
        Xtrain_red.resize(Ytrain.shape[0], Xtrain_red.size // Ytrain.shape[0])
        Xtest_red.resize(Ytest.shape[0], Xtest_red.size // Ytest.shape[0])

        w, b = classifier.l2svm_onevsall(Xtrain_red,
                                         Ytrain,
                                         0.005,
                                         fminargs={
                                             'disp': 0,
                                             'maxfun': 1000
                                         })
        accu_train = classifier.Evaluator.accuracy(Ytrain,
                                                   np.dot(Xtrain_red, w) + b)
        accu_test = classifier.Evaluator.accuracy(Ytest,
                                                  np.dot(Xtest_red, w) + b)
        logging.info('%d - %d, Training accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_train))
        logging.info('%d - %d, Testing accuracy: %f' %
                     (FLAGS.fromdim, current_dim, accu_test))
        # Integer halving: a float here would break the eigenvector slice
        # and the '%d' formatting on Python 3.
        current_dim //= 2