Example #1
 def testLoadMulti(self):
     testdir = os.path.dirname(__file__)
     data1 = mpi.load_matrix(
         os.path.join(testdir, 'data', 'dumploadmulti', 'single_file.npy'))
     data2 = mpi.load_matrix_multi(
         os.path.join(testdir, 'data', 'dumploadmulti', 'multiple_files'))
     np.testing.assert_array_equal(data1, data2)
Example #2
 def testLoadMulti(self):
     testdir = os.path.dirname(__file__)
     data1 = mpi.load_matrix(
         os.path.join(testdir, 'data', 'dumploadmulti', 'single_file.npy'))
     data2 = mpi.load_matrix_multi(
         os.path.join(testdir, 'data', 'dumploadmulti', 'multiple_files'))
     files = glob.glob(
         os.path.join(testdir, 'data', 'dumploadmulti',
                      'multiple_files*.npy'))
     files.sort()
     data3 = mpi.load_matrix_multi(files)
     files = glob.glob(
         os.path.join(testdir, 'data', 'dumploadmulti',
                      'multiple_files*.mat'))
     files.sort()
     data4 = mpi.load_matrix_multi(files, name='data')
     np.testing.assert_array_equal(data1, data2)
     np.testing.assert_array_equal(data1, data3)
     np.testing.assert_array_equal(data1, data4)
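
For the .mat branch above, load_matrix_multi takes an explicitly sorted file list plus a `name` keyword, which presumably selects the variable stored inside each MATLAB file. A hypothetical sketch of how such fixtures could be written with scipy (the chunking and the file-naming scheme here are illustrative assumptions, not iceberk's actual shard convention):

import numpy as np
from scipy import io

data = np.arange(12.0).reshape(6, 2)
for k, chunk in enumerate(np.array_split(data, 3)):
    # each shard stores its piece under the variable name 'data'
    io.savemat('multiple_files-%d.mat' % k, {'data': chunk})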
Example #3
 def testDumpLoad(self):
     local_size = 2
     mat_sources = [
         np.random.rand(local_size),
         np.random.rand(local_size, 2),
         np.random.rand(local_size, 2, 3)
     ]
     for mat in mat_sources:
         mpi.dump_matrix(mat, _MPI_DUMP_TEST_FILE)
         if mpi.is_root():
             mat_dumped = np.load(_MPI_DUMP_TEST_FILE)
             self.assertEqual(mat_dumped.shape,
                              (local_size * mpi.SIZE,) + mat.shape[1:])
         mat_read = mpi.load_matrix(_MPI_DUMP_TEST_FILE)
         self.assertEqual(mat.shape, mat_read.shape)
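
The shape assertion above captures the contract: dump_matrix gathers every rank's local rows into a single file, so the root sees local_size * mpi.SIZE rows, and load_matrix redistributes the rows so each rank recovers its original local shape. A minimal sketch of the same round trip outside a test harness, assuming iceberk is importable and the script is launched under mpirun:

import numpy as np
from iceberk import mpi

local = np.random.rand(2, 3)                 # 2 rows on every rank
mpi.dump_matrix(local, '/tmp/roundtrip.npy')
if mpi.is_root():
    # the dumped file holds the concatenation of all ranks' rows
    assert np.load('/tmp/roundtrip.npy').shape == (2 * mpi.SIZE, 3)
mat_read = mpi.load_matrix('/tmp/roundtrip.npy')
assert mat_read.shape == local.shape         # each rank gets its share back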
Example #7
for i, id in enumerate(ap_result[0]):
    # visualize each centroid together with its most closely related elements
    print(i)
    centroid = dictionary[id]
    neighbors = np.flatnonzero(ap_result[1] == i)
    idx = np.argsort(-ap_result[2][id, neighbors])
    # keep the 11 most similar (typically the exemplar itself plus its 10 nearest)
    neighbors = neighbors[idx][:11]
    if len(neighbors) > 0:
        im = vis.show_single(centroid)
        misc.imsave('centroids/%d.png' % i, im)
        im = vis.show_multiple(dictionary[neighbors], 1, np.max)
        misc.imsave('centroids/%d-neighbors.png' % i, im)

within_cluster_samples = mpi.load_matrix(
    'cvpr_exemplar_centroids_distribution_within_cluster_postpooling.npy')
between_centroids_samples = mpi.load_matrix(
    'cvpr_exemplar_centroids_distribution_between_cluster_postpooling.npy')
within_cluster_samples_prepooling = mpi.load_matrix(
    'cvpr_exemplar_centroids_distribution_within_cluster_prepooling.npy')
corr_within_cluster = np.corrcoef(within_cluster_samples.T)
corr_between_centroids = np.corrcoef(between_centroids_samples.T)
corr_within_cluster_prepooling = np.corrcoef(
    within_cluster_samples_prepooling.T)
# subsample for visualization
within_cluster_samples = within_cluster_samples[
    np.random.randint(within_cluster_samples.shape[0], size=2000)]
between_centroids_samples = between_centroids_samples[
    np.random.randint(between_centroids_samples.shape[0], size=2000)]
within_cluster_samples_prepooling = within_cluster_samples_prepooling[
    np.random.randint(within_cluster_samples_prepooling.shape[0], size=2000)]
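
Two details worth noting above: np.corrcoef treats rows as variables, so passing the transposed sample matrices yields the dimension-by-dimension correlation of the pooled features, and the np.random.randint indexing draws 2000 rows (with replacement) purely to keep the visualization manageable. A self-contained sketch of the pattern:

import numpy as np

samples = np.random.rand(10000, 8)    # stand-in for a loaded sample matrix
corr = np.corrcoef(samples.T)         # (8, 8) correlation between dimensions
subset = samples[np.random.randint(samples.shape[0], size=2000)]  # for plots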
Example #8
########
FEATDIR = "/tscratch/tmp/jiayq/imagenet-sbow/"
MODEL_NAME = 'script_train_imagenet_sbow_stochastic.py.flat.0.001.pickle'
NTRIAL = 10000
SETSIZE = 10
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

Xval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Xval'))
Yval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Yval'))
Xval.resize(Xval.shape[0], np.prod(Xval.shape[1:]))

Xtest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Xtest'))
Ytest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Ytest'))
Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))

# normalize rows (the division by shape[1] leaves each row with norm equal to the feature dimension, not 1)
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]
for i in range(Xval.shape[0]):
    Xval[i] /= np.sqrt(np.dot(Xval[i], Xval[i]) + 1e-8) / Xval.shape[1]
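
The per-row loops above can be collapsed into a single vectorized expression (Example #13 below does this with inner1d); a sketch under the same scaling convention:

import numpy as np

Xtrain = np.random.rand(100, 5)       # stand-in for the loaded features
norms = np.sqrt((Xtrain * Xtrain).sum(axis=1) + 1e-8)
Xtrain /= (norms / Xtrain.shape[1])[:, np.newaxis]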
Example #10
########
FEATDIR = "/u/vis/x1/common/ILSVRC-2010/SBOW/"
gflags.DEFINE_float("reg", 0.01, "The reg term")
gflags.DEFINE_integer("minibatch", 10000, "The minibatch size")
gflags.DEFINE_bool("svm", False, "If set, run SVM")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

Xval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Xval'))
Yval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Yval'))
Xval.resize(Xval.shape[0], np.prod(Xval.shape[1:]))

Xtest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Xtest'))
Ytest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Ytest'))
Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:]))

# normalize rows (the division by shape[1] leaves each row with norm equal to the feature dimension, not 1)
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]
for i in range(Xval.shape[0]):
    Xval[i] /= np.sqrt(np.dot(Xval[i], Xval[i]) + 1e-8) / Xval.shape[1]
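
The gflags boilerplate at the top follows the library's usual pattern: each DEFINE_* call registers a flag with a default and a help string, FLAGS(sys.argv) parses the command line, and the parsed values become attributes on FLAGS. A minimal sketch reusing two of the flags above:

import sys
import gflags

gflags.DEFINE_float("reg", 0.01, "The reg term")
gflags.DEFINE_bool("svm", False, "If set, run SVM")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)     # e.g. python train.py --reg 0.1 --svm

print(FLAGS.reg)    # 0.1 if overridden on the command line, else 0.01
print(FLAGS.svm)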
Example #11
        + "/ILSVRC-2010-LLC-SIFT-subcategory-model.npz"

########
# Main script
########
np.random.seed(int(time.time()) + mpi.RANK * 100)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")

if FLAGS.preload:
    files = glob.glob(FEATDIR + 'n*.mat')
    files.sort()
    if mpi.is_root():
        print('files', files)
    Xtrain = mpi.load_matrix_multi(files, name='features')
    Ytrain = mpi.load_matrix(FEATDIR + 'labels_ascii_sorted.npy').astype(np.int)
    sampler = mathutil.NdarraySampler([Xtrain, Ytrain, None], copy=False)
else:
    base_sampler = mathutil.PrefetchFileSampler(
        [FEATDIR + '*.mat',
         FEATDIR + 'labels_ascii_sorted.npy',
         None])
    sampler = mathutil.PostProcessSampler(
        base_sampler,
        [lambda X: X,
         lambda Y: Y.astype(np.int),
         None])

loss = classifier.Loss2.loss_multiclass_logistic_yvector

callback = None
Example #12
         None])
    loss = classifier.Loss2.loss_multiclass_logistic
    DUMPNAME = "/u/vis/x1/jiayq/ILSVRC-2010-LLC-SIFT-model-hier.npz"
else:
    sampler = mathutil.PostProcessSampler(
        base_sampler,
        [lambda X: X.astype('float64') / np.sqrt(inner1d(X, X)[:, np.newaxis]),
         lambda Y: Y.astype(np.int),
         None])
    loss = classifier.Loss2.loss_multiclass_logistic_yvector


files = glob.glob(VALDIR + '/*.npy')
Xval = np.vstack([np.load(f) for f in sorted(files)]).astype('float64')
Xval /= np.sqrt(inner1d(Xval, Xval)[:, np.newaxis])
Yval = mpi.load_matrix(VAL_LABEL)
callback = [lambda wb: classifier.Evaluator.accuracy(
    Yval, (np.dot(Xval, wb[0]) + wb[1]).argmax(1))]

logging.info("Performing classification")
if os.path.exists(DUMPNAME):
    resume = DUMPNAME
else:
    resume = None

# Do search for reg, base_lr
param_grid = {
    'base_lr': [.1, .01, .001, .0001],
    'reg': [1e-3, 1e-5, 1e-8, 1e-10]
}
param_settings = [dict(zip(('base_lr', 'reg'), x))
                  for x in itertools.product(param_grid['base_lr'],
                                             param_grid['reg'])]
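
itertools.product enumerates the full cross product of the two lists, so param_settings holds all 16 (base_lr, reg) combinations. A small runnable sketch of the expansion:

import itertools

grid = {'base_lr': [.1, .01], 'reg': [1e-3, 1e-5]}
settings = [dict(zip(('base_lr', 'reg'), x))
            for x in itertools.product(grid['base_lr'], grid['reg'])]
# [{'base_lr': 0.1, 'reg': 0.001}, {'base_lr': 0.1, 'reg': 1e-05},
#  {'base_lr': 0.01, 'reg': 0.001}, {'base_lr': 0.01, 'reg': 1e-05}]
print(settings)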
Example #13
FEATDIR = '/tscratch/tmp/sergeyk/imagenet-sbow/'
RESULTSDIR = '/u/vis/x1/sergeyk/imagenet-sbow/'
LABELS = '/u/vis/x1/jiayq/ILSVRC/{}_predict/labels_ascii_sorted.npy'

########
# Main script
########
# load the classifier weights
with open('/u/vis/jiayq/codes/python/imagenet_exp/'
          'script_train_imagenet_sbow_stochastic.py0.0001.pickle') as f:
    wb = pickle.load(f)

np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
for s in ['val', 'test', 'train']:
    logging.info("Loading data...")
    if s == 'train':
        print('Train accuracy is claimed to be: {:.3f}'.format(wb[-2]))
        # train is multiple matrices
        X = mpi.load_matrix_multi(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    else:
        X = mpi.load_matrix(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    X.resize(X.shape[0], np.prod(X.shape[1:]))
    # normalize rows (the division by X.shape[1] leaves each row with norm equal to the feature dimension, not 1)
    X /= np.sqrt(inner1d(X, X)[:, np.newaxis] + 1e-8) / X.shape[1]
    Y = mpi.load_matrix(LABELS.format(s))
    print(X.shape)
    print(Y.shape)
    logging.info("Evaluating...")
    prob = np.dot(X, wb[0]) + wb[1]
    print('Accuracy on {}: {:.3f}'.format(
        s, classifier.Evaluator.accuracy(Y, prob.argmax(1))))
    np.save(os.path.join(RESULTSDIR, '{}_prob.npy'.format(s)), prob)
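
The accuracy call above reduces to comparing the argmax of the linear scores against the labels; assuming Evaluator.accuracy is the plain fraction of correct top-1 predictions, a numpy-only equivalent is:

import numpy as np

def top1_accuracy(labels, scores):
    # fraction of rows whose highest-scoring class matches the label
    return np.mean(scores.argmax(axis=1) == labels)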
Example #15
import cPickle as pickle
from iceberk import mpi, classifier, mathutil
import numpy as np
import logging
import os, sys

VALDIR = "/tscratch/tmp/jiayq/ILSVRC-2010-LLC-SIFT-val/"
DUMPNAME = "/u/vis/x1/jiayq/ILSVRC-2010-LLC-SIFT-model.npz"

mpi.root_log_level(logging.DEBUG)

Xval = mpi.load_matrix_multi(VALDIR + 'Xval')
Yval = mpi.load_matrix(VALDIR + 'labels_ascii_sorted.npy')
npzfile = np.load(DUMPNAME)
pred = np.dot(Xval, npzfile['w']) + npzfile['b']
accu = classifier.Evaluator.accuracy(Yval, pred.argmax(1))
accu5 = classifier.Evaluator.top_k_accuracy(Yval, pred, 5)
logging.debug('accu: %f, %f', accu, accu5)

# compute training accuracy
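
top_k_accuracy presumably counts a sample as correct when the true label appears among the k highest-scoring classes; a numpy sketch under that assumption:

import numpy as np

def top_k_accuracy(labels, scores, k):
    # indices of the k largest scores in each row (order is irrelevant)
    topk = np.argsort(scores, axis=1)[:, -k:]
    return np.mean([labels[i] in topk[i] for i in range(len(labels))])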
Example #16
gflags.DEFINE_bool("svm", False, "If set, run SVM")
gflags.DEFINE_bool("hier", False, "If set, use hierarchical loss")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
if mpi.SIZE > 1:
    raise RuntimeError, "This script runs on single machines only."

np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

# normalize to unit length
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]

logging.info("Performing classification")
target = classifier.to_one_of_k_coding(Ytrain, fill=0)

# stochastic lbfgs - we play a little trick by using all the training data to do initial lbfgs
solver = classifier.SolverStochastic(FLAGS.reg,
                                     classifier.Loss2.loss_multiclass_logistic,
                                     classifier.Reg.reg_l2,
                                     args={
                                         'mode': 'lbfgs',
gflags.DEFINE_bool("svm", False, "If set, run SVM")
gflags.DEFINE_bool("hier", False, "If set, use hierarchical loss")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
if mpi.SIZE > 1:
    raise RuntimeError("This script runs on single machines only.")

np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

# normalize rows (the division by shape[1] leaves each row with norm equal to the feature dimension, not 1)
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]

logging.info("Performing classification")
target = classifier.to_one_of_k_coding(Ytrain, fill=0)

# stochastic lbfgs - we play a little trick by using all the training data to do initial lbfgs
solver = classifier.SolverStochastic(FLAGS.reg,
                                     classifier.Loss2.loss_multiclass_logistic,
                                     classifier.Reg.reg_l2,
                                     args={'mode': 'lbfgs',
                                           'minibatch': FLAGS.minibatch,
                                           'num_iter': 20},
                                     fminargs={'maxfun': 20, 'disp': 0})
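
to_one_of_k_coding presumably expands the integer label vector into a one-hot target matrix, with fill as the value for the off classes; the call above builds the training target. A numpy sketch of that encoding (the implementation is an assumption; only the name and the fill keyword come from the snippet):

import numpy as np

def one_of_k(labels, num_classes, fill=0.0):
    target = np.full((len(labels), num_classes), fill, dtype=np.float64)
    target[np.arange(len(labels)), labels] = 1.0
    return target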