def testLoadMulti(self):
    """load_matrix_multi must agree with load_matrix for every load path:
    filename prefix, explicit .npy shard list, and explicit .mat shard list."""
    base = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
    reference = mpi.load_matrix(os.path.join(base, 'single_file.npy'))
    # load the split dump by filename prefix
    from_prefix = mpi.load_matrix_multi(os.path.join(base, 'multiple_files'))
    # load the split dump from an explicit, sorted list of .npy shards
    npy_shards = sorted(glob.glob(os.path.join(base, 'multiple_files*.npy')))
    from_npy_list = mpi.load_matrix_multi(npy_shards)
    # load the split dump from an explicit, sorted list of .mat shards,
    # selecting the matrix stored under the 'data' variable name
    mat_shards = sorted(glob.glob(os.path.join(base, 'multiple_files*.mat')))
    from_mat_list = mpi.load_matrix_multi(mat_shards, name='data')
    np.testing.assert_array_equal(reference, from_prefix)
    np.testing.assert_array_equal(reference, from_npy_list)
    np.testing.assert_array_equal(reference, from_mat_list)
def testLoadMulti(self):
    """Loading the multi-file dump must reproduce the single-file matrix."""
    base = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
    expected = mpi.load_matrix(os.path.join(base, 'single_file.npy'))
    actual = mpi.load_matrix_multi(os.path.join(base, 'multiple_files'))
    np.testing.assert_array_equal(expected, actual)
def testLoadMulti(self):
    """All load paths (single .npy, prefix, .npy list, .mat list) must agree."""
    data_root = os.path.join(os.path.dirname(__file__), 'data', 'dumploadmulti')
    expected = mpi.load_matrix(os.path.join(data_root, 'single_file.npy'))
    # prefix-based loading
    by_prefix = mpi.load_matrix_multi(os.path.join(data_root, 'multiple_files'))
    np.testing.assert_array_equal(expected, by_prefix)
    # explicit sorted .npy file list
    npy_files = sorted(glob.glob(os.path.join(data_root, 'multiple_files*.npy')))
    np.testing.assert_array_equal(expected, mpi.load_matrix_multi(npy_files))
    # explicit sorted .mat file list, matrix stored under variable name 'data'
    mat_files = sorted(glob.glob(os.path.join(data_root, 'multiple_files*.mat')))
    np.testing.assert_array_equal(
        expected, mpi.load_matrix_multi(mat_files, name='data'))
def testLoadMulti(self):
    """A matrix dumped across multiple files must load identically to its
    single-file dump."""
    here = os.path.dirname(__file__)
    whole = mpi.load_matrix(
        os.path.join(here, 'data', 'dumploadmulti', 'single_file.npy'))
    split = mpi.load_matrix_multi(
        os.path.join(here, 'data', 'dumploadmulti', 'multiple_files'))
    np.testing.assert_array_equal(whole, split)
# NOTE(review): flattened excerpt of a CUB-200 feature-pooling script.  It is
# cut off mid-way through the pipeline.ConvLayer([...]) argument list, so the
# tail of this chunk is syntactically incomplete.  Code left byte-identical;
# only formatting and comments added.
'''
Created on Jan 16, 2013

@author: jiayq
'''
import cPickle as pickle  # Python 2 pickle; this file predates Python 3
import logging
from matplotlib import pyplot
from iceberk import mpi, visiondata, pipeline, datasets, mathutil, visualize
import numpy as np

mpi.root_log_level(logging.DEBUG)
try:
    # Fast path: reuse a previously trained pipeline and cached pooled features.
    conv = pickle.load(open('conv.pickle'))
    regions_pooled = mpi.load_matrix_multi(\
        '/tscratch/tmp/jiayq/pooled_lda/regions_pooled', N = 10)
except IOError:
    # compute the features
    logging.info("Generating the data...")
    # assumes the CUB_200_2011 dataset lives at this cluster path -- TODO confirm
    bird = visiondata.CUBDataset('/u/vis/x1/common/CUB_200_2011',
                                 is_training=True, crop = 1.2, prefetch=True,
                                 target_size = [256,256])
    # sample 100000 raw 25x25 RGB regions from the training images
    regions = pipeline.PatchExtractor([25,25], 1).sample(bird, 100000)
    regions.resize((regions.shape[0],) + (25,25,3))
    regions_data = datasets.NdarraySet(regions)
    try:
        # the pipeline may exist even when the pooled features do not
        conv = pickle.load(open('conv.pickle'))
    except IOError:
        logging.info("Training the feature extraction pipeline...")
        conv = pipeline.ConvLayer([
            pipeline.PatchExtractor([5, 5], 1), # extracts patches
            pipeline.MeanvarNormalizer({'reg': 10}), # normalizes the patches
# NOTE(review): flattened excerpt of a Python 2 training script (note the
# `raise E, "msg"` syntax).  It is cut off inside the
# classifier.SolverStochastic(... args={ ...) call, so the tail is
# syntactically incomplete.  gflags/np/mpi/os/logging/classifier and FEATDIR
# are defined outside this chunk.  Code left byte-identical; comments only.
gflags.DEFINE_integer("minibatch", 100000, "The minibatch size")
gflags.DEFINE_bool("svm", False, "If set, run SVM")
gflags.DEFINE_bool("hier", False, "If set, use hierarchical loss")
FLAGS = gflags.FLAGS
FLAGS(sys.argv)

########
# Main script
########
if mpi.SIZE > 1:
    # guard: this script is not written for multi-node MPI runs
    raise RuntimeError, "This script runs on single machines only."
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
# flatten each sample's feature tensor into a vector
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))
# normalize to unit length
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]
logging.info("Performing classification")
target = classifier.to_one_of_k_coding(Ytrain, fill=0)
# stochastic lbfgs - we play a little trick by using all the training data to do initial lbfgs
solver = classifier.SolverStochastic(FLAGS.reg,
                                     classifier.Loss2.loss_multiclass_logistic,
                                     classifier.Reg.reg_l2,
                                     args={
# local import import tax ######## # Settings ######## FEATDIR = "/u/vis/ttmp/jiayq/cifar100/" ######## # Main script ######## if mpi.SIZE > 1: raise RuntimeError, "This script runs on single machine only." Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Xtest')) Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Ytest')).astype(np.int) infogain = tax.cifar_info_gain() info_mean = infogain.mean(axis=1) info_max = infogain.max(axis=1) randguess = sum([info_mean[y] for y in Ytest]) / float(len(Ytest)) bestguess = sum([info_max[y] for y in Ytest]) / float(len(Ytest)) print 'Random guess baseline:', randguess print 'Best guess baseline:', bestguess for filename in sys.argv[1:]: data = pickle.load(open(filename))
def testLoadMulti(self):
    """Check that the multi-file dump loads to the same matrix as the
    single-file reference."""
    root = os.path.join(os.path.dirname(__file__), "data", "dumploadmulti")
    single = mpi.load_matrix(os.path.join(root, "single_file.npy"))
    multi = mpi.load_matrix_multi(os.path.join(root, "multiple_files"))
    np.testing.assert_array_equal(single, multi)
# NOTE(review): flattened excerpt that begins INSIDE the body of an `if`
# whose header lies outside this chunk (the branch that computes and dumps
# features) -- hence the leading indentation and the bare `else:` below.
# Code left byte-identical; comments only.
    mpi.root_pickle(CONV, __file__ + ".conv.pickle")
    Xtrain = CONV.process_dataset(train_data, as_2d=True)
    Xtest = CONV.process_dataset(test_data, as_2d=True)
    Ytrain = train_data.labels()
    Ytest = test_data.labels()
    # standardize with training-set statistics, applied to both splits
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    # cache features so later runs can take the `else:` load path below
    mpi.dump_matrix_multi(Xtrain, os.path.join(FEATDIR, "Xtrain"))
    mpi.dump_matrix_multi(Xtest, os.path.join(FEATDIR, "Xtest"))
    mpi.dump_matrix_multi(Ytrain, os.path.join(FEATDIR, "Ytrain"))
    mpi.dump_matrix_multi(Ytest, os.path.join(FEATDIR, "Ytest"))
else:
    Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, "Xtrain"))
    Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR, "Xtest"))
    Ytrain = mpi.load_matrix_multi(os.path.join(FEATDIR, "Ytrain"))
    Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR, "Ytest"))
if FLAGS.flat:
    logging.info("Performing flat classification")
    solver = classifier.SolverMC(
        FLAGS.reg, classifier.Loss.loss_multiclass_logistic, classifier.Reg.reg_l2, fminargs={"maxfun": 1000}
    )
    # train on one-of-K targets, then report accuracy on both splits
    w, b = solver.solve(Xtrain, classifier.to_one_of_k_coding(Ytrain, fill=0))
    pred = np.dot(Xtrain, w) + b
    accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
    pred = np.dot(Xtest, w) + b
    accu_test = classifier.Evaluator.accuracy(Ytest, pred)
    logging.info("Reg %f, train accu %f, test accu %f" % (FLAGS.reg, accu_train, accu_test))
# NOTE(review): flattened excerpt that begins INSIDE the body of an `if`
# whose header is outside this chunk (the branch that computes and dumps
# features) -- hence the leading indentation and the bare `else:` below.
# The excerpt also ends right after accu_test; any reporting follows
# elsewhere.  Code left byte-identical; comments only.
    mpi.root_pickle(CONV, __file__ + ".conv.pickle")
    Xtrain = CONV.process_dataset(train_data, as_2d = True)
    Xtest = CONV.process_dataset(test_data, as_2d = True)
    Ytrain = train_data.labels()
    Ytest = test_data.labels()
    # standardize with training-set statistics, applied to both splits
    m, std = classifier.feature_meanstd(Xtrain)
    Xtrain -= m
    Xtrain /= std
    Xtest -= m
    Xtest /= std
    # cache features so later runs can take the `else:` load path below
    mpi.dump_matrix_multi(Xtrain, os.path.join(FEATDIR,'Xtrain'))
    mpi.dump_matrix_multi(Xtest, os.path.join(FEATDIR,'Xtest'))
    mpi.dump_matrix_multi(Ytrain, os.path.join(FEATDIR,'Ytrain'))
    mpi.dump_matrix_multi(Ytest, os.path.join(FEATDIR,'Ytest'))
else:
    Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'Xtrain'))
    Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Xtest'))
    Ytrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'Ytrain'))
    Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Ytest'))
if FLAGS.flat:
    logging.info("Performing flat classification")
    solver = classifier.SolverMC(FLAGS.reg,
                                 classifier.Loss.loss_multiclass_logistic,
                                 classifier.Reg.reg_l2,
                                 fminargs = {'maxfun': 1000})
    # train on one-of-K targets, then score both splits
    w,b = solver.solve(Xtrain, classifier.to_one_of_k_coding(Ytrain, fill=0))
    pred = np.dot(Xtrain, w) + b
    accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
    pred = np.dot(Xtest, w) + b
    accu_test = classifier.Evaluator.accuracy(Ytest, pred)
# NOTE(review): flattened excerpt of an ILSVRC-2010 validation script; the
# file continues past this chunk (see the trailing "training accuracy"
# comment), so later code may use these script-level names.  Code left
# byte-identical; comments only.
import cPickle as pickle  # Python 2 pickle
from iceberk import mpi, classifier, mathutil
import numpy as np
import logging
import os, sys

# cluster paths: validation features and the trained linear-model dump
VALDIR = "/tscratch/tmp/jiayq/ILSVRC-2010-LLC-SIFT-val/"
DUMPNAME = "/u/vis/x1/jiayq/ILSVRC-2010-LLC-SIFT-model.npz"

mpi.root_log_level(logging.DEBUG)
Xval = mpi.load_matrix_multi(VALDIR + 'Xval')
Yval = mpi.load_matrix(VALDIR + 'labels_ascii_sorted.npy')
npzfile = np.load(DUMPNAME)
# linear model scores = X w + b, stored in the npz under keys 'w' and 'b'
pred = np.dot(Xval, npzfile['w']) + npzfile['b']
# top-1 and top-5 accuracy on the validation split
accu = classifier.Evaluator.accuracy(Yval, pred.argmax(1))
accu5 = classifier.Evaluator.top_k_accuracy(Yval, pred, 5)
logging.debug('accu: %f, %f', accu, accu5)
# perform training accuracy
# local import import tax ######## # Settings ######## FEATDIR = "/u/vis/ttmp/jiayq/cifar100/" ######## # Main script ######## if mpi.SIZE > 1: raise RuntimeError, "This script runs on single machine only." Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Xtest')) Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR,'Ytest')).astype(np.int) infogain = tax.cifar_info_gain() info_mean = infogain.mean(axis=1) info_max = infogain.max(axis=1) randguess = sum([info_mean[y] for y in Ytest]) / float(len(Ytest)) bestguess = sum([info_max[y] for y in Ytest]) / float(len(Ytest)) print 'Random guess baseline:', randguess print 'Best guess baseline:', bestguess for filename in sys.argv[1:]: data = pickle.load(open(filename))
# Settings ######## FEATDIR = "/u/vis/x1/common/ILSVRC-2010/SBOW/" gflags.DEFINE_float("reg", 0.01, "The reg term") gflags.DEFINE_integer("minibatch", 10000, "The minibatch size") gflags.DEFINE_bool("svm", False, "If set, run SVM") FLAGS = gflags.FLAGS FLAGS(sys.argv) ######## # Main script ######## np.random.seed(42 + mpi.RANK) mpi.root_log_level(level=logging.DEBUG) logging.info("Loading data...") Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR,'train', 'Xtrain')) Ytrain = mpi.load_matrix(os.path.join(FEATDIR,'train', 'Ytrain.npy')) Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:])) Xval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Xval')) Yval = mpi.load_matrix(os.path.join(FEATDIR, 'val', 'Yval')) Xval.resize(Xval.shape[0], np.prod(Xval.shape[1:])) Xtest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Xtest')) Ytest = mpi.load_matrix(os.path.join(FEATDIR, 'test', 'Ytest')) Xtest.resize(Xtest.shape[0], np.prod(Xtest.shape[1:])) # normalize to unit length for i in range(Xtrain.shape[0]): Xtrain[i] /= np.sqrt(np.dot(Xtrain[i],Xtrain[i]) + 1e-8) / Xtrain.shape[1] for i in range(Xval.shape[0]):
# NOTE(review): flattened excerpt of a Python 2 subcategory-training script;
# FLAGS, FEATDIR, and the imports (np, mpi, glob, time, logging, mathutil,
# classifier) are defined outside this chunk, and `sampler`/`loss` are
# presumably consumed by later code.  Code left byte-identical; comments only.
DUMPNAME = "/u/vis/x1/jiayq/ILSVRC/subcategory/" + FLAGS.name \
    + "/ILSVRC-2010-LLC-SIFT-subcategory-model.npz"

########
# Main script
########
# decorrelate the RNG across MPI ranks
np.random.seed(int(time.time()) + mpi.RANK * 100)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
if FLAGS.preload:
    # load every per-synset .mat feature shard into memory up front
    files = glob.glob(FEATDIR + 'n*.mat')
    files.sort()
    if mpi.is_root():
        print 'files', files
    Xtrain = mpi.load_matrix_multi(files, name='features')
    Ytrain = mpi.load_matrix(FEATDIR + 'labels_ascii_sorted.npy').astype(np.int)
    sampler = mathutil.NdarraySampler([Xtrain, Ytrain, None], copy=False)
else:
    # stream shards from disk instead of preloading, casting labels on the fly
    base_sampler = mathutil.PrefetchFileSampler(\
        [FEATDIR + '*.mat', FEATDIR + 'labels_ascii_sorted.npy', None])
    sampler = mathutil.PostProcessSampler(\
        base_sampler, [lambda X: X, lambda Y: Y.astype(np.int), None])
loss = classifier.Loss2.loss_multiclass_logistic_yvector
# NOTE(review): flattened excerpt of a CUB-200 feature-pooling script; the
# chunk ends right after the inner except's logging call, mid-way through the
# recompute branch.  Code left byte-identical; only formatting and comments
# added.
'''
Created on Jan 16, 2013

@author: jiayq
'''
import cPickle as pickle  # Python 2 pickle
import logging
from matplotlib import pyplot
from iceberk import mpi, visiondata, pipeline, datasets, mathutil, visualize
import numpy as np

mpi.root_log_level(logging.DEBUG)
try:
    # Fast path: reuse a previously trained pipeline and cached pooled features.
    conv = pickle.load(open('conv.pickle'))
    regions_pooled = mpi.load_matrix_multi(\
        '/tscratch/tmp/jiayq/pooled_lda/regions_pooled', N = 10)
except IOError:
    # compute the features
    logging.info("Generating the data...")
    # assumes the CUB_200_2011 dataset lives at this cluster path -- TODO confirm
    bird = visiondata.CUBDataset('/u/vis/x1/common/CUB_200_2011',
                                 is_training=True, crop=1.2, prefetch=True,
                                 target_size=[256, 256])
    # sample 100000 raw 25x25 RGB regions from the training images
    regions = pipeline.PatchExtractor([25, 25], 1).sample(bird, 100000)
    regions.resize((regions.shape[0], ) + (25, 25, 3))
    regions_data = datasets.NdarraySet(regions)
    try:
        # the pipeline may exist even when the pooled features do not
        conv = pickle.load(open('conv.pickle'))
    except IOError:
        logging.info("Training the feature extraction pipeline...")
# NOTE(review): flattened excerpt of an evaluation script.  pickle, np, mpi,
# os, logging, classifier, and inner1d are imported outside this chunk
# (inner1d is presumably numpy.core.umath_tests.inner1d -- verify).  Code
# left byte-identical; comments only.
FEATDIR = '/tscratch/tmp/sergeyk/imagenet-sbow/'
RESULTSDIR = '/u/vis/x1/sergeyk/imagenet-sbow/'
LABELS = '/u/vis/x1/jiayq/ILSVRC/{}_predict/labels_ascii_sorted.npy'

########
# Main script
########
# load the classifier weights
wb = pickle.load(open('/u/vis/jiayq/codes/python/imagenet_exp/script_train_imagenet_sbow_stochastic.py0.0001.pickle'))
np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
for s in ['val', 'test', 'train']:
    logging.info("Loading data...")
    if s == 'train':
        # wb[-2] is read as the recorded train accuracy -- verify dump layout
        print('Train accuracy is claimed to be: {:.3f}'.format(wb[-2]))
        # train is multiple matrices
        X = mpi.load_matrix_multi(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    else:
        X = mpi.load_matrix(os.path.join(FEATDIR, s, 'X{}'.format(s)))
    # flatten per-sample features into vectors
    X.resize(X.shape[0], np.prod(X.shape[1:]))
    # normalize to unit length
    X /= np.sqrt(inner1d(X, X)[:, np.newaxis] + 1e-8) / X.shape[1]
    Y = mpi.load_matrix(LABELS.format(s))
    print(X.shape)
    print(Y.shape)
    logging.info("Evaluating...")
    # linear model: class scores = X w + b with wb = (w, b, ...)
    prob = np.dot(X, wb[0]) + wb[1]
    print('Accuracy on {}: {:.3f}'.format(s, classifier.Evaluator.accuracy(Y, prob.argmax(1))))
    np.save(os.path.join(RESULTSDIR, '{}_prob.npy'.format(s)), prob)