Example #1
def loss_multiclass_nn(X_feats, Y, nn, arch):
    """Compute the dataset-average multiclass negative log-likelihood."""
    prob = get_predictions_nn(X_feats, nn, arch)[0]
    Y2 = classifier.to_one_of_k_coding(Y, 0)
    # sum -log p(true class) over the local shard, then average across MPI nodes
    local_likelihood = -np.dot(np.log(prob).flat, Y2.flat)
    likelihood = mpi.COMM.allreduce(local_likelihood)
    num_data = mpi.COMM.allreduce(len(Y))
    return float(likelihood) / num_data
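All of the examples on this page revolve around `classifier.to_one_of_k_coding(Y, fill)`, which turns an integer label vector into an N x K target matrix: `fill=0` is used with logistic loss below and `fill=-1` with hinge loss. A minimal sketch of the behavior these snippets rely on (a hypothetical reimplementation for illustration, not the iceberk source):

import numpy as np

def to_one_of_k_coding_sketch(Y, fill=0, K=None):
    # Hypothetical stand-in for classifier.to_one_of_k_coding: each row
    # is `fill` everywhere except a 1 at the column of the true class.
    K = Y.max() + 1 if K is None else K
    coding = np.full((len(Y), K), fill, dtype=np.float64)
    coding[np.arange(len(Y)), Y] = 1.0
    return coding

# e.g. to_one_of_k_coding_sketch(np.array([0, 2]), fill=-1) gives
# [[ 1., -1., -1.], [-1., -1.,  1.]]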
Example #2
def l2logreg_onevsall(X, Y, gamma, weight=None, **kwargs):
    if Y.ndim == 1:
        Y = classifier.to_one_of_k_coding(Y, 0)
    #solver = classifier.SolverStochastic(gamma,
    #    classifier.Loss.loss_multiclass_logreg,
    #    classifier.Reg.reg_l2,
    #    args = {'mode': 'adagrad', 'base_lr': 1e-3, 'minibatch': 100,
    #            'num_iter': 1000},
    #    **kwargs)
    solver = classifier.SolverMC(gamma, classifier.Loss.loss_multiclass_logistic,
                                 classifier.Reg.reg_l2, **kwargs)
    #sampler = mathutil.NdarraySampler((X, Y, None))
    return solver.solve(X, Y, weight)
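A hedged usage sketch for the helper above (the data shapes and gamma value are illustrative assumptions; the tuple unpacking matches the (w, b) return of solver.solve seen in the later examples):

X = np.random.rand(200, 16)            # 200 samples, 16 features
y = np.random.randint(4, size=200)     # integer labels in {0, 1, 2, 3}
w, b = l2logreg_onevsall(X, y, gamma=0.01)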
Example #3
def testLossLogistic(self):
    y = np.random.randint(5, size=100)
    while y.max() < 4:
        y = np.random.randint(5, size=100)
    Y = classifier.to_one_of_k_coding(y)
    pred = np.random.rand(100, 5)
    weight = np.random.rand(100)
    TestLoss2.basicTest(Y, pred, weight,
            classifier.Loss.loss_multiclass_logistic,
            classifier.Loss2.loss_multiclass_logistic)
    TestLoss2.basicTest(Y, pred * 10, weight,
            classifier.Loss.loss_multiclass_logistic,
            classifier.Loss2.loss_multiclass_logistic)
Example #4
def loss_multiclass_nn_old(X_feats, Y, nn):
    # wrap the features in a pybrain dataset with dummy targets so that
    # activateOnDataset can be used to get the class probabilities
    DS = ClassificationDataSet(X_feats.shape[1], 1, nb_classes=2)
    #for i in range(X_feats.shape[0]):
    #    DS.addSample( X_feats[i,:], [0.0] )
    DS.setField('input', X_feats)
    DS.setField('target', np.zeros((X_feats.shape[0], 1)))
    DS._convertToOneOfMany()
    prob = nn.activateOnDataset(DS)
    Y2 = classifier.to_one_of_k_coding(Y, 0)
    # sum -log p(true class) locally, then average across MPI nodes
    local_likelihood = -np.dot(np.log(prob).flat, Y2.flat)
    likelihood = mpi.COMM.allreduce(local_likelihood)
    num_data = mpi.COMM.allreduce(len(Y))
    return float(likelihood) / num_data
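Examples #1, #4, and (below) #7 all compute the same quantity: the dataset-average negative log-likelihood, with each MPI rank summing over its local shard before the allreduce. Stripped of MPI, the computation reduces to the following sketch, assuming `prob` holds per-class probabilities:

import numpy as np

def multiclass_nll(prob, Y2):
    # prob: (N, K) predicted class probabilities
    # Y2:   (N, K) one-of-K targets with fill=0
    # With fill=0 the dot product picks out -log p(true class) per row.
    return -np.dot(np.log(prob).flat, Y2.flat) / len(Y2)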
Example #5
def testLossHinge(self):
    y = np.random.randint(5, size=100)
    while y.max() < 4:
        y = np.random.randint(5, size=100)
    Y = classifier.to_one_of_k_coding(y)
    pred = np.random.rand(100, 5)
    weight = np.random.rand(100)
    TestLoss2.basicTest(Y, pred, weight,
            classifier.Loss.loss_hinge, classifier.Loss2.loss_hinge)
    TestLoss2.basicTest(Y, pred, weight,
            classifier.Loss.loss_squared_hinge,
            classifier.Loss2.loss_squared_hinge)
    TestLoss2.basicTest(Y, pred * 10, weight,
            classifier.Loss.loss_hinge, classifier.Loss2.loss_hinge)
    TestLoss2.basicTest(Y, pred * 10, weight,
            classifier.Loss.loss_squared_hinge,
            classifier.Loss2.loss_squared_hinge)
Example #6
if mpi.SIZE > 1:
    raise RuntimeError("This script runs on single machines only.")

np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

# scale each row to have L2 norm equal to the feature dimension
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]

logging.info("Performing classification")
target = classifier.to_one_of_k_coding(Ytrain, fill=0)

# stochastic LBFGS - a little trick: use all the training data for the initial LBFGS pass
solver = classifier.SolverStochastic(FLAGS.reg,
                                     classifier.Loss2.loss_multiclass_logistic,
                                     classifier.Reg.reg_l2,
                                     args={
                                         'mode': 'lbfgs',
                                         'minibatch': FLAGS.minibatch,
                                         'num_iter': 20
                                     },
                                     fminargs={
                                         'maxfun': 20,
                                         'disp': 0
                                     })
sampler = mathutil.NdarraySampler((Xtrain, target, None))
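Example #6 breaks off right after the sampler is built; judging from the near-identical script in Example #15 at the bottom of this page, training would continue by handing the sampler straight to the solver:

w, b = solver.solve(sampler)
logging.info("Stochastic LBFGS done.")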
Example #7
def loss_multiclass_logreg(Y, X, weights):
    pred = mathutil.dot(X, weights[0]) + weights[1]
    local_likelihood = classifier.Loss.loss_multiclass_logistic(
            classifier.to_one_of_k_coding(Y, 0), pred, None)[0]
    likelihood = mpi.COMM.allreduce(local_likelihood)
    num_data = mpi.COMM.allreduce(len(Y))
    return float(likelihood) / num_data
Example #8
    mpi.dump_matrix_multi(Xtest, os.path.join(FEATDIR, 'Xtest'))
    mpi.dump_matrix_multi(Ytrain, os.path.join(FEATDIR, 'Ytrain'))
    mpi.dump_matrix_multi(Ytest, os.path.join(FEATDIR, 'Ytest'))
else:
    Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Xtrain'))
    Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Xtest'))
    Ytrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Ytrain'))
    Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR, 'Ytest'))

if FLAGS.flat:
    logging.info("Performing flat classification")
    solver = classifier.SolverMC(FLAGS.reg,
                                 classifier.Loss.loss_multiclass_logistic,
                                 classifier.Reg.reg_l2,
                                 fminargs = {'maxfun': 1000})
    w,b = solver.solve(Xtrain, classifier.to_one_of_k_coding(Ytrain, fill=0))
    pred = np.dot(Xtrain, w) + b
    accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
    pred = np.dot(Xtest, w) + b
    accu_test = classifier.Evaluator.accuracy(Ytest, pred)
    logging.info("Reg %f, train accu %f, test accu %f" % \
            (FLAGS.reg, accu_train, accu_test))
    mpi.root_pickle((w, b, FLAGS.reg, accu_train, accu_test),
                    __file__ + str(FLAGS.reg) + ".flat.pickle")

if FLAGS.svm:
    logging.info("Performing svm classification")
    solver = classifier.SolverMC(FLAGS.reg,
                                 classifier.Loss.loss_hinge,
                                 classifier.Reg.reg_l2,
                                 fminargs = {'maxfun': 1000})
Example #9
for i in range(Xval.shape[0]):
    Xval[i] /= np.sqrt(np.dot(Xval[i], Xval[i]) + 1e-8) / Xval.shape[1]
for i in range(Xtest.shape[0]):
    Xtest[i] /= np.sqrt(np.dot(Xtest[i], Xtest[i]) + 1e-8) / Xtest.shape[1]

callback = \
        [lambda wb: classifier.Evaluator.accuracy(Yval, 
                (np.dot(Xval, wb[0]) + wb[1]).argmax(1)),
         lambda wb: classifier.Evaluator.accuracy(Ytest, 
                (np.dot(Xtest, wb[0]) + wb[1]).argmax(1))]

logging.info("Performing classification")

if FLAGS.svm:
    # do svm
    target = classifier.to_one_of_k_coding(Ytrain, fill=-1)
    loss = classifier.Loss2.loss_hinge
else:
    target = Ytrain.astype(np.int)
    loss = classifier.Loss2.loss_multiclass_logistic_yvector

solver = classifier.SolverStochastic(FLAGS.reg,
        loss,
        classifier.Reg.reg_l2,
        args = {'mode': 'adagrad', 'base_lr': 1e-7, 'minibatch': FLAGS.minibatch,
                'num_iter': 1000, 'callback': callback})
sampler = mathutil.NdarraySampler((Xtrain, target, None))
w, b = solver.solve(sampler, None, K=1000)

pred = (np.dot(Xtrain, w) + b).argmax(1)
accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
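For reference, `Evaluator.accuracy` as called here takes integer labels and already-argmaxed integer predictions, so it presumably reduces to mean agreement; a NumPy equivalent, stated as an assumption rather than the iceberk source:

import numpy as np

def accuracy_sketch(Y, pred):
    # fraction of examples where the predicted class matches the label
    return np.mean(Y == pred)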
Example #10
    mpi.dump_matrix_multi(Xtrain, os.path.join(FEATDIR, "Xtrain"))
    mpi.dump_matrix_multi(Xtest, os.path.join(FEATDIR, "Xtest"))
    mpi.dump_matrix_multi(Ytrain, os.path.join(FEATDIR, "Ytrain"))
    mpi.dump_matrix_multi(Ytest, os.path.join(FEATDIR, "Ytest"))
else:
    Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, "Xtrain"))
    Xtest = mpi.load_matrix_multi(os.path.join(FEATDIR, "Xtest"))
    Ytrain = mpi.load_matrix_multi(os.path.join(FEATDIR, "Ytrain"))
    Ytest = mpi.load_matrix_multi(os.path.join(FEATDIR, "Ytest"))

if FLAGS.flat:
    logging.info("Performing flat classification")
    solver = classifier.SolverMC(
        FLAGS.reg, classifier.Loss.loss_multiclass_logistic, classifier.Reg.reg_l2, fminargs={"maxfun": 1000}
    )
    w, b = solver.solve(Xtrain, classifier.to_one_of_k_coding(Ytrain, fill=0))
    pred = np.dot(Xtrain, w) + b
    accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
    pred = np.dot(Xtest, w) + b
    accu_test = classifier.Evaluator.accuracy(Ytest, pred)
    logging.info("Reg %f, train accu %f, test accu %f" % (FLAGS.reg, accu_train, accu_test))
    mpi.root_pickle((w, b, FLAGS.reg, accu_train, accu_test), __file__ + str(FLAGS.reg) + ".flat.pickle")

if FLAGS.svm:
    logging.info("Performing svm classification")
    solver = classifier.SolverMC(
        FLAGS.reg, classifier.Loss.loss_hinge, classifier.Reg.reg_l2, fminargs={"maxfun": 1000}
    )
    w, b = solver.solve(Xtrain, classifier.to_one_of_k_coding(Ytrain, fill=-1))
    pred = np.dot(Xtrain, w) + b
    accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
Example #11
for i in range(Xval.shape[0]):
    Xval[i] /= np.sqrt(np.dot(Xval[i], Xval[i]) + 1e-8) / Xval.shape[1]
for i in range(Xtest.shape[0]):
    Xtest[i] /= np.sqrt(np.dot(Xtest[i], Xtest[i]) + 1e-8) / Xtest.shape[1]

callback = \
        [lambda wb: classifier.Evaluator.accuracy(Yval,
                (np.dot(Xval, wb[0]) + wb[1]).argmax(1)),
         lambda wb: classifier.Evaluator.accuracy(Ytest,
                (np.dot(Xtest, wb[0]) + wb[1]).argmax(1))]

logging.info("Performing classification")

if FLAGS.svm:
    # do svm
    target = classifier.to_one_of_k_coding(Ytrain, fill=-1)
    loss = classifier.Loss2.loss_hinge
else:
    target = Ytrain.astype(np.int)
    loss = classifier.Loss2.loss_multiclass_logistic_yvector

solver = classifier.SolverStochastic(FLAGS.reg,
                                     loss,
                                     classifier.Reg.reg_l2,
                                     args={
                                         'mode': 'adagrad',
                                         'base_lr': 1e-7,
                                         'minibatch': FLAGS.minibatch,
                                         'num_iter': 1000,
                                         'callback': callback
                                     })
Example #12
FLAGS(sys.argv)

#
# Main script
#
np.random.seed(int(time.time()) + mpi.RANK * 100)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")

base_sampler = mathutil.FileSampler([TRAINDIR + '*.npy', TRAIN_LABEL, None])

if FLAGS.svm:
    sampler = mathutil.PostProcessSampler(
        base_sampler,
        [lambda X: X.astype('float64'),
         lambda Y: classifier.to_one_of_k_coding(Y, fill=-1, K=1000),
         None])
    loss = classifier.Loss2.loss_hinge
if FLAGS.hier:
    logging.info('Perform hierarchical loss.')
    from birdmix import tax
    graph = tax.get_imagenet_taxonomy(1000)
    leaves = [n for n in graph.nodes() if len(graph.successors(n)) == 0]
    leaves.sort()
    leaf2id = dict((n, i) for i, n in enumerate(leaves))
    infogain = tax.pairwise_info_gain(graph)
    # convert the pairwise info gain dict to a dense matrix
    igmat = np.zeros((1000, 1000))
    for key in infogain:
        igmat[leaf2id[key[0]], leaf2id[key[1]]] = infogain[key]
    # exponentiate in place: igmat <- exp(igmat)
    np.exp(igmat, igmat)
Example #13
from iceberk import classifier
import numpy as np

X = np.random.rand(100, 2)
X = np.vstack((X + [1, 1],
               X + [1, -1],
               X + [-1, 1],
               X + [-1, -1]))
Y = np.tile(np.arange(4), (100, 1)).T.flatten()
Y = classifier.to_one_of_k_coding(Y, fill=0)

solver = classifier.SolverMC(0.01, 
                             classifier.Loss.loss_multiclass_logistic,
                             classifier.Reg.reg_l2)

w, b = solver.solve(X, Y)
Example #14
from iceberk import classifier
import numpy as np

X = np.random.rand(100, 2)
X = np.vstack((X + [1, 1], X + [1, -1], X + [-1, 1], X + [-1, -1]))
Y = np.tile(np.arange(4), (100, 1)).T.flatten()
Y = classifier.to_one_of_k_coding(Y, fill=0)

solver = classifier.SolverMC(0.01, classifier.Loss.loss_multiclass_logistic,
                             classifier.Reg.reg_l2)

w, b = solver.solve(X, Y)
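A quick sanity check one can append to the toy example above (an illustrative addition, not part of the original snippet): the four shifted clouds are linearly separable, so training accuracy should be near 1.

pred = (np.dot(X, w) + b).argmax(1)
print("train accuracy: %f" % np.mean(pred == Y.argmax(1)))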
Example #15
if mpi.SIZE > 1:
    raise RuntimeError("This script runs on single machines only.")

np.random.seed(42 + mpi.RANK)
mpi.root_log_level(level=logging.DEBUG)
logging.info("Loading data...")
Xtrain = mpi.load_matrix_multi(os.path.join(FEATDIR, 'train', 'Xtrain'))
Ytrain = mpi.load_matrix(os.path.join(FEATDIR, 'train', 'Ytrain.npy'))
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

# scale each row to have L2 norm equal to the feature dimension
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]

logging.info("Performing classification")
target = classifier.to_one_of_k_coding(Ytrain, fill=0)

# stochastic LBFGS - a little trick: use all the training data for the initial LBFGS pass
solver = classifier.SolverStochastic(FLAGS.reg,
        classifier.Loss2.loss_multiclass_logistic,
        classifier.Reg.reg_l2,
        args={'mode': 'lbfgs', 'minibatch': FLAGS.minibatch, 'num_iter': 20},
        fminargs={'maxfun': 20, 'disp': 0})
sampler = mathutil.NdarraySampler((Xtrain, target, None))
w, b = solver.solve(sampler)
logging.info("Stochastic LBFGS done.")

skf = StratifiedKFold(Ytrain, k=10)
skf_results = []
for train_index, test_index in skf:
    param_init = (w, b)