# flatten each sample to a 1-D feature vector, in place
Xtrain.resize(Xtrain.shape[0], np.prod(Xtrain.shape[1:]))

# normalize each sample: scale by (feature dim / L2 norm), so every row ends
# up with L2 norm equal to the feature dimension
for i in range(Xtrain.shape[0]):
    Xtrain[i] /= np.sqrt(np.dot(Xtrain[i], Xtrain[i]) + 1e-8) / Xtrain.shape[1]
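
# the loop above can also be written without a Python-level loop; a vectorized
# sketch, assuming Xtrain is a 2-D float array as resized above:
#
#   norms = np.sqrt(np.einsum('ij,ij->i', Xtrain, Xtrain) + 1e-8)
#   Xtrain *= (Xtrain.shape[1] / norms)[:, np.newaxis]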

logging.info("Performing classification")
target = classifier.to_one_of_k_coding(Ytrain, fill=0)
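
# to_one_of_k_coding presumably expands the integer labels into an
# (n_samples, n_classes) matrix with 1 at each label's column and `fill`
# everywhere else; a plain-numpy sketch of that assumption (fill=0 here):
#
#   target_sketch = np.zeros((Ytrain.shape[0], int(Ytrain.max()) + 1))
#   target_sketch[np.arange(Ytrain.shape[0]), Ytrain.astype(int)] = 1.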

# stochastic lbfgs - as a warm-start trick, we first run L-BFGS over all the
# training data to get an initial (w, b)
solver = classifier.SolverStochastic(FLAGS.reg,
                                     classifier.Loss2.loss_multiclass_logistic,
                                     classifier.Reg.reg_l2,
                                     args={
                                         'mode': 'lbfgs',
                                         'minibatch': FLAGS.minibatch,
                                         'num_iter': 20
                                     },
                                     fminargs={
                                         'maxfun': 20,
                                         'disp': 0
                                     })
sampler = mathutil.NdarraySampler((Xtrain, target, None))
w, b = solver.solve(sampler)
logging.info("Stochastic LBFGS done.")

skf = StratifiedKFold(Ytrain, k=10)
skf_results = []
for train_index, test_index in skf:
    param_init = (w, b)
    solver = classifier.SolverStochastic(
        FLAGS.reg,

# ===== Example #2 =====

    sys.exit(0)

if FLAGS.speedtest > 0:
    logging.info("Testing speed")
    logging.info("minibatch size: %d" % FLAGS.minibatch)
    from iceberk.util import Timer
    timer = Timer()
    for i in range(FLAGS.speedtest):
        batch = sampler.sample(FLAGS.minibatch)
        logging.info("Local size: %d" % batch[0].shape[0])
        total_size = mpi.COMM.allreduce(batch[0].shape[0])
        logging.info("Total size: %d" % total_size)
        logging.info("Sampling took %s secs" % timer.lap())
    sys.exit(0)
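
# Timer above only needs a lap() method returning the seconds elapsed since the
# previous call; a minimal stand-in (an assumption, not iceberk's actual
# implementation) could look like:
#
#   import time
#   class Timer(object):
#       def __init__(self):
#           self._last = time.time()
#       def lap(self):
#           now = time.time()
#           elapsed = now - self._last
#           self._last = now
#           return elapsed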

logging.info("Performing classification")
if os.path.exists(DUMPNAME):
    resume = DUMPNAME
else:
    resume = None
# adagrad
solver = classifier.SolverStochastic(FLAGS.reg,
                                     loss,
                                     classifier.Reg.reg_l2,
                                     args={
                                         'mode': 'adagrad',
                                         'base_lr': FLAGS.base_lr,
                                         'minibatch': FLAGS.minibatch,
                                         'num_iter': 1000,
                                         'callback': callback,
                                         'eta': 1e-8,
                                         'dump_every': 25,
                                         'dump_name': DUMPNAME
                                     })
w, b = solver.solve(sampler, resume=resume)

mpi.root_savez(DUMPNAME[:-4] + ".final.npz", w=w, b=b)
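
# assuming root_savez forwards the keyword arrays to numpy's savez, the saved
# weights can be restored later with e.g.:
#
#   data = np.load(DUMPNAME[:-4] + ".final.npz")
#   w, b = data['w'], data['b']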

# ===== Example #3 =====

if os.path.exists(DUMPNAME):
    resume = DUMPNAME
else:
    resume = None

# Do search for reg, base_lr
param_grid = {
    'base_lr': [.1, .01, .001, .0001],
    'reg': [1e-3, 1e-5, 1e-8, 1e-10]
}
param_settings = [dict(zip(('base_lr', 'reg'), x))
                  for x in itertools.product(param_grid['base_lr'],
                                             param_grid['reg'])]
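
# param_settings is the full cross product of the two grids: 4 x 4 = 16 dicts
# such as {'base_lr': 0.1, 'reg': 0.001}, {'base_lr': 0.1, 'reg': 1e-05}, ...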

accuracies = []
for setting in param_settings:
    print(setting)
    solver = classifier.SolverStochastic(
        setting['reg'], loss, classifier.Reg.reg_l2, args={
            'mode': 'adagrad', 'base_lr': setting['base_lr'],
            'minibatch': FLAGS.minibatch,
            'num_iter': 20, 'callback': callback, 'eta': 1e-8,
            'dump_every': 100, 'dump_name': DUMPNAME})
    w, b = solver.solve(sampler, resume=resume)
    accuracies.append(classifier.Evaluator.accuracy(
        Yval, (np.dot(Xval, w) + b).argmax(1)))
print(accuracies)
max_ind = np.argmax(accuracies)
best_setting = param_settings[max_ind]
with open(DUMPNAME[:-4] + '_params.json', 'w') as f:
    json.dump(best_setting, f)
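
# the chosen setting can be read back for later runs, e.g.:
#
#   with open(DUMPNAME[:-4] + '_params.json') as f:
#       best_setting = json.load(f)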

solver = classifier.SolverStochastic(
    best_setting['reg'], loss, classifier.Reg.reg_l2, args={
        'mode': 'adagrad', 'base_lr': best_setting['base_lr'],
        'minibatch': FLAGS.minibatch,

# ===== Example #4 =====

logging.info("Performing classification")

if FLAGS.svm:
    # do svm
    target = classifier.to_one_of_k_coding(Ytrain, fill=-1)
    loss = classifier.Loss2.loss_hinge
else:
    target = Ytrain.astype(int)
    loss = classifier.Loss2.loss_multiclass_logistic_yvector

solver = classifier.SolverStochastic(FLAGS.reg,
                                     loss,
                                     classifier.Reg.reg_l2,
                                     args={
                                         'mode': 'adagrad',
                                         'base_lr': 1e-7,
                                         'minibatch': FLAGS.minibatch,
                                         'num_iter': 1000,
                                         'callback': callback
                                     })
sampler = mathutil.NdarraySampler((Xtrain, target, None))
w, b = solver.solve(sampler, None, K=1000)

pred = (np.dot(Xtrain, w) + b).argmax(1)
accu_train = classifier.Evaluator.accuracy(Ytrain, pred)
logging.info("Reg %f, train accu %f" % \
            (FLAGS.reg, accu_train))
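
# Evaluator.accuracy presumably returns the fraction of correct predictions;
# a plain-numpy equivalent of that assumption would be:
#
#   accu_train_sketch = np.mean(pred == Ytrain)
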
if FLAGS.hier:
    mpi.root_pickle((w, b, FLAGS.reg, accu_train),
                    __file__ + str(FLAGS.reg) + ".hier.pickle")
elif FLAGS.svm:
    target = classifier.to_one_of_k_coding(Ytrain, fill=-1)
    loss = classifier.Loss2.loss_hinge
else:
    target = Ytrain.astype(int)
    loss = classifier.Loss2.loss_multiclass_logistic_yvector

# stochastic lbfgs
use_lbfgs = True
if use_lbfgs:
    solver = classifier.SolverStochastic(FLAGS.reg,
                                         loss,
                                         classifier.Reg.reg_l2,
                                         args={
                                             'mode': 'lbfgs',
                                             'minibatch': FLAGS.minibatch,
                                             'num_iter': 10,
                                             'callback': callback
                                         },
                                         fminargs={
                                             'maxfun': 10,
                                             'disp': 0
                                         })
    sampler = mathutil.NdarraySampler((Xtrain, target, None))
    w, b = solver.solve(sampler, K=1000)
    logging.info("Stochastic LBFGS done.")

# adagrad
if use_lbfgs:
    param_init = (w, b)
else:
    param_init = None