def dotrain(model, crit, aug, Xtr, ytr, nepochs=50, batchsize=100, title=None):
    """Train `model` for `nepochs` epochs and return the mean cost of each epoch.

    Args:
        model: Network to optimise; must provide `training()`,
            `zero_grad_parameters()` and `accumulate_gradients(X, y, crit)`.
        crit: Criterion handed to `model.accumulate_gradients`.
        aug: Optional augmenter. When not None, every batch is passed through
            `aug.augbatch_train(Xb, yb)` before being used.
        Xtr: Training inputs, forwarded to `batched`.
        ytr: Training targets, forwarded to `batched`.
        nepochs: Number of passes over the training data.
        batchsize: Batch size forwarded to `batched`.
        title: Not used by this function.

    Returns:
        A list with one entry per epoch: the `np.mean` of that epoch's
        per-batch costs.
    """
    optimizer = df.AdaDelta(rho=.95, eps=1e-7, lr=1)
    model.training()

    def train_on_batch(inputs, targets):
        # One optimisation step: optional augmentation, clear the gradients,
        # accumulate fresh ones, then let the optimizer update the model.
        if aug is not None:
            inputs, targets = aug.augbatch_train(inputs, targets)
        model.zero_grad_parameters()
        batch_cost = model.accumulate_gradients(inputs, targets, crit)
        optimizer.update_parameters(model)
        return batch_cost

    epoch_means = []
    for _ in range(nepochs):
        # Batches are requested with shuf=True on every epoch.
        batch_costs = [
            train_on_batch(inputs, targets)
            for inputs, targets in batched(batchsize, Xtr, ytr, shuf=True)
        ]
        epoch_mean = np.mean(batch_costs)
        epoch_means.append(epoch_mean)
        print("mean batch cost: %f" % epoch_mean)
    return epoch_means
def dotrain(model, crit, aug, Xtr, ytr, nepochs=3, batchsize=128, title=None):
    """Run the training loop and collect the average batch cost of every epoch.

    Args:
        model: Network being trained; `training()`, `zero_grad_parameters()`
            and `accumulate_gradients(X, y, crit)` are called on it.
        crit: Criterion passed through to `model.accumulate_gradients`.
        aug: Augmenter or None. If given, `aug.augbatch_train(Xb, yb)`
            replaces each batch before the gradient step.
        Xtr: Training inputs, handed to `batched`.
        ytr: Training targets, handed to `batched`.
        nepochs: How many epochs to run.
        batchsize: Batch size handed to `batched`.
        title: Not used by this function.

    Returns:
        List of length `nepochs` holding `np.mean` of the batch costs of each
        epoch, in order.
    """
    adadelta = df.AdaDelta(rho=.95, eps=1e-7, lr=1)
    model.training()

    print("Training in progress...")
    mean_costs = []
    # Count epochs from 1 so the progress message needs no offset.
    for epoch_no in range(1, nepochs + 1):
        print("Current epoch: {0} out of {1}".format(epoch_no, nepochs))

        costs_this_epoch = []
        for batch_x, batch_y in batched(batchsize, Xtr, ytr, shuf=True):
            if aug is not None:
                batch_x, batch_y = aug.augbatch_train(batch_x, batch_y)

            # Clear old gradients, accumulate new ones, then update the model.
            model.zero_grad_parameters()
            batch_cost = model.accumulate_gradients(batch_x, batch_y, crit)
            adadelta.update_parameters(model)
            costs_this_epoch.append(batch_cost)

        mean_costs.append(np.mean(costs_this_epoch))
    return mean_costs
# Load the dataset splits. Only the training and validation splits are used
# below; the test split is unpacked but never referenced in this section.
(Xtrain, ytrain), (Xval, yval), (Xtest, ytest) = load_mnist()

# A single criterion instance is shared by every optimizer run.
criterion = df.ClassNLLCriterion()


def run(optim, nepochs=5):
    """Train a freshly built network with `optim` and print its scores.

    Args:
        optim: Optimizer instance passed to `train`; its `str()` is shown in
            the progress-bar caption.
        nepochs: Number of training epochs. It sizes the progress bar and
            bounds the loop, so the two cannot drift apart. Defaults to 5,
            the value that was previously hard-coded in both places.

    Prints the NLL measured on the training set and the error count measured
    on the validation split. Returns None.
    """
    progress = make_progressbar('Training with ' + str(optim), nepochs)
    progress.start()

    # Every run starts from a new model so optimizers do not share weights.
    model = net()
    model.training()
    for epoch in range(nepochs):
        train(Xtrain, ytrain, model, optim, criterion, batch_size, 'train')
        # Second pass over the same data in 'stats' mode; presumably gathers
        # statistics rather than updating weights -- confirm against train().
        train(Xtrain, ytrain, model, optim, criterion, batch_size, 'stats')
        progress.update(epoch + 1)
    progress.finish()

    model.evaluate()
    # test() returns a pair; the first item is used as the NLL and the second
    # as the error count.
    nll, _ = test(Xtrain, ytrain, model, batch_size)
    _, nerr = test(Xval, yval, model, batch_size)

    print("Trainset NLL: {:.2f}".format(nll))
    # NOTE(review): this count is computed on (Xval, yval), although the label
    # says "Testset" -- confirm which split is intended.
    print("Testset errors: {}".format(nerr))


# Compare the optimizers, each training its own model from scratch.
run(df.SGD(lr=1e-1))
run(df.Momentum(lr=1e-2, momentum=0.95))
run(df.Nesterov(lr=1e-2, momentum=0.90))
run(df.AdaGrad(lr=1e-2, eps=1e-4))
run(df.RMSProp(lr=1e-3, rho=0.90, eps=1e-5))
run(df.AdaDelta(rho=0.99, lr=5e-1, eps=1e-4))