Example #1
def train_test(depth, growth_rate, dropout, augment, validate, epochs,
               eta, save_weights, save_errors, batchsize=64):
    # import (deferred until now to make --help faster)
    import os
    import sys
    import time

    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    import densenet_fast as densenet  # or "import densenet" for slower version
    import cifar10
    import progress

    seed = 42
    lambda_clean = 0.1
    np.random.seed(seed)
    # Logging operations

    output_path = '/results/' + os.getcwd().split('/')[-1] + '/' + os.path.basename(__file__).split('.')[0] + '/' + time.strftime(
        "%d-%m-%Y_") + time.strftime("%H:%M:%S") + '/'
    pyscript_name = os.path.basename(__file__)
    create_result_dirs(output_path, pyscript_name)
    sys.stdout = Logger(output_path)
    print('seed: ', seed)

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth,
                                      growth_rate=growth_rate, dropout=dropout)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = cifar10.load_dataset(
            path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction_clean = lasagne.layers.get_output(network, deterministic=True)
    # note: The Keras implementation clips predictions for the categorical
    #       cross-entropy. This doesn't seem to have a positive effect here.
    # prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction,
                                                       target_var).mean()
    loss_squared = lambda_clean * lasagne.objectives.squared_error(prediction, prediction_clean).mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere.
    #       However, 1e-4 seems to work better than 5e-5, so we use 1e-4.
    # note: Torch includes biases in L2 decay. This seems to be important! So
    #       we decay all 'trainable' parameters, not just 'regularizable' ones.
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
            loss + loss_squared + l2_loss, params, learning_rate=eta, momentum=0.9)
    train_fn = theano.function([input_var, target_var], [loss, loss_squared], updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_loss = lasagne.objectives.categorical_crossentropy(prediction_clean,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(prediction_clean,
                                                           target_var).mean(
                                                  dtype=theano.config.floatX)
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    # Finally, launch the training loop.
    print("Starting training...")
    if save_errors:
        errors = []
    for epoch in range(epochs):
        # shrink learning rate at 50% and 75% into training
        if epoch == (epochs // 2) or epoch == (epochs * 3 // 4):
            eta.set_value(eta.get_value() * lasagne.utils.floatX(0.1))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        clean_loss = 0
        train_batches = len(X_train) // batchsize
        batches = cifar10.iterate_minibatches(X_train, y_train, batchsize,
                                              shuffle=True)
        if augment:
            batches = cifar10.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
                batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
                total=train_batches)
        for inputs, targets in batches:
            tr_loss, cl_loss = train_fn(inputs, targets)
            train_loss += tr_loss
            clean_loss += cl_loss

        # And possibly a full pass over the validation data:
        if validate:
            val_loss = 0
            val_err = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in cifar10.iterate_minibatches(X_val, y_val,
                                                               batchsize,
                                                               shuffle=False):
                loss, err = test_fn(inputs, targets)
                val_loss += loss
                val_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        clean_loss /= train_batches
        l2_loss = l2_fn()
        print("  training loss:\t%.6f" % train_loss)
        print("  clean loss:    \t%.6f" % clean_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        if save_errors:
            errors.extend([train_loss, l2_loss])
        if validate:
            val_loss /= val_batches
            val_err /= val_batches
            print("  validation loss:\t%.6f" % val_loss)
            print("  validation error:\t%.2f%%" % (val_err * 100))
            if save_errors:
                errors.extend([val_loss, val_err])

        test_loss = 0
        test_err = 0
        test_batches = len(X_test) // batchsize
        for inputs, targets in cifar10.iterate_minibatches(X_test, y_test,
                                                           batchsize,
                                                           shuffle=False):
            loss, err = test_fn(inputs, targets)
            test_loss += loss
            test_err += err
        print("  test loss:\t\t%.6f" % (test_loss / test_batches))
        print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))



    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in cifar10.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))

    # Optionally, we dump the learning curves to a file
    if save_errors:
        errors = np.asarray(errors).reshape(epochs, -1)
        np.savez(save_errors, errors=errors)
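
Example #1 calls three helpers that are defined elsewhere in the original script: create_result_dirs, Logger and generate_in_background. Below is a minimal sketch of what they might look like; it is an assumption-based stand-in (the Logger tees stdout into a log file, the generator prefetches minibatches through a bounded queue), not the original code.

# Hypothetical helper definitions assumed by the examples above.
import os
import shutil
import sys
import threading
try:
    import Queue as queue  # Python 2
except ImportError:
    import queue           # Python 3


def create_result_dirs(output_path, pyscript_name):
    # Create the results directory and keep a copy of the launching script in it.
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    shutil.copy(pyscript_name, os.path.join(output_path, pyscript_name))


class Logger(object):
    # Tee everything written to stdout into a log file inside output_path.
    def __init__(self, output_path):
        self.terminal = sys.stdout
        self.log = open(os.path.join(output_path, 'log.txt'), 'a')

    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        self.terminal.flush()
        self.log.flush()


def generate_in_background(generator, num_cached=10):
    # Run a minibatch generator in a background thread, buffering up to
    # num_cached batches so augmentation overlaps with GPU computation.
    cache = queue.Queue(maxsize=num_cached)
    sentinel = object()

    def producer():
        for item in generator:
            cache.put(item)
        cache.put(sentinel)

    thread = threading.Thread(target=producer)
    thread.daemon = True
    thread.start()

    item = cache.get()
    while item is not sentinel:
        yield item
        item = cache.get()
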
Example #2
def train_test(epochs, eta, save_weights, save_errors, resume,
               init_name, nonlinearity_name, use_cifar10, batchsize=128):
    # import (deferred until now to make --help faster)
    import os
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    if use_cifar10 is True:
        print('Using CIFAR-10')
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    if nonlinearity_name == 'relu':
        f = lasagne.nonlinearities.rectify
    elif nonlinearity_name == 'elu':
        f = lasagne.nonlinearities.elu
    elif nonlinearity_name == 'gelu':
        def gelu(x):
            # tanh approximation of the Gaussian error linear unit
            return 0.5 * x * (1 + T.tanh(T.sqrt(2 / np.pi) * (x + 0.044715 * T.pow(x, 3))))
        f = gelu
    else:
        raise ValueError("unknown nonlinearity: %s" % nonlinearity_name)

    network = build_vgg(input_var, num_classes, f, init_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    # if validate == 'test':
    X_val, y_val = X_test, y_test
    # elif validate:
    #     X_val, y_val = X_train[-5000:], y_train[-5000:]
    #     X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    l2_loss = 5e-4 * lasagne.regularization.regularize_network_params(
        network, lasagne.regularization.l2, {'regularizable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    # updates = lasagne.updates.nesterov_momentum(
    #     loss + l2_loss, params, learning_rate=eta)
    updates = lasagne.updates.adam(
        loss + l2_loss, params, learning_rate=eta)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction,
                                                           target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []

    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        # look for the most recent checkpoint and restore from it
        for i in range(epochs - 1, -1, -1):
            try:
                with np.load(save_weights + '_' + str(i) + '.npz') as f:
                    param_values = [f['arr_%d' % j] for j in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i + 1
                print('Restored!', i, start_epoch)
                break
            except IOError:
                continue
        if start_epoch == 0:
            raise RuntimeError("could not resume: no checkpoint found")

    # Finally, launch the training loop.
    print("Starting training...")

    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # eta.set_value(lasagne.utils.floatX(orig_lr * max(0.1 ** (epoch//25), 1e-7)))

        # restoration friendly code
        # drop at half and then at three fourths through training
        if 100 <= epoch < 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= 125:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize, shuffle=True)
        # augmentation is mandatory!
        batches = dataset.augment_minibatches(batches)
        batches = generate_in_background(batches)
        batches = progress.progress(
            batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
            total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        # if validate:
        #     val_loss = 0
        #     val_err = 0
        #     val_batches = len(X_val) // batchsize
        #     for inputs, targets in dataset.iterate_minibatches(X_val, y_val, batchsize, shuffle=False):
        #         loss, err = test_fn(inputs, targets)
        #         val_loss += loss
        #         val_err += err
        # else:
        test_loss = 0
        test_err = 0
        test_batches = len(X_test) // batchsize
        for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
            loss, err = test_fn(inputs, targets)
            test_loss += loss
            test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  CE loss:\t%.6f" % train_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        print("  Loss:      \t%.6f" % (train_loss+l2_loss))
        if save_errors:
            errors.extend([train_loss, l2_loss])

        # if validate:
        #     val_loss /= val_batches
        #     val_err /= val_batches
        #     print("  validation loss:\t%.6f" % val_loss)
        #     print("  validation error:\t%.2f%%" % (val_err * 100))
        #     if save_errors:
        #         errors.extend([val_loss, val_err])
        # else:
        test_loss /= test_batches
        test_err /= test_batches
        print("  test loss:\t%.6f" % test_loss)
        print("  test error:\t%.2f%%" % (test_err * 100))
        if save_errors:
            errors.extend([test_loss, test_err])

        if epoch % 25 == 0 and epoch > 100:
            # Optionally, we dump the network weights to a file
            if save_weights:
                np.savez(save_weights+'_'+str(epoch), *lasagne.layers.get_all_param_values(network))

            # Optionally, we dump the learning curves to a file
            if save_errors:
                np.savez(save_errors, errors=np.asarray(errors).reshape(epoch+1, -1))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file
    if save_errors:
        np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
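
The gelu nonlinearity in Example #2 uses the tanh approximation of the Gaussian error linear unit. A small numpy/scipy check (not part of the original script, and assuming scipy is available) of how closely that approximation tracks the exact erf-based definition:

# Sanity check of the tanh-based GELU approximation used above.
import numpy as np
from scipy.special import erf

def gelu_exact(x):
    # Exact GELU: x * Phi(x), with Phi the standard normal CDF.
    return 0.5 * x * (1 + erf(x / np.sqrt(2)))

def gelu_tanh(x):
    # Tanh approximation, matching the Theano expression in Example #2.
    return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x ** 3)))

x = np.linspace(-5, 5, 1001)
print('max abs difference: %.6f' % np.max(np.abs(gelu_exact(x) - gelu_tanh(x))))
# The two curves agree to within about 1e-3 over this range, so the tanh form
# is a cheap stand-in for the erf-based definition.
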
Example #3
def train_test(depth, growth_rate, dropout, augment, validate, epochs,
               eta, save_weights, save_errors, resume, nonlinearity_name,
               use_cifar10, batchsize):
    # import (deferred until now to make --help faster)
    import os
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    import densenet_fast as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth, classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout,
                                      nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
            path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    # note: The Keras implementation clips predictions for the categorical
    #       cross-entropy. This doesn't seem to have a positive effect here.
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere.
    #       However, 1e-4 seems to work better than 5e-5, so we use 1e-4.
    # note: Torch includes biases in L2 decay. This seems to be important! So
    #       we decay all 'trainable' parameters, not just 'regularizable' ones.
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
            loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    l2_fn = theano.function([], l2_loss)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var).mean()
    test_err = 1 - lasagne.objectives.categorical_accuracy(test_prediction,
                                                           target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_err])

    start_epoch = 0
    if save_errors:
        errors = []

    if resume is True:
        errors = list(np.load(save_errors)['errors'].reshape(-1))
        # look for the most recent checkpoint and restore from it
        for i in range(epochs - 1, -1, -1):
            try:
                with np.load(save_weights + '_' + str(i) + '.npz') as f:
                    param_values = [f['arr_%d' % j] for j in range(len(f.files))]
                lasagne.layers.set_all_param_values(network, param_values)
                start_epoch = i + 1
                print(i, start_epoch)
                break
            except IOError:
                continue
        if start_epoch == 0:
            raise RuntimeError("could not resume: no checkpoint found")

    # Finally, launch the training loop.
    print("Starting training...")

    orig_lr = eta.get_value()
    for epoch in range(start_epoch, epochs):
        # shrink learning rate at 50% and 75% into training
        if epochs // 2 <= epoch < epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.1))
        elif epoch >= epochs * 3 // 4:
            eta.set_value(orig_lr * lasagne.utils.floatX(0.01))

        # In each epoch, we do a full pass over the training data:
        train_loss = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize, shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
                batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
                total=train_batches)
        for inputs, targets in batches:
            train_loss += train_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_loss = 0
            val_err = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_val, y_val, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                val_loss += loss
                val_err += err
        else:
            test_loss = 0
            test_err = 0
            test_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_test, y_test, batchsize, shuffle=False):
                loss, err = test_fn(inputs, targets)
                test_loss += loss
                test_err += err

        # Then we print the results for this epoch:
        train_loss /= train_batches
        l2_loss = l2_fn()
        print("  training loss:\t%.6f" % train_loss)
        print("  L2 loss:      \t%.6f" % l2_loss)
        if save_errors:
            errors.extend([train_loss, l2_loss])

        if validate:
            val_loss /= val_batches
            val_err /= val_batches
            print("  validation loss:\t%.6f" % val_loss)
            print("  validation error:\t%.2f%%" % (val_err * 100))
            if save_errors:
                errors.extend([val_loss, val_err])
        else:
            test_loss /= test_batches
            test_err /= test_batches
            print("  test loss:\t%.6f" % test_loss)
            print("  test error:\t%.2f%%" % (test_err * 100))
            if save_errors:
                errors.extend([test_loss, test_err])

        # checkpoint every epoch: dump the network weights and learning curves
        if save_weights:
            np.savez(save_weights + '_' + str(epoch),
                     *lasagne.layers.get_all_param_values(network))
        if save_errors:
            np.savez(save_errors, errors=np.asarray(errors).reshape(-1, 4))

    # After training, we compute and print the test error:
    test_loss = 0
    test_err = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        loss, err = test_fn(inputs, targets)
        test_loss += loss
        test_err += err
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_loss / test_batches))
    print("  test error:\t\t%.2f%%" % (test_err / test_batches * 100))

    # Optionally, we dump the network weights to a file
    if save_weights:
        np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
    # Optionally, we dump the learning curves to a file
    if save_errors:
        np.savez(save_errors, errors=np.asarray(errors).reshape(epochs, -1))
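
All of the examples iterate over the data with dataset.iterate_minibatches and, when augmentation is enabled, dataset.augment_minibatches, but neither helper is shown. The following is a rough, assumption-based sketch of the usual CIFAR pipeline (shuffled slicing, random horizontal flips, and random crops after zero padding) for NCHW arrays of 32x32 images; the actual cifar10/cifar100 modules may differ in detail.

# Illustrative stand-ins for the cifar10/cifar100 minibatch helpers used above.
import numpy as np


def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # Yield (inputs, targets) slices of size batchsize, optionally shuffled.
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start:start + batchsize]
        yield inputs[excerpt], targets[excerpt]


def augment_minibatches(minibatches, flip=0.5, trans=4):
    # Standard CIFAR augmentation: random horizontal flips and random
    # translations of up to `trans` pixels via zero padding and cropping.
    for inputs, targets in minibatches:
        batchsize, c, h, w = inputs.shape
        inputs = inputs.copy()
        # flip a random subset of images left-right
        coins = np.random.rand(batchsize) < flip
        inputs[coins] = inputs[coins, :, :, ::-1]
        # pad with zeros and randomly crop back to the original size
        padded = np.zeros((batchsize, c, h + 2 * trans, w + 2 * trans),
                          dtype=inputs.dtype)
        padded[:, :, trans:trans + h, trans:trans + w] = inputs
        crops = np.random.randint(0, 2 * trans + 1, size=(batchsize, 2))
        for i, (dy, dx) in enumerate(crops):
            inputs[i] = padded[i, :, dy:dy + h, dx:dx + w]
        yield inputs, targets
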
Example #4
if args.model == 'convnet':
    x = T.ftensor4('x')
elif args.model == 'mlp':
    x = T.matrix('x')
else:
    raise AttributeError
y = T.matrix('y')
lr_ele = T.fscalar('lr_ele')

lr_ele_true = np.array(args.lrEle, theano.config.floatX)
mom = args.momEle #momentum
lr_hyper = T.fscalar('lr_hyper')
grad_valid_weight = T.tensor4('grad_valid_weight')

X_elementary, Y_elementary, X_test, Y_test = load_dataset(args) #normalized
#Use a large validation set (as in CPU experiments) to avoid overfitting the hyperparameters
X_hyper = X_elementary[0:5000]
Y_hyper = Y_elementary[0:5000]
X_elementary = X_elementary[5000:]
Y_elementary = Y_elementary[5000:]


#TODO: seeds for dropout, reinitialize BN layers
#import lasagne.random
#np.random = np.random.RandomState(args.seed)
#rand = np.random.RandomState(args.seed)
#lasagne.random.set_rng(rand)
model = DenseNet(x=x, y=y, args=args)
#model = ConvNet(x=x, y=y, args=args)
Example #5
def train_test(depth, growth_rate, dropout, augment, validate, epochs,
               eta, save_weights, save_errors, resume, nonlinearity_name,
               use_cifar10, batchsize=64):
    # import (deferred until now to make --help faster)
    import os
    import numpy as np
    import theano
    import theano.tensor as T
    import lasagne

    import densenet_fast_custom as densenet  # or "import densenet" for slower version
    if use_cifar10 is True:
        import cifar10 as dataset
        num_classes = 10
    else:
        print('Using CIFAR-100')
        import cifar100 as dataset
        num_classes = 100
    import progress

    # instantiate network
    print("Instantiating network...")
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')
    network = densenet.build_densenet(input_var=input_var, depth=depth, classes=num_classes,
                                      growth_rate=growth_rate, dropout=dropout,
                                      nonlinearity_name=nonlinearity_name)
    print("%d layers with weights, %d parameters" %
          (sum(hasattr(l, 'W')
               for l in lasagne.layers.get_all_layers(network)),
           lasagne.layers.count_params(network, trainable=True)))

    # load dataset
    print("Loading dataset...")
    X_train, y_train, X_test, y_test = dataset.load_dataset(
        path=os.path.join(os.path.dirname(__file__), 'data'))
    if validate == 'test':
        X_val, y_val = X_test, y_test
    elif validate:
        X_val, y_val = X_train[-5000:], y_train[-5000:]
        X_train, y_train = X_train[:-5000], y_train[:-5000]

    # define training function
    print("Compiling training function...")
    prediction = lasagne.layers.get_output(network)
    prediction = T.clip(prediction, 1e-7, 1 - 1e-7)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # note: The paper says 1e-4 decay, but 1e-4 in Torch is 5e-5 elsewhere
    l2_loss = 1e-4 * lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2, {'trainable': True})
    params = lasagne.layers.get_all_params(network, trainable=True)
    eta = theano.shared(lasagne.utils.floatX(eta), name='eta')
    updates = lasagne.updates.nesterov_momentum(
            loss + l2_loss, params, learning_rate=eta, momentum=0.9)
    # updates = lasagne.updates.adam(
    #         loss + l2_loss, params, learning_rate=eta)
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # define validation/testing function
    print("Compiling testing function...")
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    update_var_prediction = lasagne.layers.get_output(network, deterministic=True, batch_norm_update_averages=True,
                                                      batch_norm_use_averages=False)
    loss_var_update = lasagne.objectives.categorical_crossentropy(update_var_prediction, target_var)
    loss_var_update = loss_var_update.mean()
    update_var_fn = theano.function([input_var, target_var], loss_var_update)
    test_loss = test_loss.mean()
    test_acc = lasagne.objectives.categorical_accuracy(test_prediction,
                                                       target_var).mean()
    test_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    l2_fn = theano.function([], l2_loss)

    with np.load("./wider_07_100.npz") as f:
        param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    lasagne.layers.set_all_param_values(network, param_values)

    # Finally, launch the training loop.
    print("Starting training...")
    if save_errors:
        errors = []

    val_err = 0
    val_acc = 0
    val_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        err, acc = test_fn(inputs, targets)
        val_err += err
        val_acc += acc
    if validate or True:  # HACK: validate on test set, for debugging
        print("  validation loss:\t%.6f" % (val_err / val_batches))
        print("  validation error:\t%.2f%%" % (
            100 - val_acc / val_batches * 100))

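    # Note: the loop below calls update_var_fn, which was compiled without any
    # parameter updates; with batch_norm_update_averages=True its only side
    # effect (via Lasagne's default updates) is refreshing the batch-norm
    # running mean/variance, so these passes over the training data recalibrate
    # the BN statistics of the restored weights without changing the model.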
    for epoch in range(5):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = len(X_train) // batchsize
        batches = dataset.iterate_minibatches(X_train, y_train, batchsize,
                                              shuffle=True)
        if augment:
            batches = dataset.augment_minibatches(batches)
            batches = generate_in_background(batches)
        batches = progress.progress(
                batches, desc='Epoch %d/%d, Batch ' % (epoch + 1, epochs),
                total=train_batches)
        for inputs, targets in batches:
            train_err += update_var_fn(inputs, targets)

        # And possibly a full pass over the validation data:
        if validate:
            val_err = 0
            val_acc = 0
            val_batches = len(X_val) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_val, y_val,
                                                               batchsize,
                                                               shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc
        else:
            # HACK: validate on test set, for debugging
            val_err = 0
            val_acc = 0
            val_batches = len(X_test) // batchsize
            for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                               batchsize,
                                                               shuffle=False):
                err, acc = test_fn(inputs, targets)
                val_err += err
                val_acc += acc

        # Then we print the results for this epoch:
        print("  training loss:\t%.6f" % (train_err / train_batches))
        l2_err = l2_fn()
        print("  L2 loss:      \t%.6f" % l2_err)
        if save_errors:
            errors.extend([train_err / train_batches, l2_err])
        if validate or True:  # HACK: validate on test set, for debugging
            print("  validation loss:\t%.6f" % (val_err / val_batches))
            print("  validation error:\t%.2f%%" % (
                100 - val_acc / val_batches * 100))
            if save_errors:
                errors.extend([val_err / val_batches,
                               100 - val_acc / val_batches * 100])

        if save_weights and epoch % 20 == 0:
            np.savez(save_weights, *lasagne.layers.get_all_param_values(network))
            print('Saved')

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = len(X_test) // batchsize
    for inputs, targets in dataset.iterate_minibatches(X_test, y_test,
                                                       batchsize,
                                                       shuffle=False):
        err, acc = test_fn(inputs, targets)
        test_err += err
        test_acc += acc
    print("Final results:")
    print("  test loss:\t\t%.6f" % (test_err / test_batches))
    print("  test error:\t\t%.2f%%" % (
        100 - test_acc / test_batches * 100))
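
None of the examples show the command-line entry point that feeds train_test. Below is a hypothetical argparse wrapper in the spirit of Examples #3 and #5; the flag names and defaults are illustrative only, not the original CLI.

# Hypothetical command-line wrapper; argument names are illustrative only.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train a DenseNet on CIFAR.')
    parser.add_argument('--depth', type=int, default=40)
    parser.add_argument('--growth-rate', type=int, default=12)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--augment', action='store_true')
    parser.add_argument('--validate', default=False)
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--eta', type=float, default=0.1)
    parser.add_argument('--save-weights', default=None)
    parser.add_argument('--save-errors', default=None)
    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--nonlinearity-name', default='relu')
    parser.add_argument('--use-cifar10', action='store_true')
    parser.add_argument('--batchsize', type=int, default=64)
    args = parser.parse_args()

    train_test(args.depth, args.growth_rate, args.dropout, args.augment,
               args.validate, args.epochs, args.eta, args.save_weights,
               args.save_errors, args.resume, args.nonlinearity_name,
               args.use_cifar10, args.batchsize)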