pretrain_lr = pretrain_lr_zae
        layer_cost = layer_dropout.cost()

    # Layer-wise unsupervised pretraining: minimize this dropout layer's
    # reconstruction cost over its private parameters only, via minibatch
    # gradient descent.
    trainer = GraddescentMinibatch(
        varin=model.varin, data=train_x,
        cost=layer_cost,
        params=layer_dropout.params_private,
        supervised=False,  # no labels during pretraining
        batchsize=batchsize, learningrate=pretrain_lr, momentum=momentum,
        rng=npy_rng
    )

    # Plateau-based learning-rate annealing: every time the cost fails to
    # improve for more than 10 (not necessarily consecutive) epochs, shrink
    # the rate by 10%; give up entirely once the rate underflows 1e-10.
    # NOTE(review): patience is never reset on an improving epoch, only
    # after it exceeds 10 — presumably intentional, but worth confirming.
    prev_cost = numpy.inf
    patience = 0
    for epoch in xrange(pretrain_epc):
        cost = trainer.epoch()
        if prev_cost <= cost:  # no improvement this epoch
            patience += 1 
            if patience > 10:
                patience = 0
                trainer.set_learningrate(0.9 * trainer.learningrate)
            if trainer.learningrate < 1e-10:
                break
        prev_cost = cost
    # Checkpoint the whole model after this layer's pretraining finishes.
    save_params(model, 'ZLIN_4000_1000_4000_1000_4000_1000_4000_normhid_nolinb_cae1_dropout.npy')
print "Done."


#########################
# BUILD FINE-TUNE MODEL #
#########################
    supervised=True,
    cost=model.models_stack[-1].cost(), 
    params=model.params,
    batchsize=batchsize, learningrate=finetune_lr, momentum=momentum,
    rng=npy_rng
)

# Fine-tuning loop.  The training set is streamed through the shared
# variables one part at a time (it is apparently too large to load at
# once — TODO confirm); each part is trained for two trainer-epochs, and
# the accumulated cost is normalized accordingly below.
init_lr = trainer.learningrate
prev_cost = numpy.inf
for epoch in xrange(finetune_epc):
    cost = 0.
    # Python 2 integer division: number of whole part_size chunks.
    for ipart in range(train_smp/part_size):
        print "part %d " % ipart,
        train_x.set_value(train_x_np[ipart * part_size : (ipart + 1) * part_size])
        train_y.set_value(train_y_np[ipart * part_size : (ipart + 1) * part_size])
        cost += trainer.epoch()
        print "       ",
        cost += trainer.epoch()  # deliberate second pass over the same part
    # Trailing chunk: the final part_size samples.  When train_smp is not a
    # multiple of part_size this overlaps the previous chunk — presumably
    # accepted to keep chunk sizes uniform; verify against data prep.
    print "part + ",
    train_x.set_value(train_x_np[-part_size :])
    train_y.set_value(train_y_np[-part_size :])
    cost += trainer.epoch()
    print "       ",
    cost += trainer.epoch()
    # Mean cost per pass: (#whole parts + 1 trailing part) chunks x 2 passes.
    cost = cost / (train_smp/part_size + 1) / 2.
    if prev_cost <= cost:  # plateau: decay the rate; stop once it is tiny
        if trainer.learningrate < (init_lr * 1e-7):
            break
        trainer.set_learningrate(trainer.learningrate*0.8)
    prev_cost = cost
    # NOTE(review): everything from here to the closing print appears to be
    # a duplicated pretraining segment spliced INSIDE the fine-tune loop
    # (compare with the identical patience/LR-decay loop earlier in the
    # file).  In particular:
    #   * `trainer` is rebound below, clobbering the fine-tune trainer, and
    #   * the inner `for epoch in xrange(pretrain_epc)` shadows the outer
    #     fine-tune loop variable `epoch`.
    # This looks like a bad merge/paste — confirm the intended file before
    # relying on this region.
    if epoch % 10 == 0:
        layer_cost = layer_dropout.cost()

    trainer = GraddescentMinibatch(varin=model.varin,
                                   data=train_x,
                                   cost=layer_cost,
                                   params=layer_dropout.params_private,
                                   supervised=False,  # unsupervised objective
                                   batchsize=batchsize,
                                   learningrate=pretrain_lr,
                                   momentum=momentum,
                                   rng=npy_rng)

    # Same plateau-based annealing scheme as the earlier pretraining block:
    # >10 non-improving epochs shrinks the rate by 10%; stop below 1e-10.
    prev_cost = numpy.inf
    patience = 0
    for epoch in xrange(pretrain_epc):
        cost = trainer.epoch()
        if prev_cost <= cost:  # no improvement this epoch
            patience += 1
            if patience > 10:
                patience = 0
                trainer.set_learningrate(0.9 * trainer.learningrate)
            if trainer.learningrate < 1e-10:
                break
        prev_cost = cost
    # Checkpoint the whole model (same filename as the earlier checkpoint,
    # so this overwrites it).
    save_params(
        model,
        'ZLIN_4000_1000_4000_1000_4000_1000_4000_normhid_nolinb_cae1_dropout.npy'
    )
print "Done."  # Python 2 print statement

#########################
        varin=model.varin, data=train_x,
        cost=layer_cost,
        params=layer_dropout.params_private,
        supervised=False,
        batchsize=batchsize, learningrate=pretrain_lr, momentum=momentum,
        rng=npy_rng
    )

    prev_cost = numpy.inf
    patience = 0
    # Map the PCA-space training data back to pixel space once, up front;
    # the pixel-space array is reused every epoch to generate augmented
    # variants.  Hard-coded shapes assume 50000 images of 32x32x3 in
    # Fortran ('F') memory order (CIFAR-10-sized — TODO confirm).
    origin_x = train_x.get_value()
    reshaped_x = pcaback(origin_x).reshape((50000, 32, 32, 3), order='F')
    # Each epoch trains once on the original data plus once per augmented
    # variant, accumulating the costs.  (The loop body continues past this
    # excerpt — a "random shift" augmentation follows.)
    for epoch in xrange(pretrain_epc):
        cost = 0.
        # original data
        cost += trainer.epoch()
        
        # data augmentation: flip along axis 1, then re-project through PCA
        flipped_x = reshaped_x[:, ::-1, :, :].reshape((50000, 3072), order='F')
        train_x.set_value(pcamapping(flipped_x))
        cost += trainer.epoch()

        # random rotation: per-image angle (degrees) drawn from a von Mises
        # distribution; rotate in place with wrap-around fill so the output
        # keeps the original image shape
        movies = numpy.zeros(reshaped_x.shape, dtype=theano.config.floatX)
        for i, im in enumerate(reshaped_x):
            angle_delta = (npy_rng.vonmises(0.0, 1.0)/(4 * numpy.pi)) * 180
            movies[i] = ndimage.rotate(im, angle_delta, reshape=False, mode='wrap')
        train_x.set_value(pcamapping(movies.reshape((50000, 3072), order='F')))
        cost += trainer.epoch()

        # random shift