print "Done."

#############
# FINE-TUNE #
#############

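# fine-tuning cost: the top layer's supervised cost plus its weight-decay
# penalty, scaled by the weightdecay coefficient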
normal_cost = model_ft.models_stack[-1].cost() + \
              model_ft.models_stack[-1].weightdecay(weightdecay)
# gradients = T.grad(normal_cost, model_ft.models_stack[0].w)
# grad_cost = T.sum((gradients * (1 - mask_l0_theano))**2)
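# the commented-out lines above sketch an alternative cost that penalizes the
# squared gradient of the first layer's weights outside mask_l0_theano; the
# commented-out trainer2 below would minimize it at a much smaller learning rate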

trainer = GraddescentMinibatch(
    varin=model_ft.varin, data=train_x, 
    truth=model_ft.models_stack[-1].vartruth, truth_data=train_y,
    supervised=True,
    cost=normal_cost,
    params=model_ft.params, 
    batchsize=batchsize, learningrate=finetune_lr, momentum=momentum,
    rng=npy_rng
)

# trainer2 = GraddescentMinibatch(
#     varin=model_ft.varin, data=train_x, 
#     truth=model_ft.models_stack[-1].vartruth, truth_data=train_y,
#     supervised=True,
#     cost=grad_cost,
#     params=model_ft.params, 
#     batchsize=batchsize, learningrate=finetune_lr * 1e-5, momentum=momentum,
#     rng=npy_rng
# )
Example #2
def test_error():
    return numpy.mean([test_set_error_rate(i) for i in xrange(test_smp/batchsize)])
print "Done."
print "Initial error rate: train: %f, test: %f" % (train_error(), test_error())

#############
# FINE-TUNE #
#############

print "\n\n... BP through the whole network"
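# supervised fine-tuning: backpropagate the top layer's cost through all
# parameters of the stacked model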
trainer = GraddescentMinibatch(
    varin=model.varin, data=train_x, 
    truth=model.models_stack[-1].vartruth, truth_data=train_y,
    supervised=True,
    cost=model.models_stack[-1].cost(), 
    params=model.params,
    batchsize=batchsize, learningrate=finetune_lr, momentum=momentum,
    rng=npy_rng
)

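# learning-rate schedule: shrink the rate by 0.8 whenever the epoch cost stops
# improving, and stop once it has decayed below init_lr * 1e-7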
init_lr = trainer.learningrate
prev_cost = numpy.inf
for epoch in xrange(finetune_epc):
    cost = trainer.epoch()
    if prev_cost <= cost:
        if trainer.learningrate < (init_lr * 1e-7):
            break
        trainer.set_learningrate(trainer.learningrate*0.8)
    prev_cost = cost
    if epoch % 10 == 0:
        # assumed periodic progress report; the original branch body is truncated here
        print "epoch %d, training cost: %f" % (epoch, cost)

#############
# PRE-TRAIN #
#############

# assumed loop header, following the pre-training examples below
for i in xrange(len(model.models_stack) - 1):
    print "\n\nPre-training layer %d:" % i
    layer_dropout = Dropout(model.models_stack[i], droprates=[0.2, 0.5], theano_rng=theano_rng).dropout_model
    layer_dropout.varin = model.models_stack[i].varin

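    # layers where (i + 2) % 4 == 0 use the linear-layer learning rate and add
    # a weight-decay term, and the layer two positions below has its threshold
    # set to zero; all other layers are trained on the plain dropout cost with
    # the ZAE learning rate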
    if (i + 2) % 4 == 0:
        model.models_stack[i-2].threshold = 0.
        pretrain_lr = pretrain_lr_lin
        layer_cost = layer_dropout.cost() + layer_dropout.weightdecay(weightdecay)
    else:
        pretrain_lr = pretrain_lr_zae
        layer_cost = layer_dropout.cost()

    trainer = GraddescentMinibatch(
        varin=model.varin, data=train_x,
        cost=layer_cost,
        params=layer_dropout.params_private,
        supervised=False,
        batchsize=batchsize, learningrate=pretrain_lr, momentum=momentum,
        rng=npy_rng
    )

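    # patience schedule: after 10 non-improving epochs, decay the learning rate
    # by 0.9; stop pre-training this layer once the rate falls below 1e-10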
    prev_cost = numpy.inf
    patience = 0
    for epoch in xrange(pretrain_epc):
        cost = trainer.epoch()
        if prev_cost <= cost:
            patience += 1
            if patience > 10:
                patience = 0
                trainer.set_learningrate(0.9 * trainer.learningrate)
            if trainer.learningrate < 1e-10:
                break
        prev_cost = cost  # remember this epoch's cost so the patience check can trigger

def test_error():
    # assumed wrapper: only the tail of this helper survives in the source; it
    # walks the partitions of the test data generator, averages the per-batch
    # error rates, and restores the generator index afterwards
    mem = test_dg.partidx
    total_mean = 0.
    for ipart in test_dg:
        total_mean += numpy.mean([test_set_error_rate(i) for i in xrange(25)])
    test_dg.partidx = mem
    return total_mean / 2.
print "Done."
print "Initial error rate: train: %f, test: %f" % (train_error(), test_error())

#############
# FINE-TUNE #
#############

print "\n\n... fine-tuning the whole network"
trainer = GraddescentMinibatch(
    varin=model.varin, data=train_x, 
    truth=model.models_stack[-1].vartruth, truth_data=train_y,
    supervised=True,
    cost=model.models_stack[-1].cost(), 
    params=model.params,
    batchsize=batchsize, learningrate=finetune_lr, momentum=momentum,
    rng=npy_rng
)

init_lr = trainer.learningrate
prev_cost = numpy.inf
for epoch in xrange(finetune_epc):
    i = 0
    cost = 0.
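    # iterate over the data parts served by train_dg, corrupting each part with
    # a random binomial mask (keep probability 1 - noise) before loading it into
    # the shared training variables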
    for ipart in train_dg:
        print "part %d " % i,
        noise_mask = numpy.tile(npy_rng.binomial(1, 1-noise, (2500, 1, 250, 250)).astype(theano.config.floatX), (1, 3, 1, 1))
        train_x.set_value(noise_mask * ipart[0])
        train_y.set_value(ipart[1])
Example #6
    ##,--------------------
    ##| Instantiating model
    ##`--------------------

    model = cAE(n_visible=n_visible, n_hiddens=n_hiddens,
                contraction=contraction,
                activation=activation, n_samples=n_samples,
                BINARY=BINARY, MINIBATCH_SIZE=MINIBATCH_SIZE, corruptF=corruptF)

    ##,----------------------
    ##| Instantiating trainer
    ##`----------------------

    gd = GraddescentMinibatch(model=model, data=data_s,
                              batchsize=MINIBATCH_SIZE,
                              learningrate=learningrate,
                              momentum=momentum,
                              normalizefilters=normalizefilters,
                              rng=rng,
                              verbose=verbose)

    ##,---------
    ##| Training
    ##`---------

    #for epoch in np.arange(epochs):
    #    gd.step()
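    # keep taking gradient-descent steps until the model's cost on data_s drops
    # below eps or the epoch budget is exhausted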
    while (model.cost(data_s.get_value()) > eps) and gd.epochcount < epochs:
        gd.step()

    ##################################
    ## Stage 4: Visualizing results ##
Example #7
error_rate = theano.function(
    [], 
    T.mean(T.neq(model.models_stack[-1].predict(), test_set_y)),
    givens = {model.models_stack[0].varin : test_set_x},
)

#############
# PRE-TRAIN #
#############

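# greedy layer-wise pre-training: every layer except the top classifier is
# trained unsupervised on its own private parameters (batch size 1, fixed
# learning rate, no momentum)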
for i in range(len(model.models_stack)-1):
    print "\n\nPre-training layer %d:" % i
    trainer = GraddescentMinibatch(
        varin=model.varin, data=train_set_x,
        cost=model.models_stack[i].cost(),
        params=model.models_stack[i].params_private,
        supervised=False,
        batchsize=1, learningrate=0.001, momentum=0., rng=npy_rng
    )

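    # draw this layer's encoder weights as 28x28 patches and plot their
    # histogram before training and again after each step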
    layer_analyse = model.models_stack[i].encoder()
    layer_analyse.draw_weight(patch_shape=(28, 28, 1), npatch=100)
    layer_analyse.hist_weight()
    
    for epoch in xrange(15):
        trainer.step()
        layer_analyse.draw_weight(patch_shape=(28, 28, 1), npatch=100)
        layer_analyse.hist_weight()

save_params(model=model, filename="mnist_sae_784_784_784_10.npy")
Example #8
print "Done."

#############
# FINE-TUNE #
#############

pdb.set_trace()  # drop into the interactive debugger before fine-tuning starts

print "\n\n... fine-tuning the whole network"
trainer = GraddescentMinibatch(varin=model.varin,
                               data=train_x,
                               truth=model.models_stack[-1].vartruth,
                               truth_data=train_y,
                               supervised=True,
                               cost=model.models_stack[-1].cost(),
                               params=model.params,
                               batchsize=batchsize,
                               learningrate=lr,
                               momentum=momentum,
                               rng=npy_rng)

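# a second copy of the model (model_adam) is fine-tuned with the Adam trainer
# at one fifth of the SGD learning rate, presumably for comparison with plain
# minibatch gradient descent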
trainer_adam = Adam(varin=model_adam.varin,
                    data=train_x,
                    truth=model_adam.models_stack[-1].vartruth,
                    truth_data=train_y,
                    supervised=True,
                    cost=model_adam.models_stack[-1].cost(),
                    params=model_adam.params,
                    batchsize=batchsize,
                    learningrate=lr / 5.)  # any further keyword arguments are truncated in the source

#############
# PRE-TRAIN #
#############

# assumed loop header, as in the pre-training examples above
for i in xrange(len(model.models_stack) - 1):
    print "\n\nPre-training layer %d:" % i
    layer_dropout = Dropout(model.models_stack[i], droprates=[0.2, 0.5], theano_rng=theano_rng).dropout_model
    layer_dropout.varin = model.models_stack[i].varin

    if (i + 2) % 4 == 0:
        model.models_stack[i-2].threshold = 0.
        pretrain_lr = pretrain_lr_lin
        layer_cost = layer_dropout.cost() + layer_dropout.contraction(weightdecay)
    else:
        pretrain_lr = pretrain_lr_zae
        layer_cost = layer_dropout.cost()

    trainer = GraddescentMinibatch(
        varin=model.varin, data=train_x,
        cost=layer_cost,
        params=layer_dropout.params_private,
        supervised=False,
        batchsize=batchsize, learningrate=pretrain_lr, momentum=momentum,
        rng=npy_rng
    )

    prev_cost = numpy.inf
    patience = 0
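    # pcaback appears to map the (PCA-transformed) training data back to image
    # space so it can be reshaped to 32x32x3 images for spatial augmentation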
    origin_x = train_x.get_value()
    reshaped_x = pcaback(origin_x).reshape((50000, 32, 32, 3), order='F')
    for epoch in xrange(pretrain_epc):
        cost = 0.
        # original data
        cost += trainer.epoch()
        
        # data augmentation: horizontal flip
        flipped_x = reshaped_x[:, ::-1, :, :].reshape((50000, 3072), order='F')