Example No. 1
def get_loss(a, b, c):
    n_plus = T.sqrt(0.01 + T.sum((a - b)**2, axis=1))
    n_minus = T.sqrt(0.01 + T.sum((a - c)**2, axis=1))
    z = T.concatenate([n_minus.dimshuffle(0, 'x'),
                       n_plus.dimshuffle(0, 'x')],
                      axis=1)
    z = nn.log_sum_exp(z, axis=1)
    return n_plus, n_minus, z
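# Note: nn.log_sum_exp is assumed to be a numerically stable log-sum-exp over the given
# axis; a minimal NumPy sketch of that assumption (reference only, not used below):
import numpy as np

def log_sum_exp_ref(x, axis=1):
    m = np.max(x, axis=axis, keepdims=True)            # shift by the per-row max for stability
    return np.squeeze(m, axis) + np.log(np.sum(np.exp(x - m), axis=axis))

# Since z = log(exp(n_minus) + exp(n_plus)), the loss built from it later,
# -T.mean(n_minus) + T.mean(z), equals T.mean(softplus(n_plus - n_minus)): a smooth
# margin that pushes the positive-pair distance below the negative-pair distance.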
x_unl2 = T.tensor4()
genz_lab2 = ll.get_output(genz_layers[-1], {x_input: x_lab2})
genz_unl2 = ll.get_output(genz_layers[-1], {x_input: x_unl2})
output_before_softmax_lab2 = ll.get_output(disc_layers[-1], {
    x_input: x_lab2,
    z_input: genz_lab2
},
                                           deterministic=False)
output_before_softmax_unl2 = ll.get_output(disc_layers[-1], {
    x_input: x_unl2,
    z_input: genz_unl2
},
                                           deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(
    T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
    T.nnet.softplus(l_unl)) + 0.5 * T.mean(T.nnet.softplus(l_gen))
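# Note: with Z(x) = sum_k exp(logit_k), T.nnet.softplus(l) = log(1 + exp(l)), so the two
# unlabeled/generated terms above are (up to the 0.5 weights) -E_unl[log(Z/(1+Z))] and
# -E_gen[log(1/(1+Z))]: the standard GAN discriminator loss with D(x) = Z(x)/(Z(x)+1),
# as in the semi-supervised GAN of "Improved Techniques for Training GANs" (Salimans et al., 2016).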

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

# test error
output_before_softmax = ll.get_output(disc_layers[-1], {
    x_input: x_lab,
    z_input: genz_lab
},
                                      deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))
layers.append(nn.DenseLayer(layers[-1], num_units=10, nonlinearity=None, train_scale=True))

# costs
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()

#temp = LL.get_output(gen_layers[-1], init=True)
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers+layers for u in getattr(l,'init_updates',[])]

output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = LL.get_output(layers[-1], x_unl, deterministic=False)
output_before_softmax_fake = LL.get_output(layers[-1], gen_dat, deterministic=False)

z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
l_lab = output_before_softmax_lab[T.arange(args.batch_size),labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
#loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
loss_lab = -T.mean(l_lab)+z_exp_lab
loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl))) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab,axis=1),labels))

mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
loss_gen = T.mean(T.square(mom_gen - mom_real))
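# Note: this is the feature-matching generator objective; schematically (reference only):
#   f_gen  = features(generated_batch).mean(axis=0)
#   f_real = features(real_unlabeled_batch).mean(axis=0)
#   loss_gen = ((f_gen - f_real) ** 2).mean()
# i.e. the generator matches the mean activation of an intermediate discriminator layer
# (layers[-3] here) on generated vs. real unlabeled data instead of directly maximizing
# the discriminator's confusion.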

# test error
disc_params = ll.get_all_params(disc_layers, trainable=True)

# costs
labels = T.ivector()
x_lab = T.tensor4()
x_unl = T.tensor4()
temp = ll.get_output(gen_layers[-1], deterministic=False, init=True)
temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers+disc_layers for u in getattr(l,'init_updates',[])]

output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = ll.get_output(disc_layers[-1], x_unl, deterministic=False)
output_before_softmax_gen = ll.get_output(disc_layers[-1], gen_dat, deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size),labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(l_unl)) + 0.5*T.mean(T.nnet.softplus(l_gen))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab,axis=1),labels))

# test error
output_before_softmax = ll.get_output(disc_layers[-1], x_lab, deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_before_softmax,axis=1),labels))

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab + args.unlabeled_weight*loss_unl, lr=lr, mom1=0.5)
disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in disc_params]
Example No. 5
# costs
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()

temp = LL.get_output(gen_layers[-1], init=True)
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers+layers for u in getattr(l,'init_updates',[])]

output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
output_before_softmax_unl,output_before_softmax_unl_ = LL.get_output([layers[-1],layers[-2]], x_unl, deterministic=False)
output_before_softmax_unl2,output_before_softmax_unl2_ = LL.get_output([layers[-1],layers[-2]], x_unl, deterministic=False)
output_before_softmax_fake, output_before_softmax_fake_= LL.get_output([layers[-1],layers[-2]], gen_dat, deterministic=False)
output_before_softmax_fake_2,output_before_softmax_fake_2_ = LL.get_output([layers[-1],layers[-2]], gen_dat, deterministic=False)

z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
l_lab = output_before_softmax_lab[T.arange(args.batch_size),labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)



#loss_ct = lasagne.objectives.squared_error(T.sum(T.exp(output_before_softmax_unl),axis =1)/(T.sum(T.exp(output_before_softmax_unl),axis =1)+1),T.sum(T.exp(output_before_softmax_unl2),axis=1)/(T.sum(T.exp(output_before_softmax_unl2),axis =1)+1))
loss_ct = T.mean(lasagne.objectives.squared_error(T.nnet.softmax(output_before_softmax_unl),T.nnet.softmax(output_before_softmax_unl2)),axis=1)
 
loss_ct_ = T.mean(lasagne.objectives.squared_error(output_before_softmax_unl_,output_before_softmax_unl2_),axis=1) # no normalization, this term makes the model unstable

CT = LAMBDA_2*(loss_ct+0.0*loss_ct_)-factor_M
CT_ = T.mean(T.maximum(CT,0.0*CT),axis=0)
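# Note: loss_ct penalizes the squared difference between two stochastic forward passes
# over the same unlabeled batch (dropout/noise makes the passes differ), and CT_ keeps
# only the part exceeding the margin: mean(max(LAMBDA_2 * consistency - factor_M, 0)).
# loss_ct_, the same term on the second-to-last layer, is kept but weighted by 0.0
# (effectively disabled) because, per the comment above, it made the model unstable.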
Example No. 6
                                num_units=10,
                                W=Normal(0.02),
                                nonlinearity=None)
disc0_layers.append(disc0_layer_adv)
''' forward pass '''

output_before_softmax_real0 = LL.get_output(disc0_layer_adv,
                                            x,
                                            deterministic=False)
output_before_softmax_gen0, recon_z0 = LL.get_output(
    [disc0_layer_adv, disc0_layer_z_recon], gen_x, deterministic=False
)  # discriminator's predicted probability that gen_x is real
''' loss for discriminator and Q '''

l_lab0 = output_before_softmax_real0[T.arange(args.batch_size), y]
l_unl0 = nn.log_sum_exp(output_before_softmax_real0)
l_gen0 = nn.log_sum_exp(output_before_softmax_gen0)
loss_disc0_class = -T.mean(l_lab0) + T.mean(
    T.mean(nn.log_sum_exp(output_before_softmax_real0))
)  # loss for not correctly classifying the category of real images
loss_real0 = -T.mean(l_unl0) + T.mean(
    T.nnet.softplus(l_unl0))  # loss for classifying real as fake
loss_fake0 = T.mean(
    T.nnet.softplus(l_gen0))  # loss for classifying fake as real
loss_disc0_adv = 0.5 * loss_real0 + 0.5 * loss_fake0
loss_gen0_ent = T.mean((recon_z0 - z0)**2)
loss_disc0 = args.labloss_weight * loss_disc0_class + args.advloss_weight * loss_disc0_adv + args.entloss_weight * loss_gen0_ent
''' loss for generator '''

recon_fc3 = LL.get_output(
    enc_layer_fc3, gen_x,
Example No. 7
# output_before_softmax_real0 = LL.get_output(disc0_layer_adv, x, deterministic=False)
# output_before_softmax_gen0, recon_z0 = LL.get_output([disc0_layer_adv, disc0_layer_z_recon], gen_x, deterministic=False) # discriminator's predicted probability that gen_x is real

''' loss for discriminator and Q '''
# loss_real1 = T.mean(T.nnet.binary_crossentropy(prob_real1, T.ones(prob_real1.shape)))
# loss_fake1 = T.mean(T.nnet.binary_crossentropy(prob_gen1, T.zeros(prob_gen1.shape)))
# loss_gen1_ent = T.mean((recon_z1 - z1)**2)
# loss_disc1 = args.advloss_weight * (0.5*loss_real1  + 0.5*loss_fake1) + args.entloss_weight * loss_gen1_ent

# loss_real0 = T.mean(T.nnet.binary_crossentropy(prob_real0, T.ones(prob_real0.shape)))
# loss_fake0 = T.mean(T.nnet.binary_crossentropy(prob_gen0, T.zeros(prob_gen0.shape)))
# loss_gen0_ent = T.mean((recon_z0 - z0)**2)
# loss_disc0 = args.advloss_weight * (0.5*loss_real0  + 0.5*loss_fake0) + args.entloss_weight * loss_gen0_ent

l_lab1 = output_before_softmax_real1[T.arange(args.batch_size),y]
l_unl1 = nn.log_sum_exp(output_before_softmax_real1)
l_gen1 = nn.log_sum_exp(output_before_softmax_gen1)
loss_disc1_class = -T.mean(l_lab1) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real1))) # loss for not correctly classifying the category of real images
loss_real1 = -T.mean(l_unl1) + T.mean(T.nnet.softplus(l_unl1)) # loss for classifying real as fake
loss_fake1 = T.mean(T.nnet.softplus(l_gen1)) # loss for classifying fake as real
loss_disc1_adv = 0.5*loss_real1  + 0.5*loss_fake1
loss_gen1_ent = T.mean((recon_z1 - z1)**2)
loss_disc1 = args.labloss_weight * loss_disc1_class + args.advloss_weight * loss_disc1_adv + args.entloss_weight * loss_gen1_ent

# l_lab0 = output_before_softmax_real0[T.arange(args.batch_size),y]
# l_unl0 = nn.log_sum_exp(output_before_softmax_real0)
# l_gen0 = nn.log_sum_exp(output_before_softmax_gen0)
# loss_disc0_class = -T.mean(l_lab0) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real0))) # loss for not correctly classifying the category of real images
# loss_real0 = -T.mean(l_unl0) + T.mean(T.nnet.softplus(l_unl0)) # loss for classifying real as fake
# loss_fake0 = T.mean(T.nnet.softplus(l_gen0)) # loss for classifying fake as real
# loss_disc0_adv = 0.5*loss_real0  + 0.5*loss_fake0
Example No. 8
disc_layers.append(ll.GlobalPoolLayer(disc_layers[-1]))
disc_layers.append(nn.weight_norm(ll.DenseLayer(disc_layers[-1], num_units=2, W=Normal(0.05), nonlinearity=None), train_g=True, init_stdv=0.1))
disc_params = ll.get_all_params(disc_layers, trainable=True)

print("DISCRIMINATOR CREATED")

# costs
labels = T.ivector()
x_lab = T.tensor4()
temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in disc_layers for u in getattr(l,'init_updates',[])]

output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size),labels]
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab,axis=1),labels))

# test error
output_before_softmax = ll.get_output(disc_layers[-1], x_lab, deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_before_softmax,axis=1),labels))

print("ERROR FUNCTIONS CREATED")

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = lupd.adam(loss_lab, disc_params, learning_rate=lr, beta1=0.5).items()
disc_param_avg =  []
for p in disc_params:
Example No. 9
disc_x_layers.append(LL.GlobalPoolLayer(disc_x_layers_shared))
disc_x_layer_adv = LL.DenseLayer(disc_x_layers[-1], num_units=10, W=Normal(0.01), nonlinearity=None)
disc_x_layers.append(disc_x_layer_adv)

#output_before_softmax_x = LL.get_output(disc_x_layer_adv, x, deterministic=False)
#output_before_softmax_gen = LL.get_output(disc_x_layer_adv, gen_x, deterministic=False)

# temp = LL.get_output(gen_x_layers[-1], deterministic=False, init=True)
# temp = LL.get_output(disc_x_layers[-1], x, deterministic=False, init=True)
# init_updates = [u for l in LL.get_all_layers(gen_x_layers)+LL.get_all_layers(disc_x_layers) for u in getattr(l,'init_updates',[])]

output_before_softmax_real = LL.get_output(disc_x_layer_adv, x, deterministic=False) 
output_before_softmax_gen, recon_z = LL.get_output([disc_x_layer_adv, disc_x_layer_z_recon], gen_x, deterministic=False) # discriminator's predicted probability that gen_x is real

l_lab = output_before_softmax_real[T.arange(args.batch_size),y]
l_unl = nn.log_sum_exp(output_before_softmax_real)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_class_x = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real))) # loss for not correctly classifying the category of real images
loss_real_x = -T.mean(l_unl) + T.mean(T.nnet.softplus(l_unl)) # loss for classifying real as fake
loss_fake_x = T.mean(T.nnet.softplus(l_gen)) # loss for classifying fake as real
loss_disc_x_adv = 0.5*loss_real_x  + 0.5*loss_fake_x
loss_z_recon = T.mean((recon_z - z)**2)
loss_disc_x = args.labloss_weight * loss_class_x + args.advloss_weight * loss_disc_x_adv + args.zloss_weight * loss_z_recon

# loss for generator
y_recon = LL.get_output(enc_layer_fc4, gen_x, deterministic=True) # encoder's class prediction on the generated image
#loss_gen_x_adv = -loss_fake_x  # adversarial loss
loss_gen_x_adv = -T.mean(T.nnet.softplus(l_gen))
# loss_gen_x_fea = T.mean((recon_fc3 - real_fc3)**2) # feature loss, euclidean distance in feature space
loss_gen_x_fea = T.mean(T.nnet.categorical_crossentropy(y_recon, y_1hot)) # feature loss
loss_gen_x = args.advloss_weight * loss_gen_x_adv + args.fealoss_weight * loss_gen_x_fea + args.zloss_weight * loss_z_recon
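# Note: the generator objective above combines three weighted terms: an adversarial term
# (-softplus(l_gen), which raises the discriminator's "real" score on generated samples),
# a cross-entropy term asking the encoder's class prediction on gen_x to match the
# conditioning label y_1hot, and the z-reconstruction term shared with the Q network.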
loss_lab = T.mean(T.sum(T.pow(output_lab - label_matrix, 2),
                        axis=1))  # Squared Error
#l_gen = T.mean(T.sum(T.pow(output_gen, 2), axis=1)) # L2 norm
#l_unl = T.mean(T.pow(T.max(output_unl) - 1, 2))
#l_unl = T.mean(T.pow(T.max(output_unl) - 1, 2)) + T.mean(T.sum(T.pow(output_unl, 2), axis=1))
#loss_unl = 0.5*l_unl + 0.5*l_gen
"""
log_gen = output_gen - nn.log_sum_exp(output_gen).dimshuffle(0,'x')
ent_gen = T.mean(T.sum(T.exp(log_gen) * log_gen, axis=1))
log_fx = output_unl - nn.log_sum_exp(output_unl).dimshuffle(0,'x')
ent_fx = T.mean(T.sum(T.exp(log_fx) * log_fx, axis=1)) # Entropy loss
loss_unl = -0.5*ent_fx + 0.5*ent_gen
"""

l_unl = nn.log_sum_exp(output_unl)
l_gen = nn.log_sum_exp(output_gen)
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
    T.nnet.softplus(l_unl)) + 0.5 * T.mean(T.nnet.softplus(l_gen))

train_err = T.mean(T.neq(T.argmax(output_lab, axis=1), labels))

# test error
output_test = ll.get_output(disc_layers[-1], x_lab, deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_test, axis=1), labels))

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params,
                                     loss_lab +
Example No. 11
    n_plus = T.sqrt(T.sum((a - b)**2, axis=1))
    n_minus = T.sqrt(T.sum((a - c)**2, axis=1))
    z = T.concatenate([n_minus.dimshuffle(0, 'x'),
                       n_plus.dimshuffle(0, 'x')],
                      axis=1)
    z = nn.log_sum_exp(z, axis=1)
    return n_plus, n_minus, z


n_plus_lab, n_minus_lab, z_lab = loss_labeled(a_lab, b_lab, c_lab)

# defining the triplet loss
loss_lab = -T.mean(n_minus_lab) + T.mean(z_lab)

# defining unlabelled loss
loss_unl = -0.5 * T.mean(nn.log_sum_exp(output_unl)) + 0.5 * T.mean(
    T.nnet.softplus(nn.log_sum_exp(output_unl))) + 0.5 * T.mean(
        T.nnet.softplus(nn.log_sum_exp(output_fake)))

# defining feature matching loss for generator training
mom_gen = LL.get_output(layers[-1], gen_dat)
mom_real = LL.get_output(layers[-1], x_unl)
loss_gen = T.mean(T.square(T.mean(mom_gen, axis=0) - T.mean(mom_real, axis=0)))

# Theano functions for training and testing
lr = T.scalar()
disc_params = LL.get_all_params(layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params,
                                     loss_lab +
                                     args.unlabeled_weight * loss_unl,
                                     lr=lr,
Example No. 12
print("DISCRIMINATOR CREATED")

# costs
labels = T.ivector()
x_lab = T.tensor4()
temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in disc_layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = ll.get_output(disc_layers[-1],
                                          x_lab,
                                          deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
loss_lab = -T.mean(l_lab) + T.mean(
    T.mean(nn.log_sum_exp(output_before_softmax_lab)))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

# test error
output_before_softmax = ll.get_output(disc_layers[-1],
                                      x_lab,
                                      deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

print("ERROR FUNCTIONS CREATED")

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = lupd.adam(loss_lab,
Example No. 13
    u for l in gen_layers + disc_layers
    for u in getattr(l, 'init_updates', [])
]

output_before_softmax_lab = ll.get_output(disc_layers[-1],
                                          x_lab,
                                          deterministic=False)
output_before_softmax_unl = ll.get_output(disc_layers[-1],
                                          x_unl,
                                          deterministic=False)
output_before_softmax_gen = ll.get_output(disc_layers[-1],
                                          gen_dat,
                                          deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(
    T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
    T.nnet.softplus(l_unl)) + 0.5 * T.mean(T.nnet.softplus(l_gen))

# Gradient for disc

z_delta_disc = T.tile(z_jacobian, (args.batch_size, 1)) * args.z_delta
z_d_disc = T.sum(z_jacobian, axis=1).dimshuffle('x', 0) * args.z_delta

x_disc_jacobian_lab = x_lab.repeat(sample_dim, axis=0)
labels_jacobian = labels.repeat(sample_dim)
gen_dat_del_lab = ll.get_output(gen_layers[-1], {
    gen_img_input: x_disc_jacobian_lab,
Example No. 14
def getLossFuction(a,b,c):
    n_plus = T.sqrt(T.sum((a - b)**2, axis=1))
    n_minus = T.sqrt(T.sum((a - c)**2, axis=1))
    z = T.concatenate([n_minus.dimshuffle(0,'x'),n_plus.dimshuffle(0,'x')],axis=1)
    z = nn.log_sum_exp(z,axis=1)
    return n_plus,n_minus,z

n_plus_lab,n_minus_lab,z_lab = getLossFuction(a_lab,b_lab,c_lab)

loss_lab = -T.mean(n_minus_lab) + T.mean(z_lab)


l_lab = output_before_softmax_lab[T.arange(args.batch_size),labels]

l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)

loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(l_unl)) + 0.5*T.mean(T.nnet.softplus(l_gen))

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab + args.unlabeled_weight*loss_unl, lr=lr, mom1=0.5)
disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in disc_params]
disc_avg_updates = [(a,a+0.0001*(p-a)) for p,a in zip(disc_params,disc_param_avg)]
disc_avg_givens = [(p,a) for p,a in zip(disc_params,disc_param_avg)] # substitute the averaged weights at evaluation time
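# Note: disc_param_avg is an exponential moving average of the discriminator weights,
# refreshed each training step by disc_avg_updates (avg <- avg + 0.0001*(param - avg));
# disc_avg_givens swaps the averaged weights in for the live ones, typically via the
# givens argument of a test-time Theano function (not shown in this excerpt).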
train_batch_disc = th.function(inputs=[x_lab,x_unl,lr], outputs=[loss_lab, loss_unl], updates=disc_param_updates+disc_avg_updates)
samplefun = th.function(inputs=[],outputs=gen_dat)

# Theano functions for training the gen net
Example No. 15
# costs
labels = T.ivector()
x_lab = T.tensor4()
x_unl = T.tensor4()

temp = ll.get_output(gen_layers[-1], deterministic=False, init=True)
temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True)  # output unused; only collects data-dependent init updates
init_updates = [u for l in gen_layers+disc_layers for u in getattr(l,'init_updates',[])]

output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False) 
output_before_softmax_unl,output_before_softmax_unl_ = ll.get_output([disc_layers[-1],disc_layers[-2]], x_unl, deterministic=False)  # no softmax 
output_before_softmax_unl2,output_before_softmax_unl2_ = ll.get_output([disc_layers[-1],disc_layers[-2]], x_unl, deterministic=False)  # no softmax 
output_before_softmax_gen = ll.get_output(disc_layers[-1], gen_dat, deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size),labels] 
l_unl = nn.log_sum_exp(output_before_softmax_unl) 
l_unl2 = nn.log_sum_exp(output_before_softmax_unl2) 
l_unl_ = nn.log_sum_exp(output_before_softmax_unl_)
l_unl2_ = nn.log_sum_exp(output_before_softmax_unl2_) 
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))


loss_comp = T.mean(lasagne.objectives.squared_error(T.nnet.softmax(output_before_softmax_unl),T.nnet.softmax(output_before_softmax_unl2)))
loss_comp_ = T.mean(lasagne.objectives.squared_error(output_before_softmax_unl_,output_before_softmax_unl2_))


loss_unl = 0.05*loss_comp_ + 0.5*loss_comp -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(l_unl)) -0.5*np.log(1) + 0.5*T.mean(T.nnet.softplus(l_gen))  


zeros = np.zeros(100)
Example No. 16
    def train(self):

        self.G_weights_layer = nn.softmax_weights(self.args.ng, LL.InputLayer(shape=(), input_var=self.dummy_input))
        self.D_weights_layer = nn.softmax_weights(self.args.ng, LL.InputLayer(shape=(), input_var=self.dummy_input))

        self.G_weights = LL.get_output(self.G_weights_layer, None, deterministic=True)
        self.D_weights = LL.get_output(self.D_weights_layer, None, deterministic=True)

        self.Disc_weights_entropy = T.sum((-1./self.args.nd) * T.log(self.D_weights + 0.000001), [0,1])
        self.Gen_weights_entropy = T.sum((-1./self.args.ng) * T.log(self.G_weights + 0.000001), [0,1]) 
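        # Note: nn.softmax_weights is assumed to produce softmax-normalized mixture weights
        # over the args.ng generators / args.nd discriminators; the two terms above are a
        # cross-entropy against the uniform distribution (the 1e-6 guards the log), used
        # below as a regularizer that keeps the mixture from collapsing onto one component.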

        for i in range(self.args.ng):
            gen_layers_i, gen_x_i = self.get_generator(self.meanx, self.z, self.y_1hot)
            self.G_layers.append(gen_layers_i)
            self.Gen_x_list.append(gen_x_i)
        self.Gen_x = T.concatenate(self.Gen_x_list, axis=0)

        for i in range(self.args.nd):
            disc_layers_i, disc_layer_adv_i, disc_layer_z_recon_i = self.get_discriminator()
            self.D_layers.append(disc_layers_i)
            self.D_layer_adv.append(disc_layer_adv_i)
            self.D_layer_z_recon.append(disc_layer_z_recon_i)
            #T.set_subtensor(self.Gen_x[i*self.args.batch_size:(i+1)*self.args.batch_size], gen_x_i)

            #self.samplers.append(self.sampler(self.z[i], self.y))
        ''' forward pass '''
        loss_gen0_cond_list = []
        loss_disc0_class_list = []
        loss_disc0_adv_list = []
        loss_gen0_ent_list = []
        loss_gen0_adv_list = []
        #loss_disc_list
        
        for i in range(self.args.ng):
            self.y_recon_list.append(LL.get_output(self.enc_layer_fc4, self.Gen_x_list[i], deterministic=True)) # reconstructed pool3 activations

        for i in range(self.args.ng):
            #loss_gen0_cond = T.mean((recon_fc3_list[i] - self.real_fc3)**2) # feature loss, euclidean distance in feature space
            loss_gen0_cond = T.mean(T.nnet.categorical_crossentropy(self.y_recon_list[i], self.y))
            loss_disc0_class = 0
            loss_disc0_adv = 0
            loss_gen0_ent = 0
            loss_gen0_adv = 0
            for j in range(self.args.nd):
                output_before_softmax_real0 = LL.get_output(self.D_layer_adv[j], self.x, deterministic=False) 
                output_before_softmax_gen0, recon_z0 = LL.get_output([self.D_layer_adv[j], self.D_layer_z_recon[j]], self.Gen_x_list[i], deterministic=False) # discriminator's predicted probability that gen_x is real
                ''' loss for discriminator and Q '''
                l_lab0 = output_before_softmax_real0[T.arange(self.args.batch_size),self.y]
                l_unl0 = nn.log_sum_exp(output_before_softmax_real0)
                l_gen0 = nn.log_sum_exp(output_before_softmax_gen0)
                loss_disc0_class += T.dot(self.D_weights[0,j], -T.mean(l_lab0) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real0)))) # loss for not correctly classifying the category of real images
                loss_real0 = -T.mean(l_unl0) + T.mean(T.nnet.softplus(l_unl0)) # loss for classifying real as fake
                loss_fake0 = T.mean(T.nnet.softplus(l_gen0)) # loss for classifying fake as real
                loss_disc0_adv += T.dot(self.D_weights[0,j], 0.5*loss_real0 + 0.5*loss_fake0)
                loss_gen0_ent += T.dot(self.D_weights[0,j], T.mean((recon_z0 - self.z)**2))
                #loss_gen0_ent = T.mean((recon_z0 - self.z)**2)
                ''' loss for generator '''
                loss_gen0_adv += T.dot(self.D_weights[0,j], -T.mean(T.nnet.softplus(l_gen0)))

            loss_gen0_cond_list.append(T.dot(self.G_weights[0,i], loss_gen0_cond))
            loss_disc0_class_list.append(T.dot(self.G_weights[0,i], loss_disc0_class))
            loss_disc0_adv_list.append(T.dot(self.G_weights[0,i], loss_disc0_adv))
            loss_gen0_ent_list.append(T.dot(self.G_weights[0,i], loss_gen0_ent))
            loss_gen0_adv_list.append(T.dot(self.G_weights[0,i], loss_gen0_adv))

        self.loss_gen0_cond = sum(loss_gen0_cond_list)
        self.loss_disc0_class = sum(loss_disc0_class_list)
        self.loss_disc0_adv = sum(loss_disc0_adv_list)
        self.loss_gen0_ent = sum(loss_gen0_ent_list)
        self.loss_gen0_adv = sum(loss_gen0_adv_list)

        self.loss_disc = self.args.labloss_weight * self.loss_disc0_class + self.args.advloss_weight * self.loss_disc0_adv + self.args.entloss_weight * self.loss_gen0_ent + self.args.mix_entloss_weight * self.Disc_weights_entropy
        self.loss_gen = self.args.advloss_weight * self.loss_gen0_adv + self.args.condloss_weight * self.loss_gen0_cond + self.args.entloss_weight * self.loss_gen0_ent + self.args.mix_entloss_weight * self.Gen_weights_entropy

        if self.args.load_epoch is not None:
            print("loading model")
            self.load_model(self.args.load_epoch)
            print("success")

        ''' collect parameter updates for discriminators '''
        Disc_params = LL.get_all_params(self.D_weights_layer, trainable=True)
        Disc_bn_updates = []
        Disc_bn_params = []

        self.threshold = self.mincost + self.args.labloss_weight * self.loss_disc0_class + self.args.entloss_weight * self.loss_gen0_ent + self.args.mix_entloss_weight * self.Disc_weights_entropy
        #threshold = mincost + self.args.labloss_weight * self.loss_disc0_class + self.args.entloss_weight * self.loss_gen0_ent

        for i in range(self.args.nd):
            Disc_params.extend(LL.get_all_params(self.D_layers[i], trainable=True))
            Disc_bn_updates.extend([u for l in LL.get_all_layers(self.D_layers[i][-1]) for u in getattr(l,'bn_updates',[])])
            for l in LL.get_all_layers(self.D_layers[i][-1]):
                if hasattr(l, 'avg_batch_mean'):
                    Disc_bn_params.append(l.avg_batch_mean)
                    Disc_bn_params.append(l.avg_batch_var)
        Disc_param_updates = nn.adam_conditional_updates(Disc_params, self.loss_disc, mincost=self.threshold, lr=self.disc_lr, mom1=0.5) # if loss_disc_x < mincost, don't update the discriminator
        Disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in Disc_params] # initialized with 0
        Disc_avg_updates = [(a,a+0.0001*(p-a)) for p,a in zip(Disc_params, Disc_param_avg)] # online update of historical parameters

        """
        #Disc_param_updates = nn.adam_updates(Disc_params, self.loss_disc, lr=self.lr, mom1=0.5) 
        # collect parameters
        #Disc_params = LL.get_all_params(self.D_layers[-1], trainable=True)
        Disc_params = LL.get_all_params(self.D_layers, trainable=True)
        #Disc_param_updates = nn.adam_updates(Disc_params, loss_disc_x, lr=lr, mom1=0.5) # loss for discriminator = supervised_loss + unsupervised loss
        Disc_param_updates = nn.adam_conditional_updates(Disc_params, self.loss_disc, mincost=threshold, lr=self.disc_lr, mom1=0.5) # if loss_disc_x < mincost, don't update the discriminator
        Disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in Disc_params] # initialized with 0
        Disc_avg_updates = [(a,a+0.0001*(p-a)) for p,a in zip(Disc_params,Disc_param_avg)] # online update of historical parameters
        #Disc_avg_givens = [(p,a) for p,a in zip(Disc_params,Disc_param_avg)]
        Disc_bn_updates = [u for l in LL.get_all_layers(self.D_layers[-1]) for u in getattr(l,'bn_updates',[])]
        Disc_bn_params = []
        for l in LL.get_all_layers(self.D_layers[-1]):
            if hasattr(l, 'avg_batch_mean'):
                Disc_bn_params.append(l.avg_batch_mean)
                Disc_bn_params.append(l.avg_batch_var)
        """


        ''' collect parameter updates for generators '''
        Gen_params = LL.get_all_params(self.G_weights_layer, trainable=True)
        Gen_params_updates = []
        Gen_bn_updates = []
        Gen_bn_params = []

        for i in range(self.args.ng):
            Gen_params.extend(LL.get_all_params(self.G_layers[i][-1], trainable=True))
            Gen_bn_updates.extend([u for l in LL.get_all_layers(self.G_layers[i][-1]) for u in getattr(l,'bn_updates',[])])
            for l in LL.get_all_layers(self.G_layers[i][-1]):
                if hasattr(l, 'avg_batch_mean'):
                    Gen_bn_params.append(l.avg_batch_mean)
                    Gen_bn_params.append(l.avg_batch_var)
        Gen_param_updates = nn.adam_updates(Gen_params, self.loss_gen, lr=self.gen_lr, mom1=0.5) 
        """
        #print(Gen_params)
        #train_batch_gen = th.function(inputs=[self.x, self.meanx, self.z, self.y_1hot, self.lr], outputs=[self.loss_gen], on_unused_input='warn')
        #theano.printing.debugprint(train_batch_gen) 
        Gen_param_updates = nn.adam_updates(Gen_params, self.loss_gen, lr=self.lr, mom1=0.5) 
        Gen_params = LL.get_all_params(self.G_layers[-1], trainable=True)
        Gen_param_updates = nn.adam_updates(Gen_params, self.loss_gen, lr=self.gen_lr, mom1=0.5)
        Gen_bn_updates = [u for l in LL.get_all_layers(self.G_layers[-1]) for u in getattr(l,'bn_updates',[])]
        Gen_bn_params = []
        for l in LL.get_all_layers(self.G_layers[-1]):
            if hasattr(l, 'avg_batch_mean'):
                Gen_bn_params.append(l.avg_batch_mean)
                Gen_bn_params.append(l.avg_batch_var)
         """

        ''' define training and testing functions '''
        #train_batch_disc = th.function(inputs=[x, meanx, y, lr], outputs=[loss_disc0_class, loss_disc0_adv, gen_x, x], 
        #    updates=disc0_param_updates+disc0_bn_updates) 
        #th.printing.debugprint(self.loss_disc)  
        train_batch_disc = th.function(inputs=[self.dummy_input, self.meanx, self.x, self.y, self.y_1hot, self.mincost, self.disc_lr], outputs=[self.loss_disc0_class, self.loss_disc0_adv], updates=Disc_param_updates+Disc_bn_updates+Disc_avg_updates) 
        #th.printing.pydotprint(train_batch_disc, outfile="logreg_pydotprint_prediction.png", var_with_name_simple=True)  
        #train_batch_gen = th.function(inputs=[x, meanx, y_1hot, lr], outputs=[loss_gen0_adv, loss_gen0_cond, loss_gen0_ent], 
        #    updates=gen0_param_updates+gen0_bn_updates)
        #train_batch_gen = th.function(inputs=gen_inputs, outputs=gen_outputs, updates=gen0_param_updates+gen0_bn_updates)
        #train_batch_gen = th.function(inputs=[self.dummy_input, self.x, self.meanx, self.z, self.y_1hot, self.lr], outputs=[self.loss_gen0_adv, self.loss_gen0_cond, self.loss_gen0_ent], updates=Gen_param_updates+Gen_bn_updates)
        train_batch_gen = th.function(inputs=[self.dummy_input, self.meanx, self.y, self.y_1hot, self.gen_lr], outputs=[self.loss_gen0_adv, self.loss_gen0_cond, self.loss_gen0_ent], updates=Gen_param_updates+Gen_bn_updates)
        

        # samplefun = th.function(inputs=[meanx, y_1hot], outputs=gen_x_joint)   # sample function: generating images by stacking all generators
        reconfun = th.function(inputs=[self.meanx, self.y_1hot], outputs=self.Gen_x)       # reconstruction function: use the bottom generator 
                                                                # to generate images conditioned on real fc3 features
        mix_weights = th.function(inputs=[self.dummy_input], outputs=[self.D_weights, self.Disc_weights_entropy, self.G_weights, self.Gen_weights_entropy])

        ''' load data '''
        print("Loading data...")
        meanimg, data = load_cifar_data(self.args.data_dir)
        trainx = data['X_train']
        trainy = data['Y_train']
        nr_batches_train = int(trainx.shape[0]/self.args.batch_size)
        # testx = data['X_test']
        # testy = data['Y_test']
        # nr_batches_test = int(testx.shape[0]/self.args.batch_size)

        ''' perform training  ''' 
        #logs = {'loss_gen0_adv': [], 'loss_gen0_cond': [], 'loss_gen0_ent': [], 'loss_disc0_class': [], 'var_gen0': [], 'var_real0': []} # training logs
        logs = {'loss_gen0_adv': [], 'loss_gen0_cond': [], 'loss_gen0_ent': [], 'loss_disc0_class': []} # training logs
        for epoch in range(self.args.load_epoch+1, self.args.num_epoch):
            begin = time.time()

            ''' shuffling '''
            inds = rng.permutation(trainx.shape[0])
            trainx = trainx[inds]
            trainy = trainy[inds]

            for t in range(nr_batches_train):
            #for t in range(1):
                ''' construct minibatch '''
                #batchz = np.random.uniform(size=(self.args.batch_size, self.args.z0dim)).astype(np.float32)
                batchx = trainx[t*self.args.batch_size:(t+1)*self.args.batch_size]
                batchy = trainy[t*self.args.batch_size:(t+1)*self.args.batch_size]
                batchy_1hot = np.zeros((self.args.batch_size, 10), dtype=np.float32)
                batchy_1hot[np.arange(self.args.batch_size), batchy] = 1 # convert to one-hot label
                # randomy = np.random.randint(10, size = (self.args.batch_size,))
                # randomy_1hot = np.zeros((self.args.batch_size, 10),dtype=np.float32)
                # randomy_1hot[np.arange(self.args.batch_size), randomy] = 1

                ''' train discriminators '''
                l_disc0_class, l_disc0_adv = train_batch_disc(0.0, meanimg, batchx, batchy, batchy_1hot, self.args.mincost, self.args.disc_lr)

                ''' train generators '''
                #prob_gen0 = np.exp()
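                # Note: heuristic schedule, take more generator steps per batch as the
                # discriminator's adversarial loss falls (i.e. as the discriminator gets stronger).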
                if l_disc0_adv > 0.65:
                    n_iter = 1
                elif l_disc0_adv > 0.5:
                    n_iter = 3
                elif l_disc0_adv > 0.3:
                    n_iter = 5
                else:
                    n_iter = 7
                for i in range(n_iter):
                    #l_gen0_adv, l_gen0_cond, l_gen0_ent = train_batch_gen(0.0, batchx, meanimg, batchz, batchy_1hot, self.args.gen_lr)
                    l_gen0_adv, l_gen0_cond, l_gen0_ent = train_batch_gen(0.0, meanimg, batchy, batchy_1hot, self.args.gen_lr)

                d_mix_weights, d_entloss, g_mix_weights, g_entloss = mix_weights(0.0)


                ''' store log information '''
                # logs['loss_gen1_adv'].append(l_gen1_adv)
                # logs['loss_gen1_cond'].append(l_gen1_cond)
                # logs['loss_gen1_ent'].append(l_gen1_ent)
                # logs['loss_disc1_class'].append(l_disc1_class)
                # logs['var_gen1'].append(np.var(np.array(g1)))
                # logs['var_real1'].append(np.var(np.array(r1)))

                logs['loss_gen0_adv'].append(l_gen0_adv)
                logs['loss_gen0_cond'].append(l_gen0_cond)
                logs['loss_gen0_ent'].append(l_gen0_ent)
                logs['loss_disc0_class'].append(l_disc0_class)
                #logs['var_gen0'].append(np.var(np.array(g0)))
                #logs['var_real0'].append(np.var(np.array(r0)))
                
                print("---Epoch %d, time = %ds" % (epoch, time.time()-begin))
                print("D_weights=[%.6f, %.6f, %.6f, %.6f, %.6f] loss = %0.6f" % (d_mix_weights[0,0], d_mix_weights[0,1], d_mix_weights[0,2], d_mix_weights[0,3], d_mix_weights[0,4], d_entloss))
                print("G_weights=[%.6f, %.6f, %.6f, %.6f, %.6f] loss = %0.6f" % (g_mix_weights[0,0], g_mix_weights[0,1], g_mix_weights[0,2], g_mix_weights[0,3], g_mix_weights[0,4], g_entloss))
                #print("G_weights=[%.6f]" % (g_mix_weights[0,0]))
                print("loss_disc0_adv = %.4f, loss_gen0_adv = %.4f,  loss_gen0_cond = %.4f, loss_gen0_ent = %.4f, loss_disc0_class = %.4f" % (l_disc0_adv, l_gen0_adv, l_gen0_cond, l_gen0_ent, l_disc0_class))
            # ''' sample images by stacking all generators'''
            # imgs = samplefun(meanimg, refy_1hot)
            # imgs = np.transpose(np.reshape(imgs[:100,], (100, 3, 32, 32)), (0, 2, 3, 1))
            # imgs = [imgs[i] for i in range(100)]
            # rows = []
            # for i in range(10):
            #     rows.append(np.concatenate(imgs[i::10], 1))
            # imgs = np.concatenate(rows, 0)
            # scipy.misc.imsave(self.args.out_dir + "/mnist_sample_epoch{}.png".format(epoch), imgs)

            """
            ''' original images in the training set'''
            orix = np.transpose(np.reshape(batchx[:100,], (100, 3, 32, 32)), (0, 2, 3, 1))
            orix = [orix[i] for i in range(100)]
            rows = []
            for i in range(10):
                rows.append(np.concatenate(orix[i::10], 1))
            orix = np.concatenate(rows, 0)
            scipy.misc.imsave(self.args.out_dir + "/mnist_ori_epoch{}.png".format(epoch), orix)
            """

            if epoch%self.args.save_interval==0:
                # np.savez(self.args.out_dir + "/disc1_params_epoch{}.npz".format(epoch), *LL.get_all_param_values(disc1_layers[-1]))
                # np.savez(self.args.out_dir + '/gen1_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(gen1_layers[-1]))
                #np.savez(self.args.out_dir + "/disc0_params_epoch{}.npz".format(epoch), *LL.get_all_param_values(disc0_layers))
                #np.savez(self.args.out_dir + '/gen0_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(gen0_layers))
                np.savez(self.args.out_dir + '/Dweights_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(self.D_weights_layer))
                np.savez(self.args.out_dir + '/Gweights_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(self.G_weights_layer))
                for i in range(self.args.ng):
                    np.savez(self.args.out_dir + ("/disc%d_params_epoch%d.npz" % (i,epoch)), *LL.get_all_param_values(self.D_layers[i]))
                    np.savez(self.args.out_dir + ("/gen%d_params_epoch%d.npz" % (i,epoch)), *LL.get_all_param_values(self.G_layers[i]))
                np.save(self.args.out_dir + '/logs.npy',logs)

            ''' reconstruct images '''
            reconx = reconfun(meanimg, batchy_1hot) + meanimg
            width = np.round(np.sqrt(self.args.batch_size)).astype(int)
            for i in range(self.args.ng):
                reconx_i = np.transpose(np.reshape(reconx[i*self.args.batch_size:(i+1)*self.args.batch_size], (self.args.batch_size, 3, 32, 32)), (0, 2, 3, 1))
                reconx_i = [reconx_i[j] for j in range(self.args.batch_size)]
                rows = []
                for j in range(width):
                    rows.append(np.concatenate(reconx_i[j::width], 1))
                reconx_i = np.concatenate(rows, 0)
                scipy.misc.imsave(self.args.out_dir + ("/cifar_recon_%d_epoch%d.png"%(i,epoch)), reconx_i) 
Example No. 17
layers.append(nn.DenseLayer(layers[-1], num_units=10, nonlinearity=None, train_scale=True))

# costs
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()

temp = LL.get_output(gen_layers[-1], init=True)
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers+layers for u in getattr(l,'init_updates',[])]

output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = LL.get_output(layers[-1], x_unl, deterministic=False)
output_before_softmax_fake = LL.get_output(layers[-1], gen_dat, deterministic=False)

z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
l_lab = output_before_softmax_lab[T.arange(args.batch_size),labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl))) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab,axis=1),labels))

mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
loss_gen = T.mean(T.square(mom_gen - mom_real))

# test error
output_before_softmax = LL.get_output(layers[-1], x_lab, deterministic=True)
Example No. 18
def gan_unlabelled_classif(trainx, trainy, testx, testy, lab_cnt, inp_size,
                           train_ex_cnt):
    trainy = trainy.astype(np.int32)
    testy = testy.astype(np.int32)
    trainx = trainx.reshape((-1, inp_size)).astype(th.config.floatX)
    testx = testx.reshape((-1, inp_size)).astype(th.config.floatX)
    assert train_ex_cnt == trainx.shape[0]

    # settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_data', type=int, default=1)
    parser.add_argument('--unlabeled_weight', type=float, default=1.)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--count', type=int, default=10)
    parser.add_argument('--iter_limit', type=int, default=300)
    args = parser.parse_args()
    print(args)

    # fixed random seeds
    rng = np.random.RandomState(args.seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
    data_rng = np.random.RandomState(args.seed_data)

    # npshow(trainx.reshape((-1, 27, 32))[0])

    trainx_unl = trainx.copy()
    trainx_unl2 = trainx.copy()
    nr_batches_train = int(trainx.shape[0] / args.batch_size)
    nr_batches_test = int(testx.shape[0] / args.batch_size)

    # select labeled data
    inds = data_rng.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    txs = []
    tys = []
    for _j in range(10):
        j = _j % lab_cnt
        txs.append(trainx[trainy == j][:args.count])
        tys.append(trainy[trainy == j][:args.count])
    txs = np.concatenate(txs, axis=0)
    tys = np.concatenate(tys, axis=0)

    # specify generative model
    noise = theano_rng.uniform(size=(args.batch_size, 100))
    gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.l2normalize(
            LL.DenseLayer(gen_layers[-1],
                          num_units=inp_size,
                          nonlinearity=T.nnet.sigmoid)))
    gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

    # specify supervised model
    layers = [LL.InputLayer(shape=(None, inp_size))]
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
    layers.append(nn.DenseLayer(layers[-1], num_units=1000))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=500))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=lab_cnt,
                      nonlinearity=None,
                      train_scale=True))

    # costs
    labels = T.ivector()
    x_lab = T.matrix()
    x_unl = T.matrix()

    temp = LL.get_output(gen_layers[-1], init=True)
    temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
    init_updates = [
        u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])
    ]

    output_before_softmax_lab = LL.get_output(layers[-1],
                                              x_lab,
                                              deterministic=False)
    output_before_softmax_unl = LL.get_output(layers[-1],
                                              x_unl,
                                              deterministic=False)
    output_before_softmax_fake = LL.get_output(layers[-1],
                                               gen_dat,
                                               deterministic=False)

    z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
    z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
    z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
    l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
    l_unl = nn.log_sum_exp(output_before_softmax_unl)
    loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
    loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
        T.nnet.softplus(
            nn.log_sum_exp(output_before_softmax_unl))) + 0.5 * T.mean(
                T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

    train_err = T.mean(
        T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

    mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
    mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
    loss_gen = T.mean(T.square(mom_gen - mom_real))

    # test error
    output_before_softmax = LL.get_output(layers[-1],
                                          x_lab,
                                          deterministic=True)
    test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

    # Theano functions for training and testing
    lr = T.scalar()
    disc_params = LL.get_all_params(layers, trainable=True)
    disc_param_updates = nn.adam_updates(disc_params,
                                         loss_lab +
                                         args.unlabeled_weight * loss_unl,
                                         lr=lr,
                                         mom1=0.5)
    disc_param_avg = [
        th.shared(np.cast[th.config.floatX](0. * p.get_value()))
        for p in disc_params
    ]
    disc_avg_updates = [(a, a + 0.0001 * (p - a))
                        for p, a in zip(disc_params, disc_param_avg)]
    disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
    gen_params = LL.get_all_params(gen_layers[-1], trainable=True)
    gen_param_updates = nn.adam_updates(gen_params, loss_gen, lr=lr, mom1=0.5)
    init_param = th.function(inputs=[x_lab],
                             outputs=None,
                             updates=init_updates)
    train_batch_disc = th.function(inputs=[x_lab, labels, x_unl, lr],
                                   outputs=[loss_lab, loss_unl, train_err],
                                   updates=disc_param_updates +
                                   disc_avg_updates)
    train_batch_gen = th.function(inputs=[x_unl, lr],
                                  outputs=[loss_gen],
                                  updates=gen_param_updates)
    test_batch = th.function(inputs=[x_lab, labels],
                             outputs=test_err,
                             givens=disc_avg_givens)

    init_param(trainx[:500])  # data dependent initialization
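    # Note: init_updates is assumed to hold the data-dependent initialization updates of the
    # weight-normalized layers, so this single call sets initial scales and biases from the
    # statistics of 500 real examples before training begins.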

    # //////////// perform training //////////////
    lr = 0.003
    for epoch in range(args.iter_limit):
        begin = time.time()

        # construct randomly permuted minibatches
        trainx = []
        trainy = []
        for t in range(int(trainx_unl.shape[0] / txs.shape[0])):
            inds = rng.permutation(txs.shape[0])
            trainx.append(txs[inds])
            trainy.append(tys[inds])
        trainx = np.concatenate(trainx, axis=0)
        trainy = np.concatenate(trainy, axis=0)
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]
        trainx_unl2 = trainx_unl2[rng.permutation(trainx_unl2.shape[0])]

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        for t in range(nr_batches_train):
            ll, lu, te = train_batch_disc(
                trainx[t * args.batch_size:(t + 1) * args.batch_size],
                trainy[t * args.batch_size:(t + 1) * args.batch_size],
                trainx_unl[t * args.batch_size:(t + 1) * args.batch_size], lr)
            loss_lab += ll
            loss_unl += lu
            train_err += te
            e = train_batch_gen(
                trainx_unl2[t * args.batch_size:(t + 1) * args.batch_size], lr)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train

        # test
        test_err = 0.
        for t in range(nr_batches_test):
            test_err += test_batch(
                testx[t * args.batch_size:(t + 1) * args.batch_size],
                testy[t * args.batch_size:(t + 1) * args.batch_size])
        test_err /= nr_batches_test

        # report
        print(
            "Iteration %d, time = %ds, loss_lab = %.4f, loss_unl = %.4f, train err = %.4f, test err = %.4f"
            % (epoch, time.time() - begin, loss_lab, loss_unl, train_err,
               test_err))
        sys.stdout.flush()
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [
    u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])
]

output_before_softmax_lab = LL.get_output(layers[-1],
                                          x_lab,
                                          deterministic=False)
output_before_softmax_unl = LL.get_output(layers[-1],
                                          x_unl,
                                          deterministic=False)
output_before_softmax_fake = LL.get_output(layers[-1],
                                           gen_dat,
                                           deterministic=False)

log_fx = output_before_softmax_lab - nn.log_sum_exp(
    output_before_softmax_lab).dimshuffle(0, 'x')
loss_entropy = -0.1 * T.mean(T.sum(T.exp(log_fx) * log_fx, axis=1))
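# Note: log_fx is the log-softmax of the labeled logits, so T.sum(T.exp(log_fx) * log_fx,
# axis=1) equals minus the prediction entropy; loss_entropy is therefore
# 0.1 * mean(entropy), which rewards confident (low-entropy) predictions when minimized.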
l_unl = nn.log_sum_exp(output_before_softmax_unl)
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
    T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl))) + 0.5 * T.mean(
        T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
loss_gen = T.mean(T.square(mom_gen - mom_real))

# Theano functions for training and testing
lr = T.scalar()
disc_params = LL.get_all_params(layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params,
                                     loss_entropy +