def get_loss(a, b, c):
    n_plus = T.sqrt(0.01 + T.sum((a - b)**2, axis=1))
    n_minus = T.sqrt(0.01 + T.sum((a - c)**2, axis=1))
    z = T.concatenate([n_minus.dimshuffle(0, 'x'), n_plus.dimshuffle(0, 'x')], axis=1)
    z = nn.log_sum_exp(z, axis=1)
    return n_plus, n_minus, z
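# A minimal numeric sketch (plain NumPy, not part of the Theano graph) of the identity this
# helper relies on: log(exp(n_minus) + exp(n_plus)) - n_minus == softplus(n_plus - n_minus),
# so a loss of the form -T.mean(n_minus) + T.mean(z) acts as a soft triplet margin that pushes
# the positive distance n_plus below the negative distance n_minus.
import numpy as np
d_pos = np.array([1.0, 2.0])   # example n_plus values
d_neg = np.array([3.0, 0.5])   # example n_minus values
soft_max = np.log(np.exp(d_neg) + np.exp(d_pos))                       # soft maximum of the two distances
assert np.allclose(soft_max - d_neg, np.log1p(np.exp(d_pos - d_neg)))  # softplus(d_pos - d_neg)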
x_unl2 = T.tensor4()
genz_lab2 = ll.get_output(genz_layers[-1], {x_input: x_lab2})
genz_unl2 = ll.get_output(genz_layers[-1], {x_input: x_unl2})
output_before_softmax_lab2 = ll.get_output(disc_layers[-1], {x_input: x_lab2, z_input: genz_lab2}, deterministic=False)
output_before_softmax_unl2 = ll.get_output(disc_layers[-1], {x_input: x_unl2, z_input: genz_unl2}, deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(T.nnet.softplus(l_unl)) + 0.5 * T.mean(T.nnet.softplus(l_gen))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

# test error
output_before_softmax = ll.get_output(disc_layers[-1], {x_input: x_lab, z_input: genz_lab}, deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))
layers.append(nn.DenseLayer(layers[-1], num_units=10, nonlinearity=None, train_scale=True))

# costs
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()

#temp = LL.get_output(gen_layers[-1], init=True)
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = LL.get_output(layers[-1], x_unl, deterministic=False)
output_before_softmax_fake = LL.get_output(layers[-1], gen_dat, deterministic=False)

z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
#loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
loss_lab = -T.mean(l_lab) + z_exp_lab
loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl))) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
loss_gen = T.mean(T.square(mom_gen - mom_real))

# test error
disc_params = ll.get_all_params(disc_layers, trainable=True)

# costs
labels = T.ivector()
x_lab = T.tensor4()
x_unl = T.tensor4()

temp = ll.get_output(gen_layers[-1], deterministic=False, init=True)
temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers + disc_layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = ll.get_output(disc_layers[-1], x_unl, deterministic=False)
output_before_softmax_gen = ll.get_output(disc_layers[-1], gen_dat, deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(l_unl)) + 0.5*T.mean(T.nnet.softplus(l_gen))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

# test error
output_before_softmax = ll.get_output(disc_layers[-1], x_lab, deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab + args.unlabeled_weight*loss_unl, lr=lr, mom1=0.5)
disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in disc_params]
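# How loss_unl above maps onto the usual GAN objective: with l(x) = log sum_k exp(output_k(x))
# and the implicit real/fake discriminator D(x) = Z(x) / (Z(x) + 1), Z(x) = exp(l(x)),
#   -log D(x)         = -l(x) + softplus(l(x))
#   -log(1 - D(G(z))) =  softplus(l(G(z)))
# so loss_unl = 0.5 * E_real[-log D(x)] + 0.5 * E_fake[-log(1 - D(G(z)))].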
# costs
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()

temp = LL.get_output(gen_layers[-1], init=True)
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
output_before_softmax_unl, output_before_softmax_unl_ = LL.get_output([layers[-1], layers[-2]], x_unl, deterministic=False)
output_before_softmax_unl2, output_before_softmax_unl2_ = LL.get_output([layers[-1], layers[-2]], x_unl, deterministic=False)
output_before_softmax_fake, output_before_softmax_fake_ = LL.get_output([layers[-1], layers[-2]], gen_dat, deterministic=False)
output_before_softmax_fake_2, output_before_softmax_fake_2_ = LL.get_output([layers[-1], layers[-2]], gen_dat, deterministic=False)

z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)

#loss_ct = lasagne.objectives.squared_error(T.sum(T.exp(output_before_softmax_unl),axis=1)/(T.sum(T.exp(output_before_softmax_unl),axis=1)+1), T.sum(T.exp(output_before_softmax_unl2),axis=1)/(T.sum(T.exp(output_before_softmax_unl2),axis=1)+1))
loss_ct = T.mean(lasagne.objectives.squared_error(T.nnet.softmax(output_before_softmax_unl), T.nnet.softmax(output_before_softmax_unl2)), axis=1)
loss_ct_ = T.mean(lasagne.objectives.squared_error(output_before_softmax_unl_, output_before_softmax_unl2_), axis=1)  # no normalization, this term makes the model unstable
CT = LAMBDA_2*(loss_ct + 0.0*loss_ct_) - factor_M
CT_ = T.mean(T.maximum(CT, 0.0*CT), axis=0)
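# Note on the consistency term: because deterministic=False, the two get_output calls on x_unl
# produce two independently dropout-perturbed predictions for the same unlabeled batch. loss_ct
# penalizes the squared disagreement between their softmax outputs (loss_ct_ does the same on the
# second-to-last layer but is weighted by 0.0 here), and T.maximum(CT, 0.0*CT) hinges the penalty
# so only weighted disagreement exceeding the margin factor_M contributes.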
                               num_units=10, W=Normal(0.02), nonlinearity=None)
disc0_layers.append(disc0_layer_adv)

''' forward pass '''
output_before_softmax_real0 = LL.get_output(disc0_layer_adv, x, deterministic=False)
output_before_softmax_gen0, recon_z0 = LL.get_output([disc0_layer_adv, disc0_layer_z_recon], gen_x, deterministic=False)  # discriminator's predicted probability that gen_x is real

''' loss for discriminator and Q '''
l_lab0 = output_before_softmax_real0[T.arange(args.batch_size), y]
l_unl0 = nn.log_sum_exp(output_before_softmax_real0)
l_gen0 = nn.log_sum_exp(output_before_softmax_gen0)
loss_disc0_class = -T.mean(l_lab0) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real0)))  # loss for not correctly classifying the category of real images
loss_real0 = -T.mean(l_unl0) + T.mean(T.nnet.softplus(l_unl0))  # loss for classifying real as fake
loss_fake0 = T.mean(T.nnet.softplus(l_gen0))  # loss for classifying fake as real
loss_disc0_adv = 0.5*loss_real0 + 0.5*loss_fake0
loss_gen0_ent = T.mean((recon_z0 - z0)**2)
loss_disc0 = args.labloss_weight*loss_disc0_class + args.advloss_weight*loss_disc0_adv + args.entloss_weight*loss_gen0_ent

''' loss for generator '''
recon_fc3 = LL.get_output(enc_layer_fc3, gen_x,
# output_before_softmax_real0 = LL.get_output(disc0_layer_adv, x, deterministic=False)
# output_before_softmax_gen0, recon_z0 = LL.get_output([disc0_layer_adv, disc0_layer_z_recon], gen_x, deterministic=False)  # discriminator's predicted probability that gen_x is real

''' loss for discriminator and Q '''
# loss_real1 = T.mean(T.nnet.binary_crossentropy(prob_real1, T.ones(prob_real1.shape)))
# loss_fake1 = T.mean(T.nnet.binary_crossentropy(prob_gen1, T.zeros(prob_gen1.shape)))
# loss_gen1_ent = T.mean((recon_z1 - z1)**2)
# loss_disc1 = args.advloss_weight * (0.5*loss_real1 + 0.5*loss_fake1) + args.entloss_weight * loss_gen1_ent
# loss_real0 = T.mean(T.nnet.binary_crossentropy(prob_real0, T.ones(prob_real0.shape)))
# loss_fake0 = T.mean(T.nnet.binary_crossentropy(prob_gen0, T.zeros(prob_gen0.shape)))
# loss_gen0_ent = T.mean((recon_z0 - z0)**2)
# loss_disc0 = args.advloss_weight * (0.5*loss_real0 + 0.5*loss_fake0) + args.entloss_weight * loss_gen0_ent

l_lab1 = output_before_softmax_real1[T.arange(args.batch_size), y]
l_unl1 = nn.log_sum_exp(output_before_softmax_real1)
l_gen1 = nn.log_sum_exp(output_before_softmax_gen1)
loss_disc1_class = -T.mean(l_lab1) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real1)))  # loss for not correctly classifying the category of real images
loss_real1 = -T.mean(l_unl1) + T.mean(T.nnet.softplus(l_unl1))  # loss for classifying real as fake
loss_fake1 = T.mean(T.nnet.softplus(l_gen1))  # loss for classifying fake as real
loss_disc1_adv = 0.5*loss_real1 + 0.5*loss_fake1
loss_gen1_ent = T.mean((recon_z1 - z1)**2)
loss_disc1 = args.labloss_weight * loss_disc1_class + args.advloss_weight * loss_disc1_adv + args.entloss_weight * loss_gen1_ent

# l_lab0 = output_before_softmax_real0[T.arange(args.batch_size),y]
# l_unl0 = nn.log_sum_exp(output_before_softmax_real0)
# l_gen0 = nn.log_sum_exp(output_before_softmax_gen0)
# loss_disc0_class = -T.mean(l_lab0) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real0)))  # loss for not correctly classifying the category of real images
# loss_real0 = -T.mean(l_unl0) + T.mean(T.nnet.softplus(l_unl0))  # loss for classifying real as fake
# loss_fake0 = T.mean(T.nnet.softplus(l_gen0))  # loss for classifying fake as real
# loss_disc0_adv = 0.5*loss_real0 + 0.5*loss_fake0
disc_layers.append(ll.GlobalPoolLayer(disc_layers[-1]))
disc_layers.append(nn.weight_norm(ll.DenseLayer(disc_layers[-1], num_units=2, W=Normal(0.05), nonlinearity=None), train_g=True, init_stdv=0.1))
disc_params = ll.get_all_params(disc_layers, trainable=True)
print("DISCRIMINATOR CREATED")

# costs
labels = T.ivector()
x_lab = T.tensor4()

temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in disc_layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False)
l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))
train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

# test error
output_before_softmax = ll.get_output(disc_layers[-1], x_lab, deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))
print("ERROR FUNCTIONS CREATED")

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = lupd.adam(loss_lab, disc_params, learning_rate=lr, beta1=0.5).items()
disc_param_avg = []
for p in disc_params:
disc_x_layers.append(LL.GlobalPoolLayer(disc_x_layers_shared))
disc_x_layer_adv = LL.DenseLayer(disc_x_layers[-1], num_units=10, W=Normal(0.01), nonlinearity=None)
disc_x_layers.append(disc_x_layer_adv)

#output_before_softmax_x = LL.get_output(disc_x_layer_adv, x, deterministic=False)
#output_before_softmax_gen = LL.get_output(disc_x_layer_adv, gen_x, deterministic=False)

# temp = LL.get_output(gen_x_layers[-1], deterministic=False, init=True)
# temp = LL.get_output(disc_x_layers[-1], x, deterministic=False, init=True)
# init_updates = [u for l in LL.get_all_layers(gen_x_layers)+LL.get_all_layers(disc_x_layers) for u in getattr(l,'init_updates',[])]

output_before_softmax_real = LL.get_output(disc_x_layer_adv, x, deterministic=False)
output_before_softmax_gen, recon_z = LL.get_output([disc_x_layer_adv, disc_x_layer_z_recon], gen_x, deterministic=False)  # discriminator's predicted probability that gen_x is real

l_lab = output_before_softmax_real[T.arange(args.batch_size), y]
l_unl = nn.log_sum_exp(output_before_softmax_real)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_class_x = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real)))  # loss for not correctly classifying the category of real images
loss_real_x = -T.mean(l_unl) + T.mean(T.nnet.softplus(l_unl))  # loss for classifying real as fake
loss_fake_x = T.mean(T.nnet.softplus(l_gen))  # loss for classifying fake as real
loss_disc_x_adv = 0.5*loss_real_x + 0.5*loss_fake_x
loss_z_recon = T.mean((recon_z - z)**2)
loss_disc_x = args.labloss_weight * loss_class_x + args.advloss_weight * loss_disc_x_adv + args.zloss_weight * loss_z_recon

# loss for generator
y_recon = LL.get_output(enc_layer_fc4, gen_x, deterministic=True)  # reconstructed pool3 activations
#loss_gen_x_adv = -loss_fake_x  # adversarial loss
loss_gen_x_adv = -T.mean(T.nnet.softplus(l_gen))
# loss_gen_x_fea = T.mean((recon_fc3 - real_fc3)**2)  # feature loss, euclidean distance in feature space
loss_gen_x_fea = T.mean(T.nnet.categorical_crossentropy(y_recon, y_1hot))  # feature loss
loss_gen_x = args.advloss_weight * loss_gen_x_adv + args.fealoss_weight * loss_gen_x_fea + args.zloss_weight * loss_z_recon
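# Note on the latent-reconstruction term above: disc_x_layer_z_recon is an auxiliary head on the
# discriminator that tries to recover the code z used to produce gen_x, and loss_z_recon is the
# squared error of that recovery. It enters both loss_disc_x and loss_gen_x with weight
# args.zloss_weight, so the discriminator learns the inverse mapping while the generator is
# encouraged to keep z recoverable from its samples.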
loss_lab = T.mean(T.sum(T.pow(output_lab - label_matrix, 2), axis=1))  # Squared Error
#l_gen = T.mean(T.sum(T.pow(output_gen, 2), axis=1))  # L2 norm
#l_unl = T.mean(T.pow(T.max(output_unl) - 1, 2))
#l_unl = T.mean(T.pow(T.max(output_unl) - 1, 2)) + T.mean(T.sum(T.pow(output_unl, 2), axis=1))
#loss_unl = 0.5*l_unl + 0.5*l_gen
"""
log_gen = output_gen - nn.log_sum_exp(output_gen).dimshuffle(0,'x')
ent_gen = T.mean(T.sum(T.exp(log_gen) * log_gen, axis=1))
log_fx = output_unl - nn.log_sum_exp(output_unl).dimshuffle(0,'x')
ent_fx = T.mean(T.sum(T.exp(log_fx) * log_fx, axis=1))
# Entropy loss
loss_unl = -0.5*ent_fx + 0.5*ent_gen
"""
l_unl = nn.log_sum_exp(output_unl)
l_gen = nn.log_sum_exp(output_gen)
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(T.nnet.softplus(l_unl)) + 0.5 * T.mean(T.nnet.softplus(l_gen))
train_err = T.mean(T.neq(T.argmax(output_lab, axis=1), labels))

# test error
output_test = ll.get_output(disc_layers[-1], x_lab, deterministic=True)
test_err = T.mean(T.neq(T.argmax(output_test, axis=1), labels))

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab +
def loss_labeled(a, b, c):
    n_plus = T.sqrt(T.sum((a - b)**2, axis=1))
    n_minus = T.sqrt(T.sum((a - c)**2, axis=1))
    z = T.concatenate([n_minus.dimshuffle(0, 'x'), n_plus.dimshuffle(0, 'x')], axis=1)
    z = nn.log_sum_exp(z, axis=1)
    return n_plus, n_minus, z

n_plus_lab, n_minus_lab, z_lab = loss_labeled(a_lab, b_lab, c_lab)

# defining triplet loss function
loss_lab = -T.mean(n_minus_lab) + T.mean(z_lab)

# defining unlabelled loss
loss_unl = -0.5 * T.mean(nn.log_sum_exp(output_unl)) + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_unl))) + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_fake)))

# defining feature matching loss for generator training
mom_gen = LL.get_output(layers[-1], gen_dat)
mom_real = LL.get_output(layers[-1], x_unl)
loss_gen = T.mean(T.square(T.mean(mom_gen, axis=0) - T.mean(mom_real, axis=0)))

# Theano functions for training and testing
lr = T.scalar()
disc_params = LL.get_all_params(layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab + args.unlabeled_weight * loss_unl, lr=lr,
print("DISCRIMINATOR CREATED") # costs labels = T.ivector() x_lab = T.tensor4() temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True) init_updates = [u for l in disc_layers for u in getattr(l, 'init_updates', [])] output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False) l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels] loss_lab = -T.mean(l_lab) + T.mean( T.mean(nn.log_sum_exp(output_before_softmax_lab))) train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels)) # test error output_before_softmax = ll.get_output(disc_layers[-1], x_lab, deterministic=True) test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels)) print("ERROR FUNCTIONS CREATED") # Theano functions for training the disc net lr = T.scalar() disc_params = ll.get_all_params(disc_layers, trainable=True) disc_param_updates = lupd.adam(loss_lab,
    u for l in gen_layers + disc_layers
    for u in getattr(l, 'init_updates', [])
]

output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = ll.get_output(disc_layers[-1], x_unl, deterministic=False)
output_before_softmax_gen = ll.get_output(disc_layers[-1], gen_dat, deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(T.nnet.softplus(l_unl)) + 0.5 * T.mean(T.nnet.softplus(l_gen))

# Gradient for disc
z_delta_disc = T.tile(z_jacobian, (args.batch_size, 1)) * args.z_delta
z_d_disc = T.sum(z_jacobian, axis=1).dimshuffle('x', 0) * args.z_delta
x_disc_jacobian_lab = x_lab.repeat(sample_dim, axis=0)
labels_jacobian = labels.repeat(sample_dim)
gen_dat_del_lab = ll.get_output(gen_layers[-1], {
    gen_img_input: x_disc_jacobian_lab,
def getLossFuction(a, b, c):
    n_plus = T.sqrt(T.sum((a - b)**2, axis=1))
    n_minus = T.sqrt(T.sum((a - c)**2, axis=1))
    z = T.concatenate([n_minus.dimshuffle(0, 'x'), n_plus.dimshuffle(0, 'x')], axis=1)
    z = nn.log_sum_exp(z, axis=1)
    return n_plus, n_minus, z

n_plus_lab, n_minus_lab, z_lab = getLossFuction(a_lab, b_lab, c_lab)
loss_lab = -T.mean(n_minus_lab) + T.mean(z_lab)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(l_unl)) + 0.5*T.mean(T.nnet.softplus(l_gen))

# Theano functions for training the disc net
lr = T.scalar()
disc_params = ll.get_all_params(disc_layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_lab + args.unlabeled_weight*loss_unl, lr=lr, mom1=0.5)
disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in disc_params]
disc_avg_updates = [(a, a + 0.0001*(p - a)) for p, a in zip(disc_params, disc_param_avg)]
disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]

# data based initialization
train_batch_disc = th.function(inputs=[x_lab, x_unl, lr], outputs=[loss_lab, loss_unl], updates=disc_param_updates + disc_avg_updates)
samplefun = th.function(inputs=[], outputs=gen_dat)

# Theano functions for training the gen net
# costs
labels = T.ivector()
x_lab = T.tensor4()
x_unl = T.tensor4()

temp = ll.get_output(gen_layers[-1], deterministic=False, init=True)
temp = ll.get_output(disc_layers[-1], x_lab, deterministic=False, init=True)  # no use
init_updates = [u for l in gen_layers + disc_layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = ll.get_output(disc_layers[-1], x_lab, deterministic=False)
output_before_softmax_unl, output_before_softmax_unl_ = ll.get_output([disc_layers[-1], disc_layers[-2]], x_unl, deterministic=False)  # no softmax
output_before_softmax_unl2, output_before_softmax_unl2_ = ll.get_output([disc_layers[-1], disc_layers[-2]], x_unl, deterministic=False)  # no softmax
output_before_softmax_gen = ll.get_output(disc_layers[-1], gen_dat, deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_unl2 = nn.log_sum_exp(output_before_softmax_unl2)
l_unl_ = nn.log_sum_exp(output_before_softmax_unl_)
l_unl2_ = nn.log_sum_exp(output_before_softmax_unl2_)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_comp = T.mean(lasagne.objectives.squared_error(T.nnet.softmax(output_before_softmax_unl), T.nnet.softmax(output_before_softmax_unl2)))
loss_comp_ = T.mean(lasagne.objectives.squared_error(output_before_softmax_unl_, output_before_softmax_unl2_))
loss_unl = 0.05*loss_comp_ + 0.5*loss_comp - 0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(l_unl)) - 0.5*np.log(1) + 0.5*T.mean(T.nnet.softplus(l_gen))
zeros = np.zeros(100)
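# Reading note for the train() method below: it builds args.ng generators and args.nd
# discriminators and mixes their losses with learned softmax weights (nn.softmax_weights is
# assumed to return a 1 x n row of normalized weights). Disc_weights_entropy / Gen_weights_entropy
# are cross-entropies between a uniform distribution and those weights; since -sum_j (1/n) log w_j
# is minimized at w_j = 1/n when the weights sum to one, adding them via args.mix_entloss_weight
# discourages the mixture from collapsing onto a single component. D_weights_layer is built with
# self.args.ng; if the number of discriminators can differ from the number of generators, that
# argument is presumably meant to be self.args.nd.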
def train(self):
    self.G_weights_layer = nn.softmax_weights(self.args.ng, LL.InputLayer(shape=(), input_var=self.dummy_input))
    self.D_weights_layer = nn.softmax_weights(self.args.ng, LL.InputLayer(shape=(), input_var=self.dummy_input))
    self.G_weights = LL.get_output(self.G_weights_layer, None, deterministic=True)
    self.D_weights = LL.get_output(self.D_weights_layer, None, deterministic=True)
    self.Disc_weights_entropy = T.sum((-1./self.args.nd) * T.log(self.D_weights + 0.000001), [0, 1])
    self.Gen_weights_entropy = T.sum((-1./self.args.ng) * T.log(self.G_weights + 0.000001), [0, 1])

    for i in range(self.args.ng):
        gen_layers_i, gen_x_i = self.get_generator(self.meanx, self.z, self.y_1hot)
        self.G_layers.append(gen_layers_i)
        self.Gen_x_list.append(gen_x_i)
    self.Gen_x = T.concatenate(self.Gen_x_list, axis=0)

    for i in range(self.args.nd):
        disc_layers_i, disc_layer_adv_i, disc_layer_z_recon_i = self.get_discriminator()
        self.D_layers.append(disc_layers_i)
        self.D_layer_adv.append(disc_layer_adv_i)
        self.D_layer_z_recon.append(disc_layer_z_recon_i)
        #T.set_subtensor(self.Gen_x[i*self.args.batch_size:(i+1)*self.args.batch_size], gen_x_i)
        #self.samplers.append(self.sampler(self.z[i], self.y))

    ''' forward pass '''
    loss_gen0_cond_list = []
    loss_disc0_class_list = []
    loss_disc0_adv_list = []
    loss_gen0_ent_list = []
    loss_gen0_adv_list = []
    #loss_disc_list
    for i in range(self.args.ng):
        self.y_recon_list.append(LL.get_output(self.enc_layer_fc4, self.Gen_x_list[i], deterministic=True))  # reconstructed pool3 activations
    for i in range(self.args.ng):
        #loss_gen0_cond = T.mean((recon_fc3_list[i] - self.real_fc3)**2)  # feature loss, euclidean distance in feature space
        loss_gen0_cond = T.mean(T.nnet.categorical_crossentropy(self.y_recon_list[i], self.y))
        loss_disc0_class = 0
        loss_disc0_adv = 0
        loss_gen0_ent = 0
        loss_gen0_adv = 0
        for j in range(self.args.nd):
            output_before_softmax_real0 = LL.get_output(self.D_layer_adv[j], self.x, deterministic=False)
            output_before_softmax_gen0, recon_z0 = LL.get_output([self.D_layer_adv[j], self.D_layer_z_recon[j]], self.Gen_x_list[i], deterministic=False)  # discriminator's predicted probability that gen_x is real

            ''' loss for discriminator and Q '''
            l_lab0 = output_before_softmax_real0[T.arange(self.args.batch_size), self.y]
            l_unl0 = nn.log_sum_exp(output_before_softmax_real0)
            l_gen0 = nn.log_sum_exp(output_before_softmax_gen0)
            loss_disc0_class += T.dot(self.D_weights[0, j], -T.mean(l_lab0) + T.mean(T.mean(nn.log_sum_exp(output_before_softmax_real0))))  # loss for not correctly classifying the category of real images
            loss_real0 = -T.mean(l_unl0) + T.mean(T.nnet.softplus(l_unl0))  # loss for classifying real as fake
            loss_fake0 = T.mean(T.nnet.softplus(l_gen0))  # loss for classifying fake as real
            loss_disc0_adv += T.dot(self.D_weights[0, j], 0.5*loss_real0 + 0.5*loss_fake0)
            loss_gen0_ent += T.dot(self.D_weights[0, j], T.mean((recon_z0 - self.z)**2))
            #loss_gen0_ent = T.mean((recon_z0 - self.z)**2)

            ''' loss for generator '''
            loss_gen0_adv += T.dot(self.D_weights[0, j], -T.mean(T.nnet.softplus(l_gen0)))
        loss_gen0_cond_list.append(T.dot(self.G_weights[0, i], loss_gen0_cond))
        loss_disc0_class_list.append(T.dot(self.G_weights[0, i], loss_disc0_class))
        loss_disc0_adv_list.append(T.dot(self.G_weights[0, i], loss_disc0_adv))
        loss_gen0_ent_list.append(T.dot(self.G_weights[0, i], loss_gen0_ent))
        loss_gen0_adv_list.append(T.dot(self.G_weights[0, i], loss_gen0_adv))
    self.loss_gen0_cond = sum(loss_gen0_cond_list)
    self.loss_disc0_class = sum(loss_disc0_class_list)
    self.loss_disc0_adv = sum(loss_disc0_adv_list)
    self.loss_gen0_ent = sum(loss_gen0_ent_list)
    self.loss_gen0_adv = sum(loss_gen0_adv_list)
    self.loss_disc = self.args.labloss_weight * self.loss_disc0_class + self.args.advloss_weight * self.loss_disc0_adv + self.args.entloss_weight * self.loss_gen0_ent + self.args.mix_entloss_weight * self.Disc_weights_entropy
    self.loss_gen = self.args.advloss_weight * self.loss_gen0_adv + self.args.condloss_weight * self.loss_gen0_cond + self.args.entloss_weight * self.loss_gen0_ent + self.args.mix_entloss_weight * self.Gen_weights_entropy

    if self.args.load_epoch is not None:
        print("loading model")
        self.load_model(self.args.load_epoch)
        print("success")

    ''' collect parameter updates for discriminators '''
    Disc_params = LL.get_all_params(self.D_weights_layer, trainable=True)
    Disc_bn_updates = []
    Disc_bn_params = []
    self.threshold = self.mincost + self.args.labloss_weight * self.loss_disc0_class + self.args.entloss_weight * self.loss_gen0_ent + self.args.mix_entloss_weight * self.Disc_weights_entropy
    #threshold = mincost + self.args.labloss_weight * self.loss_disc0_class + self.args.entloss_weight * self.loss_gen0_ent
    for i in range(self.args.nd):
        Disc_params.extend(LL.get_all_params(self.D_layers[i], trainable=True))
        Disc_bn_updates.extend([u for l in LL.get_all_layers(self.D_layers[i][-1]) for u in getattr(l, 'bn_updates', [])])
        for l in LL.get_all_layers(self.D_layers[i][-1]):
            if hasattr(l, 'avg_batch_mean'):
                Disc_bn_params.append(l.avg_batch_mean)
                Disc_bn_params.append(l.avg_batch_var)
    Disc_param_updates = nn.adam_conditional_updates(Disc_params, self.loss_disc, mincost=self.threshold, lr=self.disc_lr, mom1=0.5)  # if loss_disc_x < mincost, don't update the discriminator
    Disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in Disc_params]  # initialized with 0
    Disc_avg_updates = [(a, a + 0.0001*(p - a)) for p, a in zip(Disc_params, Disc_param_avg)]  # online update of historical parameters
    """
    #Disc_param_updates = nn.adam_updates(Disc_params, self.loss_disc, lr=self.lr, mom1=0.5)
    # collect parameters
    #Disc_params = LL.get_all_params(self.D_layers[-1], trainable=True)
    Disc_params = LL.get_all_params(self.D_layers, trainable=True)
    #Disc_param_updates = nn.adam_updates(Disc_params, loss_disc_x, lr=lr, mom1=0.5)  # loss for discriminator = supervised_loss + unsupervised loss
    Disc_param_updates = nn.adam_conditional_updates(Disc_params, self.loss_disc, mincost=threshold, lr=self.disc_lr, mom1=0.5)  # if loss_disc_x < mincost, don't update the discriminator
    Disc_param_avg = [th.shared(np.cast[th.config.floatX](0.*p.get_value())) for p in Disc_params]  # initialized with 0
    Disc_avg_updates = [(a, a+0.0001*(p-a)) for p, a in zip(Disc_params, Disc_param_avg)]  # online update of historical parameters
    #Disc_avg_givens = [(p,a) for p,a in zip(Disc_params,Disc_param_avg)]
    Disc_bn_updates = [u for l in LL.get_all_layers(self.D_layers[-1]) for u in getattr(l,'bn_updates',[])]
    Disc_bn_params = []
    for l in LL.get_all_layers(self.D_layers[-1]):
        if hasattr(l, 'avg_batch_mean'):
            Disc_bn_params.append(l.avg_batch_mean)
            Disc_bn_params.append(l.avg_batch_var)
    """

    ''' collect parameter updates for generators '''
    Gen_params = LL.get_all_params(self.G_weights_layer, trainable=True)
    Gen_params_updates = []
    Gen_bn_updates = []
    Gen_bn_params = []
    for i in range(self.args.ng):
        Gen_params.extend(LL.get_all_params(self.G_layers[i][-1], trainable=True))
        Gen_bn_updates.extend([u for l in LL.get_all_layers(self.G_layers[i][-1]) for u in getattr(l, 'bn_updates', [])])
        for l in LL.get_all_layers(self.G_layers[i][-1]):
            if hasattr(l, 'avg_batch_mean'):
                Gen_bn_params.append(l.avg_batch_mean)
                Gen_bn_params.append(l.avg_batch_var)
    Gen_param_updates = nn.adam_updates(Gen_params, self.loss_gen, lr=self.gen_lr, mom1=0.5)
    """
    #print(Gen_params)
    #train_batch_gen = th.function(inputs=[self.x, self.meanx, self.z, self.y_1hot, self.lr], outputs=[self.loss_gen], on_unused_input='warn')
    #theano.printing.debugprint(train_batch_gen)
    Gen_param_updates = nn.adam_updates(Gen_params, self.loss_gen, lr=self.lr, mom1=0.5)
    Gen_params = LL.get_all_params(self.G_layers[-1], trainable=True)
    Gen_param_updates = nn.adam_updates(Gen_params, self.loss_gen, lr=self.gen_lr, mom1=0.5)
    Gen_bn_updates = [u for l in LL.get_all_layers(self.G_layers[-1]) for u in getattr(l,'bn_updates',[])]
    Gen_bn_params = []
    for l in LL.get_all_layers(self.G_layers[-1]):
        if hasattr(l, 'avg_batch_mean'):
            Gen_bn_params.append(l.avg_batch_mean)
            Gen_bn_params.append(l.avg_batch_var)
    """

    ''' define training and testing functions '''
    #train_batch_disc = th.function(inputs=[x, meanx, y, lr], outputs=[loss_disc0_class, loss_disc0_adv, gen_x, x],
    #                               updates=disc0_param_updates+disc0_bn_updates)
    #th.printing.debugprint(self.loss_disc)
    train_batch_disc = th.function(inputs=[self.dummy_input, self.meanx, self.x, self.y, self.y_1hot, self.mincost, self.disc_lr],
                                   outputs=[self.loss_disc0_class, self.loss_disc0_adv],
                                   updates=Disc_param_updates + Disc_bn_updates + Disc_avg_updates)
    #th.printing.pydotprint(train_batch_disc, outfile="logreg_pydotprint_prediction.png", var_with_name_simple=True)
    #train_batch_gen = th.function(inputs=[x, meanx, y_1hot, lr], outputs=[loss_gen0_adv, loss_gen0_cond, loss_gen0_ent],
    #                              updates=gen0_param_updates+gen0_bn_updates)
    #train_batch_gen = th.function(inputs=gen_inputs, outputs=gen_outputs, updates=gen0_param_updates+gen0_bn_updates)
    #train_batch_gen = th.function(inputs=[self.dummy_input, self.x, self.meanx, self.z, self.y_1hot, self.lr], outputs=[self.loss_gen0_adv, self.loss_gen0_cond, self.loss_gen0_ent], updates=Gen_param_updates+Gen_bn_updates)
    train_batch_gen = th.function(inputs=[self.dummy_input, self.meanx, self.y, self.y_1hot, self.gen_lr],
                                  outputs=[self.loss_gen0_adv, self.loss_gen0_cond, self.loss_gen0_ent],
                                  updates=Gen_param_updates + Gen_bn_updates)
    # samplefun = th.function(inputs=[meanx, y_1hot], outputs=gen_x_joint)  # sample function: generating images by stacking all generators
    reconfun = th.function(inputs=[self.meanx, self.y_1hot], outputs=self.Gen_x)  # reconstruction function: use the bottom generator to generate images conditioned on real fc3 features
    mix_weights = th.function(inputs=[self.dummy_input],
                              outputs=[self.D_weights, self.Disc_weights_entropy, self.G_weights, self.Gen_weights_entropy])

    ''' load data '''
    print("Loading data...")
    meanimg, data = load_cifar_data(self.args.data_dir)
    trainx = data['X_train']
    trainy = data['Y_train']
    nr_batches_train = int(trainx.shape[0]/self.args.batch_size)
    # testx = data['X_test']
    # testy = data['Y_test']
    # nr_batches_test = int(testx.shape[0]/self.args.batch_size)

    ''' perform training '''
    #logs = {'loss_gen0_adv': [], 'loss_gen0_cond': [], 'loss_gen0_ent': [], 'loss_disc0_class': [], 'var_gen0': [], 'var_real0': []}  # training logs
    logs = {'loss_gen0_adv': [], 'loss_gen0_cond': [], 'loss_gen0_ent': [], 'loss_disc0_class': []}  # training logs
    for epoch in range(self.args.load_epoch + 1, self.args.num_epoch):
        begin = time.time()

        ''' shuffling '''
        inds = rng.permutation(trainx.shape[0])
        trainx = trainx[inds]
        trainy = trainy[inds]

        for t in range(nr_batches_train):
        #for t in range(1):
            ''' construct minibatch '''
            #batchz = np.random.uniform(size=(self.args.batch_size, self.args.z0dim)).astype(np.float32)
            batchx = trainx[t*self.args.batch_size:(t+1)*self.args.batch_size]
            batchy = trainy[t*self.args.batch_size:(t+1)*self.args.batch_size]
            batchy_1hot = np.zeros((self.args.batch_size, 10), dtype=np.float32)
            batchy_1hot[np.arange(self.args.batch_size), batchy] = 1  # convert to one-hot label
            # randomy = np.random.randint(10, size=(self.args.batch_size,))
            # randomy_1hot = np.zeros((self.args.batch_size, 10), dtype=np.float32)
            # randomy_1hot[np.arange(self.args.batch_size), randomy] = 1

            ''' train discriminators '''
            l_disc0_class, l_disc0_adv = train_batch_disc(0.0, meanimg, batchx, batchy, batchy_1hot, self.args.mincost, self.args.disc_lr)

            ''' train generators '''
            #prob_gen0 = np.exp()
            if l_disc0_adv > 0.65:
                n_iter = 1
            elif l_disc0_adv > 0.5:
                n_iter = 3
            elif l_disc0_adv > 0.3:
                n_iter = 5
            else:
                n_iter = 7
            for i in range(n_iter):
                #l_gen0_adv, l_gen0_cond, l_gen0_ent = train_batch_gen(0.0, batchx, meanimg, batchz, batchy_1hot, self.args.gen_lr)
                l_gen0_adv, l_gen0_cond, l_gen0_ent = train_batch_gen(0.0, meanimg, batchy, batchy_1hot, self.args.gen_lr)
            d_mix_weights, d_entloss, g_mix_weights, g_entloss = mix_weights(0.0)

            ''' store log information '''
            # logs['loss_gen1_adv'].append(l_gen1_adv)
            # logs['loss_gen1_cond'].append(l_gen1_cond)
            # logs['loss_gen1_ent'].append(l_gen1_ent)
            # logs['loss_disc1_class'].append(l_disc1_class)
            # logs['var_gen1'].append(np.var(np.array(g1)))
            # logs['var_real1'].append(np.var(np.array(r1)))
            logs['loss_gen0_adv'].append(l_gen0_adv)
            logs['loss_gen0_cond'].append(l_gen0_cond)
            logs['loss_gen0_ent'].append(l_gen0_ent)
            logs['loss_disc0_class'].append(l_disc0_class)
            #logs['var_gen0'].append(np.var(np.array(g0)))
            #logs['var_real0'].append(np.var(np.array(r0)))

        print("---Epoch %d, time = %ds" % (epoch, time.time() - begin))
        print("D_weights=[%.6f, %.6f, %.6f, %.6f, %.6f] loss = %0.6f" % (d_mix_weights[0,0], d_mix_weights[0,1], d_mix_weights[0,2], d_mix_weights[0,3], d_mix_weights[0,4], d_entloss))
        print("G_weights=[%.6f, %.6f, %.6f, %.6f, %.6f] loss = %0.6f" % (g_mix_weights[0,0], g_mix_weights[0,1], g_mix_weights[0,2], g_mix_weights[0,3], g_mix_weights[0,4], g_entloss))
        #print("G_weights=[%.6f]" % (g_mix_weights[0,0]))
        print("loss_disc0_adv = %.4f, loss_gen0_adv = %.4f, loss_gen0_cond = %.4f, loss_gen0_ent = %.4f, loss_disc0_class = %.4f" % (l_disc0_adv, l_gen0_adv, l_gen0_cond, l_gen0_ent, l_disc0_class))

        # ''' sample images by stacking all generators'''
        # imgs = samplefun(meanimg, refy_1hot)
        # imgs = np.transpose(np.reshape(imgs[:100,], (100, 3, 32, 32)), (0, 2, 3, 1))
        # imgs = [imgs[i] for i in range(100)]
        # rows = []
        # for i in range(10):
        #     rows.append(np.concatenate(imgs[i::10], 1))
        # imgs = np.concatenate(rows, 0)
        # scipy.misc.imsave(self.args.out_dir + "/mnist_sample_epoch{}.png".format(epoch), imgs)
        """
        ''' original images in the training set'''
        orix = np.transpose(np.reshape(batchx[:100,], (100, 3, 32, 32)), (0, 2, 3, 1))
        orix = [orix[i] for i in range(100)]
        rows = []
        for i in range(10):
            rows.append(np.concatenate(orix[i::10], 1))
        orix = np.concatenate(rows, 0)
        scipy.misc.imsave(self.args.out_dir + "/mnist_ori_epoch{}.png".format(epoch), orix)
        """

        if epoch % self.args.save_interval == 0:
            # np.savez(self.args.out_dir + "/disc1_params_epoch{}.npz".format(epoch), *LL.get_all_param_values(disc1_layers[-1]))
            # np.savez(self.args.out_dir + '/gen1_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(gen1_layers[-1]))
            # np.savez(self.args.out_dir + "/disc0_params_epoch{}.npz".format(epoch), *LL.get_all_param_values(disc0_layers))
            # np.savez(self.args.out_dir + '/gen0_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(gen0_layers))
            np.savez(self.args.out_dir + '/Dweights_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(self.D_weights_layer))
            np.savez(self.args.out_dir + '/Gweights_params_epoch{}.npz'.format(epoch), *LL.get_all_param_values(self.G_weights_layer))
            for i in range(self.args.ng):
                np.savez(self.args.out_dir + ("/disc%d_params_epoch%d.npz" % (i, epoch)), *LL.get_all_param_values(self.D_layers[i]))
                np.savez(self.args.out_dir + ("/gen%d_params_epoch%d.npz" % (i, epoch)), *LL.get_all_param_values(self.G_layers[i]))
            np.save(self.args.out_dir + '/logs.npy', logs)

            ''' reconstruct images '''
            reconx = reconfun(meanimg, batchy_1hot) + meanimg
            width = np.round(np.sqrt(self.args.batch_size)).astype(int)
            for i in range(self.args.ng):
                reconx_i = np.transpose(np.reshape(reconx[i*self.args.batch_size:(i+1)*self.args.batch_size], (self.args.batch_size, 3, 32, 32)), (0, 2, 3, 1))
                reconx_i = [reconx_i[j] for j in range(self.args.batch_size)]
                rows = []
                for j in range(width):
                    rows.append(np.concatenate(reconx_i[j::width], 1))
                reconx_i = np.concatenate(rows, 0)
                scipy.misc.imsave(self.args.out_dir + ("/cifar_recon_%d_epoch%d.png" % (i, epoch)), reconx_i)
layers.append(nn.DenseLayer(layers[-1], num_units=10, nonlinearity=None, train_scale=True))

# costs
labels = T.ivector()
x_lab = T.matrix()
x_unl = T.matrix()

temp = LL.get_output(gen_layers[-1], init=True)
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = LL.get_output(layers[-1], x_unl, deterministic=False)
output_before_softmax_fake = LL.get_output(layers[-1], gen_dat, deterministic=False)

z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
loss_unl = -0.5*T.mean(l_unl) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl))) + 0.5*T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
loss_gen = T.mean(T.square(mom_gen - mom_real))

# test error
output_before_softmax = LL.get_output(layers[-1], x_lab, deterministic=True)
def gan_unlabelled_classif(trainx, trainy, testx, testy, lab_cnt, inp_size, train_ex_cnt):
    trainy = trainy.astype(np.int32)
    testy = testy.astype(np.int32)
    trainx = trainx.reshape((-1, inp_size)).astype(th.config.floatX)
    testx = testx.reshape((-1, inp_size)).astype(th.config.floatX)
    assert train_ex_cnt == trainx.shape[0]

    # settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_data', type=int, default=1)
    parser.add_argument('--unlabeled_weight', type=float, default=1.)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--count', type=int, default=10)
    parser.add_argument('--iter_limit', type=int, default=300)
    args = parser.parse_args()
    print(args)

    # fixed random seeds
    rng = np.random.RandomState(args.seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
    data_rng = np.random.RandomState(args.seed_data)

    # npshow(trainx.reshape((-1, 27, 32))[0])
    trainx_unl = trainx.copy()
    trainx_unl2 = trainx.copy()
    nr_batches_train = int(trainx.shape[0] / args.batch_size)
    nr_batches_test = int(testx.shape[0] / args.batch_size)

    # select labeled data
    inds = data_rng.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    txs = []
    tys = []
    for _j in range(10):
        j = _j % lab_cnt
        txs.append(trainx[trainy == j][:args.count])
        tys.append(trainy[trainy == j][:args.count])
    txs = np.concatenate(txs, axis=0)
    tys = np.concatenate(tys, axis=0)

    # specify generative model
    noise = theano_rng.uniform(size=(args.batch_size, 100))
    gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
    gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=T.nnet.softplus), g=None))
    gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=T.nnet.softplus), g=None))
    gen_layers.append(nn.l2normalize(LL.DenseLayer(gen_layers[-1], num_units=inp_size, nonlinearity=T.nnet.sigmoid)))
    gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

    # specify supervised model
    layers = [LL.InputLayer(shape=(None, inp_size))]
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
    layers.append(nn.DenseLayer(layers[-1], num_units=1000))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=500))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=lab_cnt, nonlinearity=None, train_scale=True))

    # costs
    labels = T.ivector()
    x_lab = T.matrix()
    x_unl = T.matrix()

    temp = LL.get_output(gen_layers[-1], init=True)
    temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
    init_updates = [u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])]

    output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
    output_before_softmax_unl = LL.get_output(layers[-1], x_unl, deterministic=False)
    output_before_softmax_fake = LL.get_output(layers[-1], gen_dat, deterministic=False)

    z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
    z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
    z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
    l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
    l_unl = nn.log_sum_exp(output_before_softmax_unl)
    loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
    loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl))) + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

    train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

    mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
    mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
    loss_gen = T.mean(T.square(mom_gen - mom_real))

    # test error
    output_before_softmax = LL.get_output(layers[-1], x_lab, deterministic=True)
    test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

    # Theano functions for training and testing
    lr = T.scalar()
    disc_params = LL.get_all_params(layers, trainable=True)
    disc_param_updates = nn.adam_updates(disc_params, loss_lab + args.unlabeled_weight * loss_unl, lr=lr, mom1=0.5)
    disc_param_avg = [th.shared(np.cast[th.config.floatX](0. * p.get_value())) for p in disc_params]
    disc_avg_updates = [(a, a + 0.0001 * (p - a)) for p, a in zip(disc_params, disc_param_avg)]
    disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
    gen_params = LL.get_all_params(gen_layers[-1], trainable=True)
    gen_param_updates = nn.adam_updates(gen_params, loss_gen, lr=lr, mom1=0.5)

    init_param = th.function(inputs=[x_lab], outputs=None, updates=init_updates)
    train_batch_disc = th.function(inputs=[x_lab, labels, x_unl, lr],
                                   outputs=[loss_lab, loss_unl, train_err],
                                   updates=disc_param_updates + disc_avg_updates)
    train_batch_gen = th.function(inputs=[x_unl, lr], outputs=[loss_gen], updates=gen_param_updates)
    test_batch = th.function(inputs=[x_lab, labels], outputs=test_err, givens=disc_avg_givens)

    init_param(trainx[:500])  # data dependent initialization

    # //////////// perform training //////////////
    lr = 0.003
    for epoch in range(args.iter_limit):
        begin = time.time()

        # construct randomly permuted minibatches
        trainx = []
        trainy = []
        for t in range(trainx_unl.shape[0] // txs.shape[0]):  # integer division
            inds = rng.permutation(txs.shape[0])
            trainx.append(txs[inds])
            trainy.append(tys[inds])
        trainx = np.concatenate(trainx, axis=0)
        trainy = np.concatenate(trainy, axis=0)
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]
        trainx_unl2 = trainx_unl2[rng.permutation(trainx_unl2.shape[0])]

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        for t in range(nr_batches_train):
            ll, lu, te = train_batch_disc(trainx[t * args.batch_size:(t + 1) * args.batch_size],
                                          trainy[t * args.batch_size:(t + 1) * args.batch_size],
                                          trainx_unl[t * args.batch_size:(t + 1) * args.batch_size],
                                          lr)
            loss_lab += ll
            loss_unl += lu
            train_err += te
            e = train_batch_gen(trainx_unl2[t * args.batch_size:(t + 1) * args.batch_size], lr)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train

        # test
        test_err = 0.
        for t in range(nr_batches_test):
            test_err += test_batch(testx[t * args.batch_size:(t + 1) * args.batch_size],
                                   testy[t * args.batch_size:(t + 1) * args.batch_size])
        test_err /= nr_batches_test

        # report
        print("Iteration %d, time = %ds, loss_lab = %.4f, loss_unl = %.4f, train err = %.4f, test err = %.4f"
              % (epoch, time.time() - begin, loss_lab, loss_unl, train_err, test_err))
        sys.stdout.flush()
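# A hypothetical invocation of gan_unlabelled_classif, assuming flattened 28x28 inputs and
# 10 classes (file name, array keys, and shapes here are illustrative, not from the original script):
if __name__ == '__main__':
    data = np.load('mnist.npz')   # hypothetical archive with arrays x_train, y_train, x_test, y_test
    gan_unlabelled_classif(data['x_train'], data['y_train'],
                           data['x_test'], data['y_test'],
                           lab_cnt=10, inp_size=28 * 28,
                           train_ex_cnt=data['x_train'].shape[0])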
temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
init_updates = [u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])]

output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
output_before_softmax_unl = LL.get_output(layers[-1], x_unl, deterministic=False)
output_before_softmax_fake = LL.get_output(layers[-1], gen_dat, deterministic=False)

log_fx = output_before_softmax_lab - nn.log_sum_exp(output_before_softmax_lab).dimshuffle(0, 'x')
loss_entropy = -0.1 * T.mean(T.sum(T.exp(log_fx) * log_fx, axis=1))
l_unl = nn.log_sum_exp(output_before_softmax_unl)
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl))) + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
loss_gen = T.mean(T.square(mom_gen - mom_real))

# Theano functions for training and testing
lr = T.scalar()
disc_params = LL.get_all_params(layers, trainable=True)
disc_param_updates = nn.adam_updates(disc_params, loss_entropy +