import argparse
import sys
import time

import numpy as np
import theano as th
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
import lasagne
import lasagne.layers as LL
import lasagne.layers as ll  # some blocks below use the lowercase alias
import lasagne.nonlinearities as ln
from lasagne.init import Normal

import nn  # project-local helper module (weight-norm layers, log_sum_exp, adam_updates, ...)


def gan_unlabelled_classif(trainx, trainy, testx, testy, lab_cnt, inp_size,
                           train_ex_cnt):
    trainy = trainy.astype(np.int32)
    testy = testy.astype(np.int32)
    trainx = trainx.reshape((-1, inp_size)).astype(th.config.floatX)
    testx = testx.reshape((-1, inp_size)).astype(th.config.floatX)
    assert train_ex_cnt == trainx.shape[0]

    # settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_data', type=int, default=1)
    parser.add_argument('--unlabeled_weight', type=float, default=1.)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--count', type=int, default=10)
    parser.add_argument('--iter_limit', type=int, default=300)
    args = parser.parse_args()
    print(args)

    # fixed random seeds
    rng = np.random.RandomState(args.seed)
    theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
    data_rng = np.random.RandomState(args.seed_data)

    # npshow(trainx.reshape((-1, 27, 32))[0])
    trainx_unl = trainx.copy()
    trainx_unl2 = trainx.copy()
    nr_batches_train = int(trainx.shape[0] / args.batch_size)
    nr_batches_test = int(testx.shape[0] / args.batch_size)

    # select labeled data: keep `count` examples per class
    inds = data_rng.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    txs = []
    tys = []
    for _j in range(10):
        j = _j % lab_cnt
        txs.append(trainx[trainy == j][:args.count])
        tys.append(trainy[trainy == j][:args.count])
    txs = np.concatenate(txs, axis=0)
    tys = np.concatenate(tys, axis=0)

    # specify generative model
    noise = theano_rng.uniform(size=(args.batch_size, 100))
    gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
    gen_layers.append(nn.batch_norm(
        LL.DenseLayer(gen_layers[-1], num_units=500,
                      nonlinearity=T.nnet.softplus), g=None))
    gen_layers.append(nn.batch_norm(
        LL.DenseLayer(gen_layers[-1], num_units=500,
                      nonlinearity=T.nnet.softplus), g=None))
    gen_layers.append(nn.l2normalize(
        LL.DenseLayer(gen_layers[-1], num_units=inp_size,
                      nonlinearity=T.nnet.sigmoid)))
    gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

    # specify supervised model (discriminator/classifier)
    layers = [LL.InputLayer(shape=(None, inp_size))]
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
    layers.append(nn.DenseLayer(layers[-1], num_units=1000))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=500))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=lab_cnt,
                                nonlinearity=None, train_scale=True))

    # costs
    labels = T.ivector()
    x_lab = T.matrix()
    x_unl = T.matrix()

    # run both networks once with init=True to collect the data-dependent
    # initialization updates of the weight-normalized layers
    temp = LL.get_output(gen_layers[-1], init=True)
    temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
    init_updates = [u for l in gen_layers + layers
                    for u in getattr(l, 'init_updates', [])]

    output_before_softmax_lab = LL.get_output(layers[-1], x_lab,
                                              deterministic=False)
    output_before_softmax_unl = LL.get_output(layers[-1], x_unl,
                                              deterministic=False)
    output_before_softmax_fake = LL.get_output(layers[-1], gen_dat,
                                               deterministic=False)

    z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
    z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
    z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
    l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
    l_unl = nn.log_sum_exp(output_before_softmax_unl)
    loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
    loss_unl = (-0.5 * T.mean(l_unl)
                + 0.5 * T.mean(T.nnet.softplus(
                    nn.log_sum_exp(output_before_softmax_unl)))
                + 0.5 * T.mean(T.nnet.softplus(
                    nn.log_sum_exp(output_before_softmax_fake))))

    train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1),
                             labels))

    # feature-matching loss for the generator
    mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
    mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
    loss_gen = T.mean(T.square(mom_gen - mom_real))

    # test error
    output_before_softmax = LL.get_output(layers[-1], x_lab,
                                          deterministic=True)
    test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

    # Theano functions for training and testing
    lr = T.scalar()
    disc_params = LL.get_all_params(layers, trainable=True)
    disc_param_updates = nn.adam_updates(
        disc_params, loss_lab + args.unlabeled_weight * loss_unl,
        lr=lr, mom1=0.5)
    disc_param_avg = [th.shared(np.cast[th.config.floatX](0. * p.get_value()))
                      for p in disc_params]
    disc_avg_updates = [(a, a + 0.0001 * (p - a))
                        for p, a in zip(disc_params, disc_param_avg)]
    disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
    gen_params = LL.get_all_params(gen_layers[-1], trainable=True)
    gen_param_updates = nn.adam_updates(gen_params, loss_gen, lr=lr, mom1=0.5)

    init_param = th.function(inputs=[x_lab], outputs=None,
                             updates=init_updates)
    train_batch_disc = th.function(inputs=[x_lab, labels, x_unl, lr],
                                   outputs=[loss_lab, loss_unl, train_err],
                                   updates=disc_param_updates +
                                   disc_avg_updates)
    train_batch_gen = th.function(inputs=[x_unl, lr], outputs=[loss_gen],
                                  updates=gen_param_updates)
    test_batch = th.function(inputs=[x_lab, labels], outputs=test_err,
                             givens=disc_avg_givens)

    init_param(trainx[:500])  # data-dependent initialization

    # //////////// perform training //////////////
    lr = 0.003
    for epoch in range(args.iter_limit):
        begin = time.time()

        # construct randomly permuted minibatches by tiling the labeled set
        # to match the size of the unlabeled set
        trainx = []
        trainy = []
        for t in range(trainx_unl.shape[0] // txs.shape[0]):
            inds = rng.permutation(txs.shape[0])
            trainx.append(txs[inds])
            trainy.append(tys[inds])
        trainx = np.concatenate(trainx, axis=0)
        trainy = np.concatenate(trainy, axis=0)
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]
        trainx_unl2 = trainx_unl2[rng.permutation(trainx_unl2.shape[0])]

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        for t in range(nr_batches_train):
            ll, lu, te = train_batch_disc(
                trainx[t * args.batch_size:(t + 1) * args.batch_size],
                trainy[t * args.batch_size:(t + 1) * args.batch_size],
                trainx_unl[t * args.batch_size:(t + 1) * args.batch_size],
                lr)
            loss_lab += ll
            loss_unl += lu
            train_err += te
            e = train_batch_gen(
                trainx_unl2[t * args.batch_size:(t + 1) * args.batch_size],
                lr)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train

        # test
        test_err = 0.
        for t in range(nr_batches_test):
            test_err += test_batch(
                testx[t * args.batch_size:(t + 1) * args.batch_size],
                testy[t * args.batch_size:(t + 1) * args.batch_size])
        test_err /= nr_batches_test

        # report
        print("Iteration %d, time = %ds, loss_lab = %.4f, loss_unl = %.4f, "
              "train err = %.4f, test err = %.4f" %
              (epoch, time.time() - begin, loss_lab, loss_unl, train_err,
               test_err))
        sys.stdout.flush()
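
# Hedged usage sketch: how one might invoke gan_unlabelled_classif on flat
# MNIST-style data. The loader name `load_flat_dataset` and the shapes are
# illustrative assumptions, not part of this module.
# if __name__ == '__main__':
#     # trainx: (N, 784) float array, trainy: (N,) int labels, same for test
#     trainx, trainy, testx, testy = load_flat_dataset()  # hypothetical helper
#     gan_unlabelled_classif(trainx, trainy, testx, testy,
#                            lab_cnt=10, inp_size=784,
#                            train_ex_cnt=trainx.shape[0])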
# Conditional generator / discriminator for flat 28x28 inputs. `gen_in_y`,
# `dis_in_y`, `num_classes`, `noise_D`, `noise_D_data`, and `gen_final_non`
# are defined earlier in this script; MLPConcatLayer re-injects the class
# label at each stage.
gen_layers.append(ll.batch_norm(
    ll.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=ln.softplus,
                  name='gen-2'), name='gen-3'))
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                 name='gen-4'))
gen_layers.append(ll.batch_norm(
    ll.DenseLayer(gen_layers[-1], num_units=500, nonlinearity=ln.softplus,
                  name='gen-5'), name='gen-6'))
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes,
                                 name='gen-7'))
gen_layers.append(nn.l2normalize(
    ll.DenseLayer(gen_layers[-1], num_units=28 ** 2,
                  nonlinearity=gen_final_non, name='gen-8')))

dis_in_x = ll.InputLayer(shape=(None, 28 ** 2))
dis_in_y = ll.InputLayer(shape=(None,))
dis_layers = [dis_in_x]
dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D_data,
                                        name='dis-1'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes,
                                 name='dis-2'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=1000, name='dis-3'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes,
                                 name='dis-4'))
dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D,
                                        name='dis-5'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes,
                                 name='dis-6'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=500, name='dis-7'))
dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D,
                                        name='dis-8'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes,
                                 name='dis-9'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=250, name='dis-10'))
dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D,
                                        name='dis-11'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes,
                                 name='dis-12'))
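
# Hedged sketch (not necessarily this repo's actual implementation) of what
# MLPConcatLayer is assumed to do above: concatenate a one-hot encoding of the
# integer labels onto the incoming activations, so the class condition is
# available at every stage of the MLP. The name is suffixed to avoid clashing
# with the real layer.
class MLPConcatLayerSketch(ll.MergeLayer):
    def __init__(self, incomings, num_cls, **kwargs):
        super(MLPConcatLayerSketch, self).__init__(incomings, **kwargs)
        self.num_cls = num_cls

    def get_output_shape_for(self, input_shapes):
        x_shape, _ = input_shapes
        return (x_shape[0], x_shape[1] + self.num_cls)

    def get_output_for(self, inputs, **kwargs):
        x, y = inputs  # x: (batch, features), y: (batch,) integer labels
        y_onehot = T.extra_ops.to_one_hot(T.cast(y, 'int32'), self.num_cls)
        return T.concatenate([x, y_onehot], axis=1)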
# Triplet feature extractor: hyper-parameters and network definition.
batch_size = 100
learning_rate = 0.0003
seed = 1
n_epochs = 200
save_model_as = 'triplet_extractor.npz'

# setting = [4048, 4048, 1024]
# setting = [2048, 1048, 100]
setting = [4048, 4048, 2048]

# l_type = '':   use the loss from https://arxiv.org/abs/1704.02227
# l_type = 'L2': use the hinge loss max(d_+ - d_- + \lambda, 0) with \lambda = 10.0
l_type = 'L2'

layers = [LL.InputLayer(shape=(None, 2048))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[0]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[1]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[2]))

trainx = get_data('cifar_train_x.npz')
_, trainy = load(DATA_DIR, subset='train')
print(trainx.shape)

x_lab = T.matrix()
output_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
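
# Hedged sketch of the 'L2' branch named above: the hinge loss
# max(d_+ - d_- + lambda, 0) with lambda = 10.0, here taken on squared
# Euclidean distances between embeddings. The anchor/positive/negative
# matrices are illustrative placeholders; the actual triplet batch
# construction lives elsewhere in this script.
emb_a = T.matrix()  # anchor embeddings
emb_p = T.matrix()  # positives (same class as anchor)
emb_n = T.matrix()  # negatives (different class)
d_pos = T.sum(T.square(emb_a - emb_p), axis=1)
d_neg = T.sum(T.square(emb_a - emb_n), axis=1)
triplet_margin = 10.0  # the \lambda above
loss_l2 = T.mean(T.maximum(d_pos - d_neg + triplet_margin, 0.))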
gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(
    gen0_layers[-1], (args.batch_size, 128, 28, 28), (5, 5), stride=(2, 2),
    padding='half', W=Normal(0.02), nonlinearity=nn.relu)))  # deconv, upsample to 28x28
gen0_layers.append(nn.Deconv2DLayer(
    gen0_layers[-1], (args.batch_size, 3, 32, 32), (5, 5), stride=(1, 1),
    padding='valid', W=Normal(0.02), nonlinearity=T.nnet.sigmoid))  # deconv to 32x32

# gen_x_pre = LL.get_output(gen0_layers[-1], deterministic=False)
# gen_x = gen_x_pre - meanx
gen_x_joint = LL.get_output(gen0_layers[-1], {gen0_layer_fc3: gen_fc3},
                            deterministic=True) - meanx

# load pretrained gen0 parameters (stored as arr_0, arr_1, ... in the npz)
weights_toload = np.load('logs/gan0/gen0_params_epoch190.npz')
weights_list_toload = [weights_toload['arr_{}'.format(k)]
                       for k in range(len(weights_toload.files))]
LL.set_all_param_values(gen0_layers, weights_list_toload)

''' specify discriminator D1 '''
disc1_layers = [LL.InputLayer(shape=(None, 256))]
disc1_layers.append(nn.GaussianNoiseLayer(disc1_layers[-1], sigma=0.2))
disc1_layers.append(LL.DenseLayer(disc1_layers[-1], num_units=512,
                                  nonlinearity=nn.lrelu, W=Normal(0.02)))
disc1_layers.append(nn.batch_norm(LL.DenseLayer(
    disc1_layers[-1], num_units=512, nonlinearity=nn.lrelu, W=Normal(0.02))))
disc1_layer_shared = disc1_layers[-1]
disc1_layer_z_recon = LL.DenseLayer(disc1_layer_shared, num_units=50,
                                    W=Normal(0.02), nonlinearity=None)
disc1_layers.append(disc1_layer_z_recon)  # reconstructs the latent z
disc1_layer_adv = LL.DenseLayer(disc1_layer_shared, num_units=10,
                                W=Normal(0.02), nonlinearity=None)
disc1_layers.append(disc1_layer_adv)  # adversarial output

''' specify discriminator D0 '''
# disc0_layers = [LL.InputLayer(shape=(args.batch_size, 3, 32, 32))]
# disc0_layers.append(LL.GaussianNoiseLayer(disc0_layers[-1], sigma=0.05))
# disc0_layers.append(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3, 3), pad=1,
#                                        W=Normal(0.02), nonlinearity=nn.lrelu))
# disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(
#     disc0_layers[-1], 96, (3, 3), pad=1, stride=2, W=Normal(0.02),
#     nonlinearity=nn.lrelu)))  # 16x16
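
# Hedged counterpart of the checkpoint-loading code above: the 'arr_0',
# 'arr_1', ... keys it indexes are what numpy.savez produces when the
# parameter list is passed positionally, so saving presumably looks like the
# commented sketch below (the path/epoch are just the ones loaded above).
# params = LL.get_all_param_values(gen0_layers)
# np.savez('logs/gan0/gen0_params_epoch190.npz', *params)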