Example #1
import argparse
import sys
import time

import numpy as np
import theano as th
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
import lasagne
import lasagne.layers as LL

import nn  # project-local helpers: weight-norm layers, adam_updates, log_sum_exp, ...


def gan_unlabelled_classif(trainx, trainy, testx, testy, lab_cnt, inp_size,
                           train_ex_cnt):
    trainy = trainy.astype(np.int32)
    testy = testy.astype(np.int32)
    trainx = trainx.reshape((-1, inp_size)).astype(th.config.floatX)
    testx = testx.reshape((-1, inp_size)).astype(th.config.floatX)
    assert train_ex_cnt == trainx.shape[0]

    # settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_data', type=int, default=1)
    parser.add_argument('--unlabeled_weight', type=float, default=1.)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--count', type=int, default=10)
    parser.add_argument('--iter_limit', type=int, default=300)
    args = parser.parse_args()
    print(args)

    # fixed random seeds
    rng = np.random.RandomState(args.seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
    data_rng = np.random.RandomState(args.seed_data)

    # npshow(trainx.reshape((-1, 27, 32))[0])

    trainx_unl = trainx.copy()
    trainx_unl2 = trainx.copy()
    nr_batches_train = int(trainx.shape[0] / args.batch_size)
    nr_batches_test = int(testx.shape[0] / args.batch_size)

    # select labeled data
    inds = data_rng.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    txs = []
    tys = []
    for _j in range(10):
        j = _j % lab_cnt
        txs.append(trainx[trainy == j][:args.count])
        tys.append(trainy[trainy == j][:args.count])
    txs = np.concatenate(txs, axis=0)
    tys = np.concatenate(tys, axis=0)

    # specify generative model
    noise = theano_rng.uniform(size=(args.batch_size, 100))
    gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.batch_norm(LL.DenseLayer(gen_layers[-1],
                                    num_units=500,
                                    nonlinearity=T.nnet.softplus),
                      g=None))
    gen_layers.append(
        nn.l2normalize(
            LL.DenseLayer(gen_layers[-1],
                          num_units=inp_size,
                          nonlinearity=T.nnet.sigmoid)))
    gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

    # specify supervised model
    layers = [LL.InputLayer(shape=(None, inp_size))]
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
    layers.append(nn.DenseLayer(layers[-1], num_units=1000))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=500))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(
        nn.DenseLayer(layers[-1],
                      num_units=lab_cnt,
                      nonlinearity=None,
                      train_scale=True))

    # costs
    labels = T.ivector()
    x_lab = T.matrix()
    x_unl = T.matrix()

    # run one pass with init=True so each layer registers its data-dependent
    # initialization updates (the outputs themselves are discarded)
    temp = LL.get_output(gen_layers[-1], init=True)
    temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
    init_updates = [
        u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])
    ]

    output_before_softmax_lab = LL.get_output(layers[-1],
                                              x_lab,
                                              deterministic=False)
    output_before_softmax_unl = LL.get_output(layers[-1],
                                              x_unl,
                                              deterministic=False)
    output_before_softmax_fake = LL.get_output(layers[-1],
                                               gen_dat,
                                               deterministic=False)

    z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
    z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
    z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
    l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
    l_unl = nn.log_sum_exp(output_before_softmax_unl)
    loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
    loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
        T.nnet.softplus(
            nn.log_sum_exp(output_before_softmax_unl))) + 0.5 * T.mean(
                T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake)))

    train_err = T.mean(
        T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

    mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
    mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
    loss_gen = T.mean(T.square(mom_gen - mom_real))

    # test error
    output_before_softmax = LL.get_output(layers[-1],
                                          x_lab,
                                          deterministic=True)
    test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

    # Theano functions for training and testing
    lr = T.scalar()
    disc_params = LL.get_all_params(layers, trainable=True)
    disc_param_updates = nn.adam_updates(disc_params,
                                         loss_lab +
                                         args.unlabeled_weight * loss_unl,
                                         lr=lr,
                                         mom1=0.5)
    disc_param_avg = [
        th.shared(np.cast[th.config.floatX](0. * p.get_value()))
        for p in disc_params
    ]
    disc_avg_updates = [(a, a + 0.0001 * (p - a))
                        for p, a in zip(disc_params, disc_param_avg)]
    disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
    gen_params = LL.get_all_params(gen_layers[-1], trainable=True)
    gen_param_updates = nn.adam_updates(gen_params, loss_gen, lr=lr, mom1=0.5)
    init_param = th.function(inputs=[x_lab],
                             outputs=None,
                             updates=init_updates)
    train_batch_disc = th.function(inputs=[x_lab, labels, x_unl, lr],
                                   outputs=[loss_lab, loss_unl, train_err],
                                   updates=disc_param_updates +
                                   disc_avg_updates)
    train_batch_gen = th.function(inputs=[x_unl, lr],
                                  outputs=[loss_gen],
                                  updates=gen_param_updates)
    test_batch = th.function(inputs=[x_lab, labels],
                             outputs=test_err,
                             givens=disc_avg_givens)

    init_param(trainx[:500])  # data dependent initialization

    # //////////// perform training //////////////
    lr = 0.003
    for epoch in range(args.iter_limit):
        begin = time.time()

        # construct randomly permuted minibatches
        trainx = []
        trainy = []
        for t in range(trainx_unl.shape[0] // txs.shape[0]):
            inds = rng.permutation(txs.shape[0])
            trainx.append(txs[inds])
            trainy.append(tys[inds])
        trainx = np.concatenate(trainx, axis=0)
        trainy = np.concatenate(trainy, axis=0)
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]
        trainx_unl2 = trainx_unl2[rng.permutation(trainx_unl2.shape[0])]

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        for t in range(nr_batches_train):
            ll, lu, te = train_batch_disc(
                trainx[t * args.batch_size:(t + 1) * args.batch_size],
                trainy[t * args.batch_size:(t + 1) * args.batch_size],
                trainx_unl[t * args.batch_size:(t + 1) * args.batch_size], lr)
            loss_lab += ll
            loss_unl += lu
            train_err += te
            e = train_batch_gen(
                trainx_unl2[t * args.batch_size:(t + 1) * args.batch_size], lr)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train

        # test
        test_err = 0.
        for t in range(nr_batches_test):
            test_err += test_batch(
                testx[t * args.batch_size:(t + 1) * args.batch_size],
                testy[t * args.batch_size:(t + 1) * args.batch_size])
        test_err /= nr_batches_test

        # report
        print(
            "Iteration %d, time = %ds, loss_lab = %.4f, loss_unl = %.4f, train err = %.4f, test err = %.4f"
            % (epoch, time.time() - begin, loss_lab, loss_unl, train_err,
               test_err))
        sys.stdout.flush()
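
A minimal usage sketch for the function above; the data loader is a hypothetical placeholder, not part of the example:

# Hypothetical driver: load_mnist() is assumed to return flattened train/test arrays.
trainx, trainy, testx, testy = load_mnist()
gan_unlabelled_classif(trainx, trainy, testx, testy,
                       lab_cnt=10, inp_size=28 * 28,
                       train_ex_cnt=trainx.shape[0])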
Example #2
gen_layers.append(ll.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500,
                                              nonlinearity=ln.softplus, name='gen-2'), name='gen-3'))
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-4'))

gen_layers.append(ll.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500,
                                              nonlinearity=ln.softplus, name='gen-5'), name='gen-6'))
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-7'))
gen_layers.append(nn.l2normalize(ll.DenseLayer(gen_layers[-1], num_units=28 ** 2,
                                               nonlinearity=gen_final_non, name='gen-8')))

dis_in_x = ll.InputLayer(shape=(None, 28 ** 2))
dis_in_y = ll.InputLayer(shape=(None,))
dis_layers = [dis_in_x]

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D_data, name='dis-1'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-2'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=1000, name='dis-3'))

dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-4'))

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-5'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-6'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=500, name='dis-7'))

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-8'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-9'))
dis_layers.append(nn.DenseLayer(dis_layers[-1], num_units=250, name='dis-10'))

dis_layers.append(nn.GaussianNoiseLayer(dis_layers[-1], sigma=noise_D, name='dis-11'))
dis_layers.append(MLPConcatLayer([dis_layers[-1], dis_in_y], num_classes, name='dis-12'))
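
The snippet above conditions both the generator and the discriminator on the label input via MLPConcatLayer. A plausible sketch of such a layer, under the assumption that it appends a one-hot encoding of the labels to the incoming features (the repository's actual definition may differ):

import theano.tensor as T
import lasagne.layers as ll
from theano.tensor.extra_ops import to_one_hot


class MLPConcatLayer(ll.MergeLayer):
    """Concatenate a one-hot encoding of integer labels to a feature matrix
    (sketch under the assumption stated above)."""

    def __init__(self, incomings, num_classes, **kwargs):
        super(MLPConcatLayer, self).__init__(incomings, **kwargs)
        self.num_classes = num_classes

    def get_output_shape_for(self, input_shapes):
        x_shape, _ = input_shapes
        return (x_shape[0], x_shape[1] + self.num_classes)

    def get_output_for(self, inputs, **kwargs):
        x, y = inputs
        y_onehot = to_one_hot(T.cast(y, 'int32'), self.num_classes)
        return T.concatenate([x, y_onehot], axis=1)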
Example #3
batch_size = 100
learning_rate = 0.0003
seed = 1
n_epochs = 200

save_model_as = 'triplet_extractor.npz'
#setting = [4048, 4048, 1024]
#setting = [2048, 1048, 100]
setting = [4048, 4048, 2048]
# l_type = ''   : use the loss from https://arxiv.org/abs/1704.02227
# l_type = 'L2' : use the hinge loss max(d_+ - d_- + lambda, 0), with lambda = 10.0
l_type = 'L2'

layers = [LL.InputLayer(shape=(None, 2048))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[0]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[1]))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=setting[2]))

trainx = get_data('cifar_train_x.npz')
_, trainy = load(DATA_DIR, subset='train')

print(trainx.shape)

x_lab = T.matrix()
output_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
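
For reference, a hedged sketch of the 'L2' triplet loss described in the comment above, assuming the network is fed anchor, positive, and negative batches separately (the variable names are hypothetical):

x_pos = T.matrix()  # hypothetical positive inputs
x_neg = T.matrix()  # hypothetical negative inputs
output_pos = LL.get_output(layers[-1], x_pos, deterministic=False)
output_neg = LL.get_output(layers[-1], x_neg, deterministic=False)

# squared Euclidean distances between the anchor embedding and the positive / negative ones
d_pos = T.sum(T.sqr(output_lab - output_pos), axis=1)
d_neg = T.sum(T.sqr(output_lab - output_neg), axis=1)

lam = 10.0  # the lambda margin from the comment above
loss_l2 = T.mean(T.maximum(0., d_pos - d_neg + lam))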

Example #4
gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (args.batch_size, 128, 28, 28), (5, 5),
                                                  stride=(2, 2), padding='half',
                                                  W=Normal(0.02), nonlinearity=nn.relu)))  # deconv
gen0_layers.append(nn.Deconv2DLayer(gen0_layers[-1], (args.batch_size, 3, 32, 32), (5, 5),
                                    stride=(1, 1), padding='valid',
                                    W=Normal(0.02), nonlinearity=T.nnet.sigmoid))  # deconv

# gen_x_pre = LL.get_output(gen0_layers[-1], deterministic=False)
# gen_x = gen_x_pre - meanx
gen_x_joint = LL.get_output(gen0_layers[-1], {gen0_layer_fc3: gen_fc3}, deterministic=True) - meanx

weights_toload = np.load('logs/gan0/gen0_params_epoch190.npz')
weights_list_toload = [weights_toload['arr_{}'.format(k)] for k in range(len(weights_toload.files))]
LL.set_all_param_values(gen0_layers, weights_list_toload)

''' specify discriminator D1 '''
disc1_layers = [LL.InputLayer(shape=(None, 256))]
disc1_layers.append(nn.GaussianNoiseLayer(disc1_layers[-1], sigma=0.2))
disc1_layers.append(LL.DenseLayer(disc1_layers[-1], num_units=512, nonlinearity=nn.lrelu, W=Normal(0.02)))
disc1_layers.append(nn.batch_norm(LL.DenseLayer(disc1_layers[-1], num_units=512, nonlinearity=nn.lrelu, W=Normal(0.02))))
disc1_layer_shared = disc1_layers[-1]

disc1_layer_z_recon = LL.DenseLayer(disc1_layer_shared, num_units=50, W=Normal(0.02), nonlinearity=None)
disc1_layers.append(disc1_layer_z_recon)

disc1_layer_adv = LL.DenseLayer(disc1_layer_shared, num_units=10, W=Normal(0.02), nonlinearity=None)
disc1_layers.append(disc1_layer_adv)

''' specify discriminator D0 '''
# disc0_layers = [LL.InputLayer(shape=(args.batch_size, 3, 32, 32))]
# disc0_layers.append(LL.GaussianNoiseLayer(disc0_layers[-1], sigma=0.05))
# disc0_layers.append(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu))
# disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3,3), pad=1, stride=2, W=Normal(0.02), nonlinearity=nn.lrelu))) # 16x16
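
For completeness, a sketch of how a parameter file in the format loaded above could be produced; np.savez stores positional arrays under arr_0, arr_1, ..., which matches the 'arr_{}' keys read back when loading:

# Illustrative only: dump the generator parameters in the same npz layout.
param_values = LL.get_all_param_values(gen0_layers)
np.savez('logs/gan0/gen0_params_epoch190.npz', *param_values)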