def get_generator(batch_size, theano_rng, noise_length=100):
    noise_dim = (batch_size, noise_length)
    noise = theano_rng.uniform(size=noise_dim)
    gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
    gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512,
                                                  W=Normal(0.05), nonlinearity=nn.relu), g=None))
    gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (batch_size, 512, 4, 4)))
    gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (batch_size, 256, 8, 8), (5, 5),
                                                     W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 4 -> 8
    gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (batch_size, 128, 16, 16), (5, 5),
                                                     W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 8 -> 16
    gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (batch_size, 3, 32, 32), (5, 5),
                                                      W=Normal(0.05), nonlinearity=T.tanh),
                                     train_g=True, init_stdv=0.1))  # 16 -> 32
    return gen_layers
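# Usage sketch (illustrative, not from the original source): compiling the
# generator above and drawing one minibatch of samples. Assumes theano,
# lasagne, and the accompanying `nn` module are importable; the seed is arbitrary.
import theano as th
import lasagne.layers as ll
from theano.sandbox.rng_mrg import MRG_RandomStreams

theano_rng = MRG_RandomStreams(42)
gen_layers = get_generator(batch_size=100, theano_rng=theano_rng)
gen_dat = ll.get_output(gen_layers[-1])        # symbolic (100, 3, 32, 32) batch in [-1, 1]
sample_batch = th.function(inputs=[], outputs=gen_dat)
images = sample_batch()                        # numpy array of generated images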
def get_generator(self, meanx, z0, y_1hot):
    ''' specify generator G0, gen_x = G0(z0, y) '''
    """ Disabled earlier variant, conditioned on fc3 features instead of labels:
    # z0 = theano_rng.uniform(size=(self.args.batch_size, 16))  # uniform noise
    gen0_layers = [LL.InputLayer(shape=(self.args.batch_size, 50), input_var=z0)]  # input layer for z0
    gen0_layers.append(nn.batch_norm(LL.DenseLayer(
        nn.batch_norm(LL.DenseLayer(gen0_layers[0], num_units=128, W=Normal(0.02), nonlinearity=nn.relu)),
        num_units=128, W=Normal(0.02), nonlinearity=nn.relu)))  # noise embedding, 50 -> 128
    gen0_layer_z_embed = gen0_layers[-1]
    # gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 256), input_var=real_fc3))
    gen0_layers.append(LL.InputLayer(shape=(self.args.batch_size, 10), input_var=y_1hot))
    # input layer for real_fc3 in independent training, gen_fc3 in joint training
    gen0_layer_fc3 = gen0_layers[-1]
    gen0_layers.append(LL.ConcatLayer([gen0_layer_fc3, gen0_layer_z_embed], axis=1))  # concat noise and fc3 features
    gen0_layers.append(LL.ReshapeLayer(
        nn.batch_norm(LL.DenseLayer(gen0_layers[-1], num_units=256 * 5 * 5, W=Normal(0.02), nonlinearity=T.nnet.relu)),
        (self.args.batch_size, 256, 5, 5)))  # fc
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 256, 10, 10), (5, 5),
                                                      stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu)))
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 128, 14, 14), (5, 5),
                                                      stride=(1, 1), padding='valid', W=Normal(0.02), nonlinearity=nn.relu)))
    gen0_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 128, 28, 28), (5, 5),
                                                      stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu)))
    gen0_layers.append(nn.Deconv2DLayer(gen0_layers[-1], (self.args.batch_size, 3, 32, 32), (5, 5),
                                        stride=(1, 1), padding='valid', W=Normal(0.02), nonlinearity=T.nnet.sigmoid))
    gen_x_pre = LL.get_output(gen0_layers[-1], deterministic=False)
    gen_x = gen_x_pre - meanx
    # gen_x_joint = LL.get_output(gen0_layers[-1], {gen0_layer_fc3: gen_fc3}, deterministic=False) - meanx
    return gen0_layers, gen_x
    """
    gen_x_layer_z = LL.InputLayer(shape=(self.args.batch_size, self.args.z0dim),
                                  input_var=z0)  # noise z0
    # gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None)
    gen_x_layer_y = LL.InputLayer(shape=(self.args.batch_size, 10),
                                  input_var=y_1hot)  # conditioned on one-hot labels
    gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y, gen_x_layer_z], axis=1)  # concat labels and noise
    gen_x_layer_pool2 = LL.ReshapeLayer(
        nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256 * 5 * 5)),
        (self.args.batch_size, 256, 5, 5))
    gen_x_layer_dconv2_1 = nn.batch_norm(nn.Deconv2DLayer(
        gen_x_layer_pool2, (self.args.batch_size, 256, 10, 10), (5, 5),
        stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu))   # 5 -> 10
    gen_x_layer_dconv2_2 = nn.batch_norm(nn.Deconv2DLayer(
        gen_x_layer_dconv2_1, (self.args.batch_size, 128, 14, 14), (5, 5),
        stride=(1, 1), padding='valid', W=Normal(0.02), nonlinearity=nn.relu))  # 10 -> 14
    gen_x_layer_dconv1_1 = nn.batch_norm(nn.Deconv2DLayer(
        gen_x_layer_dconv2_2, (self.args.batch_size, 128, 28, 28), (5, 5),
        stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu))   # 14 -> 28
    gen_x_layer_x = nn.Deconv2DLayer(
        gen_x_layer_dconv1_1, (self.args.batch_size, 3, 32, 32), (5, 5),
        stride=(1, 1), padding='valid', W=Normal(0.02), nonlinearity=T.nnet.sigmoid)  # 28 -> 32
    # gen_x_layer_x = dnn.Conv2DDNNLayer(gen_x_layer_dconv1_2, 3, (1, 1), pad=0, stride=1,
    #                                    W=Normal(0.02), nonlinearity=T.nnet.sigmoid)
    gen_x_layers = [gen_x_layer_z, gen_x_layer_y, gen_x_layer_y_z, gen_x_layer_pool2,
                    gen_x_layer_dconv2_1, gen_x_layer_dconv2_2, gen_x_layer_dconv1_1, gen_x_layer_x]
    gen_x_pre = LL.get_output(gen_x_layer_x, deterministic=False)
    gen_x = gen_x_pre - meanx  # subtract the training-set mean image
    return gen_x_layers, gen_x
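# Usage sketch (illustrative; names outside this snippet are assumptions):
# wiring G0 above into a sampling function inside the model class, given a
# Theano RNG, a mean image tensor, and one-hot labels.
z0 = theano_rng.uniform(size=(self.args.batch_size, self.args.z0dim))
y_1hot = T.matrix()
meanx = T.tensor3()
gen_x_layers, gen_x = self.get_generator(meanx, z0, y_1hot)
# gen_x is symbolic; compile it to draw class-conditional samples
sample_x = th.function(inputs=[y_1hot, meanx], outputs=gen_x)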
def _sample_trained_minibatch_gan(params_file, n, batch_size, rs):
    import lasagne
    from lasagne.init import Normal
    import lasagne.layers as ll
    import theano as th
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import theano.tensor as T

    import nn

    theano_rng = MRG_RandomStreams(rs.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rs.randint(2**15)))

    noise_dim = (batch_size, 100)
    noise = theano_rng.uniform(size=noise_dim)
    ls = [ll.InputLayer(shape=noise_dim, input_var=noise)]
    ls.append(nn.batch_norm(ll.DenseLayer(ls[-1], num_units=4 * 4 * 512,
                                          W=Normal(0.05), nonlinearity=nn.relu), g=None))
    ls.append(ll.ReshapeLayer(ls[-1], (batch_size, 512, 4, 4)))
    ls.append(nn.batch_norm(nn.Deconv2DLayer(ls[-1], (batch_size, 256, 8, 8), (5, 5),
                                             W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 4 -> 8
    ls.append(nn.batch_norm(nn.Deconv2DLayer(ls[-1], (batch_size, 128, 16, 16), (5, 5),
                                             W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 8 -> 16
    ls.append(nn.weight_norm(nn.Deconv2DLayer(ls[-1], (batch_size, 3, 32, 32), (5, 5),
                                              W=Normal(0.05), nonlinearity=T.tanh),
                             train_g=True, init_stdv=0.1))  # 16 -> 32
    gen_dat = ll.get_output(ls[-1])

    with np.load(params_file) as d:
        params = [d['arr_{}'.format(i)] for i in range(9)]
    ll.set_all_param_values(ls[-1], params, trainable=True)

    sample_batch = th.function(inputs=[], outputs=gen_dat)
    samps = []
    while len(samps) < n:
        samps.extend(sample_batch())
    samps = np.array(samps[:n])
    return samps
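# Usage sketch (illustrative): sampling 1000 images from a trained generator
# checkpoint. The .npz file is assumed to hold the 9 trainable generator
# parameter arrays as 'arr_0' ... 'arr_8'; the path is hypothetical.
rs = np.random.RandomState(1)
samples = _sample_trained_minibatch_gan('gan_params.npz', n=1000, batch_size=100, rs=rs)
print(samples.shape)  # (1000, 3, 32, 32)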
def get_discriminator(self):
    ''' specify discriminator D0 '''
    """ Disabled earlier variant with lighter noise/dropout:
    disc0_layers = [LL.InputLayer(shape=(self.args.batch_size, 3, 32, 32))]
    disc0_layers.append(LL.GaussianNoiseLayer(disc0_layers[-1], sigma=0.05))
    disc0_layers.append(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3, 3), pad=1, W=Normal(0.02), nonlinearity=nn.lrelu))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 96, (3, 3), pad=1, stride=2,
                                                         W=Normal(0.02), nonlinearity=nn.lrelu)))  # 16x16
    disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3, 3), pad=1,
                                                         W=Normal(0.02), nonlinearity=nn.lrelu)))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3, 3), pad=1, stride=2,
                                                         W=Normal(0.02), nonlinearity=nn.lrelu)))  # 8x8
    disc0_layers.append(LL.DropoutLayer(disc0_layers[-1], p=0.1))
    disc0_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc0_layers[-1], 192, (3, 3), pad=0,
                                                         W=Normal(0.02), nonlinearity=nn.lrelu)))  # 6x6
    disc0_layer_shared = LL.NINLayer(disc0_layers[-1], num_units=192, W=Normal(0.02), nonlinearity=nn.lrelu)  # 6x6
    disc0_layers.append(disc0_layer_shared)
    disc0_layer_z_recon = LL.DenseLayer(disc0_layer_shared, num_units=50, W=Normal(0.02), nonlinearity=None)
    disc0_layers.append(disc0_layer_z_recon)  # also need to recover z from x
    disc0_layers.append(LL.GlobalPoolLayer(disc0_layer_shared))
    disc0_layer_adv = LL.DenseLayer(disc0_layers[-1], num_units=10, W=Normal(0.02), nonlinearity=None)
    disc0_layers.append(disc0_layer_adv)
    return disc0_layers, disc0_layer_adv, disc0_layer_z_recon
    """
    disc_x_layers = [LL.InputLayer(shape=(None, 3, 32, 32))]
    disc_x_layers.append(LL.GaussianNoiseLayer(disc_x_layers[-1], sigma=0.2))
    disc_x_layers.append(dnn.Conv2DDNNLayer(disc_x_layers[-1], 96, (3, 3), pad=1,
                                            W=Normal(0.01), nonlinearity=nn.lrelu))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 96, (3, 3), pad=1, stride=2,
                                                          W=Normal(0.01), nonlinearity=nn.lrelu)))  # 32 -> 16
    disc_x_layers.append(LL.DropoutLayer(disc_x_layers[-1], p=0.5))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3, 3), pad=1,
                                                          W=Normal(0.01), nonlinearity=nn.lrelu)))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3, 3), pad=1, stride=2,
                                                          W=Normal(0.01), nonlinearity=nn.lrelu)))  # 16 -> 8
    disc_x_layers.append(LL.DropoutLayer(disc_x_layers[-1], p=0.5))
    disc_x_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(disc_x_layers[-1], 192, (3, 3), pad=0,
                                                          W=Normal(0.01), nonlinearity=nn.lrelu)))  # 8 -> 6
    disc_x_layers_shared = LL.NINLayer(disc_x_layers[-1], num_units=192,
                                       W=Normal(0.01), nonlinearity=nn.lrelu)
    disc_x_layers.append(disc_x_layers_shared)
    disc_x_layer_z_recon = LL.DenseLayer(disc_x_layers_shared, num_units=self.args.z0dim,
                                         nonlinearity=None)
    disc_x_layers.append(disc_x_layer_z_recon)  # also need to recover z from x
    # disc_x_layers.append(nn.MinibatchLayer(disc_x_layers_shared, num_kernels=100))
    disc_x_layers.append(LL.GlobalPoolLayer(disc_x_layers_shared))
    disc_x_layer_adv = LL.DenseLayer(disc_x_layers[-1], num_units=10,
                                     W=Normal(0.01), nonlinearity=None)
    disc_x_layers.append(disc_x_layer_adv)
    # output_before_softmax_x = LL.get_output(disc_x_layer_adv, x, deterministic=False)
    # output_before_softmax_gen = LL.get_output(disc_x_layer_adv, gen_x, deterministic=False)
    # temp = LL.get_output(gen_x_layers[-1], deterministic=False, init=True)
    # temp = LL.get_output(disc_x_layers[-1], x, deterministic=False, init=True)
    # init_updates = [u for l in LL.get_all_layers(gen_x_layers) + LL.get_all_layers(disc_x_layers)
    #                 for u in getattr(l, 'init_updates', [])]
    return disc_x_layers, disc_x_layer_adv, disc_x_layer_z_recon
def _linear(self, x, h, bias_default=0.0):
    I, D = x.get_shape().as_list()[1], self._num_units
    w = weight('W', [I, D])
    u = weight('U', [D, D])
    b = bias('b', D, bias_default)

    if self.batch_norm:
        with tf.variable_scope('Linear1'):
            x_w = batch_norm(tf.matmul(x, w), is_training=self.is_training)
        with tf.variable_scope('Linear2'):
            h_u = batch_norm(tf.matmul(h, u), is_training=self.is_training)
        return x_w + h_u + b
    else:
        return tf.matmul(x, w) + tf.matmul(h, u) + b
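# NumPy sketch (illustrative) of the computation above: a recurrent linear map
# x·W + h·U + b, where the input-to-hidden and hidden-to-hidden streams are
# normalized separately when batch normalization is enabled. The shapes and
# epsilon below are assumptions, not taken from the original code.
import numpy as np

def bn(a, eps=1e-5):
    # per-feature normalization over the batch axis (scale/shift omitted)
    return (a - a.mean(axis=0)) / np.sqrt(a.var(axis=0) + eps)

x = np.random.randn(32, 64)   # batch of inputs, I = 64
h = np.random.randn(32, 128)  # previous hidden state, D = 128
W = np.random.randn(64, 128)
U = np.random.randn(128, 128)
b = np.zeros(128)
out = bn(x @ W) + bn(h @ U) + b  # mirrors the batch_norm branch of _linear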
def build_cnn(self):
    first_filter_size = int(self.config["sampling_rate"] / 2.0)
    first_filter_stride = int(self.config["sampling_rate"] / 16.0)

    with tf.variable_scope("cnn") as scope:
        net = nn.conv1d("conv1d_1", self.signals, 128, first_filter_size, first_filter_stride)
        net = nn.batch_norm("bn_1", net, self.is_training)
        net = tf.nn.relu(net, name="relu_1")

        net = nn.max_pool1d("maxpool1d_1", net, 8, 8)
        net = tf.layers.dropout(net, rate=0.5, training=self.is_training, name="drop_1")

        net = nn.conv1d("conv1d_2_1", net, 128, 8, 1)
        net = nn.batch_norm("bn_2_1", net, self.is_training)
        net = tf.nn.relu(net, name="relu_2_1")

        net = nn.conv1d("conv1d_2_2", net, 128, 8, 1)
        net = nn.batch_norm("bn_2_2", net, self.is_training)
        net = tf.nn.relu(net, name="relu_2_2")

        net = nn.conv1d("conv1d_2_3", net, 128, 8, 1)
        net = nn.batch_norm("bn_2_3", net, self.is_training)
        net = tf.nn.relu(net, name="relu_2_3")

        net = nn.max_pool1d("maxpool1d_2", net, 4, 4)
        net = tf.layers.flatten(net, name="flatten_2")
        net = tf.layers.dropout(net, rate=0.5, training=self.is_training, name="drop_2")

    return net
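# Worked example (illustrative): how the first convolution is sized from the
# sampling rate. For a hypothetical 100 Hz signal, the first filter spans half
# a second and strides one sixteenth of a second.
sampling_rate = 100                              # Hz (assumed for illustration)
first_filter_size = int(sampling_rate / 2.0)     # 50 samples = 0.5 s
first_filter_stride = int(sampling_rate / 16.0)  # 6 samples  = 0.0625 s
print(first_filter_size, first_filter_stride)    # 50 6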
y_1hot = T.matrix()
x = T.tensor4()
y = T.ivector()
meanx = T.tensor3()
lr = T.scalar()  # learning rate

real_fc3 = LL.get_output(enc_layer_fc3, x, deterministic=True)

''' specify generator G0, gen_x = G0(z0, h1) '''
z0 = theano_rng.uniform(size=(args.batch_size, 16))  # uniform noise
gen0_layers = [LL.InputLayer(shape=(args.batch_size, 16), input_var=z0)]  # input layer for z0
gen0_layers.append(nn.batch_norm(LL.DenseLayer(
    nn.batch_norm(LL.DenseLayer(gen0_layers[0], num_units=128, W=Normal(0.02), nonlinearity=nn.relu)),
    num_units=128, W=Normal(0.02), nonlinearity=nn.relu)))  # noise embedding, 16 -> 128
gen0_layer_z_embed = gen0_layers[-1]
gen0_layers.append(LL.InputLayer(shape=(args.batch_size, 256), input_var=real_fc3))
# input layer for real_fc3 in independent training, gen_fc3 in joint training
gen0_layer_fc3 = gen0_layers[-1]
gen0_layers.append(LL.ConcatLayer([gen0_layer_fc3, gen0_layer_z_embed], axis=1))
# concatenate noise and fc3 features
gen0_layers.append(
print(args)

# fixed random seeds
rng = np.random.RandomState(args.seed)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 19))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 9)))
data_rng = np.random.RandomState(args.seed_data)

''' input tensor variables '''
y_1hot = T.matrix()
x = T.tensor4()
meanx = T.tensor3()

''' specify generator G1, gen_fc3 = G1(z1, y) '''
z1 = theano_rng.uniform(size=(args.batch_size, 50))
gen1_layers = [nn.batch_norm(LL.DenseLayer(
    LL.InputLayer(shape=(args.batch_size, 50), input_var=z1),
    num_units=256, W=Normal(0.02), nonlinearity=T.nnet.relu))]  # input layer for z1
gen1_layer_z = gen1_layers[-1]
gen1_layers.append(nn.batch_norm(LL.DenseLayer(
    LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot),
    num_units=512, W=Normal(0.02), nonlinearity=T.nnet.relu)))  # input layer for labels
gen1_layer_y = gen1_layers[-1]
gen1_layers.append(LL.ConcatLayer([gen1_layer_z, gen1_layer_y], axis=1))
gen1_layers.append(nn.batch_norm(LL.DenseLayer(gen1_layers[-1], num_units=512,
                                               W=Normal(0.02), nonlinearity=T.nnet.relu)))
gen1_layers.append(nn.batch_norm(LL.DenseLayer(gen1_layers[-1], num_units=512,
                                               W=Normal(0.02), nonlinearity=T.nnet.relu)))
gen1_layers.append(LL.DenseLayer(gen1_layers[-1], num_units=256,
                                 W=Normal(0.02), nonlinearity=T.nnet.relu))

''' specify generator G0, gen_x = G0(z0, h1) '''
z0 = theano_rng.uniform(size=(args.batch_size, 16))  # uniform noise
gen0_layers = [LL.InputLayer(shape=(args.batch_size, 16), input_var=z0)]  # input layer for z0
gen0_layers.append(nn.batch_norm(LL.DenseLayer(
    nn.batch_norm(LL.DenseLayer(gen0_layers[0], num_units=128, W=Normal(0.02), nonlinearity=nn.relu)),
nr_batches_train = int(trainx.shape[0] / args.batch_size)  # 50000 / 100 = 500
nr_batches_test = int(testx.shape[0] / args.batch_size)    # 10000 / 100 = 100

# specify generative model
noise_dim = (args.batch_size, 50)
noise = theano_rng.uniform(size=noise_dim)
gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512,
                                              W=Normal(0.05), nonlinearity=nn.relu), g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (args.batch_size, 512, 4, 4)))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 256, 8, 8), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 4 -> 8
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 128, 16, 16), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 8 -> 16
gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 3, 32, 32), (5, 5),
                                                  W=Normal(0.05), nonlinearity=T.tanh),
                                 train_g=True, init_stdv=0.1))  # 16 -> 32
gen_dat = ll.get_output(gen_layers[-1])

disc_layers = [ll.InputLayer(shape=(None, 3, 32, 32))]
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.2))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 128, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 128, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 128, (3, 3), pad=1, stride=2,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.5))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 256, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 256, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 256, (3, 3), pad=1, stride=2,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
test_data = unpickle('/home/ubuntu/data/cifar-10-python/cifar-10-batches-py/test_batch')
testx = test_data['x']
testy = test_data['y']
nr_batches_train = int(trainx.shape[0] / args.batch_size)
nr_batches_test = int(testx.shape[0] / args.batch_size)

# whitening
whitener = nn.ZCA(x=trainx)
trainx_white = whitener.apply(trainx)
testx_white = whitener.apply(testx)

# specify model
if args.norm_type == 'weight_norm':
    normalizer = lambda l: nn.weight_norm(l)
elif args.norm_type == 'batch_norm':
    normalizer = lambda l: nn.batch_norm(l)
elif args.norm_type == 'mean_only_bn':
    normalizer = lambda l: nn.mean_only_bn(l)
elif args.norm_type == 'no_norm':
    normalizer = lambda l: nn.no_norm(l)
else:
    raise NotImplementedError('incorrect norm type')

layers = [ll.InputLayer(shape=(None, 3, 32, 32))]
layers.append(ll.GaussianNoiseLayer(layers[-1], sigma=0.15))
layers.append(normalizer(dnn.Conv2DDNNLayer(layers[-1], 96, (3, 3), pad=1, nonlinearity=nn.lrelu)))
layers.append(normalizer(dnn.Conv2DDNNLayer(layers[-1], 96, (3, 3), pad=1, nonlinearity=nn.lrelu)))
layers.append(normalizer(dnn.Conv2DDNNLayer(layers[-1], 96, (3, 3), pad=1, nonlinearity=nn.lrelu)))
layers.append(ll.MaxPool2DLayer(layers[-1], 2))
layers.append(ll.DropoutLayer(layers[-1], p=0.5))
layers.append(normalizer(dnn.Conv2DDNNLayer(layers[-1], 192, (3, 3), pad=1, nonlinearity=nn.lrelu)))
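# Alternative sketch (illustrative): the normalizer selection above expressed
# as a dispatch table instead of an if/elif chain. Same behavior, assuming the
# same nn module; this is not taken from the original code.
normalizers = {
    'weight_norm': nn.weight_norm,
    'batch_norm': nn.batch_norm,
    'mean_only_bn': nn.mean_only_bn,
    'no_norm': nn.no_norm,
}
try:
    normalizer = normalizers[args.norm_type]
except KeyError:
    raise NotImplementedError('incorrect norm type')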
def build_generator(self, version=1, encode=False):
    # from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    global mask
    if mask is None:
        mask = T.zeros(shape=(self.batch_size, 1, 64, 64), dtype=theano.config.floatX)
        mask = T.set_subtensor(mask[:, :, 16:48, 16:48], 1.)
        self.mask = mask

    noise_dim = (self.batch_size, 100)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    noise = theano_rng.uniform(size=noise_dim)
    input = ll.InputLayer(shape=noise_dim, input_var=noise)

    cropped_image = T.cast(T.zeros_like(self.input_) * mask + (1. - mask) * self.input_,
                           dtype=theano.config.floatX)
    encoder_input = T.concatenate([cropped_image, mask], axis=1)

    if version == 1:
        if encode:
            gen_layers = [ll.InputLayer(shape=(self.batch_size, 4, 64, 64),
                                        input_var=encoder_input)]
            # 4 x 64 x 64 --> 64 x 32 x 32
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 64, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 64 x 32 x 32 --> 128 x 16 x 16
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 128, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 128 x 16 x 16 --> 256 x 8 x 8
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 256, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 256 x 8 x 8 --> 512 x 4 x 4
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 512, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 512 x 4 x 4 --> 4000 x 1 x 1
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 4000, 4, 4, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 2048, 4, 2, pad=1, nonlinearity=nn.lrelu)))
            # gen_layers.append(ll.FlattenLayer(gen_layers[-1]))  # flatten this out
            # 4000 x 1 x 1 --> 512 x 4 x 4, then concat with noise
            gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                             (self.batch_size, 128 * 4, 4, 4),
                                                             (5, 5), stride=(4, 4))))
            latent_size = 2048
        else:
            gen_layers = [input]
            latent_size = 100

        gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], 128 * 8 * 4 * 4,
                                                      W=Normal(0.02))))
        gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (self.batch_size, 128 * 8, 4, 4)))

        # mixing coefficients (shared Theano floats) used to mix the generated
        # output and the image at each layer
        mixing_coefs = [theano.shared(lasagne.utils.floatX(0.25)) for i in range(3)]
        mixing_coefs.append(theano.shared(lasagne.utils.floatX(0.9)))
        border = 2

        gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                         (self.batch_size, 128 * 2, 8, 8), (5, 5),
                                                         W=Normal(0.02), nonlinearity=nn.relu),
                                        g=None))  # 4 -> 8
        # gen_layers.append(ll.DropoutLayer(gen_layers[-1], p=0.5))
        if reset:
            gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[0]))
        gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                         (self.batch_size, 128, 16, 16), (5, 5),
                                                         W=Normal(0.02), nonlinearity=nn.relu),
                                        g=None))  # 8 -> 16
        if reset:
            gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[1]))
        gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                         (self.batch_size, 64, 32, 32), (5, 5),
                                                         W=Normal(0.02), nonlinearity=nn.relu),
                                        g=None))  # 16 -> 32
        if reset:
            gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[2]))
        gen_layers.append(nn.Deconv2DLayer(gen_layers[-1], (self.batch_size, 3, 64, 64), (5, 5),
                                           W=Normal(0.02), nonlinearity=T.tanh))  # 32 -> 64
        if reset:
            gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[3],
                                                  trainable=False))

    for layer in gen_layers:
        print(layer.output_shape)
    print('')

    GAN.mixing_coefs = mixing_coefs
    return gen_layers
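# NumPy sketch (illustrative) of the masking logic above: the mask is 1 on the
# center 32x32 of a 64x64 image, so `cropped_image` keeps the border and zeroes
# the center region that the generator must inpaint.
import numpy as np

mask = np.zeros((1, 1, 64, 64), dtype='float32')
mask[:, :, 16:48, 16:48] = 1.
x = np.random.rand(1, 3, 64, 64).astype('float32')
cropped = np.zeros_like(x) * mask + (1. - mask) * x
assert cropped[0, :, 20, 20].sum() == 0.                  # center is blanked
assert np.allclose(cropped[0, :, 0, 0], x[0, :, 0, 0])    # border is kept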
nr_batches_test = int(testx.shape[0] / args.batch_size)

# generator
x = T.tensor4()
n_dim = args.n_dim
sample_dim = args.sample_dim
noise_dim = (args.batch_size, n_dim)
noise = theano_rng.uniform(size=noise_dim)
gen_img_input = ll.InputLayer(shape=(None, 3, 32, 32))
n_batch = gen_img_input.shape[0]
gen_noise_input = ll.InputLayer(shape=noise_dim)
gen_layers = [nn.batch_norm(dnn.Conv2DDNNLayer(gen_img_input, 32, (5, 5), stride=(2, 2), pad=2,
                                               W=Normal(0.05), nonlinearity=nn.lrelu),
                            g=None)]  # 32 -> 16
gen_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(gen_layers[-1], 64, (5, 5), stride=(2, 2), pad=2,
                                                   W=Normal(0.05), nonlinearity=nn.lrelu),
                                g=None))  # 16 -> 8
gen_layers.append(nn.batch_norm(dnn.Conv2DDNNLayer(gen_layers[-1], 128, (5, 5),
sym_z_input = T.matrix()
sym_z_rand = theano_rng.uniform(size=(batch_size_g, n_z))
sym_z_shared = T.tile(theano_rng.uniform((batch_size_g // num_classes, n_z)),
                      (num_classes, 1))

'''models'''
gen_in_z = ll.InputLayer(shape=(batch_size_g, n_z))
gen_in_y = ll.InputLayer(shape=(batch_size_g,))
gen_layers = [gen_in_z]
# gen_layers = [(nn.MoGLayer(gen_in_z, noise_dim=(batch_size_g, n_z)))]
gen_layers.append(nn.MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes))
gen_layers.append(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512,
                                W=Normal(0.05), nonlinearity=nn.relu))
gen_layers.append(nn.batch_norm(gen_layers[-1], g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (batch_size_g, 512, 4, 4)))
gen_layers.append(nn.ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes))
gen_layers.append(nn.Deconv2DLayer(gen_layers[-1], (batch_size_g, 256, 8, 8), (5, 5),
                                   W=Normal(0.05), nonlinearity=nn.relu))  # 4 -> 8
gen_layers.append(nn.batch_norm(gen_layers[-1], g=None))
gen_layers.append(nn.ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes))
gen_layers.append(nn.Deconv2DLayer(gen_layers[-1], (batch_size_g, 128, 16, 16), (5, 5),
                                   W=Normal(0.05), nonlinearity=nn.relu))  # 8 -> 16
gen_layers.append(nn.batch_norm(gen_layers[-1], g=None))
gen_layers.append(nn.ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes))
gen_layers.append(
trainx_unl2 = trainx.copy()
nr_batches_train = int(trainx.shape[0] / args.batch_size)
nr_batches_test = int(np.ceil(float(testx.shape[0]) / args.batch_size))

# input layers
noise_dim = (args.batch_size, 100)
noise = theano_rng.uniform(size=noise_dim)
x_input = ll.InputLayer(shape=(None, 3, 32, 32))
z_input = ll.InputLayer(shape=noise_dim, input_var=noise)

# specify generative model
gen_layers = [z_input]
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512,
                                              W=Normal(0.05), nonlinearity=nn.relu, name='g1'),
                                g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (args.batch_size, 512, 4, 4)))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 256, 8, 8), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu, name='g2'),
                                g=None))  # 4 -> 8
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 128, 16, 16), (5, 5),
                                                 W=Normal(0.05),
noise_dim = (args.batch_size, 100)
Z = th.shared(value=rng.uniform(-1.0, 1.0, noise_dim).astype(np.float32), name='Z', borrow=True)
sig = th.shared(value=rng.uniform(0.2, 0.2, noise_dim).astype(np.float32), name='sig', borrow=True)
noise = theano_rng.normal(size=noise_dim)
gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
gen_layers.append(nn.MoGLayer(gen_layers[-1], noise_dim=noise_dim, z=Z, sig=sig))
# comment out the MoGLayer line above when testing/training the baseline GAN model
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * gen_dim * 4,
                                              W=Normal(0.05), nonlinearity=nn.relu), g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (args.batch_size, gen_dim * 4, 4, 4)))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, gen_dim * 2, 8, 8),
                                                 (5, 5), W=Normal(0.05), nonlinearity=nn.relu),
                                g=None))  # 4 -> 8
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, gen_dim, 16, 16),
                                                 (5, 5), W=Normal(0.05),
# real_fc3 = LL.get_output(enc_layer_fc3, x, deterministic=True)
# y_pred, real_pool3 = LL.get_output([fc8, poo5], x, deterministic=False)
# real_pool3 = LL.get_output(poo5, x, deterministic=False)
# enc_error = T.mean(T.neq(T.argmax(y_pred, axis=1), y))  # classification error of the encoder,
#                                                         # to make sure it is working properly

# specify generator, gen_x = G(z, real_pool3)
z = theano_rng.uniform(size=(args.batch_size, 50))  # uniform noise
# y_1hot = T.matrix()
gen_x_layer_z = LL.InputLayer(shape=(args.batch_size, 50), input_var=z)  # noise z
# gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None)
gen_x_layer_y = LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot)  # conditioned on labels
gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y, gen_x_layer_z], axis=1)  # 10 + 50 = 60
gen_x_layer_pool2 = LL.ReshapeLayer(
    nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256 * 5 * 5)),
    (args.batch_size, 256, 5, 5))
gen_x_layer_dconv2_1 = nn.batch_norm(nn.Deconv2DLayer(
    gen_x_layer_pool2, (args.batch_size, 256, 10, 10), (5, 5),
    stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu))   # 5 -> 10
gen_x_layer_dconv2_2 = nn.batch_norm(nn.Deconv2DLayer(
    gen_x_layer_dconv2_1, (args.batch_size, 128, 14, 14), (5, 5),
    stride=(1, 1), padding='valid', W=Normal(0.02), nonlinearity=nn.relu))  # 10 -> 14
gen_x_layer_dconv1_1 = nn.batch_norm(nn.Deconv2DLayer(
    gen_x_layer_dconv2_2, (args.batch_size, 128, 28, 28), (5, 5),
    stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu))   # 14 -> 28
gen_x_layer_x = nn.Deconv2DLayer(
    gen_x_layer_dconv1_1, (args.batch_size, 3, 32, 32), (5, 5),
    stride=(1, 1), padding='valid', W=Normal(0.02), nonlinearity=T.nnet.sigmoid)  # 28 -> 32
# gen_x_layer_x = dnn.Conv2DDNNLayer(gen_x_layer_dconv1_2, 3, (1, 1), pad=0, stride=1,
#                                    W=Normal(0.02), nonlinearity=T.nnet.sigmoid)
print(gen_x_layer_x.output_shape)

gen_x_layers = [gen_x_layer_z, gen_x_layer_y, gen_x_layer_y_z, gen_x_layer_pool2, gen_x_layer_dconv2_1,
def main(num, seed, args):
    import time

    import numpy as np
    import theano as th
    import theano.tensor as T
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    import lasagne
    import lasagne.layers as ll
    from lasagne.init import Normal
    from lasagne.layers import dnn

    import nn
    import sys
    from checkpoints import save_weights, load_weights

    # fixed random seeds
    rng = np.random.RandomState(seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))

    # log-softmax for computing entropy
    def logsoftmax(x):
        xdev = x - T.max(x, 1, keepdims=True)
        lsm = xdev - T.log(T.sum(T.exp(xdev), 1, keepdims=True))
        return lsm

    # load MNIST data
    data = np.load(args.data_root)
    trainx = np.concatenate([data['x_train'], data['x_valid']], axis=0).astype(th.config.floatX)
    trainy = np.concatenate([data['y_train'], data['y_valid']]).astype(np.int32)
    testx = data['x_test'].astype(th.config.floatX)
    testy = data['y_test'].astype(np.int32)
    rng_data = np.random.RandomState(args.seed_data)
    inds = rng_data.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    trainx_unl = trainx[trainy == num]
    inds = np.arange(len(testy))[np.random.permutation(len(testy))]
    testx = testx[inds]
    testy = testy[inds]
    print(len(trainx_unl))

    # specify generator
    h = T.matrix()
    gen_layers = [ll.InputLayer(shape=(None, 100))]
    gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500, W=Normal(0.05),
                                                  nonlinearity=T.nnet.softplus, name='g1'),
                                    g=None, name='g_b1'))
    gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500, W=Normal(0.05),
                                                  nonlinearity=T.nnet.softplus, name='g2'),
                                    g=None, name='g_b2'))
    gen_layers.append(nn.l2normalize(ll.DenseLayer(gen_layers[-1], num_units=28**2, W=Normal(0.05),
                                                   nonlinearity=T.nnet.sigmoid, name='g3')))
    gen_dat = ll.get_output(gen_layers[-1], h, deterministic=False)

    # specify random field
    layers = [ll.InputLayer(shape=(None, 28**2))]
    layers.append(nn.DenseLayer(layers[-1], num_units=1000, theta=Normal(0.05), name='d_1'))
    layers.append(nn.DenseLayer(layers[-1], num_units=500, theta=Normal(0.05), name='d_2'))
    layers.append(nn.DenseLayer(layers[-1], num_units=250, theta=Normal(0.05), name='d_3'))
    layers.append(nn.DenseLayer(layers[-1], num_units=250, theta=Normal(0.05), name='d_4'))
    layers.append(nn.DenseLayer(layers[-1], num_units=250, theta=Normal(0.05), name='d_5'))
    layers.append(nn.DenseLayer(layers[-1], num_units=1, theta=Normal(0.05), nonlinearity=None,
                                train_scale=True, name='d_6'))

    # revision method
    if args.revison_method == 'revision_x_sgld':  # only x will be revised, SGLD
        x_revised = gen_dat
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        for i in range(args.L):
            loss_revision = T.sum(ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_revision, [x_revised])[0]
            x_revised = (x_revised + gradient_coefficient * gradient_x
                         + noise_coefficient * theano_rng.normal(size=T.shape(x_revised)))

        revision = th.function(inputs=[h, gradient_coefficient, noise_coefficient],
                               outputs=x_revised)
    elif args.revison_method == 'revision_x_sghmc':  # only x will be revised, SGHMC
        x_revised = gen_dat + args.sig * theano_rng.normal(size=T.shape(gen_dat))
        gradient_coefficient = T.scalar()
        beta = T.scalar()
        noise_coefficient = T.scalar()
        v_x = 0.
        for i in range(args.L):
            loss_revision = T.sum(ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_revision, [x_revised])[0]
            v_x = beta * v_x + gradient_coefficient * gradient_x
            x_revised = x_revised + v_x + noise_coefficient * theano_rng.normal(size=T.shape(x_revised))
        x_revised = T.clip(x_revised, 0., 1.)

        revision = th.function(inputs=[h, beta, gradient_coefficient, noise_coefficient],
                               outputs=x_revised, on_unused_input='ignore')
    elif args.revison_method == 'revision_joint_sgld':  # x and h will be revised jointly, SGLD
        x_revised = gen_dat
        h_revised = h
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        for i in range(args.L):
            loss_x_revision = T.sum(ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_x_revision, [x_revised])[0]
            x_revised = (x_revised + gradient_coefficient * gradient_x
                         + noise_coefficient * theano_rng.normal(size=T.shape(x_revised)))
            if i == 0:
                loss_h_revision = (T.sum(T.square(x_revised - gen_dat))
                                   + T.sum(T.square(h)) / args.batch_size)
                gradient_h = T.grad(loss_h_revision, [h])[0]
                h_revised = (h - gradient_coefficient * gradient_h
                             + noise_coefficient * theano_rng.normal(size=T.shape(h)))
            else:
                loss_h_revision = (T.sum(T.square(x_revised - gen_dat_h_revised))
                                   + T.sum(T.square(h_revised)) / args.batch_size)
                gradient_h = T.grad(loss_h_revision, [h_revised])[0]
                h_revised = (h_revised - gradient_coefficient * gradient_h
                             + noise_coefficient * theano_rng.normal(size=T.shape(h)))
            gen_dat_h_revised = ll.get_output(gen_layers[-1], h_revised, deterministic=False)

        revision = th.function(inputs=[h, gradient_coefficient, noise_coefficient],
                               outputs=[x_revised, h_revised])
    elif args.revison_method == 'revision_joint_sghmc':  # x and h will be revised jointly, SGHMC
        x_revised = gen_dat
        h_revised = h
        beta = T.scalar()
        gradient_coefficient = T.scalar()
        noise_coefficient = T.scalar()
        v_x = 0.
        for i in range(args.L):
            loss_x_revision = T.sum(ll.get_output(layers[-1], x_revised, deterministic=False))
            gradient_x = T.grad(loss_x_revision, [x_revised])[0]
            v_x = (v_x * beta + gradient_coefficient * gradient_x
                   + noise_coefficient * theano_rng.normal(size=T.shape(x_revised)))
            x_revised = x_revised + v_x
            if i == 0:
                loss_h_revision = (T.sum(T.square(x_revised - gen_dat))
                                   + T.sum(T.square(h)) / args.batch_size)
                gradient_h = T.grad(loss_h_revision, [h])[0]
                v_h = (gradient_coefficient * gradient_h
                       + noise_coefficient * theano_rng.normal(size=T.shape(h)))
                h_revised = h - v_h
            else:
                loss_h_revision = (T.sum(T.square(x_revised - gen_dat_h_revised))
                                   + T.sum(T.square(h_revised)) / args.batch_size)
                gradient_h = T.grad(loss_h_revision, [h_revised])[0]
                v_h = (v_h * beta + gradient_coefficient * gradient_h
                       + noise_coefficient * theano_rng.normal(size=T.shape(h)))
                h_revised = h_revised - v_h
            gen_dat_h_revised = ll.get_output(gen_layers[-1], h_revised, deterministic=False)

        revision = th.function(inputs=[h, beta, gradient_coefficient, noise_coefficient],
                               outputs=[x_revised, h_revised])

    x_revised = T.matrix()
    x_unl = T.matrix()

    temp = ll.get_output(layers[-1], x_unl, deterministic=False, init=True)
    init_updates = [u for l in layers for u in getattr(l, 'init_updates', [])]

    output_before_softmax_unl = ll.get_output(layers[-1], x_unl, deterministic=False)
    output_before_softmax_revised = ll.get_output(layers[-1], x_revised, deterministic=False)
    u_unl = T.mean(output_before_softmax_unl)
    u_revised = T.mean(output_before_softmax_revised)

    # unsupervised loss
    loss_unl = u_revised - u_unl + T.mean(output_before_softmax_unl**2) * args.fxp

    # Theano functions for training the random field
    lr = T.scalar()
    RF_params = ll.get_all_params(layers, trainable=True)
    RF_param_updates = lasagne.updates.rmsprop(loss_unl, RF_params, learning_rate=lr)
    # RF_param_updates = lasagne.updates.adam(loss_unl, RF_params, learning_rate=lr, beta1=0.5)
    train_RF = th.function(inputs=[x_revised, x_unl, lr], outputs=[loss_unl, u_unl],
                           updates=RF_param_updates)

    # weight norm initialization
    init_param = th.function(inputs=[x_unl], outputs=None, updates=init_updates)

    # prediction on test data
    output_before_softmax = ll.get_output(layers[-1], x_unl, deterministic=True)
    test_batch = th.function(inputs=[x_unl], outputs=output_before_softmax)

    # loss on generator
    loss_G = T.sum(T.square(x_revised - gen_dat))

    # Theano functions for training the generator
    gen_params = ll.get_all_params(gen_layers, trainable=True)
    gen_param_updates = lasagne.updates.rmsprop(loss_G, gen_params, learning_rate=lr)
    # gen_param_updates = lasagne.updates.adam(loss_G, gen_params, learning_rate=lr, beta1=0.5)
    train_G = th.function(inputs=[h, x_revised, lr], outputs=None, updates=gen_param_updates)

    # select labeled data
    # //////////// perform training //////////////
    lr_D = args.lrd
    lr_G = args.lrg
    beta = args.beta
    gradient_coefficient = args.gradient_coefficient
    noise_coefficient = args.noise_coefficient
    supervised_loss_weight = args.supervised_loss_weight
    entropy_loss_weight = 0.
    acc_all = []
    best_acc = 0
    nr_batches_train = len(trainx_unl) // args.batch_size
    nr_batches_test = int(np.ceil(len(testy) / float(args.batch_size)))

    for epoch in range(args.max_e):
        begin = time.time()
        # construct randomly permuted minibatches
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]

        if epoch == 0:
            init_param(trainx[:500])  # data based initialization
            if args.load:
                load_weights('mnist_model/mnist_jrf_' + args.load + '.npy',
                             layers + gen_layers)

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        f_unl_all = 0.
        for t in range(nr_batches_train):
            h = np.cast[th.config.floatX](rng.uniform(size=(args.batch_size, 100)))
            if args.revison_method == 'revision_x_sgld':
                x_revised = revision(h, gradient_coefficient, noise_coefficient)
            elif args.revison_method == 'revision_x_sghmc':
                x_revised = revision(h, beta, gradient_coefficient, noise_coefficient)
            elif args.revison_method == 'revision_joint_sgld':
                x_revised, h = revision(h, gradient_coefficient, noise_coefficient)
            elif args.revison_method == 'revision_joint_sghmc':
                x_revised, h = revision(h, beta, gradient_coefficient, noise_coefficient)
            ran_from = t * args.batch_size
            ran_to = (t + 1) * args.batch_size

            # update random field
            lo_unl, f_unl = train_RF(x_revised, trainx_unl[ran_from:ran_to], lr_D)
            loss_unl += lo_unl
            f_unl_all += f_unl

            # update generator
            train_G(h, x_revised, lr_G)

        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train
        f_unl_all /= nr_batches_train

        # test
        test_pred = np.zeros((len(testy), 1), dtype=th.config.floatX)
        for t in range(nr_batches_test):
            last_ind = np.minimum((t + 1) * args.batch_size, len(testy))
            first_ind = last_ind - args.batch_size
            test_pred[first_ind:last_ind] = test_batch(testx[first_ind:last_ind])
        test_pred = test_pred[:, 0]

        from sklearn.metrics import roc_auc_score
        test_err = roc_auc_score(testy == num, test_pred)
        acc_all.append(test_err)
        if acc_all[-1] > best_acc:
            best_acc = acc_all[-1]
        if (epoch + 1) % 10 == 0:
            print('best acc:', best_acc, test_err)
        f_test_all = np.mean(test_pred)

        print("epoch %d, time = %ds, loss_unl = %.4f, f unl = %.4f, f test = %.4f"
              % (epoch + 1, time.time() - begin, loss_unl, f_unl_all, f_test_all))
        sys.stdout.flush()

        if (epoch + 1) % 50 == 0:
            import os
            if not os.path.exists('mnist_model'):
                os.mkdir('mnist_model')
            params = ll.get_all_params(layers + gen_layers)
            save_weights('mnist_model/nrf_dec_ep%d_num%d_seed%d_%s.npy'
                         % (epoch + 1, num, seed, args.sf), params)
        if loss_unl < -100:
            break

    return best_acc
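# NumPy sketch (illustrative) of one SGLD revision step used above: ascend the
# random field's score f(x) with a small step plus Gaussian noise,
# x <- x + eps * grad f(x) + sigma * N(0, I). `f_grad`, eps, and sigma are
# hypothetical stand-ins for the Theano graph and args in the function above.
import numpy as np

def sgld_step(x, f_grad, eps=1e-2, sigma=1e-2, rng=np.random):
    return x + eps * f_grad(x) + sigma * rng.normal(size=x.shape)

# example with a toy quadratic score f(x) = -0.5 * ||x||^2, so grad f(x) = -x
x = np.random.randn(100, 784)
x = sgld_step(x, lambda a: -a)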
def build_generator(self, version=1, encode=False):
    # from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    global mask
    if mask is None:
        mask = T.zeros(shape=(self.batch_size, 1, 64, 64), dtype=theano.config.floatX)
        mask = T.set_subtensor(mask[:, :, 16:48, 16:48], 1.)
        self.mask = mask

    noise_dim = (self.batch_size, 100)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    noise = theano_rng.uniform(size=noise_dim)
    # mask_color = T.cast(T.cast(theano_rng.uniform(size=(self.batch_size,), low=0., high=2.),
    #                            'int16').dimshuffle(0, 'x', 'x', 'x') * mask,
    #                     dtype=theano.config.floatX)
    input = ll.InputLayer(shape=noise_dim, input_var=noise)

    cropped_image = T.cast(T.zeros_like(self.input_) * mask + (1. - mask) * self.input_,
                           dtype=theano.config.floatX)
    encoder_input = T.concatenate([cropped_image, mask], axis=1)  # concat along channels

    if version == 1:
        if encode:
            gen_layers = [ll.InputLayer(shape=(self.batch_size, 4, 64, 64),
                                        input_var=encoder_input)]
            # 4 x 64 x 64 --> 64 x 32 x 32
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 64, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 64 x 32 x 32 --> 128 x 16 x 16
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 128, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 128 x 16 x 16 --> 256 x 8 x 8
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 256, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 256 x 8 x 8 --> 512 x 4 x 4
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 512, 4, 2, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # 512 x 4 x 4 --> 4000 x 1 x 1
            gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 4000, 4, 4, pad=1,
                                                           nonlinearity=nn.lrelu)))
            # gen_layers.append(nn.batch_norm(ll.Conv2DLayer(gen_layers[-1], 2048, 4, 2, pad=1, nonlinearity=nn.lrelu)))
            # gen_layers.append(ll.FlattenLayer(gen_layers[-1]))  # flatten this out
            # 4000 x 1 x 1 --> 512 x 4 x 4, then concat with noise
            gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                             (self.batch_size, 128 * 4, 4, 4),
                                                             (5, 5), stride=(4, 4))))
            latent_size = 2048
        else:
            gen_layers = [input]
            latent_size = 100

        # TODO: put batch norm back on all layers, + g=None
        gen_layers.append(ll.DenseLayer(gen_layers[-1], 128 * 8 * 4 * 4, W=Normal(0.02)))
        gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (self.batch_size, 128 * 8, 4, 4)))

        # mixing coefficients (shared Theano floats) used to mix the generated
        # output and the image at each layer
        mixing_coefs = [theano.shared(lasagne.utils.floatX(0.05)) for i in range(2)]
        mixing_coefs.append(theano.shared(lasagne.utils.floatX(1)))
        border = 2

        gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                         (self.batch_size, 128 * 2, 8, 8), (5, 5),
                                                         W=Normal(0.02), nonlinearity=nn.relu),
                                        g=None))  # 4 -> 8
        # gen_layers.append(ll.DropoutLayer(gen_layers[-1], p=0.5))
        # gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[0], border=border))
        gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                         (self.batch_size, 128, 16, 16), (5, 5),
                                                         W=Normal(0.02), nonlinearity=nn.relu),
                                        g=None))  # 8 -> 16
        # gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[1], border=border*2))
        gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                         (self.batch_size, 64, 32, 32), (5, 5),
                                                         W=Normal(0.02), nonlinearity=nn.relu),
                                        g=None))  # 16 -> 32
        # gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[2], border=border*2*2))
        gen_layers.append(nn.Deconv2DLayer(gen_layers[-1], (self.batch_size, 3, 64, 64), (5, 5),
                                           W=Normal(0.02),
                                           nonlinearity=lasagne.nonlinearities.sigmoid))  # 32 -> 64
        # gen_layers.append(nn.ResetDeconvLayer(gen_layers[-1], cropped_image, mixing_coefs[3],
        #                                       border=border*2*2*2, trainable=False))

    for layer in gen_layers:
        print(layer.output_shape)
    print('')

    GAN.mixing_coefs = mixing_coefs
    return gen_layers
parser.add_argument('--batch_size', type=int, default=100)
parser.add_argument('--count', type=int, default=10)
args = parser.parse_args()
print(args)

# fixed random seeds
rng = np.random.RandomState(args.seed)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
data_rng = np.random.RandomState(args.seed_data)

# specify generative model
noise = theano_rng.uniform(size=(args.batch_size, 100))
gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500,
                                              nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500,
                                              nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.l2normalize(LL.DenseLayer(gen_layers[-1], num_units=28**2,
                                               nonlinearity=T.nnet.sigmoid)))
gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

# specify supervised model
layers = [LL.InputLayer(shape=(None, 28**2))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=1000))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=500))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
def build_critic(self, version=1, extra_middle=False, only_middle=False):
    assert self.generator is not None
    if version == 1:
        from lasagne.nonlinearities import sigmoid
        disc_layers = [ll.InputLayer(shape=(self.batch_size, 3, 64, 64),
                                     input_var=self.input_c)]
        # b_s x 3 x 64 x 64 --> b_s x 128 x 32 x 32
        disc_layers.append(nn.batch_norm(ll.Conv2DLayer(disc_layers[-1], 128, (3, 3), pad=1, stride=2,
                                                        W=Normal(0.03), nonlinearity=nn.lrelu)))  # nn.weight_norm
        # disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.5))
        # b_s x 128 x 32 x 32 --> b_s x 256 x 16 x 16
        disc_layers.append(nn.batch_norm(ll.Conv2DLayer(disc_layers[-1], 256, (3, 3), pad=1, stride=2,
                                                        W=Normal(0.03), nonlinearity=nn.lrelu)))
        # b_s x 256 x 16 x 16 --> b_s x 512 x 8 x 8
        # note: this layer will be used to compare statistics (output of disc_layers[3])
        disc_layers.append(nn.batch_norm(ll.Conv2DLayer(disc_layers[-1], 512, (3, 3), pad=1, stride=2,
                                                        W=Normal(0.03), nonlinearity=nn.lrelu)))
        # b_s x 512 x 8 x 8 --> b_s x 1024 x 4 x 4
        disc_layers.append(nn.batch_norm(ll.Conv2DLayer(disc_layers[-1], 1024, (3, 3), pad=1, stride=2,
                                                        W=Normal(0.03), nonlinearity=nn.lrelu)))
        disc_layers.append(ll.GlobalPoolLayer(disc_layers[-1]))
        disc_layers.append(nn.MinibatchLayer(disc_layers[-1], num_kernels=100))

        if extra_middle or only_middle:
            # goal: add a mechanism that checks for mode collapse, but only on
            # the middle (inpainted) part of the image
            layer = nn.ExtractMiddleLayer(disc_layers[0], extra=2)
            print(layer.output_shape)
            layer = nn.batch_norm(ll.Conv2DLayer(layer, 128, 5, stride=2, pad='same',
                                                 nonlinearity=nn.lrelu))
            print(layer.output_shape)
            layer = nn.batch_norm(ll.Conv2DLayer(layer, 256, 5, stride=2, pad='same',
                                                 nonlinearity=nn.lrelu))
            print(layer.output_shape)
            layer = nn.batch_norm(ll.Conv2DLayer(layer, 512, 5, stride=2, pad='same',
                                                 nonlinearity=nn.lrelu))
            # layer = nn.batch_norm(ll.Conv2DLayer(layer, 1024, 5, stride=2, pad='same',
            #                                      nonlinearity=nn.lrelu))
            layer = ll.GlobalPoolLayer(layer)
            layer = nn.MinibatchLayer(layer, num_kernels=400)
            if only_middle:
                disc_layers = []
                disc_layers.append(ll.DenseLayer(layer, num_units=1, W=Normal(0.03),
                                                 nonlinearity=None))
                return disc_layers
            disc_layers.append(ll.ConcatLayer([layer, disc_layers[-1]]))

        disc_layers.append(ll.DenseLayer(disc_layers[-1], num_units=1, W=Normal(0.03),
                                         nonlinearity=None))

        # Seeing how there is strong intra-batch normalization, it may be better
        # to do minibatch discrimination solely on the center of the images,
        # since the statistics of the border can throw it off.
        for layer in disc_layers:
            print(layer.output_shape)
        print('')
        return disc_layers
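# NumPy sketch (illustrative) of the minibatch-discrimination idea behind
# nn.MinibatchLayer: each sample is augmented with statistics of its distance
# to the rest of the batch, letting the critic detect mode collapse. The
# shapes and the projection tensor below are arbitrary stand-ins.
import numpy as np

f = np.random.randn(16, 192)             # per-sample features, batch of 16
T_proj = np.random.randn(192, 100, 5)    # projection to 100 kernels of dim 5
M = np.tensordot(f, T_proj, axes=1)      # (16, 100, 5)
d = np.abs(M[:, None] - M[None, :]).sum(-1)  # pairwise L1 distances, (16, 16, 100)
o = np.exp(-d).sum(1)                        # (16, 100) minibatch features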
real_fc3 = LL.get_output(enc_layer_fc3, x, deterministic=True)

''' specify generator G1, gen_fc3 = G1(z1, y) '''
z1 = theano_rng.uniform(size=(args.batch_size, 50))
gen1_layers = [LL.InputLayer(shape=(args.batch_size, 50), input_var=z1)]  # input layer for z1
gen1_layer_z = gen1_layers[-1]
gen1_layers.append(LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot))  # input layer for labels
gen1_layer_y = gen1_layers[-1]
gen1_layers.append(LL.ConcatLayer([gen1_layer_z, gen1_layer_y], axis=1))
gen1_layers.append(nn.batch_norm(LL.DenseLayer(gen1_layers[-1], num_units=512,
                                               W=Normal(0.02), nonlinearity=T.nnet.relu)))
gen1_layers.append(nn.batch_norm(LL.DenseLayer(gen1_layers[-1], num_units=512,
                                               W=Normal(0.02), nonlinearity=T.nnet.relu)))
gen1_layers.append(LL.DenseLayer(gen1_layers[-1], num_units=256,
                                 W=Normal(0.02), nonlinearity=T.nnet.relu))
gen_fc3 = LL.get_output(gen1_layers[-1], deterministic=False)
def gan_unlabelled_classif(trainx, trainy, testx, testy, lab_cnt, inp_size, train_ex_cnt):
    trainy = trainy.astype(np.int32)
    testy = testy.astype(np.int32)
    trainx = trainx.reshape((-1, inp_size)).astype(th.config.floatX)
    testx = testx.reshape((-1, inp_size)).astype(th.config.floatX)
    assert train_ex_cnt == trainx.shape[0]

    # settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--seed_data', type=int, default=1)
    parser.add_argument('--unlabeled_weight', type=float, default=1.)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--count', type=int, default=10)
    parser.add_argument('--iter_limit', type=int, default=300)
    args = parser.parse_args()
    print(args)

    # fixed random seeds
    rng = np.random.RandomState(args.seed)
    theano_rng = MRG_RandomStreams(rng.randint(2**15))
    lasagne.random.set_rng(np.random.RandomState(rng.randint(2**15)))
    data_rng = np.random.RandomState(args.seed_data)

    # npshow(trainx.reshape((-1, 27, 32))[0])
    trainx_unl = trainx.copy()
    trainx_unl2 = trainx.copy()
    nr_batches_train = int(trainx.shape[0] / args.batch_size)
    nr_batches_test = int(testx.shape[0] / args.batch_size)

    # select labeled data
    inds = data_rng.permutation(trainx.shape[0])
    trainx = trainx[inds]
    trainy = trainy[inds]
    txs = []
    tys = []
    for _j in range(10):
        j = _j % lab_cnt
        txs.append(trainx[trainy == j][:args.count])
        tys.append(trainy[trainy == j][:args.count])
    txs = np.concatenate(txs, axis=0)
    tys = np.concatenate(tys, axis=0)

    # specify generative model
    noise = theano_rng.uniform(size=(args.batch_size, 100))
    gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
    gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500,
                                                  nonlinearity=T.nnet.softplus), g=None))
    gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500,
                                                  nonlinearity=T.nnet.softplus), g=None))
    gen_layers.append(nn.l2normalize(LL.DenseLayer(gen_layers[-1], num_units=inp_size,
                                                   nonlinearity=T.nnet.sigmoid)))
    gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

    # specify supervised model
    layers = [LL.InputLayer(shape=(None, inp_size))]
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
    layers.append(nn.DenseLayer(layers[-1], num_units=1000))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=500))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=250))
    layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
    layers.append(nn.DenseLayer(layers[-1], num_units=lab_cnt, nonlinearity=None,
                                train_scale=True))

    # costs
    labels = T.ivector()
    x_lab = T.matrix()
    x_unl = T.matrix()

    temp = LL.get_output(gen_layers[-1], init=True)
    temp = LL.get_output(layers[-1], x_lab, deterministic=False, init=True)
    init_updates = [u for l in gen_layers + layers for u in getattr(l, 'init_updates', [])]

    output_before_softmax_lab = LL.get_output(layers[-1], x_lab, deterministic=False)
    output_before_softmax_unl = LL.get_output(layers[-1], x_unl, deterministic=False)
    output_before_softmax_fake = LL.get_output(layers[-1], gen_dat, deterministic=False)

    z_exp_lab = T.mean(nn.log_sum_exp(output_before_softmax_lab))
    z_exp_unl = T.mean(nn.log_sum_exp(output_before_softmax_unl))
    z_exp_fake = T.mean(nn.log_sum_exp(output_before_softmax_fake))
    l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
    l_unl = nn.log_sum_exp(output_before_softmax_unl)
    loss_lab = -T.mean(l_lab) + T.mean(z_exp_lab)
    loss_unl = (-0.5 * T.mean(l_unl)
                + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_unl)))
                + 0.5 * T.mean(T.nnet.softplus(nn.log_sum_exp(output_before_softmax_fake))))
    train_err = T.mean(T.neq(T.argmax(output_before_softmax_lab, axis=1), labels))

    # feature matching loss for the generator
    mom_gen = T.mean(LL.get_output(layers[-3], gen_dat), axis=0)
    mom_real = T.mean(LL.get_output(layers[-3], x_unl), axis=0)
    loss_gen = T.mean(T.square(mom_gen - mom_real))

    # test error
    output_before_softmax = LL.get_output(layers[-1], x_lab, deterministic=True)
    test_err = T.mean(T.neq(T.argmax(output_before_softmax, axis=1), labels))

    # Theano functions for training and testing
    lr = T.scalar()
    disc_params = LL.get_all_params(layers, trainable=True)
    disc_param_updates = nn.adam_updates(disc_params,
                                         loss_lab + args.unlabeled_weight * loss_unl,
                                         lr=lr, mom1=0.5)
    disc_param_avg = [th.shared(np.cast[th.config.floatX](0. * p.get_value()))
                      for p in disc_params]
    disc_avg_updates = [(a, a + 0.0001 * (p - a))
                        for p, a in zip(disc_params, disc_param_avg)]
    disc_avg_givens = [(p, a) for p, a in zip(disc_params, disc_param_avg)]
    gen_params = LL.get_all_params(gen_layers[-1], trainable=True)
    gen_param_updates = nn.adam_updates(gen_params, loss_gen, lr=lr, mom1=0.5)

    init_param = th.function(inputs=[x_lab], outputs=None, updates=init_updates)
    train_batch_disc = th.function(inputs=[x_lab, labels, x_unl, lr],
                                   outputs=[loss_lab, loss_unl, train_err],
                                   updates=disc_param_updates + disc_avg_updates)
    train_batch_gen = th.function(inputs=[x_unl, lr], outputs=[loss_gen],
                                  updates=gen_param_updates)
    test_batch = th.function(inputs=[x_lab, labels], outputs=test_err,
                             givens=disc_avg_givens)

    init_param(trainx[:500])  # data dependent initialization

    # //////////// perform training //////////////
    lr = 0.003
    for epoch in range(args.iter_limit):
        begin = time.time()

        # construct randomly permuted minibatches
        trainx = []
        trainy = []
        for t in range(trainx_unl.shape[0] // txs.shape[0]):
            inds = rng.permutation(txs.shape[0])
            trainx.append(txs[inds])
            trainy.append(tys[inds])
        trainx = np.concatenate(trainx, axis=0)
        trainy = np.concatenate(trainy, axis=0)
        trainx_unl = trainx_unl[rng.permutation(trainx_unl.shape[0])]
        trainx_unl2 = trainx_unl2[rng.permutation(trainx_unl2.shape[0])]

        # train
        loss_lab = 0.
        loss_unl = 0.
        train_err = 0.
        for t in range(nr_batches_train):
            ll, lu, te = train_batch_disc(
                trainx[t * args.batch_size:(t + 1) * args.batch_size],
                trainy[t * args.batch_size:(t + 1) * args.batch_size],
                trainx_unl[t * args.batch_size:(t + 1) * args.batch_size], lr)
            loss_lab += ll
            loss_unl += lu
            train_err += te
            e = train_batch_gen(
                trainx_unl2[t * args.batch_size:(t + 1) * args.batch_size], lr)
        loss_lab /= nr_batches_train
        loss_unl /= nr_batches_train
        train_err /= nr_batches_train

        # test
        test_err = 0.
        for t in range(nr_batches_test):
            test_err += test_batch(
                testx[t * args.batch_size:(t + 1) * args.batch_size],
                testy[t * args.batch_size:(t + 1) * args.batch_size])
        test_err /= nr_batches_test

        # report
        print("Iteration %d, time = %ds, loss_lab = %.4f, loss_unl = %.4f, "
              "train err = %.4f, test err = %.4f"
              % (epoch, time.time() - begin, loss_lab, loss_unl, train_err, test_err))
        sys.stdout.flush()
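# NumPy sketch (illustrative) of the unlabeled GAN loss above, in the style of
# Salimans et al.: with Z(x) = sum_k exp(l_k(x)), the loss is
# -0.5*E[log Z(x_real)] + 0.5*E[softplus(log Z(x_real))] + 0.5*E[softplus(log Z(x_fake))].
# The logits below are random stand-ins for the classifier outputs.
import numpy as np

def log_sum_exp(l):  # stable logsumexp over the class axis
    m = l.max(axis=1, keepdims=True)
    return (m + np.log(np.exp(l - m).sum(axis=1, keepdims=True)))[:, 0]

def softplus(a):     # numerically stable softplus
    return np.log1p(np.exp(-np.abs(a))) + np.maximum(a, 0)

l_real = np.random.randn(100, 10)  # class logits for unlabeled real data
l_fake = np.random.randn(100, 10)  # class logits for generated data
lse_r, lse_f = log_sum_exp(l_real), log_sum_exp(l_fake)
loss_unl = -0.5 * lse_r.mean() + 0.5 * softplus(lse_r).mean() + 0.5 * softplus(lse_f).mean()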
parser.add_argument('--unlabeled_weight', type=float, default=1.)
parser.add_argument('--batch_size', type=int, default=100)
parser.add_argument('--count', type=int, default=10)
args = parser.parse_args()
print(args)

# fixed random seeds
rng = np.random.RandomState(args.seed)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))
data_rng = np.random.RandomState(args.seed_data)

# specify generative model
noise = theano_rng.uniform(size=(args.batch_size, 100))
gen_layers = [LL.InputLayer(shape=(args.batch_size, 100), input_var=noise)]
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500,
                                              nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.batch_norm(LL.DenseLayer(gen_layers[-1], num_units=500,
                                              nonlinearity=T.nnet.softplus), g=None))
gen_layers.append(nn.l2normalize(LL.DenseLayer(gen_layers[-1], num_units=28**2,
                                               nonlinearity=T.nnet.sigmoid)))
gen_dat = LL.get_output(gen_layers[-1], deterministic=False)

# specify supervised model
layers = [LL.InputLayer(shape=(None, 28**2))]
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.3))
layers.append(nn.DenseLayer(layers[-1], num_units=1000))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=500))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
layers.append(nn.DenseLayer(layers[-1], num_units=250))
layers.append(nn.GaussianNoiseLayer(layers[-1], sigma=0.5))
lr = T.scalar()  # learning rate

# specify generator, gen_x = G(z, real_pool3)
z = theano_rng.uniform(size=(args.batch_size, 50))  # uniform noise
# y_1hot = T.matrix()
gen_x_layer_z = LL.InputLayer(shape=(args.batch_size, 50), input_var=z)  # noise z
# gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None)
gen_x_layer_y = LL.InputLayer(shape=(args.batch_size, 10),
                              input_var=y_1hot)  # conditioned on labels
gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y, gen_x_layer_z], axis=1)  # 10 + 50 = 60
gen_x_layer_pool2 = LL.ReshapeLayer(
    nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256 * 5 * 5)),
    (args.batch_size, 256, 5, 5))
gen_x_layer_dconv2_1 = nn.batch_norm(
    nn.Deconv2DLayer(gen_x_layer_pool2, (args.batch_size, 256, 10, 10), (5, 5),
                     stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu))  # 5 -> 10
gen_x_layer_dconv2_2 = nn.batch_norm(
    nn.Deconv2DLayer(gen_x_layer_dconv2_1, (args.batch_size, 128, 14, 14), (5, 5),
                     stride=(1, 1), padding='valid', W=Normal(0.02), nonlinearity=nn.relu))  # 10 -> 14
rng_data = np.random.RandomState(args.seed_data)
rng = np.random.RandomState(args.seed)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))

# load CIFAR-10
trainx, trainy = cifar10_data.load(args.data_dir, subset='train')
trainx_unl = trainx.copy()
nr_batches_train = trainx.shape[0] // args.batch_size

# specify generative model
noise_dim = (args.batch_size, 100)
noise = theano_rng.uniform(size=noise_dim)
gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512,
                                              W=Normal(0.05), nonlinearity=nn.relu), g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (args.batch_size, 512, 4, 4)))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 256, 8, 8), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 4 -> 8
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 128, 16, 16), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 8 -> 16
gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 3, 32, 32), (5, 5),
                                                  W=Normal(0.05), nonlinearity=T.tanh),
                                 train_g=True, init_stdv=0.1))  # 16 -> 32
gen_dat = ll.get_output(gen_layers[-1])

# specify discriminative model
disc_layers = [ll.InputLayer(shape=(None, 3, 32, 32))]
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.2))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3, 3), pad=1, stride=2,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.5))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 192, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 192, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
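The discriminator layers above use nn.lrelu, a leaky rectifier defined in this repo's nn module. A sketch of what such a nonlinearity looks like; the 0.2 slope is an assumption here (implementations vary, commonly 0.1 or 0.2):

# Leaky ReLU sketch: identity for x > 0, a small slope otherwise.
def lrelu(x, alpha=0.2):   # alpha is an assumed value, not from this script
    return T.maximum(x, alpha * x)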
# load chest X-ray data
trainx, trainy = cxr_data.load_cxr(args.data_dir, subset='train')
trainx_unl = trainx.copy()
trainx_unl2 = trainx.copy()
testx, testy = cxr_data.load_cxr(args.data_dir, subset='test')
nr_batches_train = trainx.shape[0] // args.batch_size
nr_batches_test = testx.shape[0] // args.batch_size
print("DATA LOADED")

# specify generative model
noise_dim = (args.batch_size, 100)
noise = theano_rng.uniform(size=noise_dim)
orig_gen_n = 1024
gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * orig_gen_n,
                                              W=Normal(0.05), nonlinearity=nn.relu), g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (args.batch_size, orig_gen_n, 4, 4)))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                 (args.batch_size, orig_gen_n // 2, 8, 8), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 4 -> 8
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                 (args.batch_size, orig_gen_n // 4, 16, 16), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 8 -> 16
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1],
                                                 (args.batch_size, orig_gen_n // 8, 32, 32), (5, 5),
                                                 W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 16 -> 32
gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, 1, 64, 64), (5, 5),
                                                  W=Normal(0.05), nonlinearity=T.tanh),
                                 train_g=True, init_stdv=0.1))  # 32 -> 64
gen_dat = ll.get_output(gen_layers[-1])
print("GENERATOR CREATED")

# specify discriminative model
disc_layers = [ll.InputLayer(shape=(None, 1, 64, 64))]
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.5))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3, 3), pad=1,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3, 3), pad=1, stride=2,
                                                     W=Normal(0.05), nonlinearity=nn.lrelu)))
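The floor divisions in the channel counts matter: in Python 3, / on ints is float division, and a float channel count would break the shape tuples passed to Deconv2DLayer. A two-line check:

assert isinstance(1024 // 2, int)    # 512, valid as a channel count
assert isinstance(1024 / 2, float)   # 512.0, would break the shape spec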
def build(self):
    params = self.params
    V, d, A = params.embed_size, params.hidden_size, self.words.vocab_size

    # placeholders
    input = tf.placeholder('int32', shape=[self.params.batch_size, self.params.max_fact_count,
                                           self.params.max_sent_size],
                           name='x')  # [num_batch, fact_count, sentence_len]
    question = tf.placeholder('int32', shape=[self.params.batch_size, self.params.max_ques_size],
                              name='q')  # [num_batch, question_len]
    answer = tf.placeholder('int32', shape=[self.params.batch_size],
                            name='y')  # [num_batch] - one-word answer
    fact_counts = tf.placeholder('int64', shape=[self.params.batch_size], name='fc')
    input_mask = tf.placeholder('float32', shape=[self.params.batch_size, self.params.max_fact_count,
                                                  self.params.max_sent_size, self.params.embed_size],
                                name='xm')
    is_training = tf.placeholder(tf.bool)
    self.att = tf.constant(0.)

    # prepare parameters
    gru = tf.nn.rnn_cell.GRUCell(self.params.hidden_size)
    l = self.positional_encoding()
    embedding = weight('embedding', [self.words.vocab_size, self.params.embed_size],
                       init='uniform', range=3 ** (1 / 2))

    with tf.name_scope('SentenceReader'):
        input_list = tf.unstack(tf.transpose(input))  # L x [F, N]
        input_embed = []
        for facts in input_list:
            facts = tf.unstack(facts)
            embed = tf.stack([tf.nn.embedding_lookup(embedding, w) for w in facts])  # [F, N, V]
            input_embed.append(embed)
        # apply positional encoding
        input_embed = tf.transpose(tf.stack(input_embed), [2, 1, 0, 3])  # [N, F, L, V]
        encoded = l * input_embed * input_mask
        facts = tf.reduce_sum(encoded, 2)  # [N, F, V]

    # dropout on the fact representations
    facts = dropout(facts, params.keep_prob, is_training)

    with tf.name_scope('InputFusion'):
        # bidirectional RNN over facts
        with tf.variable_scope('Forward'):
            forward_states, _ = tf.nn.dynamic_rnn(gru, facts, fact_counts, dtype=tf.float32)
        with tf.variable_scope('Backward'):
            facts_reverse = tf.reverse_sequence(facts, fact_counts, 1)
            backward_states, _ = tf.nn.dynamic_rnn(gru, facts_reverse, fact_counts, dtype=tf.float32)
        # use both forward and backward states
        facts = forward_states + backward_states  # [N, F, d]

    with tf.variable_scope('Question'):
        ques_list = tf.unstack(tf.transpose(question))
        ques_embed = tf.stack([tf.nn.embedding_lookup(embedding, w) for w in ques_list])
        initial_state = gru.zero_state(self.params.batch_size, dtype=tf.float32)
        _, question_vec = tf.nn.dynamic_rnn(gru, ques_embed, initial_state=initial_state,
                                            dtype=tf.float32, time_major=True)

    # episodic memory
    with tf.variable_scope('Episodic'):
        episode = EpisodeModule(self.params.hidden_size, question_vec, facts,
                                is_training, self.params.batch_norm)
        memory = tf.identity(question_vec)
        for t in range(params.memory_step):
            with tf.variable_scope('Layer%d' % t) as scope:
                if params.memory_update == 'gru':
                    memory = gru(episode.new(memory), memory)[0]
                else:
                    # ReLU update
                    c = episode.new(memory)
                    concated = tf.concat([memory, c, question_vec], 1)
                    w_t = weight('w_t', [3 * d, d])
                    z = tf.matmul(concated, w_t)
                    if params.batch_norm:
                        z = batch_norm(z, is_training)
                    else:
                        b_t = bias('b_t', d)
                        z = z + b_t
                    memory = tf.nn.relu(z)  # [N, d]
                scope.reuse_variables()

    # regularizations
    if params.batch_norm:
        memory = batch_norm(memory, is_training=is_training)
    memory = dropout(memory, params.keep_prob, is_training)

    with tf.name_scope('Answer'):
        # answer module: feed-forward version (one-word answer)
        w_a = weight('w_a', [d, A], init='xavier')
        logits = tf.matmul(memory, w_a)  # [N, A]

    with tf.name_scope('Loss'):
        # cross-entropy loss with weight decay on the 'l2' collection
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=answer)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + params.weight_decay * tf.add_n(tf.get_collection('l2'))

    with tf.variable_scope('Accuracy'):
        predicts = tf.cast(tf.argmax(logits, 1), 'int32')
        corrects = tf.equal(predicts, answer)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    # training
    optimizer = tf.train.AdamOptimizer(params.learning_rate)
    opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

    # placeholders
    self.x = input
    self.xm = input_mask
    self.q = question
    self.y = answer
    self.fc = fact_counts
    self.is_training = is_training

    # tensors
    self.total_loss = total_loss
    self.num_corrects = num_corrects
    self.accuracy = accuracy
    self.opt_op = opt_op
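A hypothetical training-step driver for the graph built above; names like `model` and the `batch` dict keys are assumptions for illustration, not from the original:

# Run one optimization step (TF1 session style, matching the graph above).
def train_step(sess, model, batch):
    feed = {model.x: batch['facts'], model.xm: batch['fact_mask'],
            model.q: batch['questions'], model.y: batch['answers'],
            model.fc: batch['fact_counts'], model.is_training: True}
    _, loss = sess.run([model.opt_op, model.total_loss], feed_dict=feed)
    return loss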
cla_layers.append(convlayer(l=cla_layers[-1], bn=True, dr=0, ps=1, n_kerns=256,
                            d_kerns=(3, 3), pad='same', stride=1, W=Normal(0.05),
                            nonlinearity=ln.rectify, name='cla-6'))
cla_layers.append(ll.GlobalPoolLayer(cla_layers[-1]))
cla_layers.append(ll.DenseLayer(cla_layers[-1], num_units=num_classes,
                                W=lasagne.init.Normal(1e-2, 0),
                                nonlinearity=ln.softmax, name='cla-7'))

################# Generator
gen_in_z = ll.InputLayer(shape=(None, n_z))
gen_in_y = ll.InputLayer(shape=(None,))
gen_layers = [gen_in_z]
gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-5'))
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=512 * 4 * 4,
                                              nonlinearity=ln.linear, name='gen-6'),
                                g=None, name='gen-61'))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (-1, 512, 4, 4), name='gen-7'))
gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-8'))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 256, 8, 8),
                                                 filter_size=(4, 4), stride=(2, 2), W=Normal(0.05),
                                                 nonlinearity=nn.relu, name='gen-11'),
                                g=None, name='gen-12'))  # 4 -> 8
gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-9'))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 128, 16, 16),
                                                 filter_size=(4, 4), stride=(2, 2), W=Normal(0.05),
                                                 nonlinearity=nn.relu, name='gen-21'),
                                g=None, name='gen-22'))  # 8 -> 16
gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-10'))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 64, 32, 32),
                                                 filter_size=(4, 4), stride=(2, 2), W=Normal(0.05),
                                                 nonlinearity=nn.relu, name='gen-23'),
                                g=None, name='gen-24'))  # 16 -> 32
gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-11'))
gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 1, 64, 64),
                                                  filter_size=(4, 4), stride=(2, 2), W=Normal(0.05),
                                                  nonlinearity=gen_final_non, name='gen-31'),
                                 train_g=True, init_stdv=0.1, name='gen-32'))  # 32 -> 64
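ConvConcatLayer (defined elsewhere in this repo) conditions each deconvolution on the label by broadcasting the one-hot vector over the spatial grid and concatenating it as extra feature maps. A NumPy sketch of that operation, under that assumption:

# Conceptual conv-concat: (N, C, H, W) feats + (N, K) one-hot -> (N, C + K, H, W).
import numpy as np

def conv_concat(feats, y_onehot):
    n, _, h, w = feats.shape
    maps = np.tile(y_onehot[:, :, None, None], (1, 1, h, w))  # (N, K, H, W)
    return np.concatenate([feats, maps], axis=1)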
''' models '''
# symbols
sym_y_g = T.ivector()
sym_z_input = T.matrix()
sym_z_rand = theano_rng.uniform(size=(batch_size_g, n_z))
sym_z_shared = T.tile(theano_rng.uniform((batch_size_g // num_classes, n_z)),
                      (num_classes, 1))

# generator y2x: p_g(x, y) = p(y) p_g(x | y) where x = G(z, y), z follows p_g(z)
gen_in_z = ll.InputLayer(shape=(None, n_z))
gen_in_y = ll.InputLayer(shape=(None,))
gen_layers = [gen_in_z]
if args.dataset == 'svhn' or args.dataset == 'cifar10':
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-00'))
    gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512,
                                                  W=Normal(0.05), nonlinearity=nn.relu, name='gen-01'),
                                    g=None, name='gen-02'))
    gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (-1, 512, 4, 4), name='gen-03'))
    gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-10'))
    gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 256, 8, 8), (5, 5),
                                                     W=Normal(0.05), nonlinearity=nn.relu, name='gen-11'),
                                    g=None, name='gen-12'))  # 4 -> 8
    gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-20'))
    gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 128, 16, 16), (5, 5),
                                                     W=Normal(0.05), nonlinearity=nn.relu, name='gen-21'),
                                    g=None, name='gen-22'))  # 8 -> 16
    gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-30'))
    gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 3, 32, 32), (5, 5),
                                                      W=Normal(0.05), nonlinearity=gen_final_non, name='gen-31'),
                                     train_g=True, init_stdv=0.1, name='gen-32'))  # 16 -> 32
elif args.dataset == 'mnist':
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-1'))
    gen_layers.append(ll.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500,
                                                  nonlinearity=ln.softplus, name='gen-2'), name='gen-3'))
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-4'))
    gen_layers.append(ll.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=500,
                                                  nonlinearity=ln.softplus, name='gen-5'), name='gen-6'))
    gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-7'))
    gen_layers.append(nn.l2normalize(ll.DenseLayer(gen_layers[-1], num_units=28 ** 2,
                                                   nonlinearity=gen_final_non, name='gen-8')))
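sym_z_shared tiles one block of noise across all classes, so the i-th row in each class block shares the same z; this is useful for visualizing how y changes a sample while z is held fixed. A shapes-only NumPy illustration (values here are placeholders, not the script's config):

import numpy as np
num_classes, batch_size_g, n_z = 10, 100, 100      # illustrative values
z = np.random.uniform(size=(batch_size_g // num_classes, n_z))
z_shared = np.tile(z, (num_classes, 1))            # (batch_size_g, n_z)
y = np.repeat(np.arange(num_classes), batch_size_g // num_classes)  # matching labels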
def build_basic_vgg16(self):
    """ Build the basic VGG16 net. """
    print("Building the basic VGG16 net...")
    bn = self.nn.batch_norm

    imgs = tf.placeholder(tf.float32, [self.batch_size] + self.img_shape)
    is_train = tf.placeholder(tf.bool)

    conv1_1_feats = nn.convolution(imgs, 3, 3, 64, 1, 1, 'conv1_1')
    conv1_1_feats = nn.batch_norm(conv1_1_feats, 'bn1_1', is_train, bn, 'relu')
    conv1_2_feats = nn.convolution(conv1_1_feats, 3, 3, 64, 1, 1, 'conv1_2')
    conv1_2_feats = nn.batch_norm(conv1_2_feats, 'bn1_2', is_train, bn, 'relu')
    pool1_feats = nn.max_pool(conv1_2_feats, 2, 2, 2, 2, 'pool1')

    conv2_1_feats = nn.convolution(pool1_feats, 3, 3, 128, 1, 1, 'conv2_1')
    conv2_1_feats = nn.batch_norm(conv2_1_feats, 'bn2_1', is_train, bn, 'relu')
    conv2_2_feats = nn.convolution(conv2_1_feats, 3, 3, 128, 1, 1, 'conv2_2')
    conv2_2_feats = nn.batch_norm(conv2_2_feats, 'bn2_2', is_train, bn, 'relu')
    pool2_feats = nn.max_pool(conv2_2_feats, 2, 2, 2, 2, 'pool2')

    conv3_1_feats = nn.convolution(pool2_feats, 3, 3, 256, 1, 1, 'conv3_1')
    conv3_1_feats = nn.batch_norm(conv3_1_feats, 'bn3_1', is_train, bn, 'relu')
    conv3_2_feats = nn.convolution(conv3_1_feats, 3, 3, 256, 1, 1, 'conv3_2')
    conv3_2_feats = nn.batch_norm(conv3_2_feats, 'bn3_2', is_train, bn, 'relu')
    conv3_3_feats = nn.convolution(conv3_2_feats, 3, 3, 256, 1, 1, 'conv3_3')
    conv3_3_feats = nn.batch_norm(conv3_3_feats, 'bn3_3', is_train, bn, 'relu')
    pool3_feats = nn.max_pool(conv3_3_feats, 2, 2, 2, 2, 'pool3')

    conv4_1_feats = nn.convolution(pool3_feats, 3, 3, 512, 1, 1, 'conv4_1')
    conv4_1_feats = nn.batch_norm(conv4_1_feats, 'bn4_1', is_train, bn, 'relu')
    conv4_2_feats = nn.convolution(conv4_1_feats, 3, 3, 512, 1, 1, 'conv4_2')
    conv4_2_feats = nn.batch_norm(conv4_2_feats, 'bn4_2', is_train, bn, 'relu')
    conv4_3_feats = nn.convolution(conv4_2_feats, 3, 3, 512, 1, 1, 'conv4_3')
    conv4_3_feats = nn.batch_norm(conv4_3_feats, 'bn4_3', is_train, bn, 'relu')
    pool4_feats = nn.max_pool(conv4_3_feats, 2, 2, 2, 2, 'pool4')

    conv5_1_feats = nn.convolution(pool4_feats, 3, 3, 512, 1, 1, 'conv5_1')
    conv5_1_feats = nn.batch_norm(conv5_1_feats, 'bn5_1', is_train, bn, 'relu')
    conv5_2_feats = nn.convolution(conv5_1_feats, 3, 3, 512, 1, 1, 'conv5_2')
    conv5_2_feats = nn.batch_norm(conv5_2_feats, 'bn5_2', is_train, bn, 'relu')
    conv5_3_feats = nn.convolution(conv5_2_feats, 3, 3, 512, 1, 1, 'conv5_3')
    conv5_3_feats = nn.batch_norm(conv5_3_feats, 'bn5_3', is_train, bn, 'relu')

    self.conv_feats = conv5_3_feats
    self.conv_feat_shape = [40, 40, 512]
    self.roi_warped_feat_shape = [16, 16, 512]
    self.roi_pooled_feat_shape = [8, 8, 512]

    self.imgs = imgs
    self.is_train = is_train
    print("Basic VGG16 net built.")
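The declared conv_feat_shape of [40, 40, 512] implies a 640x640 input: the four 2x2 max-pools (pool1 through pool4; conv5 is not pooled here) each halve the spatial size, and 640 / 2^4 = 40. A quick check; the 640 input size is inferred from the shape, not stated in the snippet:

input_size = 640   # inferred, an assumption
num_pools = 4      # pool1 .. pool4 above
assert input_size // (2 ** num_pools) == 40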