def cnn_autoencoder(input_var=None):
    """Build a convolutional autoencoder with the Lasagne library.

    Encoder: (batch, 3, 64, 64) -> conv/pool stack -> 256-d code.
    Decoder: transposed convolutions back up to (batch, 3, 32, 32),
    sigmoid-activated so outputs lie in [0, 1].

    :param input_var: optional Theano tensor to attach to the input layer.
    :return: the final Lasagne layer of the decoder.
    """
    ##################
    # Network config #
    ##################
    input_channels = 3
    weight_init = lasagne.init.Normal()

    # encoder
    conv1_nb_filt = 32
    conv1_sz_filt = (9, 9)
    conv1_sz_padd = 2
    # conv1 output size = (60, 60)
    pool1_sz = (2, 2)
    # pool1 output size = (30, 30)
    conv2_nb_filt = 64
    conv2_sz_filt = (7, 7)
    conv2_sz_padd = 0
    # conv2 output size = (24, 24)
    pool2_sz = (4, 4)
    # pool2 size = (6, 6)
    conv3_nb_filt = 128
    conv3_sz_filt = (5, 5)
    conv3_sz_padd = 0
    # conv3 output size = (2, 2)
    pool3_sz = (2, 2)
    # pool3 output size = (128, 1, 1)  (conv3 has 128 filters; the
    # original comment said 32, which did not match conv3_nb_filt)
    dens1_nb_unit = 256  # dense1 output (vector 256)
    dens2_nb_unit = 256  # dense2 output (vector 256)
    rshp_sz = 1  # reshape output (256, 1, 1)

    # decoder
    tconv1_nb_filt = 64
    tconv1_sz_filt = (5, 5)
    tconv1_sz_strd = (1, 1)
    # tconv1 output size = (5, 5)
    upsamp1_sz = (2, 2)
    # upsamp1 output size = (10, 10)
    tconv2_nb_filt = 32
    tconv2_sz_filt = (4, 4)
    tconv2_sz_strd = (1, 1)
    # tconv2 output size = (13, 13)
    upsamp2_sz = (2, 2)
    # upsamp2 output size = (26, 26)
    tconv3_nb_filt = 32
    tconv3_sz_filt = (5, 5)
    tconv3_sz_strd = (1, 1)
    # tconv3 output size = (30, 30)
    tconv4_nb_filt = 3
    tconv4_sz_filt = (3, 3)
    tconv4_sz_strd = (1, 1)
    # tconv4 output size = (32, 32)
    # final output = (3 channels, 32 x 32)

    #####################
    # Build the network #
    #####################
    # Add input layer
    network = lyr.InputLayer(shape=(None, input_channels, 64, 64),
                             input_var=input_var)

    # Encoder: three conv + max-pool stages
    network = lyr.Conv2DLayer(incoming=network, num_filters=conv1_nb_filt,
                              filter_size=conv1_sz_filt, pad=conv1_sz_padd,
                              W=weight_init)
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool1_sz)
    network = lyr.Conv2DLayer(incoming=network, num_filters=conv2_nb_filt,
                              filter_size=conv2_sz_filt, pad=conv2_sz_padd,
                              W=weight_init)
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool2_sz)
    network = lyr.Conv2DLayer(incoming=network, num_filters=conv3_nb_filt,
                              filter_size=conv3_sz_filt, pad=conv3_sz_padd,
                              W=weight_init)
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool3_sz)

    # Bottleneck: flatten, two dense layers, reshape back to a feature map
    network = lyr.FlattenLayer(network)
    network = lyr.DenseLayer(network, dens1_nb_unit, W=weight_init)
    network = lyr.DenseLayer(network, dens2_nb_unit, W=weight_init)
    # BUG FIX: use floor division so the channel count is an int under
    # Python 3 (true division produced a float, which ReshapeLayer rejects),
    # and use [0] to reuse the incoming batch dimension instead of
    # input_var.shape[0], which raised AttributeError when input_var is None.
    network = lyr.ReshapeLayer(
        network, ([0], dens2_nb_unit // (rshp_sz ** 2), rshp_sz, rshp_sz))

    # Decoder: transposed convolutions with upsampling in between
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv1_nb_filt,
                                        filter_size=tconv1_sz_filt,
                                        stride=tconv1_sz_strd, W=weight_init)
    network = lyr.Upscale2DLayer(incoming=network, scale_factor=upsamp1_sz)
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv2_nb_filt,
                                        filter_size=tconv2_sz_filt,
                                        stride=tconv2_sz_strd, W=weight_init)
    network = lyr.Upscale2DLayer(incoming=network, scale_factor=upsamp2_sz)
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv3_nb_filt,
                                        filter_size=tconv3_sz_filt,
                                        stride=tconv3_sz_strd, W=weight_init)
    # Final layer maps back to 3 channels with sigmoid output
    network = lyr.TransposedConv2DLayer(
        incoming=network, num_filters=tconv4_nb_filt,
        filter_size=tconv4_sz_filt, stride=tconv4_sz_strd, W=weight_init,
        nonlinearity=lasagne.nonlinearities.sigmoid)

    return network
gen_in_z = ll.InputLayer(shape=(None, n_z)) gen_in_y = ll.InputLayer(shape=(None, )) gen_layers = [gen_in_z] if args.dataset == 'svhn' or args.dataset == 'cifar10': gen_layers.append( MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-00')) gen_layers.append( nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512, W=Normal(0.05), nonlinearity=nn.relu, name='gen-01'), g=None, name='gen-02')) gen_layers.append( ll.ReshapeLayer(gen_layers[-1], (-1, 512, 4, 4), name='gen-03')) gen_layers.append( ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-10')) gen_layers.append( nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 256, 8, 8), (5, 5), W=Normal(0.05), nonlinearity=nn.relu, name='gen-11'), g=None, name='gen-12')) # 4 -> 8 gen_layers.append( ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-20')) gen_layers.append(
def build_network(self, K, vocab_size, W_init):
    """Build a K-layer gated-attention style reader over document/query pairs.

    Shape legend (from the inline comments): B = batch, N = document
    length, Q = query length, DE = embedding dim, D = self.nhidden.

    :param K: number of reader layers (K-1 attention hops followed by a
        final document pass).
    :param vocab_size: word vocabulary size for the embedding tables.
    :param W_init: initial word-embedding matrix; shared between document
        and query embeddings (query reuses l_docembed.W).
    :return: (final, final_v, l_doc, l_qs, l_docembed.W) -- training-time
        answer distribution, deterministic (dropout-free) distribution,
        final document layer, per-hop query layers, and the embedding matrix.
    """
    # Input layers over the symbolic inputs in self.inps.
    # NOTE(review): index meanings (0=doc tokens, 2=query tokens, 6/7=doc/
    # query masks, 8/9=char tokens/mask, 11=features, 12=cloze position,
    # 4/10 used at the end) are inferred from usage here -- confirm
    # against the code that populates self.inps.
    l_docin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[0])
    l_doctokin = L.InputLayer(shape=(None, None), input_var=self.inps[1])
    l_qin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[2])
    l_qtokin = L.InputLayer(shape=(None, None), input_var=self.inps[3])
    l_docmask = L.InputLayer(shape=(None, None), input_var=self.inps[6])
    l_qmask = L.InputLayer(shape=(None, None), input_var=self.inps[7])
    l_tokin = L.InputLayer(shape=(None, MAX_WORD_LEN), input_var=self.inps[8])
    l_tokmask = L.InputLayer(shape=(None, MAX_WORD_LEN), input_var=self.inps[9])
    l_featin = L.InputLayer(shape=(None, None), input_var=self.inps[11])

    doc_shp = self.inps[1].shape
    qry_shp = self.inps[3].shape

    # Word embeddings; the query embedding ties its weights to the
    # document embedding via W=l_docembed.W.
    l_docembed = L.EmbeddingLayer(l_docin, input_size=vocab_size,
                                  output_size=self.embed_dim,
                                  W=W_init)  # B x N x 1 x DE
    l_doce = L.ReshapeLayer(
        l_docembed, (doc_shp[0], doc_shp[1], self.embed_dim))  # B x N x DE
    l_qembed = L.EmbeddingLayer(l_qin, input_size=vocab_size,
                                output_size=self.embed_dim,
                                W=l_docembed.W)
    l_qembed = L.ReshapeLayer(
        l_qembed, (qry_shp[0], qry_shp[1], self.embed_dim))  # B x N x DE
    l_fembed = L.EmbeddingLayer(l_featin, input_size=2, output_size=2)  # B x N x 2

    # Optionally freeze the word embeddings.
    if self.train_emb == 0:
        l_docembed.params[l_docembed.W].remove('trainable')

    # char embeddings: a small bidirectional GRU over the characters of
    # each token, projected and summed into a DE/2 token representation
    # that is concatenated onto the word embeddings.
    if self.use_chars:
        l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars,
                                    2 * self.char_dim)  # T x L x D
        l_fgru = L.GRULayer(l_lookup, self.char_dim, grad_clipping=GRAD_CLIP,
                            mask_input=l_tokmask, gradient_steps=GRAD_STEPS,
                            precompute_input=True, only_return_final=True)
        l_bgru = L.GRULayer(l_lookup, 2 * self.char_dim,
                            grad_clipping=GRAD_CLIP, mask_input=l_tokmask,
                            gradient_steps=GRAD_STEPS, precompute_input=True,
                            backwards=True, only_return_final=True)  # T x 2D
        l_fwdembed = L.DenseLayer(l_fgru, self.embed_dim / 2,
                                  nonlinearity=None)  # T x DE/2
        l_bckembed = L.DenseLayer(l_bgru, self.embed_dim / 2,
                                  nonlinearity=None)  # T x DE/2
        l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
        l_docchar_embed = IndexLayer([l_doctokin, l_embed])  # B x N x DE/2
        l_qchar_embed = IndexLayer([l_qtokin, l_embed])  # B x Q x DE/2
        l_doce = L.ConcatLayer([l_doce, l_docchar_embed], axis=2)
        l_qembed = L.ConcatLayer([l_qembed, l_qchar_embed], axis=2)

    # Bidirectional query encoder; the cloze-position state (inps[12])
    # becomes the 2D query vector q.
    l_fwd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True, only_return_final=False)
    l_bkd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True, backwards=True,
                         only_return_final=False)
    # NOTE(review): this ConcatLayer uses the default axis (1), unlike the
    # axis=2 concats elsewhere in this method -- confirm this is intended
    # for the comment's claimed B x Q x 2D shape.
    l_q = L.ConcatLayer([l_fwd_q, l_bkd_q])  # B x Q x 2D
    q = L.get_output(l_q)  # B x Q x 2D
    q = q[T.arange(q.shape[0]), self.inps[12], :]  # B x 2D

    l_qs = [l_q]
    # K-1 gated-attention hops: re-encode document and query, compute a
    # doc-token-to-query-token attention, and gate the document states by
    # their attended query representation (element-wise multiply).
    for i in range(K - 1):
        l_fwd_doc_1 = L.GRULayer(l_doce, self.nhidden,
                                 grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask,
                                 gradient_steps=GRAD_STEPS,
                                 precompute_input=True)
        l_bkd_doc_1 = L.GRULayer(l_doce, self.nhidden,
                                 grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask,
                                 gradient_steps=GRAD_STEPS,
                                 precompute_input=True, backwards=True)
        l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2)  # B x N x DE
        l_fwd_q_1 = L.GRULayer(l_qembed, self.nhidden,
                               grad_clipping=GRAD_CLIP, mask_input=l_qmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_q_1 = L.GRULayer(l_qembed, self.nhidden,
                               grad_clipping=GRAD_CLIP, mask_input=l_qmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True, backwards=True)
        l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1], axis=2)  # B x Q x DE
        l_qs.append(l_q_c_1)
        qd = L.get_output(l_q_c_1)  # B x Q x DE
        dd = L.get_output(l_doc_1)  # B x N x DE
        # Pairwise doc-token / query-token scores, softmaxed over Q and
        # renormalised under the query mask (inps[7]).
        M = T.batched_dot(dd, qd.dimshuffle((0, 2, 1)))  # B x N x Q
        alphas = T.nnet.softmax(
            T.reshape(M, (M.shape[0] * M.shape[1], M.shape[2])))
        alphas_r = T.reshape(alphas, (M.shape[0], M.shape[1], M.shape[2])) * \
            self.inps[7][:, np.newaxis, :]  # B x N x Q
        alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis]  # B x N x Q
        q_rep = T.batched_dot(alphas_r, qd)  # B x N x DE
        # Gate the document states with their attended query vectors and
        # apply dropout; the result feeds the next hop's doc encoder.
        l_q_rep_in = L.InputLayer(shape=(None, None, 2 * self.nhidden),
                                  input_var=q_rep)
        l_doc_2_in = L.ElemwiseMergeLayer([l_doc_1, l_q_rep_in], T.mul)
        l_doce = L.dropout(l_doc_2_in, p=self.dropout)  # B x N x DE

    # Optional hand-crafted token features appended before the last pass.
    if self.use_feat:
        l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2)  # B x N x DE+2

    # Final bidirectional document pass.
    l_fwd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                           precompute_input=True)
    l_bkd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                           precompute_input=True, backwards=True)
    l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

    # Training-time answer distribution: doc-state / query-vector scores,
    # softmaxed, masked by candidate indicators (inps[10]), renormalised,
    # and aggregated over candidate groups via inps[4].
    d = L.get_output(l_doc)  # B x N x 2D
    p = T.batched_dot(d, q)  # B x N
    pm = T.nnet.softmax(p) * self.inps[10]
    pm = pm / pm.sum(axis=1)[:, np.newaxis]
    final = T.batched_dot(pm, self.inps[4])

    # Same computation with deterministic=True (dropout disabled) for
    # validation/inference.
    dv = L.get_output(l_doc, deterministic=True)  # B x N x 2D
    p = T.batched_dot(dv, q)  # B x N
    pm = T.nnet.softmax(p) * self.inps[10]
    pm = pm / pm.sum(axis=1)[:, np.newaxis]
    final_v = T.batched_dot(pm, self.inps[4])

    return final, final_v, l_doc, l_qs, l_docembed.W
def dmr_regularizer(f_1, f2): x_bin = f_1 / (T.abs_(f_1) + args.gamma) x_bin_2 = T.sgn(f2) M_reg = (T.dot(x_bin_2, x_bin_2.T) - T.dot(x_bin_2, x_bin_2.T) * (T.eye(x_bin_2.shape[0]))) / x_bin_2.shape[1] M_true = (T.dot(x_bin, x_bin.T) - T.dot(x_bin, x_bin.T) * (T.eye(x_bin.shape[0]))) / x_bin.shape[1] l_reg = T.sum(T.abs_(M_reg - M_true)) / (x_bin.shape[0] * (x_bin.shape[0] - 1)) return l_reg if args.dataset_type == 'brown': global_pool = ll.GlobalPoolLayer(disc_layers[f_low_dim]) global_pool_2 = ll.ReshapeLayer(disc_layers[f_high_dim], ([0], -1)) else: global_pool = disc_layers[f_low_dim] global_pool_2 = ll.GlobalPoolLayer(disc_layers[f_high_dim]) features_1 = ll.get_output(global_pool, x_unl_1, deterministic=False) features_2 = ll.get_output(global_pool_2, x_unl_1, deterministic=False) loss_unl = loss_unl + args.lambda_dmr * dmr_regularizer(features_1, features_2) \ + args.lambda_bre * bre_regularizer(features_1, features_2) # Theano functions for training the disc net lr = T.scalar() disc_params = ll.get_all_params(disc_layers, trainable=True) disc_param_updates = nn.adam_updates(disc_params, loss_unl, lr=lr, mom1=0.5) disc_param_avg = [ th.shared(np.cast[th.config.floatX](0. * p.get_value()))
# Seed all random number generators from args.seed so runs are reproducible.
rng = np.random.RandomState(args.seed)
theano_rng = MRG_RandomStreams(rng.randint(2 ** 15))
lasagne.random.set_rng(np.random.RandomState(rng.randint(2 ** 15)))

# load CIFAR-10
trainx, trainy = cifar10_data.load(args.data_dir, subset='train')
trainx_unl = trainx.copy()  # unlabelled copy of the training images
nr_batches_train = int(trainx.shape[0]/args.batch_size)

# specify generative model
# DCGAN-style generator: 100-d uniform noise -> dense 4x4x512 -> three
# deconvolutions upsampling 4 -> 8 -> 16 -> 32; tanh output in [-1, 1].
noise_dim = (args.batch_size, 100)
noise = theano_rng.uniform(size=noise_dim)
gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4*4*512, W=Normal(0.05), nonlinearity=nn.relu), g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (args.batch_size,512,4,4)))
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size,256,8,8), (5,5), W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 4 -> 8
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size,128,16,16), (5,5), W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 8 -> 16
gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size,3,32,32), (5,5), W=Normal(0.05), nonlinearity=T.tanh), train_g=True, init_stdv=0.1))  # 16 -> 32
gen_dat = ll.get_output(gen_layers[-1])

# specify discriminative model
# Weight-normalised conv net on 32x32x3 images; the stride=2 convolutions
# downsample the feature maps.
disc_layers = [ll.InputLayer(shape=(None, 3, 32, 32))]
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.2))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3,3), pad=1, W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3,3), pad=1, W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3,3), pad=1, stride=2, W=Normal(0.05), nonlinearity=nn.lrelu)))  # 32 -> 16
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.5))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 192, (3,3), pad=1, W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 192, (3,3), pad=1, W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 192, (3,3), pad=1, stride=2, W=Normal(0.05), nonlinearity=nn.lrelu)))  # 16 -> 8
W=Normal(0.02)))) # embedding, 50 -> 128 gen0_layer_z_embed = gen0_layers[-1] gen0_layers.append( LL.InputLayer(shape=(args.batch_size, 256), input_var=real_fc3) ) # Input layer for real_fc3 in independent training, gen_fc3 in joint training gen0_layer_fc3 = gen0_layers[-1] gen0_layers.append( LL.ConcatLayer([gen0_layer_fc3, gen0_layer_z_embed], axis=1)) # concatenate noise and fc3 features gen0_layers.append( LL.ReshapeLayer( nn.batch_norm( LL.DenseLayer(gen0_layers[-1], num_units=128 * 4 * 4, W=Normal(0.02), nonlinearity=T.nnet.relu)), (args.batch_size, 128, 4, 4))) # fc gen0_layers.append( nn.batch_norm( nn.Deconv2DLayer(gen0_layers[-1], (args.batch_size, 128, 8, 8), (5, 5), stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu))) # deconv gen0_layers.append( nn.batch_norm( nn.Deconv2DLayer(gen0_layers[-1], (args.batch_size, 64, 12, 12), (5, 5), stride=(1, 1),
# Symbolic scalar hyper-parameters, supplied when the Theano functions
# are called.
sym_alpha_cla_g = T.scalar()
sym_alpha_unlabel_entropy = T.scalar()
sym_alpha_unlabel_average = T.scalar()
# Unlabelled data is kept in a shared variable on-device; minibatches are
# selected through integer index vectors, one per sub-model.
shared_unlabel = theano.shared(x_unlabelled, borrow=True)
slice_x_u_g = T.ivector()  # unlabelled-batch indices for the generator
slice_x_u_d = T.ivector()  # unlabelled-batch indices for the discriminator
slice_x_u_c = T.ivector()  # unlabelled-batch indices for the classifier
"""
classifier x2y: p_c(x, y) = p(x) p_c(y | x)
ll: lasagne.layers
"""
# Classifier: flat 784-d input reshaped to 1x28x28 images, then a stack of
# five `convlayer` blocks. `convlayer` is a project helper; bn/dr/ps are
# presumably batch-norm flag, dropout rate, and pool size -- confirm
# against its definition.
cla_in_x = ll.InputLayer(shape=(None, 28 ** 2))
cla_layers = [cla_in_x]
cla_layers.append(ll.ReshapeLayer(cla_layers[-1], (-1, 1, 28, 28)))
cla_layers.append(convlayer(l=cla_layers[-1], bn=True, dr=0.5, ps=2, n_kerns=32, d_kerns=(5, 5), pad='valid', stride=1, W=Normal(0.05), nonlinearity=ln.rectify, name='cla-1'))
cla_layers.append(convlayer(l=cla_layers[-1], bn=True, dr=0, ps=1, n_kerns=64, d_kerns=(3, 3), pad='same', stride=1, W=Normal(0.05), nonlinearity=ln.rectify, name='cla-2'))
cla_layers.append(convlayer(l=cla_layers[-1], bn=True, dr=0.5, ps=2, n_kerns=64, d_kerns=(3, 3), pad='valid', stride=1, W=Normal(0.05), nonlinearity=ln.rectify, name='cla-3'))
cla_layers.append(convlayer(l=cla_layers[-1], bn=True, dr=0, ps=1, n_kerns=128, d_kerns=(3, 3), pad='same', stride=1, W=Normal(0.05), nonlinearity=ln.rectify, name='cla-4'))
cla_layers.append(convlayer(l=cla_layers[-1], bn=True, dr=0, ps=1, n_kerns=128, d_kerns=(3, 3), pad='same', stride=1, W=Normal(0.05), nonlinearity=ln.rectify, name='cla-5'))
name='cla-6')) cla_layers.append(ll.GlobalPoolLayer(cla_layers[-1])) cla_layers.append(ll.DenseLayer(cla_layers[-1], num_units=num_classes, W=lasagne.init.Normal(1e-2, 0), nonlinearity=ln.softmax, name='cla-6')) ################# Generator gen_in_z = ll.InputLayer(shape=(None, n_z)) gen_in_y = ll.InputLayer(shape=(None,)) gen_layers = [gen_in_z] gen_layers.append(MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-5')) gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=512*4*4, nonlinearity=ln.linear, name='gen-6'), g=None, name='gen-61')) gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (-1, 512, 4, 4), name='gen-7')) gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-8')) gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 256, 8, 8), filter_size=(4,4), stride=(2, 2), W=Normal(0.05), nonlinearity=nn.relu, name='gen-11'), g=None, name='gen-12')) gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-9')) gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 128, 16, 16), filter_size=(4,4), stride=(2, 2), W=Normal(0.05), nonlinearity=nn.relu, name='gen-11'), g=None, name='gen-12')) gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-10')) gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 64, 32, 32), filter_size=(4,4), stride=(2, 2), W=Normal(0.05), nonlinearity=nn.relu, name='gen-11'), g=None, name='gen-12')) gen_layers.append(ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes, name='gen-11')) gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (None, 1, 64, 64), filter_size=(4,4), stride=(2, 2), W=Normal(0.05), nonlinearity=gen_final_non, name='gen-31'), train_g=True, init_stdv=0.1, name='gen-32'))
def __init__(self, input_shapes, output_dim, obs_network_params=[], input_layers=None, fusion_net_params=None, hidden_W_init=LI.GlorotUniform(), hidden_b_init=LI.Constant(0.), output_W_init=LI.GlorotUniform(), output_b_init=LI.Constant(0.), use_flat_obs=True, name=None):
    """Build a multi-observation fusion network.

    One ConvNetwork per entry of `input_shapes` embeds each observation;
    the flattened embeddings are concatenated, passed through an optional
    fusion MLP, and finally through one dense head per output dimension
    (heads are concatenated into a single output layer).

    :param input_shapes: (list of tuples) shape of each observation
    :param output_dim: (list of int or just int) number of output dimensions
    :param obs_network_params: (list of dict) parameters of observation networks:
        conv_filters: (list of int) number of conv filters per layer
        conv_filter_sizes: (list of int) spatial sizes of square conv filters
        conv_strides: (list of int) strides of convolutional filters
        conv_pads: (list of str) padding alg per layer: 'valid', 'full', 'same'
        hidden_sizes: (list of int) hidden units per fully connected layer
        hidden_nonlinearity / output_nonlinearity: (str) nonlinearity *names*
            resolved via getattr on the lasagne nonlinearities module,
            e.g. 'rectify', 'tanh'
    :param input_layers: (list of input layers or None)
    :param fusion_net_params: (dict) 'hidden_sizes', 'hidden_nonlinearity',
        'output_nonlinearities'
    :param use_flat_obs: feed observations as flat vectors
    :param name: (str) name prefix for all layers
    """
    self.use_flat_obs = use_flat_obs
    if name is None:
        prefix = ""
    else:
        prefix = name + "_"

    ## Setting default values of parameters
    if fusion_net_params is None:
        fusion_net_params = {}
    if 'hidden_sizes' not in fusion_net_params:
        fusion_hidden_sizes = []
    else:
        fusion_hidden_sizes = fusion_net_params['hidden_sizes']
    if 'hidden_nonlinearity' not in fusion_net_params:
        # BUG FIX: this branch used to assign
        # fusion_net_params['output_nonlinearities'] = 'rectify', which
        # (a) left 'hidden_nonlinearity' undefined for the fusion MLP below
        # (KeyError when fusion hidden layers exist) and (b) silently
        # suppressed the missing-output-nonlinearity warning.
        fusion_net_params['hidden_nonlinearity'] = 'rectify'
    if 'output_nonlinearities' not in fusion_net_params:
        output_nonlinearities = None
        print('!!! WARNING: Output nonlinearities were not assigned ')
    else:
        output_nonlinearities = fusion_net_params['output_nonlinearities']

    obs_networks = []
    obs_network_outputs = []
    obs_network_inputs = []
    obs_network_input_vars = []

    # A single (or missing) input layer is replicated for every network.
    if not isinstance(input_layers, list):
        input_layers = [input_layers] * len(obs_network_params)

    print(self.__class__.__name__ + ': OBS NUM = ', len(input_shapes),
          ' OBS_SHAPES: ', input_shapes)
    for net_i in range(len(input_shapes)):
        obs_param = obs_network_params[net_i]

        ## Checking parameters
        # BUG FIX: the original conditions were written as
        # `'conv_filters' not in obs_param or 'conv_filters' is None`,
        # comparing the *string literal* to None (always False).
        # obs_param.get(...) gives the intended "missing or None" check.
        if obs_param.get('conv_filters') is None:
            obs_param['conv_filters'] = []
        if obs_param.get('conv_filter_sizes') is None:
            obs_param['conv_filter_sizes'] = []
        if obs_param.get('conv_strides') is None:
            obs_param['conv_strides'] = [1] * len(obs_param['conv_filters'])
        if obs_param.get('conv_pads') is None:
            obs_param['conv_pads'] = ['valid'] * len(obs_param['conv_filters'])
        if obs_param.get('hidden_sizes') is None:
            obs_param['hidden_sizes'] = []
        # BUG FIX: defaults must be nonlinearity *names* (strings) because
        # they are resolved below via getattr(NL, ...); the original stored
        # the LN.rectify function object, which getattr rejects.
        if 'hidden_nonlinearity' not in obs_param:
            obs_param['hidden_nonlinearity'] = 'rectify'
        if 'output_nonlinearity' not in obs_param:
            obs_param['output_nonlinearity'] = 'rectify'

        # If you flatten images before you feed them then use_flat_obs = True
        # WARNING: At this moment it actually breaks if I don't use flat obs
        var_name = 'obs_%d' % (net_i)
        if self.use_flat_obs:
            obs_var = theano.tensor.matrix(name=var_name)
        else:
            # Pick a tensor rank matching the observation shape plus batch.
            if len(input_shapes[net_i]) == 3:
                obs_var = theano.tensor.tensor4(name=var_name)
            elif len(input_shapes[net_i]) == 2:
                obs_var = theano.tensor.tensor3(name=var_name)
            else:
                obs_var = theano.tensor.matrix(name=var_name)

        obs_net = ConvNetwork(
            input_shape=input_shapes[net_i],
            input_layer=input_layers[net_i],
            conv_filters=obs_param['conv_filters'],
            conv_filter_sizes=obs_param['conv_filter_sizes'],
            conv_strides=obs_param['conv_strides'],
            conv_pads=obs_param['conv_pads'],
            hidden_sizes=obs_param['hidden_sizes'],
            hidden_nonlinearity=getattr(NL, obs_param['hidden_nonlinearity']),
            output_nonlinearity=getattr(NL, obs_param['output_nonlinearity']),
            # BUG FIX: was `name + '_obs%d' % net_i`, which raises TypeError
            # when name is None; prefix is "" or name + "_", so the produced
            # names are unchanged for non-None names.
            name=prefix + 'obs%d' % net_i,
            input_var=obs_var,
            use_flat_obs=use_flat_obs
        )
        obs_networks.append(obs_net)
        obs_network_inputs.append(obs_net.input_layer)
        obs_network_input_vars.append(obs_net.input_var)
        # Flatten each embedding to (batch, features) before fusion.
        embed_shape = obs_net.output_layer.output_shape
        embed_shape_flat = ([0], int(np.prod(embed_shape[1:])))
        obs_network_outputs.append(
            L.ReshapeLayer(obs_net.output_layer, shape=embed_shape_flat))
        print('Obs_%d Flattened output shape:' % net_i, embed_shape_flat)

    print('--- FUSION NET ----------------------------------------------------------')
    # Concatenating observation layers
    l_hid = L.ConcatLayer(obs_network_outputs)
    print('Merged obs embeding shape:', l_hid.output_shape)

    # Fusion MLP layers
    for idx, hidden_size in enumerate(fusion_hidden_sizes):
        l_hid = L.DenseLayer(
            l_hid,
            num_units=hidden_size,
            nonlinearity=getattr(NL, fusion_net_params['hidden_nonlinearity']),
            name="%shidden_%d" % (prefix, idx),
            W=hidden_W_init,
            b=hidden_b_init,
        )
        print('Dense layer out shape = ', l_hid.output_shape,
              ' Nonlinearity = ', fusion_net_params['hidden_nonlinearity'])

    # Outputs: one dense head per requested output dimension.
    if not isinstance(output_dim, list):
        output_dim = [output_dim]
    if not isinstance(output_nonlinearities, (list, tuple)):
        output_nonlinearities = [output_nonlinearities] * len(output_dim)
    else:
        assert len(output_nonlinearities) == len(output_dim), ' ERROR: Number of outputs does not match number of nonlinearities'

    outputs = []
    print('--- Fusion net outputs: ')
    for dim_i in range(len(output_dim)):
        outputs.append(L.DenseLayer(
            l_hid,
            num_units=output_dim[dim_i],
            nonlinearity=getattr(NL, output_nonlinearities[dim_i]),
            name="%s_output_%d" % (prefix, dim_i),
            W=output_W_init,
            b=output_b_init,
        ))
        print('Output %d shape = ' % dim_i, outputs[-1].output_shape,
              ' Nonlinearity = ', output_nonlinearities[dim_i])

    # Concatenate outputs
    if len(outputs) != 0:
        l_out = L.ConcatLayer(outputs)
        print('Merged outputs shape: ', l_out.output_shape)
    else:
        print('!!! WARING: No outputs were specified, thus the last hidden layer of fusion net will be used')
        l_out = l_hid

    self._l_in = obs_network_inputs
    self._l_out = l_out
    self._input_vars = obs_network_input_vars
# Load the chest X-ray data; keep two unlabelled copies of the training set.
trainx, trainy = cxr_data.load_cxr(args.data_dir, subset='train')
trainx_unl = trainx.copy()
trainx_unl2 = trainx.copy()
testx, testy = cxr_data.load_cxr(args.data_dir, subset='test')
nr_batches_train = int(trainx.shape[0]/args.batch_size)
nr_batches_test = int(testx.shape[0]/args.batch_size)
print("DATA LOADED")

# specify generative model
# 100-d uniform noise -> dense 4x4x1024 -> four deconvolutions upsampling
# 4 -> 8 -> 16 -> 32 -> 64, tanh output (single-channel 64x64 images).
noise_dim = (args.batch_size, 100)
noise = theano_rng.uniform(size=noise_dim)
orig_gen_n = 1024
gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)]
gen_layers.append(nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4*4*orig_gen_n, W=Normal(0.05), nonlinearity=nn.relu), g=None))
gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (args.batch_size,orig_gen_n,4,4)))
# BUG FIX: use floor division for the filter counts -- under Python 3,
# orig_gen_n/2 etc. are floats, and a float in the Deconv2DLayer output
# shape tuple raises a TypeError when the layer is built.
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size,orig_gen_n//2,8,8), (5,5), W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 4 -> 8
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size,orig_gen_n//4,16,16), (5,5), W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 8 -> 16
gen_layers.append(nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size,orig_gen_n//8,32,32), (5,5), W=Normal(0.05), nonlinearity=nn.relu), g=None))  # 16 -> 32
gen_layers.append(nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size,1,64,64), (5,5), W=Normal(0.05), nonlinearity=T.tanh), train_g=True, init_stdv=0.1))  # 32 -> 64
gen_dat = ll.get_output(gen_layers[-1])
print("GENERATOR CREATED")

# specify discriminative model
# Weight-normalised conv net on 1x64x64 images; stride=2 downsamples.
disc_layers = [ll.InputLayer(shape=(None, 1, 64, 64))]
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.5))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3,3), pad=1, W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3,3), pad=1, W=Normal(0.05), nonlinearity=nn.lrelu)))
disc_layers.append(nn.weight_norm(dnn.Conv2DDNNLayer(disc_layers[-1], 96, (3,3), pad=1, stride=2, W=Normal(0.05), nonlinearity=nn.lrelu)))  # 64 -> 32
disc_layers.append(ll.DropoutLayer(disc_layers[-1], p=0.5))
def modelinv(y, fnet):
    """Build the inverse (decoder) network that mirrors the forward net.

    Each forward conv/cccp layer in `fnet` is inverted by first removing
    its bias and then applying a transposed convolution that reuses the
    forward layer's own weights; forward pooling stages are inverted by
    fixed-factor upscaling.

    :param y: symbolic input variable for the 10-d class activations.
    :param fnet: dict of the forward network's layers, keyed by name.
    :return: dict of inverse layers; net['out'] is the reconstruction.
    """
    def invert_conv(incoming, flayer):
        # Undo one forward conv: subtract its bias, then transpose its
        # convolution with the same weights (and flipped filter flag).
        debiased = myUtils.layers.RemoveBiasLayer(incoming, flayer.b)
        return layers.Deconv2DLayer(
            debiased,
            num_filters=flayer.input_shape[1],
            filter_size=flayer.filter_size,
            stride=flayer.stride,
            crop=flayer.pad,
            W=flayer.W,
            b=None,
            flip_filters=not flayer.flip_filters)

    net = {}
    net['input'] = layers.InputLayer(shape=(None, 10), input_var=y)
    net['input'] = layers.ReshapeLayer(net['input'], (-1, 10, 1, 1))

    # Invert the third block: global pooling, then cccp6/cccp5/conv3.
    net['ipool3'] = layers.Upscale2DLayer(net['input'], 8)
    net['icccp6'] = invert_conv(net['ipool3'], fnet['cccp6'])
    net['icccp5'] = invert_conv(net['icccp6'], fnet['cccp5'])
    net['iconv3'] = invert_conv(net['icccp5'], fnet['conv3'])

    # Invert the second block.
    net['ipool2'] = layers.Upscale2DLayer(net['iconv3'], 2)
    net['icccp4'] = invert_conv(net['ipool2'], fnet['cccp4'])
    net['icccp3'] = invert_conv(net['icccp4'], fnet['cccp3'])
    net['iconv2'] = invert_conv(net['icccp3'], fnet['conv2'])

    # Invert the first block, back to the image space.
    net['ipool1'] = layers.Upscale2DLayer(net['iconv2'], 2)
    net['icccp2'] = invert_conv(net['ipool1'], fnet['cccp2'])
    net['icccp1'] = invert_conv(net['icccp2'], fnet['cccp1'])
    net['iconv1'] = invert_conv(net['icccp1'], fnet['conv1'])

    net['out'] = net['iconv1']
    return net
def __init__(self): self.batch_size = 32 self.embedding_size = 50 self.nb_max_sentences = 10 self.length_max_sentences = 30 self.vocab_size = 10000 self.nb_hidden = 32 self.nb_hops = 5 # Dimension of the input context is (batch_size, number of sentences, max size of sentences) self.context = T.itensor3('context') self.mask_context = T.imatrix('context_mask') # Dimension of the question input is (batch_size, max size of sentences) self.question = T.itensor3('question') self.mask_question = T.imatrix('question_mask') """ Building the Input context module """ mask_context = layers.InputLayer( (self.batch_size * self.nb_max_sentences, self.length_max_sentences), input_var=self.mask_context) # (batch_size, nb_sentences, length_max_sentences) input_module = layers.InputLayer( (self.batch_size, self.nb_max_sentences, self.length_max_sentences), input_var=self.context) # (batch_size, nb_sentences * length_max_sentences) input_module = layers.ReshapeLayer(input_module, (self.batch_size, -1)) # (batch_size, nb_sentences * length_max_sequences, embedding_size) input_module = layers.EmbeddingLayer(input_module, self.vocab_size, self.embedding_size) # (batch_size, nb_sentences, length_max_sequences, embedding_size) input_module = layers.ReshapeLayer( input_module, (self.batch_size, self.nb_max_sentences, self.length_max_sentences, self.embedding_size)) # (batch_size * nb_sentences, length_sentences, embedding_size) input_module = layers.ReshapeLayer( input_module, (self.batch_size * self.nb_max_sentences, self.length_max_sentences, self.embedding_size)) # (batch_size * nb_sentences, nb_hidden) input_module = layers.GRULayer(input_module, self.nb_hidden, mask_input=mask_context, only_return_final=True) context = layers.get_output(input_module) # input_module = layers.ReshapeLayer(input_module, (self.batch_size, self.nb_max_sentences, self.nb_hidden)) """ Building the Input context module """ # (bach_size, length_sentences) mask_question = layers.InputLayer( (self.batch_size, 
self.length_max_sentences), input_var=self.mask_question) # (batch_size, length_sentences) question_module = layers.InputLayer( (self.batch_size, self.length_max_sentences)) # (batch_size, length_sentences, embedding_size) question_module = layers.EmbeddingLayer(question_module, self.vocab_size, self.embedding_size) # (batch_size, nb_hidden) question_module = layers.GRULayer(question_module, self.nb_hidden, mask_input=mask_question, only_return_final=True) question = layers.get_output(question_module) """ Building the Memory module """ memory = question self._M = utils.get_shared('glorot_uniform', self.nb_hidden, self.nb_hidden) for step in xrange(self.nb_hops): z_score_vector = T.concatenate([ context, question, memory, context * question, context * memory, T.abs_(context - question), T.abs_(context - memory), T.dot(T.dot(context, self._M), question), T.dot(T.dot(context, self._M), memory) ]) self._M1 = utils.get_shared('glorot_uniform', self.nb_hidden * 9, self.nb_hidden) self._B1 = utils.get_shared('constant_zero', self.nb_hidden, None) z1 = T.tanh(T.dot(self._M1, z_score_vector) + self._B1) self._M2 = utils.get_shared('glorot_uniform', self.nb_hidden, 1) self._B2 = utils.get_shared('constant_zero', self.nb_hidden, None) z2 = T.nnet.sigmoid(T.dot(self._M2, z1) + self._B2)
sig = th.shared(value=rng.uniform(0.2, 0.2, noise_dim).astype(np.float32), name='sig', borrow=True) noise = theano_rng.normal(size=noise_dim) gen_layers = [ll.InputLayer(shape=noise_dim, input_var=noise)] gen_layers.append( nn.MoGLayer(gen_layers[-1], noise_dim=noise_dim, z=Z, sig=sig) ) # Comment this line when testing/training baseline GAN model gen_layers.append( nn.batch_norm(ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * gen_dim * 4, W=Normal(0.05), nonlinearity=nn.relu), g=None)) gen_layers.append( ll.ReshapeLayer(gen_layers[-1], (args.batch_size, gen_dim * 4, 4, 4))) gen_layers.append( nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, gen_dim * 2, 8, 8), (5, 5), W=Normal(0.05), nonlinearity=nn.relu), g=None)) # 4 -> 8 gen_layers.append( nn.batch_norm(nn.Deconv2DLayer(gen_layers[-1], (args.batch_size, gen_dim, 16, 16), (5, 5), W=Normal(0.05), nonlinearity=nn.relu), g=None)) # 8 -> 16 gen_layers.append( nn.weight_norm(nn.Deconv2DLayer(gen_layers[-1],
# real_fc3 = LL.get_output(enc_layer_fc3, x, deterministic=True) #y_pred, real_pool3 = LL.get_output([fc8, poo5], x, deterministic=False) # real_pool3 = LL.get_output(poo5, x, deterministic=False) #enc_error = T.mean(T.neq(T.argmax(y_pred,axis=1),y)) # classification error of the encoder, to make sure the encoder is working properly # specify generator, gen_x = G(z, real_pool3) z = theano_rng.uniform(size=(args.batch_size, 50)) # uniform noise # y_1hot = T.matrix() gen_x_layer_z = LL.InputLayer(shape=(args.batch_size, 50), input_var=z) # z, 20 # gen_x_layer_z_embed = nn.batch_norm(LL.DenseLayer(gen_x_layer_z, num_units=128), g=None) # 20 -> 64 gen_x_layer_y = LL.InputLayer(shape=(args.batch_size, 10), input_var=y_1hot) # conditioned on real fc3 activations gen_x_layer_y_z = LL.ConcatLayer([gen_x_layer_y,gen_x_layer_z],axis=1) #512+256 = 768 gen_x_layer_pool2 = LL.ReshapeLayer(nn.batch_norm(LL.DenseLayer(gen_x_layer_y_z, num_units=256*5*5)), (args.batch_size,256,5,5)) gen_x_layer_dconv2_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_pool2, (args.batch_size,256,10,10), (5,5), stride=(2, 2), padding = 'half', W=Normal(0.02), nonlinearity=nn.relu)) gen_x_layer_dconv2_2 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_1, (args.batch_size,128,14,14), (5,5), stride=(1, 1), padding = 'valid', W=Normal(0.02), nonlinearity=nn.relu)) gen_x_layer_dconv1_1 = nn.batch_norm(nn.Deconv2DLayer(gen_x_layer_dconv2_2, (args.batch_size,128,28,28), (5,5), stride=(2, 2), padding = 'half', W=Normal(0.02), nonlinearity=nn.relu)) gen_x_layer_x = nn.Deconv2DLayer(gen_x_layer_dconv1_1, (args.batch_size,3,32,32), (5,5), stride=(1, 1), padding = 'valid', W=Normal(0.02), nonlinearity=T.nnet.sigmoid) # gen_x_layer_x = dnn.Conv2DDNNLayer(gen_x_layer_dconv1_2, 3, (1,1), pad=0, stride=1, # W=Normal(0.02), nonlinearity=T.nnet.sigmoid) print(gen_x_layer_x.output_shape) gen_x_layers = [gen_x_layer_z, gen_x_layer_y, gen_x_layer_y_z, gen_x_layer_pool2, gen_x_layer_dconv2_1,
def __init__(self, n_inputs, n_outputs, n_components=1, n_filters=None,
             n_hiddens=None, n_rnn=None, impute_missing=True, seed=None,
             svi=True):
    """Initialize a mixture density network with custom layers

    Parameters
    ----------
    n_inputs : int or tuple of ints or list of ints
        Dimensionality of input
    n_outputs : int
        Dimensionality of output
    n_components : int
        Number of components of the mixture density
    n_filters : list of ints or None
        Number of filters per convolutional layer (default: no conv layers)
    n_hiddens : list of ints or None
        Number of hidden units per fully connected layer (default: [10, 10])
    n_rnn : None or int
        Number of RNN units
    impute_missing : bool
        If set to True, learns replacement value for NaNs, otherwise those
        inputs are set to zero
    seed : int or None
        If provided, random number generator will be seeded
    svi : bool
        Whether to use SVI version or not
    """
    # Fix: the previous defaults ([] and [10, 10]) were mutable default
    # arguments shared across calls; the None sentinels below are
    # behaviorally equivalent for callers.
    n_filters = [] if n_filters is None else n_filters
    n_hiddens = [10, 10] if n_hiddens is None else n_hiddens

    self.impute_missing = impute_missing
    self.n_components = n_components
    self.n_filters = n_filters
    self.n_hiddens = n_hiddens
    self.n_outputs = n_outputs
    self.svi = svi

    # iws : per-sample weights placeholder, (batch, ) — presumably
    # importance weights; consumed by compile_funs (TODO confirm)
    self.iws = tt.vector('iws', dtype=dtype)

    if n_rnn is None:
        self.n_rnn = 0
    else:
        self.n_rnn = n_rnn
    # RNN and CNN feature extractors are mutually exclusive
    if self.n_rnn > 0 and len(self.n_filters) > 0:
        raise NotImplementedError

    self.seed = seed
    if seed is not None:
        self.rng = np.random.RandomState(seed=seed)
    else:
        self.rng = np.random.RandomState()
    lasagne.random.set_rng(self.rng)

    # cast n_inputs to tuple
    if type(n_inputs) is int:
        self.n_inputs = (n_inputs, )
    elif type(n_inputs) is list:
        self.n_inputs = tuple(n_inputs)
    elif type(n_inputs) is tuple:
        self.n_inputs = n_inputs
    else:
        raise ValueError('n_inputs type not supported')

    # compose layers
    self.layer = collections.OrderedDict()

    # stats : input placeholder, (batch, *self.n_inputs)
    if len(self.n_inputs) + 1 == 2:
        self.stats = tt.matrix('stats', dtype=dtype)
    elif len(self.n_inputs) + 1 == 3:
        self.stats = tt.tensor3('stats', dtype=dtype)
    elif len(self.n_inputs) + 1 == 4:
        self.stats = tt.tensor4('stats', dtype=dtype)
    else:
        raise NotImplementedError

    # input layer
    self.layer['input'] = ll.InputLayer(
        (None, *self.n_inputs), input_var=self.stats)

    # learn replacement values for NaNs, or just zero them out
    if self.impute_missing:
        self.layer['missing'] = dl.ImputeMissingLayer(
            last(self.layer), n_inputs=self.n_inputs)
    else:
        self.layer['missing'] = dl.ReplaceMissingLayer(
            last(self.layer), n_inputs=self.n_inputs)

    # recurrent neural net
    # expects shape (batch, sequence_length, num_inputs)
    if self.n_rnn > 0:
        if len(self.n_inputs) == 1:
            rs = (-1, *self.n_inputs, 1)
            self.layer['rnn_reshape'] = ll.ReshapeLayer(
                last(self.layer), rs)
        self.layer['rnn'] = ll.GRULayer(last(self.layer), n_rnn,
                                        only_return_final=True)

    # convolutional layers
    # expects shape (batch, num_input_channels, input_rows, input_columns)
    if len(self.n_filters) > 0:
        # reshape 2-d inputs to single-channel images
        if len(self.n_inputs) == 1:
            raise NotImplementedError
        elif len(self.n_inputs) == 2:
            rs = (-1, 1, *self.n_inputs)
        else:
            rs = None
        if rs is not None:
            self.layer['conv_reshape'] = ll.ReshapeLayer(
                last(self.layer), rs)

        # add conv layers
        for l in range(len(n_filters)):
            self.layer['conv_' + str(l + 1)] = ll.Conv2DLayer(
                name='c' + str(l + 1),
                incoming=last(self.layer),
                num_filters=n_filters[l],
                filter_size=3,
                stride=(2, 2),
                pad=0,
                untie_biases=False,
                W=lasagne.init.GlorotUniform(),
                b=lasagne.init.Constant(0.),
                nonlinearity=lnl.rectify,
                flip_filters=True,
                convolution=tt.nnet.conv2d)

        # flatten
        self.layer['flatten'] = ll.FlattenLayer(incoming=last(self.layer),
                                                outdim=2)

    # hidden layers
    for l in range(len(n_hiddens)):
        self.layer['hidden_' + str(l + 1)] = dl.FullyConnectedLayer(
            last(self.layer), n_units=n_hiddens[l], svi=svi,
            name='h' + str(l + 1))
    last_hidden = last(self.layer)

    # mixture layers
    self.layer['mixture_weights'] = dl.MixtureWeightsLayer(
        last_hidden, n_units=n_components, actfun=lnl.softmax, svi=svi,
        name='weights')
    self.layer['mixture_means'] = dl.MixtureMeansLayer(
        last_hidden, n_components=n_components, n_dim=n_outputs, svi=svi,
        name='means')
    self.layer['mixture_precisions'] = dl.MixturePrecisionsLayer(
        last_hidden, n_components=n_components, n_dim=n_outputs, svi=svi,
        name='precisions')
    last_mog = [
        self.layer['mixture_weights'],
        self.layer['mixture_means'],
        self.layer['mixture_precisions']
    ]

    # output placeholder
    self.params = tt.matrix('params', dtype=dtype)  # (batch, self.n_outputs)

    # mixture parameters
    # a : weights, matrix with shape (batch, n_components)
    # ms : means, list of len n_components with (batch, n_dim) — the
    #      original comment said (batch, n_dim, n_dim), which matches Us,
    #      not means; TODO confirm against MixtureMeansLayer
    # Us : precision factors, n_components list with (batch, n_dim, n_dim)
    # ldetUs : log determinants of precisions, n_comp list with (batch, )
    self.a, self.ms, precision_out = ll.get_output(last_mog,
                                                   deterministic=False)
    self.Us = precision_out['Us']
    self.ldetUs = precision_out['ldetUs']

    self.comps = {
        **{
            'a': self.a
        },
        **{'m' + str(i): self.ms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.Us[i] for i in range(self.n_components)}
    }

    # log probability of y given the mixture distribution
    # lprobs_comps : log probs per component, list of len n_components
    #                with (batch, )
    # lprobs : log probs of mixture, (batch, )
    self.lprobs_comps = [
        -0.5 * tt.sum(tt.sum(
            (self.params - m).dimshuffle([0, 'x', 1]) * U, axis=2)**2,
            axis=1) + ldetU
        for m, U, ldetU in zip(self.ms, self.Us, self.ldetUs)
    ]
    self.lprobs = (MyLogSumExp(tt.stack(self.lprobs_comps, axis=1) +
                               tt.log(self.a), axis=1)
                   - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # the quantities from above again, but with deterministic=True
    # --- in the svi case, this will disable injection of randomness;
    # the mean of weights is used instead
    self.da, self.dms, dprecision_out = ll.get_output(last_mog,
                                                      deterministic=True)
    self.dUs = dprecision_out['Us']
    self.dldetUs = dprecision_out['ldetUs']
    self.dcomps = {
        **{
            'a': self.da
        },
        **{'m' + str(i): self.dms[i] for i in range(self.n_components)},
        **{'U' + str(i): self.dUs[i] for i in range(self.n_components)}
    }
    self.dlprobs_comps = [
        -0.5 * tt.sum(tt.sum(
            (self.params - m).dimshuffle([0, 'x', 1]) * U, axis=2)**2,
            axis=1) + ldetU
        for m, U, ldetU in zip(self.dms, self.dUs, self.dldetUs)
    ]
    self.dlprobs = (MyLogSumExp(tt.stack(self.dlprobs_comps, axis=1) +
                                tt.log(self.da), axis=1)
                    - (0.5 * self.n_outputs * np.log(2 * np.pi))).squeeze()

    # parameters of network
    self.aps = ll.get_all_params(last_mog)  # all parameters
    self.mps = ll.get_all_params(last_mog, mp=True)  # means
    self.sps = ll.get_all_params(last_mog, sp=True)  # log stds

    # weight and bias parameter sets as separate lists
    self.mps_wp = ll.get_all_params(last_mog, mp=True, wp=True)
    self.sps_wp = ll.get_all_params(last_mog, sp=True, wp=True)
    self.mps_bp = ll.get_all_params(last_mog, mp=True, bp=True)
    self.sps_bp = ll.get_all_params(last_mog, sp=True, bp=True)

    # theano functions
    self.compile_funs()
    # Fix: the original re-created self.iws here, AFTER compile_funs();
    # that orphaned the iws variable already referenced by the compiled
    # functions. The duplicate assignment is removed.
def fcn(input_var, early_conv_dict, late_conv_dict, dense_filter_size,
        final_pool_function=T.max,
        input_size=128, output_size=188, p_dropout=0.5):
    """Fully-convolutional tagger: early conv stage, late (dense-like)
    conv stage, frame-wise sigmoid outputs, then pooling over frames.

    early_conv_dict / late_conv_dict : dict
        Each provides 'conv_filter_list', 'pool_filter_list' (list of
        int) and 'pool_stride_list' (list of int or None) for
        conv_layers.
    dense_filter_size : int
        Filter size of the final dense-like conv layer.
    final_pool_function : callable
        Pooling function applied over all frames (default: T.max).

    Returns the output layer.
    """
    # input: (batch, 1 channel, time, input_size)
    net = lasagne.layers.InputLayer(shape=(None, 1, None, input_size),
                                    input_var=input_var)

    # early conv stage (no dropout)
    net, stride = conv_layers(net, early_conv_dict, 1,
                              init_input_size=input_size,
                              p_dropout=0,
                              base_name='early')

    # late conv stage acts as dense layers applied per frame
    net, stride = conv_layers(net, late_conv_dict, stride,
                              init_input_size=1,
                              p_dropout=p_dropout,
                              base_name='late')

    # frame output layer: one sigmoid value per frame and output unit
    net = cl.Conv2DXLayer(lasagne.layers.dropout(net, p=p_dropout),
                          num_filters=output_size,
                          filter_size=(dense_filter_size, 1),
                          nonlinearity=lasagne.nonlinearities.sigmoid,
                          W=lasagne.init.GlorotUniform())

    # pool frame outputs into a single clip-level vector
    net = layers.GlobalPoolLayer(net, pool_function=final_pool_function)
    net = layers.ReshapeLayer(net, ([0], -1))
    return net
nonlinearity=nn.relu))) # embedding, 50 -> 128 gen0_layer_z_embed = gen0_layers[-1] gen0_layers.append( LL.InputLayer(shape=(args.batch_size, 256), input_var=real_fc3) ) # Input layer for real_fc3 in independent training, gen_fc3 in joint training gen0_layer_fc3 = gen0_layers[-1] gen0_layers.append( LL.ConcatLayer([gen0_layer_fc3, gen0_layer_z_embed], axis=1)) # concatenate noise and fc3 features gen0_layers.append( LL.ReshapeLayer( nn.batch_norm( LL.DenseLayer(gen0_layers[-1], num_units=256 * 5 * 5, W=Normal(0.02), nonlinearity=T.nnet.relu)), (args.batch_size, 256, 5, 5))) # fc gen0_layers.append( nn.batch_norm( nn.Deconv2DLayer(gen0_layers[-1], (args.batch_size, 256, 10, 10), (5, 5), stride=(2, 2), padding='half', W=Normal(0.02), nonlinearity=nn.relu))) # deconv gen0_layers.append( nn.batch_norm( nn.Deconv2DLayer(gen0_layers[-1], (args.batch_size, 128, 14, 14), (5, 5),
def p_fcn(input_var, early_conv_dict, pl_dict, late_conv_dict,
          dense_filter_size, final_pool_function=T.max,
          input_size=128, output_size=188, p_dropout=0.5):
    """Tagger with a persistence-landscape (PL) 2D layer inserted
    between the early and late conv stages.

    early_conv_dict / late_conv_dict : dict
        Each provides 'conv_filter_list', 'pool_filter_list' (list of
        int) and 'pool_stride_list' (list of int or None) for
        conv_layers.
    pl_dict : dict
        Needs 'num_lambda', 'num_points', 'value_range', 'seg_size' and
        'seg_step'.
    dense_filter_size : int
        Filter size of the final dense-like conv layer.
    final_pool_function : callable
        Pooling function applied over all frames (default: T.max).

    Returns the output layer.
    """
    # input: (batch, 1 channel, time, input_size)
    net = lasagne.layers.InputLayer(shape=(None, 1, None, input_size),
                                    input_var=input_var)

    # early conv stage (no dropout)
    net, stride = conv_layers(net, early_conv_dict, 1,
                              init_input_size=input_size,
                              p_dropout=0,
                              base_name='early')

    # persistence landscape over sliding segments along the time axis
    n_lambda = pl_dict['num_lambda']
    n_points = pl_dict['num_points']
    v_range = pl_dict['value_range']
    seg_shape = (pl_dict['seg_size'], 1)
    seg_shift = (pl_dict['seg_step'], 1)
    net = cl.PersistenceFlatten2DLayer(net, n_lambda, n_points, v_range,
                                       seg_shape, seg_shift)

    # late conv stage acts as dense layers applied per frame
    net, stride = conv_layers(net, late_conv_dict, stride,
                              init_input_size=1,
                              p_dropout=p_dropout,
                              base_name='late')

    # frame output layer: one sigmoid value per frame and output unit
    net = cl.Conv2DXLayer(lasagne.layers.dropout(net, p=p_dropout),
                          num_filters=output_size,
                          filter_size=(dense_filter_size, 1),
                          nonlinearity=lasagne.nonlinearities.sigmoid,
                          W=lasagne.init.GlorotUniform())

    # pool frame outputs into a single clip-level vector
    net = layers.GlobalPoolLayer(net, pool_function=final_pool_function)
    net = layers.ReshapeLayer(net, ([0], -1))
    return net
sym_z_rand = theano_rng.uniform(size=(batch_size_g, n_z)) sym_z_shared = T.tile(theano_rng.uniform((batch_size_g / num_classes, n_z)), (num_classes, 1)) '''models''' gen_in_z = ll.InputLayer(shape=(batch_size_g, n_z)) gen_in_y = ll.InputLayer(shape=(batch_size_g, )) gen_layers = [gen_in_z] # gen_layers = [(nn.MoGLayer(gen_in_z, noise_dim=(batch_size_g, n_z)))] gen_layers.append(nn.MLPConcatLayer([gen_layers[-1], gen_in_y], num_classes)) gen_layers.append( ll.DenseLayer(gen_layers[-1], num_units=4 * 4 * 512, W=Normal(0.05), nonlinearity=nn.relu)) gen_layers.append(nn.batch_norm(gen_layers[-1], g=None)) gen_layers.append(ll.ReshapeLayer(gen_layers[-1], (batch_size_g, 512, 4, 4))) gen_layers.append(nn.ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes)) gen_layers.append( nn.Deconv2DLayer(gen_layers[-1], (batch_size_g, 256, 8, 8), (5, 5), W=Normal(0.05), nonlinearity=nn.relu)) gen_layers.append(nn.batch_norm(gen_layers[-1], g=None)) gen_layers.append(nn.ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes)) gen_layers.append( nn.Deconv2DLayer(gen_layers[-1], (batch_size_g, 128, 16, 16), (5, 5), W=Normal(0.05), nonlinearity=nn.relu)) gen_layers.append(nn.batch_norm(gen_layers[-1], g=None)) gen_layers.append(nn.ConvConcatLayer([gen_layers[-1], gen_in_y], num_classes)) gen_layers.append( nn.Deconv2DLayer(gen_layers[-1], (batch_size_g, 3, 32, 32), (5, 5),
def pc_fcn(input_var, early_conv_dict, middle_conv_dict, pl_dict,
           late_conv_dict, dense_filter_size, final_pool_function=T.max,
           input_size=128, output_size=188, p_dropout=0.5):
    """Tagger combining a convolutional branch and a
    persistence-landscape (PL) branch after a shared early conv stage.

    early_conv_dict / middle_conv_dict / late_conv_dict : dict
        Each provides 'conv_filter_list', 'pool_filter_list' (list of
        int) and 'pool_stride_list' (list of int or None) for
        conv_layers.
    pl_dict : dict
        Needs 'num_lambda' (legacy key: 'n_f_db'), 'num_points' (legacy
        key: 'n_points'), 'value_range', 'seg_size' and 'seg_step'.
    dense_filter_size : int
        Filter size of the final dense-like conv layer.
    final_pool_function : callable
        Pooling function applied over all frames (default: T.max).

    Returns the output layer.
    """
    # shared early conv stage (no dropout)
    input_network = lasagne.layers.InputLayer(
        shape=(None, 1, None, input_size), input_var=input_var)
    total_stride = 1
    network, total_stride = conv_layers(input_network, early_conv_dict,
                                        total_stride,
                                        init_input_size=input_size,
                                        p_dropout=0,
                                        base_name='early')

    # convolutional branch (middle conv layers, dense-like)
    network_c, _ = conv_layers(network, middle_conv_dict, total_stride,
                               init_input_size=1,
                               p_dropout=0,
                               base_name='middle_conv')

    # persistence-landscape branch.
    # Fix: the original used bare `except:` for the legacy-key fallback,
    # which also swallowed unrelated errors (e.g. a wrong pl_dict type);
    # only a missing key should trigger the fallback.
    try:
        num_lambda = pl_dict['num_lambda']
    except KeyError:
        num_lambda = pl_dict['n_f_db']
    try:
        num_points = pl_dict['num_points']
    except KeyError:
        num_points = pl_dict['n_points']
    value_range = pl_dict['value_range']
    seg_size = pl_dict['seg_size']
    seg_step = pl_dict['seg_step']
    patch_size = (seg_size, 1)
    patch_step = (seg_step, 1)
    network_p = cl.PersistenceFlatten2DLayer(network, num_lambda,
                                             num_points, value_range,
                                             patch_size, patch_step)

    # Convolution+Persistence: concatenate the branches channel-wise,
    # cropping the time axis to the shorter of the two
    network = layers.ConcatLayer([network_c, network_p], axis=1,
                                 cropping=[None, None, 'lower', None],
                                 name='Convolution+Persistence')

    # late conv stage acts as dense layers applied per frame
    network, total_stride = conv_layers(network, late_conv_dict,
                                        total_stride,
                                        init_input_size=1,
                                        p_dropout=p_dropout,
                                        base_name='late')

    # frame output layer: every frame has a value
    network = cl.Conv2DXLayer(lasagne.layers.dropout(network, p=p_dropout),
                              num_filters=output_size,
                              filter_size=(dense_filter_size, 1),
                              nonlinearity=lasagne.nonlinearities.sigmoid,
                              W=lasagne.init.GlorotUniform())

    # pool frame outputs into a single clip-level vector
    network = layers.GlobalPoolLayer(network,
                                     pool_function=final_pool_function)
    network = layers.ReshapeLayer(network, ([0], -1))
    return network
from sklearn.preprocessing import StandardScaler from sklearn.externals import joblib from model.custom_layer import STFTLayer, MelSpecLayer from preprocess.prepare_task import get_intersection from utils.misc import load_audio, pmap from tqdm import tqdm SONG_ROOT = '/mnt/msd/songs/' MEL_ROOT = '/mnt/msdmel/' SR = 22050 # build CudaMel l_in = L.InputLayer((None, 2, None)) l_stft = STFTLayer(L.ReshapeLayer(l_in, ([0], [1], [2], 1)), n_ch=2, n_fft=1024, hop_size=256, log_amplitude=False) l_mel = MelSpecLayer(l_stft, sr=22050, n_fft=1024, log_amplitude=True) out = L.get_output(l_mel, deterministic=True) melspec = theano.function([l_in.input_var], out) def prepare_scaler(): tids = get_intersection() path_map = pkl.load(open('/mnt/msd/MSD_to_path.pkl')) path = filter( lambda x: x[1] is not None, map(
def build_network(self, K, vocab_size, W_init):
    """Build the K-hop reader network.

    K : number of hops — K - 1 intermediate bidirectional-GRU +
        attention passes, then one final bidirectional pass.
    vocab_size : word-vocabulary size for the embedding lookups.
    W_init : initial word-embedding matrix, shared between document and
        query embeddings.

    Returns (final, final_v, l_prob, embedding W, attentions):
    train-time output, deterministic output, the output layer, the
    document embedding matrix, and collected attention matrices (empty
    unless self.save_attn).
    """
    # --- input layers; the self.inps slot assignments are fixed by the
    # caller (indices 4, 5, 10, 12 are used further down or elsewhere) ---
    l_docin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[0])
    l_doctokin = L.InputLayer(shape=(None, None), input_var=self.inps[1])
    l_qin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[2])
    l_qtokin = L.InputLayer(shape=(None, None), input_var=self.inps[3])
    l_docmask = L.InputLayer(shape=(None, None), input_var=self.inps[6])
    l_qmask = L.InputLayer(shape=(None, None), input_var=self.inps[7])
    l_tokin = L.InputLayer(shape=(None, MAX_WORD_LEN),
                           input_var=self.inps[8])
    l_tokmask = L.InputLayer(shape=(None, MAX_WORD_LEN),
                             input_var=self.inps[9])
    l_featin = L.InputLayer(shape=(None, None), input_var=self.inps[11])
    l_match_feat = L.InputLayer(shape=(None, None, None),
                                input_var=self.inps[13])
    # match feature: 2-way indicator embedded to one scalar per pair,
    # then flattened back to a 3-d tensor
    l_match_feat = L.EmbeddingLayer(l_match_feat, 2, 1)
    l_match_feat = L.ReshapeLayer(l_match_feat, (-1, [1], [2]))
    l_use_char = L.InputLayer(shape=(None, None, self.feat_cnt),
                              input_var=self.inps[14])
    l_use_char_q = L.InputLayer(shape=(None, None, self.feat_cnt),
                                input_var=self.inps[15])

    doc_shp = self.inps[1].shape
    qry_shp = self.inps[3].shape

    # --- word embeddings; the query lookup reuses the document
    # embedding matrix (W=l_docembed.W) ---
    l_docembed = L.EmbeddingLayer(l_docin, input_size=vocab_size,
                                  output_size=self.embed_dim,
                                  W=W_init)  # B x N x 1 x DE
    l_doce = L.ReshapeLayer(
        l_docembed, (doc_shp[0], doc_shp[1], self.embed_dim))  # B x N x DE
    l_qembed = L.EmbeddingLayer(l_qin, input_size=vocab_size,
                                output_size=self.embed_dim,
                                W=l_docembed.W)
    if self.train_emb == 0:
        # freeze the shared embedding matrix
        l_docembed.params[l_docembed.W].remove('trainable')
        l_qembed.params[l_qembed.W].remove('trainable')
    l_qembed = L.ReshapeLayer(
        l_qembed, (qry_shp[0], qry_shp[1], self.embed_dim))  # B x N x DE
    l_fembed = L.EmbeddingLayer(l_featin, input_size=2,
                                output_size=2)  # B x N x 2

    # char embeddings
    if self.use_chars:
        # NOTE(review): many alternative char-integration schemes were
        # kept here as commented-out code (plain concat, bidir feat
        # concat, char concat, feat concat, gating, tied gating, scalar
        # gating, bidirectional gating, bidirectional gate + concat);
        # they have been removed as dead code. The active variant is
        # "gate + concat":
        l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        l_char_gru = L.GRULayer(l_lookup, self.embed_dim,
                                grad_clipping=GRAD_CLIP,
                                mask_input=l_tokmask,
                                gradient_steps=GRAD_STEPS,
                                precompute_input=True,
                                only_return_final=True)
        # index per-token char representations back into doc/query order
        l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # gate word embedding with char embedding, then concat the
        # use-char feature
        l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
        l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])
        l_doce = L.ConcatLayer([l_use_char, l_doce], axis=2)
        l_qembed = L.ConcatLayer([l_use_char_q, l_qembed], axis=2)

    attentions = []
    if self.save_attn:
        l_m = PairwiseInteractionLayer([l_doce, l_qembed])
        attentions.append(L.get_output(l_m, deterministic=True))

    # --- K - 1 intermediate hops: bidirectional GRUs over document and
    # query, then attention conditions the next document representation ---
    for i in range(K - 1):
        l_fwd_doc_1 = L.GRULayer(l_doce, self.nhidden,
                                 grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask,
                                 gradient_steps=GRAD_STEPS,
                                 precompute_input=True)
        l_bkd_doc_1 = L.GRULayer(l_doce, self.nhidden,
                                 grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask,
                                 gradient_steps=GRAD_STEPS,
                                 precompute_input=True, \
                                 backwards=True)
        l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2)  # B x N x DE
        l_fwd_q_1 = L.GRULayer(l_qembed, self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_qmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_q_1 = L.GRULayer(l_qembed, self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_qmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)
        l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1],
                                axis=2)  # B x Q x DE
        # NOTE(review): commented-out alternatives removed here: a
        # MatrixAttentionLayer call without l_match_feat, and a
        # gated-attention (GA) variant using PairwiseInteractionLayer +
        # GatedAttentionLayer + dropout.
        l_doce = MatrixAttentionLayer(
            [l_doc_1, l_q_c_1, l_qmask, l_match_feat])

    # --- final hop ---
    if self.use_feat:
        # append the 2-d feature embedding to the document representation
        l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2)  # B x N x DE+2
    l_fwd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask,
                           gradient_steps=GRAD_STEPS,
                           precompute_input=True) \
    
    l_bkd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask,
                           gradient_steps=GRAD_STEPS,
                           precompute_input=True, \
                           backwards=True)
    l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)
    l_fwd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True, only_return_final=False)
    l_bkd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True, backwards=True,
                         only_return_final=False)
    l_q = L.ConcatLayer([l_fwd_q, l_bkd_q], axis=2)  # B x Q x 2D

    if self.save_attn:
        l_m = PairwiseInteractionLayer([l_doc, l_q])
        attentions.append(L.get_output(l_m, deterministic=True))

    # attention-sum output over candidates (inps[4], inps[12]), masked by
    # inps[10]
    l_prob = AttentionSumLayer([l_doc, l_q], self.inps[4], self.inps[12],
                               mask_input=self.inps[10])
    final = L.get_output(l_prob)
    final_v = L.get_output(l_prob, deterministic=True)
    return final, final_v, l_prob, l_docembed.W, attentions
def __init__(self, n_inputs=None, n_outputs=None, input_shape=None,
             n_bypass=0, density='mog', n_hiddens=(10, 10),
             impute_missing=True, seed=None, n_filters=(), filter_sizes=3,
             pool_sizes=2, n_rnn=0, **density_opts):
    """Initialize a mixture density network with custom layers

    Parameters
    ----------
    n_inputs : int
        Total input dimensionality (data/summary stats)
    n_outputs : int
        Dimensionality of output (simulator parameters)
    input_shape : tuple
        Size to which data are reshaped before CNN or RNN
    n_bypass : int
        Number of elements at end of input which bypass CNN or RNN
    density : string
        Type of density condition on the network, can be 'mog' or 'maf'
    n_filters : list of ints
        Number of filters per convolutional layer
    filter_sizes : int or list of ints
        Convolution filter size(s); an int is broadcast to all conv layers
    pool_sizes : int or list of ints
        Max-pool size(s); an int is broadcast to all conv layers
    n_hiddens : list of ints
        Number of hidden units per fully connected layer
    n_rnn : None or int
        Number of RNN units
    impute_missing : bool
        If set to True, learns replacement value for NaNs, otherwise
        those inputs are set to zero
    seed : int or None
        If provided, random number generator will be seeded
    density_opts : dict
        Options for the density estimator (e.g. number of mixture
        components), forwarded to init_mdn / init_maf
    """
    # RNN and CNN feature extractors are mutually exclusive
    if n_rnn > 0 and len(n_filters) > 0:
        raise NotImplementedError
    assert isint(n_inputs) and isint(n_outputs)\
        and n_inputs > 0 and n_outputs > 0
    self.density = density.lower()
    self.impute_missing = impute_missing
    self.n_hiddens = list(n_hiddens)
    self.n_outputs, self.n_inputs = n_outputs, n_inputs
    self.n_bypass = n_bypass
    self.n_rnn = n_rnn
    self.n_filters, self.filter_sizes, self.pool_sizes, n_cnn = \
        list(n_filters), filter_sizes, pool_sizes, len(n_filters)
    # broadcast scalar filter/pool sizes to one entry per conv layer
    if type(self.filter_sizes) is int:
        self.filter_sizes = [self.filter_sizes for _ in range(n_cnn)]
    else:
        assert len(self.filter_sizes) >= n_cnn
    if type(self.pool_sizes) is int:
        self.pool_sizes = [self.pool_sizes for _ in range(n_cnn)]
    else:
        assert len(self.pool_sizes) >= n_cnn
    # iws : per-sample weights placeholder, (batch, ) — presumably
    # importance weights; verify against compile_funs
    self.iws = tt.vector('iws', dtype=dtype)
    self.seed = seed
    if seed is not None:
        self.rng = np.random.RandomState(seed=seed)
    else:
        self.rng = np.random.RandomState()
    lasagne.random.set_rng(self.rng)

    # flat inputs are treated as a 1-d "shape" unless told otherwise;
    # bypass elements are not part of the reshaped block
    self.input_shape = (n_inputs,) if input_shape is None else input_shape
    assert np.prod(self.input_shape) + self.n_bypass == self.n_inputs
    assert 1 <= len(self.input_shape) <= 3

    # params: output placeholder (batch, self.n_outputs)
    self.params = tensorN(2, name='params', dtype=dtype)

    # stats : input placeholder, (batch, self.n_inputs)
    self.stats = tensorN(2, name='stats', dtype=dtype)

    # compose layers
    self.layer = collections.OrderedDict()

    # input layer, None indicates batch size not fixed at compile time
    self.layer['input'] = ll.InputLayer(
        (None, self.n_inputs), input_var=self.stats)

    # learn replacement values for NaNs, or just zero them out
    if self.impute_missing:
        self.layer['missing'] = \
            dl.ImputeMissingLayer(last(self.layer),
                                  n_inputs=(self.n_inputs,))
    else:
        self.layer['missing'] = \
            dl.ReplaceMissingLayer(last(self.layer),
                                   n_inputs=(self.n_inputs,))

    # split the trailing n_bypass elements off so they can skip the
    # RNN/CNN feature extractor and be merged back in afterwards
    if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
        last_layer = last(self.layer)
        bypass_slice = slice(self.n_inputs - self.n_bypass, self.n_inputs)
        direct_slice = slice(0, self.n_inputs - self.n_bypass)
        self.layer['bypass'] = ll.SliceLayer(last_layer, bypass_slice)
        self.layer['direct'] = ll.SliceLayer(last_layer, direct_slice)

    # reshape inputs prior to RNN or CNN step
    if self.n_rnn > 0 or n_cnn > 0:
        if len(n_filters) > 0 and len(self.input_shape) == 2:  # 1 channel
            rs = (-1, 1, *self.input_shape)
        else:
            if self.n_rnn > 0:
                assert len(self.input_shape) == 2  # time, dim
            else:
                assert len(self.input_shape) == 3  # channel, row, col
            rs = (-1, *self.input_shape)
        # last layer is 'missing' or 'direct'
        self.layer['reshape'] = ll.ReshapeLayer(last(self.layer), rs)

    # recurrent neural net, input: (batch, sequence_length, num_inputs)
    if self.n_rnn > 0:
        self.layer['rnn'] = ll.GRULayer(last(self.layer), n_rnn,
                                        only_return_final=True)

    # convolutional net, input: (batch, channels, rows, columns)
    if n_cnn > 0:
        for l in range(n_cnn):
            # add layers; pad to keep spatial size when no pooling
            # follows (exact for odd filter sizes), otherwise no padding
            if self.pool_sizes[l] == 1:
                padding = (self.filter_sizes[l] - 1) // 2
            else:
                padding = 0
            self.layer['conv_' + str(l + 1)] = ll.Conv2DLayer(
                name='c' + str(l + 1),
                incoming=last(self.layer),
                num_filters=self.n_filters[l],
                filter_size=self.filter_sizes[l],
                stride=(1, 1),
                pad=padding,
                untie_biases=False,
                W=lasagne.init.GlorotUniform(),
                b=lasagne.init.Constant(0.),
                nonlinearity=lnl.rectify,
                flip_filters=True,
                convolution=tt.nnet.conv2d)
            if self.pool_sizes[l] > 1:
                self.layer['pool_' + str(l + 1)] = ll.MaxPool2DLayer(
                    name='p' + str(l + 1),
                    incoming=last(self.layer),
                    pool_size=self.pool_sizes[l],
                    stride=None,
                    ignore_border=True)

        # flatten
        self.layer['flatten'] = ll.FlattenLayer(
            incoming=last(self.layer), outdim=2)

    # incorporate bypass inputs
    if self.n_bypass > 0 and (self.n_rnn > 0 or n_cnn > 0):
        self.layer['bypass_merge'] = lasagne.layers.ConcatLayer(
            [self.layer['bypass'], last(self.layer)], axis=1)

    # density estimator on top of the feature layers
    if self.density == 'mog':
        self.init_mdn(**density_opts)
    elif self.density == 'maf':
        self.init_maf(**density_opts)
    else:
        raise NotImplementedError

    self.compile_funs()  # theano functions
def __init__(self, train_raw, test_raw, dim, mode, l2, l1, batch_norm, dropout,
             batch_size, ihm_C, los_C, ph_C, decomp_C, partition, nbins, **kwargs):
    """Build the multitask recurrent network and compile theano functions.

    A single LSTM over the input time series feeds four task heads:

      * ihm    -- in-hospital mortality: 2-way softmax read at one fixed
                  timestep per example (``self.ihm_pos``), masked loss.
      * los    -- length of stay: per-timestep scalar regression (rectify),
                  masked squared error.
      * ph     -- phenotyping: 25 sigmoid outputs from each example's last
                  timestep (``self.input_lens``).
      * decomp -- decompensation: per-timestep 2-way softmax, masked
                  multilabel loss.

    The total loss is the weighted sum of the four task losses
    (``ihm_C``/``los_C``/``ph_C``/``decomp_C``) plus optional L1/L2
    regularization.  Compiles ``self.train_fn`` (only when
    ``mode == 'train'``) and ``self.test_fn``.

    :param train_raw, test_raw: raw datasets; ``train_raw[0][0].shape[1]``
        supplies the per-timestep input dimensionality.
    :param dim: number of LSTM hidden units.
    :param mode: 'train' compiles the training function as well.
    :param l2, l1: regularization coefficients; 0 disables the term.
    :param batch_norm: stored on self but not used in this constructor.
    :param dropout: dropout probability applied before and after the LSTM
        when > 0.
    :param batch_size: fixed minibatch size used to gather per-example
        outputs (the symbolic graph hard-codes this many slices).
    :param ihm_C, los_C, ph_C, decomp_C: per-task loss weights.
    :param partition: 'log' selects logarithmic LOS binning, anything else
        selects the custom binning (which requires ``nbins == 10``).
    :param nbins: number of LOS bins.
    :param kwargs: unused; reported on stdout for visibility.
    """
    print "==> not used params in network class:", kwargs.keys()
    self.train_raw = train_raw
    self.test_raw = test_raw
    self.dim = dim
    self.mode = mode
    self.l2 = l2
    self.l1 = l1
    self.batch_norm = batch_norm
    self.dropout = dropout
    self.batch_size = batch_size
    self.ihm_C = ihm_C
    self.los_C = los_C
    self.ph_C = ph_C
    self.decomp_C = decomp_C
    self.nbins = nbins

    # Choose the LOS discretization scheme; the custom scheme is only
    # defined for exactly 10 bins.
    if (partition == 'log'):
        self.get_bin = metrics.get_bin_log
        self.get_estimate = metrics.get_estimate_log
    else:
        assert self.nbins == 10
        self.get_bin = metrics.get_bin_custom
        self.get_estimate = metrics.get_estimate_custom

    self.train_batch_gen = self.get_batch_gen(self.train_raw)
    self.test_batch_gen = self.get_batch_gen(self.test_raw)

    # Symbolic inputs: X is (batch, time, features); the rest are
    # per-task labels/masks and per-example positions/lengths.
    self.input_var = T.tensor3('X')
    self.input_lens = T.ivector('L')

    self.ihm_pos = T.ivector('ihm_pos')
    self.ihm_mask = T.ivector('ihm_mask')
    self.ihm_label = T.ivector('ihm_label')

    self.los_mask = T.imatrix('los_mask')
    self.los_label = T.matrix('los_label')  # for regression
    #self.los_label = T.imatrix('los_label')

    self.ph_label = T.imatrix('ph_label')

    self.decomp_mask = T.imatrix('decomp_mask')
    self.decomp_label = T.imatrix('decomp_label')

    print "==> Building neural network"

    # common network: (optional dropout) -> LSTM -> (optional dropout),
    # returning the full output sequence (only_return_final=False).
    network = layers.InputLayer((None, None, self.train_raw[0][0].shape[1]),
                                input_var=self.input_var)
    if (self.dropout > 0):
        network = layers.DropoutLayer(network, p=self.dropout)
    # Orthogonal input/hidden weights, small-Normal peephole weights;
    # the cell gate has no peephole (W_cell=None) and uses tanh.
    network = layers.LSTMLayer(incoming=network, num_units=dim,
                               only_return_final=False,
                               grad_clipping=10,
                               ingate=lasagne.layers.Gate(
                                   W_in=Orthogonal(),
                                   W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               forgetgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(),
                                   W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)),
                               cell=lasagne.layers.Gate(W_cell=None,
                                   nonlinearity=lasagne.nonlinearities.tanh,
                                   W_in=Orthogonal(),
                                   W_hid=Orthogonal()),
                               outgate=lasagne.layers.Gate(
                                   W_in=Orthogonal(),
                                   W_hid=Orthogonal(),
                                   W_cell=Normal(0.1)))
    if (self.dropout > 0):
        network = layers.DropoutLayer(network, p=self.dropout)

    lstm_output = layers.get_output(network)
    self.params = layers.get_all_params(network, trainable=True)
    self.reg_params = layers.get_all_params(network, regularizable=True)

    # for each example in minibatch take the last output
    last_outputs = []
    for index in range(self.batch_size):
        last_outputs.append(lstm_output[index, self.input_lens[index]-1, :])
    last_outputs = T.stack(last_outputs)

    # take 48h outputs for fixed mortality task
    mid_outputs = []
    for index in range(self.batch_size):
        mid_outputs.append(lstm_output[index, self.ihm_pos[index], :])
    mid_outputs = T.stack(mid_outputs)

    # in-hospital mortality related network
    ihm_network = layers.InputLayer((None, dim), input_var=mid_outputs)
    ihm_network = layers.DenseLayer(incoming=ihm_network, num_units=2,
                                    nonlinearity=softmax)
    self.ihm_prediction = layers.get_output(ihm_network)
    self.ihm_det_prediction = layers.get_output(ihm_network, deterministic=True)
    self.params += layers.get_all_params(ihm_network, trainable=True)
    self.reg_params += layers.get_all_params(ihm_network, regularizable=True)
    # mask zeroes out examples where the ihm target is not defined
    self.ihm_loss = (self.ihm_mask *
                     categorical_crossentropy(self.ihm_prediction,
                                              self.ihm_label)).mean()

    # length of stay related network
    # Regression
    los_network = layers.InputLayer((None, None, dim), input_var=lstm_output)
    los_network = layers.ReshapeLayer(los_network, (-1, dim))
    los_network = layers.DenseLayer(incoming=los_network, num_units=1,
                                    nonlinearity=rectify)
    # back to (batch, time): one scalar LOS prediction per timestep
    los_network = layers.ReshapeLayer(los_network, (lstm_output.shape[0], -1))
    self.los_prediction = layers.get_output(los_network)
    self.los_det_prediction = layers.get_output(los_network, deterministic=True)
    self.params += layers.get_all_params(los_network, trainable=True)
    self.reg_params += layers.get_all_params(los_network, regularizable=True)
    self.los_loss = (self.los_mask * squared_error(self.los_prediction,
                                                   self.los_label)).mean(axis=1).mean(axis=0)

    # phenotype related network
    ph_network = layers.InputLayer((None, dim), input_var=last_outputs)
    ph_network = layers.DenseLayer(incoming=ph_network, num_units=25,
                                   nonlinearity=sigmoid)
    self.ph_prediction = layers.get_output(ph_network)
    self.ph_det_prediction = layers.get_output(ph_network, deterministic=True)
    self.params += layers.get_all_params(ph_network, trainable=True)
    self.reg_params += layers.get_all_params(ph_network, regularizable=True)
    self.ph_loss = nn_utils.multilabel_loss(self.ph_prediction, self.ph_label)

    # decompensation related network
    decomp_network = layers.InputLayer((None, None, dim), input_var=lstm_output)
    decomp_network = layers.ReshapeLayer(decomp_network, (-1, dim))
    decomp_network = layers.DenseLayer(incoming=decomp_network, num_units=2,
                                       nonlinearity=softmax)
    decomp_network = layers.ReshapeLayer(decomp_network,
                                         (lstm_output.shape[0], -1, 2))
    # keep only the probability of the positive class per timestep
    self.decomp_prediction = layers.get_output(decomp_network)[:, :, 1]
    self.decomp_det_prediction = layers.get_output(decomp_network,
                                                   deterministic=True)[:, :, 1]
    self.params += layers.get_all_params(decomp_network, trainable=True)
    self.reg_params += layers.get_all_params(decomp_network, regularizable=True)
    self.decomp_loss = nn_utils.multilabel_loss_with_mask(self.decomp_prediction,
                                                          self.decomp_label,
                                                          self.decomp_mask)

    # Disabled debug snippet (kept as a string literal, not executed):
    """
    data = next(self.train_batch_gen)
    print max(data[1])
    print lstm_output.eval({self.input_var:data[0]}).shape
    exit()
    """

    if self.l2 > 0:
        self.loss_l2 = self.l2 * nn_utils.l2_reg(self.reg_params)
    else:
        self.loss_l2 = T.constant(0)

    if self.l1 > 0:
        self.loss_l1 = self.l1 * nn_utils.l1_reg(self.reg_params)
    else:
        self.loss_l1 = T.constant(0)

    self.reg_loss = self.loss_l1 + self.loss_l2

    # weighted multitask objective
    self.loss = (ihm_C * self.ihm_loss + los_C * self.los_loss +
                 ph_C * self.ph_loss + decomp_C * self.decomp_loss +
                 self.reg_loss)

    #updates = lasagne.updates.adadelta(self.loss, self.params,
    #                                    learning_rate=0.001)
    #updates = lasagne.updates.momentum(self.loss, self.params,
    #                                    learning_rate=0.00003)
    #updates = lasagne.updates.adam(self.loss, self.params)
    updates = lasagne.updates.adam(self.loss, self.params, beta1=0.5,
                                   learning_rate=0.0001)  # from DCGAN paper
    #updates = lasagne.updates.nesterov_momentum(loss, params, momentum=0.9,
    #                                             learning_rate=0.001,

    all_inputs = [self.input_var, self.input_lens,
                  self.ihm_pos, self.ihm_mask, self.ihm_label,
                  self.los_mask, self.los_label,
                  self.ph_label,
                  self.decomp_mask, self.decomp_label]

    # train outputs use stochastic (dropout-on) predictions;
    # test outputs use deterministic ones.
    train_outputs = [self.ihm_prediction, self.los_prediction,
                     self.ph_prediction, self.decomp_prediction,
                     self.loss,
                     self.ihm_loss, self.los_loss,
                     self.ph_loss, self.decomp_loss,
                     self.reg_loss]

    test_outputs = [self.ihm_det_prediction, self.los_det_prediction,
                    self.ph_det_prediction, self.decomp_det_prediction,
                    self.loss,
                    self.ihm_loss, self.los_loss,
                    self.ph_loss, self.decomp_loss,
                    self.reg_loss]

    ## compiling theano functions
    if self.mode == 'train':
        print "==> compiling train_fn"
        self.train_fn = theano.function(inputs=all_inputs,
                                        outputs=train_outputs,
                                        updates=updates)

    print "==> compiling test_fn"
    self.test_fn = theano.function(inputs=all_inputs,
                                   outputs=test_outputs)
def build_1Dregression_v1(input_var=None,
                          input_width=None,
                          nin_units=12,
                          h_num_units=(64, 64),
                          h_grad_clip=1.0,
                          output_width=1):
    """
    A stacked bidirectional RNN network for regression, alternating
    with dense layers and merging of the two directions, followed by
    a feature mean pooling in the time direction, with a linear
    dim-reduction layer at the start.

    Fixes over the previous revision:
      * ``h_num_units`` default is now a tuple instead of a mutable list
        (mutable default arguments are shared across calls).
      * The duplicated forward/backward branch construction is factored
        into a private helper.

    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors,
            expected shape (batch, 22, input_width)
        input_width (int): length of input sequences
        nin_units (int): number of NIN features
        h_num_units (sequence of int): no. of units in the hidden layer of
            each stack, from bottom to top
        h_grad_clip (float): gradient clipping maximum value
        h_grad_clip (float): gradient clipping maximum value
        output_width (int): size of output layer (e.g. =1 for 1D regression)

    Returns:
        output layer (Lasagne layer object)
    """
    # Non-linearity hyperparameter
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=0.15)

    # Input layer (22 input channels over `input_width` timesteps)
    l_in = LL.InputLayer(shape=(None, 22, input_width), input_var=input_var)
    batchsize = l_in.input_var.shape[0]  # symbolic batch size

    # NIN-layer: linear per-timestep dim reduction 22 -> nin_units
    l_in = LL.NINLayer(l_in, num_units=nin_units,
                       nonlinearity=lasagne.nonlinearities.linear)

    # (batch, channels, time) -> (batch, time, channels) for the RNNs
    l_in_1 = LL.DimshuffleLayer(l_in, (0, 2, 1))

    def _directional_branch(incoming, h, backwards):
        # One RNN direction followed by a per-timestep dense layer,
        # reshaped back to (batch, time, h).
        l_rnn = LL.RecurrentLayer(incoming,
                                  nonlinearity=nonlin,
                                  num_units=h,
                                  backwards=backwards,
                                  learn_init=True,
                                  grad_clipping=h_grad_clip,
                                  unroll_scan=True,
                                  precompute_input=True)
        l_flat = LL.ReshapeLayer(l_rnn, (-1, h))
        l_dense = LL.DenseLayer(l_flat, num_units=h, nonlinearity=nonlin)
        return LL.ReshapeLayer(l_dense, (batchsize, input_width, h))

    # RNN layers: each stack sums a forward and a backward branch
    for h in h_num_units:
        l_forward = _directional_branch(l_in_1, h, backwards=False)
        l_backward = _directional_branch(l_in_1, h, backwards=True)
        l_in_1 = LL.ElemwiseSumLayer([l_forward, l_backward])

    # Output layers: per-timestep linear readout ...
    network_0a = LL.ReshapeLayer(l_in_1, (-1, h_num_units[-1]))
    network_0b = LL.DenseLayer(network_0a, num_units=output_width,
                               nonlinearity=nonlin)
    network_0c = LL.ReshapeLayer(network_0b,
                                 (batchsize, input_width, output_width))

    # ... then mean-pool over the time direction
    output_net_1 = LL.FlattenLayer(network_0c, outdim=2)
    output_net_2 = LL.FeaturePoolLayer(output_net_1, pool_size=input_width,
                                       pool_function=T.mean)

    return output_net_2
#params = l.get_params() #shapes = [p.get_value().shape for p in params] #for i in shapes: n_params*=i print("layer {}, outputs shape {}".format(l,shape)) gen_filters=32 #define generator network gen.append(ll.InputLayer(shape=input_shape)) gen.append(ll.batch_norm(ll.DenseLayer(gen[-1], num_units=1024, ))) gen.append(ll.batch_norm(ll.DenseLayer(gen[-1], num_units=gen_filters*7*7, ))) gen.append(ll.ReshapeLayer(gen[-1],shape=(batch_size,gen_filters,7,7))) gen.append(ll.batch_norm(TransposedConv2DLayer( gen[-1], filter_size=(5,5), num_filters=gen_filters, stride=(2,2), crop=(2,2) ))) gen.append(ll.batch_norm(TransposedConv2DLayer( gen[-1], filter_size=(4,4),
#X_conj[:,:,:,] XX = np.array(np.real(np.fft.ifftn(X_conj*X,axes=(2,3),norm='ortho')),dtype='float32') XY = np.array(np.real(np.fft.ifftn(X_conj*Y,axes=(2,3),norm='ortho')),dtype='float32') XX = np.fft.ifftshift(XX,axes=(2,3)) XY = np.fft.ifftshift(XY,axes=(2,3)) return XX, XY, label, dx, dy frame, targets = T.tensor4(), T.tensor4() net = ll.InputLayer((None,2,100,100),input_var=frame) net = ll.Conv2DLayer(net,32,(5,5),b=None,pad='same') net = ll.Pool2DLayer(net,(2,2), mode='average_inc_pad') net = ll.Conv2DLayer(net,8,(3,3),b=None,pad='same',nonlinearity=l.nonlinearities.LeakyRectify(0.1)) net = ll.Pool2DLayer(net,(2,2), mode='average_inc_pad') net = ll.DenseLayer(net,625,b=None,nonlinearity=None) net = ll.ReshapeLayer(net,([0],1,25,25)) predict = ll.get_output(net) targets_pool = pool_2d(targets, ds=(4,4), mode='average_inc_pad') loss = T.mean((predict-targets_pool)**2) params = ll.get_all_params(net,trainable=True) updates = l.updates.adam(loss,params,0.01) train_f = theano.function([frame,targets],[loss,predict],updates=updates) data = premnist() errlist = [] for i in range(6000): x, y, move, label = mnist_data(data,(32,1,100,100),noise=None,heatmap=True,down=1) xx, xy = fftprocess(x,y) err, result = train_f(np.concatenate((xx,xy),axis=1),label)
def build_network(self, vocab_size, input_var, mask_var, docidx_var, docidx_mask,
                  skip_connect=True):
    """Build a two-layer LSTM reader over word-index sequences.

    Pipeline: embedding lookup -> dropout -> LSTM1 -> (optional skip
    connection concatenating the embeddings) -> dropout -> LSTM2; the final
    timesteps of both LSTMs are concatenated and projected to a vector ``g``
    of size EMBED_DIM, which is scored against the (transposed) embedding
    matrix with a softmax over the vocabulary.

    All trainable weights come from ``self.params`` (pre-initialized
    elsewhere); the layers here only wire them together.

    :param vocab_size: number of rows in the embedding table / output units.
    :param input_var: int tensor of word indices, shape (batch, seq, 1).
    :param mask_var: (batch, seq) mask fed to both LSTM layers.
    :param docidx_var, docidx_mask: only referenced by the commented-out
        scoring code below; unused by the live graph.
    :param skip_connect: when True, LSTM2 also sees the raw embeddings.
    :returns: the softmax output layer (Lasagne layer object).
    """
    l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)

    l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)

    l_embed = L.EmbeddingLayer(l_in, input_size=vocab_size,
                               output_size=EMBED_DIM,
                               W=self.params['W_emb'])

    l_embed_noise = L.dropout(l_embed, p=DROPOUT_RATE)

    # NOTE: Moved initialization of forget gate biases to init_params
    #forget_gate_1 = L.Gate(b=lasagne.init.Constant(3))
    #forget_gate_2 = L.Gate(b=lasagne.init.Constant(3))

    # NOTE: LSTM layer provided by Lasagne is slightly different from that used in DeepMind's paper.
    # In the paper the cell-to-* weights are not diagonal.
    # the 1st lstm layer
    in_gate = L.Gate(W_in=self.params['W_lstm1_xi'],
                     W_hid=self.params['W_lstm1_hi'],
                     W_cell=self.params['W_lstm1_ci'],
                     b=self.params['b_lstm1_i'],
                     nonlinearity=lasagne.nonlinearities.sigmoid)
    forget_gate = L.Gate(W_in=self.params['W_lstm1_xf'],
                         W_hid=self.params['W_lstm1_hf'],
                         W_cell=self.params['W_lstm1_cf'],
                         b=self.params['b_lstm1_f'],
                         nonlinearity=lasagne.nonlinearities.sigmoid)
    out_gate = L.Gate(W_in=self.params['W_lstm1_xo'],
                      W_hid=self.params['W_lstm1_ho'],
                      W_cell=self.params['W_lstm1_co'],
                      b=self.params['b_lstm1_o'],
                      nonlinearity=lasagne.nonlinearities.sigmoid)
    # cell candidate gate: no peephole weight, tanh nonlinearity
    cell_gate = L.Gate(W_in=self.params['W_lstm1_xc'],
                       W_hid=self.params['W_lstm1_hc'],
                       W_cell=None,
                       b=self.params['b_lstm1_c'],
                       nonlinearity=lasagne.nonlinearities.tanh)
    l_fwd_1 = L.LSTMLayer(l_embed_noise, NUM_HIDDEN,
                          ingate=in_gate,
                          forgetgate=forget_gate,
                          cell=cell_gate,
                          outgate=out_gate,
                          peepholes=True,
                          grad_clipping=GRAD_CLIP,
                          mask_input=l_mask,
                          gradient_steps=GRAD_STEPS,
                          precompute_input=True)

    # the 2nd lstm layer
    if skip_connect:
        # construct skip connection from the lookup table to the 2nd layer
        batch_size, seq_len, _ = input_var.shape
        # concatenate the last dimension of l_fwd_1 and embed by flattening
        # both to 2-D, concatenating, then restoring the 3-D shape
        l_fwd_1_shp = L.ReshapeLayer(l_fwd_1, (-1, NUM_HIDDEN))
        l_embed_shp = L.ReshapeLayer(l_embed, (-1, EMBED_DIM))
        to_next_layer = L.ReshapeLayer(
            L.concat([l_fwd_1_shp, l_embed_shp], axis=1),
            (batch_size, seq_len, NUM_HIDDEN + EMBED_DIM))
    else:
        to_next_layer = l_fwd_1

    to_next_layer_noise = L.dropout(to_next_layer, p=DROPOUT_RATE)

    in_gate = L.Gate(W_in=self.params['W_lstm2_xi'],
                     W_hid=self.params['W_lstm2_hi'],
                     W_cell=self.params['W_lstm2_ci'],
                     b=self.params['b_lstm2_i'],
                     nonlinearity=lasagne.nonlinearities.sigmoid)
    forget_gate = L.Gate(W_in=self.params['W_lstm2_xf'],
                         W_hid=self.params['W_lstm2_hf'],
                         W_cell=self.params['W_lstm2_cf'],
                         b=self.params['b_lstm2_f'],
                         nonlinearity=lasagne.nonlinearities.sigmoid)
    out_gate = L.Gate(W_in=self.params['W_lstm2_xo'],
                      W_hid=self.params['W_lstm2_ho'],
                      W_cell=self.params['W_lstm2_co'],
                      b=self.params['b_lstm2_o'],
                      nonlinearity=lasagne.nonlinearities.sigmoid)
    cell_gate = L.Gate(W_in=self.params['W_lstm2_xc'],
                       W_hid=self.params['W_lstm2_hc'],
                       W_cell=None,
                       b=self.params['b_lstm2_c'],
                       nonlinearity=lasagne.nonlinearities.tanh)
    l_fwd_2 = L.LSTMLayer(to_next_layer_noise, NUM_HIDDEN,
                          ingate=in_gate,
                          forgetgate=forget_gate,
                          cell=cell_gate,
                          outgate=out_gate,
                          peepholes=True,
                          grad_clipping=GRAD_CLIP,
                          mask_input=l_mask,
                          gradient_steps=GRAD_STEPS,
                          precompute_input=True)

    # slice final states of both lstm layers
    l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
    l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)

    # g will be used to score the words based on their embeddings
    g = L.DenseLayer(L.concat([l_fwd_1_slice, l_fwd_2_slice], axis=1),
                     num_units=EMBED_DIM,
                     W=self.params['W_dense'],
                     b=self.params['b_dense'],
                     nonlinearity=lasagne.nonlinearities.tanh)

    ## get outputs
    #g_out = L.get_output(g)                        # B x D
    #g_out_val = L.get_output(g, deterministic=True)  # B x D

    ## compute softmax probs
    #probs,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm),
    #                      outputs_info=None,
    #                      sequences=[g_out,docidx_var,docidx_mask],
    #                      non_sequences=self.params['W_emb'])
    #predicted_probs = probs.reshape(docidx_var.shape)  # B x N
    #probs_val,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm),
    #                          outputs_info=None,
    #                          sequences=[g_out_val,docidx_var,docidx_mask],
    #                          non_sequences=self.params['W_emb'])
    #predicted_probs_val = probs_val.reshape(docidx_var.shape)  # B x N
    #return predicted_probs, predicted_probs_val

    # W is shared with the lookup table
    l_out = L.DenseLayer(g, num_units=vocab_size,
                         W=self.params['W_emb'].T,
                         nonlinearity=lasagne.nonlinearities.softmax,
                         b=None)
    return l_out
def define_model(input_var, **kwargs):
    """
    Defines a convolutional autoencoder and returns
    (network, network output expression).

    -Return layers.get_output(final_layer_name) if validation network output
     and train network output are the same
    -For example, return layers.get_output(final_layer_name, deterministic = true)
     if there is a dropout layer
    -Use **kwargs to pass model specific parameters

    Shape walkthrough for the defaults (per example):
      (3, 32, 32) -> greyscale -> conv 5x5 -> (100, 28, 28)
      -> maxpool 2  -> (100, 14, 14) -> dense 3000
      -> dense 32400 -> reshape (100, 18, 18)
      -> unpool 2   -> (100, 36, 36) -> conv 5x5 -> (3, 32, 32)

    Fix: the unit-count / reshape arithmetic now uses floor division (//)
    so it yields integers under Python 3 as well; under Python 2 the result
    is unchanged, since `/` on ints already floored.
    """
    image_size = 32
    conv_filter_count = 100
    conv_filter_size = 5
    pool_size = 2
    n_dense_units = 3000

    # (renamed from `input`, which shadowed the builtin)
    input_layer = layers.InputLayer(
        shape=(None, 3, image_size, image_size),
        input_var=input_var,
    )

    # random greyscale augmentation (project-specific layer)
    greyscale_input = our_layers.GreyscaleLayer(
        incoming=input_layer,
        random_greyscale=True,
    )

    # Encoder: conv -> pool -> dense bottleneck
    conv1 = layers.Conv2DLayer(
        incoming=greyscale_input,
        num_filters=conv_filter_count,
        filter_size=conv_filter_size,
        stride=1,
        nonlinearity=lasagne.nonlinearities.sigmoid,
    )
    pool1 = layers.MaxPool2DLayer(
        incoming=conv1,
        pool_size=pool_size,
        stride=pool_size,
    )
    dense1 = layers.DenseLayer(
        incoming=pool1,
        num_units=n_dense_units,
        nonlinearity=lasagne.nonlinearities.rectify,
    )

    # Decoder: dense back up to a (filters, 18, 18) map, unpool, conv back
    # to 3 channels.  image_size + conv_filter_size - 1 == 36 is the spatial
    # size after unpooling, chosen so the final valid conv lands on 32.
    pre_unpool1 = layers.DenseLayer(
        incoming=dense1,
        num_units=conv_filter_count * (image_size + conv_filter_size - 1)**2 //
                  (pool_size * pool_size),
        nonlinearity=lasagne.nonlinearities.linear,
    )
    pre_unpool1 = layers.ReshapeLayer(
        incoming=pre_unpool1,
        shape=(input_var.shape[0], conv_filter_count) +
              ((image_size + conv_filter_size - 1) // 2,
               (image_size + conv_filter_size - 1) // 2),
    )
    unpool1 = our_layers.Unpool2DLayer(
        incoming=pre_unpool1,
        kernel_size=2,
    )
    deconv1 = layers.Conv2DLayer(
        incoming=unpool1,
        num_filters=3,
        filter_size=conv_filter_size,
        stride=1,
        nonlinearity=lasagne.nonlinearities.sigmoid,
    )

    output = layers.ReshapeLayer(
        incoming=deconv1,
        shape=input_var.shape,
    )

    return (output, layers.get_output(output))
def init_generator(self, first_layer, input_var=None, embedding_var=None): """ Initialize the DCGAN generator network using lasagne Additional units: Number of units to be added at the dense layer to compensate for embedding Returns the network """ layers = [] l_noise = lyr.InputLayer((None, 100), input_var) layers.append(l_noise) l_embedding = lyr.InputLayer((None, 300), embedding_var) layers.append(l_embedding) l_in = lyr.ConcatLayer([l_noise, l_embedding], axis=1) layers.append(l_in) l_1 = lyr.batch_norm( lyr.DenseLayer(incoming=l_in, num_units=4 * 4 * first_layer * 8, nonlinearity=nonlinearities.rectify)) l_1 = lyr.ReshapeLayer(incoming=l_1, shape=(-1, first_layer * 8, 4, 4)) layers.append(l_1) l_2 = lyr.batch_norm( lyr.Deconv2DLayer(incoming=l_1, num_filters=first_layer * 4, filter_size=5, stride=2, crop=2, output_size=8, nonlinearity=nonlinearities.rectify)) layers.append(l_2) l_3 = lyr.batch_norm( lyr.Deconv2DLayer(incoming=l_2, num_filters=first_layer * 2, filter_size=5, stride=2, crop=2, output_size=16, nonlinearity=nonlinearities.rectify)) layers.append(l_3) l_4 = lyr.batch_norm( lyr.Deconv2DLayer(incoming=l_3, num_filters=first_layer, filter_size=5, stride=2, crop=2, output_size=32, nonlinearity=nonlinearities.rectify)) layers.append(l_4) l_out = lyr.Deconv2DLayer(incoming=l_4, num_filters=3, filter_size=5, stride=2, crop=2, output_size=64, nonlinearity=nonlinearities.sigmoid) layers.append(l_out) if self.verbose: for i, layer in enumerate(layers): print 'generator layer %s output shape:' % i, layer.output_shape return l_out, l_noise, l_embedding