def build_auto_encoder_mnist_cnn(input_var=None):
    """Generate an auto-encoder CNN for 28x28 single-channel images with Lasagne.

    The encoder stacks two (5x5 convolution, 2x2 max-pooling) stages followed
    by a 2048-unit dense layer. The decoder reshapes that code to
    ``(batch, 2048, 1, 1)`` and alternates transposed convolutions with 2x2
    upscaling, ending in a single-channel transposed convolution without a
    nonlinearity.

    Parameters
    ----------
    input_var : Theano variable or None
        Forwarded to the ``InputLayer``. May be left as ``None``.

    Returns
    -------
    The last ``TransposedConv2DLayer`` of the network.
    """
    # Build encoder part
    network = lyr.InputLayer(shape=(None, 1, 28, 28), input_var=input_var)
    network = lyr.Conv2DLayer(network, 64, (5, 5), W=lasagne.init.Normal())
    network = lyr.MaxPool2DLayer(network, (2, 2))
    network = lyr.Conv2DLayer(network, 128, (5, 5), W=lasagne.init.Normal())
    network = lyr.MaxPool2DLayer(network, (2, 2))
    network = lyr.FlattenLayer(network)
    network = lyr.DenseLayer(network, 2048, W=lasagne.init.Normal())
    # ``[0]`` tells ReshapeLayer to copy the batch dimension from its input,
    # so the network can be built without an ``input_var`` (the default).
    network = lyr.ReshapeLayer(network, ([0], 2048, 1, 1))

    # Build decoder part
    network = lyr.TransposedConv2DLayer(network, 128, (5, 5),
                                        W=lasagne.init.Normal())
    network = lyr.Upscale2DLayer(network, (2, 2))
    network = lyr.TransposedConv2DLayer(network, 64, (4, 4),
                                        W=lasagne.init.Normal())
    network = lyr.Upscale2DLayer(network, (2, 2))
    network = lyr.TransposedConv2DLayer(network, 1, (3, 3),
                                        W=lasagne.init.Normal(),
                                        nonlinearity=None)
    return network
def __init__(self, input, scale=(2, 2)):
    """Allocate an UnpoolLayer.

    Stores ``input`` on the instance and exposes, as ``self.output``, an
    ``Upscale2DLayer`` built on top of it with the given ``scale``.
    """
    self.input = input
    upscaled = layers.Upscale2DLayer(self.input, scale)
    self.output = upscaled
def build_autoencoder_network():
    """Assemble the masked convolutional auto-encoder and its Theano outputs.

    The network has a convolutional trunk, a feature branch and a mask branch
    that are merged into the "encoder" layer, a deconvolutional decoder, and a
    pooled "global feature" branch whose reconstruction is summed with the
    decoder output.

    Returns
    -------
    (network, input_var, mask_var, output_var)
        ``network`` is the final reshape layer, ``input_var`` the 4-D input
        tensor, ``mask_var`` the symbolic output of the mask layer and
        ``output_var`` the symbolic output of ``network``.
    """

    def conv_bn(incoming, num_filters, size):
        # Batch-normalised 'same' convolution with a leaky rectifier.
        conv = layers.Conv2DLayer(incoming, num_filters,
                                  filter_size=(size, size), stride=1,
                                  pad='same', nonlinearity=leaky_rectify)
        return batch_norm(conv)

    def deconv_bn(incoming, num_filters, size):
        # Batch-normalised 'same' deconvolution with a leaky rectifier.
        deconv = layers.Deconv2DLayer(incoming, num_filters,
                                      filter_size=(size, size), stride=1,
                                      crop='same', nonlinearity=leaky_rectify)
        return batch_norm(deconv)

    input_var = T.tensor4('input_var')

    # Shared convolutional trunk.
    trunk = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    for width, size in ((80, 5), (80, 5), (80, 5), (80, 5),
                        (100, 3), (100, 3), (120, 3)):
        trunk = conv_bn(trunk, width, size)
    prely = conv_bn(trunk, 140, 3)

    # Feature branch (1x1 convolutions).
    featm = batch_norm(layers.Conv2DLayer(
        prely, 100, filter_size=(1, 1), nonlinearity=leaky_rectify))
    feat_map = batch_norm(layers.Conv2DLayer(
        featm, 100, filter_size=(1, 1), nonlinearity=rectify,
        name="feat_map"))

    # Mask branch: single-channel map passed through a soft threshold.
    maskm = batch_norm(layers.Conv2DLayer(
        prely, 100, filter_size=(1, 1), nonlinearity=leaky_rectify))
    mask_rep = batch_norm(
        layers.Conv2DLayer(maskm, 1, filter_size=(1, 1), nonlinearity=None),
        beta=None, gamma=None)
    mask_map = SoftThresPerc(mask_rep, perc=99.9, alpha=0.5,
                             beta=init.Constant(0.1), tight=100.0,
                             name="mask_map")

    # Merge features with the mask, then decode back to three channels.
    decoded = ChInnerProdMerge(feat_map, mask_map, name="encoder")
    for width, size in ((140, 3), (120, 3), (100, 3), (100, 3),
                        (80, 5), (80, 5), (80, 5), (80, 5)):
        decoded = deconv_bn(decoded, width, size)
    decoded = layers.Deconv2DLayer(decoded, 3, filter_size=(1, 1), stride=1,
                                   crop='same', nonlinearity=identity)

    # Global-feature branch: pool, compress, then upscale and decode.
    glblf = batch_norm(layers.Conv2DLayer(
        prely, 100, filter_size=(1, 1), nonlinearity=leaky_rectify))
    glblf = layers.Pool2DLayer(glblf, pool_size=(20, 20), stride=20,
                               mode='average_inc_pad')
    glblf = conv_bn(glblf, 64, 3)
    glblf = batch_norm(
        layers.Conv2DLayer(glblf, 3, filter_size=(1, 1),
                           nonlinearity=rectify),
        name="global_feature")
    for width in (64, 64):
        glblf = deconv_bn(glblf, width, 3)
    glblf = layers.Upscale2DLayer(glblf, scale_factor=20)
    for width in (48, 48, 48, 32, 32, 32):
        glblf = deconv_bn(glblf, width, 3)
    glblf = layers.Deconv2DLayer(glblf, 3, filter_size=(1, 1), stride=1,
                                 crop='same', nonlinearity=identity)

    # Sum both reconstructions and flatten everything but the batch axis.
    summed = layers.ElemwiseSumLayer([decoded, glblf])
    network = ReshapeLayer(summed, ([0], -1))

    mask_var = lasagne.layers.get_output(mask_map)
    output_var = lasagne.layers.get_output(network)
    return network, input_var, mask_var, output_var
def _invert_Conv2DLayer(self, layer, feeder):
    """Build the layers that propagate ``feeder`` back through a conv layer.

    ``feeder`` is passed through the rectifier and normalisation helpers,
    convolved with flipped filters back to the channel count of
    ``layer.input_layer``, and multiplied element-wise with the (reshaped)
    input of ``layer``. The returned merge layer carries the inverse
    convolution's weights as ``.W``.

    Raises
    ------
    RuntimeError
        If ``layer.pad`` is neither 'same' nor 'valid' / ``(0, 0)``.
    """
    # Warning: the argument order of the two helpers is swapped.
    feeder = self._put_rectifiers(feeder, layer)
    feeder = self._get_normalised_relevance_layer(layer, feeder)

    f_s = layer.filter_size
    if layer.pad == 'same':
        pad = 'same'
    elif layer.pad == 'valid' or layer.pad == (0, 0):
        pad = 'full'
    else:
        raise RuntimeError("Define your padding as full or same.")

    # Filter flipping must be on for this to be a proper deconvolution.
    num_filters = L.get_output_shape(layer.input_layer)[1]

    # Todo: similar code in the gradient based explainers. Merge.
    stride_is_four = layer.stride == (4, 4)
    if stride_is_four:
        feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')

    inverse_conv = L.Conv2DLayer(feeder,
                                 num_filters=num_filters,
                                 filter_size=f_s,
                                 stride=1,
                                 pad=pad,
                                 nonlinearity=None,
                                 b=None,
                                 flip_filters=True)
    W = inverse_conv.W

    output_layer = inverse_conv
    if stride_is_four:
        # Drop the last three columns and rows of the dilated result.
        cropped_cols = L.SliceLayer(inverse_conv, slice(0, -3), axis=3)
        output_layer = L.SliceLayer(cropped_cols, slice(0, -3), axis=2)
        output_layer.W = W

    # Do the multiplication.
    x_shape = (-1, ) + L.get_output_shape(output_layer)[1:]
    x_layer = L.ReshapeLayer(layer.input_layer, x_shape)
    output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer],
                                        merge_function=T.mul)
    output_layer.W = W
    return output_layer
def projection(l_inp, fixed_nchan):
    """Upscale ``l_inp`` by two and project it with a 1x1 deconvolution.

    The projection has ``fixed_nchan`` output channels when that value is
    truthy, otherwise half the channels of ``l_inp``; its spatial size is
    twice that of ``l_inp``. Both layers go through ``g_register``.
    """
    # twice normal channels when projecting!
    upscaled = g_register(ll.Upscale2DLayer(l_inp, 2, mode='repeat'))
    n_filters = fixed_nchan if fixed_nchan else l_inp.output_shape[1] // 2
    target_shape = (None,
                    n_filters,
                    l_inp.output_shape[2] * 2,
                    l_inp.output_shape[3] * 2)
    return g_register(
        deconv(upscaled, target_shape, filter_size=(1, 1), stride=(1, 1),
               nonlinearity=None, b=None))
def _invert_GlobalPoolLayer(self, layer, feeder):
    """Invert a mean ``GlobalPoolLayer`` by spreading values over the map.

    ``feeder`` is reshaped to have two trailing singleton axes, upscaled to
    the spatial size of ``layer.input_layer`` and divided by the number of
    spatial positions.

    Only global *mean* pooling over a 4-D input is supported (asserted).
    """
    assert isinstance(layer, L.GlobalPoolLayer)
    assert layer.pool_function == T.mean
    assert len(L.get_output_shape(layer.input_layer)) == 4

    # Append two singleton spatial axes; an unknown batch size becomes -1.
    padded_shape = L.get_output_shape(feeder) + (1, 1)
    if padded_shape[0] is None:
        padded_shape = (-1, ) + padded_shape[1:]
    reshaped = L.ReshapeLayer(feeder, padded_shape)

    # Spatial extent of the pooled input doubles as the upscaling factor.
    upscaling = L.get_output_shape(layer.input_layer)[2:]
    spread = L.Upscale2DLayer(reshaped, upscaling)

    return L.ExpressionLayer(
        spread,
        lambda x: x / np.prod(upscaling).astype(theano.config.floatX))
def modelinv(y, fnet):
    """Build the inverse (deconvolutional) network of ``fnet``.

    Starting from the 10-way input ``y``, the forward convolutions of
    ``fnet`` are undone in reverse order. Each inversion first removes the
    forward layer's bias and then applies a ``Deconv2DLayer`` that reuses the
    forward layer's ``W``, filter size, stride and padding, with
    ``flip_filters`` negated. Pooling is undone with ``Upscale2DLayer``.

    Parameters
    ----------
    y : Theano variable used as ``input_var`` of the ``(None, 10)`` input.
    fnet : mapping holding the forward layers under the keys ``'conv1'`` to
        ``'conv3'`` and ``'cccp1'`` to ``'cccp6'``.

    Returns
    -------
    dict mapping layer names (``'input'``, ``'ipool*'``, ``'icccp*'``,
    ``'iconv*'``, ``'out'``) to the created layers.
    """
    net = {}
    net['input'] = layers.InputLayer(shape=(None, 10), input_var=y)
    net['input'] = layers.ReshapeLayer(net['input'], (-1, 10, 1, 1))

    # (name of the unpooling layer, scale factor, forward layers to invert)
    plan = (
        ('ipool3', 8, ('cccp6', 'cccp5', 'conv3')),
        ('ipool2', 2, ('cccp4', 'cccp3', 'conv2')),
        ('ipool1', 2, ('cccp2', 'cccp1', 'conv1')),
    )

    previous = 'input'
    for pool_key, factor, forward_names in plan:
        net[pool_key] = layers.Upscale2DLayer(net[previous], factor)
        previous = pool_key
        for forward_name in forward_names:
            forward = fnet[forward_name]
            biasremove = myUtils.layers.RemoveBiasLayer(net[previous],
                                                        forward.b)
            inverse_key = 'i' + forward_name
            net[inverse_key] = layers.Deconv2DLayer(
                biasremove,
                num_filters=forward.input_shape[1],
                filter_size=forward.filter_size,
                stride=forward.stride,
                crop=forward.pad,
                W=forward.W,
                b=None,
                flip_filters=not forward.flip_filters)
            previous = inverse_key

    net['out'] = net[previous]
    return net
def cnn_autoencoder(input_var=None):
    """Build a convolutional auto-encoder using the Lasagne library.

    The encoder maps a ``(3, 64, 64)`` input through three convolution /
    max-pooling stages and two dense layers; the decoder reshapes the code
    and applies transposed convolutions and upscaling, ending in a sigmoid
    transposed convolution with three channels.

    Parameters
    ----------
    input_var : Theano variable or None
        Forwarded to the ``InputLayer``. May be left as ``None``.

    Returns
    -------
    The final ``TransposedConv2DLayer`` of the network.
    """
    ##################
    # Network config #
    ##################
    input_channels = 3
    weight_init = lasagne.init.Normal()

    # encoder
    conv1_nb_filt = 32
    conv1_sz_filt = (9, 9)
    conv1_sz_padd = 2
    # conv1 output size = (60, 60)
    pool1_sz = (2, 2)
    # pool1 output size = (30, 30)
    conv2_nb_filt = 64
    conv2_sz_filt = (7, 7)
    conv2_sz_padd = 0
    # conv2 output size = (24, 24)
    pool2_sz = (4, 4)
    # pool2 output size = (6, 6)
    conv3_nb_filt = 128
    conv3_sz_filt = (5, 5)
    conv3_sz_padd = 0
    # conv3 output size = (2, 2)
    pool3_sz = (2, 2)
    # pool3 output size = (128, 1, 1)
    dens1_nb_unit = 256
    # dense1 output (vector 256)
    dens2_nb_unit = 256
    # dense2 output (vector 256)
    rshp_sz = 1
    # reshape output (256, 1, 1)

    # decoder
    tconv1_nb_filt = 64
    tconv1_sz_filt = (5, 5)
    tconv1_sz_strd = (1, 1)
    # tconv1 output size = (5, 5)
    upsamp1_sz = (2, 2)
    # upsamp1 output size = (10, 10)
    tconv2_nb_filt = 32
    tconv2_sz_filt = (4, 4)
    tconv2_sz_strd = (1, 1)
    # tconv2 output size = (13, 13)
    upsamp2_sz = (2, 2)
    # upsamp2 output size = (26, 26)
    tconv3_nb_filt = 32
    tconv3_sz_filt = (5, 5)
    tconv3_sz_strd = (1, 1)
    # tconv3 output size = (30, 30)
    tconv4_nb_filt = 3
    tconv4_sz_filt = (3, 3)
    tconv4_sz_strd = (1, 1)
    # tconv4 output size = (32, 32)
    # final output = (3 channels, 32 x 32)

    #####################
    # Build the network #
    #####################
    # Add input layer
    network = lyr.InputLayer(shape=(None, input_channels, 64, 64),
                             input_var=input_var)

    # Add convolution layer
    network = lyr.Conv2DLayer(incoming=network,
                              num_filters=conv1_nb_filt,
                              filter_size=conv1_sz_filt,
                              pad=conv1_sz_padd,
                              W=weight_init)
    # Add pooling layer
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool1_sz)

    # Add convolution layer
    network = lyr.Conv2DLayer(incoming=network,
                              num_filters=conv2_nb_filt,
                              filter_size=conv2_sz_filt,
                              pad=conv2_sz_padd,
                              W=weight_init)
    # Add pooling layer
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool2_sz)

    # Add convolution layer
    network = lyr.Conv2DLayer(incoming=network,
                              num_filters=conv3_nb_filt,
                              filter_size=conv3_sz_filt,
                              pad=conv3_sz_padd,
                              W=weight_init)
    # Add pooling layer
    network = lyr.MaxPool2DLayer(incoming=network, pool_size=pool3_sz)

    network = lyr.FlattenLayer(network)

    # Add dense layers
    network = lyr.DenseLayer(network, dens1_nb_unit, W=weight_init)
    network = lyr.DenseLayer(network, dens2_nb_unit, W=weight_init)

    # ``[0]`` copies the batch dimension from the incoming layer, so no
    # ``input_var`` is needed. Floor division keeps the channel count an
    # integer on Python 3 as well (ReshapeLayer rejects floats).
    network = lyr.ReshapeLayer(
        network,
        ([0], dens2_nb_unit // (rshp_sz ** 2), rshp_sz, rshp_sz))

    # Add transposed convolution layer
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv1_nb_filt,
                                        filter_size=tconv1_sz_filt,
                                        stride=tconv1_sz_strd,
                                        W=weight_init)
    # Add upsampling layer
    network = lyr.Upscale2DLayer(incoming=network, scale_factor=upsamp1_sz)

    # Add transposed convolution layer
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv2_nb_filt,
                                        filter_size=tconv2_sz_filt,
                                        stride=tconv2_sz_strd,
                                        W=weight_init)
    # Add upsampling layer
    network = lyr.Upscale2DLayer(incoming=network, scale_factor=upsamp2_sz)

    # Add transposed convolution layer
    network = lyr.TransposedConv2DLayer(incoming=network,
                                        num_filters=tconv3_nb_filt,
                                        filter_size=tconv3_sz_filt,
                                        stride=tconv3_sz_strd,
                                        W=weight_init)

    # Add transposed convolution layer
    network = lyr.TransposedConv2DLayer(
        incoming=network,
        num_filters=tconv4_nb_filt,
        filter_size=tconv4_sz_filt,
        stride=tconv4_sz_strd,
        W=weight_init,
        nonlinearity=lasagne.nonlinearities.sigmoid)

    return network
def build_segmenter_jet_preconv():
    """Build the "jet" segmenter that upsamples features before predicting.

    Downsample down to a small region, then upsample all the way back up,
    recreating the basic FCN-8s structure (more aptly 1s here, since we
    upsample back to the original input size). Instead of combining softmax
    layers in the jet, intermediate feature maps are upsampled and
    concatenated *before* each prediction convolution. This makes the model
    slower and leaves many filters at the end, but avoids extra intermediate
    convolutions to reconcile the differing filter counts.

    Every convolution feeds a batch-norm layer, and the following layer
    consumes the normalised output.

    Returns
    -------
    list
        ``[softmax_8, softmax_4, softmax_2, softmax_1]`` - the predictions
        made from the 8x, 4x, 2x and 1x feature levels.
    """

    def conv_bn(incoming, num_filters, conv_name, bn_name):
        # 3x3 'same' rectified convolution followed by batch normalisation.
        conv = ll.Conv2DLayer(incoming, num_filters=num_filters,
                              filter_size=(3, 3), pad='same', W=Orthogonal(),
                              nonlinearity=rectify, name=conv_name)
        return ll.BatchNormLayer(conv, name=bn_name)

    def predict(incoming, conv_name, softmax_name):
        # Two-class linear 3x3 convolution followed by a 4-D softmax.
        conv = ll.Conv2DLayer(incoming, num_filters=2, filter_size=(3, 3),
                              pad='same', W=Orthogonal(),
                              nonlinearity=linear, name=conv_name)
        return Softmax4D(conv, name=softmax_name)

    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    bn1 = conv_bn(inp, 32, 'conv1_1', 'bn1')
    bn2 = conv_bn(bn1, 64, 'conv1_2', 'bn2')
    mp1 = ll.MaxPool2DLayer(bn2, 2, stride=2, name='mp1')  # 2x downsample

    bn3 = conv_bn(mp1, 128, 'conv2_1', 'bn3')
    bn4 = conv_bn(bn3, 128, 'conv2_2', 'bn4')
    mp2 = ll.MaxPool2DLayer(bn4, 2, stride=2, name='mp2')  # 4x downsample

    bn5 = conv_bn(mp2, 128, 'conv3_1', 'bn5')
    bn6 = conv_bn(bn5, 128, 'conv3_2', 'bn6')
    mp3 = ll.MaxPool2DLayer(bn6, 2, stride=2, name='mp3')  # 8x downsample

    bn7 = conv_bn(mp3, 128, 'conv4_1', 'bn7')
    bn8 = conv_bn(bn7, 128, 'conv4_2', 'bn8')
    # f 68 s 8

    # now start the upsample
    ## FIRST UPSAMPLE PREDICTION (akin to FCN-32s)
    # take loss here, 8x upsample from 8x downsample
    up8 = ll.Upscale2DLayer(bn8, 8, name='upsample_8x')
    softmax_8 = predict(up8, 'conv_8xpred', '4dsoftmax_8x')

    ## COMBINE BY UPSAMPLING CONV 8 AND CONV 6
    conv_8_up2 = ll.Upscale2DLayer(bn8, 2, name='upsample_c8_2')  # 4x down
    concat_c8_c6 = ll.ConcatLayer([conv_8_up2, bn6], axis=1,
                                  name='concat_c8_c6')
    # take loss here, 4x upsample from 4x downsample
    up4 = ll.Upscale2DLayer(concat_c8_c6, 4, name='upsample_4x')
    softmax_4 = predict(up4, 'conv_4xpred', '4dsoftmax_4x')

    ## COMBINE BY UPSAMPLING CONCAT_86 AND CONV 4
    concat_86_up2 = ll.Upscale2DLayer(
        concat_c8_c6, 2, name='upsample_concat_86_2')  # 2x downsample
    concat_ct86_c4 = ll.ConcatLayer([concat_86_up2, bn4], axis=1,
                                    name='concat_ct86_c4')
    # final loss here, 2x upsample from a 2x downsample
    up2 = ll.Upscale2DLayer(concat_ct86_c4, 2, name='upsample_2x')
    softmax_2 = predict(up2, 'conv_2xpred', '4dsoftmax_2x')

    ## COMBINE BY UPSAMPLING CONCAT_864 AND CONV 2
    # Distinct name so it does not collide with 'upsample_concat_86_2'.
    concat_864_up2 = ll.Upscale2DLayer(
        concat_ct86_c4, 2, name='upsample_concat_864_2')  # no downsample
    concat_864_c2 = ll.ConcatLayer([concat_864_up2, bn2], axis=1,
                                   name='concat_ct864_c2')
    softmax_1 = predict(concat_864_c2, 'conv_1xpred', '4dsoftmax_1x')

    # this is where up1 would go but that doesn't make any sense
    return [softmax_8, softmax_4, softmax_2, softmax_1]
def build_segmenter_jet_2():
    """Build the "jet" segmenter that merges softmax predictions.

    Downsample down to a small region, then upsample all the way back up,
    recreating the basic FCN-8s structure (more aptly 1s here, since we
    upsample back to the original input size). A two-class prediction is made
    at each feature level and averaged with the upsampled prediction from the
    coarser level; this jet has another conv layer in the final upsample.

    Returns
    -------
    list
        ``[up8, up4, up2, softmax_1_merge]`` - the 8x, 4x and 2x predictions
        upsampled by their respective factors, plus the merged 1x prediction.
    """
    # (num_filters, conv name, batch-norm name) for each pair of convolutions;
    # a 2x max-pool sits between consecutive stages.
    stages = (
        ((32, 'conv1_1', 'bn1'), (64, 'conv1_2', 'bn2')),
        ((128, 'conv2_1', 'bn3'), (128, 'conv2_2', 'bn4')),
        ((128, 'conv3_1', 'bn5'), (128, 'conv3_2', 'bn6')),
        ((128, 'conv4_1', 'bn7'), (128, 'conv4_2', 'bn8')),
    )

    net = ll.InputLayer(shape=(None, 1, None, None), name='input')
    stage_outputs = []
    for stage_idx, stage in enumerate(stages):
        if stage_idx:
            # mp1 / mp2 / mp3: 2x, 4x and 8x total downsampling.
            net = ll.MaxPool2DLayer(net, 2, stride=2,
                                    name='mp%d' % stage_idx)
        for num_filters, conv_name, bn_name in stage:
            net = ll.Conv2DLayer(net, num_filters=num_filters,
                                 filter_size=(3, 3), pad='same',
                                 W=Orthogonal(), nonlinearity=rectify,
                                 name=conv_name)
            net = ll.BatchNormLayer(net, name=bn_name)
        stage_outputs.append(net)
    bn2, bn4, bn6, bn8 = stage_outputs
    # f 68 s 8

    def predict(features, conv_name, softmax_name):
        # Two-class linear 3x3 convolution followed by a 4-D softmax.
        scores = ll.Conv2DLayer(features, num_filters=2, filter_size=(3, 3),
                                pad='same', W=Orthogonal(),
                                nonlinearity=linear, name=conv_name)
        return Softmax4D(scores, name=softmax_name)

    # now start the upsample
    ## FIRST UPSAMPLE PREDICTION (akin to FCN-32s)
    softmax_8 = predict(bn8, 'conv_8xpred', '4dsoftmax_8x')
    # take loss here, 8x upsample from 8x downsample
    up8 = ll.Upscale2DLayer(softmax_8, 8, name='upsample_8x')

    ## COMBINE BY UPSAMPLING SOFTMAX 8 AND PRED ON CONV 6
    softmax_4up = ll.Upscale2DLayer(softmax_8, 2,
                                    name='upsample_4x_pre')  # 4x downsample
    softmax_4 = predict(bn6, 'conv_4xpred', '4dsoftmax_4x')  # 4x downsample
    softmax_4_merge = ll.ElemwiseSumLayer([softmax_4, softmax_4up],
                                          coeffs=0.5, name='softmax_4_merge')
    # take loss here, 4x upsample from 4x downsample
    up4 = ll.Upscale2DLayer(softmax_4_merge, 4, name='upsample_4x')

    ## COMBINE BY UPSAMPLING SOFTMAX_4_MERGE AND CONV 4
    softmax_2up = ll.Upscale2DLayer(softmax_4_merge, 2,
                                    name='upsample_2x_pre')  # 2x downsample
    softmax_2 = predict(bn4, 'conv_2xpred', '4dsoftmax_2x')
    softmax_2_merge = ll.ElemwiseSumLayer([softmax_2, softmax_2up],
                                          coeffs=0.5, name='softmax_2_merge')
    # final loss here, 2x upsample from a 2x downsample
    up2 = ll.Upscale2DLayer(softmax_2_merge, 2, name='upsample_2x')

    ## COMBINE BY UPSAMPLING SOFTMAX_2_MERGE AND CONV 2
    # 1x downsample (i.e. no downsample)
    softmax_1up = ll.Upscale2DLayer(softmax_2_merge, 2,
                                    name='upsample_1x_pre')
    softmax_1 = predict(bn2, 'conv_1xpred', '4dsoftmax_1x')
    softmax_1_merge = ll.ElemwiseSumLayer([softmax_1, softmax_1up],
                                          coeffs=0.5, name='softmax_1_merge')

    # this is where up1 would go but that doesn't make any sense
    return [up8, up4, up2, softmax_1_merge]
def build_segmenter_upsample():
    """Build a segmenter that downsamples 8x and upsamples in one step.

    Downsample down to a small region, then upsample all the way back up.
    Note: without any learning on the upsampler, we're limited in how far we
    can downsample; there will always be an error signal unless the loss
    function is run on downsampled targets.

    Returns
    -------
    list
        A single-element list holding the final ``Softmax4D`` layer.
    """
    # Encoder description, applied top to bottom. 'conv' entries create a
    # 3x3 'same' rectified convolution followed by batch normalisation.
    encoder = (
        ('conv', 32, 'conv1_1', 'bn1'),
        ('conv', 64, 'conv1_2', 'bn2'),
        ('pool', 'mp1'),  # 2x downsample
        ('conv', 128, 'conv2_1', 'bn3'),
        ('conv', 128, 'conv2_2', 'bn4'),
        ('pool', 'mp2'),  # 4x downsample
        ('conv', 128, 'conv3_1', 'bn5'),
        ('conv', 128, 'conv3_2', 'bn6'),
        ('pool', 'mp3'),  # 8x downsample
        ('conv', 128, 'conv4_1', 'bn7'),
        ('conv', 128, 'conv4_2', 'bn8'),
    )

    net = ll.InputLayer(shape=(None, 1, None, None), name='input')
    for entry in encoder:
        if entry[0] == 'pool':
            net = ll.MaxPool2DLayer(net, 2, stride=2, name=entry[1])
            continue
        _, num_filters, conv_name, bn_name = entry
        net = ll.Conv2DLayer(net, num_filters=num_filters,
                             filter_size=(3, 3), pad='same', W=Orthogonal(),
                             nonlinearity=rectify, name=conv_name)
        net = ll.BatchNormLayer(net, name=bn_name)
    # f 68 s 8

    # now start the upsample
    upsampled = ll.Upscale2DLayer(net, 8, name='upsample_8x')
    scores = ll.Conv2DLayer(upsampled, num_filters=2, filter_size=(3, 3),
                            pad='same', W=Orthogonal(), nonlinearity=linear,
                            name='conv_final')
    return [Softmax4D(scores, name='4dsoftmax')]
def deconv_res_block(l_inp, increase_dim=False, fixed_nchan=None, **kwargs):
    """Build a residual block made of two 3x3 deconvolutions.

    The main path is (optional 2x upscale) -> batch norm -> deconv ->
    dropout -> batch norm -> deconv. The result is summed with a shortcut:
    the input itself, or a 2x-upscaled 1x1 projection of it when
    ``increase_dim`` is set.

    Parameters
    ----------
    l_inp : layer
        Incoming layer; its ``output_shape`` is read as
        ``(batch, channels, rows, cols)``.
    increase_dim : bool
        When true, the spatial size is doubled and, unless ``fixed_nchan``
        is given, the channel count is halved.
    fixed_nchan : int or None
        When truthy, the number of output channels of every deconvolution
        in the block.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    The layer produced by ``sumlayer`` over the main path and the shortcut.
    """

    def projection(l_inp, fixed_nchan):
        # twice normal channels when projecting!
        l = g_register(ll.Upscale2DLayer(l_inp, 2, mode='repeat'))
        if fixed_nchan:
            n_filters = fixed_nchan
        else:
            n_filters = l_inp.output_shape[1] // 2
        l = g_register(
            deconv(l, (None, n_filters, l_inp.output_shape[2] * 2,
                       l_inp.output_shape[3] * 2),
                   filter_size=(1, 1),
                   stride=(1, 1),
                   nonlinearity=None,
                   b=None))
        return l

    def filters_increase_dims(l, increase_dims, fixed_nchan):
        # Derive (channel count, first stride, spatial shape) for the block.
        in_num_filters = l.output_shape[1]
        map_shape = l.output_shape[2:]
        if increase_dims:
            first_stride = (2, 2)
            if fixed_nchan:
                out_num_filters = fixed_nchan
            else:
                out_num_filters = in_num_filters // 2
            map_shape = (map_shape[0] * 2, map_shape[1] * 2)
        else:
            first_stride = (1, 1)
            if fixed_nchan:
                out_num_filters = fixed_nchan
            else:
                out_num_filters = in_num_filters
        return out_num_filters, first_stride, map_shape

    # first figure filters/strides
    n_filters, first_stride, map_shape = filters_increase_dims(
        l_inp, increase_dim, fixed_nchan)

    l = l_inp
    if first_stride == (2, 2):
        # Formatted explicitly so the output is the same on Python 2 and 3.
        print('first_stride %s' % (first_stride,))
        l = g_register(ll.Upscale2DLayer(l, 2, mode='repeat'))

    l = g_bn(l)
    l = g_register(
        deconv(l, (None, n_filters) + map_shape,
               filter_size=(3, 3),
               stride=(1, 1),
               nonlinearity=g_nl))

    l = g_bn(g_dp(l))
    l = g_register(
        deconv(l, (None, n_filters) + map_shape,
               filter_size=(3, 3),
               stride=(1, 1),
               nonlinearity=g_nl))

    if increase_dim:
        p = projection(l_inp, fixed_nchan)
    else:
        # Identity shortcut
        p = l_inp

    l = sumlayer([l, p])
    return l
def _invert_Conv2DLayer(self, layer, feeder):
    """Build the deconvolution that maps ``feeder`` back through ``layer``.

    ``feeder`` is passed through the rectifier helper and convolved with
    flipped filters back to the channel count of ``layer.input_layer``. For
    strides of ``(4, 4)`` and ``(2, 2)`` the feeder is first dilated by the
    stride; for ``(4, 4)`` the result is additionally cropped by three rows
    and columns and carries the convolution's weights as ``.W``.

    Raises
    ------
    RuntimeError
        If the padding of ``layer`` is neither 'same'-like nor 'valid'.
    """

    def _check_padding_same():
        # Explicit padding counts as 'same' when every filter dimension is
        # odd and padded by half its size.
        return all(size % 2 == 1 and size // 2 == padding
                   for size, padding in zip(layer.filter_size, layer.pad))

    # Warning: the argument order is swapped here.
    feeder = self._put_rectifiers(feeder, layer)

    f_s = layer.filter_size
    if layer.pad == 'same' or _check_padding_same():
        pad = 'same'
    elif layer.pad == 'valid' or layer.pad == (0, 0):
        pad = 'full'
    else:
        raise RuntimeError("Define your padding as full or same.")

    # Filter flipping must be on for this to be a proper deconvolution.
    num_filters = L.get_output_shape(layer.input_layer)[1]

    # Todo: clean this!
    alexnet_stride = layer.stride == (4, 4)
    googlenet_stride = (not alexnet_stride) and layer.stride == (2, 2)

    if alexnet_stride:
        print("Applying alexnet hack.")
    elif googlenet_stride:
        print("Applying GoogLeNet hack.")
    if alexnet_stride or googlenet_stride:
        feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')

    inverse_conv = L.Conv2DLayer(feeder,
                                 num_filters=num_filters,
                                 filter_size=f_s,
                                 stride=1,
                                 pad=pad,
                                 nonlinearity=None,
                                 b=None,
                                 flip_filters=True)
    if not alexnet_stride:
        return inverse_conv

    print("Applying alexnet hack part 2.")
    cropped_cols = L.SliceLayer(inverse_conv, slice(0, -3), axis=3)
    cropped = L.SliceLayer(cropped_cols, slice(0, -3), axis=2)
    cropped.W = inverse_conv.W
    return cropped
def build_autoencoder(layer, nonlinearity='same', b=init.Constant(0.),
                      learnable_conv=False):
    """
    Unfolds a stack of layers into a symmetric autoencoder.

    Given a :class:`Layer` instance, this function walks the encoder from its
    output back to its input and appends a decoding layer for every encoder
    layer. Layers without a dedicated rule are inverted with
    :class:`InverseLayer`. Convolutions are decoded with a
    ``TransposedConv2DLayer`` (no ``W`` is passed to it), max-pooling with an
    ``Upscale2DLayer``, and batch normalization, dropout and Gaussian noise
    with a fresh layer of the same kind.

    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The :class:`Layer` instance with respect to which a symmetric
        autoencoder is built.
    nonlinearity : 'same', list, callable, or None
        The nonlinearities that are applied to the decoding layer. If 'same',
        each decoder layer has the same nonlinearity as its corresponding
        encoder layer. If a list is provided, it must contain nonlinearities
        for each decoding layer. Otherwise, if a single nonlinearity is
        provided, it is applied to all decoder layers. If set to ``None``,
        all nonlinearities for the decoder layers are set to
        lasagne.nonlinearities.identity.
    b : callable, Theano shared variable, numpy array, list or None
        An initializer for the decoder biases. By default, all decoder biases
        are initialized to lasagne.init.Constant(0.). If a shared variable or
        a numpy array is provided, the shape must match the incoming shape
        (only in case all incoming shapes are the same). Additionally, a list
        containing initializers for the biases of each decoder layer can be
        provided. If set to ``None``, the decoder layers will have no biases,
        and pass through their input instead.
    learnable_conv : bool
        Currently unused.

    Returns
    -------
    autoencoder_layers : list of :class:`Layer` instances
        The unfolded encoder output layer followed by every decoder layer in
        the order it was created; the last element is the output layer of
        the autoencoder.
    encoder : :class:`Layer` instance
        The code :class:`Layer` of the autoencoder (see Notes)

    Notes
    -----
    The encoder (input) :class:`Layer` is changed using
    `unfold_bias_and_nonlinearity_layers`. Therefore, this layer is not the
    code layer anymore, because it has got its bias and nonlinearity stripped
    off.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> from lasagne.layers import build_autoencoder
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> l2 = DenseLayer(l1, num_units=10)
    >>> l_ae, l2 = build_autoencoder(l2, nonlinearity='same', b=None)
    """
    # Positions in the per-layer lists (only used when lists are given).
    n_idx = 0
    b_idx = 0

    encoder = unfold_bias_and_nonlinearity_layers(layer)
    autoencoder_layers = [encoder]

    # Bias / nonlinearity settings picked up from the most recently visited
    # BiasLayer / NonlinearityLayer; applied to the next inverted layer.
    kwargs_b = dict(b=None)
    kwargs_n = dict(nonlinearity=nonlinearities.identity)

    for enc_layer in get_all_layers(encoder)[::-1]:
        incoming = autoencoder_layers[-1]

        if isinstance(enc_layer, InputLayer):
            continue

        if isinstance(enc_layer, BiasLayer):
            # Only records the bias settings; no decoder layer is emitted.
            if b is None:
                kwargs_b = dict(b=None)
            elif isinstance(b, (tuple, list)):
                kwargs_b = dict(b=b[b_idx])
                b_idx += 1
            else:
                kwargs_b = dict(b=b)
            continue

        if isinstance(enc_layer, NonlinearityLayer):
            # Only records the nonlinearity; no decoder layer is emitted.
            if nonlinearity == 'same':
                kwargs_n = dict(nonlinearity=enc_layer.nonlinearity)
            elif nonlinearity is None:
                kwargs_n = dict(nonlinearity=nonlinearities.identity)
            elif isinstance(nonlinearity, (tuple, list)):
                kwargs_n = dict(nonlinearity=nonlinearity[n_idx])
                n_idx += 1
            else:
                kwargs_n = dict(nonlinearity=nonlinearity)
            continue

        if isinstance(enc_layer, DropoutLayer):
            autoencoder_layers.append(
                DropoutLayer(incoming=incoming, p=enc_layer.p,
                             rescale=enc_layer.rescale))
            continue

        if isinstance(enc_layer, GaussianNoiseLayer):
            autoencoder_layers.append(
                GaussianNoiseLayer(incoming=incoming, sigma=enc_layer.sigma))
            continue

        # Every remaining layer type yields a decoding layer that must be
        # appended, optionally wrapped in a bias and a nonlinearity layer.
        if isinstance(enc_layer, L.Conv2DLayer):
            a_layer = L.TransposedConv2DLayer(
                incoming=incoming,
                num_filters=enc_layer.input_shape[1],
                filter_size=enc_layer.filter_size,
                stride=enc_layer.stride,
                crop=enc_layer.pad,
                untie_biases=enc_layer.untie_biases,
                b=None,
                nonlinearity=None)
        elif isinstance(enc_layer, L.MaxPool2DLayer):
            a_layer = L.Upscale2DLayer(incoming=incoming,
                                       scale_factor=enc_layer.pool_size)
        elif isinstance(enc_layer, L.BatchNormLayer):
            a_layer = L.BatchNormLayer(incoming)
        else:
            a_layer = InverseLayer(incoming=incoming, layer=enc_layer)

        if hasattr(enc_layer, 'b'):
            a_layer = BiasLayer(incoming=a_layer, **kwargs_b)
        if hasattr(enc_layer, 'nonlinearity'):
            a_layer = NonlinearityLayer(incoming=a_layer, **kwargs_n)
        autoencoder_layers.append(a_layer)

    return autoencoder_layers, encoder