# Common imports assumed by the snippets below (each originally lived in its
# own file):
import collections
import random

import numpy as np

import caffe
from caffe import layers as L, params as P, to_proto


def cnn_module(bottom, num_out):
    conv1 = conv_bn_relu_layer(bottom, 64)
    conv2 = conv_bn_relu_layer(conv1, 64)
    pool1 = L.Pooling(conv2, pooling_param=dict(kernel_size=3, stride=2, pad=1,
                                                pool=P.Pooling.MAX))
    conv3 = conv_bn_relu_layer(pool1, 64)
    conv4 = conv_bn_relu_layer(conv3, 64)
    pool2 = L.Pooling(conv4, pooling_param=dict(kernel_size=3, stride=2, pad=1,
                                                pool=P.Pooling.MAX))
    conv5 = conv_bn_relu_layer(pool2, 64)
    conv6 = conv_bn_relu_layer(conv5, 64)
    # upsample the 1/4- and 1/2-resolution features and crop them to conv2
    conv6_upsample = L.Interp(conv6, interp_param=dict(zoom_factor=4))
    conv6_upsample_crop = L.Crop(conv6_upsample, conv2)
    conv4_upsample = L.Interp(conv4, interp_param=dict(zoom_factor=2))
    conv4_upsample_crop = L.Crop(conv4_upsample, conv2)
    conv_concat = L.Concat(bottom, conv2, conv4_upsample_crop, conv6_upsample_crop)
    conv7 = conv_relu_layer(conv_concat, num_out)
    conv_comb = L.Concat(bottom, conv7)
    return conv_comb
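# cnn_module relies on conv helpers defined elsewhere. A minimal sketch of
# conv_bn_relu_layer, assuming 3x3 stride-1 convolutions with in-place
# BatchNorm/Scale/ReLU (the original may differ in fillers or BN settings);
# conv_relu_layer would be the same minus the BN/Scale pair:
def conv_bn_relu_layer(bottom, nout):
    conv = L.Convolution(bottom, kernel_size=3, stride=1, pad=1,
                         num_output=nout, weight_filler=dict(type='xavier'))
    bn = L.BatchNorm(conv, in_place=True)
    scale = L.Scale(bn, in_place=True, scale_param=dict(bias_term=True))
    return L.ReLU(scale, in_place=True)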
def edge_path(n, bottom):
    n.res5c_1, n.res5c_1_bn, n.res5c_1_scale, n.res5c_1_relu = \
        _conv_bn_scale_relu(bottom, nout=16, bias_term=False, kernel_size=1,
                            stride=1, pad=0, weight_filler={"type": "msra"})
    n.res5c_up1, n.res5c_up1_bn, n.res5c_up1_scale, n.res5c_up1_relu = \
        _deconv_bn_scale_relu(n.res5c_1_relu, nout=16, kernel_size=8, stride=4, pad=0)
    crop_bottoms = [n.res5c_up1_relu, n.res2c_relu]
    n.res5c_crop1 = L.Crop(*crop_bottoms, crop_param={'axis': 2})  # crop to 1/4 resolution
    n.res5c_up2, n.res5c_up2_bn, n.res5c_up2_scale, n.res5c_up2_relu = \
        _deconv_bn_scale_relu(n.res5c_crop1, nout=16, kernel_size=8, stride=4, pad=0)
    crop_bottoms = [n.res5c_up2_relu, n.data]
    n.res5c_crop2 = L.Crop(*crop_bottoms, crop_param={'axis': 2})  # crop to full resolution -> from the ResNet backbone
    n.unet1a_conv1_edge, n.unet1a_bn_conv1_edge, n.unet1a_scale_conv1_edge, n.unet1a_conv1_relu_edge = \
        _conv_bn_scale_relu(n.data, nout=8, bias_term=False, kernel_size=3,
                            stride=1, pad=1, weight_filler={"type": "msra"})
    n.unet1a_conv2_edge, n.unet1a_bn_conv2_edge, n.unet1a_scale_conv2_edge, n.unet1a_conv2_relu_edge = \
        _conv_bn_scale_relu(n.unet1a_conv1_relu_edge, nout=4, bias_term=False,
                            kernel_size=3, stride=1, pad=1, weight_filler={"type": "msra"})
    n.unet1a_conv3_edge, n.unet1a_bn_conv3_edge, n.unet1a_scale_conv3_edge, n.unet1a_conv3_relu_edge = \
        _conv_bn_scale_relu(n.unet1a_conv2_relu_edge, nout=16, bias_term=False,
                            kernel_size=3, stride=1, pad=1,
                            weight_filler={"type": "msra"})  # full resolution -> from data directly
def decoder_path(n, bottom):
    n.unet3a_deconv_up, n.unet3a_bn3a_deconv, n.unet3a_scale3a_deconv, n.unet3a_deconv_relu = \
        _deconv_bn_scale_relu(bottom, nout=256, kernel_size=7, stride=4, pad=0)
    crop_bottoms = [n.unet3a_deconv_relu, n.res2c_relu]
    n.unet3a_crop = L.Crop(*crop_bottoms, crop_param={'axis': 2, 'offset': 1})  # crop to 1/4 resolution
    concat_layers = [n.unet3a_crop, n.res2c_relu]
    n.unet3a_concat = caffe.layers.Concat(*concat_layers,
                                          concat_param=dict(concat_dim=1))  # concat with res2c
    # res6
    _resnet_block('6a', n, n.unet3a_concat, 128)
    _resnet_block('6b', n, n.res6a_relu, 4, branch1=True, initial_stride=1)
    # deconv
    n.unet1a_deconv_up, n.unet1a_bn1a_deconv, n.unet1a_scale1a_deconv, n.unet1a_deconv_relu = \
        _deconv_bn_scale_relu(n.res6b_relu, nout=16, kernel_size=7, stride=4, pad=0)
    crop_bottoms = [n.unet1a_deconv_relu, n.data]
    n.unet1a_crop = L.Crop(*crop_bottoms, crop_param={'axis': 2, 'offset': 1})  # crop to full resolution -> occlusion cue
def gradient_x(bottom):
    dummy_data = L.DummyData(dummy_data_param=dict(shape=[dict(dim=[1, 1, 100, 99])]))
    crop_1 = L.Crop(bottom, dummy_data, crop_param=dict(offset=[0, 1]))
    crop_2 = L.Crop(bottom, dummy_data, crop_param=dict(offset=[0, 0]))
    diff = L.Eltwise(crop_1, crop_2,
                     eltwise_param=dict(operation=P.Eltwise.SUM, coeff=[1.0, -1.0]))
    gradient_x = L.AbsVal(diff)
    return gradient_x
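# gradient_x hard-codes a 100x100 input: the dummy reference is 100x99, one
# column narrower than the input, so shifting the crop by one column gives a
# horizontal finite difference. Under the same fixed-size assumption, the
# vertical counterpart shifts by one row instead:
def gradient_y(bottom):
    dummy_data = L.DummyData(dummy_data_param=dict(shape=[dict(dim=[1, 1, 99, 100])]))
    crop_1 = L.Crop(bottom, dummy_data, crop_param=dict(offset=[1, 0]))
    crop_2 = L.Crop(bottom, dummy_data, crop_param=dict(offset=[0, 0]))
    diff = L.Eltwise(crop_1, crop_2,
                     eltwise_param=dict(operation=P.Eltwise.SUM, coeff=[1.0, -1.0]))
    return L.AbsVal(diff)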
def ori_path(n, bottom):
    _aspp_block(n, bottom)  # ASPP module
    n.aspp_up1, n.aspp_up1_bn, n.aspp_up1_scale, n.aspp_up1_relu = \
        _deconv_bn_scale_relu(n.aspp_concat, nout=32, kernel_size=8, stride=4, pad=0)
    crop_bottoms = [n.aspp_up1_relu, n.res2c_relu]
    n.aspp_crop1 = L.Crop(*crop_bottoms, crop_param={'axis': 2})  # crop to 1/4 resolution
    n.aspp_up2, n.aspp_up2_bn, n.aspp_up2_scale, n.aspp_up2_relu = \
        _deconv_bn_scale_relu(n.aspp_crop1, nout=16, kernel_size=8, stride=4, pad=0)
    crop_bottoms = [n.aspp_up2_relu, n.data]
    n.aspp_crop2 = L.Crop(*crop_bottoms, crop_param={'axis': 2})  # crop to full resolution
def deconv_crop_layer(bottom, bottom2, num_out, size_kernel, size_stride, num_offset):
    deconv1 = L.Deconvolution(
        bottom,
        convolution_param=dict(num_output=num_out, kernel_size=size_kernel,
                               stride=size_stride),
        param=[{'lr_mult': 0, 'decay_mult': 1}, {'lr_mult': 0, 'decay_mult': 0}])
    feature_dsn = L.Crop(deconv1, bottom2, crop_param=dict(axis=2, offset=num_offset))
    return feature_dsn
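# Hypothetical use of deconv_crop_layer for an HED/DSN-style side output:
# upsample a stride-8 score map back to input resolution, then crop it to the
# data blob. All names here are illustrative, not from the original network.
def side_output_example(n):
    n.feature_dsn3 = deconv_crop_layer(n.score_dsn3, n.data, num_out=1,
                                       size_kernel=16, size_stride=8, num_offset=4)
    return n.feature_dsn3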
def test_crop(self):
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.reference1 = L.Input(shape=make_shape([1, 1, 32, 32]))
    n.crop1 = L.Crop(n.input1, n.reference1, axis=2, offset=[16, 16])
    n.conv1 = L.Convolution(n.crop1, num_output=10, kernel_size=5,
                            weight_filler=make_weight_filler(),
                            bias_filler=make_bias_filler())
    self._test_model(*self._netspec_to_model(n, 'crop'))
def test_crop_of_crop(self):
    """
    Map coordinates through Crop layer: crop an already-cropped output
    to the input and check the change in offset.
    """
    n = coord_net_spec()
    offset = random.randint(0, 10)
    ax, a, b = coord_map_from_to(n.deconv, n.data)
    n.crop = L.Crop(n.deconv, n.data, axis=2, offset=offset)
    ax_crop, a_crop, b_crop = coord_map_from_to(n.crop, n.data)
    self.assertEquals(ax, ax_crop)
    self.assertEquals(a, a_crop)
    self.assertEquals(b + offset, b_crop)
def make_cunet():
    netoffset = 28
    ch = 3
    input_size = 256 + netoffset * 2
    assert input_size % 4 == 0
    data = L.Input(name="input", shape=dict(dim=[1, ch, input_size, input_size]))
    u1 = unet1(data, ch=ch, deconv=False)
    u2 = unet2(u1, ch=ch, deconv=False)
    crop = L.Crop(u1, u2, crop_param=dict(axis=2, offset=20))
    cadd = L.Eltwise(crop, u2, operation=P.Eltwise.SUM)
    return to_proto(cadd)
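# A minimal driver, assuming the usual `from caffe import layers as L, params
# as P, to_proto` imports: to_proto returns a NetParameter message whose
# str() is valid prototxt.
if __name__ == '__main__':
    with open('cunet.prototxt', 'w') as f:
        f.write(str(make_cunet()))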
def test_crop():
    # type: () -> caffe.NetSpec
    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.reference1 = L.Input(shape=make_shape([1, 1, 32, 32]))
    n.crop1 = L.Crop(n.input1, n.reference1, axis=2, offset=[16, 16])
    n.conv1 = L.Convolution(n.crop1, num_output=10, kernel_size=5,
                            weight_filler=make_weight_filler(),
                            bias_filler=make_bias_filler())
    return n
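# The crop tests above assume a few small helpers that are not shown here.
# Minimal sketches (the originals may differ, e.g. in filler types):
def make_shape(dims):
    # wrap a dimension list in the dict form L.Input expects
    return dict(dim=list(dims))

def make_weight_filler():
    return dict(type='xavier')  # assumption: any valid filler satisfies the test

def make_bias_filler():
    return dict(type='constant', value=0.1)  # assumption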
def unet_branch(bottom, insert_f, i, o, depad):
    pool = L.Convolution(bottom, kernel_size=2, stride=2, num_output=i, pad=0)
    relu1 = L.ReLU(pool, in_place=True, negative_slope=0.1)
    feat = insert_f(relu1)
    unpool = L.Deconvolution(feat,
                             convolution_param=dict(num_output=o, kernel_size=2,
                                                    pad=0, stride=2))
    relu2 = L.ReLU(unpool, in_place=True, negative_slope=0.1)
    crop = L.Crop(bottom, relu2, crop_param=dict(axis=2, offset=depad))
    cadd = L.Eltwise(crop, relu2, operation=P.Eltwise.SUM)
    return cadd
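# unet_branch composes recursively: insert_f builds whatever runs at the
# halved resolution, so nesting unet_branch inside insert_f yields a deeper U.
# A minimal, hypothetical inner body (a pad-preserving conv block, so the 2x
# down/up pair cancels spatially and depad can stay 0, provided the bottom
# has `o` channels for the Eltwise sum):
def _inner_block(x):
    conv = L.Convolution(x, kernel_size=3, pad=1, num_output=64)
    return L.ReLU(conv, in_place=True, negative_slope=0.1)

# branch = unet_branch(some_top, _inner_block, i=64, o=32, depad=0)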
def crop(top_from, top_to):
    """
    Define a Crop layer to crop a top (from) to another top (to) by
    determining the coordinate mapping between the two and net spec'ing
    the axis and shift parameters of the crop.
    """
    ax, a, b = coord_map_from_to(top_from, top_to)
    assert (a == 1).all(), 'scale mismatch on crop (a = {})'.format(a)
    assert (b <= 0).all(), 'cannot crop negative offset (b = {})'.format(b)
    assert (np.round(b) == b).all(), \
        'cannot crop noninteger offset (b = {})'.format(b)
    return L.Crop(top_from, top_to,
                  crop_param=dict(axis=ax + 1,  # +1 for first cropping dim.
                                  offset=list(-np.round(b).astype(int))))
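# Typical use of crop(): let the coordinate mapping pick the axis and offset
# instead of hand-tuning them. simple_net further below computes the same
# mapping explicitly before building its Crop layer; this helper packages
# that pattern. (Assumes n.upscore and n.data already exist in the NetSpec.)
def crop_to_input(n):
    n.score = crop(n.upscore, n.data)
    return n.score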
def BuildNet(sport, depth, split, version):
    n = BuildBaseNet(sport, depth, split)
    last = n.conv8
    # too few parameters
    if version == 'fixedfixedlearned':
        n.upsample1 = upsample('upsample1', last, depth, 4, 'upsample_weight')
        n.upsample2 = upsample('upsample2', n.upsample1, depth, 4, 'upsample_weight')
        n.deconv = deconv('deconv', n.upsample2, depth, 1, 0.01, 0)
        last = n.deconv
    # doesn't fit in memory
    elif version == 'fixedlearnedlearned':
        n.upsample1 = upsample('upsample1', last, depth, 4, 'upsample_weight')
        n.deconv1 = deconv('deconv1', n.upsample1, depth, 4, 0.01, 0)
        n.deconv2 = deconv('deconv2', n.deconv1, depth, 4, 0.01, 0)
        last = n.deconv2
    # pointless, I think
    elif version == 'learnedmiddle':
        n.deconv1 = deconv('deconv1', last, depth, 4, 0.01, 0)
        n.deconv2 = deconv('deconv2', n.deconv1, depth, 4, 0.01, 0)
        n.deconv3 = deconv('deconv3', n.deconv2, depth, 2, 0.01, 0)
        last = n.deconv3
    elif version == 'learnedsmall':
        n.deconv1 = deconv('deconv1', last, depth, 4, 0.01, 0)
        n.deconv2 = deconv('deconv2', n.deconv1, depth, 4, 0.01, 0)
        last = n.deconv2
    # doesn't fit in memory
    elif version == 'learnedbig':
        n.deconv1 = deconv('deconv1', last, depth, 4, 0.01, 0)
        n.deconv2 = deconv('deconv2', n.deconv1, depth, 4, 0.01, 0)
        n.deconv3 = deconv('deconv3', n.deconv2, depth, 4, 0.01, 0)
        last = n.deconv3
    elif version == 'fixedlearned':
        n.upsample = upsample('upsample', last, depth, 2, 'upsample_weight')
        n.deconv1 = deconv('deconv', n.upsample, depth, 8, 0.01, 0)
        last = n.deconv1
    else:
        print('NO KNOWN VERSION YOU FOOL')
        return
    if split == 'train' or split == 'test':
        n.crop = L.Crop(last, n.label, axis=2, offset=0)
        n.softmax = L.SoftmaxWithLoss(n.crop, n.label,
                                      loss_param=dict(ignore_label=-1,
                                                      normalize=False))
    if split == 'deploy':
        # n.crop = L.Crop(last, n.data, axis=2, offset=0)
        pass
    return n.to_proto()
def cnn_module(bottom, num_out):
    conv1 = conv_bn_relu_layer(bottom, 64)
    conv2 = conv_bn_relu_layer(conv1, 64)
    pool1 = L.Pooling(conv2, pooling_param=dict(kernel_size=3, stride=2, pad=1,
                                                pool=P.Pooling.MAX))
    conv3 = conv_bn_relu_layer(pool1, 64)
    conv4 = conv_bn_relu_layer(conv3, 64)
    pool2 = L.Pooling(conv4, pooling_param=dict(kernel_size=3, stride=2, pad=1,
                                                pool=P.Pooling.MAX))
    conv5 = conv_bn_relu_layer(pool2, 64)
    conv6 = conv_bn_relu_layer(conv5, 64)
    conv6_upsample = L.Interp(conv6, interp_param=dict(zoom_factor=4))
    conv6_upsample_crop = L.Crop(conv6_upsample, conv2)
    conv4_upsample = L.Interp(conv4, interp_param=dict(zoom_factor=2))
    conv4_upsample_crop = L.Crop(conv4_upsample, conv2)
    conv_concat = L.Concat(bottom, conv2, conv4_upsample_crop, conv6_upsample_crop)
    # the feature layer of DEL
    conv_dim = conv_relu_feature_layer(conv_concat, 256)
    conv_dsp = L.Convolution(conv_dim,
                             convolution_param=dict(num_output=64, kernel_size=1,
                                                    stride=1, pad=0,
                                                    weight_filler=dict(type='xavier', std=0.01),
                                                    bias_filler=dict(type='constant', value=0)),
                             param=[{'lr_mult': 1, 'decay_mult': 1},
                                    {'lr_mult': 2, 'decay_mult': 0}])
    # the layer of SSN
    conv7 = conv_relu_layer(conv_concat, num_out)
    conv_comb = L.Concat(bottom, conv7)
    return conv_comb, conv_dsp
def upconv_concat(bottom, ks, stride, nout, pad, crop_offset, cat):
    upconv = L.Deconvolution(bottom,
                             convolution_param=dict(num_output=nout, kernel_size=ks,
                                                    stride=stride, pad=pad,
                                                    weight_filler=dict(type='constant', value=1)),
                             param=[{'lr_mult': 1, 'decay_mult': 1},
                                    {'lr_mult': 2, 'decay_mult': 0}])
    crop_cat = L.Crop(cat, upconv, axis=2, offset=crop_offset)
    return upconv, L.Concat(upconv, crop_cat)  # channels double to nout * 2
def FCN(images_lmdb, labels_lmdb, batch_size, include_acc=False):
    # net definition
    n = caffe.NetSpec()
    n.data = L.Data(source=images_lmdb, backend=P.Data.LMDB, batch_size=batch_size,
                    ntop=1, transform_param=dict(crop_size=0, mean_value=[77],
                                                 mirror=False))
    n.label = L.Data(source=labels_lmdb, backend=P.Data.LMDB,
                     batch_size=batch_size, ntop=1)
    n.conv1, n.relu1 = conv_relu(n.data, ks=5, nout=100, stride=2, pad=50,
                                 bias_value=0.1)
    n.pool1 = max_pool(n.relu1, ks=2, stride=2)
    n.conv2, n.relu2 = conv_relu(n.pool1, ks=5, nout=200, stride=2, bias_value=0.1)
    n.pool2 = max_pool(n.relu2, ks=2, stride=2)
    n.conv3, n.relu3 = conv_relu(n.pool2, ks=3, nout=300, stride=1, bias_value=0.1)
    n.conv4, n.relu4 = conv_relu(n.relu3, ks=3, nout=300, stride=1, bias_value=0.1)
    n.drop = L.Dropout(n.relu4, dropout_ratio=0.1, in_place=True)
    n.score_classes, _ = conv_relu(n.drop, ks=1, nout=2, weight_std=0.01,
                                   bias_value=0.1)
    n.upscore = L.Deconvolution(n.score_classes)
    n.score = L.Crop(n.upscore, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label, loss_param=dict(normalize=True))
    if include_acc:
        n.accuracy = L.Accuracy(n.score, n.label)
    return n.to_proto()
def net(split):
    n = caffe.NetSpec()
    loss_param = dict(normalize=False)
    if split == 'train':
        data_params = dict(mean=(104.00699, 116.66877, 122.67892))
        # images and labels
        data_params['root'] = './datasets/CTW1500_Total_TCB'
        data_params['source'] = "CTW1500_Total_TCB.lst"
        data_params['shuffle'] = True
        data_params['ignore_label'] = -1
        n.data, n.label = L.Python(module='pylayer_old',
                                   layer='ImageLabelmapDataLayer', ntop=2,
                                   param_str=str(data_params))
        if 'ignore_label' in data_params:
            loss_param['ignore_label'] = int(data_params['ignore_label'])
    elif split == 'test':
        n.data = L.Input(name='data',
                         input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
    else:
        raise Exception("Invalid phase")

    # The first conv stage
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    # The second conv stage
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    # The third conv stage
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.conv3_dilation1 = conv_dilation01(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation2 = conv_dilation03(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation3 = conv_dilation05(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation4 = conv_dilation07(n.conv3_3, mult=[100, 1, 200, 0])
    n.concat_conv33 = L.Concat(n.conv3_dilation1, n.conv3_dilation2,
                               n.conv3_dilation3, n.conv3_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # ===================== prepare lstm inputs =====================
    n.im2col_conv33 = L.Im2col(n.concat_conv33,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv33 = L.Transpose(n.im2col_conv33,
                                            transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv33 = L.Reshape(n.im2col_transpose_conv33,
                                    reshape_param=dict(shape=dict(dim=-1),
                                                       axis=1, num_axes=2))
    n.lstm_conv33 = L.Lstm(n.lstm_input_conv33,
                           lstm_param=dict(num_output=128,
                                           weight_filler=dict(type='gaussian', std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # ===================== rlstm =====================
    n.rlstm_input_conv33 = L.Reverse(n.lstm_input_conv33,
                                     name='lstm_reverse1_conv33',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv33 = L.Lstm(n.rlstm_input_conv33, name='rlstm_conv33',
                                   lstm_param=dict(num_output=128))
    n.rlstm_conv33 = L.Reverse(n.rlstm_output_conv33,
                               name='lstm_reverse2_conv33',
                               reverse_param=dict(axis=0))
    # merge lstm_conv33 and rlstm_conv33
    n.merge_lstm_rlstm_conv33 = L.Concat(n.lstm_conv33, n.rlstm_conv33,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv33 = L.Reshape(n.merge_lstm_rlstm_conv33,
                                             reshape_param=dict(shape=dict(dim=[-1, 1]),
                                                                axis=1, num_axes=1))
    # transpose the output to (N, C, H, W)
    n.lstm_output_conv33 = L.Transpose(n.lstm_output_reshape_conv33,
                                       transpose_param=dict(dim=[2, 3, 1, 0]))
    n.pool3 = max_pool(n.relu3_3)

    # The fourth conv stage
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.conv4_dilation1 = conv_dilation1(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation2 = conv_dilation3(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation3 = conv_dilation5(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation4 = conv_dilation7(n.conv4_3, mult=[100, 1, 200, 0])
    n.concat_conv43 = L.Concat(n.conv4_dilation1, n.conv4_dilation2,
                               n.conv4_dilation3, n.conv4_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # ===================== prepare lstm inputs =====================
    n.im2col_conv43 = L.Im2col(n.concat_conv43,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv43 = L.Transpose(n.im2col_conv43,
                                            transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv43 = L.Reshape(n.im2col_transpose_conv43,
                                    reshape_param=dict(shape=dict(dim=-1),
                                                       axis=1, num_axes=2))
    n.lstm_conv43 = L.Lstm(n.lstm_input_conv43,
                           lstm_param=dict(num_output=256,
                                           weight_filler=dict(type='gaussian', std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # ===================== rlstm =====================
    n.rlstm_input_conv43 = L.Reverse(n.lstm_input_conv43,
                                     name='lstm_reverse1_conv43',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv43 = L.Lstm(n.rlstm_input_conv43, name='rlstm_conv43',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv43 = L.Reverse(n.rlstm_output_conv43,
                               name='lstm_reverse2_conv43',
                               reverse_param=dict(axis=0))
    # merge lstm_conv43 and rlstm_conv43
    n.merge_lstm_rlstm_conv43 = L.Concat(n.lstm_conv43, n.rlstm_conv43,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv43 = L.Reshape(n.merge_lstm_rlstm_conv43,
                                             reshape_param=dict(shape=dict(dim=[-1, 1]),
                                                                axis=1, num_axes=1))
    # transpose the output to (N, C, H, W)
    n.lstm_output_conv43 = L.Transpose(n.lstm_output_reshape_conv43,
                                       transpose_param=dict(dim=[2, 3, 1, 0]))
    n.pool4 = max_pool(n.relu4_3)

    # The fifth conv stage
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.conv5_dilation1 = conv_dilation1(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation2 = conv_dilation3(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation3 = conv_dilation5(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation4 = conv_dilation7(n.conv5_3, mult=[100, 1, 200, 0])
    n.concat_conv53 = L.Concat(n.conv5_dilation1, n.conv5_dilation2,
                               n.conv5_dilation3, n.conv5_dilation4,
                               concat_param=dict({'concat_dim': 1}))

    # ===================== prepare lstm inputs =====================
    n.im2col_conv53 = L.Im2col(n.concat_conv53,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv53 = L.Transpose(n.im2col_conv53,
                                            transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv53 = L.Reshape(n.im2col_transpose_conv53,
                                    reshape_param=dict(shape=dict(dim=-1),
                                                       axis=1, num_axes=2))
    n.lstm_conv53 = L.Lstm(n.lstm_input_conv53,
                           lstm_param=dict(num_output=256,
                                           weight_filler=dict(type='gaussian', std=0.01),
                                           bias_filler=dict(type='constant'),
                                           clipping_threshold=1))
    # ===================== rlstm =====================
    n.rlstm_input_conv53 = L.Reverse(n.lstm_input_conv53,
                                     name='lstm_reverse1_conv53',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv53 = L.Lstm(n.rlstm_input_conv53, name='rlstm_conv53',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv53 = L.Reverse(n.rlstm_output_conv53,
                               name='lstm_reverse2_conv53',
                               reverse_param=dict(axis=0))
    # merge lstm_conv53 and rlstm_conv53
    n.merge_lstm_rlstm_conv53 = L.Concat(n.lstm_conv53, n.rlstm_conv53,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv53 = L.Reshape(n.merge_lstm_rlstm_conv53,
                                             reshape_param=dict(shape=dict(dim=[-1, 1]),
                                                                axis=1, num_axes=1))
    # transpose the output to (N, C, H, W)
    n.lstm_output_conv53 = L.Transpose(n.lstm_output_reshape_conv53,
                                       transpose_param=dict(dim=[2, 3, 1, 0]))

    # DSN3
    n.score_dsn3 = conv1x1(n.lstm_output_conv33, lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn3_up = upsample(n.score_dsn3, stride=4)
    n.upscore_dsn3 = L.Crop(n.score_dsn3_up, n.data)
    if split == 'train':
        n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)
    # DSN4
    n.score_dsn4 = conv1x1(n.lstm_output_conv43, lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn4_up = upsample(n.score_dsn4, stride=8)
    n.upscore_dsn4 = L.Crop(n.score_dsn4_up, n.data)
    if split == 'train':
        n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)
    # DSN5
    n.score_dsn5 = conv1x1(n.lstm_output_conv53, lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn5_up = upsample(n.score_dsn5, stride=16)
    n.upscore_dsn5 = L.Crop(n.score_dsn5_up, n.data)
    if split == 'train':
        n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)

    # concatenate the side outputs and pass them through the attention model
    n.concat_upscore = L.Concat(n.upscore_dsn3, n.upscore_dsn4, n.upscore_dsn5,
                                name='concat', concat_param=dict({'concat_dim': 1}))
    n.upscore_fuse = L.Convolution(n.concat_upscore, name='new-score-weighting',
                                   num_output=1, kernel_size=1,
                                   param=[dict(lr_mult=0.001, decay_mult=1),
                                          dict(lr_mult=0.002, decay_mult=0)],
                                   weight_filler=dict(type='constant', value=0.2),
                                   engine=1)
    if split == 'test':
        n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
    if split == 'train':
        n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label,
                                                loss_param=loss_param)
    return n.to_proto()
def create_UNet(data_lmdb, label_lmdb, batch_size, mean_file, phase):
    n = caffe.NetSpec()
    n.data = L.Data(source=data_lmdb, backend=P.Data.LMDB, batch_size=batch_size,
                    ntop=1, transform_param=dict(mean_file=mean_file),
                    include=dict(phase=phase))
    n.label = L.Data(source=label_lmdb, backend=P.Data.LMDB, batch_size=batch_size,
                     ntop=1, transform_param=dict(mean_value=0),
                     include=dict(phase=phase))
    # encoder => level 1
    # conv_relu(bottom, kernel_size, stride, nout, pad, ...)
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 3, 1, 64, 0, True)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 3, 1, 64, 0, True)
    n.pool1 = L.Pooling(n.relu1_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # encoder => level 2
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 3, 1, 128, 0, True)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 3, 1, 128, 0, True)
    n.pool2 = L.Pooling(n.relu2_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # encoder => level 3
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 3, 1, 256, 0, True)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 3, 1, 256, 0, True)
    n.pool3 = L.Pooling(n.relu3_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # encoder => level 4
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 3, 1, 512, 0, True)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 3, 1, 512, 0, True)
    n.pool4 = L.Pooling(n.relu4_2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # encoder => level 5
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 3, 1, 1024, 0, True)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 3, 1, 1024, 0, True)
    # upconv_concat(bottom, ks, stride, nout, pad, crop_offset, cat)
    n.upconv1, n.concat1 = upconv_concat(n.relu5_2, 2, 2, 512, 0, 4, n.relu4_2)
    # decoder => level 1
    n.conv6_1, n.relu6_1 = conv_relu(n.concat1, 3, 1, 512, 0, True)
    n.conv6_2, n.relu6_2 = conv_relu(n.relu6_1, 3, 1, 512, 0, True)
    n.upconv2, n.concat2 = upconv_concat(n.relu6_2, 2, 2, 256, 0, 16, n.relu3_2)
    # decoder => level 2
    n.conv7_1, n.relu7_1 = conv_relu(n.concat2, 3, 1, 256, 0, True)
    n.conv7_2, n.relu7_2 = conv_relu(n.relu7_1, 3, 1, 256, 0, True)
    n.upconv3, n.concat3 = upconv_concat(n.relu7_2, 2, 2, 128, 0, 40, n.relu2_2)
    # decoder => level 3
    n.conv8_1, n.relu8_1 = conv_relu(n.concat3, 3, 1, 128, 0, True)
    n.conv8_2, n.relu8_2 = conv_relu(n.relu8_1, 3, 1, 128, 0, True)
    n.upconv4, n.concat4 = upconv_concat(n.relu8_2, 2, 2, 64, 0, 88, n.relu1_2)
    # decoder => level 4
    n.conv9_1, n.relu9_1 = conv_relu(n.concat4, 3, 1, 64, 0, True)
    n.conv9_2, n.relu9_2 = conv_relu(n.relu9_1, 3, 1, 64, 0, True)
    n.score = L.Convolution(n.relu9_2, kernel_size=1, num_output=3, pad=0)
    # unpadded convs shrink the output, so crop the label to the score map
    n.labelcrop = L.Crop(n.label, n.score, crop_param={'axis': 2, 'offset': 92})
    n.loss = L.SoftmaxWithLoss(n.score, n.labelcrop,
                               loss_param={'ignore_label': 3},
                               propagate_down=[True, False])
    n.argmax = L.ArgMax(n.score, argmax_param={'axis': 1},
                        include=dict(phase=caffe.TEST))
    n.acc, accuracy_by_class = L.Accuracy(n.score, n.labelcrop,
                                          accuracy_param={'axis': 1},
                                          include=dict(phase=caffe.TEST), ntop=2)
    # n.confmat = L.Python(n.argmax, n.labelcrop,
    #                      python_param={'module': 'python_confmat',
    #                                    'layer': 'PythonConfMat',
    #                                    'param_str': '{"test_iter":3780}'},
    #                      include=dict(phase=caffe.TEST))
    return n.to_proto()
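# The crop offsets above (4, 16, 40, 88, and 92 for the label) are forced by
# the unpadded 3x3 convolutions: each conv trims 1 px per side, each 2x2 pool
# halves the size, and each upconv doubles it. Per side this gives the
# recurrence offset_{k+1} = 2 * offset_k + 8, and the final label crop is the
# last skip offset plus the last two convs: 88 + 4 = 92 (e.g. a 572x572 input
# yields a 388x388 score map, and (572 - 388) / 2 = 92). A quick sanity check:
def unet_crop_offsets(levels=4, first=4):
    offsets = [first]
    for _ in range(levels - 1):
        offsets.append(2 * offsets[-1] + 8)
    return offsets  # -> [4, 16, 40, 88]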
def convert(keras_model, caffe_net_file, caffe_params_file):
    caffe_net = caffe.NetSpec()
    net_params = dict()
    outputs = dict()
    input_str = ''
    for layer in keras_model.layers:
        name = layer.name
        print('processing ' + name + ' ' + '=' * 60)
        layer_type = type(layer).__name__
        config = layer.get_config()
        blobs = layer.get_weights()
        if type(layer.output) == list:
            # raise Exception('Layers with multiple outputs are not supported')
            top = layer.output[0].name
        else:
            top = layer.output.name
        if type(layer.input) != list:
            bottom = layer.input.name

        # data
        if layer_type == 'InputLayer' or not hasattr(caffe_net, 'data'):
            input_name = 'data'
            caffe_net[input_name] = L.Layer()
            input_shape = config['batch_input_shape']
            input_str = ('input: {}\ninput_dim: {}\ninput_dim: {}\n'
                         'input_dim: {}\ninput_dim: {}').format(
                             '"' + input_name + '"', 1, input_shape[3],
                             input_shape[1], input_shape[2])
            outputs[layer.input.name] = input_name
            if layer_type == 'InputLayer':
                continue

        # conv
        if layer_type == 'Conv2D' or layer_type == 'Convolution2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            dilation = config['dilation_rate']
            kwargs = {'num_output': config['filters']}
            if dilation[0] == dilation[1]:
                kwargs['dilation'] = dilation[0]
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            if not config['use_bias']:
                kwargs['bias_term'] = False
            set_padding(config, layer.input_shape, kwargs)
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
            net_params[name] = blobs
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'tanh':
                name_s = name + 's'  # define the in-place top name before use
                caffe_net[name_s] = L.TanH(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                pass
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'Conv2DTranspose':
            # stride
            if layer.strides is None:
                strides = (1, 1)
            else:
                strides = layer.strides
            use_bias = config['use_bias']
            param = dict(bias_term=use_bias)
            # padding
            if layer.padding == 'same':
                # approximate Keras 'same' padding
                padding = [layer.kernel_size[0] // 2, layer.kernel_size[1] // 2]
            else:
                padding = [0, 0]  # 'valid', i.e. no padding
            param['pad'] = padding[0]
            if strides[0] == 2:
                param['pad'] = 0
            param['kernel_size'] = layer.kernel_size[0]
            param['stride'] = strides[0]
            param['num_output'] = layer.filters
            caffe_net[name] = L.Deconvolution(caffe_net[outputs[bottom]],
                                              convolution_param=param)
            blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
            net_params[name] = blobs
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'tanh':
                name_s = name + 's'  # define the in-place top name before use
                caffe_net[name_s] = L.TanH(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                pass
            else:
                raise Exception('Unsupported activation ' + config['activation'])
            if name == 'Deconv_2':
                name_crop = name + '_crop'
                caffe_net.data1 = L.Input(shape=dict(dim=[1, 512, 90, 90]))
                caffe_net[name_crop] = L.Crop(caffe_net[name], caffe_net.data1,
                                              axis=1, offset=0)
            if name == 'Deconv_3':
                name_crop = name + '_crop'
                caffe_net.data2 = L.Input(shape=dict(dim=[1, 256, 180, 180]))
                caffe_net[name_crop] = L.Crop(caffe_net[name], caffe_net.data2,
                                              axis=1, offset=0)

        elif layer_type == 'BatchNormalization':
            param = dict()
            variance = np.array(blobs[-1])
            mean = np.array(blobs[-2])
            if config['scale']:
                gamma = np.array(blobs[0])
                sparam = [dict(lr_mult=1), dict(lr_mult=1)]
            else:
                gamma = np.ones(mean.shape, dtype=np.float32)
                sparam = [dict(lr_mult=0), dict(lr_mult=1)]
            if config['center']:
                beta = np.array(blobs[-3])
                param['bias_term'] = True
            else:
                beta = np.zeros(mean.shape, dtype=np.float32)
                param['bias_term'] = False
            caffe_net[name] = L.BatchNorm(caffe_net[outputs[bottom]],
                                          moving_average_fraction=layer.momentum,
                                          eps=layer.epsilon)
            net_params[name] = (mean, variance, np.array(1.0))
            name_s = name + '_scale'
            caffe_net[name_s] = L.Scale(caffe_net[name], in_place=True, param=sparam,
                                        scale_param={'bias_term': config['center']})
            net_params[name_s] = (gamma, beta)

        elif layer_type == 'Activation':
            if config['activation'] == 'relu':
                if len(layer.input.consumers()) > 1:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
                else:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation'] == 'tanh':
                if len(layer.input.consumers()) > 1:
                    caffe_net[name] = L.TanH(caffe_net[outputs[bottom]])
                else:
                    caffe_net[name] = L.TanH(caffe_net[outputs[bottom]], in_place=True)
            elif config['activation'] == 'relu6':
                caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
            elif config['activation'] == 'softmax':
                caffe_net[name] = L.Softmax(caffe_net[outputs[bottom]], in_place=True)
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        # assumption: this branch handles MaxPooling2D (the original matched
        # on 'range', which never equals a Keras layer type name)
        elif layer_type == 'MaxPooling2D':
            kwargs = {'pool': P.Pooling.MAX}
            pool_size = (3, 3)
            strides = (2, 2)
            config['pool_size'] = pool_size
            config['strides'] = strides
            if pool_size[0] != pool_size[1]:
                raise Exception('Unsupported pool_size')
            if strides[0] != strides[1]:
                raise Exception('Unsupported strides')
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]],
                                        kernel_size=pool_size[0],
                                        stride=strides[0], **kwargs)

        elif layer_type == 'MaxUnpooling2D':
            unpool_size = (3, 3)
            strides = (2, 2)
            config['unpool_size'] = unpool_size
            config['strides'] = strides
            if unpool_size[0] != unpool_size[1]:
                raise Exception('Unsupported unpool_size')
            if strides[0] != strides[1]:
                raise Exception('Unsupported strides')
            caffe_net[name] = L.Unpooling(caffe_net[outputs[bottom]],
                                          unpool=P.Unpooling.MAX, kernel_size=3,
                                          unpool_h=360, unpool_w=360)

        elif layer_type == 'Add':
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Eltwise(*layers)
        else:
            raise Exception('Unsupported layer type: ' + layer_type)

        outputs[top] = name
        if name == 'Deconv_2':
            outputs[top] = name + '_crop'
        if name == 'Deconv_3':
            outputs[top] = name + '_crop'

    # drop the dummy data layer from the generated prototxt and prepend the
    # raw input definition
    net_proto = input_str + '\n' + 'layer {' + 'layer {'.join(
        str(caffe_net.to_proto()).split('layer {')[2:])
    with open(caffe_net_file, 'w') as f:
        f.write(net_proto)
    print("prototxt is done!")

    caffe_model = caffe.Net(caffe_net_file, caffe.TEST)
    for layer in caffe_model.params.keys():
        if 'up_sampling2d' in layer:
            continue
        for n in range(0, len(caffe_model.params[layer])):
            print('layer:', layer)
            print('n:', n)
            print((caffe_model.params[layer][n].data[...]).shape)
            print((net_params[layer][n]).shape)
            caffe_model.params[layer][n].data[...] = net_params[layer][n]
    caffe_model.save(caffe_params_file)
def simple_net(split, initialize_fc8=False, cur_shape=None, next_shape=None,
               batch_size=1, num_threads=1, max_queue_size=5):
    # Get crop layer parameters from a throwaway copy of the network
    tmp_net = caffe.NetSpec()
    tmp_net.im, tmp_net.label = L.MemoryData(batch_size=1, channels=3, height=244,
                                             width=244, ntop=2)
    conv_vgg(tmp_net, tmp_net.im, suffix='', last_layer_pad=0, first_layer_pad=100)
    tmp_net.fc6, tmp_net.relu6 = conv_relu(tmp_net.conv5_3, 4096, ks=7, dilation=4)
    tmp_net.fc7, tmp_net.relu7 = conv_relu(tmp_net.relu6, 4096, ks=1, pad=0)
    tmp_net.fc8 = L.Convolution(tmp_net.relu7, kernel_size=1, num_output=2)
    tmp_net.upscore = L.Deconvolution(tmp_net.fc8,
                                      convolution_param=dict(kernel_size=16,
                                                             stride=8, num_output=2))
    ax, a, b = coord_map_from_to(tmp_net.upscore, tmp_net.im)
    assert (a == 1).all(), 'scale mismatch on crop (a = {})'.format(a)
    assert (b <= 0).all(), 'cannot crop negative offset (b = {})'.format(b)
    assert (np.round(b) == b).all(), 'cannot crop noninteger offset (b = {})'.format(b)

    # Create the network
    n = caffe.NetSpec()
    if split == 'train':
        pydata_params = dict(batch_size=batch_size, im_shape=tuple(next_shape),
                             num_threads=num_threads, max_queue_size=max_queue_size)
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='coco_transformed_datalayers_prefetch',
            layer='CocoTransformedDataLayerPrefetch',
            ntop=4, param_str=str(pydata_params))
    elif split == 'val':
        pydata_params = dict(batch_size=batch_size, im_shape=tuple(next_shape),
                             num_threads=num_threads, max_queue_size=max_queue_size)
        n.cur_im, n.masked_im, n.next_im, n.label = L.Python(
            module='coco_transformed_datalayers_prefetch',
            layer='CocoTransformedDataLayerPrefetch',
            ntop=4, param_str=str(pydata_params))
    elif split == 'deploy':
        n.cur_im, n.label_1 = L.MemoryData(batch_size=1, channels=3, height=244,
                                           width=244, ntop=2)
        n.masked_im, n.label_2 = L.MemoryData(batch_size=1, channels=3, height=244,
                                              width=244, ntop=2)
        n.next_im, n.label_3 = L.MemoryData(batch_size=1, channels=3, height=244,
                                            width=244, ntop=2)
    else:
        raise Exception

    if cur_shape is None or next_shape is None:
        concat_pad = np.zeros((2,))
    else:
        concat_pad = (next_shape - cur_shape) / 2.0 / 8.0
    if not all(concat_pad == np.round(concat_pad)):
        raise Exception

    conv_vgg(n, n.cur_im, suffix='c', last_layer_pad=concat_pad, first_layer_pad=100)
    conv_vgg(n, n.masked_im, suffix='m', last_layer_pad=concat_pad, first_layer_pad=100)
    conv_vgg(n, n.next_im, suffix='n', last_layer_pad=0, first_layer_pad=100)

    # concatenation
    n.concat1 = L.Concat(n.relu5_3c, n.relu5_3m, n.relu5_3n)

    # fully convolutional layers
    n.fc6, n.relu6 = conv_relu(n.concat1, 4096, ks=7, dilation=4)
    if split == 'train':
        n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
        n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
        n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
        n.fc8 = L.Convolution(n.drop7, kernel_size=1,
                              param=[dict(lr_mult=1, decay_mult=1),
                                     dict(lr_mult=2, decay_mult=0)],
                              num_output=2)
    else:
        n.fc7, n.relu7 = conv_relu(n.relu6, 4096, ks=1, pad=0)
        if initialize_fc8:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  weight_filler=dict(type='gaussian', std=.01),
                                  num_output=2)
        else:
            n.fc8 = L.Convolution(n.relu7, kernel_size=1,
                                  param=[dict(lr_mult=1, decay_mult=1),
                                         dict(lr_mult=2, decay_mult=0)],
                                  num_output=2)
    n.upscore = L.Deconvolution(n.fc8,
                                convolution_param=dict(kernel_size=16, stride=8,
                                                       num_output=2, group=2,
                                                       weight_filler=dict(type='bilinear'),
                                                       bias_term=False),
                                param=dict(lr_mult=0, decay_mult=0))
    n.score = L.Crop(n.upscore, n.next_im,
                     crop_param=dict(axis=ax + 1,  # +1 for first cropping dim.
                                     offset=list(-np.round(b).astype(int))))
    if split != 'deploy':
        n.loss = L.SoftmaxWithLoss(n.score, n.label,
                                   loss_param=dict(ignore_label=255))
    else:
        n.prop = L.Softmax(n.score)
    return n
def custom_net(hdf5, batch_size):
    # define your own net!
    n = caffe.NetSpec()
    # keep this data layer for all networks
    # HDF5 DATA LAYER
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)

    # The conv_relu / max_pool helpers expand to this pattern, e.g. for the
    # first encoder block:
    # n.conv_d0a_b = L.Convolution(n.data, kernel_size=3, num_output=64, pad=0,
    #                              weight_filler=dict(type='xavier'))
    # n.relu_d0b = L.ReLU(n.conv_d0a_b)
    # n.pool_d0c_1a = L.Pooling(n.relu_d0c, kernel_size=2, stride=2,
    #                           pool=P.Pooling.MAX)
    n.conv_d0a_b, n.relu_d0b = conv_relu(n.data, 64)
    n.conv_d0b_c, n.relu_d0c = conv_relu(n.relu_d0b, 64)
    n.pool_d0c_1a = max_pool(n.relu_d0c)

    n.conv_d1a_b, n.relu_d1b = conv_relu(n.pool_d0c_1a, 128)
    n.conv_d1b_c, n.relu_d1c = conv_relu(n.relu_d1b, 128)
    n.pool_d1c_2a = max_pool(n.relu_d1c)

    n.conv_d2a_b, n.relu_d2b = conv_relu(n.pool_d1c_2a, 256)
    n.conv_d2b_c, n.relu_d2c = conv_relu(n.relu_d2b, 256)
    n.pool_d2c_3a = max_pool(n.relu_d2c)

    n.conv_d3a_b, n.relu_d3b = conv_relu(n.pool_d2c_3a, 512)
    n.conv_d3b_c, n.relu_d3c = conv_relu(n.relu_d3b, 512)
    n.dropout_d3c = L.Dropout(n.relu_d3c, dropout_ratio=0.5)
    n.pool_d3c_4a = max_pool(n.dropout_d3c)

    n.conv_d4a_b, n.relu_d4b = conv_relu(n.pool_d3c_4a, 1024)
    n.conv_d4b_c, n.relu_d4c = conv_relu(n.relu_d4b, 1024)
    n.dropout_d4c = L.Dropout(n.relu_d4c, dropout_ratio=0.5)
    # deconv_relu expands to:
    # n.upconv_d4c_u3a = L.Deconvolution(n.dropout_d4c)  # num_output=512,
    #     pad=0, kernel_size=2, stride=2, weight_filler=dict(type='xavier')
    # n.relu_u3a = L.ReLU(n.upconv_d4c_u3a)
    n.upconv_d4c_u3a, n.relu_u3a = deconv_relu(n.dropout_d4c, 512)

    n.crop_d3c_d3cc = L.Crop(n.relu_d3c, n.relu_u3a)
    n.concat_d3cc_u3a_b = L.Concat(n.relu_u3a, n.crop_d3c_d3cc)
    n.conv_u3b_c, n.relu_u3c = conv_relu(n.concat_d3cc_u3a_b, 512)
    n.conv_u3c_d, n.relu_u3d = conv_relu(n.relu_u3c, 512)
    n.upconv_u3d_u2a, n.relu_u2a = deconv_relu(n.relu_u3d, 256)

    n.crop_d2c_d2cc = L.Crop(n.relu_d2c, n.relu_u2a)
    n.concat_d2cc_u2a_b = L.Concat(n.relu_u2a, n.crop_d2c_d2cc)
    n.conv_u2b_c, n.relu_u2c = conv_relu(n.concat_d2cc_u2a_b, 256)
    n.conv_u2c_d, n.relu_u2d = conv_relu(n.relu_u2c, 256)
    n.upconv_u2d_u1a, n.relu_u1a = deconv_relu(n.relu_u2d, 128)

    n.crop_d1c_d1cc = L.Crop(n.relu_d1c, n.relu_u1a)
    n.concat_d1cc_u1a_b = L.Concat(n.relu_u1a, n.crop_d1c_d1cc)
    n.conv_u1b_c, n.relu_u1c = conv_relu(n.concat_d1cc_u1a_b, 128)
    n.conv_u1c_d, n.relu_u1d = conv_relu(n.relu_u1c, 128)
    n.upconv_u1d_u0a, n.relu_u0a = deconv_relu(n.relu_u1d, 128)

    n.crop_d0c_d0cc = L.Crop(n.relu_d0c, n.relu_u0a)
    n.concat_d0cc_u0a_b = L.Concat(n.relu_u0a, n.crop_d0c_d0cc)
    n.conv_u0b_c, n.relu_u0c = conv_relu(n.concat_d0cc_u0a_b, 64)
    n.conv_u0c_d, n.relu_u0d = conv_relu(n.relu_u0c, 64)

    n.conv_u0d_score = L.Convolution(n.relu_u0d, num_output=2, pad=0,
                                     kernel_size=1,
                                     weight_filler=dict(type='xavier'),
                                     param=[dict(lr_mult=1, decay_mult=1),
                                            dict(lr_mult=2, decay_mult=0)])
    # keep this loss layer for all networks
    n.loss = L.SoftmaxWithLoss(n.conv_u0d_score, n.label,
                               loss_param=dict(ignore_label=2))
    return n.to_proto()
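# custom_net assumes three small helpers. Minimal sketches consistent with the
# commented-out expansions above (unpadded 3x3 convs with Xavier init, 2x2 max
# pooling, 2x2 stride-2 deconvolution); note that other snippets in this file
# use differently parameterized conv_relu helpers of their own:
def conv_relu(bottom, nout):
    conv = L.Convolution(bottom, kernel_size=3, num_output=nout, pad=0,
                         weight_filler=dict(type='xavier'))
    return conv, L.ReLU(conv, in_place=True)

def max_pool(bottom):
    return L.Pooling(bottom, kernel_size=2, stride=2, pool=P.Pooling.MAX)

def deconv_relu(bottom, nout):
    deconv = L.Deconvolution(bottom,
                             convolution_param=dict(num_output=nout, kernel_size=2,
                                                    stride=2, pad=0,
                                                    weight_filler=dict(type='xavier')))
    return deconv, L.ReLU(deconv, in_place=True)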
def unet_2d_bn(dim_data, dim_label, num_class, ignore_label=255, phase="train"):
    net = caffe.NetSpec()
    ############ d0 ############
    net.data = L.Input(input_param=dict(shape=dict(dim=dim_data)))
    if phase == "train":
        net.label = L.Input(input_param=dict(shape=dict(dim=dim_label)))
        net.label_weight = L.Input(input_param=dict(shape=dict(dim=dim_label)))
    net.d0b_conv, net.d0b_bn, net.d0b_scale, net.d0b_relu = conv_bn_scale_relu(
        net.data, 64, pad=1, kernel_size=3, stride=1, phase=phase)
    net.d0c_conv, net.d0c_bn, net.d0c_scale, net.d0c_relu = conv_bn_scale_relu(
        net.d0b_relu, 64, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ d1 ############
    net.d1a_pool = max_pool(net.d0c_relu, pad=0, kernel_size=2, stride=2)
    net.d1b_conv, net.d1b_bn, net.d1b_scale, net.d1b_relu = conv_bn_scale_relu(
        net.d1a_pool, 128, pad=1, kernel_size=3, stride=1, phase=phase)
    net.d1c_conv, net.d1c_bn, net.d1c_scale, net.d1c_relu = conv_bn_scale_relu(
        net.d1b_relu, 128, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ d2 ############
    net.d2a_pool = max_pool(net.d1c_relu, pad=0, kernel_size=2, stride=2)
    net.d2b_conv, net.d2b_bn, net.d2b_scale, net.d2b_relu = conv_bn_scale_relu(
        net.d2a_pool, 256, pad=1, kernel_size=3, stride=1, phase=phase)
    net.d2c_conv, net.d2c_bn, net.d2c_scale, net.d2c_relu = conv_bn_scale_relu(
        net.d2b_relu, 256, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ d3 ############
    net.d3a_pool = max_pool(net.d2c_relu, pad=0, kernel_size=2, stride=2)
    net.d3b_conv, net.d3b_bn, net.d3b_scale, net.d3b_relu = conv_bn_scale_relu(
        net.d3a_pool, 512, pad=1, kernel_size=3, stride=1, phase=phase)
    net.d3c_conv, net.d3c_bn, net.d3c_scale, net.d3c_relu = conv_bn_scale_relu(
        net.d3b_relu, 512, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ d4 ############
    net.d4a_pool = max_pool(net.d3c_relu, pad=0, kernel_size=2, stride=2)
    net.d4b_conv, net.d4b_bn, net.d4b_scale, net.d4b_relu = conv_bn_scale_relu(
        net.d4a_pool, 1024, pad=1, kernel_size=3, stride=1, phase=phase)
    net.d4c_conv, net.d4c_bn, net.d4c_scale, net.d4c_relu = conv_bn_scale_relu(
        net.d4b_relu, 1024, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ u3 ############
    ### a ### First Deconvolution
    net.u3a_dconv, net.u3a_bn, net.u3a_scale, net.u3a_relu = deconv_bn_scale_relu(
        net.d4c_relu, 512, pad=0, kernel_size=2, stride=2, phase=phase)
    ### b ### Crop and Concat
    net.u3b_crop = L.Crop(net.u3a_relu, net.d3c_relu, axis=2, offset=0)
    # net.u3b_crop = crop(net.u3a_relu, net.d3c_relu)
    net.u3b_concat = L.Concat(net.u3b_crop, net.d3c_relu, axis=1)
    ### c ###
    net.u3c_conv, net.u3c_bn, net.u3c_scale, net.u3c_relu = conv_bn_scale_relu(
        net.u3b_concat, 512, pad=1, kernel_size=3, stride=1, phase=phase)
    ### d ###
    net.u3d_conv, net.u3d_bn, net.u3d_scale, net.u3d_relu = conv_bn_scale_relu(
        net.u3c_relu, 512, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ u2 ############
    ### a ### Second Deconvolution
    net.u2a_dconv, net.u2a_bn, net.u2a_scale, net.u2a_relu = deconv_bn_scale_relu(
        net.u3d_relu, 256, pad=0, kernel_size=2, stride=2, phase=phase)
    ### b ### Crop and Concat
    net.u2b_crop = L.Crop(net.u2a_relu, net.d2c_relu, axis=2, offset=0)
    # net.u2b_crop = crop(net.u2a_relu, net.d2c_relu)
    net.u2b_concat = L.Concat(net.u2b_crop, net.d2c_relu, axis=1)
    ### c ###
    net.u2c_conv, net.u2c_bn, net.u2c_scale, net.u2c_relu = conv_bn_scale_relu(
        net.u2b_concat, 256, pad=1, kernel_size=3, stride=1, phase=phase)
    ### d ###
    net.u2d_conv, net.u2d_bn, net.u2d_scale, net.u2d_relu = conv_bn_scale_relu(
        net.u2c_relu, 256, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ u1 ############
    ### a ### Third Deconvolution
    net.u1a_dconv, net.u1a_bn, net.u1a_scale, net.u1a_relu = deconv_bn_scale_relu(
        net.u2d_relu, 128, pad=0, kernel_size=2, stride=2, phase=phase)
    ### b ### Crop and Concat
    net.u1b_crop = L.Crop(net.u1a_relu, net.d1c_relu, axis=2, offset=0)
    # net.u1b_crop = crop(net.u1a_relu, net.d1c_relu)
    net.u1b_concat = L.Concat(net.u1b_crop, net.d1c_relu, axis=1)
    ### c ###
    net.u1c_conv, net.u1c_bn, net.u1c_scale, net.u1c_relu = conv_bn_scale_relu(
        net.u1b_concat, 128, pad=1, kernel_size=3, stride=1, phase=phase)
    ### d ###
    net.u1d_conv, net.u1d_bn, net.u1d_scale, net.u1d_relu = conv_bn_scale_relu(
        net.u1c_relu, 128, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ u0 ############
    ### a ### Fourth Deconvolution
    net.u0a_dconv, net.u0a_bn, net.u0a_scale, net.u0a_relu = deconv_bn_scale_relu(
        net.u1d_relu, 64, pad=0, kernel_size=2, stride=2, phase=phase)
    ### b ### Crop and Concat
    net.u0b_crop = L.Crop(net.u0a_relu, net.d0c_relu, axis=2, offset=0)
    # net.u0b_crop = crop(net.u0a_relu, net.d0c_relu)
    net.u0b_concat = L.Concat(net.u0b_crop, net.d0c_relu, axis=1)
    ### c ###
    net.u0c_conv, net.u0c_bn, net.u0c_scale, net.u0c_relu = conv_bn_scale_relu(
        net.u0b_concat, 64, pad=1, kernel_size=3, stride=1, phase=phase)
    ### d ###
    net.u0d_conv, net.u0d_bn, net.u0d_scale, net.u0d_relu = conv_bn_scale_relu(
        net.u0c_relu, 64, pad=1, kernel_size=3, stride=1, phase=phase)
    ############ Score ############
    net.score = L.Convolution(net.u0d_relu,
                              param=[dict(lr_mult=1, decay_mult=1),
                                     dict(lr_mult=2, decay_mult=0)],
                              num_output=num_class, pad=0, kernel_size=1, stride=1,
                              weight_filler=dict(type='gaussian', std=0.001),
                              bias_filler=dict(type='constant', value=0),
                              engine=1)
    ############ Loss ############
    if phase == "train":
        net.loss = L.WeightedSoftmaxWithLoss(net.score, net.label, net.label_weight,
                                             phase=0, loss_weight=1,
                                             loss_param=dict(ignore_label=ignore_label))
    return net.to_proto()
def construct_fcn(image_lmdb, contour_lmdb, batch_size=1, include_acc=False):
    net = caffe.NetSpec()
    # shared args for the convolution layers
    weight_filler = dict(type='gaussian', mean=0.0, std=0.01)
    bias_filler = dict(type='constant', value=0.1)
    param = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]
    net.data = L.Data(source=image_lmdb, backend=P.Data.LMDB, batch_size=batch_size,
                      ntop=1, transform_param=dict(crop_size=0, mean_value=[77],
                                                   mirror=False))
    net.label = L.Data(source=contour_lmdb, backend=P.Data.LMDB,
                       batch_size=batch_size, ntop=1)
    # conv-relu-pool 1
    net.conv1 = L.Convolution(net.data, kernel_size=5, stride=2, num_output=100,
                              pad=50, group=1, weight_filler=weight_filler,
                              bias_filler=bias_filler, param=param)
    net.relu1 = L.ReLU(net.conv1, in_place=True)
    net.pool1 = L.Pooling(net.relu1, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    # conv-relu-pool 2
    net.conv2 = L.Convolution(net.pool1, kernel_size=5, stride=2, num_output=200,
                              pad=0, group=1, weight_filler=weight_filler,
                              bias_filler=bias_filler, param=param)
    net.relu2 = L.ReLU(net.conv2, in_place=True)
    net.pool2 = L.Pooling(net.relu2, pool=P.Pooling.MAX, kernel_size=2, stride=2)
    net.conv3 = L.Convolution(net.pool2, kernel_size=3, stride=1, num_output=300,
                              pad=0, group=1, weight_filler=weight_filler,
                              bias_filler=bias_filler, param=param)
    net.relu3 = L.ReLU(net.conv3, in_place=True)
    net.conv4 = L.Convolution(net.relu3, kernel_size=3, stride=1, num_output=300,
                              pad=0, group=1, weight_filler=weight_filler,
                              bias_filler=bias_filler, param=param)
    net.relu4 = L.ReLU(net.conv4, in_place=True)
    net.drop = L.Dropout(net.relu4, dropout_ratio=0.1, in_place=True)
    net.score_classes = L.Convolution(net.drop, kernel_size=1, stride=1,
                                      num_output=2, pad=0, group=1,
                                      weight_filler=weight_filler,
                                      bias_filler=bias_filler, param=param)
    net.upscore = L.Deconvolution(net.score_classes)
    net.score = L.Crop(net.upscore, net.data)
    net.loss = L.SoftmaxWithLoss(net.score, net.label,
                                 loss_param=dict(normalize=True))
    if include_acc:
        net.accuracy = L.Accuracy(net.score, net.label)
    return net.to_proto()
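# Both FCN builders above (FCN and construct_fcn) leave L.Deconvolution
# unparameterized, so kernel, stride, and output count fall back to defaults.
# A fully specified alternative (a sketch of the common FCN pattern, mirroring
# the bilinear upscore layer in simple_net above, not these authors' exact
# settings):
def bilinear_upscore(bottom, num_classes=2, factor=16):
    return L.Deconvolution(bottom,
                           convolution_param=dict(num_output=num_classes,
                                                  group=num_classes,
                                                  kernel_size=2 * factor,
                                                  stride=factor, bias_term=False,
                                                  weight_filler=dict(type='bilinear')),
                           param=[dict(lr_mult=0, decay_mult=0)])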
def jsonToPrototxt(net, net_name):
    # assumptions:
    # - a layer can accept only one input blob
    # - the data layer produces two blobs: data and label
    # - the loss layer requires two blobs: <someData> and label
    # - the label blob is hardcoded
    # - layer names have to be unique
    # custom DFS over the network
    input_dim = None

    def get_iterable(x):
        if isinstance(x, collections.Iterable):
            return x
        else:
            return (x,)

    stack = []
    layersProcessed = {}
    processOrder = []
    blobNames = {}
    for layerId in net:
        layersProcessed[layerId] = False
        blobNames[layerId] = {'bottom': [], 'top': []}
    blobId = 0

    def isProcessPossible(layerId):
        inputs = net[layerId]['connection']['input']
        for layerId in inputs:
            if layersProcessed[layerId] is False:
                return False
        return True

    # find the data layers
    for layerId in net:
        if (net[layerId]['info']['type'] == 'Data'
                or net[layerId]['info']['type'] == 'Input'
                or net[layerId]['info']['type'] == 'HDF5Data'):
            stack.append(layerId)

    def changeTopBlobName(layerId, newName):
        blobNames[layerId]['top'] = newName

    while len(stack):
        i = len(stack) - 1
        while isProcessPossible(stack[i]) is False:
            i = i - 1
        layerId = stack[i]
        stack.remove(stack[i])
        inputs = net[layerId]['connection']['input']
        if len(inputs) > 0:
            if len(inputs) == 2 and (net[inputs[0]]['info']['phase'] is not None) \
                    and (net[inputs[1]]['info']['phase']):
                commonBlobName = blobNames[inputs[0]]['top']
                changeTopBlobName(inputs[1], commonBlobName)
                blobNames[layerId]['bottom'] = commonBlobName
            else:
                inputBlobNames = []
                for inputId in inputs:
                    inputBlobNames.extend(blobNames[inputId]['top'])
                blobNames[layerId]['bottom'] = inputBlobNames
        blobNames[layerId]['top'] = ['blob' + str(blobId)]
        blobId = blobId + 1
        for outputId in net[layerId]['connection']['output']:
            if outputId not in stack:
                stack.append(outputId)
        layersProcessed[layerId] = True
        processOrder.append(layerId)

    ns_train = caffe.NetSpec()
    ns_test = caffe.NetSpec()
    for layerId in processOrder:
        layer = net[layerId]
        layerParams = layer['params']
        layerType = layer['info']['type']
        layerPhase = layer['info']['phase']

        if layerType == 'Data' or layerType == 'Input':
            # This is temporary and has to be improved later: a Data layer is
            # converted to an Input layer with some default dimensions.
            '''
            data_param = {}
            if layerParams['source'] != '':
                data_param['source'] = layerParams['source']
                # hardcoding the mnist dataset -- change this later
                if layerPhase is not None:
                    if int(layerPhase) == 0:
                        data_param['source'] = 'examples/mnist/mnist_train_lmdb'
                    elif int(layerPhase) == 1:
                        data_param['source'] = 'examples/mnist/mnist_test_lmdb'
            if layerParams['batch_size'] != '':
                data_param['batch_size'] = int(float(layerParams['batch_size']))
            if layerParams['backend'] != '':
                backend = layerParams['backend']
                if backend == 'LEVELDB':
                    backend = 0
                elif backend == 'LMDB':
                    backend = 1
                data_param['backend'] = backend
            transform_param = {}
            if layerParams['scale'] != '':
                transform_param['scale'] = float(layerParams['scale'])
            if layerPhase is not None:
                caffeLayer = get_iterable(L.Data(
                    ntop=1, transform_param=transform_param,
                    data_param=data_param, include={'phase': int(layerPhase)}))
                if int(layerPhase) == 0:
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns_train[key] = value
                elif int(layerPhase) == 1:
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns_test[key] = value
            else:
                for ns in (ns_train, ns_test):
                    caffeLayer = get_iterable(L.Data(
                        ntop=2, transform_param=transform_param,
                        data_param=data_param))
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns[key] = value
            '''
            if 'dim' not in layerParams:
                layerParams['dim'] = '10,3,224,224'
            input_dim = layerParams['dim']
            if layerPhase is not None:
                caffeLayer = get_iterable(L.Input(
                    input_param={'shape': {'dim': map(int, layerParams['dim'].split(','))}},
                    include={'phase': int(layerPhase)}))
                if int(layerPhase) == 0:
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns_train[key] = value
                elif int(layerPhase) == 1:
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns_test[key] = value
            else:
                for ns in (ns_train, ns_test):
                    caffeLayer = get_iterable(L.Input(
                        input_param={'shape': {'dim': map(int, layerParams['dim'].split(','))}}))
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns[key] = value

        elif layerType == 'Crop':
            crop_param = {}
            if layerParams['axis'] != '':
                crop_param['axis'] = int(float(layerParams['axis']))
            if layerParams['offset'] != '':
                crop_param['offset'] = int(float(layerParams['offset']))
            for ns in (ns_train, ns_test):
                caffeLayer = get_iterable(L.Crop(
                    *[ns[x] for x in blobNames[layerId]['bottom']],
                    crop_param=crop_param))
                for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                    ns[key] = value

        elif layerType == 'Convolution':
            convolution_param = {}
            if layerParams['kernel_h'] != '':
                convolution_param['kernel_h'] = int(float(layerParams['kernel_h']))
            if layerParams['kernel_w'] != '':
                convolution_param['kernel_w'] = int(float(layerParams['kernel_w']))
            if layerParams['stride_h'] != '':
                convolution_param['stride_h'] = int(float(layerParams['stride_h']))
            if layerParams['stride_w'] != '':
                convolution_param['stride_w'] = int(float(layerParams['stride_w']))
            if layerParams['num_output'] != '':
                convolution_param['num_output'] = int(float(layerParams['num_output']))
            if layerParams['pad_h'] != '':
                convolution_param['pad_h'] = int(float(layerParams['pad_h']))
            if layerParams['pad_w'] != '':
                convolution_param['pad_w'] = int(float(layerParams['pad_w']))
            if layerParams['weight_filler'] != '':
                convolution_param['weight_filler'] = {'type': layerParams['weight_filler']}
            if layerParams['bias_filler'] != '':
                convolution_param['bias_filler'] = {'type': layerParams['bias_filler']}
            for ns in (ns_train, ns_test):
                caffeLayer = get_iterable(L.Convolution(
                    *[ns[x] for x in blobNames[layerId]['bottom']],
                    convolution_param=convolution_param,
                    param=[{'lr_mult': 1}, {'lr_mult': 2}]))
                for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                    ns[key] = value

        elif layerType == 'Deconvolution':
            convolution_param = {}
            if layerParams['kernel_h'] != '':
                convolution_param['kernel_h'] = int(float(layerParams['kernel_h']))
            if layerParams['kernel_w'] != '':
                convolution_param['kernel_w'] = int(float(layerParams['kernel_w']))
            if layerParams['stride_h'] != '':
                convolution_param['stride_h'] = int(float(layerParams['stride_h']))
            if layerParams['stride_w'] != '':
                convolution_param['stride_w'] = int(float(layerParams['stride_w']))
            if layerParams['num_output'] != '':
                convolution_param['num_output'] = int(float(layerParams['num_output']))
            if layerParams['pad_h'] != '':
                convolution_param['pad_h'] = int(float(layerParams['pad_h']))
            if layerParams['pad_w'] != '':
                convolution_param['pad_w'] = int(float(layerParams['pad_w']))
            if layerParams['weight_filler'] != '':
                convolution_param['weight_filler'] = {'type': layerParams['weight_filler']}
            if layerParams['bias_filler'] != '':
                convolution_param['bias_filler'] = {'type': layerParams['bias_filler']}
            for ns in (ns_train, ns_test):
                caffeLayer = get_iterable(L.Deconvolution(
                    *[ns[x] for x in blobNames[layerId]['bottom']],
                    convolution_param=convolution_param,
                    param=[{'lr_mult': 1}, {'lr_mult': 2}]))
                for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                    ns[key] = value

        elif layerType == 'ReLU':
            inplace = layerParams['inplace']
            for ns in (ns_train, ns_test):
                caffeLayer = get_iterable(L.ReLU(
                    *[ns[x] for x in blobNames[layerId]['bottom']],
                    in_place=inplace))
                for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                    ns[key] = value

        elif layerType == 'Pooling':
            pooling_param = {}
            if layerParams['kernel_h'] != '':
                pooling_param['kernel_h'] = int(float(layerParams['kernel_h']))
            if layerParams['kernel_w'] != '':
                pooling_param['kernel_w'] = int(float(layerParams['kernel_w']))
            if layerParams['stride_h'] != '':
                pooling_param['stride_h'] = int(float(layerParams['stride_h']))
            if layerParams['stride_w'] != '':
                pooling_param['stride_w'] = int(float(layerParams['stride_w']))
            if layerParams['pad_h'] != '':
                pooling_param['pad_h'] = int(float(layerParams['pad_h']))
            if layerParams['pad_w'] != '':
                pooling_param['pad_w'] = int(float(layerParams['pad_w']))
            if layerParams['pool'] != '':
                pool = layerParams['pool']
                if pool == 'MAX':
                    pool = 0
                elif pool == 'AVE':
                    pool = 1
                elif pool == 'STOCHASTIC':
                    pool = 2
                pooling_param['pool'] = pool
            for ns in (ns_train, ns_test):
                caffeLayer = get_iterable(L.Pooling(
                    *[ns[x] for x in blobNames[layerId]['bottom']],
                    pooling_param=pooling_param))
                for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                    ns[key] = value

        elif layerType == 'InnerProduct':
            inner_product_param = {}
            if layerParams['num_output'] != '':
                inner_product_param['num_output'] = int(float(layerParams['num_output']))
            if layerParams['weight_filler'] != '':
                inner_product_param['weight_filler'] = {'type': layerParams['weight_filler']}
            if layerParams['bias_filler'] != '':
                inner_product_param['bias_filler'] = {'type': layerParams['bias_filler']}
            for ns in (ns_train, ns_test):
                caffeLayer = get_iterable(L.InnerProduct(
                    *[ns[x] for x in blobNames[layerId]['bottom']],
                    inner_product_param=inner_product_param,
                    param=[{'lr_mult': 1}, {'lr_mult': 2}]))
                for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                    ns[key] = value

        elif layerType == 'SoftmaxWithLoss':
            for ns in (ns_train, ns_test):
                caffeLayer = get_iterable(L.SoftmaxWithLoss(  # try L['SoftmaxWithLoss']
                    *([ns[x] for x in blobNames[layerId]['bottom']])))
                # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label])))
                for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                    ns[key] = value

        elif layerType == 'Accuracy':
            if layerPhase is not None:
                caffeLayer = get_iterable(L.Accuracy(
                    *([ns[x] for x in blobNames[layerId]['bottom']]),
                    # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]),
                    include={'phase': int(layerPhase)}))
                if int(layerPhase) == 0:
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns_train[key] = value
                elif int(layerPhase) == 1:
                    for key, value in zip(blobNames[layerId]['top'], caffeLayer):
                        ns_test[key] = value
zip(blobNames[layerId]['top'], caffeLayer): ns_test[key] = value else: for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Accuracy( *([ns[x] for x in blobNames[layerId]['bottom']]))) # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]))) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Dropout'): # inplace dropout? caffe-tensorflow do not work inplace = layerParams['inplace'] for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Dropout(*[ns[x] for x in blobNames[layerId]['bottom']], in_place=inplace)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'LRN'): for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.LRN(*[ns[x] for x in blobNames[layerId]['bottom']])) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Concat'): for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Concat(*[ns[x] for x in blobNames[layerId]['bottom']], ntop=len(blobNames[layerId]['top']))) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Eltwise'): eltwise_param = {} if layerParams['operation'] != '': elt = layerParams['operation'] if (elt == 'PROD'): elt = 0 elif (elt == 'SUM'): elt = 1 elif (elt == 'MAX'): elt = 2 else: elt = 1 # Default is sum eltwise_param['operation'] = elt for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Eltwise(*[ns[x] for x in blobNames[layerId]['bottom']], eltwise_param=eltwise_param)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Softmax'): for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Softmax(*([ns[x] for x in blobNames[layerId]['bottom']]))) # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]))) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Embed'): for ns in (ns_train, ns_test): print ns.tops caffeLayer = get_iterable( L.Embed(*[ns[x] for x in blobNames[layerId]['bottom']], param=[{ 'lr_mult': 1, 'decay_mult': 1 }, { 'lr_mult': 2, 'decay_mult': 0 }])) # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]))) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'LSTM'): recurrent_param = {} if layerParams['num_output'] != '': recurrent_param['num_output'] = int(layerParams['num_output']) if layerParams['weight_filler'] != '': recurrent_param['weight_filler'] = { 'type': layerParams['weight_filler'] } if layerParams['bias_filler'] != '': recurrent_param['bias_filler'] = { 'type': layerParams['bias_filler'] } for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.LSTM(*[ns[x] for x in blobNames[layerId]['bottom']], recurrent_param=recurrent_param)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Reshape'): reshape_param = { 'shape': { 'dim': map(int, layerParams['dim'].split(',')) } } for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Reshape(*[ns[x] for x in blobNames[layerId]['bottom']], reshape_param=reshape_param)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'HDF5Data'): layerPhase = layer['info']['phase'] hdf5_data_param = {} if layerParams['source'] != '': hdf5_data_param['source'] = layerParams['source'] if layerParams['batch_size'] != '': hdf5_data_param['batch_size'] = layerParams['batch_size'] for ns in (ns_train, ns_test): if layerPhase is not 
None: caffeLayer = get_iterable( L.HDF5Data( *[ns[x] for x in blobNames[layerId]['bottom']], hdf5_data_param=hdf5_data_param, include={'phase': int(layerPhase)})) else: caffeLayer = get_iterable( L.HDF5Data( *[ns[x] for x in blobNames[layerId]['bottom']], hdf5_data_param=hdf5_data_param)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'BatchNorm'): batch_norm_param = {} if layerParams['use_global_stats'] != '': batch_norm_param['use_global_stats'] = layerParams[ 'use_global_stats'] for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.BatchNorm(*[ns[x] for x in blobNames[layerId]['bottom']], batch_norm_param=batch_norm_param)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Scale'): scale_param = {} if layerParams['bias_term'] != '': scale_param['bias_term'] = layerParams['bias_term'] for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Scale(*[ns[x] for x in blobNames[layerId]['bottom']], scale_param=scale_param)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value elif (layerType == 'Eltwise'): eltwise_param = {} if layerParams['operation'] != '': eltwise_param['operation'] = int(layerParams['operation']) for ns in (ns_train, ns_test): caffeLayer = get_iterable( L.Eltwise(*[ns[x] for x in blobNames[layerId]['bottom']], eltwise_param=eltwise_param)) for key, value in zip(blobNames[layerId]['top'], caffeLayer): ns[key] = value train = 'name: "' + net_name + '"\n' + str(ns_train.to_proto()) test = str(ns_test.to_proto()) # merge the train and test prototxt to get a single train_test prototxt testIndex = [m.start() for m in re.finditer('layer', test)] previousIndex = -1 for i in range(len(testIndex)): if i < len(testIndex) - 1: layer = test[testIndex[i]:testIndex[i + 1]] else: layer = test[testIndex[i]:] a = train.find(layer) if a != -1: l = test[testIndex[previousIndex + 1]:testIndex[i]] train = train[0:a] + l + train[a:] previousIndex = i if previousIndex < len(testIndex) - 1: l = test[testIndex[previousIndex + 1]:] train = train + l prototxt = train return prototxt, input_dim
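# Usage sketch (not from the original source): jsonToPrototxt consumes a
# dict-of-dicts graph. The field names below are inferred from how the
# function reads them, and the two-layer net itself is hypothetical.
def _jsonToPrototxt_example():
    toy_net = {
        'l0': {'info': {'type': 'Input', 'phase': None},
               'params': {'dim': '1,3,224,224'},
               'connection': {'input': [], 'output': ['l1']}},
        'l1': {'info': {'type': 'ReLU', 'phase': None},
               'params': {'inplace': True},
               'connection': {'input': ['l0'], 'output': []}},
    }
    prototxt, input_dim = jsonToPrototxt(toy_net, 'toy_net')
    print(prototxt)   # merged train/test prototxt text
    print(input_dim)  # '1,3,224,224'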
def net(split):
    n = caffe.NetSpec()
    loss_param = dict(normalize=False)
    if split == 'train':
        data_params = dict(mean=(104.00699, 116.66877, 122.67892))
        # images and labels
        data_params['root'] = './datasets/Total_Text_WSR'
        data_params['source'] = "Total_Text_WSR.lst"
        data_params['shuffle'] = True
        data_params['ignore_label'] = -1
        n.data, n.label = L.Python(module='pylayer_old',
                                   layer='ImageLabelmapDataLayer', ntop=2,
                                   param_str=str(data_params))
        if 'ignore_label' in data_params:
            loss_param['ignore_label'] = int(data_params['ignore_label'])
    elif split == 'test':
        n.data = L.Input(name='data',
                         input_param=dict(shape=dict(dim=[1, 3, 500, 500])))
    else:
        raise Exception("Invalid phase")

    # First conv stage
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=1)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)

    # Second conv stage
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)

    # Third conv stage
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)

    # The last conv of stage 3 feeds an MCFE module (channels: 64, kernel: 3x3)
    n.conv3_dilation1 = conv_dilation01(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation2 = conv_dilation03(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation3 = conv_dilation05(n.conv3_3, mult=[100, 1, 200, 0])
    n.conv3_dilation4 = conv_dilation07(n.conv3_3, mult=[100, 1, 200, 0])
    # concatenate along the channel dimension
    n.concat_conv33 = L.Concat(n.conv3_dilation1, n.conv3_dilation2,
                               n.conv3_dilation3, n.conv3_dilation4,
                               concat_param=dict(concat_dim=1))

    # The MCFE module is followed by a BLSTM module.
    # ===================== prepare lstm inputs =====================
    n.im2col_conv33 = L.Im2col(n.concat_conv33,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv33 = L.Transpose(
        n.im2col_conv33, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv33 = L.Reshape(
        n.im2col_transpose_conv33,
        reshape_param=dict(shape=dict(dim=-1), axis=1, num_axes=2))
    # forward LSTM
    n.lstm_conv33 = L.Lstm(
        n.lstm_input_conv33,
        lstm_param=dict(num_output=128,
                        weight_filler=dict(type='gaussian', std=0.01),
                        bias_filler=dict(type='constant'),
                        clipping_threshold=1))
    # backward LSTM
    n.rlstm_input_conv33 = L.Reverse(n.lstm_input_conv33,
                                     name='lstm_reverse1_conv33',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv33 = L.Lstm(n.rlstm_input_conv33, name='rlstm_conv33',
                                   lstm_param=dict(num_output=128))
    n.rlstm_conv33 = L.Reverse(n.rlstm_output_conv33,
                               name='lstm_reverse2_conv33',
                               reverse_param=dict(axis=0))
    # concatenate lstm_conv33 and rlstm_conv33: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv33 = L.Concat(n.lstm_conv33, n.rlstm_conv33,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv33 = L.Reshape(
        n.merge_lstm_rlstm_conv33,
        reshape_param=dict(shape=dict(dim=[-1, 1]), axis=1, num_axes=1))
    # transpose the output back to (N, C, H, W)
    n.lstm_output_conv33 = L.Transpose(
        n.lstm_output_reshape_conv33, transpose_param=dict(dim=[2, 3, 1, 0]))
    n.pool3 = max_pool(n.relu3_3)

    # Fourth conv stage
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)

    # The last conv of stage 4 feeds an MCFE module (channels: 128, kernel: 3x3)
    n.conv4_dilation1 = conv_dilation1(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation2 = conv_dilation3(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation3 = conv_dilation5(n.conv4_3, mult=[100, 1, 200, 0])
    n.conv4_dilation4 = conv_dilation7(n.conv4_3, mult=[100, 1, 200, 0])
    # concatenate along the channel dimension: n*(c1+c2+...+ck)*h*w
    n.concat_conv43 = L.Concat(n.conv4_dilation1, n.conv4_dilation2,
                               n.conv4_dilation3, n.conv4_dilation4,
                               concat_param=dict(concat_dim=1))

    # BLSTM module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv43 = L.Im2col(n.concat_conv43,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv43 = L.Transpose(
        n.im2col_conv43, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv43 = L.Reshape(
        n.im2col_transpose_conv43,
        reshape_param=dict(shape=dict(dim=-1), axis=1, num_axes=2))
    # forward LSTM
    n.lstm_conv43 = L.Lstm(
        n.lstm_input_conv43,
        lstm_param=dict(num_output=256,
                        weight_filler=dict(type='gaussian', std=0.01),
                        bias_filler=dict(type='constant'),
                        clipping_threshold=1))
    # backward LSTM
    n.rlstm_input_conv43 = L.Reverse(n.lstm_input_conv43,
                                     name='lstm_reverse1_conv43',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv43 = L.Lstm(n.rlstm_input_conv43, name='rlstm_conv43',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv43 = L.Reverse(n.rlstm_output_conv43,
                               name='lstm_reverse2_conv43',
                               reverse_param=dict(axis=0))
    # concatenate lstm_conv43 and rlstm_conv43: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv43 = L.Concat(n.lstm_conv43, n.rlstm_conv43,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv43 = L.Reshape(
        n.merge_lstm_rlstm_conv43,
        reshape_param=dict(shape=dict(dim=[-1, 1]), axis=1, num_axes=1))
    # transpose the output back to (N, C, H, W)
    n.lstm_output_conv43 = L.Transpose(
        n.lstm_output_reshape_conv43, transpose_param=dict(dim=[2, 3, 1, 0]))
    n.pool4 = max_pool(n.relu4_3)

    # Fifth conv stage
    n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)

    # MCFE inception module (channels: 128, kernel: 3x3)
    n.conv5_dilation1 = conv_dilation1(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation2 = conv_dilation3(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation3 = conv_dilation5(n.conv5_3, mult=[100, 1, 200, 0])
    n.conv5_dilation4 = conv_dilation7(n.conv5_3, mult=[100, 1, 200, 0])
    n.concat_conv53 = L.Concat(n.conv5_dilation1, n.conv5_dilation2,
                               n.conv5_dilation3, n.conv5_dilation4,
                               concat_param=dict(concat_dim=1))

    # BLSTM module
    # ===================== prepare lstm inputs =====================
    n.im2col_conv53 = L.Im2col(n.concat_conv53,
                               convolution_param=dict(kernel_size=3, pad=1))
    n.im2col_transpose_conv53 = L.Transpose(
        n.im2col_conv53, transpose_param=dict(dim=[3, 2, 0, 1]))
    n.lstm_input_conv53 = L.Reshape(
        n.im2col_transpose_conv53,
        reshape_param=dict(shape=dict(dim=-1), axis=1, num_axes=2))
    # forward LSTM
    n.lstm_conv53 = L.Lstm(
        n.lstm_input_conv53,
        lstm_param=dict(num_output=256,
                        weight_filler=dict(type='gaussian', std=0.01),
                        bias_filler=dict(type='constant'),
                        clipping_threshold=1))
    # backward LSTM
    n.rlstm_input_conv53 = L.Reverse(n.lstm_input_conv53,
                                     name='lstm_reverse1_conv53',
                                     reverse_param=dict(axis=0))
    n.rlstm_output_conv53 = L.Lstm(n.rlstm_input_conv53, name='rlstm_conv53',
                                   lstm_param=dict(num_output=256))
    n.rlstm_conv53 = L.Reverse(n.rlstm_output_conv53,
                               name='lstm_reverse2_conv53',
                               reverse_param=dict(axis=0))
    # concatenate lstm_conv53 and rlstm_conv53: n*c*(h1+h2+...+hk)*w
    n.merge_lstm_rlstm_conv53 = L.Concat(n.lstm_conv53, n.rlstm_conv53,
                                         concat_param=dict(axis=2))
    n.lstm_output_reshape_conv53 = L.Reshape(
        n.merge_lstm_rlstm_conv53,
        reshape_param=dict(shape=dict(dim=[-1, 1]), axis=1, num_axes=1))
    # transpose the output back to (N, C, H, W)
    n.lstm_output_conv53 = L.Transpose(
        n.lstm_output_reshape_conv53, transpose_param=dict(dim=[2, 3, 1, 0]))

    # Stage 3: reduce the BLSTM output with a 1x1 conv, upsample 4x, and crop
    # to the original image size.
    n.score_dsn3 = conv1x1(n.lstm_output_conv33, lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn3_up = upsample(n.score_dsn3, stride=4)
    n.upscore_dsn3 = L.Crop(n.score_dsn3_up, n.data)
    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss3 = L.BalanceCrossEntropyLoss(n.upscore_dsn3, n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn3 = L.Sigmoid(n.upscore_dsn3)

    # Stage 4: 1x1 conv, 8x upsampling, crop to the original image size.
    n.score_dsn4 = conv1x1(n.lstm_output_conv43, lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn4_up = upsample(n.score_dsn4, stride=8)
    n.upscore_dsn4 = L.Crop(n.score_dsn4_up, n.data)
    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss4 = L.BalanceCrossEntropyLoss(n.upscore_dsn4, n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn4 = L.Sigmoid(n.upscore_dsn4)

    # Stage 5: 1x1 conv, 16x upsampling, crop to the original image size.
    n.score_dsn5 = conv1x1(n.lstm_output_conv53, lr=[0.01, 1, 0.02, 0],
                           wf=dict(type='gaussian', std=0.01))
    n.score_dsn5_up = upsample(n.score_dsn5, stride=16)
    n.upscore_dsn5 = L.Crop(n.score_dsn5_up, n.data)
    # BalanceCrossEntropyLoss
    if split == 'train':
        n.loss5 = L.BalanceCrossEntropyLoss(n.upscore_dsn5, n.label,
                                            loss_param=loss_param)
    if split == 'test':
        n.sigmoid_dsn5 = L.Sigmoid(n.upscore_dsn5)

    # Concatenate the three stage outputs along the channel dimension; this
    # is the input of the attention module.
    n.concat_upscore = L.Concat(n.upscore_dsn3, n.upscore_dsn4, n.upscore_dsn5,
                                name='concat', concat_param=dict(concat_dim=1))

    # Reduce upscore_dsn3/4/5 with 3x3 convolutions.
    n.output_mask_product03 = L.Convolution(
        n.upscore_dsn3, num_output=1, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'), engine=1)
    n.output_mask_product04 = L.Convolution(
        n.upscore_dsn4, num_output=1, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'), engine=1)
    n.output_mask_product05 = L.Convolution(
        n.upscore_dsn5, num_output=1, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant'), engine=1)

    ### Attention module
    # first conv layer: num_output=512, kernel 3x3
    n.att_conv1_mask_512 = L.Convolution(
        n.concat_upscore, num_output=512, kernel_size=3, pad=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        engine=1)
    n.relu_att_conv1 = L.ReLU(n.att_conv1_mask_512, in_place=True)
    n.drop_att_conv1_mask = L.Dropout(n.relu_att_conv1, dropout_ratio=0.5,
                                      in_place=True)
    # second conv layer: num_output=3, kernel 1x1
    n.att_fc_mask_512 = L.Convolution(
        n.drop_att_conv1_mask, num_output=3, kernel_size=1,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        engine=1)
    n.attention = L.Softmax(n.att_fc_mask_512)
    # slice into three attention weights
    n.attention3, n.attention4, n.attention5 = L.Slice(
        n.attention, name='slice_attention',
        slice_param=dict(axis=1, slice_point=[1, 2]), ntop=3)
    # multiply each attention weight with its feature map, then fuse
    n.output_mask3 = L.Eltwise(n.attention3, n.output_mask_product03,
                               operation=P.Eltwise.PROD)
    n.output_mask4 = L.Eltwise(n.attention4, n.output_mask_product04,
                               operation=P.Eltwise.PROD)
    n.output_mask5 = L.Eltwise(n.attention5, n.output_mask_product05,
                               operation=P.Eltwise.PROD)
    n.output_fusion = L.Eltwise(n.output_mask3, n.output_mask4, n.output_mask5,
                                operation=P.Eltwise.SUM)

    # For comparison, also fuse the three stage outputs with a single 1x1
    # conv, bypassing the attention module.
    n.upscore_fuse = L.Convolution(
        n.concat_upscore, name='new-score-weighting', num_output=1,
        kernel_size=1,
        param=[dict(lr_mult=0.001, decay_mult=1),
               dict(lr_mult=0.002, decay_mult=0)],
        weight_filler=dict(type='constant', value=0.2), engine=1)
    if split == 'train':
        n.loss_fuse = L.BalanceCrossEntropyLoss(n.upscore_fuse, n.label,
                                                loss_param=loss_param)
        n.loss_output_fusion = L.BalanceCrossEntropyLoss(n.output_fusion,
                                                         n.label,
                                                         loss_param=loss_param)
    if split == 'test':
        n.sigmoid_fuse = L.Sigmoid(n.upscore_fuse)
        n.sigmoid_output_fusion = L.Sigmoid(n.output_fusion)
    return n.to_proto()
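# Driver sketch (file names are hypothetical): render both phases of the
# network above to prototxt files for training and deployment.
def _write_net_prototxts():
    with open('train.prototxt', 'w') as f:
        f.write(str(net('train')))
    with open('test.prototxt', 'w') as f:
        f.write(str(net('test')))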
def _conv_bn_scale_relu3d(net, prefix, bottom, num_output, phase):
    # Shared conv -> BN -> scale -> in-place ReLU block used by every stage
    # of UNet3DBN below; top names match the <prefix>_conv/_bn/_scale/_relu
    # pattern. During TRAIN the BN statistics are learned (use_global_stats=0
    # with frozen lr_mult on the accumulators); otherwise the stored global
    # statistics are used.
    conv = L.Convolution(
        bottom,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        num_output=num_output, pad=1, kernel_size=3, stride=1,
        weight_filler=dict(type='gaussian', std=0.001),
        bias_filler=dict(type='constant', value=0), engine=1)
    setattr(net, prefix + '_conv', conv)
    if phase == "TRAIN":
        bn = L.BatchNorm(conv, use_global_stats=0,
                         param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
    else:
        bn = L.BatchNorm(conv, use_global_stats=1)
    setattr(net, prefix + '_bn', bn)
    scale = L.Scale(bn, axis=1, filler=dict(type='constant', value=1),
                    bias_term=1, bias_filler=dict(type='constant', value=0))
    setattr(net, prefix + '_scale', scale)
    setattr(net, prefix + '_relu', L.ReLU(scale, in_place=True, engine=1))
    return scale


def _deconv_bn_scale_relu3d(net, prefix, bottom, num_output, kernel_size,
                            stride, phase):
    # Shared deconv -> BN -> scale -> in-place ReLU block for the decoder
    # stages of UNet3DBN; top names follow <prefix>_dconv/_bn/_scale/_relu.
    dconv = L.Deconvolution(
        bottom, param=[dict(lr_mult=1, decay_mult=1)],
        convolution_param=dict(num_output=num_output, pad=0,
                               kernel_size=kernel_size, stride=stride,
                               weight_filler=dict(type='gaussian', std=0.001),
                               bias_term=0, engine=1))
    setattr(net, prefix + '_dconv', dconv)
    if phase == "TRAIN":
        bn = L.BatchNorm(dconv, use_global_stats=0,
                         param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])
    else:
        bn = L.BatchNorm(dconv, use_global_stats=1)
    setattr(net, prefix + '_bn', bn)
    scale = L.Scale(bn, axis=1, filler=dict(type='constant', value=1),
                    bias_term=1, bias_filler=dict(type='constant', value=0))
    setattr(net, prefix + '_scale', scale)
    setattr(net, prefix + '_relu', L.ReLU(scale, in_place=True, engine=1))
    return scale


def UNet3DBN(input_dims, class_nums, phase="TRAIN"):
    net = caffe.NetSpec()

    ############ d0 ############
    ### a ###
    net.data = L.Input(input_param=dict(shape=dict(dim=input_dims)))
    if phase == "TRAIN":
        net.label = L.Input(input_param=dict(shape=dict(dim=input_dims)))
        net.label_weight = L.Input(input_param=dict(shape=dict(dim=input_dims)))
    ### b ###
    d0b = _conv_bn_scale_relu3d(net, 'd0b', net.data, 32, phase)
    ### c ###
    d0c = _conv_bn_scale_relu3d(net, 'd0c', d0b, 32, phase)

    ############ d1 ############
    ### a ### First pooling (anisotropic: H and W are halved, depth is kept)
    net.d1a_pool = L.PoolingND(d0c, pool=0, kernel_size=[2, 2, 1],
                               stride=[2, 2, 1], engine=1)
    ### b ###
    d1b = _conv_bn_scale_relu3d(net, 'd1b', net.d1a_pool, 64, phase)
    ### c ###
    d1c = _conv_bn_scale_relu3d(net, 'd1c', d1b, 64, phase)

    ############ d2 ############
    ### a ###
    net.d2a_pool = L.PoolingND(d1c, pool=0, kernel_size=2, stride=2, engine=1)
    ### b ###
    d2b = _conv_bn_scale_relu3d(net, 'd2b', net.d2a_pool, 128, phase)
    ### c ###
    d2c = _conv_bn_scale_relu3d(net, 'd2c', d2b, 128, phase)

    ############ d3 ############
    ### a ### Third pooling
    net.d3a_pool = L.PoolingND(d2c, pool=0, kernel_size=2, stride=2, engine=1)
    ### b ###
    d3b = _conv_bn_scale_relu3d(net, 'd3b', net.d3a_pool, 256, phase)
    ### c ###
    d3c = _conv_bn_scale_relu3d(net, 'd3c', d3b, 256, phase)

    ############ u2 ############
    ### a ### First deconvolution
    u2a = _deconv_bn_scale_relu3d(net, 'u2a', d3c, 128, 2, 2, phase)
    ### b ### Crop and concat with the matching encoder feature map
    net.u2b_crop = L.Crop(u2a, d2c, axis=2, offset=0)
    net.u2b_concat = L.Concat(net.u2b_crop, d2c, axis=1)
    ### c ###
    u2c = _conv_bn_scale_relu3d(net, 'u2c', net.u2b_concat, 128, phase)
    ### d ###
    u2d = _conv_bn_scale_relu3d(net, 'u2d', u2c, 128, phase)

    ############ u1 ############
    ### a ### Second deconvolution
    u1a = _deconv_bn_scale_relu3d(net, 'u1a', u2d, 64, 2, 2, phase)
    ### b ### Crop and concat
    net.u1b_crop = L.Crop(u1a, d1c, axis=2, offset=0)
    net.u1b_concat = L.Concat(net.u1b_crop, d1c, axis=1)
    ### c ###
    u1c = _conv_bn_scale_relu3d(net, 'u1c', net.u1b_concat, 64, phase)
    ### d ###
    u1d = _conv_bn_scale_relu3d(net, 'u1d', u1c, 64, phase)

    ############ u0 ############
    ### a ### Third deconvolution (anisotropic, mirroring d1a_pool)
    u0a = _deconv_bn_scale_relu3d(net, 'u0a', u1d, 32, [2, 2, 1], [2, 2, 1],
                                  phase)
    ### b ### Crop and concat
    net.u0b_crop = L.Crop(u0a, d0c, axis=2, offset=0)
    net.u0b_concat = L.Concat(net.u0b_crop, d0c, axis=1)
    ### c ###
    u0c = _conv_bn_scale_relu3d(net, 'u0c', net.u0b_concat, 32, phase)
    ### d ###
    u0d = _conv_bn_scale_relu3d(net, 'u0d', u0c, 32, phase)

    ############ Score ############
    net.u0d_score = L.Convolution(
        u0d,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)],
        num_output=class_nums, pad=1, kernel_size=3, stride=1,
        weight_filler=dict(type='gaussian', std=0.001),
        bias_filler=dict(type='constant', value=0), engine=1)

    ############ Loss ############
    if phase == "TRAIN":
        net.loss = L.WeightedSoftmaxWithLoss(net.u0d_score, net.label,
                                             net.label_weight, phase=0,
                                             loss_weight=1,
                                             loss_param=dict(ignore_label=255))

    return net.to_proto()
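# Sketch: emit the 3D U-Net in both phases. The 5-D extents below are
# hypothetical; assuming an (N, C, H, W, D) blob layout, H and W must be
# divisible by 8 and D by 4 so every pooled axis stays integral.
def _write_unet3d_prototxts():
    dims = [1, 1, 64, 64, 24]
    with open('unet3d_train.prototxt', 'w') as f:
        f.write(str(UNet3DBN(dims, class_nums=2, phase="TRAIN")))
    with open('unet3d_deploy.prototxt', 'w') as f:
        f.write(str(UNet3DBN(dims, class_nums=2, phase="TEST")))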
def convert(keras_model, caffe_net_file, caffe_params_file):
    caffe_net = caffe.NetSpec()
    net_params = dict()
    outputs = dict()
    shape = ()
    input_str = ''

    for layer in keras_model.layers:
        name = layer.name
        layer_type = type(layer).__name__
        config = layer.get_config()
        blobs = layer.get_weights()

        if isinstance(layer.output, list):
            raise Exception('Layers with multiple outputs are not supported')
        else:
            top = layer.output.name
        if not isinstance(layer.input, list):
            bottom = layer.input.name

        if layer_type == 'InputLayer' or len(caffe_net.tops) == 0:
            input_name = 'data'
            # placeholder layer; it is replaced by input_str when the
            # prototxt is written out below
            caffe_net[input_name] = L.Layer()
            input_shape = config['batch_input_shape']
            input_str = ('input: {}\ninput_dim: {}\ninput_dim: {}\n'
                         'input_dim: {}\ninput_dim: {}').format(
                             '"' + input_name + '"', 1, input_shape[3],
                             input_shape[1], input_shape[2])
            outputs[layer.input.name] = input_name
            if layer_type == 'InputLayer':
                continue

        if layer_type == 'Conv2D' or layer_type == 'Convolution2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': config['filters']}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            if not config['use_bias']:
                kwargs['bias_term'] = False
                # kwargs['param'] = [dict(lr_mult=0)]
            else:
                # kwargs['param'] = [dict(lr_mult=0), dict(lr_mult=0)]
                pass
            set_padding(config, layer.input_shape, kwargs)
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            # Keras kernels are (kh, kw, in, out); Caffe wants (out, in, kh, kw).
            blobs[0] = np.array(blobs[0]).transpose(3, 2, 0, 1)
            net_params[name] = blobs
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                pass  # do nothing
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'Conv2DTranspose':
            strides = config['strides']
            kernel_size = config['kernel_size']
            channels = config['filters']
            w = layer.input_shape[1]
            h = layer.input_shape[2]
            out_w = math.ceil(w / float(strides[1]))
            pad_w = int((kernel_size[1] * out_w -
                         (kernel_size[1] - strides[1]) * (out_w - 1) - w) / 2)
            out_h = math.ceil(h / float(strides[0]))
            pad_h = int((kernel_size[0] * out_h -
                         (kernel_size[0] - strides[0]) * (out_h - 1) - h) / 2)
            bias_flag = bool(config['use_bias'])
            conv_param = dict(num_output=channels, group=channels,
                              kernel_size=kernel_size, stride=strides,
                              weight_filler=dict(type='bilinear'),
                              bias_term=bias_flag)
            if pad_w == pad_h:
                if pad_w != 0:
                    conv_param['pad'] = pad_w
            else:
                conv_param['pad_h'] = pad_h
                conv_param['pad_w'] = pad_w
            caffe_net[name] = L.Deconvolution(
                caffe_net[outputs[bottom]], convolution_param=conv_param,
                param=dict(lr_mult=0, decay_mult=0))
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            # The leading singleton axis lets net_params[name][0] yield the
            # whole 4-D kernel when the parameters are copied at the end.
            blob.shape = (1,) + blob.shape
            net_params[name] = blob
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                pass  # do nothing
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        # depthwise separable convolution
        elif layer_type == 'DepthwiseConv2D':
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': layer.input_shape[3]}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            set_padding(config, layer.input_shape, kwargs)
            kwargs['group'] = layer.input_shape[3]
            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob
            if config['activation'] == 'relu':
                name_s = name + 's'
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)
            elif config['activation'] == 'sigmoid':
                name_s = name + 's'
                caffe_net[name_s] = L.Sigmoid(caffe_net[name], in_place=True)
            elif config['activation'] == 'linear':
                pass  # do nothing
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'SeparableConv2D':
            # depthwise part
            strides = config['strides']
            kernel_size = config['kernel_size']
            kwargs = {'num_output': layer.input_shape[3]}
            if kernel_size[0] == kernel_size[1]:
                kwargs['kernel_size'] = kernel_size[0]
            else:
                kwargs['kernel_h'] = kernel_size[0]
                kwargs['kernel_w'] = kernel_size[1]
            if strides[0] == strides[1]:
                kwargs['stride'] = strides[0]
            else:
                kwargs['stride_h'] = strides[0]
                kwargs['stride_w'] = strides[1]
            set_padding(config, layer.input_shape, kwargs)
            kwargs['group'] = layer.input_shape[3]
            kwargs['bias_term'] = False
            caffe_net[name] = L.Convolution(caffe_net[outputs[bottom]], **kwargs)
            blob = np.array(blobs[0]).transpose(2, 3, 0, 1)
            blob.shape = (1,) + blob.shape
            net_params[name] = blob
            # pointwise (1x1) part
            name2 = name + '_'
            kwargs = {'num_output': config['filters'], 'kernel_size': 1,
                      'bias_term': config['use_bias']}
            caffe_net[name2] = L.Convolution(caffe_net[name], **kwargs)
            if config['use_bias']:
                blob2 = []
                blob2.append(np.array(blobs[1]).transpose(3, 2, 0, 1))
                blob2.append(np.array(blobs[2]))
                blob2[0].shape = (1,) + blob2[0].shape
            else:
                blob2 = np.array(blobs[1]).transpose(3, 2, 0, 1)
                blob2.shape = (1,) + blob2.shape
            net_params[name2] = blob2
            name = name2

        elif layer_type == 'BatchNormalization':
            param = dict()
            variance = np.array(blobs[-1])
            mean = np.array(blobs[-2])
            if config['scale']:
                gamma = np.array(blobs[0])
                sparam = [dict(lr_mult=1), dict(lr_mult=1)]
            else:
                gamma = np.ones(mean.shape, dtype=np.float32)
                # sparam = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=1, decay_mult=1)]
                sparam = [dict(lr_mult=0), dict(lr_mult=1)]
                # sparam = [dict(lr_mult=0), dict(lr_mult=0)]
            if config['center']:
                beta = np.array(blobs[-3])
                param['bias_term'] = True
            else:
                beta = np.zeros(mean.shape, dtype=np.float32)
                param['bias_term'] = False
            caffe_net[name] = L.BatchNorm(caffe_net[outputs[bottom]], in_place=True)
            # param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1), dict(lr_mult=0, decay_mult=0)])
            # param=[dict(lr_mult=1), dict(lr_mult=1), dict(lr_mult=0)])
            net_params[name] = (mean, variance, np.array(1.0))
            name_s = name + 's'
            caffe_net[name_s] = L.Scale(caffe_net[name], in_place=True,
                                        param=sparam,
                                        scale_param={'bias_term': config['center']})
            net_params[name_s] = (gamma, beta)

        elif layer_type == 'Dense':
            caffe_net[name] = L.InnerProduct(caffe_net[outputs[bottom]],
                                             num_output=config['units'],
                                             weight_filler=dict(type='xavier'))
            if config['use_bias']:
                weight = np.array(blobs[0]).transpose(1, 0)
                if type(layer._inbound_nodes[0].inbound_layers[0]).__name__ == 'Flatten':
                    # Undo the channel-order difference Flatten introduces.
                    flatten_shape = layer._inbound_nodes[0].inbound_layers[0].input_shape
                    for i in range(weight.shape[0]):
                        weight[i] = np.array(weight[i].reshape(
                            flatten_shape[1], flatten_shape[2],
                            flatten_shape[3]).transpose(2, 0, 1).reshape(
                                weight.shape[1]))
                net_params[name] = (weight, np.array(blobs[1]))
            else:
                # Keras stores Dense kernels as (in, out); Caffe wants (out, in).
                net_params[name] = (np.array(blobs[0]).transpose(1, 0),)
            name_s = name + 's'
            if config['activation'] == 'softmax':
                caffe_net[name_s] = L.Softmax(caffe_net[name], in_place=True)
            elif config['activation'] == 'relu':
                caffe_net[name_s] = L.ReLU(caffe_net[name], in_place=True)

        elif layer_type == 'Activation':
            if config['activation'] == 'relu':
                # caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]], in_place=True)
                if len(layer.input.consumers()) > 1:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
                else:
                    caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]],
                                             in_place=True)
            elif config['activation'] == 'relu6':
                # TODO: relu6 is approximated by a plain ReLU here
                caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]])
            elif config['activation'] == 'softmax':
                caffe_net[name] = L.Softmax(caffe_net[outputs[bottom]],
                                            in_place=True)
            elif config['activation'] == 'sigmoid':
                # name_s = name + 's'
                caffe_net[name] = L.Sigmoid(caffe_net[outputs[bottom]],
                                            in_place=True)
            else:
                raise Exception('Unsupported activation ' + config['activation'])

        elif layer_type == 'Cropping2D':
            shape = layer.output_shape
            ddata = L.DummyData(shape=dict(dim=[1, shape[3], shape[1], shape[2]]))
            layers = []
            layers.append(caffe_net[outputs[bottom]])
            layers.append(ddata)  # TODO
            caffe_net[name] = L.Crop(*layers)

        elif layer_type == 'Concatenate' or layer_type == 'Merge':
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Concat(*layers, axis=1)

        elif layer_type == 'Add':
            layers = []
            for i in layer.input:
                layers.append(caffe_net[outputs[i.name]])
            caffe_net[name] = L.Eltwise(*layers)

        elif layer_type == 'Flatten':
            caffe_net[name] = L.Flatten(caffe_net[outputs[bottom]])

        elif layer_type == 'Reshape':
            shape = config['target_shape']
            if len(shape) == 3:
                # shape = (layer.input_shape[0], shape[2], shape[0], shape[1])
                shape = (1, shape[2], shape[0], shape[1])
            elif len(shape) == 1:
                # shape = (layer.input_shape[0], 1, 1, shape[0])
                shape = (1, 1, 1, shape[0])
            elif len(shape) == 2:
                shape = (0, shape[1], -1, 0)
            caffe_net[name] = L.Reshape(
                caffe_net[outputs[bottom]],
                reshape_param={'shape': {'dim': list(shape)}})

        elif layer_type == 'MaxPooling2D' or layer_type == 'AveragePooling2D':
            kwargs = {}
            if layer_type == 'MaxPooling2D':
                kwargs['pool'] = P.Pooling.MAX
            else:
                kwargs['pool'] = P.Pooling.AVE
            pool_size = config['pool_size']
            strides = config['strides']
            if pool_size[0] != pool_size[1]:
                raise Exception('Unsupported pool_size')
            if strides[0] != strides[1]:
                raise Exception('Unsupported strides')
            set_padding(config, layer.input_shape, kwargs)
            caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]],
                                        kernel_size=pool_size[0],
                                        stride=strides[0], **kwargs)

        elif layer_type == 'Dropout':
            caffe_net[name] = L.Dropout(
                caffe_net[outputs[bottom]],
                dropout_param=dict(dropout_ratio=config['rate']))

        elif layer_type == 'GlobalAveragePooling2D':
            caffe_net[name] = L.Pooling(
                caffe_net[outputs[bottom]], pool=P.Pooling.AVE,
                pooling_param=dict(global_pooling=True))

        elif layer_type == 'UpSampling2D':
            if config['size'][0] != config['size'][1]:
                raise Exception('Unsupported upsampling factor')
            factor = config['size'][0]
            kernel_size = 2 * factor - factor % 2
            stride = factor
            pad = int(math.ceil((factor - 1) / 2.0))
            channels = layer.input_shape[-1]
            caffe_net[name] = L.Deconvolution(
                caffe_net[outputs[bottom]],
                convolution_param=dict(num_output=channels, group=channels,
                                       kernel_size=kernel_size, stride=stride,
                                       pad=pad,
                                       weight_filler=dict(type='bilinear'),
                                       bias_term=False),
                param=dict(lr_mult=0, decay_mult=0))

        elif layer_type == 'LeakyReLU':
            caffe_net[name] = L.ReLU(caffe_net[outputs[bottom]],
                                     negative_slope=config['alpha'],
                                     in_place=True)

        # Caffe has no ZeroPadding2D layer, so this op must be avoided and the
        # padding folded into the neighboring conv/deconv/pooling layers.
        # elif layer_type == 'ZeroPadding2D':
        #     padding = config['padding']
        #     caffe_net[name] = L.Pooling(caffe_net[outputs[bottom]],
        #                                 kernel_size=1, stride=1,
        #                                 pad_h=padding[0][0] + padding[0][1],
        #                                 pad_w=padding[1][0] + padding[1][1],
        #                                 pool=P.Pooling.AVE)

        else:
            raise Exception('Unsupported layer type: ' + layer_type)

        outputs[top] = name

    # Replace the empty first layer with the input blob definition.
    net_proto = input_str + '\n' + 'layer {' + 'layer {'.join(
        str(caffe_net.to_proto()).split('layer {')[2:])
    with open(caffe_net_file, 'w') as f:
        f.write(net_proto)

    caffe_model = caffe.Net(caffe_net_file, caffe.TEST)
    for layer in caffe_model.params.keys():
        if 'up_sampling2d' in layer:
            continue  # bilinear upsampling kernels keep their filler weights
        for n in range(0, len(caffe_model.params[layer])):
            caffe_model.params[layer][n].data[...] = net_params[layer][n]
    caffe_model.save(caffe_params_file)
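# Driver sketch for the Keras-to-Caffe converter (paths are hypothetical);
# the loaded model must only contain layer types handled by the branches
# above.
def _convert_example():
    from keras.models import load_model
    keras_model = load_model('model.h5')
    convert(keras_model, 'model.prototxt', 'model.caffemodel')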
def deeplab(padimg, img, spixel_indices):
    # Frozen VGG-16 trunk (all lr_mult=0); only fc8_deeplab is trained.
    # spixel_indices is part of the call signature but is not used inside
    # this branch.
    frozen = [{'lr_mult': 0, 'decay_mult': 0}, {'lr_mult': 0, 'decay_mult': 0}]

    # Conv1
    conv1_1 = L.Convolution(padimg, convolution_param=dict(
        num_output=64, kernel_size=3, stride=1, pad=1), param=frozen)
    conv1_1 = L.ReLU(conv1_1, in_place=True)
    conv1_2 = L.Convolution(conv1_1, convolution_param=dict(
        num_output=64, kernel_size=3, stride=1, pad=1), param=frozen)
    conv1_2 = L.ReLU(conv1_2, in_place=True)
    pool1 = L.Pooling(conv1_2, pooling_param=dict(
        kernel_size=3, stride=2, pad=1, pool=P.Pooling.MAX))

    # Conv2
    conv2_1 = L.Convolution(pool1, convolution_param=dict(
        num_output=128, kernel_size=3, stride=1, pad=1), param=frozen)
    conv2_1 = L.ReLU(conv2_1, in_place=True)
    conv2_2 = L.Convolution(conv2_1, convolution_param=dict(
        num_output=128, kernel_size=3, stride=1, pad=1), param=frozen)
    conv2_2 = L.ReLU(conv2_2, in_place=True)
    pool2 = L.Pooling(conv2_2, pooling_param=dict(
        kernel_size=3, stride=2, pad=1, pool=P.Pooling.MAX))

    # Conv3
    conv3_1 = L.Convolution(pool2, convolution_param=dict(
        num_output=256, kernel_size=3, stride=1, pad=1), param=frozen)
    conv3_1 = L.ReLU(conv3_1, in_place=True)
    conv3_2 = L.Convolution(conv3_1, convolution_param=dict(
        num_output=256, kernel_size=3, stride=1, pad=1), param=frozen)
    conv3_2 = L.ReLU(conv3_2, in_place=True)
    conv3_3 = L.Convolution(conv3_2, convolution_param=dict(
        num_output=256, kernel_size=3, stride=1, pad=1), param=frozen)
    conv3_3 = L.ReLU(conv3_3, in_place=True)
    pool3 = L.Pooling(conv3_3, pooling_param=dict(
        kernel_size=3, stride=2, pad=1, pool=P.Pooling.MAX))

    # Conv4
    conv4_1 = L.Convolution(pool3, convolution_param=dict(
        num_output=512, kernel_size=3, stride=1, pad=1), param=frozen)
    conv4_1 = L.ReLU(conv4_1, in_place=True)
    conv4_2 = L.Convolution(conv4_1, convolution_param=dict(
        num_output=512, kernel_size=3, stride=1, pad=1), param=frozen)
    conv4_2 = L.ReLU(conv4_2, in_place=True)
    conv4_3 = L.Convolution(conv4_2, convolution_param=dict(
        num_output=512, kernel_size=3, stride=1, pad=1), param=frozen)
    conv4_3 = L.ReLU(conv4_3, in_place=True)
    pool4 = L.Pooling(conv4_3, pooling_param=dict(
        kernel_size=3, stride=1, pad=1, pool=P.Pooling.MAX))

    # Conv5 (dilated convolutions, stride kept at 1)
    conv5_1 = L.Convolution(pool4, convolution_param=dict(
        num_output=512, kernel_size=3, stride=1, pad=2, dilation=2, engine=1),
        param=frozen)
    conv5_1 = L.ReLU(conv5_1, in_place=True)
    conv5_2 = L.Convolution(conv5_1, convolution_param=dict(
        num_output=512, kernel_size=3, stride=1, pad=2, dilation=2, engine=1),
        param=frozen)
    conv5_2 = L.ReLU(conv5_2, in_place=True)
    conv5_3 = L.Convolution(conv5_2, convolution_param=dict(
        num_output=512, kernel_size=3, stride=1, pad=2, dilation=2, engine=1),
        param=frozen)
    conv5_3 = L.ReLU(conv5_3, in_place=True)
    pool5 = L.Pooling(conv5_3, pooling_param=dict(
        kernel_size=3, stride=1, pad=1, pool=P.Pooling.MAX))
    pool5a = L.Pooling(pool5, pooling_param=dict(
        kernel_size=3, stride=1, pad=1, pool=P.Pooling.MAX))

    # FC-6 (as a dilated 3x3 convolution)
    fc6 = L.Convolution(pool5a, convolution_param=dict(
        num_output=1024, kernel_size=3, pad=12, dilation=12, engine=1),
        param=frozen)
    fc6 = L.ReLU(fc6, in_place=True)
    fc6 = L.Dropout(fc6, dropout_param=dict(dropout_ratio=0.5), in_place=True)

    # FC-7
    fc7 = L.Convolution(fc6, convolution_param=dict(
        num_output=1024, kernel_size=1), param=frozen)
    fc7 = L.ReLU(fc7, in_place=True)
    fc7 = L.Dropout(fc7, dropout_param=dict(dropout_ratio=0.5), in_place=True)

    # FC-8: the only trainable layer
    fc8_deeplab = L.Convolution(fc7, convolution_param=dict(
        num_output=2, kernel_size=1,
        weight_filler=dict(type='gaussian', std=0.01),
        bias_filler=dict(type='constant', value=0)),
        param=[{'lr_mult': 1, 'decay_mult': 1}, {'lr_mult': 2, 'decay_mult': 0}])

    # Interpolate back towards image resolution
    fc8_interp = L.Interp(fc8_deeplab, interp_param=dict(zoom_factor=8))
    # Crop to match the required dimensions
    fc8_crop = L.Crop(fc8_interp, img, crop_param=dict(axis=2, offset=[0, 0]))
    return fc8_crop
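# Sketch: wiring the frozen DeepLab branch into a NetSpec. All blob names and
# input shapes below are hypothetical; padimg is the padded image the trunk
# consumes, img only defines the crop target size, and spixel_indices is
# passed through unused.
def _deeplab_example():
    n = caffe.NetSpec()
    n.img = L.Input(input_param=dict(shape=dict(dim=[1, 3, 481, 481])))
    n.padimg = L.Input(input_param=dict(shape=dict(dim=[1, 3, 513, 513])))
    n.spixels = L.Input(input_param=dict(shape=dict(dim=[1, 1, 481, 481])))
    n.fc8_crop = deeplab(n.padimg, n.img, n.spixels)
    return n.to_proto()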
def cn_LF(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(88.386, 99.768, 107.622), seed=1337)
    if split == 'train':
        pydata_params['train_dir'] = '../data/train'
        pylayer = 'TrainDataLayer'
    else:
        pydata_params['val_dir'] = '../data/val'
        pylayer = 'ValDataLayer'
    n.data, n.label = L.Python(module='colorname_layers', layer=pylayer,
                               ntop=2, param_str=str(pydata_params))

    # the base net
    n.LF_conv1_1 = L.Convolution(
        n.data, kernel_size=3, stride=3, num_output=64,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0))
    n.LF_relu1_1 = L.ReLU(n.LF_conv1_1, in_place=True)
    n.LF_conv1_2 = L.Convolution(
        n.LF_relu1_1, kernel_size=3, stride=3, num_output=64,
        param=[dict(lr_mult=10, decay_mult=1), dict(lr_mult=20, decay_mult=0)],
        weight_filler=dict(type='xavier'),
        bias_filler=dict(type='constant', value=0))
    n.LF_relu1_2 = L.ReLU(n.LF_conv1_2, in_place=True)
    n.conv2, n.relu2 = conv_relu(n.LF_relu1_2, 64,
                                 param=[dict(lr_mult=10, decay_mult=1),
                                        dict(lr_mult=20, decay_mult=0)])
    n.drop2 = L.Dropout(n.relu2, dropout_ratio=0.1, in_place=True)
    n.conv1_2, n.relu1_2 = conv_relu(n.drop2, 64)
    n.drop1_2 = L.Dropout(n.relu1_2, dropout_ratio=0.1, in_place=True)
    n.pool1 = max_pool(n.drop1_2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.drop2_1 = L.Dropout(n.relu2_1, dropout_ratio=0.1, in_place=True)
    n.conv2_2, n.relu2_2 = conv_relu(n.drop2_1, 128)
    n.drop2_2 = L.Dropout(n.relu2_2, dropout_ratio=0.1, in_place=True)
    n.pool2 = max_pool(n.drop2_2)
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.drop3_1 = L.Dropout(n.relu3_1, dropout_ratio=0.2, in_place=True)
    n.conv3_2, n.relu3_2 = conv_relu(n.drop3_1, 256)
    n.drop3_2 = L.Dropout(n.relu3_2, dropout_ratio=0.2, in_place=True)
    n.conv3_3, n.relu3_3 = conv_relu(n.drop3_2, 256)
    n.drop3_3 = L.Dropout(n.relu3_3, dropout_ratio=0.2, in_place=True)
    n.pool3 = max_pool(n.drop3_3)
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.drop4_1 = L.Dropout(n.relu4_1, dropout_ratio=0.2, in_place=True)
    n.conv4_2, n.relu4_2 = conv_relu(n.drop4_1, 512)
    n.drop4_2 = L.Dropout(n.relu4_2, dropout_ratio=0.2, in_place=True)
    n.conv4_3, n.relu4_3 = conv_relu(n.drop4_2, 512)
    n.drop4_3 = L.Dropout(n.relu4_3, dropout_ratio=0.2, in_place=True)
    n.pool4 = max_pool(n.drop4_3, stride=1)
    n.conv5_1, n.relu5_1 = conv_relu_dia(n.pool4, 512, pad=2, dilation=2)
    n.drop5_1 = L.Dropout(n.relu5_1, dropout_ratio=0.3, in_place=True)
    n.conv5_2, n.relu5_2 = conv_relu_dia(n.drop5_1, 512, pad=2, dilation=2)
    n.drop5_2 = L.Dropout(n.relu5_2, dropout_ratio=0.3, in_place=True)
    n.conv5_3, n.relu5_3 = conv_relu_dia(n.drop5_2, 512, pad=2, dilation=2)
    n.drop5_3 = L.Dropout(n.relu5_3, dropout_ratio=0.3, in_place=True)
    n.pool5 = max_pool(n.drop5_3, stride=1)

    # hole = 6
    n.fc6_1, n.relu6_1 = conv_relu_dia(n.pool5, 1024, pad=6, dilation=6)
    n.drop6_1 = L.Dropout(n.relu6_1, dropout_ratio=0.5, in_place=True)
    n.fc7_1, n.relu7_1 = conv_relu(n.relu6_1, 1024, ks=1, pad=0)
    n.drop7_1 = L.Dropout(n.relu7_1, dropout_ratio=0.5, in_place=True)
    n.fc8_1 = L.Convolution(n.drop7_1, kernel_size=1, num_output=2,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant', value=0),
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)])

    # hole = 12
    n.fc6_2, n.relu6_2 = conv_relu_dia(n.pool5, 1024, pad=12, dilation=12)
    n.drop6_2 = L.Dropout(n.relu6_2, dropout_ratio=0.5, in_place=True)
    n.fc7_2, n.relu7_2 = conv_relu(n.relu6_2, 1024, ks=1, pad=0)
    n.drop7_2 = L.Dropout(n.relu7_2, dropout_ratio=0.5, in_place=True)
    n.fc8_2 = L.Convolution(n.drop7_2, kernel_size=1, num_output=2,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant', value=0),
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)])

    # hole = 18
    n.fc6_3, n.relu6_3 = conv_relu_dia(n.pool5, 1024, pad=18, dilation=18)
    n.drop6_3 = L.Dropout(n.relu6_3, dropout_ratio=0.5, in_place=True)
    n.fc7_3, n.relu7_3 = conv_relu(n.relu6_3, 1024, ks=1, pad=0)
    n.drop7_3 = L.Dropout(n.relu7_3, dropout_ratio=0.5, in_place=True)
    n.fc8_3 = L.Convolution(n.drop7_3, kernel_size=1, num_output=2,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant', value=0),
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)])

    # hole = 24
    n.fc6_4, n.relu6_4 = conv_relu_dia(n.pool5, 1024, pad=24, dilation=24)
    n.drop6_4 = L.Dropout(n.relu6_4, dropout_ratio=0.5, in_place=True)
    n.fc7_4, n.relu7_4 = conv_relu(n.relu6_4, 1024, ks=1, pad=0)
    n.drop7_4 = L.Dropout(n.relu7_4, dropout_ratio=0.5, in_place=True)
    n.fc8_4 = L.Convolution(n.drop7_4, kernel_size=1, num_output=2,
                            weight_filler=dict(type='xavier'),
                            bias_filler=dict(type='constant', value=0),
                            param=[dict(lr_mult=1, decay_mult=1),
                                   dict(lr_mult=2, decay_mult=0)])

    # Sum the four dilated branches, upsample, and crop to the label size.
    n.fc8 = L.Eltwise(n.fc8_1, n.fc8_2, n.fc8_3, n.fc8_4,
                      operation=P.Eltwise.SUM)
    n.fc8_shrink = L.Interp(n.fc8, interp_param=dict(zoom_factor=8))
    n.score = L.Crop(n.fc8_shrink, n.label)
    # n.loss = L.SoftmaxWithLoss(n.score, n.label, loss_param=dict(ignore_label=255))
    return n.to_proto()
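# Driver sketch (file names are hypothetical): render the train and val
# splits of the colorname net above.
def _write_cn_LF_prototxts():
    with open('cn_LF_train.prototxt', 'w') as f:
        f.write(str(cn_LF('train')))
    with open('cn_LF_val.prototxt', 'w') as f:
        f.write(str(cn_LF('val')))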
def generate_model(split, config):
    n = caffe.NetSpec()
    dataset = config.dataset
    batch_size = config.N
    mode_str = str(dict(dataset=dataset, split=split, batch_size=batch_size))
    n.image1, n.image2, n.label, n.sample_weights, n.feat_crop = L.Python(
        module=config.data_provider, layer=config.data_provider_layer,
        param_str=mode_str, ntop=5)

    ################################
    # the base net (VGG-16), branch 1
    n.conv1_1, n.relu1_1 = conv_relu(n.image1, 64,
                                     param_names=('conv1_1_w', 'conv1_1_b'),
                                     fix_param=True, finetune=False)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64,
                                     param_names=('conv1_2_w', 'conv1_2_b'),
                                     fix_param=True, finetune=False)
    n.pool1 = max_pool(n.relu1_2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128,
                                     param_names=('conv2_1_w', 'conv2_1_b'),
                                     fix_param=True, finetune=False)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128,
                                     param_names=('conv2_2_w', 'conv2_2_b'),
                                     fix_param=True, finetune=False)
    n.pool2 = max_pool(n.relu2_2)
    n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256,
                                     param_names=('conv3_1_w', 'conv3_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256,
                                     param_names=('conv3_2_w', 'conv3_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256,
                                     param_names=('conv3_3_w', 'conv3_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.pool3 = max_pool(n.relu3_3)
    # spatial L2 norm
    n.pool3_lrn = L.LRN(n.pool3, local_size=513, alpha=513, beta=0.5, k=1e-16)
    n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512,
                                     param_names=('conv4_1_w', 'conv4_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512,
                                     param_names=('conv4_2_w', 'conv4_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512,
                                     param_names=('conv4_3_w', 'conv4_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn = L.LRN(n.relu4_3, local_size=1025, alpha=1025, beta=0.5,
                          k=1e-16)
    # n.pool4 = max_pool(n.relu4_3)
    # n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512,
    #     param_names=('conv5_1_w', 'conv5_1_b'),
    #     fix_param=config.fix_vgg, finetune=config.finetune)
    # n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512,
    #     param_names=('conv5_2_w', 'conv5_2_b'),
    #     fix_param=config.fix_vgg, finetune=config.finetune)
    # n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512,
    #     param_names=('conv5_3_w', 'conv5_3_b'),
    #     fix_param=config.fix_vgg, finetune=config.finetune)
    # upsampling feature map
    # n.relu5_3_upsampling = L.Deconvolution(n.relu5_3,
    #     convolution_param=dict(num_output=512, group=512, kernel_size=4,
    #                            stride=2, pad=1, bias_term=False,
    #                            weight_filler=dict(type='bilinear')),
    #     param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    # n.relu5_3_lrn = L.LRN(n.relu5_3_upsampling, local_size=1025, alpha=1025,
    #                       beta=0.5, k=1e-16)

    # concat all skip features
    # n.feat_all1 = n.relu4_3_lrn
    n.feat_all1 = L.Concat(n.pool3_lrn, n.relu4_3_lrn,
                           concat_param=dict(axis=1))
    # n.feat_all1 = L.Concat(n.pool3_lrn, n.relu4_3_lrn, n.relu5_3_lrn,
    #                        concat_param=dict(axis=1))
    n.feat_all1_crop = L.Crop(
        n.feat_all1, n.feat_crop,
        crop_param=dict(axis=2, offset=[config.query_featmap_H // 3,
                                        config.query_featmap_W // 3]))

    ################################
    # the base net (VGG-16), branch 2: shares all weights with branch 1
    # through the common param_names
    n.conv1_1_p, n.relu1_1_p = conv_relu(n.image2, 64,
                                         param_names=('conv1_1_w', 'conv1_1_b'),
                                         fix_param=True, finetune=False)
    n.conv1_2_p, n.relu1_2_p = conv_relu(n.relu1_1_p, 64,
                                         param_names=('conv1_2_w', 'conv1_2_b'),
                                         fix_param=True, finetune=False)
    n.pool1_p = max_pool(n.relu1_2_p)
    n.conv2_1_p, n.relu2_1_p = conv_relu(n.pool1_p, 128,
                                         param_names=('conv2_1_w', 'conv2_1_b'),
                                         fix_param=True, finetune=False)
    n.conv2_2_p, n.relu2_2_p = conv_relu(n.relu2_1_p, 128,
                                         param_names=('conv2_2_w', 'conv2_2_b'),
                                         fix_param=True, finetune=False)
    n.pool2_p = max_pool(n.relu2_2_p)
    n.conv3_1_p, n.relu3_1_p = conv_relu(n.pool2_p, 256,
                                         param_names=('conv3_1_w', 'conv3_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_2_p, n.relu3_2_p = conv_relu(n.relu3_1_p, 256,
                                         param_names=('conv3_2_w', 'conv3_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_3_p, n.relu3_3_p = conv_relu(n.relu3_2_p, 256,
                                         param_names=('conv3_3_w', 'conv3_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.pool3_p = max_pool(n.relu3_3_p)
    # spatial L2 norm
    n.pool3_lrn_p = L.LRN(n.pool3_p, local_size=513, alpha=513, beta=0.5,
                          k=1e-16)
    n.conv4_1_p, n.relu4_1_p = conv_relu(n.pool3_p, 512,
                                         param_names=('conv4_1_w', 'conv4_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_2_p, n.relu4_2_p = conv_relu(n.relu4_1_p, 512,
                                         param_names=('conv4_2_w', 'conv4_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_3_p, n.relu4_3_p = conv_relu(n.relu4_2_p, 512,
                                         param_names=('conv4_3_w', 'conv4_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn_p = L.LRN(n.relu4_3_p, local_size=1025, alpha=1025, beta=0.5,
                            k=1e-16)
    # n.pool4_p = max_pool(n.relu4_3_p)
    # n.conv5_1_p, n.relu5_1_p = conv_relu(n.pool4_p, 512,
    #     param_names=('conv5_1_w', 'conv5_1_b'),
    #     fix_param=config.fix_vgg, finetune=config.finetune)
    # n.conv5_2_p, n.relu5_2_p = conv_relu(n.relu5_1_p, 512,
    #     param_names=('conv5_2_w', 'conv5_2_b'),
    #     fix_param=config.fix_vgg, finetune=config.finetune)
    # n.conv5_3_p, n.relu5_3_p = conv_relu(n.relu5_2_p, 512,
    #     param_names=('conv5_3_w', 'conv5_3_b'),
    #     fix_param=config.fix_vgg, finetune=config.finetune)
    # upsampling feature map
    # n.relu5_3_upsampling_p = L.Deconvolution(n.relu5_3_p,
    #     convolution_param=dict(num_output=512, group=512, kernel_size=4,
    #                            stride=2, pad=1, bias_term=False,
    #                            weight_filler=dict(type='bilinear')),
    #     param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    # n.relu5_3_lrn_p = L.LRN(n.relu5_3_upsampling_p, local_size=1025,
    #                         alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    # n.feat_all2 = n.relu4_3_lrn_p
    n.feat_all2 = L.Concat(n.pool3_lrn_p, n.relu4_3_lrn_p,
                           concat_param=dict(axis=1))
    # n.feat_all2 = L.Concat(n.pool3_lrn_p, n.relu4_3_lrn_p, n.relu5_3_lrn_p,
    #                        concat_param=dict(axis=1))

    # Dynamic convolution layer: the cropped query features (branch 1) act as
    # the filter that is correlated with the search-image features (branch 2).
    n.fcn_scores = L.DynamicConvolution(
        n.feat_all2, n.feat_all1_crop,
        convolution_param=dict(num_output=1, kernel_size=11, stride=1, pad=5,
                               bias_term=False))
    # scale scores with zero mean: 0.01196 -> 0.02677
    n.fcn_scaled_scores = L.Power(
        n.fcn_scores, power_param=dict(scale=0.01196, shift=-1.0, power=1))

    # Loss layer
    n.loss = L.WeightedSigmoidCrossEntropyLoss(n.fcn_scaled_scores, n.label,
                                               n.sample_weights)

    return n.to_proto()
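# Sketch of the config object generate_model expects. Every attribute below
# is inferred from how the function reads it, and all values are
# hypothetical.
class _ExampleConfig(object):
    dataset = 'referit'
    N = 16                            # batch size
    data_provider = 'data_provider'   # python module implementing the layer
    data_provider_layer = 'DataProviderLayer'
    fix_vgg = True                    # freeze the conv3_x/conv4_x stages
    finetune = False
    query_featmap_H = 30              # crop offsets use featmap size // 3
    query_featmap_W = 30

def _write_matchnet_prototxt():
    with open('matchnet_train.prototxt', 'w') as f:
        f.write(str(generate_model('train', _ExampleConfig())))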