def net_deploy(deploy_prototxt, model):
    from ofnet import ofnet
    n = caffe.NetSpec()
    n.data = L.Input(shape=[dict(dim=[1, 3, 224, 224])])
    ofnet(n, is_train=False)
    n.sigmoid_edge = L.Sigmoid(n.unet1b_edge)
    with open('ofnet_eval.prototxt', 'w') as f:
        f.write(str(n.to_proto()))  # write the network definition
    net = caffe.Net(deploy_prototxt, model, caffe.TEST)
    return net
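# Hedged usage sketch: the prototxt/caffemodel file names below are hypothetical
# placeholders, not part of the original code; shows the intended call pattern,
# assuming the deploy prototxt exposes 'data' and 'sigmoid_edge' blobs.
def demo_net_deploy():
    net = net_deploy('ofnet_deploy.prototxt', 'ofnet_iter_40000.caffemodel')
    net.blobs['data'].data[...] = 0  # fill with a preprocessed (1, 3, 224, 224) image
    edge_map = net.forward()['sigmoid_edge']
    return edge_map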
def add_block_se(self, bottom, num_output):
    layer1 = self.conv_prelu(bottom, num_output)
    layer2 = self.conv_prelu(layer1, num_output)
    pool = L.Pooling(layer2, pool=1, global_pooling=True)  # pool=1 selects AVE pooling
    # Squeeze-and-excitation bottleneck: reduce channels by 16x, then restore.
    conv3 = self.conv(pool, num_output // 16, kernel_size=1, stride=1, pad=0)
    pr3 = L.PReLU(conv3, in_place=True)
    conv4 = self.conv(pr3, num_output, kernel_size=1, stride=1, pad=0)
    prob = L.Sigmoid(conv4, in_place=True)
    output = L.Axpy(prob, layer2, bottom)  # prob * layer2 + bottom
    return output
def ip_factory(bottom, nout):
    ip = L.InnerProduct(bottom, num_output=nout, normalize_scale=2.0,
                        weight_filler=dict(type='xavier'))
    sigmoid = L.Sigmoid(ip, in_place=True)
    # Fixed (non-learned) affine transform parameterized by the module-level
    # sigmoid_scale constant.
    scale = L.Scale(sigmoid, bias_term=True,
                    filler=dict(type='constant', value=1.0 / sigmoid_scale),
                    bias_filler=dict(type='constant', value=-0.5 / sigmoid_scale),
                    param=[dict(lr_mult=0, decay_mult=0),
                           dict(lr_mult=0, decay_mult=0)])
    return scale
def fc_sigmoid(bottom, nout, fix_param=False, finetune=False):
    if fix_param:
        mult = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
        fc = L.InnerProduct(bottom, num_output=nout, param=mult)
    elif finetune:
        mult = [dict(lr_mult=0.1, decay_mult=1), dict(lr_mult=0.2, decay_mult=0)]
        fc = L.InnerProduct(bottom, num_output=nout, param=mult)
    else:
        mult = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]
        filler = dict(type='xavier')
        fc = L.InnerProduct(bottom, num_output=nout, param=mult, weight_filler=filler)
    return fc, L.Sigmoid(fc, in_place=True)
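# Hedged usage sketch: the pool5 input blob and sizes are illustrative;
# demonstrates the three parameter modes of fc_sigmoid().
def demo_fc_sigmoid():
    n = caffe.NetSpec()
    n.pool5 = L.Input(shape=[dict(dim=[1, 256])])
    n.fc_a, n.sig_a = fc_sigmoid(n.pool5, nout=128)                  # trained from scratch
    n.fc_b, n.sig_b = fc_sigmoid(n.pool5, nout=128, finetune=True)   # reduced learning rates
    n.fc_c, n.sig_c = fc_sigmoid(n.pool5, nout=128, fix_param=True)  # frozen weights
    return n.to_proto()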
def resnet_mask_rcnn_test(self):
    channals = self.channals
    data, rois = self.data_layer_test(with_roi=True)
    pre_trained_fixed = True
    conv1 = self.conv_factory("conv1", data, 7, channals, 2, 3,
                              bias_term=True, fixed=pre_trained_fixed)
    pool1 = self.pooling_layer(3, 2, 'MAX', 'pool1', conv1)
    index = 1
    out = pool1
    if self.module == "normal":
        residual_block = self.residual_block
    else:
        residual_block = self.residual_block_basic
    for i in self.stages[:-1]:
        index += 1
        for j in range(i):
            if j == 0:
                stride = 1 if index == 2 else 2
                out = residual_block("res" + str(index) + ascii_lowercase[j],
                                     out, channals, stride, fixed=pre_trained_fixed)
            else:
                out = residual_block("res" + str(index) + ascii_lowercase[j],
                                     out, channals, fixed=pre_trained_fixed)
        channals *= 2
    mask_feat_aligned = self.roi_align("mask", out, rois)
    out = mask_feat_aligned
    index += 1
    for j in range(self.stages[-1]):
        if j == 0:
            stride = 1
            out = residual_block("res" + str(index) + ascii_lowercase[j],
                                 out, channals, stride)
        else:
            out = residual_block("res" + str(index) + ascii_lowercase[j],
                                 out, channals)
    # mask prediction head
    out = L.Deconvolution(out, name="mask_deconv1",
                          convolution_param=dict(kernel_size=2, stride=2,
                                                 num_output=256, pad=0,
                                                 bias_term=False,
                                                 weight_filler=dict(type='msra'),
                                                 bias_filler=dict(type='constant')))
    out = L.BatchNorm(out, name="bn_mask_deconv1", in_place=True,
                      batch_norm_param=dict(use_global_stats=self.deploy))
    out = L.Scale(out, name="scale_mask_deconv1", in_place=True,
                  scale_param=dict(bias_term=True))
    out = L.ReLU(out, name="mask_deconv1_relu", in_place=True)
    mask_out = self.conv_factory("mask_out", out, 1, self.classes - 1, 1, 0,
                                 bias_term=True)
    self.net["mask_prob"] = L.Sigmoid(mask_out)
    return self.net.to_proto()
def se_unit(bottom, nout):
    global_pool = L.Pooling(bottom, pooling_param=dict(pool=1, global_pooling=True))  # global AVE pool
    fc1 = L.InnerProduct(global_pool, num_output=nout // 16,
                         param=[dict(lr_mult=1, decay_mult=1)],
                         weight_filler=dict(type="msra"))
    relu = L.ReLU(fc1, in_place=True)
    fc2 = L.InnerProduct(relu, num_output=nout,
                         param=[dict(lr_mult=1, decay_mult=1)],
                         weight_filler=dict(type="msra"))
    sigmoid = L.Sigmoid(fc2)
    scale = L.Scale(bottom, sigmoid, axis=0)  # channel-wise reweighting
    return global_pool, fc1, relu, fc2, sigmoid, scale
def se(bottom, planes):
    pooling = L.Pooling(bottom, pool=P.Pooling.AVE, global_pooling=True)
    fc1 = L.InnerProduct(pooling, num_output=planes, bias_term=True,
                         weight_filler=dict(type='xavier'),
                         bias_filler=dict(type='constant'))
    relu = L.ReLU(fc1, in_place=True)
    fc2 = L.InnerProduct(relu, num_output=planes, bias_term=True,
                         weight_filler=dict(type='xavier'),
                         bias_filler=dict(type='constant'))
    prob = L.Sigmoid(fc2, in_place=True)
    se = L.Scale(bottom, prob, scale_param=dict(axis=0))
    return se
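# Hedged usage sketch: wires the se() block into a minimal NetSpec; the blob
# name and input shape are illustrative.
def demo_se():
    n = caffe.NetSpec()
    n.data = L.Input(shape=[dict(dim=[1, 64, 56, 56])])
    n.se1 = se(n.data, planes=64)
    return n.to_proto()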
def createAutoencoder(hdf5, input_size, batch_size, phase):
    n = caffe.NetSpec()
    if phase == "inference":
        n.data = L.Input(input_param={'shape': {'dim': [1, input_size]}})
    else:
        n.data = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=1)
    # Encoder: single hidden layer with a sigmoid bottleneck.
    n.ip1 = L.InnerProduct(n.data, num_output=256, weight_filler=dict(type='xavier'))
    n.bottleneck = L.Sigmoid(n.ip1, in_place=True)
    # Decoder: reconstruct the input and penalize with a Euclidean loss.
    n.decode = L.InnerProduct(n.bottleneck, num_output=input_size,
                              weight_filler=dict(type='xavier'))
    n.loss = L.EuclideanLoss(n.decode, n.data)
    return n.to_proto()
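# Hedged usage sketch: 'train.h5list' and the prototxt names are hypothetical;
# writes train and inference definitions for the autoencoder.
def demo_autoencoder_prototxts():
    with open('autoencoder_train.prototxt', 'w') as f:
        f.write(str(createAutoencoder('train.h5list', input_size=784,
                                      batch_size=64, phase='train')))
    with open('autoencoder_deploy.prototxt', 'w') as f:
        f.write(str(createAutoencoder(None, input_size=784,
                                      batch_size=1, phase='inference')))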
def net():
    n = caffe.NetSpec()
    # data_shape is expected to be defined at module level, e.g. [1, 3, 224, 224].
    n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
    n.dataout = L.Convolution(n.data,
                              param=[dict(lr_mult=1, decay_mult=1),
                                     dict(lr_mult=2, decay_mult=0)],
                              kernel_size=7, stride=2, num_output=64, pad=3,
                              weight_filler=dict(type="xavier", std=0.03),
                              bias_filler=dict(type='constant', value=0.2))
    n.sig1 = L.Sigmoid(n.dataout, in_place=True)
    return n.to_proto()
def convLayerSigmoid(prev, param_name=None, bn=False, **kwargs):
    if param_name:
        name1 = param_name + '_kernels'
        name2 = param_name + '_bias'
        conv = L.Convolution(prev,
                             param=[dict(lr_mult=1, name=name1),
                                    dict(lr_mult=2, name=name2)],
                             weight_filler=dict(type='msra'), **kwargs)
    else:
        conv = L.Convolution(prev,
                             param=[dict(lr_mult=1), dict(lr_mult=2)],
                             weight_filler=dict(type='msra'), **kwargs)
    sigmoid = L.Sigmoid(conv, in_place=True)
    return sigmoid
def apply_activation(layer, bottom):
    activation = keras.activations.serialize(layer.activation)
    if activation == 'relu':
        return L.ReLU(bottom, in_place=True)
    elif activation == 'softmax':
        # Cannot extract axis from the model, so default to -1.
        return L.Softmax(bottom)
    elif activation == 'softsign':
        # Needs to be implemented in caffe2dml.
        raise Exception("softsign is not implemented")
    elif activation == 'elu':
        return L.ELU(bottom)
    elif activation == 'selu':
        # Needs to be implemented in caffe2dml.
        raise Exception("SELU activation is not implemented")
    elif activation == 'sigmoid':
        return L.Sigmoid(bottom)
    elif activation == 'tanh':
        return L.TanH(bottom)
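# Hedged usage sketch: the Dense layer is illustrative; shows the mapping from
# a Keras activation onto the corresponding Caffe layer.
def demo_apply_activation(bottom):
    dense = keras.layers.Dense(10, activation='sigmoid')
    return apply_activation(dense, bottom)  # -> L.Sigmoid(bottom)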
def mask_unit(net, input_name, idx, feature_dim, each_dim):
    # map_num att_map
    net['mask_conv' + idx] = L.Convolution(net[input_name], kernel_size=1, num_output=1,
                                           param=[dict(lr_mult=1, decay_mult=1),
                                                  dict(lr_mult=2, decay_mult=0)],
                                           weight_filler=dict(type="xavier", variance_norm=2),
                                           bias_filler=dict(type="constant"))
    # input ~ (-1, 1), rescale to range (0, 1)
    net['mask_map' + idx] = L.Sigmoid(net['mask_conv' + idx])
    net['tile_map' + idx] = L.Tile(net['mask_map' + idx],
                                   tile_param=dict(tiles=feature_dim))
    # operation=0 is the element-wise product
    net['masked' + idx] = L.Eltwise(net[input_name], net['tile_map' + idx],
                                    eltwise_param=dict(operation=0))
    # pool=1 is global average pooling
    net['pooled' + idx] = L.Pooling(net['masked' + idx],
                                    pooling_param=dict(pool=1, global_pooling=1))
    net['linear' + idx] = L.InnerProduct(net['pooled' + idx], num_output=each_dim,
                                         param=[dict(lr_mult=1, decay_mult=1),
                                                dict(lr_mult=2, decay_mult=0)],
                                         weight_filler=dict(type="xavier"),
                                         bias_filler=dict(type="constant"))
    return net['linear' + idx]
def conv_factory(bottom, ks, nout, stride=1, pad=0):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
                         normalize_scale=2.0, num_output=nout, pad=pad,
                         bias_term=False, weight_filler=dict(type='msra'))
    sigmoid = L.Sigmoid(conv, in_place=True)
    scale = L.Scale(sigmoid, bias_term=True,
                    filler=dict(type='constant', value=1.0 / sigmoid_scale),
                    bias_filler=dict(type='constant', value=-0.5 / sigmoid_scale),
                    param=[dict(lr_mult=0, decay_mult=0),
                           dict(lr_mult=0, decay_mult=0)])
    return scale
def define_model(self):
    n = caffe.NetSpec()
    pylayer = 'SegDataLayer'
    pydata_params = dict(phase='train',
                         img_root='/home/x/data/datasets/tianchi/',
                         batch_size=4, random=True)
    # data: 1,64,64,64
    n.data, n.label = L.Python(module='data.SegDataLayer', layer=pylayer,
                               ntop=2, param_str=str(pydata_params))
    # n.conv1: 32,32,32,32
    n.conv1 = SingleConv(n.data, 32, kernel_size=[3, 3, 3], stride=[2, 2, 2], padding=[1, 1, 1])
    # n.conv2: 64,16,16,16
    n.conv2 = SingleConv(n.conv1, 64, kernel_size=[3, 3, 3], stride=[2, 2, 2], padding=[1, 1, 1])
    # n.conv3: 128,8,8,8
    n.conv3 = SingleConv(n.conv2, 128, kernel_size=[3, 3, 3], stride=[2, 2, 2], padding=[1, 1, 1])
    # n.conv4: 256,4,4,4
    n.conv4 = SingleConv(n.conv3, 256, kernel_size=[3, 3, 3], stride=[2, 2, 2], padding=[1, 1, 1])
    # n.deconv3: 128,8,8,8
    n.deconv3 = Deconv(n.conv4, 256, 128)
    up3 = [n.deconv3, n.conv3]
    # n.concat1_3: 256,8,8,8
    n.concat1_3 = L.Concat(*up3)
    # n.deconv2: 64,16,16,16
    n.deconv2 = Deconv(n.concat1_3, 256, 64)
    up2 = [n.deconv2, n.conv2]
    # n.concat1_2: 128,16,16,16
    n.concat1_2 = L.Concat(*up2)
    # n.deconv1: 32,32,32,32
    n.deconv1 = Deconv(n.concat1_2, 128, 32)
    up1 = [n.deconv1, n.conv1]
    # n.concat1_1: 64,32,32,32
    n.concat1_1 = L.Concat(*up1)
    # n.deconv0: 32,64,64,64
    n.deconv0 = Deconv(n.concat1_1, 64, 32)
    n.score = L.Convolution(n.deconv0, kernel_size=1, stride=1, pad=0,
                            num_output=1, weight_filler=dict(type='xavier'))
    n.probs = L.Sigmoid(n.score)
    n.probs_ = L.Flatten(n.probs)
    n.label_ = L.Flatten(n.label)
    with open(self.model_def, 'w') as f:
        f.write(str(n.to_proto()))
def create_SE_part(net, from_layer, conv_num, index, channel_size,
                   concat_layer_name1, concat_layer_name2):
    global_pool_name = "conv{}_{}_global_pool".format(conv_num, index)
    net[global_pool_name] = L.Pooling(
        net[from_layer],
        pooling_param=dict(
            pool=caffe_pb2.PoolingParameter.PoolMethod.Value('AVE'),
            engine=caffe_pb2.PoolingParameter.Engine.Value("CAFFE"),
            global_pooling=True))
    down_dim_name = "conv{}_{}_1x1_down".format(conv_num, index)
    # Reduce to 1/16 of the channel count of the previous layer by default.
    net[down_dim_name] = L.Convolution(net[global_pool_name],
                                       convolution_param=dict(
                                           num_output=channel_size // 16,
                                           kernel_size=1, stride=1),
                                       weight_filler=dict(type="xavier"))
    down_dim_relu = "{}/relu".format(down_dim_name)
    net[down_dim_relu] = L.ReLU(net[down_dim_name], in_place=True)
    up_dim_name = "conv{}_{}_1x1_up".format(conv_num, index)
    net[up_dim_name] = L.Convolution(net[down_dim_relu],
                                     convolution_param=dict(
                                         num_output=channel_size,
                                         kernel_size=1, stride=1),
                                     weight_filler=dict(type="xavier"))
    up_dim_prob = "conv{}_{}_prob".format(conv_num, index)
    net[up_dim_prob] = L.Sigmoid(net[up_dim_name], in_place=True)
    Axpy_name = 'axpy-conv{}_{}'.format(conv_num, index)
    # print(up_dim_name, concat_layer_name1, concat_layer_name2)
    net[Axpy_name] = L.Axpy(net[up_dim_name], net[concat_layer_name1],
                            net[concat_layer_name2])
    return Axpy_name
def define_model(self):
    n = caffe.NetSpec()
    pylayer = 'SegDataLayer'
    pydata_params = dict(phase='train', img_root=opt.data_root,
                         batch_size=4, random=True)
    n.data, n.label = L.Python(module='data.SegDataLayer', layer=pylayer,
                               ntop=2, param_str=str(pydata_params))
    n.pre = SingleConv(n.data, 32, kernel_size=[3, 3, 3], stride=[1, 1, 1])
    n.res = ResDown(n.pre, 128)
    n.res = ResBlock(n.res, 128)
    n.res = ResDown(n.res, 512)
    n.res = ResBlock(n.res, 512)
    n.res = ResBlock(n.res, 512)
    n.res = ResBlock(n.res, 512)
    n.res = ResBlock(n.res, 512)
    n.up = ResUp(n.res, 128)
    n.up = ResBlock(n.up, 128)
    n.up = ResUp(n.up, 32)
    n.up = ResBlock(n.up, 32)
    n.out = L.Convolution(n.up, kernel_size=3, stride=1, pad=1, num_output=1,
                          weight_filler=dict(type='xavier'))
    n.probs = L.Sigmoid(n.out)
    n.probs_ = L.Flatten(n.probs)
    n.label_ = L.Flatten(n.label)
    with open(self.model_def, 'w') as f:
        f.write(str(n.to_proto()))
def conv_sigmoid(n, name, bottom, nout, ks, stride=1, pad=0, group=1,
                 batchnorm=False, weight_filler=dict(type='xavier')):
    conv = netset(n, 'conv' + name,
                  L.Convolution(bottom, kernel_size=ks, stride=stride,
                                num_output=nout, pad=pad, group=group,
                                weight_filler=weight_filler))
    convbatch = conv
    if batchnorm:
        batchnorm = netset(n, 'bn' + name,
                           L.BatchNorm(conv, in_place=True,
                                       param=[{"lr_mult": 0},
                                              {"lr_mult": 0},
                                              {"lr_mult": 0}]))
        convbatch = batchnorm
    # Note that we don't have a scale/shift afterward, which is different from
    # the original Batch Normalization layer. Using a scale/shift layer lets
    # the network completely silence the activations in a given layer, which
    # is exactly the behavior that we need to prevent early on.
    sigmoid = netset(n, 'sigmoid' + name, L.Sigmoid(convbatch, in_place=True))
    return sigmoid
def add_block_bn_c_bn_se(self, bottom, num_output):
    bn1 = L.BatchNorm(bottom, use_global_stats=False, in_place=False)
    bn1 = L.Scale(bn1, bias_term=True, in_place=True)
    conv1 = self.conv(bn1, num_output)
    bn2 = L.BatchNorm(conv1, use_global_stats=False, in_place=True)
    bn2 = L.Scale(bn2, bias_term=True, in_place=True)
    pr2 = L.PReLU(bn2, in_place=True)
    conv2 = self.conv(pr2, num_output)
    bn3 = L.BatchNorm(conv2, use_global_stats=False, in_place=True)
    bn3 = L.Scale(bn3, bias_term=True, in_place=True)
    # Squeeze-and-excitation: global pool, 16x bottleneck, sigmoid gating.
    pool = L.Pooling(bn3, pool=1, global_pooling=True)
    conv3 = self.conv(pool, num_output // 16, kernel_size=1, stride=1, pad=0)
    pr3 = L.PReLU(conv3, in_place=True)
    conv4 = self.conv(pr3, num_output, kernel_size=1, stride=1, pad=0)
    prob = L.Sigmoid(conv4, in_place=True)
    output = L.Axpy(prob, bn3, bottom)
    return output
def SpatialAttentionLayer(caffe_net, layer_idx, bottom_blob, out_channel,
                          bias_term=False, masks=None):
    names = ['conv{}a'.format(layer_idx),
             'conv{}b'.format(layer_idx),
             'sig{}'.format(layer_idx),
             'eltwise{}'.format(layer_idx)]
    out_ch_size = out_channel
    if masks is not None:
        in_mask = masks[0]
        in_mask_ch_size = len(in_mask)  # original output channel size
        assert in_mask_ch_size == out_ch_size
        out_ch_size = int(np.sum(in_mask))  # pruned output channel size
    start_bottom_blob = bottom_blob
    caffe_net[names[0]] = L.Convolution(bottom_blob, num_output=1,
                                        bias_term=bias_term, pad=0,
                                        kernel_size=1, stride=1)
    caffe_net[names[1]] = L.Convolution(caffe_net[names[0]],
                                        num_output=out_ch_size,
                                        bias_term=bias_term, pad=0,
                                        kernel_size=1, stride=1)
    caffe_net[names[2]] = L.Sigmoid(caffe_net[names[1]])
    caffe_net[names[3]] = L.Eltwise(caffe_net[names[2]], start_bottom_blob,
                                    operation=P.Eltwise.PROD)
    return caffe_net[names[3]], layer_idx + 1
def MaskNet_Val_Mask(net, from_layer="data", label="label", lr=1, decay=1):
    # net = YoloNetPart(net, from_layer=from_layer, use_bn=True, use_layers=6,
    #                   use_sub_layers=7, lr=lr, decay=decay)
    net.bbox, net.kps, net.mask = L.SplitLabel(net[label], name='SplitLabel', ntop=3)
    net = YoloNetPartCompress(net, from_layer="data", use_bn=True,
                              use_layers=5, use_sub_layers=5,
                              strid_conv=[1, 1, 1, 0, 0], final_pool=False,
                              lr=0.1, decay=0.1)
    net.roi, net.kps_active_flags = L.TrueRoi(net[label],
                                              true_roi_param=dict(type='mask'),
                                              ntop=2)
    out_layer = "conv5_5"
    # net = addconv6(net, from_layer=out_layer, use_bn=True,
    #                conv6_output=[128, 128, 128, 128, 128, 128],
    #                conv6_kernal_size=[3, 3, 3, 3, 3, 3], pre_name="conv6",
    #                start_pool=True, lr_mult=1, decay_mult=1, n_group=1)
    # net = UnifiedMultiScaleLayers(net, layers=["conv4_3", "conv5_5", "conv6_7"],
    #                               tags=["Down", "Ref", "Up"],
    #                               unifiedlayer="convf_mask",
    #                               dnsampleMethod=[["MaxPool"]],
    #                               upsampleMethod="Reorg")
    net = UnifiedMultiScaleLayers(net, layers=["conv4_3", "conv5_5"],
                                  tags=["Down", "Ref"], unifiedlayer="convf_mask",
                                  dnsampleMethod=[["MaxPool"]])
    net.roi_maps = L.RoiAlign(net.convf_mask, net.roi,
                              roi_align_param=roi_align_param)
    net = MaskHeader(net, from_layer="roi_maps", out_layer="mask_maps",
                     use_layers=mask_use_conv_layers,
                     num_channels=channels_of_mask,
                     kernel_size=kernel_size_of_mask, pad=pad_of_mask,
                     use_deconv_layers=mask_use_deconv_layers,
                     lr=lr, decay=decay)
    net.mask_sigmoid = L.Sigmoid(net.mask_maps)
    net.pred = L.Threshold(net.mask_sigmoid, threshold_param=dict(threshold=0.5))
    net.mask_label_map, net.mask_label_flags = L.MaskGen(
        net.roi, net.mask, name="MaskGen", ntop=2,
        mask_gen_param=dict(height=Input_Height, width=Input_Width,
                            resized_height=Rh_Mask, resized_width=Rw_Mask))
    net.mask_eval = L.MaskEval(net.pred, net.mask_label_map, net.roi,
                               net.kps_active_flags)
    return net
def MTD_TEST(net, from_layer="data", image="image", lr=1, decay=1):
    # net = YoloNetPart(net, from_layer=from_layer, use_bn=True, use_layers=6,
    #                   use_sub_layers=7, lr=lr, decay=decay)
    net, mbox_layers, parts_layers = MTD_BODY(net)
    # net = UnifiedMultiScaleLayers(net, layers=["conv4_3", "conv5_5"],
    #                               tags=["Down", "Ref"], unifiedlayer="featuremap1",
    #                               dnsampleMethod=[["Reorg"]])
    # net = UnifiedMultiScaleLayers(net, layers=["conv5_5", "conv6_7"],
    #                               tags=["Down", "Ref"], unifiedlayer="featuremap2",
    #                               dnsampleMethod=[["MaxPool"]], pad=True)
    # mbox_layers = SSDHeader(net, data_layer="data",
    #                         from_layers=["featuremap1", "featuremap2"],
    #                         input_height=Input_Height, input_width=Input_Width,
    #                         loc_postfix='det', **ssdparam)
    reshape_name = "mbox_conf_reshape"
    net[reshape_name] = L.Reshape(mbox_layers[1],
                                  shape=dict(dim=[0, -1, ssdparam.get("num_classes", 2)]))
    softmax_name = "mbox_conf_softmax"
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    flatten_name = "mbox_conf_flatten"
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
    # mbox_layers.append(net.orig_data)
    net.detection_out = L.DenseDetOut(*mbox_layers,
                                      detection_output_param=det_out_param,
                                      include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    # net = UnifiedMultiScaleLayers(net, layers=["conv3_3", "conv4_3"],
    #                               tags=["Down", "Ref"], unifiedlayer="conf34",
    #                               dnsampleMethod=[["MaxPool"]])
    # parts_layers = SSDHeader(net, data_layer="data",
    #                          from_layers=["conf34", "conv5_5", "conv6_7"],
    #                          input_height=Input_Height, input_width=Input_Width,
    #                          loc_postfix='parts', **partsparam)
    sigmoid_name = "parts_conf_sigmoid"
    net[sigmoid_name] = L.Sigmoid(parts_layers[1])
    parts_layers[1] = net[sigmoid_name]
    net.parts_out = L.DenseDetOut(*parts_layers,
                                  detection_output_param=parts_out_param,
                                  include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    net.roi = L.Concat(net.detection_out, net.parts_out, axis=2)
    net.vis = L.VisualMtd(net.roi, net.orig_data,
                          detection_output_param=vis_out_param)
    return net
def SqueezeExcitationLayer(caffe_net, layer_idx, bottom_blob, in_channel,
                           reduced_ch, height, width, bias_term=False):
    names = ['gPool{}'.format(layer_idx),
             'fc{}a'.format(layer_idx),
             'fc{}a_relu'.format(layer_idx),
             'fc{}b'.format(layer_idx),
             'fc{}b_sigmoid'.format(layer_idx),
             'tile{}'.format(layer_idx),
             'reshape{}'.format(layer_idx),
             'eltwise{}'.format(layer_idx)]
    start_bottom_blob = bottom_blob
    # Squeeze: global average pooling followed by a two-layer bottleneck MLP.
    caffe_net[names[0]] = L.Pooling(bottom_blob, pool=P.Pooling.AVE,
                                    global_pooling=True)
    caffe_net[names[1]] = L.InnerProduct(caffe_net[names[0]],
                                         num_output=reduced_ch,
                                         bias_term=bias_term)
    caffe_net[names[2]] = L.ReLU(caffe_net[names[1]], in_place=True)
    caffe_net[names[3]] = L.InnerProduct(caffe_net[names[2]],
                                         num_output=in_channel,
                                         bias_term=bias_term)
    caffe_net[names[4]] = L.Sigmoid(caffe_net[names[3]])
    # Excitation: broadcast the channel gates back to (N, C, H, W) and multiply.
    caffe_net[names[5]] = L.Tile(caffe_net[names[4]], axis=1, tiles=height * width)
    caffe_net[names[6]] = L.Reshape(caffe_net[names[5]],
                                    reshape_param={'shape': {'dim': [0, in_channel, height, width]}})
    caffe_net[names[7]] = L.Eltwise(caffe_net[names[6]], start_bottom_blob,
                                    operation=P.Eltwise.PROD)
    return caffe_net[names[7]], layer_idx + 1
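# Hedged usage sketch: the input shape and channel counts are illustrative.
def demo_squeeze_excitation():
    n = caffe.NetSpec()
    n.data = L.Input(shape=[dict(dim=[1, 64, 28, 28])])
    blob, next_idx = SqueezeExcitationLayer(n, layer_idx=1, bottom_blob=n.data,
                                            in_channel=64, reduced_ch=4,
                                            height=28, width=28)
    return n.to_proto()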
def SsdDetector(net, train=True, data_layer="data", gt_label="label",
                net_width=300, net_height=300, basenet="VGG",
                visualize=False, extra_data="data", eval_enable=True, **ssdparam):
    """
    Create an SSD detector.
    train: TRAIN / TEST
    data_layer/gt_label: data input and label input.
    net_width/net_height: input size of the network.
    num_classes: number of classes to predict.
    basenet: "vgg"/"res101", the feature-extraction network.
    ssdparam: parameter list used by the SSD detector.
    Returns: the complete SSD detector network.
    """
    # BaseNetWork
    if basenet == "VGG":
        net = VGG16Net(net, from_layer=data_layer, fully_conv=True, reduced=True,
                       dilated=True, dropout=False)
        base_feature_layers = ['conv4_3', 'fc7']
        add_layers = 3
        first_channels = 256
        second_channels = 512
    elif basenet == "Res101":
        net = ResNet101Net(net, from_layer=data_layer, use_pool5=False)
        # 1/8, 1/16, 1/32
        base_feature_layers = ['res3b3', 'res4b22', 'res5c']
        add_layers = 2
        first_channels = 256
        second_channels = 512
    elif basenet == "Res50":
        net = ResNet50Net(net, from_layer=data_layer, use_pool5=False)
        base_feature_layers = ['res3d', 'res4f', 'res5c']
        add_layers = 2
        first_channels = 256
        second_channels = 512
    elif basenet == "PVA":
        net = PvaNet(net, from_layer=data_layer)
        # 1/8, 1/16, 1/32
        base_feature_layers = ['conv4_1/incep/pre', 'conv5_1/incep/pre', 'conv5_4']
        add_layers = 2
        first_channels = 256
        second_channels = 512
    elif basenet == "Yolo":
        net = YoloNet(net, from_layer=data_layer)
        base_feature_layers = ssdparam.get("multilayers_feature_map", [])
        # add_layers = 2
        # first_channels = 256
        # second_channels = 512
    else:
        raise ValueError(
            "only VGG16, Res50/101 and PVANet are supported in current version.")
    feature_layers = base_feature_layers
    result = []
    for item in feature_layers:
        # Plain layer names pass through; lists of layers are merged into a
        # single multi-scale feature map.
        if isinstance(item, str):
            result.append(item)
            continue
        if len(item) == 1:
            result.append(item[0])
            continue
        name = ""
        for layers in item:
            name += layers
        tags = ["Down", "Ref"]
        down_methods = [["Reorg"]]
        UnifiedMultiScaleLayers(net, layers=item, tags=tags,
                                unifiedlayer=name, dnsampleMethod=down_methods)
        result.append(name)
    feature_layers = result
    # Add extra layers
    # net, feature_layers = AddSsdExtraConvLayers(net,
    #     use_batchnorm=ssdparam.get("extralayers_use_batchnorm", False),
    #     feature_layers=base_feature_layers, add_layers=add_layers,
    #     first_channels=first_channels, second_channels=second_channels)
    # create the SSD detector header
    mbox_layers = SsdDetectorHeaders(net,
        min_ratio=ssdparam.get("multilayers_min_ratio", 15),
        max_ratio=ssdparam.get("multilayers_max_ratio", 90),
        boxsizes=ssdparam.get("multilayers_boxsizes", []),
        net_width=net_width,
        net_height=net_height,
        data_layer=data_layer,
        num_classes=ssdparam.get("num_classes", 2),
        from_layers=feature_layers,
        use_batchnorm=ssdparam.get("multilayers_use_batchnorm", True),
        prior_variance=ssdparam.get("multilayers_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        normalizations=ssdparam.get("multilayers_normalizations", []),
        aspect_ratios=ssdparam.get("multilayers_aspect_ratios", []),
        flip=ssdparam.get("multilayers_flip", True),
        clip=ssdparam.get("multilayers_clip", False),
        inter_layer_channels=ssdparam.get("multilayers_inter_layer_channels", []),
        kernel_size=ssdparam.get("multilayers_kernel_size", 3),
        pad=ssdparam.get("multilayers_pad", 1))
    if train:
        loss_param = get_loss_param(normalization=ssdparam.get(
            "multiloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])
        # create the loss
        if not ssdparam["combine_yolo_ssd"]:
            multiboxloss_param = get_multiboxloss_param(
                loc_loss_type=ssdparam.get("multiloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
                conf_loss_type=ssdparam.get("multiloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX),
                loc_weight=ssdparam.get("multiloss_loc_weight", 1),
                conf_weight=ssdparam.get("multiloss_conf_weight", 1),
                num_classes=ssdparam.get("num_classes", 2),
                share_location=ssdparam.get("multiloss_share_location", True),
                match_type=ssdparam.get("multiloss_match_type", P.MultiBoxLoss.PER_PREDICTION),
                overlap_threshold=ssdparam.get("multiloss_overlap_threshold", 0.5),
                use_prior_for_matching=ssdparam.get("multiloss_use_prior_for_matching", True),
                background_label_id=ssdparam.get("multiloss_background_label_id", 0),
                use_difficult_gt=ssdparam.get("multiloss_use_difficult_gt", False),
                do_neg_mining=ssdparam.get("multiloss_do_neg_mining", True),
                neg_pos_ratio=ssdparam.get("multiloss_neg_pos_ratio", 3),
                neg_overlap=ssdparam.get("multiloss_neg_overlap", 0.5),
                code_type=ssdparam.get("multiloss_code_type", P.PriorBox.CENTER_SIZE),
                encode_variance_in_target=ssdparam.get("multiloss_encode_variance_in_target", False),
                map_object_to_agnostic=ssdparam.get("multiloss_map_object_to_agnostic", False),
                name_to_label_file=ssdparam.get("multiloss_name_to_label_file", ""))
            net["mbox_loss"] = L.MultiBoxLoss(*mbox_layers,
                multibox_loss_param=multiboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        else:
            multimcboxloss_param = get_multimcboxloss_param(
                loc_loss_type=ssdparam.get("multiloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
                loc_weight=ssdparam.get("multiloss_loc_weight", 1),
                conf_weight=ssdparam.get("multiloss_conf_weight", 1),
                num_classes=ssdparam.get("num_classes", 2),
                share_location=ssdparam.get("multiloss_share_location", True),
                match_type=ssdparam.get("multiloss_match_type", P.MultiBoxLoss.PER_PREDICTION),
                overlap_threshold=ssdparam.get("multiloss_overlap_threshold", 0.5),
                use_prior_for_matching=ssdparam.get("multiloss_use_prior_for_matching", True),
                background_label_id=ssdparam.get("multiloss_background_label_id", 0),
                use_difficult_gt=ssdparam.get("multiloss_use_difficult_gt", False),
                do_neg_mining=ssdparam.get("multiloss_do_neg_mining", True),
                neg_pos_ratio=ssdparam.get("multiloss_neg_pos_ratio", 3),
                neg_overlap=ssdparam.get("multiloss_neg_overlap", 0.5),
                code_type=ssdparam.get("multiloss_code_type", P.PriorBox.CENTER_SIZE),
                encode_variance_in_target=ssdparam.get("multiloss_encode_variance_in_target", False),
                map_object_to_agnostic=ssdparam.get("multiloss_map_object_to_agnostic", False),
                name_to_label_file=ssdparam.get("multiloss_name_to_label_file", ""),
                rescore=ssdparam.get("multiloss_rescore", True),
                object_scale=ssdparam.get("multiloss_object_scale", 1),
                noobject_scale=ssdparam.get("multiloss_noobject_scale", 1),
                class_scale=ssdparam.get("multiloss_class_scale", 1),
                loc_scale=ssdparam.get("multiloss_loc_scale", 1))
            net["mbox_loss"] = L.MultiMcBoxLoss(*mbox_layers,
                multimcbox_loss_param=multimcboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        return net
    else:
        # create the conf softmax/sigmoid layer on mbox_layers[1]
        if not ssdparam["combine_yolo_ssd"]:
            if ssdparam.get("multiloss_conf_loss_type",
                            P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.SOFTMAX:
                reshape_name = "mbox_conf_reshape"
                net[reshape_name] = L.Reshape(mbox_layers[1],
                    shape=dict(dim=[0, -1, ssdparam.get("num_classes", 2)]))
                softmax_name = "mbox_conf_softmax"
                net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
                flatten_name = "mbox_conf_flatten"
                net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
                mbox_layers[1] = net[flatten_name]
            elif ssdparam.get("multiloss_conf_loss_type",
                              P.MultiBoxLoss.SOFTMAX) == P.MultiBoxLoss.LOGISTIC:
                sigmoid_name = "mbox_conf_sigmoid"
                net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
                mbox_layers[1] = net[sigmoid_name]
            else:
                raise ValueError("Unknown conf loss type.")
        det_out_param = get_detection_out_param(
            num_classes=ssdparam.get("num_classes", 2),
            share_location=ssdparam.get("multiloss_share_location", True),
            background_label_id=ssdparam.get("multiloss_background_label_id", 0),
            code_type=ssdparam.get("multiloss_code_type", P.PriorBox.CENTER_SIZE),
            variance_encoded_in_target=ssdparam.get("multiloss_encode_variance_in_target", False),
            conf_threshold=ssdparam.get("detectionout_conf_threshold", 0.01),
            nms_threshold=ssdparam.get("detectionout_nms_threshold", 0.45),
            boxsize_threshold=ssdparam.get("detectionout_boxsize_threshold", 0.001),
            top_k=ssdparam.get("detectionout_top_k", 30),
            visualize=ssdparam.get("detectionout_visualize", False),
            visual_conf_threshold=ssdparam.get("detectionout_visualize_conf_threshold", 0.5),
            visual_size_threshold=ssdparam.get("detectionout_visualize_size_threshold", 0),
            display_maxsize=ssdparam.get("detectionout_display_maxsize", 1000),
            line_width=ssdparam.get("detectionout_line_width", 4),
            color=ssdparam.get("detectionout_color", [[0, 255, 0]]))
        if visualize:
            mbox_layers.append(net[extra_data])
        if not ssdparam["combine_yolo_ssd"]:
            net.detection_out = L.DetectionOutput(*mbox_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out = L.DetectionMultiMcOutput(*mbox_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        if not visualize and eval_enable:
            # create the eval layer
            det_eval_param = get_detection_eval_param(
                num_classes=ssdparam.get("num_classes", 2),
                background_label_id=ssdparam.get("multiloss_background_label_id", 0),
                evaluate_difficult_gt=ssdparam.get("detectioneval_evaluate_difficult_gt", False),
                boxsize_threshold=ssdparam.get("detectioneval_boxsize_threshold",
                                               [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
                iou_threshold=ssdparam.get("detectioneval_iou_threshold", [0.9, 0.75, 0.5]),
                name_size_file=ssdparam.get("detectioneval_name_size_file", ""))
            net.detection_eval = L.DetectionEvaluate(net.detection_out, net[gt_label],
                detection_evaluate_param=det_eval_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        if not eval_enable:
            net.silence = L.Silence(net.detection_out, ntop=0,
                                    include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        return net
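# Hedged usage sketch: the data layer and parameter values are illustrative;
# combine_yolo_ssd must be supplied because SsdDetector indexes it directly.
def demo_ssd_detector():
    net = caffe.NetSpec()
    net.data, net.label = L.Data(source='trainval_lmdb', backend=P.Data.LMDB,
                                 batch_size=8, ntop=2)
    return SsdDetector(net, train=True, basenet="VGG",
                       net_width=300, net_height=300,
                       num_classes=21, combine_yolo_ssd=False)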
def segmentation(n, seg_points, label, phase):
    ############### Params ###############
    num_cls = 1
    ############### Params ###############
    top_prev, top_lattice = L.Python(seg_points, ntop=2,
                                     python_param=dict(module='bcl_layers',
                                                       layer='BCLReshape'))
    top_prev = conv_bn_relu(n, "conv0_seg", top_prev, 1, 64, stride=1, pad=0, loop=1)
    # Note: if the lattice scale is too large, the network becomes very slow
    # and does not produce good results.
    # 2nd
    top_prev = bcl_bn_relu(n, 'bcl_seg', top_prev, top_lattice,
                           nout=[64, 64, 128, 128, 128, 64],
                           lattic_scale=["0*4_1*4_2*4", "0*2_1*2_2*2", "0_1_2",
                                         "0/2_1/2_2/2", "0/4_1/4_2/4", "0/8_1/8_2/8"],
                           loop=6, skip='concat')
    # 3rd
    # top_prev = bcl_bn_relu(n, 'bcl_seg', top_prev, top_lattice,
    #                        nout=[64, 128, 128, 64],
    #                        lattic_scale=["0*8_1*8_2*8", "0*4_1*4_2*4",
    #                                      "0*2_1*2_2*2", "0_1_2"],
    #                        loop=4, skip='concat')
    # BEST NOW
    # top_prev = bcl_bn_relu(n, 'bcl_seg', top_prev, top_lattice,
    #                        nout=[64, 128, 128, 128, 64],
    #                        lattic_scale=["0*2_1*2_2*2", "0_1_2", "0/2_1/2_2/2",
    #                                      "0/4_1/4_2/4", "0/8_1/8_2/8"],
    #                        loop=5, skip='concat')
    # top_prev = conv_bn_relu(n, "conv0_seg", top_prev, 1, 256, stride=1, pad=0, loop=1)
    # top_prev = conv_bn_relu(n, "conv0_seg", top_prev, 1, 128, stride=1, pad=0, loop=1)
    top_prev = conv_bn_relu(n, "conv1_seg", top_prev, 1, 64, stride=1, pad=0, loop=1)
    n.seg_preds = L.Convolution(top_prev, name="car_seg",
                                convolution_param=dict(
                                    num_output=num_cls,
                                    kernel_size=1, stride=1, pad=0,
                                    weight_filler=dict(type='xavier'),
                                    bias_term=True,
                                    bias_filler=dict(type='constant', value=0),
                                    engine=1),
                                param=[dict(lr_mult=1), dict(lr_mult=0.1)])
    # Predict class
    if phase == "train":
        seg_preds = L.Permute(n.seg_preds,
                              permute_param=dict(order=[0, 2, 3, 1]))  # (B,C=1,H,W) -> (B,H,W,C=1)
        seg_preds = L.Reshape(seg_preds,
                              reshape_param=dict(shape=dict(dim=[0, -1, num_cls])))  # (B,H,W,C=1) -> (B,-1,1)
        # Alternative losses kept for reference:
        # seg_weights = L.Python(label, name="SegWeight",
        #                        python_param=dict(module='bcl_layers',
        #                                          layer='SegWeight'))
        # seg_weights = L.Reshape(seg_weights,
        #                         reshape_param=dict(shape=dict(dim=[0, -1])))
        # sigmoid_seg_preds = L.Sigmoid(seg_preds)
        # n.dice_loss = L.Python(sigmoid_seg_preds, label,  # seg_weights,
        #                        name="Seg_Loss", loss_weight=1,
        #                        python_param=dict(module='bcl_layers',
        #                                          layer='DiceLoss'),
        #                        param_str=str(dict(alpha=0.5, belta=0.5)))
        # sigmoid_seg_preds = L.Sigmoid(seg_preds)
        # n.dice_loss = L.Python(sigmoid_seg_preds, label,  # seg_weights,
        #                        name="Seg_Loss", loss_weight=1,
        #                        python_param=dict(module='bcl_layers',
        #                                          layer='IoUSegLoss'))
        n.seg_loss = L.Python(seg_preds, label,  # seg_weights,
                              name="Seg_Loss",
                              loss_weight=1,
                              python_param=dict(
                                  module='bcl_layers',
                                  layer='FocalLoss'),  # WeightFocalLoss, DiceFocalLoss, FocalLoss, DiceLoss
                              param_str=str(dict(focusing_parameter=2, alpha=0.25)))
        # n.seg_loss = L.SigmoidCrossEntropyLoss(seg_preds, label)
        # n.accuracy = L.Accuracy(n.seg_preds, label)
        output = None
    elif phase == "eval":
        n.output = L.Sigmoid(n.seg_preds)
        output = n.output
    return n, output
def gru_unit(self, prefix, x, cont, static=None, h=None, batch_size=100,
             timestep=0, gru_hidden=1000, weight_lr_mult=1, bias_lr_mult=2,
             weight_decay_mult=1, bias_decay_mult=0, concat_hidden=True,
             weight_filler=None, bias_filler=None):
    # assume static input already transformed
    if not weight_filler:
        weight_filler = self.uniform_weight_filler(-0.08, 0.08)
    if not bias_filler:
        bias_filler = self.constant_filler(0)
    if not h:
        h = self.dummy_data_layer([1, batch_size, gru_hidden], 1)

    def get_name(name):
        return '%s_%s' % (prefix, name)

    def get_param(weight_name, bias_name=None):
        # TODO: write this in terms of earlier method "init_params"
        w = dict(lr_mult=weight_lr_mult, decay_mult=weight_decay_mult,
                 name=get_name(weight_name))
        if bias_name is not None:
            b = dict(lr_mult=bias_lr_mult, decay_mult=bias_decay_mult,
                     name=get_name(bias_name))
            return [w, b]
        return [w]

    gate_dim = gru_hidden * 3
    # transform x_t
    x = L.InnerProduct(x, num_output=gate_dim, axis=2,
                       weight_filler=weight_filler, bias_filler=bias_filler,
                       param=get_param('W_xc', 'b_c'))
    self.rename_tops(x, get_name('%d_x_transform' % timestep))
    # transform h
    h_conted = L.Scale(h, cont, axis=0)
    h = L.InnerProduct(h_conted, num_output=gru_hidden * 2, axis=2,
                       bias_term=False, weight_filler=weight_filler,
                       param=get_param('W_hc'))
    h_name = get_name('%d_h_transform' % timestep)
    if not hasattr(self.n, h_name):
        setattr(self.n, h_name, h)
    # gru stuff TODO: write GRUUnit in caffe? would make all this much prettier.
    x_transform_z_r, x_transform_hc = L.Slice(x, slice_point=gru_hidden * 2,
                                              axis=2, ntop=2)
    sum_items = [x_transform_z_r, h]
    if static:
        sum_items += static
    z_r_sum = self.sum(sum_items)
    z_r = L.Sigmoid(z_r_sum)
    z, r = L.Slice(z_r, slice_point=gru_hidden, axis=2, ntop=2)
    z_weighted_h = self.prod([r, h_conted])
    z_h_transform = L.InnerProduct(z_weighted_h, num_output=gru_hidden, axis=2,
                                   bias_term=False, weight_filler=weight_filler,
                                   param=get_param('W_hzc'))
    sum_items = [x_transform_hc, z_h_transform]
    if static:
        sum_items += static
    hc_sum = self.sum(sum_items)
    hc = L.TanH(hc_sum)
    # h_t = (1 - z) * h_{t-1} + z * hc
    zm1 = L.Power(z, scale=-1, shift=1)
    h_h = self.prod([zm1, h_conted])
    h_hc = self.prod([z, hc])
    h = self.sum([h_h, h_hc])
    return h
def sigmoid(self, bottom_data):
    return L.Sigmoid(bottom_data)
def caffenet(netmode):
    # Start Caffe proto net
    net = caffe.NetSpec()
    # Specify input data structures
    if netmode == caffe_pb2.TEST:
        if netconf.loss_function == 'malis':
            fmaps_end = 11
        if netconf.loss_function == 'euclid':
            fmaps_end = 11
        if netconf.loss_function == 'softmax':
            fmaps_end = 2
        net.data, net.datai = data_layer([1, 1, 44, 132, 132])
        net.silence = L.Silence(net.datai, ntop=0)
        # Shape specs:
        # 0. Convolution buffer size
        # 1. Weight memory size
        # 2. Num. channels
        # 3. [d] parameter running value
        # 4. [w] parameter running value
        run_shape_in = [[0, 0, 1, [1, 1, 1], [44, 132, 132]]]
        run_shape_out = run_shape_in
        last_blob = implement_usknet(net, run_shape_out, 64, fmaps_end)
        # Implement the prediction layer
        if netconf.loss_function == 'malis':
            net.prob = L.Sigmoid(last_blob, ntop=1)
        if netconf.loss_function == 'euclid':
            net.prob = L.Sigmoid(last_blob, ntop=1)
        if netconf.loss_function == 'softmax':
            net.prob = L.Softmax(last_blob, ntop=1)
        for i in range(0, len(run_shape_out)):
            print(run_shape_out[i])
        print("Max. memory requirements: %s B" %
              (compute_memory_buffers(run_shape_out) +
               compute_memory_weights(run_shape_out) +
               compute_memory_blobs(run_shape_out)))
        print("Weight memory: %s B" % compute_memory_weights(run_shape_out))
        print("Max. conv buffer: %s B" % compute_memory_buffers(run_shape_out))
    else:
        if netconf.loss_function == 'malis':
            net.data, net.datai = data_layer([1, 1, 44, 132, 132])
            net.label, net.labeli = data_layer([1, 1, 16, 44, 44])
            net.label_affinity, net.label_affinityi = data_layer([1, 11, 16, 44, 44])
            net.affinity_edges, net.affinity_edgesi = data_layer([1, 1, 11, 3])
            net.silence = L.Silence(net.datai, net.labeli, net.label_affinityi,
                                    net.affinity_edgesi, ntop=0)
            fmaps_end = 11
        if netconf.loss_function == 'euclid':
            net.data, net.datai = data_layer([1, 1, 44, 132, 132])
            net.label, net.labeli = data_layer([1, 11, 16, 44, 44])
            net.scale, net.scalei = data_layer([1, 11, 16, 44, 44])
            net.silence = L.Silence(net.datai, net.labeli, net.scalei, ntop=0)
            fmaps_end = 11
        if netconf.loss_function == 'softmax':
            net.data, net.datai = data_layer([1, 1, 44, 132, 132])
            # Currently only supports binary classification
            net.label, net.labeli = data_layer([1, 1, 16, 44, 44])
            net.silence = L.Silence(net.datai, net.labeli, ntop=0)
            fmaps_end = 2
        run_shape_in = [[0, 1, 1, [1, 1, 1], [44, 132, 132]]]
        run_shape_out = run_shape_in
        # Start the actual network
        last_blob = implement_usknet(net, run_shape_out, 64, fmaps_end)
        for i in range(0, len(run_shape_out)):
            print(run_shape_out[i])
        print("Max. memory requirements: %s B" %
              (compute_memory_buffers(run_shape_out) +
               compute_memory_weights(run_shape_out) +
               2 * compute_memory_blobs(run_shape_out)))
        print("Weight memory: %s B" % compute_memory_weights(run_shape_out))
        print("Max. conv buffer: %s B" % compute_memory_buffers(run_shape_out))
        # Implement the loss
        if netconf.loss_function == 'malis':
            last_blob = L.Sigmoid(last_blob, in_place=True)
            net.loss = L.MalisLoss(last_blob, net.label_affinity, net.label,
                                   net.affinity_edges, ntop=0)
        if netconf.loss_function == 'euclid':
            last_blob = L.Sigmoid(last_blob, in_place=True)
            net.loss = L.EuclideanLoss(last_blob, net.label, net.scale, ntop=0)
        if netconf.loss_function == 'softmax':
            net.loss = L.SoftmaxWithLoss(last_blob, net.label, ntop=0)
    # Return the protocol buffer of the generated network
    return net.to_proto()
    # (continuation: trailing arguments of the multibox head call)
    aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations,
    num_classes=num_classes, share_location=share_location, flip=flip,
    clip=clip, prior_variance=prior_variance, kernel_size=3, pad=1,
    lr_mult=lr_mult)

conf_name = "mbox_conf"
if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX:
    reshape_name = "{}_reshape".format(conf_name)
    net[reshape_name] = L.Reshape(net[conf_name],
                                  shape=dict(dim=[0, -1, num_classes]))
    softmax_name = "{}_softmax".format(conf_name)
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    flatten_name = "{}_flatten".format(conf_name)
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC:
    sigmoid_name = "{}_sigmoid".format(conf_name)
    net[sigmoid_name] = L.Sigmoid(net[conf_name])
    mbox_layers[1] = net[sigmoid_name]

net.detection_out = L.DetectionOutput(*mbox_layers,
                                      detection_output_param=det_out_param,
                                      include=dict(phase=caffe_pb2.Phase.Value('TEST')))
net.detection_eval = L.DetectionEvaluate(net.detection_out, net.label,
                                         detection_evaluate_param=det_eval_param,
                                         include=dict(phase=caffe_pb2.Phase.Value('TEST')))

with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)

# Create deploy net.
def base_ae(src_train, src_test):
    n = caffe.NetSpec()
    n.data = L.Data(batch_size=100, backend=P.Data.LMDB, source=src_train,
                    transform_param=dict(scale=0.0039215684), ntop=1,
                    include=[dict(phase=caffe.TRAIN)])
    n.data_test = L.Data(batch_size=100, backend=P.Data.LMDB, source=src_test,
                         transform_param=dict(scale=0.0039215684), ntop=1,
                         include=[dict(phase=caffe.TEST)])
    n.flatdata = L.Flatten(n.data)
    n.encode001 = L.InnerProduct(n.data, num_output=64,
                                 param=[dict(lr_mult=1, decay_mult=1),
                                        dict(lr_mult=1, decay_mult=0)],
                                 weight_filler=dict(type='gaussian', std=1, sparse=15),
                                 bias_filler=dict(type='constant', value=0))
    n.encode001neuron = L.Sigmoid(n.encode001, in_place=True)
    n.decode001 = L.InnerProduct(n.encode001neuron, num_output=3072,
                                 param=[dict(lr_mult=1, decay_mult=1),
                                        dict(lr_mult=1, decay_mult=0)],
                                 weight_filler=dict(type='gaussian', std=1, sparse=15),
                                 bias_filler=dict(type='constant', value=0))
    n.loss_x_entropy = L.SigmoidCrossEntropyLoss(n.decode001, n.flatdata,
                                                 loss_weight=[1])
    n.decode001neuron = L.Sigmoid(n.decode001, in_place=False)
    n.loss_l2 = L.EuclideanLoss(n.decode001neuron, n.flatdata, loss_weight=[0])
    n_proto = n.to_proto()
    # Fix the data layer for the test phase: strip the "_test" suffix so the
    # TRAIN and TEST data layers share the same top blob and layer name.
    for l in n_proto.layer:
        if l.type.lower() == 'data' and \
                [x.phase for x in l.include] == [caffe.TEST]:
            for t in list(l.top):
                l.top.remove(t)
                t = t.split('_test')[0]
                l.top.append(unicode(t))
            l.name = l.name.split('_test')[0]
    return n_proto
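# Hedged usage sketch: the LMDB paths are hypothetical; the 3072-dim
# reconstruction suggests 32x32x3 inputs (e.g. CIFAR-sized images).
def demo_base_ae():
    proto = base_ae('cifar_train_lmdb', 'cifar_test_lmdb')
    with open('autoencoder.prototxt', 'w') as f:
        f.write(str(proto))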
def build_AlexNet_2heads(split, num_classes, batch_size, resize_w, resize_h,
                         crop_w=0, crop_h=0, crop_margin=0, mirror=0, rotate=0,
                         HSV_prob=0, HSV_jitter=0, train=True, deploy=False):
    weight_param = dict(lr_mult=1, decay_mult=1)
    bias_param = dict(lr_mult=2, decay_mult=0)
    learned_param = [weight_param, bias_param]
    frozen_param = [dict(lr_mult=0)] * 2
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892))
    pydata_params['dir'] = '../../../datasets/SocialMedia'
    pydata_params['train'] = train
    pydata_params['batch_size'] = batch_size
    pydata_params['resize_w'] = resize_w
    pydata_params['resize_h'] = resize_h
    pydata_params['crop_w'] = crop_w
    pydata_params['crop_h'] = crop_h
    pydata_params['crop_margin'] = crop_margin
    pydata_params['mirror'] = mirror
    pydata_params['rotate'] = rotate
    pydata_params['HSV_prob'] = HSV_prob
    pydata_params['HSV_jitter'] = HSV_jitter
    pydata_params['num_classes'] = num_classes
    pylayer = 'twoHeadsDataLayer'
    n.data, n.label, n.label_class = L.Python(module='layers_2heads',
                                              layer=pylayer, ntop=3,
                                              param_str=str(pydata_params))
    # Convs
    n.conv1, n.relu1 = conv_relu(n.data, 11, 96, stride=4, param=learned_param)
    n.pool1 = max_pool(n.relu1, 3, stride=2)
    n.norm1 = L.LRN(n.pool1, local_size=5, alpha=1e-4, beta=0.75)
    n.conv2, n.relu2 = conv_relu(n.norm1, 5, 256, pad=2, group=2, param=learned_param)
    n.pool2 = max_pool(n.relu2, 3, stride=2)
    n.norm2 = L.LRN(n.pool2, local_size=5, alpha=1e-4, beta=0.75)
    n.conv3, n.relu3 = conv_relu(n.norm2, 3, 384, pad=1, param=learned_param)
    n.conv4, n.relu4 = conv_relu(n.relu3, 3, 384, pad=1, group=2, param=learned_param)
    n.conv5, n.relu5 = conv_relu(n.relu4, 3, 256, pad=1, group=2, param=learned_param)
    n.pool5 = max_pool(n.relu5, 3, stride=2)
    # Regression head
    n.fc6, n.relu6 = fc_relu(n.pool5, 4096, param=learned_param)
    if train:
        n.drop6 = fc7input = L.Dropout(n.relu6, in_place=True, dropout_ratio=0.5)
    else:
        fc7input = n.relu6
    n.fc7, n.relu7 = fc_relu(fc7input, 4096, param=learned_param)
    if train:
        n.drop7 = fc8input = L.Dropout(n.relu7, in_place=True, dropout_ratio=0.5)
    else:
        fc8input = n.relu7
    n.fc8C = L.InnerProduct(fc8input, num_output=num_classes,
                            weight_filler=dict(type='gaussian', std=0.01),
                            bias_filler=dict(type='constant', value=0.1),
                            param=learned_param)
    # Regression loss
    if not deploy:
        n.loss = L.SigmoidCrossEntropyLoss(n.fc8C, n.label, loss_weight=1)
    # Classification head
    n.fc6_class, n.relu6_class = fc_relu(n.pool5, 4096, param=learned_param)
    if train:
        n.drop6_class = fc7input_class = L.Dropout(n.relu6_class, in_place=True,
                                                   dropout_ratio=0.5)
    else:
        fc7input_class = n.relu6_class
    n.fc7_class, n.relu7_class = fc_relu(fc7input_class, 4096, param=learned_param)
    if train:
        n.drop7_class = fc8input_class = L.Dropout(n.relu7_class, in_place=True,
                                                   dropout_ratio=0.5)
    else:
        fc8input_class = n.relu7_class
    n.fc8_class = L.InnerProduct(fc8input_class, num_output=10,
                                 weight_filler=dict(type='gaussian', std=0.01),
                                 bias_filler=dict(type='constant', value=0.1),
                                 param=learned_param)
    # Classification loss and accuracy
    if not deploy:
        n.loss_class = L.SoftmaxWithLoss(n.fc8_class, n.label_class, loss_weight=0.3)
        n.acc_class = L.Accuracy(n.fc8_class, n.label_class)
    # Deploy output processing
    if deploy:
        n.probs = L.Sigmoid(n.fc8C)
        n.probs_class = L.Softmax(n.fc8_class)
        with open('deploy.prototxt', 'w') as f:
            f.write(str(n.to_proto()))
        return f.name
    elif train:
        with open('train.prototxt', 'w') as f:
            f.write(str(n.to_proto()))
        return f.name
    else:
        with open('val.prototxt', 'w') as f:
            f.write(str(n.to_proto()))
        return f.name