def SRCNN3D(hdf5name, batch_size, kernel):
    """Build a three-convolution SRCNN training net and return its proto.

    Args:
        hdf5name: passed as ``source`` to the HDF5Data layer.
        batch_size: batch size of the HDF5Data layer.
        kernel: indexable; ``kernel[0]``, ``kernel[1]`` and ``kernel[2]`` are
            the kernel sizes of conv1, conv2 and conv3 respectively.

    Returns:
        The result of ``NetSpec.to_proto()`` for the assembled net.
    """

    def _conv(bottom, ksize, channels):
        # All three convolutions share every setting except the kernel size
        # and the number of output channels.
        return L.Convolution(
            bottom,
            kernel_size=ksize,
            num_output=channels,
            stride=1,
            pad=0,
            param=[{'lr_mult': 1}, {'lr_mult': 0.1}],
            weight_filler=dict(type='gaussian', std=0.001),
            bias_filler=dict(type="constant", value=0),
            engine=1,
        )

    net = caffe.NetSpec()
    net.data, net.label = L.HDF5Data(
        batch_size=batch_size,
        source=hdf5name,
        ntop=2,
        include=dict(phase=caffe.TRAIN),
    )

    net.conv1 = _conv(net.data, kernel[0], 64)
    net.relu1 = L.ReLU(net.conv1, in_place=True, engine=1)
    net.conv2 = _conv(net.conv1, kernel[1], 32)
    net.relu2 = L.ReLU(net.conv2, in_place=True, engine=1)
    net.conv3 = _conv(net.conv2, kernel[2], 1)

    # The loss compares the flattened prediction with the flattened label.
    net.conv3_flat = L.Flatten(net.conv3)
    net.label_flat = L.Flatten(net.label)
    net.loss = L.EuclideanLoss(net.conv3_flat, net.label_flat)
    return net.to_proto()
def conv1_autoencoder(split, batch_sz):
    """Build a one-stage convolutional autoencoder and return its prototxt text.

    Relies on names defined elsewhere in the module: ``height`` and ``width``
    (image resize targets) and the helpers ``conv`` and ``max_pool``.

    Args:
        split: passed as ``source`` to the ImageData layer.
        batch_sz: batch size of the ImageData layer.

    Returns:
        ``str(NetSpec.to_proto())``.
    """

    def _frozen_params():
        # Three param specs, all with lr_mult 0, as used by both BatchNorm layers.
        return [{"lr_mult": 0}, {"lr_mult": 0}, {"lr_mult": 0}]

    net = caffe.NetSpec()
    reader_cfg = dict(
        source=split,
        batch_size=batch_sz,
        new_height=height,
        new_width=width,
        is_color=False,
    )
    net.data, net.label = L.ImageData(image_data_param=reader_cfg, ntop=2)
    # The label top is not used by the autoencoder; a Silence layer consumes it.
    net.silence = L.Silence(net.label, ntop=0)
    net.flatdata_i = L.Flatten(net.data)

    # Encoder: conv -> BatchNorm -> Scale -> ReLU(negative_slope=0.1) -> pool.
    net.conv1 = conv(net.data, 5, 5, 64, pad=2)
    net.bn1 = L.BatchNorm(net.conv1, use_global_stats=False, in_place=True,
                          param=_frozen_params())
    net.scale1 = L.Scale(net.bn1, bias_term=True, in_place=True)
    net.relu1 = L.ReLU(net.scale1, relu_param=dict(negative_slope=0.1))
    net.pool1 = max_pool(net.relu1, 2, stride=2)

    net.code = conv(net.pool1, 5, 5, 64, pad=2)

    # Decoder: Deconvolution with a bilinear filler and lr_mult/decay_mult 0.
    upsample_cfg = dict(
        group=64,
        num_output=64,
        kernel_size=4,
        stride=2,
        pad=1,
        bias_term=False,
        weight_filler=dict(type="bilinear"),
    )
    net.upsample1 = L.Deconvolution(net.code,
                                    param=dict(lr_mult=0, decay_mult=0),
                                    convolution_param=upsample_cfg)
    net.deconv1 = conv(net.upsample1, 5, 5, 1, pad=2)
    net.debn1 = L.BatchNorm(net.deconv1, use_global_stats=False, in_place=True,
                            param=_frozen_params())
    net.descale1 = L.Scale(net.debn1, bias_term=True, in_place=True)
    net.derelu1 = L.ReLU(net.descale1, relu_param=dict(negative_slope=0.1))

    # Two losses on the same pair of blobs; only the first has non-zero weight.
    net.flatdata_o = L.Flatten(net.derelu1)
    net.loss_s = L.SigmoidCrossEntropyLoss(net.flatdata_o, net.flatdata_i,
                                           loss_weight=1)
    net.loss_e = L.EuclideanLoss(net.flatdata_o, net.flatdata_i, loss_weight=0)
    return str(net.to_proto())
def define_model(self):
    """Assemble the encoder/decoder segmentation net and write it to ``self.model_def``.

    Reads ``self.is_train`` (forwarded to every project helper) and the
    module-level ``opt.data_root``. The helpers ``BasicConv``, ``Inception_v1``,
    ``Inception_v2``, ``SingleConv`` and ``Deconv`` are defined elsewhere.
    No loss layer is attached; the net ends with the flattened sigmoid output
    (``probs_``) and the flattened label (``label_``).
    """
    train = self.is_train
    net = caffe.NetSpec()
    layer_args = dict(phase='train', img_root=opt.data_root,
                      batch_size=4, random=True)
    net.data, net.label = L.Python(module='data.SegDataLayer',
                                   layer='SegDataLayer',
                                   ntop=2, param_str=str(layer_args))

    # Encoder: BasicConv followed by Inception_v1 at widths 32, 64, 128, 256.
    # (Original author's notes give spatial sizes 64 -> 32 -> 16 -> 8 -> 4.)
    bottom = net.data
    for level, width in enumerate((32, 64, 128, 256), start=1):
        conv_name = 'conv%d' % level
        down_name = 'downsample%d' % level
        net[conv_name] = BasicConv(bottom, width, is_train=train)
        net[down_name] = Inception_v1(net[conv_name], width, width,
                                      is_train=train)
        bottom = net[down_name]

    # Decoder: SingleConv -> Inception_v2 -> Deconv, then concatenate with the
    # encoder feature named in the ``skip`` column.
    stages = (
        # conv,    incept,    deconv,    concat,      skip,    width
        ('conv4_', 'incept4', 'deconv4', 'concat1_4', 'conv4', 128),
        ('conv5', 'incept5', 'deconv5', 'concat1_5', 'conv3', 128),
        ('conv6', 'incept6', 'deconv6', 'concat1_6', 'conv2', 64),
        ('conv7', 'incept7', 'deconv7', 'concat1_7', 'conv1', 32),
    )
    for conv_name, incept_name, deconv_name, concat_name, skip, width in stages:
        net[conv_name] = SingleConv(bottom, width, is_train=train)
        net[incept_name] = Inception_v2(net[conv_name], width, width,
                                        is_train=train)
        net[deconv_name] = Deconv(net[incept_name], width, width,
                                  is_train=train)
        net[concat_name] = L.Concat(net[deconv_name], net[skip])
        bottom = net[concat_name]

    net.conv8 = SingleConv(bottom, 32, is_train=train)
    net.incept8 = Inception_v2(net.conv8, 32, 32, is_train=train)
    net.conv9 = L.Convolution(net.incept8, kernel_size=1, stride=1, pad=0,
                              num_output=1, weight_filler=dict(type='xavier'))

    net.probs = L.Sigmoid(net.conv9)
    net.probs_ = L.Flatten(net.probs)
    net.label_ = L.Flatten(net.label)

    with open(self.model_def, 'w') as handle:
        handle.write(str(net.to_proto()))
def add_cnn(n, data, act, batch_size, T, K, num_step, mode='train'):
    """Add the feed-forward (CNN) prediction layers to the NetSpec ``n``.

    In ``'train'`` mode the flattened frames and actions are sliced into ``T``
    and ``T - 1`` tops. The first ``K`` frame slices seed the input window and
    the remaining ones form the label. For each of ``num_step`` steps the
    window is concatenated, encoded with ``add_conv_enc`` and decoded with
    ``add_decoder``; the prediction then replaces the oldest window entry.
    Predictions ``x_hat`` for indices ``K+1 .. T`` must exist afterwards, so
    ``num_step`` has to cover them. In any other mode a single encode/decode
    pass is added.

    Relies on the module-level helpers ``tag``, ``add_conv_enc`` and
    ``add_decoder``.

    Args:
        n: NetSpec to extend in place.
        data: top holding the frames.
        act: top holding the actions.
        batch_size: unused here; kept for interface compatibility.
        T: number of frame slices in train mode.
        K: number of frames in the input window.
        num_step: number of prediction steps in train mode.
        mode: ``'train'`` for the unrolled multi-step net, anything else for a
            single step.

    Returns:
        ``n``, for chaining.
    """
    n.x_flat = L.Flatten(data, axis=1, end_axis=2)
    n.act_flat = L.Flatten(act, axis=1, end_axis=2)

    if mode != 'train':
        top = add_conv_enc(n, n.x_flat)
        n.tops['x_hat' + tag(K + 1)] = add_decoder(n, top, n.act_flat,
                                                   flatten=False)
        return n

    x = L.Slice(n.x_flat, axis=1, ntop=T)
    act_slice = L.Slice(n.act_flat, axis=1, ntop=T - 1)

    window = []        # frames fed to the encoder at the current step
    labels = []        # ground-truth frames to be predicted
    unused_acts = []   # action slices no prediction step consumes
    for i in range(T):
        t = tag(i + 1)
        n.tops['x' + t] = x[i]
        if i < K:
            window.append(x[i])
        if i < T - 1:
            n.tops['act' + t] = act_slice[i]
        if i < K - 1:
            unused_acts.append(n.tops['act' + t])
        if i >= K:
            labels.append(x[i])
    n.label = L.Concat(*labels, axis=0)

    for step in range(num_step):
        step_tag = tag(step + 1) if step > 0 else ''
        t = tag(step + K)
        tp = tag(step + K + 1)
        n.tops['input' + step_tag] = L.Concat(*window, axis=1)
        top = add_conv_enc(n, n.tops['input' + step_tag], tag=step_tag)
        n.tops['x_hat' + tp] = add_decoder(n, top, n.tops['act' + t],
                                           flatten=False, tag=step_tag)
        # Slide the window: drop the oldest frame, append the new prediction.
        window = window[1:] + [n.tops['x_hat' + tp]]

    predictions = [n.tops['x_hat' + tag(i + 1)] for i in range(K, T)]
    n.x_hat = L.Concat(*predictions, axis=0)
    # With K == 1 every action slice is consumed, so there is nothing to
    # silence; a Silence layer without bottoms would be an invalid layer.
    if unused_acts:
        n.silence = L.Silence(*unused_acts, ntop=0)
    n.l2_loss = L.EuclideanLoss(n.x_hat, n.label)
    return n
def define_model(self):
    """Assemble the three-branch residual segmentation net and write it to ``self.model_def``.

    A shared stem (``pre`` -> ``res``) feeds three branches ``b1``, ``b2`` and
    ``b3`` that apply different numbers of ``ResDown``/``ResUp`` steps. Their
    outputs are concatenated, upsampled once more and mapped to one channel.
    No loss layer is attached; the net ends with the flattened sigmoid output
    (``probs_``) and the flattened label (``label_``).

    Reads the module-level ``opt.data_root``. ``SingleConv``, ``ResDown``,
    ``ResBlock`` and ``ResUp`` are project helpers defined elsewhere.
    Re-assigning the same NetSpec attribute keeps only the last top under that
    name; earlier layers stay in the graph as unnamed intermediates.
    """
    n = caffe.NetSpec()
    pydata_params = dict(phase='train', img_root=opt.data_root,
                         batch_size=4, random=True)
    n.data, n.label = L.Python(module='data.SegDataLayer',
                               layer='SegDataLayer',
                               ntop=2, param_str=str(pydata_params))

    # Shared stem.
    n.pre = SingleConv(n.data, 16, kernel_size=[3, 5, 5], stride=[1, 1, 1],
                       padding=[1, 2, 2])
    n.res = ResDown(n.pre, 32)
    n.res = ResBlock(n.res, 32)
    n.res = ResDown(n.res, 64)

    # Branch 1: two residual blocks, one upsampling step.
    n.b1 = ResBlock(n.res, 64)
    n.b1 = ResBlock(n.b1, 64)
    n.b1 = ResUp(n.b1, 64)

    # Branch 2: one extra down step, two up steps.
    n.b2 = ResBlock(n.res, 64)
    n.b2 = ResDown(n.b2, 128)
    n.b2 = ResUp(n.b2, 128)
    n.b2 = ResUp(n.b2, 64)

    # Branch 3: two extra down steps, three up steps.
    n.b3 = ResDown(n.res, 128)
    n.b3 = ResDown(n.b3, 128)
    n.b3 = ResUp(n.b3, 128)
    n.b3 = ResUp(n.b3, 128)
    n.b3 = ResUp(n.b3, 64)

    n.up = L.Concat(n.b1, n.b2, n.b3)
    n.up = ResUp(n.up, 16)
    # The original built this convolution twice in a row; the first instance
    # was immediately overwritten and never reached the emitted net.
    n.out = L.Convolution(n.up, kernel_size=3, stride=1, pad=1,
                          num_output=1, weight_filler=dict(type='xavier'))

    n.probs = L.Sigmoid(n.out)
    n.probs_ = L.Flatten(n.probs)
    n.label_ = L.Flatten(n.label)

    with open(self.model_def, 'w') as f:
        f.write(str(n.to_proto()))
def add_decoder(n, bottom, act=None, flatten=True, tag=''):
    """Add the decoder on top of ``bottom`` and return the result of ``add_deconv``.

    When ``flatten`` is true, ``bottom`` (and ``act`` if given) are first
    flattened over axes 0..1 and registered as ``h_flat<tag>`` and
    ``act_flat<tag>``. When ``act`` is truthy the hidden top is combined with
    it through ``add_transform`` before the deconvolution stack.

    ``add_transform`` and ``add_deconv`` are helpers defined elsewhere in the
    module.

    Args:
        n: NetSpec to extend in place.
        bottom: encoder output top.
        act: optional action top.
        flatten: whether to flatten the inputs over axes 0..1 first.
        tag: suffix appended to the registered top names.
    """
    hidden = bottom
    if flatten:
        hidden_key = 'h_flat' + tag
        n.tops[hidden_key] = L.Flatten(bottom, axis=0, end_axis=1)
        hidden = n.tops[hidden_key]

    if not act:
        # No action conditioning: decode the hidden representation directly.
        return add_deconv(n, hidden, tag)

    action = act
    if flatten:
        action_key = 'act_flat' + tag
        n.tops[action_key] = L.Flatten(act, axis=0, end_axis=1)
        action = n.tops[action_key]

    transformed = add_transform(n, hidden, action, tag)
    return add_deconv(n, transformed, tag)
def create_bnn_deploy_net(num_input_points, height, width):
    """Build the deploy net around a single Permutohedral layer and return its proto.

    Four Input layers are declared (colors, input features, output features
    and per-channel scales). Both feature blobs are multiplied by the
    flattened scales along axis 1 before entering the Permutohedral layer.

    Args:
        num_input_points: last dimension of ``input_color`` and ``in_features``.
        height: third dimension of ``out_features``.
        width: fourth dimension of ``out_features``.

    Returns:
        The result of ``NetSpec.to_proto()``.
    """

    def _input(dims):
        return L.Input(shape=[dict(dim=dims)])

    net = caffe.NetSpec()
    net.input_color = _input([1, 2, 1, num_input_points])
    net.in_features = _input([1, 4, 1, num_input_points])
    net.out_features = _input([1, 4, height, width])
    net.scales = _input([1, 4, 1, 1])

    net.flatten_scales = L.Flatten(net.scales, flatten_param=dict(axis=0))

    # Apply the same scale vector to the input and the output features.
    for scaled_name, raw_name in (('in_scaled_features', 'in_features'),
                                  ('out_scaled_features', 'out_features')):
        net[scaled_name] = L.Scale(net[raw_name], net.flatten_scales,
                                   scale_param=dict(axis=1))

    lattice_cfg = dict(
        num_output=2,
        group=1,
        neighborhood_size=0,
        norm_type=P.Permutohedral.AFTER,
        offset_type=P.Permutohedral.DIAG,
    )
    net.out_color_result = L.Permutohedral(net.input_color,
                                           net.in_scaled_features,
                                           net.out_scaled_features,
                                           permutohedral_param=lattice_cfg)
    return net.to_proto()
def test_flatten2():
    # type: ()->caffe.NetSpec
    """Return a NetSpec with one Input and a Flatten over axes -3..-2.

    The input shape is built by the module-level ``make_shape`` helper from
    ``[6, 4, 64, 64]``.
    """
    input_dims = [6, 4, 64, 64]
    spec = caffe.NetSpec()
    spec.input1 = L.Input(shape=make_shape(input_dims))
    # Negative axes: flatten from the third-to-last to the second-to-last axis.
    spec.flatten1 = L.Flatten(spec.input1, axis=-3, end_axis=-2)
    return spec
def defineTestNet(inputShape, layerNeuronNum):
    """Build the encoder-only test net fed by a MemoryData layer.

    Args:
        inputShape: indexable with four entries used as batch size, channels,
            height and width of the MemoryData layer.
        layerNeuronNum: layer widths; entry ``i`` (for ``i >= 1``) is the
            ``num_output`` of ``encoder<i>``. Entry 0 is not read.

    Returns:
        The result of ``NetSpec.to_proto()``.
    """
    depth = len(layerNeuronNum) - 1
    net = caffe.NetSpec()
    # n.data = L.Input(input_param=dict(shape=inputShape))
    memory_cfg = dict(batch_size=inputShape[0], channels=inputShape[1],
                      height=inputShape[2], width=inputShape[3])
    net.data, net.label = L.MemoryData(memory_data_param=memory_cfg, ntop=2)

    net['flatdata'] = L.Flatten(net.data)
    previous = 'flatdata'
    # Stack of InnerProduct layers, each followed by an in-place ReLU.
    for index in range(1, depth + 1):
        fc_name = 'encoder' + str(index)
        net[fc_name] = L.InnerProduct(net[previous],
                                      num_output=layerNeuronNum[index])
        act_name = 'relu_en' + str(index)
        net[act_name] = L.ReLU(net[fc_name], in_place=True)
        previous = act_name
    return net.to_proto()
def MaskNet_Val_MTD(net, from_layer="data", label="label", lr=1, decay=1, visualize=False):
    """Attach the TEST-phase detection and parts output/evaluation layers.

    ``MTD_BODY`` (defined elsewhere) builds the backbone and returns the net
    together with the box-head and parts-head layer lists. The class
    confidences of the box head go through Reshape -> Softmax -> Flatten; the
    parts confidences go through a Sigmoid. Each head then feeds a
    ``DenseDetOut`` layer and a ``DetEval`` layer.

    Reads the module-level settings ``ssdparam``, ``det_out_param``,
    ``det_eval_param``, ``parts_out_param`` and ``parts_eval_param``.
    ``from_layer``, ``lr``, ``decay`` and ``visualize`` are accepted but not
    used by this function.

    Args:
        net: NetSpec to extend.
        label: name of the label top in ``net``.

    Returns:
        The extended net.
    """

    def _test_phase():
        # Fresh include-rule per layer, restricting it to the TEST phase.
        return dict(phase=caffe_pb2.Phase.Value('TEST'))

    net, mbox_layers, parts_layers = MTD_BODY(net)
    net.bbox, net.parts = L.SplitLabel(net[label], name="SplitLabel", ntop=2,
                                       split_label_param=dict(add_parts=True))

    # Box head: turn raw class scores into per-class probabilities.
    net["mbox_conf_reshape"] = L.Reshape(
        mbox_layers[1],
        shape=dict(dim=[0, -1, ssdparam.get("num_classes", 2)]))
    net["mbox_conf_softmax"] = L.Softmax(net["mbox_conf_reshape"], axis=2)
    net["mbox_conf_flatten"] = L.Flatten(net["mbox_conf_softmax"], axis=1)
    mbox_layers[1] = net["mbox_conf_flatten"]

    net.detection_out = L.DenseDetOut(*mbox_layers,
                                      detection_output_param=det_out_param,
                                      include=_test_phase())
    net.detection_eval_accu = L.DetEval(net.detection_out, net.bbox,
                                        detection_evaluate_param=det_eval_param,
                                        include=_test_phase())

    # Parts head: sigmoid confidences.
    net["parts_conf_sigmoid"] = L.Sigmoid(parts_layers[1])
    parts_layers[1] = net["parts_conf_sigmoid"]

    net.parts_out = L.DenseDetOut(*parts_layers,
                                  detection_output_param=parts_out_param,
                                  include=_test_phase())
    net.parts_eval_accu = L.DetEval(net.parts_out, net.parts,
                                    detection_evaluate_param=parts_eval_param,
                                    include=_test_phase())
    return net
def finetuningNet(h5, batch_size, layerNum):
    """Build the full autoencoder (encoders + mirrored decoders) for fine-tuning.

    NOTE(review): ``layerNeuronNum`` and ``learned_param`` are not parameters;
    they are looked up as module-level names and must exist when this runs.

    Args:
        h5: passed as ``source`` to the HDF5Data layer.
        batch_size: batch size of the HDF5Data layer.
        layerNum: number of encoder layers (and of decoder layers).

    Returns:
        The result of ``NetSpec.to_proto()``.
    """
    net = caffe.NetSpec()
    net.data, net.label = L.HDF5Data(source=h5, batch_size=batch_size,
                                     shuffle=True, ntop=2)
    net['flatdata'] = L.Flatten(net.data)
    shared_param = learned_param

    def _fc(bottom, width):
        return L.InnerProduct(bottom, num_output=width, param=shared_param,
                              weight_filler=dict(type='gaussian', std=0.005),
                              bias_filler=dict(type='constant', value=0.1))

    # Encoders 1..layerNum, each followed by an in-place ReLU.
    previous = 'flatdata'
    for level in range(1, layerNum + 1):
        encoder_name = 'encoder' + str(level)
        net[encoder_name] = _fc(net[previous], layerNeuronNum[level])
        previous = 'relu_en' + str(level)
        net[previous] = L.ReLU(net[encoder_name], in_place=True)

    # Decoders layerNum..1; the last one (decoder1) has no ReLU.
    for level in range(layerNum, 0, -1):
        decoder_name = 'decoder' + str(level)
        net[decoder_name] = _fc(net[previous], layerNeuronNum[level - 1])
        if level > 1:
            previous = 'relu_de' + str(level)
            net[previous] = L.ReLU(net[decoder_name], in_place=True)

    # Reconstruction loss between the last decoder and the flattened input.
    net.loss = L.EuclideanLoss(net[decoder_name], net.flatdata)
    return net.to_proto()
def define_model(self):
    """Assemble a four-level encoder/decoder segmentation net and write it to ``self.model_def``.

    Four strided ``SingleConv`` stages (32, 64, 128, 256 outputs) are followed
    by ``Deconv`` stages whose outputs are concatenated with the matching
    encoder feature. ``SingleConv`` and ``Deconv`` are project helpers defined
    elsewhere. No loss layer is attached; the net ends with the flattened
    sigmoid output (``probs_``) and the flattened label (``label_``).

    NOTE(review): the data root is a hard-coded absolute path.
    """
    net = caffe.NetSpec()
    layer_args = dict(phase='train',
                      img_root='/home/x/data/datasets/tianchi/',
                      batch_size=4, random=True)
    net.data, net.label = L.Python(module='data.SegDataLayer',
                                   layer='SegDataLayer',
                                   ntop=2, param_str=str(layer_args))

    # Encoder: every stage uses a 3x3x3 kernel, stride 2 and padding 1.
    bottom = net.data
    for level, width in enumerate((32, 64, 128, 256), start=1):
        name = 'conv%d' % level
        net[name] = SingleConv(bottom, width, kernel_size=[3, 3, 3],
                               stride=[2, 2, 2], padding=[1, 1, 1])
        bottom = net[name]

    # Decoder: Deconv(bottom, second_arg, third_arg), then concat with the skip.
    stages = (
        # deconv,   args,        concat,      skip
        ('deconv3', (256, 128), 'concat1_3', 'conv3'),
        ('deconv2', (256, 64), 'concat1_2', 'conv2'),
        ('deconv1', (128, 32), 'concat1_1', 'conv1'),
    )
    for deconv_name, (arg_a, arg_b), concat_name, skip in stages:
        net[deconv_name] = Deconv(bottom, arg_a, arg_b)
        net[concat_name] = L.Concat(net[deconv_name], net[skip])
        bottom = net[concat_name]

    net.deconv0 = Deconv(bottom, 64, 32)
    net.score = L.Convolution(net.deconv0, kernel_size=1, stride=1, pad=0,
                              num_output=1, weight_filler=dict(type='xavier'))
    net.probs = L.Sigmoid(net.score)
    net.probs_ = L.Flatten(net.probs)
    net.label_ = L.Flatten(net.label)

    with open(self.model_def, 'w') as handle:
        handle.write(str(net.to_proto()))
def spp(bottom, pool1_param, pool2_param, pool3_param):
    """Pool ``bottom`` three ways, flatten each result and concatenate them.

    Each ``pool*_param`` is a mapping with the keys ``'type'``,
    ``'kernel_size'``, ``'stride'`` and ``'pad'``.

    Returns:
        A 7-tuple ``(pool1, pool2, pool3, flat1, flat2, flat3, concat)``.
    """
    pooled = [
        L.Pooling(bottom,
                  pool=cfg['type'],
                  kernel_size=cfg['kernel_size'],
                  stride=cfg['stride'],
                  pad=cfg['pad'])
        for cfg in (pool1_param, pool2_param, pool3_param)
    ]
    flattened = [L.Flatten(branch) for branch in pooled]
    merged = L.Concat(*flattened)
    return tuple(pooled) + tuple(flattened) + (merged,)
def seblock(bottom, o, r):
    """Build the gating branch: global pool -> 1x1 conv -> ReLU -> 1x1 conv -> sigmoid -> flatten.

    Args:
        bottom: input top.
        o: number of outputs of the second 1x1 convolution.
        r: reduction ratio; the first convolution has ``int(o / r)`` outputs.

    Returns:
        The Flatten top holding the sigmoid gate.
    """

    def _pointwise(source, channels):
        return L.Convolution(source, kernel_size=1, pad=0, stride=1,
                             num_output=channels)

    bottleneck = int(o / r)
    pooled = L.Pooling(bottom, pool=P.Pooling.AVE, global_pooling=True)
    reduced = L.ReLU(_pointwise(pooled, bottleneck), in_place=True)
    gate = L.Sigmoid(_pointwise(reduced, o), in_place=True)
    return L.Flatten(gate)
def caffenet(train_file, test_lmdb, input_dim, batch_size=20):
    """Build a stacked sigmoid autoencoder fed by an ImageData layer.

    Encoder widths are 1000, 500, 250 and 30; the decoder mirrors them and
    ends at ``np.prod(input_dim)`` outputs. ``encoder_layer`` is a helper
    defined elsewhere in the module. ``test_lmdb`` is accepted but not used.

    Args:
        train_file: passed as ``source`` to the ImageData layer.
        test_lmdb: unused.
        input_dim: dimensions of one image; their product is the width of the
            final decoder layer.
        batch_size: batch size of the ImageData layer.

    Returns:
        The result of ``NetSpec.to_proto()``.
    """
    # Size of the flattened array of a single image.
    feats = np.prod(input_dim)

    net = caffe.NetSpec()
    net.data, net.labels = L.ImageData(batch_size=batch_size,
                                       source=train_file,
                                       transform_param=dict(scale=1),
                                       ntop=2)
    net.flatdata = L.Flatten(net.data)

    # Encoder: enc1..enc3 are each followed by a Sigmoid; enc4 is the code.
    # The first encoder reads ``data`` directly, not ``flatdata``.
    bottom = net.data
    for index, width in enumerate((1000, 500, 250), start=1):
        fc_name = 'enc%d' % index
        act_name = 'encn%d' % index
        net[fc_name] = encoder_layer(bottom, width)
        net[act_name] = L.Sigmoid(net[fc_name])
        bottom = net[act_name]
    net.enc4 = encoder_layer(bottom, 30)
    bottom = net.enc4

    # Decoder: dec4..dec1, each followed by a Sigmoid.
    for index, width in zip((4, 3, 2, 1), (250, 500, 1000, feats)):
        fc_name = 'dec%d' % index
        act_name = 'decn%d' % index
        net[fc_name] = encoder_layer(bottom, width)
        net[act_name] = L.Sigmoid(net[fc_name])
        bottom = net[act_name]

    net.sig_flat_data = L.Sigmoid(net.flatdata)

    # Loss layers: both compare the reconstruction with the flattened input.
    net.cross_entropy_loss = L.SigmoidCrossEntropyLoss(net.decn1,
                                                       net.sig_flat_data)
    net.euclidean_loss = L.EuclideanLoss(net.flatdata, net.decn1)
    return net.to_proto()
def InterSRReCNN3D_net(hdf5name, batch_size, layers, kernel, numkernels, padding, residual=True):
    """Build a ``layers``-deep convolutional regression net and return its proto.

    conv1 .. conv<layers-1> have ``numkernels`` outputs and an in-place ReLU;
    conv<layers> has a single output and no ReLU. Weights use a gaussian
    filler with ``std = sqrt(2 / (c * kernel**3))`` where ``c`` is 2 for conv1
    and ``numkernels`` for every later convolution.

    Args:
        hdf5name: passed as ``source`` to the HDF5Data layer.
        batch_size: batch size of the HDF5Data layer.
        layers: total number of convolutions.
        kernel: kernel size (a number; it is cubed for the filler std).
        numkernels: outputs of every convolution except the last.
        padding: ``pad`` of every convolution.
        residual: when equal to ``True``, the input data is combined with the
            last convolution through an Eltwise layer (``operation=1``) before
            the loss.

    Returns:
        The result of ``NetSpec.to_proto()``.
    """

    def _conv(bottom, channels, std_factor):
        std = np.sqrt(2 / float(std_factor * kernel ** 3))
        return L.Convolution(
            bottom,
            kernel_size=kernel,
            num_output=channels,
            stride=1,
            pad=padding,
            param=[{'lr_mult': 1}, {'lr_mult': 0.1}],
            weight_filler=dict(type='gaussian', std=std),
            bias_filler=dict(type="constant", value=0),
            engine=1,
        )

    net = caffe.NetSpec()
    net.data, net.label = L.HDF5Data(batch_size=batch_size, source=hdf5name,
                                     ntop=2, include=dict(phase=caffe.TRAIN))

    net.conv1 = _conv(net.data, numkernels, 2)
    net.relu1 = L.ReLU(net.conv1, in_place=True, engine=1)

    for depth in range(2, layers):
        conv_name = 'conv' + str(depth)
        net[conv_name] = _conv(net['conv' + str(depth - 1)], numkernels,
                               numkernels)
        net['relu' + str(depth)] = L.ReLU(net[conv_name], in_place=True,
                                          engine=1)

    last_name = 'conv' + str(layers)
    net[last_name] = _conv(net['conv' + str(layers - 1)], 1, numkernels)

    # Equality with True is kept on purpose: other truthy values take the
    # non-residual path, exactly as before.
    if residual == True:  # noqa: E712
        net.out = L.Eltwise(net[last_name], net.data, operation=1)
        net.out_flat = L.Flatten(net.out)
    else:
        net.out_flat = L.Flatten(net[last_name])

    net.label_flat = L.Flatten(net.label)
    net.loss = L.EuclideanLoss(net.out_flat, net.label_flat)
    return net.to_proto()
def define_model(self):
    """Assemble the two-class classification net and write it to ``self.model_def``.

    Layout: ``BasicConv`` -> three (``Isomorphism_incept_1`` + 2x max-pool)
    stages -> 3x3 convolution -> Flatten -> InnerProduct(150) -> ReLU ->
    InnerProduct(2) -> SoftmaxWithLoss. Reads the module-level
    ``opt.cls_data_root``; ``BasicConv`` and ``Isomorphism_incept_1`` are
    project helpers defined elsewhere.

    NOTE(review): the original comments give the conv4 output as (3,3,3) and
    the flatten size as 16*3*3*3, but conv4 uses kernel 3 with pad=1 on what
    the comments call a (5,5,5) input. Confirm which is intended.
    """
    net = caffe.NetSpec()
    layer_args = dict(
        phase='train',
        data_root=opt.cls_data_root,
        batch_size=16,
        ratio=5,
        augument=True,
    )
    net.data, net.label = L.Python(module='data.ClsDataLayer',
                                   layer='ClsDataLayer',
                                   ntop=2, param_str=str(layer_args))

    net.conv0 = BasicConv(net.data, 16)

    # Three inception stages, each followed by a 2x max-pool.
    # (Original author's notes: 40 -> 20 -> 10 -> 5 per spatial axis.)
    bottom = net.conv0
    for stage, width in enumerate((36, 72, 36), start=1):
        conv_name = 'conv%d' % stage
        pool_name = 'downsample%d' % stage
        net[conv_name] = Isomorphism_incept_1(bottom, width)
        net[pool_name] = L.Pooling(net[conv_name], kernel_size=2, stride=2,
                                   pool=P.Pooling.MAX)
        bottom = net[pool_name]

    xavier = dict(type='xavier')
    net.conv4 = L.Convolution(bottom, kernel_size=3, stride=1, pad=1,
                              num_output=16, weight_filler=dict(xavier))
    net.flatten = L.Flatten(net.conv4)
    net.fc1 = L.InnerProduct(net.flatten, num_output=150,
                             weight_filler=dict(xavier))
    net.fc1_act = L.ReLU(net.fc1, engine=3)
    net.score = L.InnerProduct(net.fc1_act, num_output=2,
                               weight_filler=dict(xavier))
    net.loss = L.SoftmaxWithLoss(net.score, net.label)

    with open(self.model_def, 'w') as handle:
        handle.write(str(net.to_proto()))
def define_model(self):
    """Assemble the residual U-shaped segmentation net and write it to ``self.model_def``.

    The down path registers ``down1`` .. ``down5``. Three ``ResBlock`` layers
    form ``res``. The up path alternates ``ResUp`` with concatenations of
    ``down4`` and ``down2`` and is registered as ``up``. No loss layer is
    attached; the net ends with the flattened sigmoid output (``probs_``) and
    the flattened label (``label_``).

    Reads the module-level ``opt.data_root``; ``SingleConv``, ``ResBlock``,
    ``ResDown`` and ``ResUp`` are project helpers defined elsewhere.
    """
    net = caffe.NetSpec()
    layer_args = dict(phase='train', img_root=opt.data_root,
                      batch_size=4, random=True)
    net.data, net.label = L.Python(module='data.SegDataLayer',
                                   layer='SegDataLayer',
                                   ntop=2, param_str=str(layer_args))

    # Down path.
    net.down1 = SingleConv(net.data, 64, kernel_size=[3, 3, 3],
                           stride=[2, 2, 2])
    net.down2 = ResBlock(net.down1, 64)
    net.down3 = ResDown(net.down2, 128)
    net.down4 = ResBlock(net.down3, 128)
    net.down5 = ResDown(net.down4, 256)

    # Bottleneck: three residual blocks; only the last is named ``res``.
    feature = net.down5
    for _ in range(3):
        feature = ResBlock(feature, 256)
    net.res = feature

    # Up path with skip connections; only the final top is named ``up``.
    feature = ResUp(net.res, 128)
    feature = L.Concat(net.down4, feature)
    feature = ResUp(feature, 64)
    feature = L.Concat(net.down2, feature)
    net.up = ResUp(feature, 32)

    net.out = L.Convolution(net.up, kernel_size=3, stride=1, pad=1,
                            num_output=1, weight_filler=dict(type='xavier'))
    net.probs = L.Sigmoid(net.out)
    net.probs_ = L.Flatten(net.probs)
    net.label_ = L.Flatten(net.label)

    with open(self.model_def, 'w') as handle:
        handle.write(str(net.to_proto()))
def classificationNet(h5, batch_size, layerNeuronNum, layerNum, classNum, learned_param):
    """Build the encoder stack topped by a softmax classifier and return its proto.

    Args:
        h5: passed as ``source`` to the HDF5Data layer.
        batch_size: batch size of the HDF5Data layer.
        layerNeuronNum: layer widths; entry ``i`` (``1 <= i <= layerNum``) is
            the ``num_output`` of ``encoder<i>``.
        layerNum: number of encoder layers.
        classNum: ``num_output`` of the final ``output`` layer.
        learned_param: ``param`` value given to every InnerProduct layer.

    Returns:
        The result of ``NetSpec.to_proto()``.
    """

    def _fc(bottom, width):
        return L.InnerProduct(bottom, num_output=width, param=learned_param,
                              weight_filler=dict(type='gaussian', std=0.005),
                              bias_filler=dict(type='constant', value=0.1))

    net = caffe.NetSpec()
    net.data, net.label = L.HDF5Data(source=h5, batch_size=batch_size,
                                     shuffle=True, ntop=2)
    net['flatdata'] = L.Flatten(net.data)

    # Encoders 1..layerNum, each followed by an in-place ReLU.
    bottom_name = 'flatdata'
    for level in range(1, layerNum + 1):
        encoder_name = 'encoder' + str(level)
        net[encoder_name] = _fc(net[bottom_name], layerNeuronNum[level])
        relu_name = 'relu_en' + str(level)
        net[relu_name] = L.ReLU(net[encoder_name], in_place=True)
        bottom_name = relu_name

    # The classifier reads the last ReLU produced by the loop above.
    net['output'] = _fc(net[relu_name], classNum)
    net.loss = L.SoftmaxWithLoss(net['output'], net.label)
    return net.to_proto()
def add_lstm_encoder(n, bottom, batch_size, lstm_dim, flatten=True, t='', tag=''):
    """Encode ``bottom`` and project it to the LSTM gate input.

    The (optionally flattened) input goes through ``add_conv_enc``, is
    reshaped to ``[-1, batch_size, 2048]`` and passed through ``fc`` with
    ``4 * lstm_dim`` outputs along axis 2. ``add_conv_enc`` and ``fc`` are
    helpers defined elsewhere in the module.

    Args:
        n: NetSpec to extend in place.
        bottom: input top.
        batch_size: second dimension of the reshape.
        lstm_dim: the gate projection has ``4 * lstm_dim`` outputs.
        flatten: whether to flatten ``bottom`` over axes 0..1 first.
        t: suffix of the ``x_gate`` top name.
        tag: suffix of the ``data_flat`` / ``x_reshape`` top names.

    Returns:
        The ``x_gate<t>`` top.
    """
    source = bottom
    if flatten:
        flat_key = 'data_flat' + tag
        n.tops[flat_key] = L.Flatten(bottom, axis=0, end_axis=1)
        source = n.tops[flat_key]

    encoded = add_conv_enc(n, source, tag)

    reshape_key = 'x_reshape' + tag
    n.tops[reshape_key] = L.Reshape(encoded,
                                    shape=dict(dim=[-1, batch_size, 2048]))

    gate_key = 'x_gate' + t
    uniform_init = dict(type='uniform', min=-0.08, max=0.08)
    n.tops[gate_key] = fc(n.tops[reshape_key], 4 * lstm_dim,
                          weight_filler=uniform_init, param_name='Wx', axis=2)
    return n.tops[gate_key]
def MTD_TEST(net, from_layer="data", image="image", lr=1, decay=1):
    """Attach the TEST-phase detection, parts and visualisation layers.

    ``MTD_BODY`` (defined elsewhere) builds the backbone and returns the net
    together with the box-head and parts-head layer lists. Box confidences go
    through Reshape -> Softmax -> Flatten, parts confidences through a
    Sigmoid. Both ``DenseDetOut`` results are concatenated along axis 2 and
    handed to ``VisualMtd`` together with ``net.orig_data``.

    Reads the module-level settings ``ssdparam``, ``det_out_param``,
    ``parts_out_param`` and ``vis_out_param``. ``from_layer``, ``image``,
    ``lr`` and ``decay`` are accepted but not used by this function.

    Returns:
        The extended net.
    """

    def _test_phase():
        # Fresh include-rule per layer, restricting it to the TEST phase.
        return dict(phase=caffe_pb2.Phase.Value('TEST'))

    net, mbox_layers, parts_layers = MTD_BODY(net)

    # Box head: turn raw class scores into per-class probabilities.
    net["mbox_conf_reshape"] = L.Reshape(
        mbox_layers[1],
        shape=dict(dim=[0, -1, ssdparam.get("num_classes", 2)]))
    net["mbox_conf_softmax"] = L.Softmax(net["mbox_conf_reshape"], axis=2)
    net["mbox_conf_flatten"] = L.Flatten(net["mbox_conf_softmax"], axis=1)
    mbox_layers[1] = net["mbox_conf_flatten"]
    net.detection_out = L.DenseDetOut(*mbox_layers,
                                      detection_output_param=det_out_param,
                                      include=_test_phase())

    # Parts head: sigmoid confidences.
    net["parts_conf_sigmoid"] = L.Sigmoid(parts_layers[1])
    parts_layers[1] = net["parts_conf_sigmoid"]
    net.parts_out = L.DenseDetOut(*parts_layers,
                                  detection_output_param=parts_out_param,
                                  include=_test_phase())

    net.roi = L.Concat(net.detection_out, net.parts_out, axis=2)
    net.vis = L.VisualMtd(net.roi, net.orig_data,
                          detection_output_param=vis_out_param)
    return net
def define_model(self):
    """Assemble the ResDown-based two-class classifier and write it to ``self.model_def``.

    Layout: three ``ResDownModule`` stages (32, 64, 128) -> ``SingleConv``
    (128 outputs, kernel 3, stride 1, padding 0) -> Flatten ->
    InnerProduct(250) -> ReLU -> InnerProduct(2) -> SoftmaxWithLoss.
    Reads the module-level ``opt.cls_data_root``; ``ResDownModule`` and
    ``SingleConv`` are project helpers defined elsewhere.
    """
    net = caffe.NetSpec()
    layer_args = dict(phase='train', data_root=opt.cls_data_root,
                      batch_size=16, ratio=5, augument=True)
    net.data, net.label = L.Python(module='data.ClsDataLayer',
                                   layer='ClsDataLayer',
                                   ntop=2, param_str=str(layer_args))

    # Original author's notes: 40 -> 20 -> 10 -> 5 per spatial axis.
    bottom = net.data
    for stage, width in enumerate((32, 64, 128), start=1):
        name = 'conv%d' % stage
        net[name] = ResDownModule(bottom, width)
        bottom = net[name]

    # Original author's note: output is (3,3,3), flattened to 128*3*3*3.
    net.conv4 = SingleConv(bottom, 128, kernel_size=3, stride=1, padding=0)
    net.flatten = L.Flatten(net.conv4)

    net.fc1 = L.InnerProduct(net.flatten, num_output=250,
                             weight_filler=dict(type='xavier'))
    net.fc1_act = L.ReLU(net.fc1, engine=3)
    net.score = L.InnerProduct(net.fc1_act, num_output=2,
                               weight_filler=dict(type='xavier'))
    net.loss = L.SoftmaxWithLoss(net.score, net.label)

    with open(self.model_def, 'w') as handle:
        handle.write(str(net.to_proto()))
def CreateMultiBoxHead(net, data_layer="data", num_classes=[], from_layers=[], use_objectness=False, normalizations=[], use_batchnorm=True, lr_mult=1, use_scale=True, min_sizes=[], max_sizes=[], prior_variance=[0.1], aspect_ratios=[], steps=[], img_height=0, img_width=0, share_location=True, flip=True, clip=True, offset=0.5, inter_layer_depth=[], kernel_size=1, pad=0, conf_postfix='', loc_postfix='', head_postfix='ext/pm', **bn_param):
    """Attach location, confidence and prior-box heads to each source layer.

    For every entry of ``from_layers`` (numbered from 1) the function
    optionally adds a Normalize layer and an intermediate 3x3 conv, then adds
    a location branch, a confidence branch, a PriorBox layer and, if
    ``use_objectness`` is set, an objectness branch. Each branch is
    conv -> Permute(0, 2, 3, 1) -> Flatten(axis=1). The per-source results
    are concatenated into ``mbox_loc``, ``mbox_conf``, ``mbox_priorbox`` and
    optionally ``mbox_objectness``.

    ``ConvBNLayer`` is a project helper defined elsewhere; extra keyword
    arguments (``**bn_param``) are forwarded to it. ``use_scale`` is accepted
    but not used by this function.

    Args:
        net: NetSpec-like object supporting ``keys()``, item access and
            ``update(name, dict)``.
        data_layer: name of the data layer; must already be in ``net``.
        num_classes: positive number of classes (required).
        from_layers: names of the source layers.
        normalizations: per-source scale for Normalize; ``-1`` skips it.
        min_sizes, max_sizes, aspect_ratios, steps: per-source PriorBox
            settings; scalars are wrapped into one-element lists, except for
            ``steps`` entries, which are passed through unchanged.
        inter_layer_depth: per-source width of the intermediate conv
            (``<= 0`` skips it).

    Returns:
        ``[mbox_loc, mbox_conf, mbox_priorbox]`` plus ``mbox_objectness`` when
        ``use_objectness`` is true.

    Raises:
        AssertionError: when ``num_classes`` is missing, a per-source list has
            the wrong length, or ``data_layer`` is not in ``net``.
    """
    assert num_classes, "must provide num_classes"
    assert num_classes > 0, "num_classes must be positive number"
    if normalizations:
        assert len(from_layers) == len(normalizations), \
            "from_layers and normalizations should have same length"
    assert len(from_layers) == len(min_sizes), \
        "from_layers and min_sizes should have same length"
    if max_sizes:
        assert len(from_layers) == len(max_sizes), \
            "from_layers and max_sizes should have same length"
    if aspect_ratios:
        assert len(from_layers) == len(aspect_ratios), \
            "from_layers and aspect_ratios should have same length"
    if steps:
        assert len(from_layers) == len(steps), \
            "from_layers and steps should have same length"
    assert data_layer in net.keys(), "data_layer is not in net's layers"
    if inter_layer_depth:
        assert len(from_layers) == len(inter_layer_depth), \
            "from_layers and inter_layer_depth should have same length"

    def _as_list(value):
        # Only exact ``list`` instances are left untouched.
        return value if type(value) is list else [value]

    def _branch(source, name, channels):
        # conv (no ReLU) -> NCHW to NHWC permute -> flatten from axis 1.
        ConvBNLayer(net, source, name, use_bn=use_batchnorm, use_relu=False,
                    lr_mult=lr_mult, num_output=channels,
                    kernel_size=kernel_size, pad=pad, stride=1, **bn_param)
        permute_name = "{}_perm".format(name)
        net[permute_name] = L.Permute(net[name], order=[0, 2, 3, 1])
        flatten_name = "{}_flat".format(name)
        net[flatten_name] = L.Flatten(net[permute_name], axis=1)
        return net[flatten_name]

    priorbox_layers = []
    loc_layers = []
    conf_layers = []
    objectness_layers = []

    for i in range(len(from_layers)):
        number = i + 1
        from_layer = from_layers[i]

        # Optional normalisation of the source layer.
        if normalizations and normalizations[i] != -1:
            norm_name = "{}{}_norm".format(head_postfix, number)
            net[norm_name] = L.Normalize(
                net[from_layer],
                scale_filler=dict(type="constant", value=normalizations[i]),
                across_spatial=False, channel_shared=False)
            from_layer = norm_name

        # Optional intermediate 3x3 convolution.
        if inter_layer_depth and inter_layer_depth[i] > 0:
            inter_name = "{}{}_inter".format(head_postfix, number)
            ConvBNLayer(net, from_layer, inter_name, use_bn=use_batchnorm,
                        use_relu=True, lr_mult=lr_mult,
                        num_output=inter_layer_depth[i], kernel_size=3, pad=1,
                        stride=1, **bn_param)
            from_layer = inter_name

        # Estimate the number of priors per location.
        min_size = _as_list(min_sizes[i])
        aspect_ratio = []
        if len(aspect_ratios) > i:
            aspect_ratio = _as_list(aspect_ratios[i])
        max_size = []
        if len(max_sizes) > i:
            max_size = _as_list(max_sizes[i])
            if max_size:
                assert len(max_size) == len(min_size), \
                    "max_size and min_size should have same length."
        base_priors = 2 if max_size else 1
        num_priors_per_location = \
            (base_priors + len(aspect_ratio)) * len(min_size)
        if flip:
            num_priors_per_location += len(aspect_ratio) * len(min_size)
        step = steps[i] if len(steps) > i else []

        # Location prediction branch.
        num_loc_output = num_priors_per_location * 4
        if not share_location:
            num_loc_output *= num_classes
        loc_name = "{}{}_mbox_loc{}".format(head_postfix, number, loc_postfix)
        loc_layers.append(_branch(from_layer, loc_name, num_loc_output))

        # Confidence prediction branch.
        conf_name = "{}{}_mbox_conf{}".format(head_postfix, number,
                                              conf_postfix)
        conf_layers.append(
            _branch(from_layer, conf_name,
                    num_priors_per_location * num_classes))

        # Prior generation layer; optional settings are added afterwards.
        prior_name = "{}{}_mbox_priorbox".format(head_postfix, number)
        net[prior_name] = L.PriorBox(net[from_layer], net[data_layer],
                                     min_size=min_size, clip=clip,
                                     variance=prior_variance, offset=offset)
        if max_size:
            net.update(prior_name, {'max_size': max_size})
        if aspect_ratio:
            net.update(prior_name,
                       {'aspect_ratio': aspect_ratio, 'flip': flip})
        if step:
            net.update(prior_name, {'step': step})
        if img_height != 0 and img_width != 0:
            if img_height == img_width:
                net.update(prior_name, {'img_size': img_height})
            else:
                net.update(prior_name,
                           {'img_h': img_height, 'img_w': img_width})
        priorbox_layers.append(net[prior_name])

        # Objectness prediction branch.
        if use_objectness:
            obj_name = "{}{}_mbox_objectness".format(head_postfix, number)
            objectness_layers.append(
                _branch(from_layer, obj_name, num_priors_per_location * 2))

    # Concatenate priorbox, loc, and conf layers.
    heads = [
        ("mbox_loc", loc_layers, 1),
        ("mbox_conf", conf_layers, 1),
        ("mbox_priorbox", priorbox_layers, 2),
    ]
    if use_objectness:
        heads.append(("mbox_objectness", objectness_layers, 1))

    mbox_layers = []
    for name, parts, axis in heads:
        net[name] = L.Concat(*parts, axis=axis)
        mbox_layers.append(net[name])
    return mbox_layers
def flatten_layer(layer_config, bottom_name):
    """Build a caffe ``Flatten`` layer whose bottom is ``bottom_name``.

    ``layer_config`` is accepted but not read by this function.
    Returns the object produced by ``L.Flatten``.
    """
    flatten = L.Flatten(bottom=bottom_name)
    return flatten
def attach(self, netspec, bottom):
    """Attach the multibox head, its loss and the test-time detection layers.

    Reads its configuration from ``self.params`` (keys: ``mbox_source_layers``,
    ``num_classes``, ``normalizations``, ``aspect_ratios``, ``min_sizes``,
    ``max_sizes``, ``is_train``).

    netspec -- net specification the layers are added to; it is indexed by
               layer name and must already contain a ``data`` entry.
    bottom  -- sequence whose first element is appended as the last bottom
               of the MultiBoxLoss layer (presumably the ground-truth label
               top -- verify against callers).

    Nothing is returned; ``netspec`` is modified in place.
    """
    gt_label = bottom[0]
    source_layers = self.params['mbox_source_layers']
    num_classes = self.params['num_classes']
    normalizations = self.params['normalizations']
    aspect_ratios = self.params['aspect_ratios']
    min_sizes = self.params['min_sizes']
    max_sizes = self.params['max_sizes']
    is_train = self.params['is_train']
    use_global_stats = not is_train

    loc_tops = []
    conf_tops = []
    prior_tops = []
    for idx, source in enumerate(source_layers):
        # A normalization value of -1 means "use the source layer directly".
        unit_bottom = source
        if normalizations[idx] != -1:
            unit_bottom = "{}_norm".format(source)
            BaseLegoFunction(
                'Normalize',
                dict(name=unit_bottom,
                     scale_filler=dict(type="constant",
                                       value=normalizations[idx]),
                     across_spatial=False,
                     channel_shared=False)).attach(netspec, [netspec[source]])

        # Estimate number of priors per location given provided parameters.
        ratios = []
        if len(aspect_ratios) > idx:
            ratios = aspect_ratios[idx]
            if type(ratios) is not list:
                ratios = [ratios]
        base_priors = 2 if (max_sizes and max_sizes[idx]) else 1
        # Each aspect ratio is counted twice on top of the base count.
        priors_per_location = base_priors + 2 * len(ratios)

        unit_params = dict(name=unit_bottom,
                           num_classes=num_classes,
                           num_priors_per_location=priors_per_location,
                           min_size=min_sizes[idx],
                           max_size=max_sizes[idx],
                           aspect_ratio=ratios,
                           use_global_stats=use_global_stats,
                           deep_mult=4,
                           type='linear')
        # Alternative unit configuration left by the author:
        # type='deep', depth=3
        unit_tops = MBoxUnitLego(unit_params).attach(
            netspec, [netspec[unit_bottom], netspec['data']])
        loc_tops.append(unit_tops[0])
        conf_tops.append(unit_tops[1])
        prior_tops.append(unit_tops[2])

    # Concatenate the per-layer outputs in the order loc, conf, priorbox.
    mbox_layers = []
    for concat_name, concat_axis, tops in (('mbox_loc', 1, loc_tops),
                                           ('mbox_conf', 1, conf_tops),
                                           ('mbox_priorbox', 2, prior_tops)):
        mbox_layers.append(
            BaseLegoFunction('Concat',
                             dict(name=concat_name,
                                  axis=concat_axis)).attach(netspec, tops))

    # MultiBoxLoss parameters.
    neg_pos_ratio = 3.
    multibox_loss_param = {
        'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
        'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
        'loc_weight': (neg_pos_ratio + 1.) / 4.,
        'num_classes': num_classes,
        'share_location': True,
        'match_type': P.MultiBoxLoss.PER_PREDICTION,
        'overlap_threshold': 0.5,
        'use_prior_for_matching': True,
        'background_label_id': 0,
        'use_difficult_gt': True,
        'do_neg_mining': True,
        'neg_pos_ratio': neg_pos_ratio,
        'neg_overlap': 0.5,
        'code_type': P.PriorBox.CENTER_SIZE,
    }
    loss_param = {
        'normalization': P.Loss.VALID,
    }

    # The loss layer is attached for both train and test configurations but
    # is restricted to the TRAIN phase through its include rule.
    mbox_layers.append(gt_label)
    BaseLegoFunction(
        'MultiBoxLoss',
        dict(name='mbox_loss',
             multibox_loss_param=multibox_loss_param,
             loss_param=loss_param,
             include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
             propagate_down=[True, True, False, False])).attach(
                 netspec, mbox_layers)

    if is_train:
        return

    test_phase = dict(phase=caffe_pb2.Phase.Value('TEST'))

    # parameters for generating detection output.
    det_out_param = {
        'num_classes': num_classes,
        'share_location': True,
        'background_label_id': 0,
        'nms_param': {
            'nms_threshold': 0.45,
            'top_k': 400
        },
        'save_output_param': {
            'output_directory': "./models/voc2007/resnet_36_with4k_inception_trick/expt1/detection/",
            'output_name_prefix': "comp4_det_test_",
            'output_format': "VOC",
            'label_map_file': "data/VOC0712/labelmap_voc.prototxt",
            'name_size_file': "data/VOC0712/test_name_size.txt",
            'num_test_image': 4952,
        },
        'keep_top_k': 200,
        'confidence_threshold': 0.01,
        'code_type': P.PriorBox.CENTER_SIZE,
    }

    # parameters for evaluating detection results.
    det_eval_param = {
        'num_classes': num_classes,
        'background_label_id': 0,
        'overlap_threshold': 0.5,
        'evaluate_difficult_gt': False,
        'name_size_file': "data/VOC0712/test_name_size.txt",
    }

    # Reshape -> softmax -> flatten the confidences, then use the result in
    # place of the raw "mbox_conf" bottom.
    conf_name = "mbox_conf"
    reshape_name = "{}_reshape".format(conf_name)
    softmax_name = "{}_softmax".format(conf_name)
    flatten_name = "{}_flatten".format(conf_name)
    netspec[reshape_name] = L.Reshape(
        netspec[conf_name], shape=dict(dim=[0, -1, num_classes]))
    netspec[softmax_name] = L.Softmax(netspec[reshape_name], axis=2)
    netspec[flatten_name] = L.Flatten(netspec[softmax_name], axis=1)
    mbox_layers[1] = netspec[flatten_name]

    netspec.detection_out = L.DetectionOutput(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=test_phase)
    netspec.detection_eval = L.DetectionEvaluate(
        netspec.detection_out,
        netspec.label,
        detection_evaluate_param=det_eval_param,
        include=test_phase)
# Script section: writes the HDF5 data net to data2.prototxt and builds the
# generator net specification in `n`.
# NOTE(review): `latent` is not defined in this section -- it is expected to
# be defined earlier in the file.
batch_size = 100
num_clas = 2
sub_nets = ('generator2', 'discriminator2', 'data2')

############ creating the data net #############################
data = caffe.NetSpec()
# train.txt is a text file containing the path to the training data folder
data.ECAL, data.TAG = L.HDF5Data(batch_size=batch_size, source="train.txt", ntop=2)
with open('data2.prototxt', 'w') as f:
    f.write(str(data.to_proto()))

############ creating the generator net ########################
n = caffe.NetSpec()
n.feat = L.Input(shape=dict(dim=[batch_size, latent]))  # random array
n.clas = L.Input(shape=dict(dim=[batch_size, 1]))  # array with classes
# class dependant embedding (xavier for glorot_normal in keras)
n.embed = L.Embed(n.clas, input_dim=num_clas, num_output=latent,
                  weight_filler=dict(type='xavier'))
n.flat = L.Flatten(n.embed)
n.mult = L.Eltwise(n.flat, n.feat, operation=0)  # 0 = multiplication mode
n.Dense = L.InnerProduct(n.mult, num_output=7*7*8*8,
                         weight_filler=dict(type='msra'))  # 3136
# The leading dimension follows batch_size instead of repeating the literal
# 100, so the two cannot drift apart.
n.resh = L.Reshape(n.Dense,
                   reshape_param={'shape': {'dim': [batch_size, 7, 7, 8, 8]}})
# (not working for nd) weight_filler=dict(type='msra') => keras he_uniform
n.conv5 = L.Convolution(n.resh, num_output=64, kernel_size=[6, 6, 8],
                        pad=[2, 2, 3], engine=1)
n.relu5 = L.ReLU(n.conv5, negative_slope=0.3, engine=1)
n.bn5 = L.BatchNorm(n.relu5, in_place=True)
# f=2, kernel_size:{{2*f- f%2}} stride:{{f}} num_output:{{C}} group:{{C}}
# pad:{{ceil((f-1)/2.)}} (gives error for nd) weight_filler: "bilinear"
n.upsmpl5 = L.Deconvolution(n.bn5,
                            convolution_param=dict(num_output=1, group=1,
                                                   kernel_size=4, stride=2,
                                                   pad=1))
# (not working for nd) weight_filler=dict(type='msra') => keras he_uniform
n.conv4 = L.Convolution(n.upsmpl5, num_output=6, kernel_size=[6, 5, 8],
                        pad=[2, 2, 0], engine=1)
n.relu4 = L.ReLU(n.conv4, negative_slope=0.3, engine=1)
n.bn4 = L.BatchNorm(n.relu4, in_place=True)
# f = [2, 2, 3]
n.upsmpl4 = L.Deconvolution(n.bn4,
                            convolution_param=dict(num_output=1, group=1,
                                                   kernel_size=[4, 4, 5],
                                                   stride=[2, 2, 3], pad=1))
# (not working for nd) weight_filler=dict(type='msra') => keras he_uniform
n.conv3 = L.Convolution(n.upsmpl4, num_output=6, kernel_size=[3, 3, 8],
                        pad=[1, 0, 3], engine=1)
n.relu3 = L.ReLU(n.conv3, negative_slope=0.3, engine=1)
# (not working for nd) weight_filler=dict(type='xavier')
n.conv2 = L.Convolution(n.relu3, num_output=1, kernel_size=[2, 2, 2],
                        pad=[2, 0, 3], engine=1)
n.generated = L.ReLU(n.conv2, negative_slope=0.3, engine=1)
def generate_caffe_prototxt(self, caffe_net, layer):
    """Add a Flatten layer on top of ``layer`` and register it in the net.

    The layer is created with ``axis=self.axis`` and stored in ``caffe_net``
    under the key ``self.g_name``.  The newly created layer is returned.
    """
    flattened = L.Flatten(layer, axis=self.axis)
    caffe_net[self.g_name] = flattened
    return flattened
def SsdDetector(net, train=True, data_layer="data", gt_label="label",
                net_width=300, net_height=300, basenet="VGG",
                visualize=False, extra_data="data", eval_enable=True, **ssdparam):
    """Create an SSD detector.

    Args:
        net: net specification the layers are added to.
        train: ``True`` builds the TRAIN net (loss layer); any other value
            builds the TEST net (detection output / evaluation layers).
        data_layer / gt_label: names of the data input and the label input.
        net_width / net_height: input size of the network.
        basenet: feature network, one of "VGG", "Res101", "Res50", "PVA"
            or "Yolo".
        visualize: when set, ``net[extra_data]`` is appended as an extra
            bottom of the detection output layer and no evaluation layer is
            created.
        extra_data: name of the layer used when ``visualize`` is set.
        eval_enable: create the DetectionEvaluate layer (TEST net only); when
            false the detection output is fed to a Silence layer instead.
        **ssdparam: parameter list used by the SSD detector, e.g.
            ``num_classes`` (number of classes to predict), the
            ``multilayers_*``, ``multiloss_*``, ``detectionout_*`` and
            ``detectioneval_*`` options, and ``combine_yolo_ssd`` (treated as
            False when absent).

    Returns:
        The whole SSD detector network.

    Raises:
        ValueError: for an unsupported ``basenet`` or an unknown confidence
            loss type.
    """
    # Base network.
    if basenet == "VGG":
        net = VGG16Net(net, from_layer=data_layer, fully_conv=True,
                       reduced=True, dilated=True, dropout=False)
        base_feature_layers = ['conv4_3', 'fc7']
    elif basenet == "Res101":
        net = ResNet101Net(net, from_layer=data_layer, use_pool5=False)
        # 1/8, 1/16, 1/32
        base_feature_layers = ['res3b3', 'res4b22', 'res5c']
    elif basenet == "Res50":
        net = ResNet50Net(net, from_layer=data_layer, use_pool5=False)
        base_feature_layers = ['res3d', 'res4f', 'res5c']
    elif basenet == "PVA":
        net = PvaNet(net, from_layer=data_layer)
        # 1/8, 1/16, 1/32
        base_feature_layers = [
            'conv4_1/incep/pre', 'conv5_1/incep/pre', 'conv5_4'
        ]
    elif basenet == "Yolo":
        net = YoloNet(net, from_layer=data_layer)
        base_feature_layers = ssdparam.get("multilayers_feature_map", [])
    else:
        raise ValueError(
            "only VGG16, Res50/101, PVANet and Yolo are supported in current version."
        )

    # NOTE: the extra SSD conv layers (AddSsdExtraConvLayers) are disabled in
    # this version.  The former settings were add_layers=3 for VGG and 2 for
    # the ResNet/PVA nets, first_channels=256, second_channels=512.

    # Previously feature_layers was only bound in the "Yolo" branch, so every
    # other base network failed with UnboundLocalError right below.
    feature_layers = base_feature_layers

    result = []
    for item in feature_layers:
        if not isinstance(item, (list, tuple)):
            # A bare layer name (as used by the fixed base networks) is taken
            # as-is instead of being iterated character by character.
            result.append(item)
            continue
        if len(item) == 1:
            result.append(item[0])
            continue
        # Several layers are merged into one unified layer named after the
        # concatenation of their names.
        name = "".join(item)
        tags = ["Down", "Ref"]
        down_methods = [["Reorg"]]
        UnifiedMultiScaleLayers(net, layers=item, tags=tags,
                                unifiedlayer=name, dnsampleMethod=down_methods)
        result.append(name)
    feature_layers = result

    num_classes = ssdparam.get("num_classes", 2)
    # Consistent with every other option: a missing key means "disabled"
    # instead of raising KeyError.
    combine_yolo_ssd = ssdparam.get("combine_yolo_ssd", False)
    conf_loss_type = ssdparam.get("multiloss_conf_loss_type",
                                  P.MultiBoxLoss.SOFTMAX)

    # Create the SSD detector headers.
    mbox_layers = SsdDetectorHeaders(
        net,
        min_ratio=ssdparam.get("multilayers_min_ratio", 15),
        max_ratio=ssdparam.get("multilayers_max_ratio", 90),
        boxsizes=ssdparam.get("multilayers_boxsizes", []),
        net_width=net_width,
        net_height=net_height,
        data_layer=data_layer,
        num_classes=num_classes,
        from_layers=feature_layers,
        use_batchnorm=ssdparam.get("multilayers_use_batchnorm", True),
        prior_variance=ssdparam.get("multilayers_prior_variance",
                                    [0.1, 0.1, 0.2, 0.2]),
        normalizations=ssdparam.get("multilayers_normalizations", []),
        aspect_ratios=ssdparam.get("multilayers_aspect_ratios", []),
        flip=ssdparam.get("multilayers_flip", True),
        clip=ssdparam.get("multilayers_clip", False),
        inter_layer_channels=ssdparam.get("multilayers_inter_layer_channels", []),
        kernel_size=ssdparam.get("multilayers_kernel_size", 3),
        pad=ssdparam.get("multilayers_pad", 1))

    if train == True:
        loss_param = get_loss_param(normalization=ssdparam.get(
            "multiloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])

        # Options shared by the plain multibox loss and the combined
        # Yolo/SSD ("multimcbox") loss.
        common_loss_kwargs = dict(
            loc_loss_type=ssdparam.get("multiloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            loc_weight=ssdparam.get("multiloss_loc_weight", 1),
            conf_weight=ssdparam.get("multiloss_conf_weight", 1),
            num_classes=num_classes,
            share_location=ssdparam.get("multiloss_share_location", True),
            match_type=ssdparam.get("multiloss_match_type", P.MultiBoxLoss.PER_PREDICTION),
            overlap_threshold=ssdparam.get("multiloss_overlap_threshold", 0.5),
            use_prior_for_matching=ssdparam.get("multiloss_use_prior_for_matching", True),
            background_label_id=ssdparam.get("multiloss_background_label_id", 0),
            use_difficult_gt=ssdparam.get("multiloss_use_difficult_gt", False),
            do_neg_mining=ssdparam.get("multiloss_do_neg_mining", True),
            neg_pos_ratio=ssdparam.get("multiloss_neg_pos_ratio", 3),
            neg_overlap=ssdparam.get("multiloss_neg_overlap", 0.5),
            code_type=ssdparam.get("multiloss_code_type", P.PriorBox.CENTER_SIZE),
            encode_variance_in_target=ssdparam.get("multiloss_encode_variance_in_target", False),
            map_object_to_agnostic=ssdparam.get("multiloss_map_object_to_agnostic", False),
            name_to_label_file=ssdparam.get("multiloss_name_to_label_file", ""))

        # Create the loss layer.
        if not combine_yolo_ssd:
            multiboxloss_param = get_multiboxloss_param(
                conf_loss_type=conf_loss_type,
                **common_loss_kwargs)
            net["mbox_loss"] = L.MultiBoxLoss(
                *mbox_layers,
                multibox_loss_param=multiboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        else:
            multimcboxloss_param = get_multimcboxloss_param(
                rescore=ssdparam.get("multiloss_rescore", True),
                object_scale=ssdparam.get("multiloss_object_scale", 1),
                noobject_scale=ssdparam.get("multiloss_noobject_scale", 1),
                class_scale=ssdparam.get("multiloss_class_scale", 1),
                loc_scale=ssdparam.get("multiloss_loc_scale", 1),
                **common_loss_kwargs)
            net["mbox_loss"] = L.MultiMcBoxLoss(
                *mbox_layers,
                multimcbox_loss_param=multimcboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        return net

    # TEST net.
    test_phase = dict(phase=caffe_pb2.Phase.Value('TEST'))

    # Turn the raw confidences (mbox_layers[1]) into scores.
    if not combine_yolo_ssd:
        if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "mbox_conf_reshape"
            net[reshape_name] = L.Reshape(
                mbox_layers[1], shape=dict(dim=[0, -1, num_classes]))
            softmax_name = "mbox_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_layers[1] = net[flatten_name]
        elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
            mbox_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")

    det_out_param = get_detection_out_param(
        num_classes=num_classes,
        share_location=ssdparam.get("multiloss_share_location", True),
        background_label_id=ssdparam.get("multiloss_background_label_id", 0),
        code_type=ssdparam.get("multiloss_code_type", P.PriorBox.CENTER_SIZE),
        variance_encoded_in_target=ssdparam.get("multiloss_encode_variance_in_target", False),
        conf_threshold=ssdparam.get("detectionout_conf_threshold", 0.01),
        nms_threshold=ssdparam.get("detectionout_nms_threshold", 0.45),
        boxsize_threshold=ssdparam.get("detectionout_boxsize_threshold", 0.001),
        top_k=ssdparam.get("detectionout_top_k", 30),
        visualize=ssdparam.get("detectionout_visualize", False),
        visual_conf_threshold=ssdparam.get("detectionout_visualize_conf_threshold", 0.5),
        visual_size_threshold=ssdparam.get("detectionout_visualize_size_threshold", 0),
        display_maxsize=ssdparam.get("detectionout_display_maxsize", 1000),
        line_width=ssdparam.get("detectionout_line_width", 4),
        color=ssdparam.get("detectionout_color", [[0, 255, 0], ]))

    if visualize:
        mbox_layers.append(net[extra_data])

    if not combine_yolo_ssd:
        net.detection_out = L.DetectionOutput(
            *mbox_layers,
            detection_output_param=det_out_param,
            include=test_phase)
    else:
        net.detection_out = L.DetectionMultiMcOutput(
            *mbox_layers,
            detection_output_param=det_out_param,
            include=test_phase)

    if not visualize and eval_enable:
        # Create the evaluation layer.
        det_eval_param = get_detection_eval_param(
            num_classes=num_classes,
            background_label_id=ssdparam.get("multiloss_background_label_id", 0),
            evaluate_difficult_gt=ssdparam.get("detectioneval_evaluate_difficult_gt", False),
            boxsize_threshold=ssdparam.get("detectioneval_boxsize_threshold",
                                           [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            iou_threshold=ssdparam.get("detectioneval_iou_threshold", [0.9, 0.75, 0.5]),
            name_size_file=ssdparam.get("detectioneval_name_size_file", ""))
        net.detection_eval = L.DetectionEvaluate(
            net.detection_out, net[gt_label],
            detection_evaluate_param=det_eval_param,
            include=test_phase)

    if not eval_enable:
        # "slience" (sic) is the layer name written to the prototxt; it is
        # kept unchanged so generated nets keep the same layer names.
        net.slience = L.Silence(net.detection_out, ntop=0,
                                include=test_phase)
    return net
def setLayers_twoBranches(data_source, batch_size, layername, kernel, stride, outCH, label_name, transform_param_in, deploy=False, batchnorm=0, lr_mult_distro=[1, 1, 1]):
    """Build the two-branch network and return it as prototxt text.

    Args:
        data_source: data source path; when it contains "lmdb" a CPMData
            layer is used, otherwise an HDF5Data layer.
        batch_size: batch size of the data layer.
        layername: list of layer codes processed in order: 'V' (conv+ReLU
            with VGG-style names), 'B' (start a new conv block), 'C' (conv),
            'C2' (one conv per branch), 'P' (max pooling), 'L' / 'L2' / 'L3'
            (loss, ends a stage), 'D' (dropout), '@' (concat of both branches
            with the shared features), '$' (mark the shared features).
        kernel, stride, outCH: per-layer kernel size, stride and number of
            outputs; each must have the same length as ``layername``.
            ``outCH`` is not modified.
        label_name: names of the label tops.
        transform_param_in: ``cpm_transform_param`` of the CPMData layer;
            its ``num_parts`` entry is read in the lmdb training case.
        deploy: ``False`` produces a train/test net, anything else a deploy
            net whose input is declared in a text header.
        batchnorm: 1 inserts BatchNorm layers before the ReLUs of the 'C' and
            'C2' layers.
        lr_mult_distro: learning-rate multipliers: [0] for 'V' layers, [1]
            for image-state convs, [2] for fuse-state convs and [3] as the
            loss weight of the first branch in 'L2' / 'L3'.  NOTE: the
            default has only three entries, so nets containing 'L2' or 'L3'
            must pass a list with at least four.

    Returns:
        The net definition as a string.
    """
    # it is tricky to produce the deploy prototxt file, as the data input is
    # not from a layer, so we have to create a workaround.
    # producing training and testing prototxt files is pretty straight forward
    n = caffe.NetSpec()
    assert len(layername) == len(kernel)
    assert len(layername) == len(stride)
    assert len(layername) == len(outCH)

    # Work on a copy: the 'C2' handling overrides channel counts and must not
    # leak that change into the caller's list.
    outCH = list(outCH)

    training = (deploy == False)
    use_hdf5 = training and "lmdb" not in data_source

    def add_conv(name, bottom, index, lr_m):
        # Convolution with "same"-style padding for odd kernels; the bias
        # gets twice the weight learning rate and no weight decay.
        n.tops[name] = L.Convolution(
            n.tops[bottom],
            kernel_size=kernel[index],
            num_output=outCH[index],
            pad=int(math.floor(kernel[index] / 2)),
            param=[
                dict(lr_mult=lr_m, decay_mult=1),
                dict(lr_mult=lr_m * 2, decay_mult=0)
            ],
            weight_filler=dict(type='gaussian', std=0.01),
            bias_filler=dict(type='constant'))
        print('%s\tch=%d\t%.1f' % (name, outCH[index], lr_m))

    def add_batchnorm(name, bottom):
        # BatchNorm whose three parameter blobs are excluded from learning.
        n.tops[name] = L.BatchNorm(
            n.tops[bottom],
            param=[dict(lr_mult=0), dict(lr_mult=0), dict(lr_mult=0)])

    if use_hdf5:
        hdf5_param = dict(batch_size=batch_size, source=data_source)
        if len(label_name) == 1:
            n.data, n.tops[label_name[0]] = L.HDF5Data(
                hdf5_data_param=hdf5_param, ntop=2)
        elif len(label_name) == 2:
            n.data, n.tops[label_name[0]], n.tops[label_name[1]] = L.HDF5Data(
                hdf5_data_param=hdf5_param, ntop=3)
    elif training:
        # Read from the parameter that is actually passed in (the original
        # referenced an undefined name `transform_param`), and only in the
        # one branch that needs it.
        num_parts = transform_param_in['num_parts']
        n.data, n.tops['label'] = L.CPMData(
            data_param=dict(backend=1, source=data_source,
                            batch_size=batch_size),
            cpm_transform_param=transform_param_in,
            ntop=2)
        (n.tops[label_name[2]], n.tops[label_name[3]],
         n.tops[label_name[4]], n.tops[label_name[5]]) = L.Slice(
             n.label,
             slice_param=dict(
                 axis=1,
                 slice_point=[38, num_parts + 1, num_parts + 39]),
             ntop=4)
        n.tops[label_name[0]] = L.Eltwise(n.tops[label_name[2]],
                                          n.tops[label_name[4]],
                                          operation=P.Eltwise.PROD)
        n.tops[label_name[1]] = L.Eltwise(n.tops[label_name[3]],
                                          n.tops[label_name[5]],
                                          operation=P.Eltwise.PROD)
    else:
        # produce data definition for deploy net:
        # make an empty "data" layer so the next layer accepting input will
        # be able to take the correct blob name "data"; it is removed from
        # the serialization string at the end, since it is just a placeholder
        n.data = L.Layer()

    # something special before everything
    n.image, n.center_map = L.Slice(n.data,
                                    slice_param=dict(axis=1, slice_point=3),
                                    ntop=2)
    n.silence2 = L.Silence(n.center_map, ntop=0)

    # just follow arrays..CPCPCPCPCCCC....
    last_layer = ['image', 'image']  # current top name of each branch
    stage = 1
    conv_counter = 1
    pool_counter = 1
    drop_counter = 1
    local_counter = 1
    state = 'image'  # can be image or fuse
    share_point = 0

    for idx, kind in enumerate(layername):
        if kind == 'V':  # pretrained VGG layers
            conv_name = 'conv%d_%d' % (pool_counter, local_counter)
            add_conv(conv_name, last_layer[0], idx, lr_mult_distro[0])
            last_layer[0] = conv_name
            last_layer[1] = conv_name
            relu_name = 'relu%d_%d' % (pool_counter, local_counter)
            n.tops[relu_name] = L.ReLU(n.tops[last_layer[0]], in_place=True)
            local_counter += 1
            print(relu_name)

        elif kind == 'B':
            pool_counter += 1
            local_counter = 1

        elif kind == 'C':
            if state == 'image':
                # no image state in subsequent stages
                conv_name = 'conv%d_%d_CPM' % (pool_counter, local_counter)
                lr_m = lr_mult_distro[1]
            else:  # fuse
                conv_name = 'Mconv%d_stage%d' % (conv_counter, stage)
                lr_m = lr_mult_distro[2]
                conv_counter += 1
            add_conv(conv_name, last_layer[0], idx, lr_m)
            last_layer[0] = conv_name
            last_layer[1] = conv_name

            # The conv right before a loss layer gets no BatchNorm / ReLU.
            if layername[idx + 1] != 'L':
                if state == 'image':
                    bn_name = 'bn%d_stage%d' % (conv_counter, stage)
                    relu_name = 'relu%d_%d_CPM' % (pool_counter,
                                                   local_counter)
                else:
                    bn_name = 'Mbn%d_stage%d' % (conv_counter, stage)
                    relu_name = 'Mrelu%d_stage%d' % (conv_counter, stage)
                if batchnorm == 1:
                    add_batchnorm(bn_name, last_layer[0])
                    last_layer[0] = bn_name
                n.tops[relu_name] = L.ReLU(n.tops[last_layer[0]],
                                           in_place=True)
                print(relu_name)
            local_counter += 1

        elif kind == 'C2':
            next_kind = layername[idx + 1]
            feeds_loss = next_kind == 'L2' or next_kind == 'L3'
            for level in range(0, 2):
                if state == 'image':
                    # no image state in subsequent stages
                    conv_name = 'conv%d_%d_CPM_L%d' % (
                        pool_counter, local_counter, level + 1)
                    lr_m = lr_mult_distro[1]
                else:  # fuse
                    conv_name = 'Mconv%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                    lr_m = lr_mult_distro[2]
                if feeds_loss:
                    # Output convs use fixed channel counts: 38 for the first
                    # branch and 19 for the second.
                    outCH[idx] = 38 if level == 0 else 19
                add_conv(conv_name, last_layer[level], idx, lr_m)
                last_layer[level] = conv_name

                if not feeds_loss:
                    if state == 'image':
                        bn_name = 'bn%d_stage%d_L%d' % (conv_counter, stage,
                                                        level + 1)
                        relu_name = 'relu%d_%d_CPM_L%d' % (
                            pool_counter, local_counter, level + 1)
                    else:
                        bn_name = 'Mbn%d_stage%d_L%d' % (conv_counter, stage,
                                                         level + 1)
                        relu_name = 'Mrelu%d_stage%d_L%d' % (conv_counter,
                                                             stage, level + 1)
                    if batchnorm == 1:
                        add_batchnorm(bn_name, last_layer[level])
                        last_layer[level] = bn_name
                    n.tops[relu_name] = L.ReLU(n.tops[last_layer[level]],
                                               in_place=True)
                    print(relu_name)
            conv_counter += 1
            local_counter += 1

        elif kind == 'P':  # Pooling
            pool_name = 'pool%d_stage%d' % (pool_counter, stage)
            n.tops[pool_name] = L.Pooling(n.tops[last_layer[0]],
                                          kernel_size=kernel[idx],
                                          stride=stride[idx],
                                          pool=P.Pooling.MAX)
            last_layer[0] = pool_name
            pool_counter += 1
            local_counter = 1
            conv_counter += 1
            print(last_layer[0])

        elif kind == 'L':
            # Loss: n.loss layer is only in training and testing nets, but
            # not in deploy net.
            if use_hdf5:
                vec_name = 'map_vec_stage%d' % stage
                n.tops[vec_name] = L.Flatten(n.tops[last_layer[0]])
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[vec_name], n.tops[label_name[1]])
            elif training:
                level = 1
                name = 'weight_stage%d' % stage
                n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                         n.tops[label_name[(level + 2)]],
                                         operation=P.Eltwise.PROD)
                n.tops['loss_stage%d' % stage] = L.EuclideanLoss(
                    n.tops[name], n.tops[label_name[level]])
            print('loss %d' % stage)
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif kind == 'L2':
            # Loss: n.loss layer is only in training and testing nets, but
            # not in deploy net.
            weight = [lr_mult_distro[3], 1]
            for level in range(0, 2):
                loss_name = 'loss_stage%d_L%d' % (stage, level + 1)
                if use_hdf5:
                    # Use the per-branch flattened map created on the line
                    # below; the original looked up 'map_vec_stage%d', a top
                    # that is never created in this branch.
                    vec_name = 'map_vec_stage%d_L%d' % (stage, level + 1)
                    n.tops[vec_name] = L.Flatten(n.tops[last_layer[level]])
                    n.tops[loss_name] = L.EuclideanLoss(
                        n.tops[vec_name],
                        n.tops[label_name[level]],
                        loss_weight=weight[level])
                elif training:
                    name = 'weight_stage%d_L%d' % (stage, level + 1)
                    n.tops[name] = L.Eltwise(n.tops[last_layer[level]],
                                             n.tops[label_name[(level + 2)]],
                                             operation=P.Eltwise.PROD)
                    n.tops[loss_name] = L.EuclideanLoss(
                        n.tops[name],
                        n.tops[label_name[level]],
                        loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif kind == 'L3':
            # Loss: n.loss layer is only in training and testing nets, but
            # not in deploy net.
            weight = [lr_mult_distro[3], 1]
            if training:
                level = 0
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.Euclidean2Loss(
                    n.tops[last_layer[level]],
                    n.tops[label_name[level]],
                    n.tops[label_name[2]],
                    loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
                level = 1
                n.tops['loss_stage%d_L%d' % (stage, level + 1)] = L.EuclideanLoss(
                    n.tops[last_layer[level]],
                    n.tops[label_name[level]],
                    loss_weight=weight[level])
                print('loss %d level %d' % (stage, level + 1))
            stage += 1
            conv_counter = 1
            pool_counter = 1
            drop_counter = 1
            local_counter = 1
            state = 'image'

        elif kind == 'D':
            if training:
                n.tops['drop%d_stage%d' % (drop_counter, stage)] = L.Dropout(
                    n.tops[last_layer[0]],
                    in_place=True,
                    dropout_param=dict(dropout_ratio=0.5))
                drop_counter += 1

        elif kind == '@':
            concat_name = 'concat_stage%d' % stage
            n.tops[concat_name] = L.Concat(n.tops[last_layer[0]],
                                           n.tops[last_layer[1]],
                                           n.tops[share_point],
                                           concat_param=dict(axis=1))
            local_counter = 1
            state = 'fuse'
            last_layer[0] = concat_name
            last_layer[1] = concat_name
            print(last_layer)

        elif kind == '$':
            share_point = last_layer[0]
            pool_counter += 1
            local_counter = 1
            print('share')

    if training:
        return str(n.to_proto())

    # for generating the deploy net:
    # generate the input information header string
    input_name = "data"
    deploy_str = 'input: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}\ninput_dim: {}'.format(
        '"' + input_name + '"', 1, 4, 368, 368)
    # assemble the input header with the net layers string. remove the first
    # placeholder layer from the net string.
    return deploy_str + '\n' + 'layer {' + 'layer {'.join(
        str(n.to_proto()).split('layer {')[2:])
AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult) mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers, use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes, aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations, num_classes=num_classes, share_location=share_location, flip=flip, clip=clip, prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult) conf_name = "mbox_conf" if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX: reshape_name = "{}_reshape".format(conf_name) net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes])) softmax_name = "{}_softmax".format(conf_name) net[softmax_name] = L.Softmax(net[reshape_name], axis=2) flatten_name = "{}_flatten".format(conf_name) net[flatten_name] = L.Flatten(net[softmax_name], axis=1) mbox_layers[1] = net[flatten_name] elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC: sigmoid_name = "{}_sigmoid".format(conf_name) net[sigmoid_name] = L.Sigmoid(net[conf_name]) mbox_layers[1] = net[sigmoid_name] net.detection_out = L.DetectionOutput(*mbox_layers, detection_output_param=det_out_param, include=dict(phase=caffe_pb2.Phase.Value('TEST'))) net.detection_eval = L.DetectionEvaluate(net.detection_out, net.label, detection_evaluate_param=det_eval_param, include=dict(phase=caffe_pb2.Phase.Value('TEST'))) with open(test_net_file, 'w') as f: print('name: "{}_test"'.format(model_name), file=f)