def MaskNet_Val_Det(net, from_layer="data", label="label", lr=1, decay=1, visualize=False):
    """Assemble the detection-only validation graph of MaskNet.

    A compressed YOLO-style backbone plus a conv6 stage are fused into two
    feature maps that feed an SSD header.  Detections come from ``DetOut`` and
    are scored against the bbox part of the label by ``DetEval``; both layers
    are restricted to the TEST phase.

    Args:
        net: network spec object the layers are added to (indexed by name).
        from_layer: accepted for interface compatibility; not read here, the
            backbone is always built on "data".
        label: name of the label top that ``SplitLabel`` splits into
            ``bbox``, ``kps`` and ``mask``.
        lr: accepted for interface compatibility; not read here (the backbone
            is built with a fixed ``lr=0.1``).
        decay: accepted for interface compatibility; not read here (the
            backbone is built with a fixed ``decay=0.1``).
        visualize: when true, the "data" top is appended to the ``DetOut``
            inputs.

    Returns:
        ``net`` with the layers added.

    Reads the module-level ``ssdparam``, ``bbox_loss_param``,
    ``det_out_param``, ``det_eval_param``, ``Input_Height`` and
    ``Input_Width``.
    """
    # Alternative backbone (disabled):
    #   YoloNetPart(net, from_layer=from_layer, use_bn=True, use_layers=6,
    #               use_sub_layers=7, lr=lr, decay=decay)
    net = YoloNetPartCompress(
        net,
        from_layer="data",
        use_bn=True,
        use_layers=5,
        use_sub_layers=5,
        strid_conv=[1, 1, 1, 0, 0],
        final_pool=False,
        lr=0.1,
        decay=0.1,
    )
    net = addconv6(
        net,
        from_layer="conv5_5",
        use_bn=True,
        conv6_output=[128, 128, 128, 128, 128, 128],
        conv6_kernal_size=[3, 3, 3, 3, 3, 3],
        pre_name="conv6",
        start_pool=True,
        lr_mult=1,
        decay_mult=1,
        n_group=1,
    )

    # Split the packed label into its bbox / keypoint / mask parts.
    net.bbox, net.kps, net.mask = L.SplitLabel(net[label], name='SplitLabel', ntop=3)

    # (fused layer name, source layers, down-sampling method); order matters
    # because it is the order in which layers are added to the net.
    fusion_specs = (
        ("featuremap2", ["conv5_5", "conv6_6"], "MaxPool"),
        ("featuremap1", ["conv4_3", "conv5_5"], "Reorg"),
    )
    for fused_name, source_layers, down_method in fusion_specs:
        net = UnifiedMultiScaleLayers(
            net,
            layers=source_layers,
            tags=["Down", "Ref"],
            unifiedlayer=fused_name,
            dnsampleMethod=[[down_method]],
        )

    mbox_layers = SSDHeader(
        net,
        data_layer="data",
        from_layers=["featuremap1", "featuremap2"],
        input_height=Input_Height,
        input_width=Input_Width,
        **ssdparam
    )

    conf_loss_type = bbox_loss_param.get("multiloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
    if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
        # Reshape -> per-prior softmax -> flatten, then swap it in as the
        # confidence input of the detection output layer.
        net["mbox_conf_reshape"] = L.Reshape(
            mbox_layers[1],
            shape=dict(dim=[0, -1, ssdparam.get("num_classes", 2)]))
        net["mbox_conf_softmax"] = L.Softmax(net["mbox_conf_reshape"], axis=2)
        net["mbox_conf_flatten"] = L.Flatten(net["mbox_conf_softmax"], axis=1)
        mbox_layers[1] = net["mbox_conf_flatten"]
    # A LOGISTIC branch (Sigmoid named "mbox_conf_sigmoid" on mbox_layers[1])
    # exists only as disabled code in the original.

    if visualize:
        mbox_layers.append(net["data"])

    test_phase = caffe_pb2.Phase.Value('TEST')
    net.detection_out = L.DetOut(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=test_phase))
    net.detection_eval_accu = L.DetEval(
        net.detection_out,
        net.bbox,
        detection_evaluate_param=det_eval_param,
        include=dict(phase=test_phase))
    return net
def MaskNet_Test(net, from_layer="data", image="image", lr=1, decay=1):
    """Assemble the MaskNet inference graph: detection, keypoints, masks, display.

    Detections from the SSD header drive ``RoiAlign`` on the fused
    ``convf_mask`` feature map; the ROI features feed a keypoint head
    (flatten -> softmax -> ``PeaksFind``) and a mask head
    (sigmoid -> threshold at 0.5).  ``VisualMask`` consumes all results.

    Args:
        net: network spec object the layers are added to.  It must already
            expose an ``orig_data`` top, which ``VisualMask`` reads.
        from_layer: accepted for interface compatibility; not read here, the
            backbone is always built on "data".
        image: accepted for interface compatibility; not read here.
        lr: learning-rate value forwarded to ``KpsHeader`` and ``MaskHeader``
            only (the backbone uses a fixed 0.1).
        decay: decay value forwarded to ``KpsHeader`` and ``MaskHeader`` only
            (the backbone uses a fixed 0.1).

    Returns:
        ``net`` with the layers added.

    Reads module-level configuration: ``ssdparam``, ``det_out_param``,
    ``roi_align_param``, ``visual_mask_param``, ``Input_Height``,
    ``Input_Width``, ``Rh_Kps``, ``Rw_Kps`` and the ``*_of_kps`` /
    ``*_of_mask`` / ``*_use_*_layers`` head settings.
    """
    # Alternative backbone (disabled):
    #   YoloNetPart(net, from_layer=from_layer, use_bn=True, use_layers=6,
    #               use_sub_layers=7, lr=lr, decay=decay)
    net = YoloNetPartCompress(
        net,
        from_layer="data",
        use_bn=True,
        use_layers=5,
        use_sub_layers=5,
        strid_conv=[1, 1, 1, 0, 0],
        final_pool=False,
        lr=0.1,
        decay=0.1,
    )
    net = addconv6(
        net,
        from_layer="conv5_5",
        use_bn=True,
        conv6_output=[128, 128, 128, 128, 128, 128],
        conv6_kernal_size=[3, 3, 3, 3, 3, 3],
        pre_name="conv6",
        start_pool=True,
        lr_mult=1,
        decay_mult=1,
        n_group=1,
    )

    # (fused layer name, source layers, down-sampling method), in creation order:
    #   featuremap2 = [conv5_5, conv6_6], featuremap1 = [conv4_3, conv5_5] for
    #   detection; convf_mask = [conv4_3, conv5_5] for the ROI heads.
    # A three-input variant of convf_mask (adding an up-sampled conv6 layer via
    # upsampleMethod="Reorg") exists only as disabled code in the original.
    fusion_specs = (
        ("featuremap2", ["conv5_5", "conv6_6"], "MaxPool"),
        ("featuremap1", ["conv4_3", "conv5_5"], "Reorg"),
        ("convf_mask", ["conv4_3", "conv5_5"], "MaxPool"),
    )
    for fused_name, source_layers, down_method in fusion_specs:
        net = UnifiedMultiScaleLayers(
            net,
            layers=source_layers,
            tags=["Down", "Ref"],
            unifiedlayer=fused_name,
            dnsampleMethod=[[down_method]],
        )

    mbox_layers = SSDHeader(
        net,
        data_layer="data",
        from_layers=["featuremap1", "featuremap2"],
        input_height=Input_Height,
        input_width=Input_Width,
        **ssdparam
    )
    # NOTE(review): unlike MaskNet_Val_Det, no softmax is applied to the
    # confidence blob before DetOut here -- confirm this is intended.
    net.detection_out = L.DetOut(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # ROI features shared by the keypoint and mask heads.
    net.roi_maps = L.RoiAlign(net.convf_mask, net.detection_out,
                              roi_align_param=roi_align_param)

    # Keypoint head: softmax over the flattened spatial axis, then peak search.
    net = KpsHeader(
        net,
        from_layer="roi_maps",
        out_layer="kps_maps",
        use_layers=kps_use_conv_layers,
        num_channels=channels_of_kps,
        all_kernel_size=kernel_size_of_kps,
        pad=pad_of_kps,
        use_deconv_layers=kps_use_deconv_layers,
        lr=lr,
        decay=decay,
    )
    net.kps_flatten = L.Flatten(net.kps_maps, flatten_param=dict(axis=2, end_axis=-1))
    net.kps_softmax = L.Softmax(net.kps_flatten, softmax_param=dict(axis=2))
    net.kps_peaks = L.PeaksFind(net.kps_softmax,
                                peaks_find_param=dict(height=Rh_Kps, width=Rw_Kps))

    # Mask head: sigmoid probabilities binarised at 0.5.
    net = MaskHeader(
        net,
        from_layer="roi_maps",
        out_layer="mask_maps",
        use_layers=mask_use_conv_layers,
        num_channels=channels_of_mask,
        kernel_size=kernel_size_of_mask,
        pad=pad_of_mask,
        use_deconv_layers=mask_use_deconv_layers,
        lr=lr,
        decay=decay,
    )
    net.mask_sigmoid = L.Sigmoid(net.mask_maps)
    net.mask_thre = L.Threshold(net.mask_sigmoid, threshold_param=dict(threshold=0.5))

    net.vis = L.VisualMask(net.orig_data, net.detection_out, net.mask_thre, net.kps_peaks,
                           **visual_mask_param)
    return net
def FaceBoxFPNNet(net, train=True, data_layer="data", gt_label="label",
                  net_width=512, net_height=288):
    """Build the FaceBox detector: conv stem, inception stage and an SSD head.

    The stem is three stride-2 3x3 convolutions (conv1..conv3) followed by
    four inception units (conv4_1..conv4_4).  An optional conv5/conv6 + FPN
    branch is guarded by the local ``flag_handusefpn`` switch, which is
    hard-wired to ``False``.  All backbone layers use ``lr = decay = 0``.
    The SSD head is configured from the module-level ``ssd_Param_2``; in
    training mode a (dense) bbox loss is attached, otherwise a detection
    output layer plus a ``DetEval`` layer configured from ``eval_Param``.

    Args:
        net: network spec object the layers are added to.
        train: build the TRAIN-phase loss when true, otherwise the TEST-phase
            detection output and evaluation layers.
        data_layer: name of the input top the stem starts from.
        gt_label: name of the ground-truth label top.
        net_width: input width forwarded to ``SsdDetectorHeaders``.
        net_height: input height forwarded to ``SsdDetectorHeaders``.

    Returns:
        ``net`` with the layers added.

    Raises:
        ValueError: in test mode, if the configured confidence loss type is
            neither SOFTMAX nor LOGISTIC.

    NOTE(review): the nesting of the ``flag_handusefpn`` branch was
    reconstructed from a whitespace-collapsed source.  Everything that reads
    ``f5_depth`` / ``f6_depth`` / ``feat4`` is placed inside the branch because
    those names only exist there; the ``print`` of the layer names is placed
    after it -- confirm against the original layout.
    """
    flag_handusefpn = False
    lr = 0
    decay = 0
    use_bn = False

    # ---- Stem: conv1..conv3 ------------------------------------------------
    from_layer = data_layer
    num_channels = [32, 64, 128]
    k_sizes = [3, 3, 3]
    strides = [2, 2, 2]
    for i in range(len(num_channels)):
        add_layer = "conv{}".format(i + 1)
        # Floor division keeps `pad` an int on Python 3 as well.
        ConvBNUnitLayer(net, from_layer, add_layer, use_bn=use_bn, use_relu=True,
                        leaky=False, num_output=num_channels[i], kernel_size=k_sizes[i],
                        pad=(k_sizes[i] - 1) // 2, stride=strides[i], use_scale=True,
                        n_group=1, lr_mult=lr, decay_mult=decay)
        from_layer = add_layer
        # (A 3x3/stride-2 max-pool after every stem conv but the last exists
        # only as disabled code in the original.)

    # ---- Inception stage: conv4_1..conv4_4 -----------------------------------
    layer_cnt = len(num_channels)
    num_channels = [192, 192, 192, 192]
    divide_scale = 4
    f4_depth = len(num_channels)
    for i in range(len(num_channels)):
        n_chan = num_channels[i]
        add_layer = "conv{}_{}".format(layer_cnt + 1, i + 1)
        # Channel counts must stay integers, hence `//`.
        net = InceptionOfficialLayer(net, from_layer, add_layer,
                                     channels_1=n_chan // divide_scale,
                                     channels_3=[n_chan // 8, n_chan // 4],
                                     channels_5=[n_chan // 8, n_chan // 4],
                                     channels_ave=n_chan // divide_scale,
                                     inter_bn=use_bn, leaky=False, lr=lr, decay=decay)
        from_layer = "conv{}_{}/incep".format(layer_cnt + 1, i + 1)

    if flag_handusefpn:
        # ---- conv5_1..conv5_5 ------------------------------------------------
        layer_cnt += 1
        num_channels = [256, 128, 256, 128, 256]
        kernels = [3, 1, 3, 1, 3]
        strides = [2, 1, 1, 1, 1]
        f5_depth = len(num_channels)
        for i in range(len(num_channels)):
            add_layer = "conv{}_{}".format(layer_cnt + 1, i + 1)
            ConvBNUnitLayer(net, from_layer, add_layer, use_bn=use_bn, use_relu=True,
                            leaky=False, num_output=num_channels[i], kernel_size=kernels[i],
                            pad=kernels[i] // 2, stride=strides[i], use_scale=True,
                            n_group=1, lr_mult=lr, decay_mult=decay)
            from_layer = add_layer

        # ---- conv6_1..conv6_5 ------------------------------------------------
        layer_cnt += 1
        num_channels = [128, 128, 128, 128, 128]
        kernels = [3, 3, 3, 3, 3]
        strides = [2, 1, 1, 1, 1]
        f6_depth = len(num_channels)
        for i in range(len(num_channels)):
            add_layer = "conv{}_{}".format(layer_cnt + 1, i + 1)
            ConvBNUnitLayer(net, from_layer, add_layer, use_bn=use_bn, use_relu=True,
                            leaky=False, num_output=num_channels[i], kernel_size=kernels[i],
                            pad=kernels[i] // 2, stride=strides[i], use_scale=True,
                            n_group=1, lr_mult=lr, decay_mult=decay)
            from_layer = add_layer

        # ---- FPN top-down path -----------------------------------------------
        # feat5 = 1x1(last conv5) + deconv(last conv6)
        f3 = 'conv6_{}'.format(f6_depth)
        out_layer_1 = f3 + '_deconv'
        net[out_layer_1] = L.Deconvolution(net[f3], **(getDecovArgs(256, lr, decay)))
        f2 = 'conv5_{}'.format(f5_depth)
        out_layer_2 = f2 + '_1x1'
        ConvBNUnitLayer(net, f2, out_layer_2, use_bn=False, use_relu=False, num_output=256,
                        kernel_size=1, pad=0, stride=1, lr_mult=lr, decay_mult=decay)
        out_layer = 'feat5'
        net[out_layer] = L.Eltwise(net[out_layer_2], net[out_layer_1],
                                   eltwise_param=dict(operation=P.Eltwise.SUM))
        net['feat5_relu'] = L.ReLU(net['feat5'], in_place=True)

        # feat4 = 1x1(last inception) + deconv(feat5)
        out_layer_1 = out_layer + '_deconv'
        net[out_layer_1] = L.Deconvolution(net['feat5_relu'], **(getDecovArgs(192, lr, decay)))
        f1 = 'conv4_{}/incep'.format(f4_depth)
        out_layer_2 = f1 + '_1x1'
        ConvBNUnitLayer(net, f1, out_layer_2, use_bn=False, use_relu=False, num_output=192,
                        kernel_size=1, pad=0, stride=1, lr_mult=lr, decay_mult=decay)
        out_layer = 'feat4'
        net[out_layer] = L.Eltwise(net[out_layer_2], net[out_layer_1],
                                   eltwise_param=dict(operation=P.Eltwise.SUM))
        net['feat4_relu'] = L.ReLU(net['feat4'], in_place=True)

        # One more deconvolution on top of feat4, followed by an in-place ReLU.
        from_layer = "feat4"
        add_layer = from_layer + "_deconv"
        net[add_layer] = L.Deconvolution(net[from_layer], **(getDecovArgs(64)))
        from_layer = add_layer
        add_layer = from_layer + "_relu"
        net[add_layer] = L.ReLU(net[from_layer], in_place=True)

    # Debug aid: list the layer names created so far.  The parenthesised form
    # is valid on both Python 2 and Python 3.
    print(net.keys())

    # ---- SSD header (stage 2) ------------------------------------------------
    mbox_2_layers = SsdDetectorHeaders(
        net,
        net_width=net_width, net_height=net_height, data_layer=data_layer,
        from_layers=ssd_Param_2.get('feature_layers', []),
        num_classes=ssd_Param_2.get("num_classes", 2),
        boxsizes=ssd_Param_2.get("anchor_boxsizes", []),
        aspect_ratios=ssd_Param_2.get("anchor_aspect_ratios", []),
        prior_variance=ssd_Param_2.get("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=ssd_Param_2.get("anchor_flip", True),
        clip=ssd_Param_2.get("anchor_clip", True),
        normalizations=ssd_Param_2.get("interlayers_normalizations", []),
        use_batchnorm=ssd_Param_2.get("interlayers_use_batchnorm", True),
        inter_layer_channels=ssd_Param_2.get("interlayers_channels_kernels", []),
        use_focus_loss=ssd_Param_2.get("bboxloss_using_focus_loss", False),
        use_dense_boxes=ssd_Param_2.get('bboxloss_use_dense_boxes', False),
        stage=2)

    # ---- Loss (train) or detection output (test) for the SSD2 stage ----------
    if train:
        loss_param = get_loss_param(
            normalization=ssd_Param_2.get("bboxloss_normalization", P.Loss.VALID))
        mbox_2_layers.append(net[gt_label])
        use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes', False)

        # Settings shared by the dense and the regular bbox loss.
        bboxloss_param = {
            'gt_labels': ssd_Param_2.get('gt_labels', []),
            'target_labels': ssd_Param_2.get('target_labels', []),
            'num_classes': ssd_Param_2.get("num_classes", 2),
            'alias_id': ssd_Param_2.get("alias_id", 0),
            'loc_loss_type': ssd_Param_2.get("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            'loc_weight': ssd_Param_2.get("bboxloss_loc_weight", 1),
            'conf_weight': ssd_Param_2.get("bboxloss_conf_weight", 1),
            'overlap_threshold': ssd_Param_2.get("bboxloss_overlap_threshold", 0.5),
            'neg_overlap': ssd_Param_2.get("bboxloss_neg_overlap", 0.5),
            'size_threshold': ssd_Param_2.get("bboxloss_size_threshold", 0.0001),
            'do_neg_mining': ssd_Param_2.get("bboxloss_do_neg_mining", True),
            'neg_pos_ratio': ssd_Param_2.get("bboxloss_neg_pos_ratio", 3),
            'using_focus_loss': ssd_Param_2.get("bboxloss_using_focus_loss", False),
            'gama': ssd_Param_2.get("bboxloss_focus_gama", 2),
            'use_difficult_gt': ssd_Param_2.get("bboxloss_use_difficult_gt", False),
            'code_type': ssd_Param_2.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
            'use_prior_for_matching': True,
            'encode_variance_in_target': False,
        }
        if use_dense_boxes:
            # The dense loss defaults to a LOGISTIC confidence loss.
            bboxloss_param.update({
                'conf_loss_type': ssd_Param_2.get("bboxloss_conf_loss_type",
                                                  P.MultiBoxLoss.LOGISTIC),
                'flag_noperson': ssd_Param_2.get('flag_noperson', False),
                'size_threshold_max': ssd_Param_2.get("bboxloss_size_threshold_max", 2),
                'flag_showdebug': ssd_Param_2.get("flag_showdebug", False),
                'flag_forcematchallgt': ssd_Param_2.get("flag_forcematchallgt", False),
                'flag_areamaxcheckinmatch': ssd_Param_2.get("flag_areamaxcheckinmatch", False),
            })
            net["mbox_2_loss"] = L.DenseBBoxLoss(
                *mbox_2_layers,
                dense_bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        else:
            # The regular loss defaults to a SOFTMAX confidence loss.
            bboxloss_param.update({
                'conf_loss_type': ssd_Param_2.get("bboxloss_conf_loss_type",
                                                  P.MultiBoxLoss.SOFTMAX),
                'match_type': P.MultiBoxLoss.PER_PREDICTION,
                'share_location': True,
                'background_label_id': 0,
                'map_object_to_agnostic': False,
            })
            net["mbox_2_loss"] = L.BBoxLoss(
                *mbox_2_layers,
                bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
    else:
        # Turn raw confidences into probabilities before detection output.
        conf_loss_type = ssd_Param_2.get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
        if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "mbox_2_conf_reshape"
            net[reshape_name] = L.Reshape(
                mbox_2_layers[1],
                shape=dict(dim=[0, -1, ssd_Param_2.get("num_classes", 2)]))
            softmax_name = "mbox_2_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_2_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_2_layers[1] = net[flatten_name]
        elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_2_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_2_layers[1])
            mbox_2_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")

        # Det-out param
        det_out_param = {
            'num_classes': ssd_Param_2.get("num_classes", 2),
            'target_labels': ssd_Param_2.get('detout_target_labels', []),
            'alias_id': ssd_Param_2.get("alias_id", 0),
            'conf_threshold': ssd_Param_2.get("detout_conf_threshold", 0.01),
            'nms_threshold': ssd_Param_2.get("detout_nms_threshold", 0.45),
            'size_threshold': ssd_Param_2.get("detout_size_threshold", 0.0001),
            'top_k': ssd_Param_2.get("detout_top_k", 30),
            'share_location': True,
            'code_type': P.PriorBox.CENTER_SIZE,
            'background_label_id': 0,
            'variance_encoded_in_target': False,
        }
        use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes', False)
        if use_dense_boxes:
            net.detection_out_2 = L.DenseDetOut(
                *mbox_2_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out_2 = L.DetOut(
                *mbox_2_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # ---- Evaluation (test mode only) -------------------------------------------
    if not train:
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold': eval_Param.get("eval_boxsize_threshold",
                                                [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
            'background_label_id': 0,
        }
        net.det_accu = L.DetEval(
            net['detection_out_2'], net[gt_label],
            detection_evaluate_param=det_eval_param,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
def Face_eval(net, from_layer="data", label="label", lr=1, decay=1, visualize=False):
    """Assemble the face-detection evaluation graph.

    A frozen ``YoloNetPart`` backbone (``lr=0``, ``decay=0``) is fused into
    three feature maps that feed an SSD header.  The confidence output is
    always passed through reshape -> softmax -> flatten, then ``DetOut``
    produces detections and ``DetEval`` scores them against the label top
    (both TEST phase only).

    Args:
        net: network spec object the layers are added to.
        from_layer: name of the top the backbone is built on.
        label: name of the ground-truth label top handed to ``DetEval``.
            It was previously ignored in favour of a hard-coded ``net.label``;
            the default value behaves exactly as before.
        lr: accepted for interface compatibility; not read here.
        decay: accepted for interface compatibility; not read here.
        visualize: accepted for interface compatibility; not read here (the
            code that appended the "data" top is disabled in the original).

    Returns:
        ``net`` with the layers added.

    Reads the module-level ``ssdparam``, ``det_out_param``,
    ``det_eval_param``, ``Input_Height`` and ``Input_Width``.
    """
    # Alternative backbones (disabled in the original): YoloNetPartCompress
    # with 5 layers / 5 sub-layers, and FaceNet(net, from_layer="data", use_bn=True).
    net = YoloNetPart(net, from_layer=from_layer, use_bn=True, use_layers=6,
                      use_sub_layers=7, lr=0, decay=0)

    # Stride-2 1x1 convolution on conv2, used as the first fusion input.
    ConvBNUnitLayer(net, "conv2", "conv2_pool", use_bn=True, use_relu=True,
                    num_output=64, kernel_size=1, pad=0, stride=2)

    net = UnifiedMultiScaleLayers(
        net,
        layers=["conv2_pool", "conv3_3", "conv4_3"],
        tags=["Down", "Down", "Ref"],
        unifiedlayer="featuremap11",
        dnsampleMethod=[["Conv"], ["MaxPool"]],
        dnsampleChannels=64)
    net = UnifiedMultiScaleLayers(
        net,
        layers=["conv4_3", "conv5_5"],
        tags=["Down", "Ref"],
        unifiedlayer="featuremap22",
        dnsampleMethod=[["MaxPool"]])
    net = UnifiedMultiScaleLayers(
        net,
        layers=["conv5_5", "conv6_7"],
        tags=["Down", "Ref"],
        unifiedlayer="featuremap33",
        dnsampleMethod=[["MaxPool"]],
        pad=True)

    mbox_layers = SSDHeader(
        net,
        data_layer="data",
        from_layers=["featuremap11", "featuremap22", "featuremap33"],
        input_height=Input_Height,
        input_width=Input_Width,
        **ssdparam)

    # Per-prior softmax over the class axis, flattened back for DetOut.
    reshape_name = "mbox_conf_reshape"
    net[reshape_name] = L.Reshape(
        mbox_layers[1],
        shape=dict(dim=[0, -1, ssdparam.get("num_classes", 2)]))
    softmax_name = "mbox_conf_softmax"
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    flatten_name = "mbox_conf_flatten"
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
    # A sigmoid (LOGISTIC) variant and a DenseDetOut variant exist only as
    # disabled code in the original.

    net.detection_out = L.DetOut(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    # Look the label top up by the `label` argument instead of the hard-coded
    # attribute `net.label`; identical when label == "label".
    net.detection_eval_accu = L.DetEval(
        net.detection_out,
        net[label],
        detection_evaluate_param=det_eval_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
def _dapnet_add_ssd_stage(net, ssd_param, stage, train, data_layer, gt_label,
                          net_width, net_height, lr_mult, decay_mult):
    """Attach one SSD stage (header + loss or detection output) to ``net``.

    In training mode this adds ``mbox_<stage>_loss`` (``DenseBBoxLoss`` or
    ``BBoxLoss`` depending on ``bboxloss_use_dense_boxes``); otherwise it adds
    the confidence activation layers and ``detection_out_<stage>``.

    Raises:
        ValueError: in test mode, if the configured confidence loss type is
            neither SOFTMAX nor LOGISTIC.
    """
    mbox_layers = SsdDetectorHeaders(
        net,
        net_width=net_width, net_height=net_height, data_layer=data_layer,
        from_layers=ssd_param.get('feature_layers', []),
        num_classes=ssd_param.get("num_classes", 2),
        boxsizes=ssd_param.get("anchor_boxsizes", []),
        aspect_ratios=ssd_param.get("anchor_aspect_ratios", []),
        prior_variance=ssd_param.get("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=ssd_param.get("anchor_flip", True),
        clip=ssd_param.get("anchor_clip", True),
        normalizations=ssd_param.get("interlayers_normalizations", []),
        use_batchnorm=ssd_param.get("interlayers_use_batchnorm", True),
        inter_layer_channels=ssd_param.get("interlayers_channels_kernels", []),
        use_focus_loss=ssd_param.get("bboxloss_using_focus_loss", False),
        use_dense_boxes=ssd_param.get('bboxloss_use_dense_boxes', False),
        stage=stage, lr_mult=lr_mult, decay_mult=decay_mult)

    use_dense_boxes = ssd_param.get('bboxloss_use_dense_boxes', False)

    if train:
        loss_param = get_loss_param(
            normalization=ssd_param.get("bboxloss_normalization", P.Loss.VALID))
        mbox_layers.append(net[gt_label])

        # Settings shared by the dense and the regular bbox loss.
        bboxloss_param = {
            'gt_labels': ssd_param.get('gt_labels', []),
            'target_labels': ssd_param.get('target_labels', []),
            'num_classes': ssd_param.get("num_classes", 2),
            'alias_id': ssd_param.get("alias_id", 0),
            'loc_loss_type': ssd_param.get("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            'loc_weight': ssd_param.get("bboxloss_loc_weight", 1),
            'conf_weight': ssd_param.get("bboxloss_conf_weight", 1),
            'overlap_threshold': ssd_param.get("bboxloss_overlap_threshold", 0.5),
            'neg_overlap': ssd_param.get("bboxloss_neg_overlap", 0.5),
            'size_threshold': ssd_param.get("bboxloss_size_threshold", 0.0001),
            'do_neg_mining': ssd_param.get("bboxloss_do_neg_mining", True),
            'neg_pos_ratio': ssd_param.get("bboxloss_neg_pos_ratio", 3),
            'using_focus_loss': ssd_param.get("bboxloss_using_focus_loss", False),
            'gama': ssd_param.get("bboxloss_focus_gama", 2),
            'use_difficult_gt': ssd_param.get("bboxloss_use_difficult_gt", False),
            'code_type': ssd_param.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
            'use_prior_for_matching': True,
            'encode_variance_in_target': False,
        }
        loss_name = "mbox_{}_loss".format(stage)
        if use_dense_boxes:
            # The dense loss defaults to a LOGISTIC confidence loss.
            bboxloss_param.update({
                'conf_loss_type': ssd_param.get("bboxloss_conf_loss_type",
                                                P.MultiBoxLoss.LOGISTIC),
                'flag_noperson': ssd_param.get('flag_noperson', False),
            })
            net[loss_name] = L.DenseBBoxLoss(
                *mbox_layers,
                dense_bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        else:
            # The regular loss defaults to a SOFTMAX confidence loss.
            bboxloss_param.update({
                'conf_loss_type': ssd_param.get("bboxloss_conf_loss_type",
                                                P.MultiBoxLoss.SOFTMAX),
                'match_type': P.MultiBoxLoss.PER_PREDICTION,
                'share_location': True,
                'background_label_id': 0,
                'map_object_to_agnostic': False,
            })
            net[loss_name] = L.BBoxLoss(
                *mbox_layers,
                bbox_loss_param=bboxloss_param,
                loss_param=loss_param,
                include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        return

    # Test mode: turn raw confidences into probabilities first.
    conf_loss_type = ssd_param.get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
    if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
        reshape_name = "mbox_{}_conf_reshape".format(stage)
        net[reshape_name] = L.Reshape(
            mbox_layers[1],
            shape=dict(dim=[0, -1, ssd_param.get("num_classes", 2)]))
        softmax_name = "mbox_{}_conf_softmax".format(stage)
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        flatten_name = "mbox_{}_conf_flatten".format(stage)
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_layers[1] = net[flatten_name]
    elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = "mbox_{}_conf_sigmoid".format(stage)
        net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
        mbox_layers[1] = net[sigmoid_name]
    else:
        raise ValueError("Unknown conf loss type.")

    # Det-out param
    det_out_param = {
        'num_classes': ssd_param.get("num_classes", 2),
        'target_labels': ssd_param.get('detout_target_labels', []),
        'alias_id': ssd_param.get("alias_id", 0),
        'conf_threshold': ssd_param.get("detout_conf_threshold", 0.01),
        'nms_threshold': ssd_param.get("detout_nms_threshold", 0.45),
        'size_threshold': ssd_param.get("detout_size_threshold", 0.0001),
        'top_k': ssd_param.get("detout_top_k", 30),
        'share_location': True,
        'code_type': P.PriorBox.CENTER_SIZE,
        'background_label_id': 0,
        'variance_encoded_in_target': False,
    }
    det_layer = L.DenseDetOut if use_dense_boxes else L.DetOut
    net["detection_out_{}".format(stage)] = det_layer(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))


def DAPNetVGGDark(net, train=True, data_layer="data", gt_label="label",
                  net_width=512, net_height=288):
    """Build the DAP detector on a VGG/Dark backbone with one or two SSD stages.

    The backbone (``VGGDarkNet`` plus a conv6 stage configured by the
    module-level ``Conv6_Param``) is fused into ``featuremap1..3``.  SSD stage
    1 (``ssd_Param_1``) is always attached; stage 2 (``ssd_Param_2``) only
    when the module-level ``use_ssd2_for_detection`` is true.  In test mode a
    ``DetEval`` layer configured from ``eval_Param`` scores the detections --
    the concatenation of both stages' outputs when stage 2 is enabled.

    Args:
        net: network spec object the layers are added to.
        train: build TRAIN-phase losses when true, otherwise TEST-phase
            detection output and evaluation layers.
        data_layer: input top name forwarded to the SSD headers.  The backbone
            itself is always built on "data".
        gt_label: name of the ground-truth label top.
        net_width: input width forwarded to the SSD headers.
        net_height: input height forwarded to the SSD headers.

    Returns:
        ``net`` with the layers added.

    Raises:
        ValueError: in test mode, if a stage's confidence loss type is
            neither SOFTMAX nor LOGISTIC.
        KeyError: if ``Conv6_Param`` has no 'conv6_output' entry.
    """
    # BaseNet
    flag_withparamname = True
    net = VGGDarkNet(net, data_layer="data", flag_withparamname=flag_withparamname)

    # Add Conv6
    conv6_output = Conv6_Param.get('conv6_output', [])
    conv6_kernal_size = Conv6_Param.get('conv6_kernal_size', [])
    out_layer = "pool5"
    net = addconv6(net, from_layer=out_layer, use_bn=True, conv6_output=conv6_output,
                   conv6_kernal_size=conv6_kernal_size, pre_name="conv6", start_pool=False,
                   lr_mult=1, decay_mult=1.0, n_group=1)

    # Concat FM1
    featuremap1 = ["conv2", "conv3_3"]
    tags = ["Down", "Ref"]
    down_methods = [["MaxPool"]]
    out_layer = "featuremap1"
    UnifiedMultiScaleLayers(net, layers=featuremap1, tags=tags, unifiedlayer=out_layer,
                            dnsampleMethod=down_methods)

    # Concat FM2
    featuremap2 = ["conv3_3", "conv4_5"]
    tags = ["Ref", "Ref"]
    down_methods = [["MaxPool"]]
    out_layer = "featuremap2"
    UnifiedMultiScaleLayers(net, layers=featuremap2, tags=tags, unifiedlayer=out_layer,
                            dnsampleMethod=down_methods)

    # Concat FM3: pool5 with the last conv6 layer.
    c6_layer = 'conv6_{}'.format(len(Conv6_Param['conv6_output']))
    featuremap3 = ["pool5", c6_layer]
    tags = ["Ref", "Ref"]
    down_methods = [["MaxPool"]]
    out_layer = "featuremap3"
    UnifiedMultiScaleLayers(net, layers=featuremap3, tags=tags, unifiedlayer=out_layer,
                            dnsampleMethod=down_methods)

    # Header plus loss (train) or detection output (test) for SSD1.
    _dapnet_add_ssd_stage(net, ssd_Param_1, 1, train, data_layer, gt_label,
                          net_width, net_height, lr_mult=1, decay_mult=1.0)

    # Header plus loss (train) or detection output (test) for SSD2.
    if use_ssd2_for_detection:
        _dapnet_add_ssd_stage(net, ssd_Param_2, 2, train, data_layer, gt_label,
                              net_width, net_height, lr_mult=1.0, decay_mult=1.0)

    # EVAL in TEST MODE
    if not train:
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold': eval_Param.get("eval_boxsize_threshold",
                                                [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
            'background_label_id': 0,
        }
        if use_ssd2_for_detection:
            # Evaluate the detections of both stages together.
            det_out_layers = [net['detection_out_1'], net['detection_out_2']]
            name = 'det_out'
            net[name] = L.Concat(*det_out_layers, axis=2)
            net.det_accu = L.DetEval(
                net[name], net[gt_label],
                detection_evaluate_param=det_eval_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.det_accu = L.DetEval(
                net['detection_out_1'], net[gt_label],
                detection_evaluate_param=det_eval_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
def HandNet_DarkBase(net, train=True, data_layer="data", gt_label="label",
                     net_width=512, net_height=288):
    """Build the hand-detection net: VGG16-variant base, deconv, SSD stage 2.

    The base is created by ``VGG16_BaseNet_ChangeChannel``, a stride-2
    deconvolution is attached to ``conv4_5``, and SSD headers (stage 2) are
    built from the module-level ``ssd_Param_2`` dict.  In TRAIN mode a bbox
    loss layer (``mbox_2_loss``) is added; otherwise the confidence
    activation, ``detection_out_2`` and the ``det_accu`` evaluation layer
    (configured by the module-level ``eval_Param``) are added.

    Args:
        net: network spec being populated (presumably a caffe ``NetSpec``).
        train: True to add the loss layer, False to add det-out and eval.
        data_layer: name of the input layer feeding the base network.
        gt_label: name of the ground-truth label layer in ``net``.
        net_width: input width forwarded to ``SsdDetectorHeaders``.
        net_height: input height forwarded to ``SsdDetectorHeaders``.

    Returns:
        The same ``net`` with all layers added.

    Raises:
        ValueError: in TEST mode when ``bboxloss_conf_loss_type`` is neither
            SOFTMAX nor LOGISTIC.
    """
    # ---- Base network ----
    use_bn = False
    # 0 is passed for both lr_mult and decay_mult of the base layers.
    lr_mult = 0
    use_global_stats = None
    channels = ((32, ), (64, ), (128, 64, 128), (192, 96, 192, 96, 192))
    strides = (True, True, True, False)
    kernels = ((3, ), (3, ), (3, 1, 3), (3, 1, 3, 1, 3))
    pool_last = (False, False, False, True)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer, channels=channels, strides=strides,
                                      use_bn=use_bn, kernels=kernels, freeze_layers=[],
                                      pool_last=pool_last, lr_mult=lr_mult, decay_mult=lr_mult,
                                      use_global_stats=use_global_stats)

    flag_with_deconv = True
    flag_eltwise = False
    from_layer = "conv4_5"
    if flag_with_deconv:
        Deconv(net, from_layer, num_output=64, group=1, kernel_size=2, stride=2,
               lr_mult=1.0, decay_mult=1.0, use_bn=True, use_scale=True, use_relu=False)
    # Debug listing of the layers created so far.  Written in call form so the
    # module parses on Python 3 while printing the same text on Python 2.
    print(net.keys())
    if flag_eltwise:
        # Currently disabled (flag_eltwise is False above).
        # NOTE(review): this branch reads "conv4_3_deconv" although the deconv
        # above is attached to "conv4_5" -- confirm the name before enabling.
        use_bn = True
        from_layer = "conv1"
        out_layer = 'conv2_hand'
        ConvBNUnitLayer(net, from_layer, out_layer, use_bn=use_bn, use_relu=False, num_output=64,
                        kernel_size=3, pad=1, stride=2, use_scale=True, leaky=False,
                        lr_mult=1, decay_mult=1)
        out_layer = "hand_multiscale"
        net[out_layer] = L.Eltwise(net["conv2_hand"], net["conv4_3_deconv"],
                                   eltwise_param=dict(operation=P.Eltwise.SUM))
        from_layer = out_layer
        out_layer = from_layer + "_relu"
        net[out_layer] = L.ReLU(net[from_layer], in_place=True)

    # ---- SSD headers for stage 2 ----
    mbox_2_layers = SsdDetectorHeaders(
        net,
        net_width=net_width, net_height=net_height, data_layer=data_layer,
        from_layers=ssd_Param_2.get('feature_layers', []),
        num_classes=ssd_Param_2.get("num_classes", 2),
        boxsizes=ssd_Param_2.get("anchor_boxsizes", []),
        aspect_ratios=ssd_Param_2.get("anchor_aspect_ratios", []),
        prior_variance=ssd_Param_2.get("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=ssd_Param_2.get("anchor_flip", True),
        clip=ssd_Param_2.get("anchor_clip", True),
        normalizations=ssd_Param_2.get("interlayers_normalizations", []),
        use_batchnorm=ssd_Param_2.get("interlayers_use_batchnorm", True),
        inter_layer_channels=ssd_Param_2.get("interlayers_channels_kernels", []),
        use_focus_loss=ssd_Param_2.get("bboxloss_using_focus_loss", False),
        use_dense_boxes=ssd_Param_2.get('bboxloss_use_dense_boxes', False),
        stage=2)

    use_dense_boxes = ssd_Param_2.get('bboxloss_use_dense_boxes', False)

    # ---- Loss (TRAIN) or det-out + eval (TEST) for SSD2 ----
    if train:
        loss_param = get_loss_param(normalization=ssd_Param_2.get(
            "bboxloss_normalization", P.Loss.VALID))
        mbox_2_layers.append(net[gt_label])
        # Settings shared by the dense and the regular bbox loss.
        bboxloss_param = {
            'gt_labels': ssd_Param_2.get('gt_labels', []),
            'target_labels': ssd_Param_2.get('target_labels', []),
            'num_classes': ssd_Param_2.get("num_classes", 2),
            'alias_id': ssd_Param_2.get("alias_id", 0),
            'loc_loss_type': ssd_Param_2.get("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            'loc_weight': ssd_Param_2.get("bboxloss_loc_weight", 1),
            'conf_weight': ssd_Param_2.get("bboxloss_conf_weight", 1),
            'overlap_threshold': ssd_Param_2.get("bboxloss_overlap_threshold", 0.5),
            'neg_overlap': ssd_Param_2.get("bboxloss_neg_overlap", 0.5),
            'size_threshold': ssd_Param_2.get("bboxloss_size_threshold", 0.0001),
            'do_neg_mining': ssd_Param_2.get("bboxloss_do_neg_mining", True),
            'neg_pos_ratio': ssd_Param_2.get("bboxloss_neg_pos_ratio", 3),
            'using_focus_loss': ssd_Param_2.get("bboxloss_using_focus_loss", False),
            'gama': ssd_Param_2.get("bboxloss_focus_gama", 2),
            'use_difficult_gt': ssd_Param_2.get("bboxloss_use_difficult_gt", False),
            'code_type': ssd_Param_2.get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
            'use_prior_for_matching': True,
            'encode_variance_in_target': False,
        }
        if use_dense_boxes:
            # The dense loss defaults to LOGISTIC confidence and has extra flags.
            bboxloss_param.update({
                'conf_loss_type': ssd_Param_2.get("bboxloss_conf_loss_type", P.MultiBoxLoss.LOGISTIC),
                'flag_noperson': ssd_Param_2.get('flag_noperson', False),
                'size_threshold_max': ssd_Param_2.get("bboxloss_size_threshold_max", 2),
                'flag_showdebug': ssd_Param_2.get("flag_showdebug", False),
                'flag_forcematchallgt': ssd_Param_2.get("flag_forcematchallgt", False),
                'flag_areamaxcheckinmatch': ssd_Param_2.get("flag_areamaxcheckinmatch", False),
            })
            net["mbox_2_loss"] = L.DenseBBoxLoss(
                *mbox_2_layers, dense_bbox_loss_param=bboxloss_param,
                loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
        else:
            # The regular loss defaults to SOFTMAX confidence.
            bboxloss_param.update({
                'conf_loss_type': ssd_Param_2.get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX),
                'match_type': P.MultiBoxLoss.PER_PREDICTION,
                'share_location': True,
                'background_label_id': 0,
                'map_object_to_agnostic': False,
            })
            net["mbox_2_loss"] = L.BBoxLoss(
                *mbox_2_layers, bbox_loss_param=bboxloss_param,
                loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                propagate_down=[True, True, False, False])
    else:
        # Turn raw confidences (mbox_2_layers[1]) into scores.
        conf_loss_type = ssd_Param_2.get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
        if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
            reshape_name = "mbox_2_conf_reshape"
            net[reshape_name] = L.Reshape(
                mbox_2_layers[1],
                shape=dict(dim=[0, -1, ssd_Param_2.get("num_classes", 2)]))
            softmax_name = "mbox_2_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = "mbox_2_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_2_layers[1] = net[flatten_name]
        elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = "mbox_2_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_2_layers[1])
            mbox_2_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")

        # Det-out param
        det_out_param = {
            'num_classes': ssd_Param_2.get("num_classes", 2),
            'target_labels': ssd_Param_2.get('detout_target_labels', []),
            'alias_id': ssd_Param_2.get("alias_id", 0),
            'conf_threshold': ssd_Param_2.get("detout_conf_threshold", 0.01),
            'nms_threshold': ssd_Param_2.get("detout_nms_threshold", 0.45),
            'size_threshold': ssd_Param_2.get("detout_size_threshold", 0.0001),
            'top_k': ssd_Param_2.get("detout_top_k", 30),
            'share_location': True,
            'code_type': P.PriorBox.CENTER_SIZE,
            'background_label_id': 0,
            'variance_encoded_in_target': False,
        }
        if use_dense_boxes:
            net.detection_out_2 = L.DenseDetOut(
                *mbox_2_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.detection_out_2 = L.DetOut(
                *mbox_2_layers,
                detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))

        # Evaluation layer, TEST mode only.
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold': eval_Param.get("eval_boxsize_threshold",
                                                [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
            'background_label_id': 0,
        }
        net.det_accu = L.DetEval(
            net['detection_out_2'], net[gt_label],
            detection_evaluate_param=det_eval_param,
            include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net
def DAP_HandNet(net, train=True, data_layer="data", gt_label="label",
                net_width=512, net_height=288):
    """Build the DAP hand-detection net: ``HandBase`` backbone + SSD stage 2.

    SSD settings are read from the module-level ``ssd_Param_2`` dict and
    evaluation settings from the module-level ``eval_Param`` dict.  TRAIN mode
    adds ``mbox_2_loss``; TEST mode adds the confidence activation,
    ``detection_out_2`` and ``det_accu``.

    Args:
        net: network spec being populated (presumably a caffe ``NetSpec``).
        train: True for the training graph, False for the test graph.
        data_layer: name of the input layer.
        gt_label: name of the ground-truth label layer in ``net``.
        net_width: input width forwarded to ``SsdDetectorHeaders``.
        net_height: input height forwarded to ``SsdDetectorHeaders``.

    Returns:
        The same ``net`` with all layers added.

    Raises:
        ValueError: in TEST mode when ``bboxloss_conf_loss_type`` is neither
            SOFTMAX nor LOGISTIC.
    """
    cfg = ssd_Param_2.get

    # Backbone ("Base of ZhangM").  A ResidualVariant_Base_A base containing
    # only conv1 & pool1 (lr 0, channel_scale 4, "_recon" suffix) was used
    # here previously and is disabled.
    net = HandBase(net, data_layer=data_layer, use_bn=True)

    # SSD headers for stage 2.
    head_layers = SsdDetectorHeaders(
        net,
        net_width=net_width,
        net_height=net_height,
        data_layer=data_layer,
        from_layers=cfg('feature_layers', []),
        num_classes=cfg("num_classes", 2),
        boxsizes=cfg("anchor_boxsizes", []),
        aspect_ratios=cfg("anchor_aspect_ratios", []),
        prior_variance=cfg("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
        flip=cfg("anchor_flip", True),
        clip=cfg("anchor_clip", True),
        normalizations=cfg("interlayers_normalizations", []),
        use_batchnorm=cfg("interlayers_use_batchnorm", True),
        inter_layer_channels=cfg("interlayers_channels_kernels", []),
        use_focus_loss=cfg("bboxloss_using_focus_loss", False),
        use_dense_boxes=cfg('bboxloss_use_dense_boxes', False),
        stage=2)

    dense = cfg('bboxloss_use_dense_boxes', False)

    if train:
        # ---- TRAIN: bbox loss for SSD2 ----
        loss_param = get_loss_param(
            normalization=cfg("bboxloss_normalization", P.Loss.VALID))
        head_layers.append(net[gt_label])
        train_phase = dict(phase=caffe_pb2.Phase.Value('TRAIN'))

        # Entries common to both loss flavours.
        loss_cfg = {
            'gt_labels': cfg('gt_labels', []),
            'target_labels': cfg('target_labels', []),
            'num_classes': cfg("num_classes", 2),
            'alias_id': cfg("alias_id", 0),
            'loc_loss_type': cfg("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
            'loc_weight': cfg("bboxloss_loc_weight", 1),
            'conf_weight': cfg("bboxloss_conf_weight", 1),
            'overlap_threshold': cfg("bboxloss_overlap_threshold", 0.5),
            'neg_overlap': cfg("bboxloss_neg_overlap", 0.5),
            'size_threshold': cfg("bboxloss_size_threshold", 0.0001),
            'do_neg_mining': cfg("bboxloss_do_neg_mining", True),
            'neg_pos_ratio': cfg("bboxloss_neg_pos_ratio", 3),
            'using_focus_loss': cfg("bboxloss_using_focus_loss", False),
            'gama': cfg("bboxloss_focus_gama", 2),
            'use_difficult_gt': cfg("bboxloss_use_difficult_gt", False),
            'code_type': cfg("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
            'use_prior_for_matching': True,
            'encode_variance_in_target': False,
        }
        if not dense:
            # Regular loss: confidence type defaults to SOFTMAX.
            loss_cfg['conf_loss_type'] = cfg("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
            loss_cfg['match_type'] = P.MultiBoxLoss.PER_PREDICTION
            loss_cfg['share_location'] = True
            loss_cfg['background_label_id'] = 0
            loss_cfg['map_object_to_agnostic'] = False
            net["mbox_2_loss"] = L.BBoxLoss(
                *head_layers,
                bbox_loss_param=loss_cfg,
                loss_param=loss_param,
                include=train_phase,
                propagate_down=[True, True, False, False])
        else:
            # Dense loss: confidence type defaults to LOGISTIC, extra flags.
            loss_cfg['conf_loss_type'] = cfg("bboxloss_conf_loss_type", P.MultiBoxLoss.LOGISTIC)
            loss_cfg['flag_noperson'] = cfg('flag_noperson', False)
            loss_cfg['size_threshold_max'] = cfg("bboxloss_size_threshold_max", 2)
            loss_cfg['flag_showdebug'] = cfg("flag_showdebug", False)
            loss_cfg['flag_forcematchallgt'] = cfg("flag_forcematchallgt", False)
            loss_cfg['flag_areamaxcheckinmatch'] = cfg("flag_areamaxcheckinmatch", False)
            net["mbox_2_loss"] = L.DenseBBoxLoss(
                *head_layers,
                dense_bbox_loss_param=loss_cfg,
                loss_param=loss_param,
                include=train_phase,
                propagate_down=[True, True, False, False])
        return net

    # ---- TEST: confidence activation, det-out and evaluation ----
    test_value = caffe_pb2.Phase.Value('TEST')
    conf_type = cfg("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
    if conf_type == P.MultiBoxLoss.SOFTMAX:
        net["mbox_2_conf_reshape"] = L.Reshape(
            head_layers[1], shape=dict(dim=[0, -1, cfg("num_classes", 2)]))
        net["mbox_2_conf_softmax"] = L.Softmax(net["mbox_2_conf_reshape"], axis=2)
        net["mbox_2_conf_flatten"] = L.Flatten(net["mbox_2_conf_softmax"], axis=1)
        head_layers[1] = net["mbox_2_conf_flatten"]
    elif conf_type == P.MultiBoxLoss.LOGISTIC:
        net["mbox_2_conf_sigmoid"] = L.Sigmoid(head_layers[1])
        head_layers[1] = net["mbox_2_conf_sigmoid"]
    else:
        raise ValueError("Unknown conf loss type.")

    detout_cfg = {
        'num_classes': cfg("num_classes", 2),
        'target_labels': cfg('detout_target_labels', []),
        'alias_id': cfg("alias_id", 0),
        'conf_threshold': cfg("detout_conf_threshold", 0.01),
        'nms_threshold': cfg("detout_nms_threshold", 0.45),
        'size_threshold': cfg("detout_size_threshold", 0.0001),
        'top_k': cfg("detout_top_k", 30),
        'share_location': True,
        'code_type': P.PriorBox.CENTER_SIZE,
        'background_label_id': 0,
        'variance_encoded_in_target': False,
    }
    if dense:
        net.detection_out_2 = L.DenseDetOut(
            *head_layers,
            detection_output_param=detout_cfg,
            include=dict(phase=test_value))
    else:
        net.detection_out_2 = L.DetOut(
            *head_layers,
            detection_output_param=detout_cfg,
            include=dict(phase=test_value))

    eval_cfg = {
        'gt_labels': eval_Param.get('eval_gt_labels', []),
        'num_classes': eval_Param.get("eval_num_classes", 2),
        'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
        'boxsize_threshold': eval_Param.get("eval_boxsize_threshold",
                                            [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
        'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
        'background_label_id': 0,
    }
    net.det_accu = L.DetEval(
        net['detection_out_2'], net[gt_label],
        detection_evaluate_param=eval_cfg,
        include=dict(phase=test_value))
    return net
def DAPPoseNet(net, train=True, data_layer="data", gt_label="label", net_width=512, net_height=288):
    """Build the joint detection + pose network.

    Two VGG16-variant bases are created, one on ``data_layer`` for detection
    and one on ``data_layer + pose_string`` for pose.  The detection branch
    gets conv6 layers and SSD stage 1 (``ssd_Param_1``), plus SSD stage 2
    (``ssd_Param_2``) when the module-level ``use_ssd2_for_detection`` is set.
    The pose branch gets a deconvolution on ``conv5_5`` followed by the
    ``mPose_StageX_Train`` stages.  In TEST mode detection evaluation and the
    resize / NMS / limb-connection / pose-evaluation layers are appended.

    Module-level names read here: ``pose_string``, ``Conv6_Param``,
    ``ssd_Param_1``, ``ssd_Param_2``, ``eval_Param``,
    ``use_ssd2_for_detection``.

    Args:
        net: network spec being populated (presumably a caffe ``NetSpec``).
        train: True for the training graph, False for the test graph.
        data_layer: name of the detection input layer.
        gt_label: name of the detection ground-truth layer in ``net``.
        net_width: input width forwarded to ``SsdDetectorHeaders``.
        net_height: input height forwarded to ``SsdDetectorHeaders``.

    Returns:
        The same ``net`` with all layers added.

    Raises:
        ValueError: in TEST mode when an SSD stage's
            ``bboxloss_conf_loss_type`` is neither SOFTMAX nor LOGISTIC.
    """

    def _add_ssd_stage(ssd_param, stage, lr_mult, decay_mult):
        """Add SSD headers for one stage plus its TRAIN loss or TEST det-out."""
        get = ssd_param.get
        prefix = "mbox_{}".format(stage)
        mbox_layers = SsdDetectorHeaders(
            net,
            net_width=net_width, net_height=net_height, data_layer=data_layer,
            from_layers=get('feature_layers', []),
            num_classes=get("num_classes", 2),
            boxsizes=get("anchor_boxsizes", []),
            aspect_ratios=get("anchor_aspect_ratios", []),
            prior_variance=get("anchor_prior_variance", [0.1, 0.1, 0.2, 0.2]),
            flip=get("anchor_flip", True),
            clip=get("anchor_clip", True),
            normalizations=get("interlayers_normalizations", []),
            use_batchnorm=get("interlayers_use_batchnorm", True),
            inter_layer_channels=get("interlayers_channels_kernels", []),
            use_focus_loss=get("bboxloss_using_focus_loss", False),
            use_dense_boxes=get('bboxloss_use_dense_boxes', False),
            stage=stage, lr_mult=lr_mult, decay_mult=decay_mult)
        use_dense_boxes = get('bboxloss_use_dense_boxes', False)

        if train:
            loss_param = get_loss_param(normalization=get("bboxloss_normalization", P.Loss.VALID))
            mbox_layers.append(net[gt_label])
            # Settings shared by the dense and the regular bbox loss.
            bboxloss_param = {
                'gt_labels': get('gt_labels', []),
                'target_labels': get('target_labels', []),
                'num_classes': get("num_classes", 2),
                'alias_id': get("alias_id", 0),
                'loc_loss_type': get("bboxloss_loc_loss_type", P.MultiBoxLoss.SMOOTH_L1),
                'loc_weight': get("bboxloss_loc_weight", 1),
                'conf_weight': get("bboxloss_conf_weight", 1),
                'overlap_threshold': get("bboxloss_overlap_threshold", 0.5),
                'neg_overlap': get("bboxloss_neg_overlap", 0.5),
                'size_threshold': get("bboxloss_size_threshold", 0.0001),
                'do_neg_mining': get("bboxloss_do_neg_mining", True),
                'neg_pos_ratio': get("bboxloss_neg_pos_ratio", 3),
                'using_focus_loss': get("bboxloss_using_focus_loss", False),
                'gama': get("bboxloss_focus_gama", 2),
                'use_difficult_gt': get("bboxloss_use_difficult_gt", False),
                'code_type': get("bboxloss_code_type", P.PriorBox.CENTER_SIZE),
                'use_prior_for_matching': True,
                'encode_variance_in_target': False,
            }
            if use_dense_boxes:
                # The dense loss defaults to LOGISTIC confidence.
                bboxloss_param.update({
                    'conf_loss_type': get("bboxloss_conf_loss_type", P.MultiBoxLoss.LOGISTIC),
                    'flag_noperson': get('flag_noperson', False),
                })
                net[prefix + "_loss"] = L.DenseBBoxLoss(
                    *mbox_layers, dense_bbox_loss_param=bboxloss_param,
                    loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                    propagate_down=[True, True, False, False])
            else:
                # The regular loss defaults to SOFTMAX confidence.
                bboxloss_param.update({
                    'conf_loss_type': get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX),
                    'match_type': P.MultiBoxLoss.PER_PREDICTION,
                    'share_location': True,
                    'background_label_id': 0,
                    'map_object_to_agnostic': False,
                })
                net[prefix + "_loss"] = L.BBoxLoss(
                    *mbox_layers, bbox_loss_param=bboxloss_param,
                    loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
                    propagate_down=[True, True, False, False])
            return

        # TEST: turn raw confidences (mbox_layers[1]) into scores.
        conf_loss_type = get("bboxloss_conf_loss_type", P.MultiBoxLoss.SOFTMAX)
        if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
            reshape_name = prefix + "_conf_reshape"
            net[reshape_name] = L.Reshape(
                mbox_layers[1], shape=dict(dim=[0, -1, get("num_classes", 2)]))
            softmax_name = prefix + "_conf_softmax"
            net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
            flatten_name = prefix + "_conf_flatten"
            net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
            mbox_layers[1] = net[flatten_name]
        elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
            sigmoid_name = prefix + "_conf_sigmoid"
            net[sigmoid_name] = L.Sigmoid(mbox_layers[1])
            mbox_layers[1] = net[sigmoid_name]
        else:
            raise ValueError("Unknown conf loss type.")

        # Det-out param
        det_out_param = {
            'num_classes': get("num_classes", 2),
            'target_labels': get('detout_target_labels', []),
            'alias_id': get("alias_id", 0),
            'conf_threshold': get("detout_conf_threshold", 0.01),
            'nms_threshold': get("detout_nms_threshold", 0.45),
            'size_threshold': get("detout_size_threshold", 0.0001),
            'top_k': get("detout_top_k", 30),
            'share_location': True,
            'code_type': P.PriorBox.CENTER_SIZE,
            'background_label_id': 0,
            'variance_encoded_in_target': False,
        }
        det_out_name = "detection_out_{}".format(stage)
        if use_dense_boxes:
            net[det_out_name] = L.DenseDetOut(
                *mbox_layers, detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net[det_out_name] = L.DetOut(
                *mbox_layers, detection_output_param=det_out_param,
                include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # ---- Base networks: detection branch, then pose branch ----
    channels = ((32,), (32,), (64, 32, 128), (128, 64, 128, 64, 256), (256, 128, 256, 128, 256))
    strides = (True, True, True, False, False)
    kernels = ((3,), (3,), (3, 1, 3), (3, 1, 3, 1, 3), (3, 1, 3, 1, 3))
    pool_last = (False, False, False, True, True)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer, channels=channels, strides=strides,
                                      kernels=kernels, freeze_layers=[], pool_last=pool_last,
                                      flag_withparamname=True)
    net = VGG16_BaseNet_ChangeChannel(net, from_layer=data_layer + pose_string, channels=channels,
                                      strides=strides, kernels=kernels, freeze_layers=[],
                                      pool_last=pool_last, flag_withparamname=True,
                                      pose_string=pose_string)

    # ---- conv6 on top of the detection base ----
    conv6_output = Conv6_Param.get('conv6_output', [])
    conv6_kernal_size = Conv6_Param.get('conv6_kernal_size', [])
    out_layer = "pool5"
    net = addconv6(net, from_layer=out_layer, use_bn=True, conv6_output=conv6_output,
                   conv6_kernal_size=conv6_kernal_size, pre_name="conv6", start_pool=False,
                   lr_mult=1, decay_mult=1, n_group=1)

    # ---- SSD stage 1, and stage 2 when enabled ----
    _add_ssd_stage(ssd_Param_1, 1, lr_mult=1, decay_mult=1.0)
    if use_ssd2_for_detection:
        _add_ssd_stage(ssd_Param_2, 2, lr_mult=1.0, decay_mult=1.0)

    # ---- Detection evaluation, TEST mode only ----
    if not train:
        det_eval_param = {
            'gt_labels': eval_Param.get('eval_gt_labels', []),
            'num_classes': eval_Param.get("eval_num_classes", 2),
            'evaluate_difficult_gt': eval_Param.get("eval_difficult_gt", False),
            'boxsize_threshold': eval_Param.get("eval_boxsize_threshold",
                                                [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
            'iou_threshold': eval_Param.get("eval_iou_threshold", [0.9, 0.75, 0.5]),
            'background_label_id': 0,
        }
        if use_ssd2_for_detection:
            # Evaluate the detections of both stages together.
            det_out_layers = [net['detection_out_1'], net['detection_out_2']]
            name = 'det_out'
            net[name] = L.Concat(*det_out_layers, axis=2)
            net.det_accu = L.DetEval(net[name], net[gt_label],
                                     detection_evaluate_param=det_eval_param,
                                     include=dict(phase=caffe_pb2.Phase.Value('TEST')))
        else:
            net.det_accu = L.DetEval(net['detection_out_1'], net[gt_label],
                                     detection_evaluate_param=det_eval_param,
                                     include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # ---- Pose labels: slice the pose label blob along axis 1 ----
    # Slice points 34 / 52 / 86 split it into vec mask, heat mask, vec target
    # and heat target; in TEST mode an extra slice from 104 onward is the gt.
    if train:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp = \
            L.Slice(net["label" + pose_string], ntop=4,
                    slice_param=dict(slice_point=[34, 52, 86], axis=1))
    else:
        net.vec_mask, net.heat_mask, net.vec_temp, net.heat_temp, net.gt = \
            L.Slice(net["label" + pose_string], ntop=5,
                    slice_param=dict(slice_point=[34, 52, 86, 104], axis=1))
    # Masked labels.
    net.vec_label = L.Eltwise(net.vec_mask, net.vec_temp, eltwise_param=dict(operation=P.Eltwise.PROD))
    net.heat_label = L.Eltwise(net.heat_mask, net.heat_temp, eltwise_param=dict(operation=P.Eltwise.PROD))

    # ---- Pose test-time settings ----
    pose_test_kwargs = {
        # nms
        'nms_threshold': 0.05,
        'nms_max_peaks': 500,
        'nms_num_parts': 18,
        # connect
        'conn_is_type_coco': True,
        'conn_max_person': 10,
        'conn_max_peaks_use': 20,
        'conn_iters_pa_cal': 10,
        'conn_connect_inter_threshold': 0.05,
        'conn_connect_inter_min_nums': 8,
        'conn_connect_min_subset_cnt': 3,
        'conn_connect_min_subset_score': 0.4,
        # visual
        'eval_area_thre': 64*64,
        'eval_oks_thre': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9],
    }

    # ---- Deconvolution (+ BN / scale / ReLU) on the pose conv5_5 ----
    bn_kwargs = {
        'param': [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)],
        'eps': 0.001,
    }
    sb_kwargs = {
        'bias_term': True,
        'param': [dict(lr_mult=1, decay_mult=0), dict(lr_mult=1, decay_mult=0)],
        'filler': dict(type='constant', value=1.0),
        'bias_filler': dict(type='constant', value=0.2),
    }
    deconv_param = {
        'num_output': 128,
        'kernel_size': 2,
        'pad': 0,
        'stride': 2,
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
        'group': 1,
    }
    kwargs_deconv = {
        'param': [dict(lr_mult=1, decay_mult=1)],
        'convolution_param': deconv_param
    }
    from_layer = "conv5_5" + pose_string
    add_layer = from_layer + "_deconv"
    net[add_layer] = L.Deconvolution(net[from_layer], **kwargs_deconv)
    bn_name = add_layer + '_bn'
    net[bn_name] = L.BatchNorm(net[add_layer], in_place=True, **bn_kwargs)
    sb_name = add_layer + '_scale'
    net[sb_name] = L.Scale(net[add_layer], in_place=True, **sb_kwargs)
    relu_name = add_layer + '_relu'
    net[relu_name] = L.ReLU(net[add_layer], in_place=True)
    baselayer = add_layer

    # ---- Pose stages ----
    use_stage = 3
    use_3_layers = 5
    use_1_layers = 0
    n_channel = 64
    lrdecay = 1.0
    kernel_size = 3
    flag_output_sigmoid = False
    # range() behaves the same as xrange() here and also exists on Python 3.
    for stage in range(use_stage):
        if stage == 0:
            from_layer = baselayer
        else:
            from_layer = "concat_stage{}".format(stage)
        outlayer = "concat_stage{}".format(stage + 1)
        # Every stage but the last requests the short-cut.
        short_cut = stage != use_stage - 1
        net = mPose_StageX_Train(net, from_layer=from_layer, out_layer=outlayer, stage=stage + 1,
                                 mask_vec="vec_mask", mask_heat="heat_mask",
                                 label_vec="vec_label", label_heat="heat_label",
                                 use_3_layers=use_3_layers, use_1_layers=use_1_layers,
                                 short_cut=short_cut, base_layer=baselayer, lr=0.1, decay=lrdecay,
                                 num_channels=n_channel, kernel_size=kernel_size,
                                 flag_sigmoid=flag_output_sigmoid)

    # ---- Pose post-processing and evaluation, TEST mode only ----
    if not train:
        last_conv = use_3_layers + use_1_layers
        conv_vec = "stage{}_conv{}_vec".format(use_stage, last_conv)
        conv_heat = "stage{}_conv{}_heat".format(use_stage, last_conv)
        if flag_output_sigmoid:
            conv_vec += "_sig"
            conv_heat += "_sig"
        net.vec_out = L.Eltwise(net.vec_mask, net[conv_vec], eltwise_param=dict(operation=P.Eltwise.PROD))
        net.heat_out = L.Eltwise(net.heat_mask, net[conv_heat], eltwise_param=dict(operation=P.Eltwise.PROD))
        feaLayers = [net.heat_out, net.vec_out]
        # Named after the last stage (previously a hard-coded 3, which is the
        # current value of use_stage).
        outlayer = "concat_stage{}".format(use_stage)
        net[outlayer] = L.Concat(*feaLayers, axis=1)
        # Resize
        resize_kwargs = {
            'factor': pose_test_kwargs.get("resize_factor", 8),
            'scale_gap': pose_test_kwargs.get("resize_scale_gap", 0.3),
            'start_scale': pose_test_kwargs.get("resize_start_scale", 1.0),
        }
        net.resized_map = L.ImResize(net[outlayer], name="resize", imresize_param=resize_kwargs)
        # Nms
        nms_kwargs = {
            'threshold': pose_test_kwargs.get("nms_threshold", 0.05),
            'max_peaks': pose_test_kwargs.get("nms_max_peaks", 100),
            'num_parts': pose_test_kwargs.get("nms_num_parts", 18),
        }
        net.joints = L.Nms(net.resized_map, name="nms", nms_param=nms_kwargs)
        # ConnectLimbs
        connect_kwargs = {
            'is_type_coco': pose_test_kwargs.get("conn_is_type_coco", True),
            'max_person': pose_test_kwargs.get("conn_max_person", 10),
            'max_peaks_use': pose_test_kwargs.get("conn_max_peaks_use", 20),
            'iters_pa_cal': pose_test_kwargs.get("conn_iters_pa_cal", 10),
            'connect_inter_threshold': pose_test_kwargs.get("conn_connect_inter_threshold", 0.05),
            'connect_inter_min_nums': pose_test_kwargs.get("conn_connect_inter_min_nums", 8),
            'connect_min_subset_cnt': pose_test_kwargs.get("conn_connect_min_subset_cnt", 3),
            'connect_min_subset_score': pose_test_kwargs.get("conn_connect_min_subset_score", 0.4),
        }
        net.limbs = L.Connectlimb(net.resized_map, net.joints, connect_limb_param=connect_kwargs)
        # Eval
        eval_kwargs = {
            'stride': 8,
            'area_thre': pose_test_kwargs.get("eval_area_thre", 64 * 64),
            'oks_thre': pose_test_kwargs.get("eval_oks_thre",
                                             [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]),
        }
        net.eval = L.PoseEval(net.limbs, net.gt, pose_eval_param=eval_kwargs)
    return net
def SsdDetector(net, train=True, data_layer="data", gt_label="label",
                net_width=512, net_height=288,
                eval_enable=True, **ssdparam):
    """Assemble an SSD-style detector on top of ResidualVariant_Base_A.

    The function chains, in order: the ResidualVariant_Base_A backbone
    (layers suffixed "_recon"), the extra "conv6" stack from addconv6, three
    unified feature maps built by UnifiedMultiScaleLayers, and the prediction
    heads from SsdDetectorHeaders. In training mode it then attaches an
    L.BBoxLoss layer; otherwise it converts the confidence predictions and
    attaches L.DetOut followed by L.DetEval.

    Args:
        net: network specification to extend; it is indexed by layer name
            and assigned attributes (presumably a caffe NetSpec - confirm).
        train: True to attach the training loss, False to attach the
            detection output and evaluation layers.
        data_layer: name of the input data layer, forwarded to the backbone
            and to SsdDetectorHeaders.
        gt_label: name of the ground-truth blob in ``net`` consumed by the
            loss (train) or by the evaluation layer (test).
        net_width: forwarded to SsdDetectorHeaders as ``net_width``.
        net_height: forwarded to SsdDetectorHeaders as ``net_height``.
        eval_enable: accepted for interface compatibility; this function
            never reads it, so L.DetEval is always added in test mode.
        **ssdparam: optional settings read with ``ssdparam.get``. Prefixes
            used here: ``multilayers_*`` (conv6 stack and heads),
            ``multiloss_*`` (loss and box coding), ``detectionout_*``,
            ``detectioneval_*``, and ``num_classes`` (default 2).

    Returns:
        The same ``net`` object with the new layers attached.

    Raises:
        ValueError: in test mode, when ``multiloss_conf_loss_type`` is
            neither P.MultiBoxLoss.SOFTMAX nor P.MultiBoxLoss.LOGISTIC.
    """
    num_classes = ssdparam.get("num_classes", 2)
    conf_loss_type = ssdparam.get("multiloss_conf_loss_type",
                                  P.MultiBoxLoss.SOFTMAX)

    # Backbone
    net = ResidualVariant_Base_A(net, data_layer=data_layer,
                                 use_sub_layers=(6, 7),
                                 num_channels=(144, 288),
                                 output_channels=(128, 0),
                                 channel_scale=4,
                                 lr=0.1, decay=0.1,
                                 add_strs="_recon")

    # Conv6
    net = addconv6(net, from_layer="conv3_7_recon_relu", use_bn=True,
                   conv6_output=ssdparam.get("multilayers_conv6_output", []),
                   conv6_kernal_size=ssdparam.get(
                       "multilayers_conv6_kernal_size", []),
                   pre_name="conv6", start_pool=True,
                   lr_mult=1, decay_mult=1, n_group=1)

    # Concat FM1..FM3: each unified map pairs a layer tagged "Down" (reduced
    # with MaxPool) with a layer tagged "Ref".
    # NOTE(review): "conv6_6" presumably only exists when
    # multilayers_conv6_output has at least six entries - confirm in addconv6.
    feature_specs = (
        ("featuremap1", ["pool1_recon", "conv2_6_recon_relu"]),
        ("featuremap2", ["conv2_6_recon_relu", "conv3_7_recon_relu"]),
        ("featuremap3", ["conv3_7_recon_relu", "conv6_6"]),
    )
    feature_layers = []
    for unified_name, source_layers in feature_specs:
        # Fresh tag/method lists per call, exactly as the original code
        # passed them; the return value is not needed because the helper
        # attaches the new layer to ``net`` itself.
        UnifiedMultiScaleLayers(net, layers=source_layers,
                                tags=["Down", "Ref"],
                                unifiedlayer=unified_name,
                                dnsampleMethod=[["MaxPool"]])
        feature_layers.append(unified_name)

    # Create SSD Header
    mbox_layers = SsdDetectorHeaders(
        net,
        boxsizes=ssdparam.get("multilayers_boxsizes", []),
        net_width=net_width,
        net_height=net_height,
        data_layer=data_layer,
        num_classes=num_classes,
        from_layers=feature_layers,
        use_batchnorm=ssdparam.get("multilayers_use_batchnorm", True),
        prior_variance=ssdparam.get("multilayers_prior_variance",
                                    [0.1, 0.1, 0.2, 0.2]),
        normalizations=ssdparam.get("multilayers_normalizations", []),
        aspect_ratios=ssdparam.get("multilayers_aspect_ratios", []),
        flip=ssdparam.get("multilayers_flip", True),
        clip=ssdparam.get("multilayers_clip", True),
        inter_layer_channels=ssdparam.get(
            "multilayers_inter_layer_channels", []),
        kernel_size=ssdparam.get("multilayers_kernel_size", 3),
        pad=ssdparam.get("multilayers_pad", 1),
        use_focus_loss=ssdparam.get("multiloss_using_focus_loss", False))

    # Loss & Det-eval
    if train:
        loss_param = get_loss_param(
            normalization=ssdparam.get("multiloss_normalization",
                                       P.Loss.VALID))
        # The ground truth becomes the last bottom blob of the loss layer.
        mbox_layers.append(net[gt_label])
        bboxloss_param = {
            'loc_loss_type': ssdparam.get("multiloss_loc_loss_type",
                                          P.MultiBoxLoss.SMOOTH_L1),
            'conf_loss_type': conf_loss_type,
            'loc_weight': ssdparam.get("multiloss_loc_weight", 1),
            'conf_weight': ssdparam.get("multiloss_conf_weight", 1),
            'num_classes': num_classes,
            'share_location': ssdparam.get("multiloss_share_location", True),
            'match_type': ssdparam.get("multiloss_match_type",
                                       P.MultiBoxLoss.PER_PREDICTION),
            'overlap_threshold': ssdparam.get("multiloss_overlap_threshold",
                                              0.5),
            'use_prior_for_matching': True,
            'background_label_id': 0,
            'use_difficult_gt': ssdparam.get("multiloss_use_difficult_gt",
                                             False),
            'do_neg_mining': ssdparam.get("multiloss_do_neg_mining", True),
            'neg_pos_ratio': ssdparam.get("multiloss_neg_pos_ratio", 3),
            'neg_overlap': ssdparam.get("multiloss_neg_overlap", 0.5),
            'code_type': ssdparam.get("multiloss_code_type",
                                      P.PriorBox.CENTER_SIZE),
            'encode_variance_in_target': False,
            'map_object_to_agnostic': False,
            'size_threshold': ssdparam.get("multiloss_size_threshold",
                                           0.0001),
            'alias_id': ssdparam.get("multiloss_alias_id", 0),
            'using_focus_loss': ssdparam.get("multiloss_using_focus_loss",
                                             False),
            'gama': ssdparam.get("multiloss_focus_gama", 2),
            'loc_class': 1,
        }
        net["mbox_loss"] = L.BBoxLoss(
            *mbox_layers,
            bbox_loss_param=bboxloss_param,
            loss_param=loss_param,
            include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
            propagate_down=[True, True, False, False])
        return net

    # NOTE(review): everything below is treated as the test-only branch,
    # since it consumes the converted confidences and an ``mbox_layers`` list
    # without the ground-truth blob - confirm against the intended layout.
    if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
        # Reshape to (batch, -1, num_classes), softmax over the class axis,
        # then flatten back before handing the scores to DetOut.
        net["mbox_conf_reshape"] = L.Reshape(
            mbox_layers[1], shape=dict(dim=[0, -1, num_classes]))
        net["mbox_conf_softmax"] = L.Softmax(net["mbox_conf_reshape"], axis=2)
        net["mbox_conf_flatten"] = L.Flatten(net["mbox_conf_softmax"], axis=1)
        mbox_layers[1] = net["mbox_conf_flatten"]
    elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
        net["mbox_conf_sigmoid"] = L.Sigmoid(mbox_layers[1])
        mbox_layers[1] = net["mbox_conf_sigmoid"]
    else:
        raise ValueError("Unknown conf loss type.")

    # Det-out param
    det_out_param = {
        'num_classes': num_classes,
        'share_location': ssdparam.get("multiloss_share_location", True),
        'background_label_id': 0,
        'code_type': ssdparam.get("multiloss_code_type",
                                  P.PriorBox.CENTER_SIZE),
        'variance_encoded_in_target': False,
        'conf_threshold': ssdparam.get("detectionout_conf_threshold", 0.01),
        'nms_threshold': ssdparam.get("detectionout_nms_threshold", 0.45),
        'size_threshold': ssdparam.get("detectionout_size_threshold", 0.001),
        'top_k': ssdparam.get("detectionout_top_k", 200),
        # A stray second comma after this entry used to make the whole
        # module fail to parse.
        'alias_id': ssdparam.get("multiloss_alias_id", 0),
    }
    net.detection_out = L.DetOut(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    # Det-eval
    det_eval_param = {
        'num_classes': num_classes,
        'background_label_id': 0,
        'evaluate_difficult_gt': ssdparam.get(
            "detectioneval_evaluate_difficult_gt", False),
        'boxsize_threshold': ssdparam.get(
            "detectioneval_boxsize_threshold",
            [0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25]),
        'iou_threshold': ssdparam.get("detectioneval_iou_threshold",
                                      [0.9, 0.75, 0.5]),
    }
    net.det_accu = L.DetEval(
        net.detection_out, net[gt_label],
        detection_evaluate_param=det_eval_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    return net