def generate(self):
    """Build the RPN-stage network spec and pass it to ``self.save``.

    The shared front end is: an input layer, ``LT.conv1_to_5`` and the RPN
    class/bbox predictors followed by a reshape of the class scores.

    * When ``self.train`` is truthy the input is the ``RoIDataLayer`` Python
      layer, and the anchor-target layer, the two RPN losses and a frozen,
      silenced dummy fc6/fc7 branch are appended.
    * Otherwise the input comes from ``LT.input()`` and the ROI proposal
      layers from ``LT.roi_proposal`` are appended.

    Returns:
        Whatever ``self.save`` returns for the assembled ``caffe.NetSpec``.
    """
    net = caffe.NetSpec()

    # RPN predictor weights are learned only while training.
    if self.train:
        rpn_param = LT.learned_param
    else:
        rpn_param = LT.frozen_param

    # Input layer.
    if self.train:
        net.data = L.Python(
            top=["im_info", 'gt_boxes'],
            python_param=dict(
                module='roi_data_layer.layer',
                layer='RoIDataLayer',
                param_str="num_classes: " + str(self.num_classes)))
    else:
        net.data, net.im_info = LT.input()

    # conv1..conv5, learned or frozen according to the configuration flag.
    if self.conv_1_to_5_learn:
        backbone_param = LT.learned_param
    else:
        backbone_param = LT.frozen_param
    LT.conv1_to_5(net, backbone_param)

    # RPN predictors shared by both modes.
    (net.rpn_conv1,
     net.rpn_relu1,
     net.rpn_cls_score,
     net.rpn_bbox_pred) = LT.rpn_class_and_bbox_predictors(net, self, rpn_param)
    net.rpn_cls_score_reshape = LT.reshape(net.rpn_cls_score, [0, 2, -1, 0])

    if not self.train:
        # Test path: only the proposal layers are added.
        (net.rpn_cls_prob,
         net.rpn_cls_prob_reshape,
         net.rois) = LT.roi_proposal(net, self)
        return self.save(net)

    # Training path: anchor targets and the two RPN losses.
    anchor_cfg = LT.anchor_params(self.anchor_feat_stride,
                                  self.anchor_scales,
                                  self.anchor_ratios)
    net.rpn_labels = L.Python(
        bottom=["rpn_cls_score", "gt_boxes", "im_info", "data"],
        top=[
            'rpn_bbox_targets',
            "rpn_bbox_inside_weights",
            "rpn_bbox_outside_weights",
        ],
        python_param=dict(module='rpn.anchor_target_layer',
                          layer='AnchorTargetLayer',
                          param_str=anchor_cfg))
    net.loss_cls = LT.soft_max_with_loss(
        ["rpn_cls_score_reshape", "rpn_labels"])
    net.loss_bbox = L.SmoothL1Loss(
        bottom=[
            "rpn_bbox_pred",
            "rpn_bbox_targets",
            "rpn_bbox_inside_weights",
            "rpn_bbox_outside_weights",
        ],
        loss_weight=1)

    # Dummy RCNN layers: frozen fc6/fc7 fed by DummyData, output silenced.
    net.dummy_roi_pool_conv_5 = L.DummyData(
        dummy_data_param=dict(shape=dict(dim=[1, 9216]),
                              data_filler=LT.WEIGHT_FILLER))
    net.fc6, net.relu6 = LT.fc_relu(net.dummy_roi_pool_conv_5, 4096,
                                    param=LT.frozen_param)
    net.fc7 = L.InnerProduct(net.fc6, num_output=4096, param=LT.frozen_param)
    net.silence_fc7 = LT.silence(net.fc7)

    return self.save(net)
def resnet_mask_end2end(self):
    """Assemble the end-to-end ResNet detection + mask network on ``self.net``.

    Layers are added in this order:

    1. data layer (``data_layer_train_with_ins`` when training,
       ``data_layer_test`` when ``self.deploy`` is truthy), ``conv1``, ``pool1``;
    2. one residual stage per entry of ``self.stages[:-1]``, doubling the
       channel count after each stage;
    3. RPN and ROI proposals (in training the detection and mask ROIs are
       concatenated into ``rois_cat``), then ``roi_align``;
    4. the final residual stage ``self.stages[-1]`` on the aligned features;
    5. the detection head (losses in training, ``cls_prob`` in deploy);
    6. the mask head (deconvolution + BN/Scale/ReLU + 1x1 conv, followed by
       ``loss_mask`` in training or ``mask_prob`` in deploy).

    Returns:
        The result of ``self.net.to_proto()``.
    """
    # ``channals`` (sic) is the attribute name the class exposes.
    channels = self.channals

    if not self.deploy:
        data, im_info, gt_boxes, _ins = data_layer_train_with_ins(
            self.net, self.classes, with_rpn=True)
    else:
        data, im_info = data_layer_test(self.net)
        gt_boxes = None

    conv1 = conv_factory(self.net, "conv1", data, 7, channels, 2, 3,
                         bias_term=True)
    pool1 = pooling_layer(self.net, 3, 2, 'MAX', 'pool1', conv1)

    # Bind the selected builder to a *new* local name. The previous code did
    # ``residual_block = residual_block``, which makes ``residual_block`` a
    # function-local variable and therefore raised UnboundLocalError whenever
    # ``self.module == "normal"``.
    if self.module == "normal":
        block_fn = residual_block
    else:
        block_fn = residual_block_basic

    index = 1
    out = pool1
    for num_blocks in self.stages[:-1]:
        index += 1
        for j in range(num_blocks):
            block_name = "res" + str(index) + ascii_lowercase[j]
            if j == 0:
                # Stage "res2" keeps the resolution; later stages use stride 2
                # in their first block.
                stride = 1 if index == 2 else 2
                out = block_fn(self.net, block_name, out, channels, stride)
            else:
                out = block_fn(self.net, block_name, out, channels)
        channels *= 2

    if not self.deploy:
        (_rpn_cls_loss, _rpn_loss_bbox,
         rpn_cls_score_reshape, rpn_bbox_pred) = rpn(
            self.net, out, gt_boxes, im_info, data, fixed=False)
        (rois, labels, bbox_targets, bbox_inside_weights,
         bbox_outside_weights, mask_roi, masks) = roi_proposals(
            self.net, rpn_cls_score_reshape, rpn_bbox_pred, im_info, gt_boxes)
        # Detection ROIs first, mask ROIs second; they are split again below.
        self.net["rois_cat"] = L.Concat(rois, mask_roi, name="rois_cat",
                                        axis=0)
        rois = self.net["rois_cat"]
    else:
        rpn_cls_score_reshape, rpn_bbox_pred = rpn(
            self.net, out, gt_boxes, im_info, data)
        rois, _scores = roi_proposals(
            self.net, rpn_cls_score_reshape, rpn_bbox_pred, im_info, gt_boxes)

    feat_out = out
    out = roi_align(self.net, "det_mask", feat_out, rois)

    # Last residual stage, applied to the ROI-aligned features.
    index += 1
    for j in range(self.stages[-1]):
        block_name = "res" + str(index) + ascii_lowercase[j]
        if j == 0:
            out = block_fn(self.net, block_name, out, channels, 1)
        else:
            out = block_fn(self.net, block_name, out, channels)

    if not self.deploy:
        # NOTE(review): ``self.net`` is passed as the first positional argument
        # here, unlike every other ``L.*`` call in this function -- confirm it
        # is intended and not a stray argument. Left unchanged.
        self.net["det_feat"], self.net["mask_feat"] = L.Slice(
            self.net, out, ntop=2, name='slice',
            slice_param=dict(slice_dim=0, slice_point=self.rois_num))
        feat_mask = self.net["mask_feat"]
        out = self.net["det_feat"]

    # Bounding-box detection head.
    pool5 = ave_pool(self.net, 7, 1, "pool5", out)
    cls_score, bbox_pred = final_cls_bbox(self.net, pool5)
    if not self.deploy:
        self.net["loss_cls"] = L.SoftmaxWithLoss(
            cls_score, labels, loss_weight=1, propagate_down=[1, 0])
        self.net["loss_bbox"] = L.SmoothL1Loss(
            bbox_pred, bbox_targets, bbox_inside_weights,
            bbox_outside_weights, loss_weight=1)
    else:
        self.net["cls_prob"] = L.Softmax(cls_score)

    # Mask prediction head. Training uses the mask slice; deploy reuses the
    # features that fed the detection head.
    mask_feat_aligned = feat_mask if not self.deploy else out

    out = L.Deconvolution(
        mask_feat_aligned, name="mask_deconv1",
        convolution_param=dict(kernel_size=2, stride=2, num_output=256, pad=0,
                               bias_term=False,
                               weight_filler=dict(type='msra')))
    out = L.BatchNorm(out, name="bn_mask_deconv1", in_place=True,
                      batch_norm_param=dict(use_global_stats=self.deploy))
    out = L.Scale(out, name="scale_mask_deconv1", in_place=True,
                  scale_param=dict(bias_term=True))
    out = L.ReLU(out, name="mask_deconv1_relu", in_place=True)
    mask_out = conv_factory(self.net, "mask_out", out, 1, self.classes - 1,
                            1, 0, bias_term=True)

    if not self.deploy:
        self.net["loss_mask"] = L.SigmoidCrossEntropyLoss(
            mask_out, masks, loss_weight=1, propagate_down=[1, 0],
            loss_param=dict(normalization=1, ignore_label=-1))
    else:
        self.net["mask_prob"] = L.Sigmoid(mask_out)

    return self.net.to_proto()
def generate(self):
    """Build the ROI-pooling classification-stage spec and save it.

    Common layers: input, ``LT.conv1_to_5``, ROI pooling over ``conv5`` /
    ``rois``, fc6/fc7 with in-place dropout, and the ``cls_score`` /
    ``bbox_pred`` inner-product heads.

    * Training (``self.train`` truthy): ROIs and targets come from the
      ``RoIDataLayer`` Python layer; the classification and bbox losses are
      added, plus frozen RPN predictors whose outputs are silenced.
    * Otherwise: the RPN predictors, score reshape and ROI proposal layers are
      inserted before ROI pooling, and a ``cls_prob`` softmax is appended.

    Returns:
        Whatever ``self.save`` returns for the assembled ``caffe.NetSpec``.
    """
    net = caffe.NetSpec()
    head_param = LT.learned_param if self.train else LT.frozen_param

    # Input layer.
    if self.train:
        net.data = L.Python(
            top=[
                "rois",
                'labels',
                'bbox_targets',
                'bbox_inside_weights',
                'bbox_outside_weights',
            ],
            python_param=dict(
                module='roi_data_layer.layer',
                layer='RoIDataLayer',
                param_str="num_classes: " + str(self.num_classes)))
    else:
        net.data, net.im_info = LT.input()

    if self.conv_1_to_5_learn:
        backbone_param = LT.learned_param
    else:
        backbone_param = LT.frozen_param
    LT.conv1_to_5(net, backbone_param)

    # Outside training the ROIs are produced by the RPN.
    if not self.train:
        (net.rpn_conv1,
         net.rpn_relu1,
         net.rpn_cls_score,
         net.rpn_bbox_pred) = LT.rpn_class_and_bbox_predictors(
            net, self, head_param)
        net.rpn_cls_score_reshape = LT.reshape(net.rpn_cls_score,
                                               [0, 2, -1, 0])
        (net.rpn_cls_prob,
         net.rpn_cls_prob_reshape,
         net.rois) = LT.roi_proposal(net, self)

    net.roi_pool = L.ROIPooling(bottom=["conv5", "rois"],
                                pooled_w=6,
                                pooled_h=6,
                                spatial_scale=0.0625)

    # fc6 / fc7, each followed by in-place dropout.
    net.fc6, net.relu6 = LT.fc_relu(net.roi_pool, 4096, param=head_param)
    drop6 = L.Dropout(net.relu6, in_place=True, dropout_ratio=0.5,
                      scale_train=False)
    net.drop6 = drop6
    net.fc7, net.relu7 = LT.fc_relu(drop6, 4096, param=head_param)
    drop7 = L.Dropout(net.relu7, in_place=True, dropout_ratio=0.5,
                      scale_train=False)
    net.drop7 = drop7

    # Fillers are only specified while training.
    if self.train:
        weight_filler = LT.WEIGHT_FILLER
    else:
        weight_filler = dict()
    if self.train:
        bias_filler = LT.BIAS_FILLER
    else:
        bias_filler = dict()

    net.cls_score = L.InnerProduct(drop7,
                                   num_output=self.num_classes,
                                   weight_filler=weight_filler,
                                   bias_filler=bias_filler,
                                   param=LT.learned_param)
    net.bbox_pred = L.InnerProduct(drop7,
                                   num_output=self.num_classes * 4,
                                   weight_filler=weight_filler,
                                   bias_filler=bias_filler,
                                   param=LT.learned_param)

    if self.train:
        net.loss_cls = LT.soft_max_with_loss(["cls_score", "labels"])
        net.loss_bbox = L.SmoothL1Loss(
            bottom=[
                "bbox_pred",
                "bbox_targets",
                "bbox_inside_weights",
                "bbox_outside_weights",
            ],
            loss_weight=1)

        # Frozen RPN predictors are still added; their outputs are silenced.
        (net.rpn_conv1,
         net.rpn_relu1,
         net.rpn_cls_score,
         net.rpn_bbox_pred) = LT.rpn_class_and_bbox_predictors(
            net, self, LT.frozen_param)
        net.silence_rpn_cls_score = LT.silence(net.rpn_cls_score)
        net.silence_rpn_bbox_pred = LT.silence(net.rpn_bbox_pred)
    else:
        net.cls_prob = L.Softmax(net.cls_score,
                                 loss_param=dict(ignore_label=-1,
                                                 normalize=True))

    # Hand the spec to self.save and return its result.
    return self.save(net)
def rpn(net, bottom, gt_boxes, im_info, data, anchors, feat_stride, scales,
        fixed=False, deploy=False):
    """Add the region-proposal-network layers to ``net``.

    Always adds ``rpn_conv/3x3`` (+ in-place ReLU), ``rpn_cls_score``,
    ``rpn_bbox_pred`` and ``rpn_cls_score_reshape``. When neither ``deploy``
    nor ``fixed`` is set, the anchor-target Python layer and the two RPN
    losses are added as well.

    Args:
        net: container the layers are stored in by key.
        bottom: input blob of the first RPN convolution.
        gt_boxes, im_info, data: bottoms of the anchor-target layer.
        anchors: anchors per location; sizes the score/bbox outputs.
        feat_stride, scales: formatted into the anchor-target ``param_str``.
        fixed: when truthy, every ``lr_mult`` is 0 and no loss is added.
        deploy: when truthy, no loss is added.

    Returns:
        ``(rpn_cls_loss, rpn_loss_bbox, rpn_cls_score_reshape, rpn_bbox_pred)``
        when the losses are built, otherwise
        ``(rpn_cls_score_reshape, rpn_bbox_pred)``.
    """

    def lr_mults():
        # A fresh list per layer, as in the hand-written variants.
        if fixed:
            return [{'lr_mult': 0}, {'lr_mult': 0}]
        return [{'lr_mult': 1}, {'lr_mult': 2}]

    def conv(src, kernel, outputs, padding):
        return L.Convolution(src,
                             kernel_size=kernel,
                             stride=1,
                             num_output=outputs,
                             pad=padding,
                             param=lr_mults(),
                             weight_filler=dict(type='gaussian', std=0.01),
                             bias_filler=dict(type='constant', value=0),
                             engine=2)

    net["rpn_conv/3x3"] = conv(bottom, 3, 512, 1)
    net["rpn_relu/3x3"] = L.ReLU(net["rpn_conv/3x3"], in_place=True)
    net["rpn_cls_score"] = conv(net["rpn_relu/3x3"], 1, 2 * anchors, 0)
    net["rpn_bbox_pred"] = conv(net["rpn_relu/3x3"], 1, 4 * anchors, 0)

    net["rpn_cls_score_reshape"] = L.Reshape(
        net["rpn_cls_score"],
        reshape_param={"shape": {"dim": [0, 2, -1, 0]}})

    if deploy or fixed:
        return net["rpn_cls_score_reshape"], net["rpn_bbox_pred"]

    anchor_target_cfg = '{"feat_stride": %s,"scales": %s}' % (feat_stride,
                                                              scales)
    (net["rpn_labels"],
     net["rpn_bbox_targets"],
     net["rpn_bbox_inside_weights"],
     net["rpn_bbox_outside_weights"]) = L.Python(
        net["rpn_cls_score"], gt_boxes, im_info, data,
        name='rpn-data',
        python_param=dict(module='rpn.anchor_target_layer',
                          layer='AnchorTargetLayer',
                          param_str=anchor_target_cfg),
        ntop=4,
    )

    net["rpn_cls_loss"] = L.SoftmaxWithLoss(
        net["rpn_cls_score_reshape"], net["rpn_labels"],
        name="rpn_loss_cls",
        propagate_down=[1, 0],
        loss_weight=1,
        loss_param={"ignore_label": -1, "normalize": True})
    net["rpn_loss_bbox"] = L.SmoothL1Loss(
        net["rpn_bbox_pred"], net["rpn_bbox_targets"],
        net["rpn_bbox_inside_weights"], net["rpn_bbox_outside_weights"],
        name="loss_bbox",
        loss_weight=1,
        smooth_l1_loss_param={"sigma": 3.0})

    return (net["rpn_cls_loss"], net["rpn_loss_bbox"],
            net["rpn_cls_score_reshape"], net["rpn_bbox_pred"])
def add_train_rfcn_layers(net, split_to_rpn_layer, end_body_layer=None,
                          prefix=""):
    """Append the training-time RPN and R-FCN (OHEM) layers to ``net``.

    Every blob created here is stored under ``prefix + <name>``. The shared
    ``net['im_info']`` and ``net['data']`` blobs are read without the prefix,
    while ground-truth boxes are read from ``net[prefix + 'gt_boxes']``.

    Args:
        net: layer container indexed by blob name; mutated in place.
        split_to_rpn_layer: input blob of the RPN branch.
        end_body_layer: input blob of the R-FCN branch (``conv_new_1``).
        prefix: string prepended to the blob and layer names created here.

    Returns:
        The same ``net`` object.
    """

    def key(name):
        return prefix + name

    # Shared by every learned convolution below.
    conv_kwargs = {
        'param': [dict(lr_mult=1, decay_mult=1),
                  dict(lr_mult=2, decay_mult=0)],
        'weight_filler': dict(type='msra'),
        'bias_filler': dict(type='constant', value=0.0),
    }
    proposal_param = {
        'ratio': cfg.ANCHOR_GENERATOR.RATIOS,
        'scale': cfg.ANCHOR_GENERATOR.SCALES,
        'base_size': cfg.FEAT_STRIDE,
        'feat_stride': cfg.FEAT_STRIDE,
        'pre_nms_topn': cfg.TRAIN.RPN_PRE_NMS_TOP_N,
        'post_nms_topn': cfg.TRAIN.RPN_POST_NMS_TOP_N,
        'nms_thresh': cfg.TRAIN.RPN_NMS_THRESH,
        'min_size': cfg.TRAIN.DATA_AUG.MIN_SIZE,
    }

    def psroi_param(output_dim):
        return {
            'spatial_scale': 1.0 / feat_stride,
            'output_dim': output_dim,
            'group_size': position_num,
        }

    def ave_pool(src, layer_name):
        return L.Pooling(src,
                         name=layer_name,
                         pool=P.Pooling.AVE,
                         kernel_size=position_num,
                         stride=position_num)

    # ---- RPN predictors ----
    net[key('rpn_output')] = L.Convolution(
        split_to_rpn_layer,
        num_output=256, pad=1, kernel_size=3, stride=1, **conv_kwargs)
    net[key('rpn_output_relu')] = L.ReLU(net[key('rpn_output')],
                                         in_place=True)
    net[key('rpn_cls_score')] = L.Convolution(
        net[key('rpn_output_relu')],
        num_output=anchors_num * 2, pad=0, kernel_size=1, stride=1,
        **conv_kwargs)
    net[key('rpn_bbox_pred')] = L.Convolution(
        net[key('rpn_output_relu')],
        num_output=anchors_num * 4, pad=0, kernel_size=1, stride=1,
        **conv_kwargs)
    net[key('rpn_cls_score_reshape')] = L.Reshape(
        net[key('rpn_cls_score')],
        reshape_param={'shape': {'dim': [0, 2, -1, 0]}})

    # ---- RPN targets and losses ----
    (net[key('rpn_labels')],
     net[key('rpn_bbox_targets')],
     net[key('rpn_bbox_inside_weights')],
     net[key('rpn_bbox_outside_weights')]) = L.Python(
        net[key('rpn_cls_score')],
        net[key('gt_boxes')],
        net['im_info'],
        net['data'],
        name=prefix + "rpn-data",
        python_param={
            'module': "rpn.anchor_target_layer",
            'layer': "AnchorTargetLayer",
        },
        ntop=4)
    net[key('rpn_loss_cls')] = L.SoftmaxWithLoss(
        net[key('rpn_cls_score_reshape')],
        net[key('rpn_labels')],
        loss_weight=1.0,
        propagate_down=[True, False],
        loss_param={"ignore_label": -1, "normalize": True})
    net[key('rpn_loss_bbox')] = L.SmoothL1Loss(
        net[key('rpn_bbox_pred')],
        net[key('rpn_bbox_targets')],
        net[key('rpn_bbox_inside_weights')],
        net[key('rpn_bbox_outside_weights')],
        loss_weight=1.0,
        smooth_l1_loss_param={'sigma': 3.0})

    # ---- Proposals ----
    net[key('rpn_cls_prob')] = L.Softmax(net[key('rpn_cls_score_reshape')])
    net[key('rpn_cls_prob_reshape')] = L.Reshape(
        net[key('rpn_cls_prob')],
        reshape_param={'shape': {'dim': [0, 2 * anchors_num, -1, 0]}})
    net[key('rpn_rois')], net[key('rpn_scores')] = L.Proposal(
        net[key('rpn_cls_prob_reshape')],
        net[key('rpn_bbox_pred')],
        net['im_info'],
        name=prefix + "proposal",
        proposal_param=proposal_param,
        ntop=2)
    net[key('rpn_scores_silence')] = L.Silence(net[key('rpn_scores')],
                                               ntop=0)

    (net[key('rois')],
     net[key('labels')],
     net[key('bbox_targets')],
     net[key('bbox_inside_weights')],
     net[key('bbox_outside_weights')],
     net[key('pos_num')]) = L.Python(
        net[key('rpn_rois')],
        net[key('gt_boxes')],
        net['data'],
        name=prefix + "roi-data",
        python_param={
            'module': "rpn.proposal_target_layer",
            'layer': "ProposalTargetLayer",
        },
        ntop=6)

    # ---- R-FCN position-sensitive score maps ----
    net[key('conv_new_1')] = L.Convolution(
        end_body_layer,
        num_output=256, pad=0, kernel_size=1, stride=1, **conv_kwargs)
    net[key('conv_new_1_relu')] = L.ReLU(net[key('conv_new_1')],
                                         in_place=True)
    net[key('rfcn_cls')] = L.Convolution(
        net[key('conv_new_1_relu')],
        num_output=position_num**2 * num_classes,
        pad=0, kernel_size=1, stride=1, **conv_kwargs)
    net[key('rfcn_bbox')] = L.Convolution(
        net[key('conv_new_1_relu')],
        num_output=position_num**2 * 8,
        pad=0, kernel_size=1, stride=1, **conv_kwargs)

    net[key('psroipooled_cls_rois')] = L.PSROIPooling(
        net[key('rfcn_cls')], net[key('rois')],
        psroi_pooling_param=psroi_param(num_classes))
    net[key('cls_score')] = ave_pool(net[key('psroipooled_cls_rois')],
                                     prefix + "ave_cls_score_rois")
    net[key('psroipooled_loc_rois')] = L.PSROIPooling(
        net[key('rfcn_bbox')], net[key('rois')],
        psroi_pooling_param=psroi_param(8))
    net[key('bbox_pred')] = ave_pool(net[key('psroipooled_loc_rois')],
                                     prefix + "ave_bbox_pred_rois")

    # ---- OHEM: per-ROI losses (zero weight, no back-propagation) ----
    (net[key('temp_loss_cls')],
     net[key('temp_prob_cls')],
     net[key('per_roi_loss_cls')]) = L.SoftmaxWithLossOHEM(
        net[key('cls_score')],
        net[key('labels')],
        name=prefix + "per_roi_loss_cls",
        loss_weight=[0.0, 0.0, 0.0],
        propagate_down=[False, False],
        ntop=3)
    (net[key('temp_loss_bbox')],
     net[key('per_roi_loss_bbox')]) = L.SmoothL1LossOHEM(
        net[key('bbox_pred')],
        net[key('bbox_targets')],
        net[key('bbox_inside_weights')],
        name=prefix + "per_roi_loss_bbox",
        loss_weight=[0.0, 0.0],
        propagate_down=[False, False, False],
        ntop=2)
    net[key('per_roi_loss')] = L.Eltwise(
        net[key('per_roi_loss_cls')],
        net[key('per_roi_loss_bbox')],
        propagate_down=[False, False])
    (net[key('labels_ohem')],
     net[key('bbox_loss_weights_ohem')]) = L.BoxAnnotatorOHEM(
        net[key('rois')],
        net[key('per_roi_loss')],
        net[key('labels')],
        net[key('bbox_inside_weights')],
        name=prefix + "annotator_detector",
        propagate_down=[False, False, False, False],
        box_annotator_ohem_param={
            'roi_per_img': cfg.TRAIN.ROI_PER_IMG,
            'ignore_label': -1,
        },
        ntop=2)
    net[key('silence')] = L.Silence(
        net[key('bbox_outside_weights')],
        net[key('temp_loss_cls')],
        net[key('temp_prob_cls')],
        net[key('temp_loss_bbox')],
        ntop=0)

    # ---- Final losses on the OHEM-selected labels / weights ----
    net[key('loss_cls')] = L.SoftmaxWithLoss(
        net[key('cls_score')],
        net[key('labels_ohem')],
        name=prefix + "loss",
        loss_weight=1.0,
        propagate_down=[True, False],
        loss_param={'ignore_label': -1})
    net[key('accuracy')] = L.Accuracy(
        net[key('cls_score')],
        net[key('labels_ohem')],
        propagate_down=[False, False],
        accuracy_param={'ignore_label': -1})
    net[key('loss_bbox')] = L.Loss(
        net[key('bbox_pred')],
        net[key('bbox_targets')],
        net[key('bbox_loss_weights_ohem')],
        net[key('pos_num')],
        type="SmoothL1LossOHEM",
        loss_weight=1.0,
        propagate_down=[True, False, False, False],
        loss_param={'normalization': P.Loss.POS_NUM})

    return net
def gen_rpn_prototxt(basemodel, num_classes, deploy=False, cpp_version=False):
    """Return prototxt text for an RPN built on top of ``basemodel``.

    Args:
        basemodel: model identifier; its lower-cased form must be listed by
            ``list_models()``. It is split into a name part and an optional
            numeric depth part.
        num_classes: forwarded to the input-data layers, and to the
            proposal-target layer's ``param_str``.
        deploy: when truthy, placeholder ``data`` / ``im_info`` layers are
            created and later replaced by ``input:`` declarations, and the
            proposal layers are generated instead of the RPN losses.
        cpp_version: use ``RPNProposal`` instead of the Python proposal
            layer; asserts that ``deploy`` is set.

    Returns:
        A string starting with ``name: "Faster-RCNN-<basemodel>"``.
    """
    assert basemodel.lower() in list_models(), \
        'Unsupported basemodel: %s' % basemodel

    # "resnet50" -> ["resnet", "50"]; a name without digits has no depth.
    parts = re.findall(r'\d+|\D+', basemodel)
    model_name = parts[0].lower()
    if len(parts) == 1:
        model_depth = -1
    else:
        model_depth = int(parts[1])

    rcnn = mzoo.FasterRCNN()
    model = model_dict[model_name]

    spec = caffe.NetSpec()
    if deploy:
        # Placeholders; swapped for "input:" declarations further down.
        spec.data = caffe.layers.Layer()
        spec.im_info = caffe.layers.Layer()
    else:
        rcnn.add_input_data(spec, num_classes)

    model.add_body_for_feature(spec, depth=model_depth, lr=1, deploy=deploy)
    body_top = mzoo.last_layer(spec)
    lr = 1.0

    def fillers():
        # Fresh filler dicts for every RPN convolution.
        return dict(weight_filler=dict(type='gaussian', std=0.01),
                    bias_filler=dict(type='constant', value=0))

    # RPN head.
    spec['rpn_conv/3x3'], spec['rpn_relu/3x3'] = mzoo.conv_relu(
        body_top, nout=256, ks=3, stride=1, pad=1, lr=lr, deploy=deploy,
        **fillers())
    spec.rpn_cls_score = mzoo.conv(
        spec['rpn_relu/3x3'], nout=2 * 9, ks=1, stride=1, pad=0, lr=lr,
        deploy=deploy, **fillers())
    spec.rpn_bbox_pred = mzoo.conv(
        spec['rpn_relu/3x3'], nout=4 * 9, ks=1, stride=1, pad=0, lr=lr,
        deploy=deploy, **fillers())
    spec.rpn_cls_score_reshape = L.Reshape(
        spec.rpn_cls_score,
        reshape_param=dict(shape=dict(dim=[0, 2, -1, 0])))

    if not deploy:
        # Training: anchor targets and the two RPN losses.
        (spec.rpn_labels,
         spec.rpn_bbox_targets,
         spec.rpn_bbox_inside_weights,
         spec.rpn_bbox_outside_weights) = L.Python(
            spec.rpn_cls_score, spec.gt_boxes, spec.im_info, spec.data,
            ntop=4,
            python_param=dict(module='rpn.anchor_target_layer',
                              layer='AnchorTargetLayer',
                              param_str="'feat_stride': 16"))
        spec.rpn_loss_cls = L.SoftmaxWithLoss(
            spec.rpn_cls_score_reshape, spec.rpn_labels,
            propagate_down=[1, 0],
            loss_weight=1,
            loss_param=dict(ignore_label=-1, normalize=True))
        spec.rpn_loss_bbox = L.SmoothL1Loss(
            spec.rpn_bbox_pred, spec.rpn_bbox_targets,
            spec.rpn_bbox_inside_weights, spec.rpn_bbox_outside_weights,
            loss_weight=1,
            smooth_l1_loss_param=dict(sigma=3.0))
    else:
        # ROI proposal.
        spec.rpn_cls_prob = L.Softmax(spec.rpn_cls_score_reshape)
        spec.rpn_cls_prob_reshape = L.Reshape(
            spec.rpn_cls_prob,
            reshape_param=dict(shape=dict(dim=[0, 18, -1, 0])))

        if cpp_version:
            assert deploy, "cannot generate cpp version prototxt for training. deploy must be set to True"
            spec['rois'] = L.RPNProposal(
                spec.rpn_cls_prob_reshape, spec.rpn_bbox_pred, spec.im_info,
                ntop=1,
                rpn_proposal_param=dict(feat_stride=16))
        else:
            rois_key = 'rois' if deploy else 'rpn_rois'
            spec[rois_key] = L.Python(
                spec.rpn_cls_prob_reshape, spec.rpn_bbox_pred, spec.im_info,
                ntop=1,
                python_param=dict(module='rpn.proposal_layer',
                                  layer='ProposalLayer',
                                  param_str="'feat_stride': 16"))

        # NOTE(review): nested under the deploy branch this condition is
        # never true; kept as-is -- confirm the intended nesting.
        if not deploy:
            (spec.rois,
             spec.labels,
             spec.bbox_targets,
             spec.bbox_inside_weights,
             spec.bbox_outside_weights) = L.Python(
                spec.rpn_rois, spec.gt_boxes,
                ntop=5,
                python_param=dict(module='rpn.proposal_target_layer',
                                  layer='ProposalTargetLayer',
                                  param_str="'num_classes': %d" % num_classes))

    # Split the serialized net into one text chunk per layer.
    chunks = str(spec.to_proto()).split('layer {')[1:]
    chunks = ['layer {' + chunk for chunk in chunks]

    if deploy:
        # Replace the two placeholder layers with input declarations.
        chunks[0] = 'input: {}\ninput_shape {{\n dim: {}\n dim: {}\n dim: {}\n dim: {}\n}}\n'.format(
            '"data"', 1, 3, 224, 224)
        chunks[1] = 'input: {}\ninput_shape {{\n dim: {}\n dim: {}\n}}\n'.format(
            '"im_info"', 1, 3)

    body = ''.join(chunks)
    # Un-escape the single quotes inside the serialized param_str values.
    body = body.replace("\\'", "'")
    header = 'name: "Faster-RCNN-%s"\n' % basemodel
    return header + body
def net(split, vocab_size, opts):
    """Build the query-grounding network and return its proto.

    The network embeds the query tokens, runs them through an LSTM, keeps the
    last time-step slice, combines it with the processed image features
    (``proc_img`` / ``concat``) and predicts one score and one 4-value box
    per candidate.

    Args:
        split: data split name; serialized into the data layer ``param_str``.
        vocab_size: ``input_dim`` of the word-embedding layer.
        opts: accepted for interface compatibility; not read here.

    Returns:
        The result of ``n.to_proto()``.
    """
    n = caffe.NetSpec()
    param_str = json.dumps({'split': split, 'batchsize': cfg.BATCHSIZE})

    (n.qvec, n.cvec, n.img_feat, n.spt_feat, n.query_label,
     n.query_label_mask, n.query_bbox_targets,
     n.query_bbox_inside_weights, n.query_bbox_outside_weights) = L.Python(
        name='data',
        module='networks.data_layer',
        layer='DataProviderLayer',
        param_str=param_str,
        ntop=9)

    # Word embedding followed by tanh.
    n.embed_ba = L.Embed(n.qvec,
                         input_dim=vocab_size,
                         num_output=cfg.WORD_EMB_SIZE,
                         weight_filler=dict(type='xavier'))
    n.embed = L.TanH(n.embed_ba)
    word_emb = n.embed

    # LSTM1
    n.lstm1 = L.LSTM(
        word_emb, n.cvec,
        recurrent_param=dict(num_output=cfg.RNN_DIM,
                             weight_filler=dict(type='xavier')))
    tops1 = L.Slice(n.lstm1, ntop=cfg.QUERY_MAXLEN, slice_param={'axis': 0})

    # Every slice except the last is registered and then silenced.
    # ``range`` replaces the Python-2-only ``xrange`` (a NameError on
    # Python 3); it behaves the same on both versions for this loop.
    for i in range(cfg.QUERY_MAXLEN - 1):
        setattr(n, 'slice_first' + str(i), tops1[i])
        setattr(n, 'silence_data_first' + str(i),
                L.Silence(tops1[i], ntop=0))
    n.lstm1_out = tops1[cfg.QUERY_MAXLEN - 1]

    n.lstm1_reshaped = L.Reshape(
        n.lstm1_out,
        reshape_param=dict(shape=dict(dim=[-1, cfg.RNN_DIM])))
    n.lstm1_droped = L.Dropout(
        n.lstm1_reshaped,
        dropout_param={'dropout_ratio': cfg.DROPOUT_RATIO})
    n.lstm_l2norm = L.L2Normalize(n.lstm1_droped)
    n.q_emb = L.Reshape(n.lstm_l2norm,
                        reshape_param=dict(shape=dict(dim=[0, -1])))

    # Shapes below are the original author's annotations -- TODO confirm.
    q_layer = n.q_emb  # (N, 1024)
    v_layer = proc_img(n, n.img_feat, n.spt_feat)  # out: (N, 100, 2053)
    out_layer = concat(n, q_layer, v_layer)

    # Predict score.
    n.query_score_fc = L.InnerProduct(out_layer,
                                      num_output=1,
                                      weight_filler=dict(type='xavier'))
    n.query_score_pred = L.Reshape(
        n.query_score_fc,
        reshape_param=dict(shape=dict(dim=[-1, cfg.RPN_TOPN])))
    if cfg.USE_KLD:
        n.loss_query_score = L.SoftmaxKLDLoss(n.query_score_pred,
                                              n.query_label,
                                              n.query_label_mask,
                                              propagate_down=[1, 0, 0],
                                              loss_weight=1.0)
    else:
        n.loss_query_score = L.SoftmaxWithLoss(n.query_score_pred,
                                               n.query_label,
                                               n.query_label_mask,
                                               propagate_down=[1, 0, 0],
                                               loss_weight=1.0)

    # Predict bbox.
    n.query_bbox_pred = L.InnerProduct(out_layer,
                                       num_output=4,
                                       weight_filler=dict(type='xavier'))
    if cfg.USE_REG:
        n.loss_query_bbox = L.SmoothL1Loss(n.query_bbox_pred,
                                           n.query_bbox_targets,
                                           n.query_bbox_inside_weights,
                                           n.query_bbox_outside_weights,
                                           loss_weight=1.0)
    else:
        # Without the regression loss, these blobs are silenced.
        setattr(n, 'silence_query_bbox_pred',
                L.Silence(n.query_bbox_pred, ntop=0))
        setattr(n, 'silence_query_bbox_targets',
                L.Silence(n.query_bbox_targets, ntop=0))
        setattr(n, 'silence_query_bbox_inside_weights',
                L.Silence(n.query_bbox_inside_weights, ntop=0))
        setattr(n, 'silence_query_bbox_outside_weights',
                L.Silence(n.query_bbox_outside_weights, ntop=0))

    return n.to_proto()
def network(split):
    """Build the quadrilateral-proposal detection net for ``split``.

    Args:
        split: ``'train'`` adds the Python data layer, the label-map layer,
            the four RPN losses, proposal/proposal-target layers and the RCNN
            head with its two losses. ``'test'`` adds the proposal layer and
            the RCNN head with a ``cls_prob`` softmax. Any value other than
            ``'train'`` uses ``Input`` layers for ``data`` / ``im_info``.

    Returns:
        The result of ``net.to_proto()``.
    """
    num_chns = int(360 / cfg.LD_INTERVAL) + 1
    net = caffe.NetSpec()
    corners = ('tl', 'tr', 'br', 'bl')

    def conv_lr():
        return [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)]

    def fc_lr():
        return [dict(lr_mult=1), dict(lr_mult=2)]

    def xavier_conv(src, **shape):
        # 3x3-style learned convolution with xavier/constant fillers.
        return L.Convolution(src,
                             param=conv_lr(),
                             weight_filler=dict(type='xavier'),
                             bias_filler=dict(type='constant'),
                             **shape)

    def fc(src, outputs, weight_filler, bias_filler):
        return L.InnerProduct(
            src,
            param=fc_lr(),
            inner_product_param=dict(num_output=outputs,
                                     weight_filler=weight_filler,
                                     bias_filler=bias_filler))

    def add_rcnn_head():
        # Shared by the train and test branches; expects net.rois to exist.
        net.dual_pool5 = L.RotateROIPooling(
            net.conv_hyper, net.rois,
            name='roi_pool5_dual',
            rotate_roi_pooling_param=dict(pooled_w=7, pooled_h=7,
                                          spatial_scale=0.125))
        net.pool5_a, net.pool5_b = L.Slice(net.dual_pool5,
                                           slice_param=dict(axis=0),
                                           ntop=2,
                                           name='slice')
        net.pool5 = L.Eltwise(net.pool5_a, net.pool5_b,
                              name='roi_pool5',
                              eltwise_param=dict(operation=1))
        net.fc6 = fc(net.pool5, 4096,
                     dict(type='xavier'), dict(type='constant'))
        net.fc6_relu = L.ReLU(net.fc6, in_place=True)
        net.drop6 = L.Dropout(net.fc6, dropout_ratio=0.5, in_place=True)
        net.fc7 = fc(net.fc6, 4096,
                     dict(type='xavier'), dict(type='constant'))
        net.fc7_relu = L.ReLU(net.fc7, in_place=True)
        net.drop7 = L.Dropout(net.fc7, dropout_ratio=0.5, in_place=True)
        net.cls_score = fc(net.fc7, 2,
                           dict(type='gaussian', std=0.01),
                           dict(type='constant', value=0))
        net.bbox_pred = fc(net.fc7, 16,
                           dict(type='gaussian', std=0.001),
                           dict(type='constant', value=0))

    def corner_probs():
        return [getattr(net, 'rpn_prob_' + c) for c in corners]

    # ---- Inputs ----
    if split == 'train':
        data_cfg = dict(num_classes=2)
        net.data, net.im_info, net.gt_boxes = L.Python(
            module='roi_data_layer.layer',
            layer='RoIDataLayer',
            ntop=3,
            param_str=str(data_cfg))
    else:
        net.data = L.Input(
            name='data',
            input_param=dict(shape=dict(dim=[1, 3, 512, 512])))
        net.im_info = L.Input(
            name='im_info',
            input_param=dict(shape=dict(dim=[1, 3])))

    # ---- Backbone ----
    net.conv1_1, net.relu1_1 = conv_relu(net.data, 64, pad=1)
    net.conv1_2, net.relu1_2 = conv_relu(net.relu1_1, 64)
    net.pool1 = max_pool(net.relu1_2)

    net.conv2_1, net.relu2_1 = conv_relu(net.pool1, 128)
    net.conv2_2, net.relu2_2 = conv_relu(net.relu2_1, 128)
    net.pool2 = max_pool(net.relu2_2)

    net.conv3_1, net.relu3_1 = conv_relu(net.pool2, 256)
    net.conv3_2, net.relu3_2 = conv_relu(net.relu3_1, 256)
    net.conv3_3, net.relu3_3 = conv_relu(net.relu3_2, 256)
    net.pool3 = max_pool(net.relu3_3)

    net.conv4_1, net.relu4_1 = conv_relu(net.pool3, 512)
    net.conv4_2, net.relu4_2 = conv_relu(net.relu4_1, 512)
    net.conv4_3, net.relu4_3 = conv_relu(net.relu4_2, 512)
    net.pool4 = max_pool(net.relu4_3)

    net.conv5_1, net.relu5_1 = conv_relu(net.pool4, 512)
    net.conv5_2, net.relu5_2 = conv_relu(net.relu5_1, 512)
    net.conv5_3, net.relu5_3 = conv_relu(net.relu5_2, 512)

    # ---- Hyper feature: strided conv3_3 + deconvolved conv5_3 + conv4_3 ----
    net.downsample = xavier_conv(net.conv3_3, num_output=64, kernel_size=3,
                                 pad=1, stride=2)
    net.relu_downsample = L.ReLU(net.downsample, in_place=True)
    net.upsample = L.Deconvolution(
        net.conv5_3,
        param=conv_lr(),
        convolution_param=dict(num_output=512,
                               kernel_size=2,
                               stride=2,
                               weight_filler=dict(type='xavier'),
                               bias_filler=dict(type='constant')))
    net.relu_upsample = L.ReLU(net.upsample, in_place=True)
    net.fuse = L.Concat(net.downsample, net.upsample, net.conv4_3,
                        name='concat',
                        concat_param=dict(concat_dim=1))
    net.conv_hyper = xavier_conv(net.fuse, num_output=512, kernel_size=3,
                                 pad=1)
    net.relu_conv_hyper = L.ReLU(net.conv_hyper, in_place=True)

    # ---- RPN: one score map and one softmax per corner ----
    net.conv_rpn = xavier_conv(net.conv_hyper, num_output=128, kernel_size=3,
                               pad=1)
    net.conv_rpn_relu = L.ReLU(net.conv_rpn, in_place=True)
    for c in corners:
        setattr(net, 'rpn_score_' + c, L.Convolution(
            net.conv_rpn,
            num_output=num_chns,
            kernel_size=1,
            pad=0,
            param=conv_lr(),
            weight_filler=dict(type='gaussian', std=0.01),
            bias_filler=dict(type='constant', value=0)))
    for c in corners:
        setattr(net, 'rpn_prob_' + c,
                L.Softmax(getattr(net, 'rpn_score_' + c)))

    if split == 'train':
        (net.rpn_label_tl,
         net.rpn_label_tr,
         net.rpn_label_br,
         net.rpn_label_bl) = L.Python(
            net.conv_rpn, net.im_info, net.gt_boxes,
            module='rpn.labelmap_layer',
            layer='LabelMapLayer',
            ntop=4)
        for c in corners:
            setattr(net, 'loss_rpn_' + c, L.BalancedSoftmaxWithLoss(
                getattr(net, 'rpn_score_' + c),
                getattr(net, 'rpn_label_' + c),
                propagate_down=[1, 0],
                loss_param=dict(normalize=True, ignore_label=-1)))

        proposal_cfg = dict(feat_stride=8)
        net.quads = L.Python(
            net.im_info, *corner_probs(),
            module='rpn.proposal_layer',
            layer='ProposalLayer',
            ntop=1,
            param_str=str(proposal_cfg))

        (net.rois,
         net.labels,
         net.bbox_targets,
         net.bbox_inside_weights,
         net.bbox_outside_weights) = L.Python(
            net.quads, net.gt_boxes,
            module='rpn.proposal_target_layer',
            layer='ProposalTargetLayer',
            name='roi-data',
            ntop=5)

        # RCNN
        add_rcnn_head()
        net.loss_cls = L.SoftmaxWithLoss(net.cls_score, net.labels,
                                         propagate_down=[1, 0],
                                         loss_weight=1)
        net.loss_bbox = L.SmoothL1Loss(net.bbox_pred,
                                       net.bbox_targets,
                                       net.bbox_inside_weights,
                                       net.bbox_outside_weights,
                                       loss_weight=1)

    if split == 'test':
        proposal_cfg = dict(feat_stride=8)
        net.quads, net.rois = L.Python(
            net.im_info, *corner_probs(),
            module='rpn.proposal_layer',
            layer='ProposalLayer',
            ntop=2,
            param_str=str(proposal_cfg))

        # RCNN
        add_rcnn_head()
        net.cls_prob = L.Softmax(net.cls_score)

    return net.to_proto()