def get_symbol_train(network, num_classes, num_layers, from_layers, num_filters, strides, pads, sizes, ratios, normalizations=-1, steps=[], min_filter=128, nms_thresh=0.5, force_suppress=False, nms_topk=400, minimum_negative_samples=0, **kwargs): """Build network symbol for training SSD Parameters ---------- network : str base network symbol name num_classes : int number of object classes not including background from_layers : list of str feature extraction layers, use '' for add extra layers For example: from_layers = ['relu4_3', 'fc7', '', '', '', ''] which means extract feature from relu4_3 and fc7, adding 4 extra layers on top of fc7 num_filters : list of int number of filters for extra layers, you can use -1 for extracted features, however, if normalization and scale is applied, the number of filter for that layer must be provided. For example: num_filters = [512, -1, 512, 256, 256, 256] strides : list of int strides for the 3x3 convolution appended, -1 can be used for extracted feature layers pads : list of int paddings for the 3x3 convolution, -1 can be used for extracted layers sizes : list or list of list [min_size, max_size] for all layers or [[], [], []...] for specific layers ratios : list or list of list [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers normalizations : int or list of int use normalizations value for all layers or [...] for specific layers, -1 indicate no normalizations and scales steps : list specify steps for each MultiBoxPrior layer, leave empty, it will calculate according to layer dimensions min_filter : int minimum number of filters used in 1x1 convolution nms_thresh : float non-maximum suppression threshold force_suppress : boolean whether suppress different class objects nms_topk : int apply NMS to top K detections minimum_negative_samples : int always have some negative examples, no matter how many positive there are. this is useful when training on images with no ground-truth. Returns ------- mx.Symbol """ data = mx.sym.Variable('data') label = mx.sym.Variable('label') conv_feat = get_resnet_conv(data, num_layers) _, conv_fpn_feat = get_resnet_conv_down(conv_feat) conv_fpn_feat.reverse() # [P3, P4, P5, P6, P7] loc_preds, cls_preds, anchor_boxes = multibox_layer(conv_fpn_feat, \ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ num_channels=num_filters, clip=False, interm_layer=0, steps=steps) # now cls_preds are in shape of batchsize x num_class x num_anchors tmp = mx.contrib.symbol.MultiBoxTarget( *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \ ignore_label=-1, negative_mining_ratio=1000, minimum_negative_samples=minimum_negative_samples, \ negative_mining_thresh=.4, variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target") loc_target = tmp[0] loc_target_mask = tmp[1] cls_target = tmp[2] """Focal loss related""" cls_prob_ = mx.symbol.SoftmaxActivation(cls_preds, mode="channel") cls_prob = mx.sym.Custom(cls_preds, cls_prob_, cls_target, op_type="focal_loss", name="cls_prob", gamma=2.0, alpha=0.25, normalize=True) # cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \ # ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \ # normalization='valid', name="cls_prob") loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \ data=loc_target_mask * (loc_preds - loc_target), scalar=1.0) loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \ normalization='valid', name="loc_loss") # monitoring training status cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label") det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ threshold=0.05, name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out") # group output out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det]) return out
def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pads, sizes, ratios, normalizations=-1, steps=[], min_filter=128, nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs): """Build network symbol for training SSD Parameters ---------- network : str base network symbol name num_classes : int number of object classes not including background from_layers : list of str feature extraction layers, use '' for add extra layers For example: from_layers = ['relu4_3', 'fc7', '', '', '', ''] which means extract feature from relu4_3 and fc7, adding 4 extra layers on top of fc7 num_filters : list of int number of filters for extra layers, you can use -1 for extracted features, however, if normalization and scale is applied, the number of filter for that layer must be provided. For example: num_filters = [512, -1, 512, 256, 256, 256] strides : list of int strides for the 3x3 convolution appended, -1 can be used for extracted feature layers pads : list of int paddings for the 3x3 convolution, -1 can be used for extracted layers sizes : list or list of list [min_size, max_size] for all layers or [[], [], []...] for specific layers ratios : list or list of list [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers normalizations : int or list of int use normalizations value for all layers or [...] for specific layers, -1 indicate no normalizations and scales steps : list specify steps for each MultiBoxPrior layer, leave empty, it will calculate according to layer dimensions min_filter : int minimum number of filters used in 1x1 convolution nms_thresh : float non-maximum suppression threshold force_suppress : boolean whether suppress different class objects nms_topk : int apply NMS to top K detections Returns ------- mx.Symbol """ label = mx.sym.Variable('label') body = import_module(network).get_symbol(num_classes, **kwargs) layers = multi_layer_feature(body, from_layers, num_filters, strides, pads, min_filter=min_filter) loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ num_channels=num_filters, clip=False, interm_layer=0, steps=steps) tmp = mx.symbol.contrib.MultiBoxTarget( *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \ ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \ negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target") loc_target = tmp[0] loc_target_mask = tmp[1] cls_target = tmp[2] cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \ ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \ normalization='valid', name="cls_prob") loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \ data=loc_target_mask * (loc_preds - loc_target), scalar=1.0) loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \ normalization='valid', name="loc_loss") # monitoring training status cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label") det = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out") # group output out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det]) return out
def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios, strides, pads, normalizations=-1, steps=[], min_filter=128, nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs): """Build network for testing SSD Parameters ---------- network : str base network symbol name num_classes : int number of object classes not including background from_layers : list of str feature extraction layers, use '' for add extra layers For example: from_layers = ['relu4_3', 'fc7', '', '', '', ''] which means extract feature from relu4_3 and fc7, adding 4 extra layers on top of fc7 num_filters : list of int number of filters for extra layers, you can use -1 for extracted features, however, if normalization and scale is applied, the number of filter for that layer must be provided. For example: num_filters = [512, -1, 512, 256, 256, 256] strides : list of int strides for the 3x3 convolution appended, -1 can be used for extracted feature layers pads : list of int paddings for the 3x3 convolution, -1 can be used for extracted layers sizes : list or list of list [min_size, max_size] for all layers or [[], [], []...] for specific layers ratios : list or list of list [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers normalizations : int or list of int use normalizations value for all layers or [...] for specific layers, -1 indicate no normalizations and scales steps : list specify steps for each MultiBoxPrior layer, leave empty, it will calculate according to layer dimensions min_filter : int minimum number of filters used in 1x1 convolution nms_thresh : float non-maximum suppression threshold force_suppress : boolean whether suppress different class objects nms_topk : int apply NMS to top K detections Returns ------- mx.Symbol """ body = import_module(network).get_symbol(num_classes=num_classes, **kwargs) layers = multi_layer_feature(body, from_layers, num_filters, strides, pads, min_filter=min_filter) loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ num_channels=num_filters, clip=False, interm_layer=0, steps=steps) cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \ name='cls_prob') out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) return out
def get_symbol_train(network, num_classes, alpha_bb8, alpha_loc, use_dilated, use_focalloss, from_layers, num_filters, strides, pads, sizes, ratios, normalizations=-1, steps=[], min_filter=128, nms_thresh=0.5, force_suppress=False, nms_topk=400, minimum_negative_samples=0, **kwargs): """Build network symbol for training SSD Parameters ---------- network : str base network symbol name num_classes : int number of object classes not including background from_layers : list of str feature extraction layers, use '' for add extra layers For example: from_layers = ['relu4_3', 'fc7', '', '', '', ''] which means extract feature from relu4_3 and fc7, adding 4 extra layers on top of fc7 num_filters : list of int number of filters for extra layers, you can use -1 for extracted features, however, if normalization and scale is applied, the number of filter for that layer must be provided. For example: num_filters = [512, -1, 512, 256, 256, 256] strides : list of int strides for the 3x3 convolution appended, -1 can be used for extracted feature layers pads : list of int paddings for the 3x3 convolution, -1 can be used for extracted layers sizes : list or list of list [min_size, max_size] for all layers or [[], [], []...] for specific layers ratios : list or list of list [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers normalizations : int or list of int use normalizations value for all layers or [...] for specific layers, -1 indicate no normalizations and scales steps : list specify steps for each MultiBoxPrior layer, leave empty, it will calculate according to layer dimensions min_filter : int minimum number of filters used in 1x1 convolution nms_thresh : float non-maximum suppression threshold force_suppress : boolean whether suppress different class objects nms_topk : int apply NMS to top K detections minimum_negative_samples : int always have some negative examples, no matter how many positive there are. this is useful when training on images with no ground-truth. Returns ------- mx.Symbol """ use_focalloss = False label = mx.sym.Variable('label') body = import_module(network).get_symbol(num_classes=num_classes, use_dilated=use_dilated, **kwargs) layers = multi_layer_feature(body, from_layers, num_filters, strides, pads, min_filter=min_filter) loc_preds, cls_preds, anchor_boxes, bb8_preds = multibox_layer(layers, \ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ num_channels=num_filters, clip=False, interm_layer=0, steps=steps) # now cls_preds are in shape of batchsize x num_class x num_anchors # loc_target, loc_target_mask, cls_target, bb8_target, bb8_target_mask = training_targets(anchors=anchor_boxes, # class_preds=cls_preds, labels=label, use_focalloss=use_focalloss) loc_target, loc_target_mask, cls_target, bb8_target, bb8_target_mask = mx.sym.Custom(op_type="training_targets", name="training_targets", anchors=anchor_boxes, cls_preds=cls_preds, labels=label) # tmp = mx.contrib.symbol.MultiBoxTarget( # *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \ # ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=minimum_negative_samples, \ # negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2), # name="multibox_target") # loc_target = tmp[0] # loc_target_mask = tmp[1] # cls_target = tmp[2] # if use_focalloss: # cls_prob_ = mx.sym.SoftmaxActivation(cls_preds, mode='channel') # cls_prob = mx.sym.Custom(cls_preds, cls_prob_, cls_target, op_type='focal_loss', name='cls_prob', # gamma=2.0, alpha=0.25, normalize=True) # cls_prob = mx.sym.Custom(op_type='FocalLoss', name='cls_prob', data=cls_preds, labels=cls_target, alpha=0.25, gamma=2) # else: cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \ ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \ normalization='valid', name="cls_prob") loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \ data=loc_target_mask * (loc_preds - loc_target), scalar=1.0) loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=alpha_loc, \ normalization='valid', name="loc_loss") bb8_loss_ = mx.symbol.smooth_l1(name="bb8_loss_", \ data=bb8_target_mask * (bb8_preds - bb8_target), scalar=1.0) bb8_loss = mx.symbol.MakeLoss(bb8_loss_, grad_scale=alpha_bb8, \ normalization='valid', name="bb8_loss") # monitoring training status cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label") # anchor = mx.symbol.MakeLoss(data=mx.symbol.broadcast_mul(loc_target_mask.reshape((0,-1,4)), anchor_boxes), grad_scale=0, name='anchors') anchors = mx.symbol.MakeLoss(data=anchor_boxes, grad_scale=0, name='anchors') loc_mae = mx.symbol.MakeLoss(data=mx.sym.abs(loc_target_mask * (loc_preds - loc_target)), grad_scale=0, name='loc_mae') loc_label = mx.symbol.MakeLoss(data=loc_target_mask * loc_target, grad_scale=0., name='loc_label') loc_pred_masked = mx.symbol.MakeLoss(data=loc_target_mask * loc_preds, grad_scale=0, name='loc_pred_masked') bb8_label = mx.symbol.MakeLoss(data=bb8_target_mask * bb8_target, grad_scale=0, name='bb8_label') bb8_pred = mx.symbol.MakeLoss(data=bb8_preds, grad_scale=0, name='bb8_pred') bb8_pred_masked = mx.symbol.MakeLoss(data=bb8_target_mask * bb8_preds, grad_scale=0, name='bb8_pred_masked') bb8_mae = mx.symbol.MakeLoss(data=mx.sym.abs(bb8_target_mask * (bb8_preds - bb8_target)), grad_scale=0, name='bb8_mae') # det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ # name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, # variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) # det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out") loc_pred = mx.symbol.MakeLoss(data=loc_preds, grad_scale=0, name='loc_pred') # group output out = mx.symbol.Group([cls_prob, loc_loss, cls_label, bb8_loss, loc_pred, bb8_pred, anchors, loc_label, loc_pred_masked, loc_mae, bb8_label, bb8_pred_masked, bb8_mae]) return out
def get_symbol_train(num_classes=20): """ Single-shot multi-box detection with VGG 16 layers ConvNet This is a modified version, with fc6/fc7 layers replaced by conv layers And the network is slightly smaller than original VGG 16 network This is a training network with losses Parameters: ---------- num_classes: int number of object classes not including background Returns: ---------- mx.Symbol """ data = mx.symbol.Variable(name="data") label = mx.symbol.Variable(name="label") # group 1 conv1_1 = mx.symbol.Convolution(data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1") relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1") conv1_2 = mx.symbol.Convolution(data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2") relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2") pool1 = mx.symbol.Pooling(data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1") # group 2 conv2_1 = mx.symbol.Convolution(data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1") relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1") conv2_2 = mx.symbol.Convolution(data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2") relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2") pool2 = mx.symbol.Pooling(data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2") # group 3 conv3_1 = mx.symbol.Convolution(data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1") relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1") conv3_2 = mx.symbol.Convolution(data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2") relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2") conv3_3 = mx.symbol.Convolution(data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3") relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3") pool3 = mx.symbol.Pooling( data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \ pooling_convention="full", name="pool3") # group 4 conv4_1 = mx.symbol.Convolution(data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1") relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1") conv4_2 = mx.symbol.Convolution(data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2") relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2") conv4_3 = mx.symbol.Convolution(data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3") relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3") pool4 = mx.symbol.Pooling(data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4") # group 5 conv5_1 = mx.symbol.Convolution(data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1") relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1") conv5_2 = mx.symbol.Convolution(data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2") relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2") conv5_3 = mx.symbol.Convolution(data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3") relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3") pool5 = mx.symbol.Pooling(data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1), pad=(1, 1), name="pool5") # group 6 conv6 = mx.symbol.Convolution(data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6), num_filter=1024, name="conv6") relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6") # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6") # group 7 conv7 = mx.symbol.Convolution(data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7") relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7") # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7") ### ssd extra layers ### conv8_1, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \ stride=(1,1), act_type="relu", use_batchnorm=False) conv8_2, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \ stride=(2,2), act_type="relu", use_batchnorm=False) conv9_1, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \ stride=(1,1), act_type="relu", use_batchnorm=False) conv9_2, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \ stride=(2,2), act_type="relu", use_batchnorm=False) conv10_1, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \ stride=(1,1), act_type="relu", use_batchnorm=False) conv10_2, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(1,1), \ stride=(2,2), act_type="relu", use_batchnorm=False) # global Pooling pool10 = mx.symbol.Pooling(data=relu10_2, pool_type="avg", global_pool=True, kernel=(1, 1), name='pool10') # specific parameters for VGG16 network from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, pool10] sizes = [[.1], [.2, .276], [.38, .461], [.56, .644], [.74, .825], [.92, 1.01]] ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \ [1,2,.5,3,1./3], [1,2,.5,3,1./3]] normalizations = [20, -1, -1, -1, -1, -1] loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ clip=True, interm_layer=0) tmp = mx.symbol.MultiBoxTarget( *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \ ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \ negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target") loc_target = tmp[0] loc_target_mask = tmp[1] cls_target = tmp[2] cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \ ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \ normalization='valid', name="cls_prob") loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \ data=loc_target_mask * (loc_preds - loc_target), scalar=1.0) loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \ normalization='valid', name="loc_loss") # monitoring training status cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label") # group output out = mx.symbol.Group([cls_prob, loc_loss, cls_label]) # out = mx.symbol.Group([loc_preds, cls_preds, anchor_boxes]) return out
def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pads, sizes, ratios, normalizations=-1, steps=[], min_filter=128, nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs): """Build network symbol for training SSD Parameters ---------- network : str base network symbol name num_classes : int number of object classes not including background from_layers : list of str feature extraction layers, use '' for add extra layers For example: from_layers = ['relu4_3', 'fc7', '', '', '', ''] which means extract feature from relu4_3 and fc7, adding 4 extra layers on top of fc7 num_filters : list of int number of filters for extra layers, you can use -1 for extracted features, however, if normalization and scale is applied, the number of filter for that layer must be provided. For example: num_filters = [512, -1, 512, 256, 256, 256] strides : list of int strides for the 3x3 convolution appended, -1 can be used for extracted feature layers pads : list of int paddings for the 3x3 convolution, -1 can be used for extracted layers sizes : list or list of list [min_size, max_size] for all layers or [[], [], []...] for specific layers ratios : list or list of list [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers normalizations : int or list of int use normalizations value for all layers or [...] for specific layers, -1 indicate no normalizations and scales steps : list specify steps for each MultiBoxPrior layer, leave empty, it will calculate according to layer dimensions min_filter : int minimum number of filters used in 1x1 convolution nms_thresh : float non-maximum suppression threshold force_suppress : boolean whether suppress different class objects nms_topk : int apply NMS to top K detections Returns ------- mx.Symbol """ label = mx.sym.Variable('label') body = import_module(network).get_symbol(num_classes, **kwargs) layers = multi_layer_feature(body, from_layers, num_filters, strides, pads, min_filter=min_filter) loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ num_channels=num_filters, clip=False, interm_layer=0, steps=steps) tmp = mx.symbol.contrib.MultiBoxTarget( *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \ ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \ negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target") loc_target = tmp[0] loc_target_mask = tmp[1] cls_target = tmp[2] cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \ ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \ normalization='valid', name="cls_prob") loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \ data=loc_target_mask * (loc_preds - loc_target), scalar=1.0) loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \ normalization='valid', name="loc_loss") # monitoring training status cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label") det = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out") # group output out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det]) return out
def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios, strides, pads, normalizations=-1, steps=[], min_filter=128, nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs): """Build network for testing SSD Parameters ---------- network : str base network symbol name num_classes : int number of object classes not including background from_layers : list of str feature extraction layers, use '' for add extra layers For example: from_layers = ['relu4_3', 'fc7', '', '', '', ''] which means extract feature from relu4_3 and fc7, adding 4 extra layers on top of fc7 num_filters : list of int number of filters for extra layers, you can use -1 for extracted features, however, if normalization and scale is applied, the number of filter for that layer must be provided. For example: num_filters = [512, -1, 512, 256, 256, 256] strides : list of int strides for the 3x3 convolution appended, -1 can be used for extracted feature layers pads : list of int paddings for the 3x3 convolution, -1 can be used for extracted layers sizes : list or list of list [min_size, max_size] for all layers or [[], [], []...] for specific layers ratios : list or list of list [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers normalizations : int or list of int use normalizations value for all layers or [...] for specific layers, -1 indicate no normalizations and scales steps : list specify steps for each MultiBoxPrior layer, leave empty, it will calculate according to layer dimensions min_filter : int minimum number of filters used in 1x1 convolution nms_thresh : float non-maximum suppression threshold force_suppress : boolean whether suppress different class objects nms_topk : int apply NMS to top K detections Returns ------- mx.Symbol """ body = import_module(network).get_symbol(num_classes, **kwargs) layers = multi_layer_feature(body, from_layers, num_filters, strides, pads, min_filter=min_filter) loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \ num_channels=num_filters, clip=False, interm_layer=0, steps=steps) cls_prob = mx.symbol.softmax(data=cls_preds, axis=1, name='cls_prob') out = mx.symbol.contrib.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) return out
def get_symbol_train(network, num_classes, from_layers, num_filters, sizes, ratios, batch_size, gpus, normalizations=-1, steps=[], nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs): """Build network symbol for training SSD Parameters ---------- network : str base network symbol name num_classes : int number of object classes not including background from_layers : list of str feature extraction layers, use '' for add extra layers For example: from_layers = ['relu4_3', 'fc7', '', '', '', ''] which means extract feature from relu4_3 and fc7, adding 4 extra layers on top of fc7 num_filters : list of int number of filters for extra layers, you can use -1 for extracted features, however, if normalization and scale is applied, the number of filter for that layer must be provided. For example: num_filters = [512, -1, 512, 256, 256, 256] strides : list of int strides for the 3x3 convolution appended, -1 can be used for extracted feature layers pads : list of int paddings for the 3x3 convolution, -1 can be used for extracted layers sizes : list or list of list [min_size, max_size] for all layers or [[], [], []...] for specific layers ratios : list or list of list [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers normalizations : int or list of int use normalizations value for all layers or [...] for specific layers, -1 indicate no normalizations and scales steps : list specify steps for each MultiBoxPrior layer, leave empty, it will calculate according to layer dimensions min_filter : int minimum number of filters used in 1x1 convolution nms_thresh : float non-maximum suppression threshold force_suppress : boolean whether suppress different class objects nms_topk : int apply NMS to top K detections Returns ------- mx.Symbol """ label = mx.sym.Variable('label') times = batch_size / gpus body = import_module(network).get_symbol(num_classes, **kwargs) layers = multi_layer_feature(body, from_layers) arm_loc_preds, arm_cls_preds, arm_anchor_boxes, odm_loc_preds, odm_cls_preds = multibox_layer(layers, num_filters, num_classes, \ sizes=sizes, ratios=ratios, normalization=normalizations, num_channels=num_filters, clip=False, interm_layer=0, steps=steps) # modify arm label label_arm = mx.symbol.Custom(label=label, op_type='modify_label') arm_tmp = mx.contrib.symbol.MultiBoxTarget(*[arm_anchor_boxes, label_arm, arm_cls_preds], overlap_threshold=.5, \ ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \ negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2), name="arm_multibox_target") arm_loc_target = arm_tmp[0] arm_loc_target_mask = arm_tmp[1] arm_cls_target = arm_tmp[2] # odm module odm_anchor_boxes = mx.symbol.Custom(arm_anchor_boxes=arm_anchor_boxes, arm_loc_preds=arm_loc_preds, arm_loc_mask=arm_loc_target_mask,\ op_type='refine_anchor_generator') odm_anchor_boxes_bs = mx.sym.split(data=odm_anchor_boxes, axis=0, num_outputs=times) odm_loc_target = [] odm_loc_target_mask = [] odm_cls_target = [] label_bs = mx.sym.split(data=label, axis=0, num_outputs=times) odm_cls_preds_bs = mx.sym.split(data=odm_cls_preds, axis=0, num_outputs=times) for i in range(times): odm_tmp = mx.contrib.symbol.MultiBoxTarget(*[odm_anchor_boxes_bs[i], label_bs[i], odm_cls_preds_bs[i]],\ overlap_threshold=.5, ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,\ negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2), name="odm_multibox_target_{}".format(i)) odm_loc_target.append(odm_tmp[0]) odm_loc_target_mask.append(odm_tmp[1]) odm_cls_target.append(odm_tmp[2]) odm_loc_target = mx.symbol.concat(*odm_loc_target, num_args=len(odm_loc_target), dim=0) odm_loc_target_mask = mx.symbol.concat(*odm_loc_target_mask, num_args=len(odm_loc_target_mask), dim=0) odm_cls_target = mx.symbol.concat(*odm_cls_target, num_args=len(odm_cls_target), dim=0) group = mx.symbol.Custom(arm_cls_preds=arm_cls_preds, odm_cls_target=odm_cls_target, odm_loc_target_mask=odm_loc_target_mask,\ op_type='negative_filtering') odm_cls_target = group[0] odm_loc_target_mask = group[1] # monitoring training status arm_cls_prob = mx.symbol.SoftmaxOutput(data=arm_cls_preds, label=arm_cls_target, ignore_label=-1, use_ignore=True, \ grad_scale=1.0, multi_output=True, normalization='valid', name="arm_cls_prob") arm_loc_loss_ = mx.symbol.smooth_l1(name="arm_loc_loss_", data=arm_loc_target_mask * (arm_loc_preds - arm_loc_target), scalar=1.0) arm_loc_loss = mx.symbol.MakeLoss(arm_loc_loss_, grad_scale=1.0, normalization='valid', name="arm_loc_loss") arm_cls_label = mx.symbol.MakeLoss(data=arm_cls_target, grad_scale=0, name="arm_cls_label") odm_cls_prob = mx.symbol.SoftmaxOutput(data=odm_cls_preds, label=odm_cls_target, ignore_label=-1, use_ignore=True, grad_scale=1.0, \ multi_output=True, normalization='valid', name="odm_cls_prob") odm_loc_loss_ = mx.symbol.smooth_l1(name="odm_loc_loss_", data=odm_loc_target_mask * (odm_loc_preds - odm_loc_target), scalar=1.0) odm_loc_loss = mx.symbol.MakeLoss(odm_loc_loss_, grad_scale=1.0, normalization='valid', name="odm_loc_loss") odm_cls_label = mx.symbol.MakeLoss(data=odm_cls_target, grad_scale=0, name="odm_cls_label") odm_det = [] odm_loc_preds_bs = mx.sym.split(data=odm_loc_preds, axis=0, num_outputs=times) odm_cls_prob_bs = mx.sym.split(data=odm_cls_prob, axis=0, num_outputs=times) for i in range(times): odm_det_tmp = mx.contrib.symbol.MultiBoxDetection(*[odm_cls_prob_bs[i], odm_loc_preds_bs[i], odm_anchor_boxes_bs[i]], \ name="odm_detection_{}".format(i), nms_threshold=nms_thresh, force_suppress=force_suppress, \ variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk) odm_det.append(odm_det_tmp) odm_det = mx.symbol.concat(*odm_det, num_args=len(odm_det), dim=0) odm_det = mx.symbol.MakeLoss(data=odm_det, grad_scale=0, name="odm_det_out") # group output out = mx.symbol.Group([arm_cls_prob, arm_loc_loss, arm_cls_label, odm_cls_prob, odm_loc_loss, odm_cls_label, odm_det]) return out