def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 101 num_c3_block = 4 num_c4_block = 23 class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 2000 if is_train else 300 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = 16 class DatasetParam: if is_train: image_set = ("coco_train2017", ) else: image_set = ("coco_val2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 35 class schedule: begin_epoch = 0 end_epoch = 12 lr_iter = [ 120000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 160000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.0 iter = 2000 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) class ResizeParam: short = 800 long = 1200 if is_train else 2000 class PadParam: short = 800 long = 1200 max_num_gt = 100 class AnchorTarget2DParam: class generate: short = 800 // 16 long = 1200 // 16 stride = 16 scales = (2, 4, 8, 16, 32) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, AnchorTarget2D, Norm2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), AnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False loader_worker = 8 class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 152 class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image nnvm_proposal = True nnvm_rpn_target = False class anchor_generate: scale = (8, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) image_anchor = 256 max_side = 1400 class anchor_assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 image_anchor = 256 pos_fraction = 0.5 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 2000 if is_train else 1000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 81 class_agnostic = False weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = False mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class MaskParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer resolution = 28 dim_reduced = 256 num_fg_roi = int(RpnParam.subsample_proposal.image_roi * RpnParam.subsample_proposal.fg_fraction) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class MaskRoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 14 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: mult = 2 begin_epoch = 0 end_epoch = 6 * mult lr_iter = [ 60000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: process_output(x, y) class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam, MaskParam) roi_extractor = RoiExtractor(RoiParam) mask_roi_extractor = RoiExtractor(MaskRoiParam) bbox_head = BboxHead(BboxParam) mask_head = MaskHead(BboxParam, MaskParam, MaskRoiParam) bbox_post_processer = BboxPostProcessor(TestParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, mask_roi_extractor, bbox_head, mask_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processer) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] excluded_param = ["mask_fcn"] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) # data processing class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 max_len_gt_poly = 2500 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (200, 100, 50, 25, 13) self.long = (334, 167, 84, 42, 21) scales = (8) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.maskrcnn.input import PreprocessGtPoly, EncodeGtPoly, \ Resize2DImageBboxMask, Flip2DImageBboxMask, Pad2DImageBboxMask from models.FPN.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), PreprocessGtPoly(), Resize2DImageBboxMask(ResizeParam), Flip2DImageBboxMask(), EncodeGtPoly(PadParam), Pad2DImageBboxMask(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["im_info", "gt_bbox", "gt_poly"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric from models.maskrcnn.metric import SigmoidCELossMetric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) mask_cls_metric = SigmoidCELossMetric("MaskCE", ["mask_loss_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric, ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 50 class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: num_class = 1 + 80 fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (4 * 2 ** 0, 4 * 2 ** (1.0 / 3.0), 4 * 2 ** (2.0 / 3.0)) ratio = (0.5, 1.0, 2.0) stride = (8, 16, 32, 64, 128) image_anchor = None class head: conv_channel = 256 mean = None std = None class proposal: pre_nms_top_n = 1000 post_nms_top_n = None nms_thr = None min_bbox_side = None min_det_score = 0.05 # filter score in network class subsample_proposal: proposal_wo_gt = None image_roi = None fg_fraction = None fg_thr = None bg_thr_hi = None bg_thr_lo = None class bbox_target: num_reg_class = None class_agnostic = None weight = None mean = None std = None class focal_loss: alpha = 0.25 gamma = 2.0 class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = None image_roi = None batch_image = None class regress_target: class_agnostic = None mean = None std = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = None stride = None class KDParam: stage = 'c5' channel = 2048 grad_scale = 10 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) kd_head = KDHead(KDParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, kd_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class teacher_param: prefix = 'teacher_model/retina_r152v1b_fpn_1x/checkpoint' epoch = 6 endpoint = ['stage4_unit3_relu_output'] data_name = ['data'] label_name = ['teacher_label'] class OptimizeParam: class optimizer: type = "sgd" lr = 0.005 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 12 lr_iter = [120000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 160000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)] class warmup: type = "gradual" lr = 0.005 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image / 3 iter = 1000 class TestParam: min_det_score = 0 # filter appended boxes max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.short = (100, 50, 25, 13, 7) self.long = (167, 84, 42, 21, 11) self.stride = (8, 16, 32, 64, 128) scales = (4 * 2 ** 0, 4 * 2 ** (1.0 / 3.0), 4 * 2 ** (2.0 / 3.0)) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 9999 pos_thr = 0.5 neg_thr = 0.4 min_pos_thr = 0.0 class sample: image_anchor = None pos_fraction = None class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord from models.retinanet.input import PyramidAnchorTarget2D, Norm2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), PyramidAnchorTarget2D(AnchorTarget2DParam()), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] from models.retinanet import metric from core.detection_metric import ScalarLoss rpn_acc_metric = metric.FGAccMetric( "FGAcc", ["cls_loss_output"], ["rpn_cls_label"] ) scalar_metric = ScalarLoss( "fitloss", ["fit_loss_output"], [] ) metric_list = [rpn_acc_metric, scalar_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 8 if is_train else 1 fp16 = True loader_worker = 8 class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="localbn", ndev=len(KvstoreParam.gpus)) # normalizer = normalizer_factory(type="gn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image nnvm_proposal = True nnvm_rpn_target = False class anchor_generate: scale = (4, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) image_anchor = 256 max_side = 700 class anchor_assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 image_anchor = 256 pos_fraction = 0.5 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 2000 if is_train else 1000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 81 class_agnostic = False weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = False mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") total_image = 82783 + 35504 else: image_set = ("coco_minival2014", ) total_image = 5000 backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = True random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = None epoch = 0 fixed_param = [] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 1e-4 clip_gradient = None class schedule: mult = 9 begin_epoch = 0 end_epoch = 6 * mult if mult <= 2: lr_iter = [ 60000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] else: # follow the setting in Rethinking ImageNet Pre-training # reduce the lr in the last 60k and 20k iterations lr_iter = [ (DatasetParam.total_image * 2 // 16 * end_epoch - 60000) * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), (DatasetParam.total_image * 2 // 16 * end_epoch - 20000) * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) # data processing class ResizeParam: short = 400 long = 600 class PadParam: short = 400 long = 600 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (100, 50, 25, 13, 7) self.long = (150, 75, 38, 19, 10) scales = (4) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.FPN.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (8, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) image_anchor = 256 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 2000 if is_train else 1000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 81 class_agnostic = False weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = False mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-101" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = 6 class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) # data processing class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (200, 100, 50, 25, 13) self.long = (334, 167, 84, 42, 21) scales = (8) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.FPN.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), PyramidAnchorTarget2D(AnchorTarget2DParam()), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 1 if is_train else 1 fp16 = False class Trident: num_branch = 3 train_scaleaware = True test_scaleaware = True branch_ids = range(num_branch) branch_dilates = [1, 2, 3] valid_ranges = [(0, 90), (30, 160), (90, -1)] valid_ranges_on_origin = True branch_bn_shared = True branch_conv_shared = True branch_deform = False assert num_branch == len(branch_ids) assert num_branch == len(valid_ranges) class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 101 num_branch = Trident.num_branch branch_ids = Trident.branch_ids branch_dilates = Trident.branch_dilates branch_bn_shared = Trident.branch_bn_shared branch_conv_shared = Trident.branch_conv_shared branch_deform = Trident.branch_deform class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image * Trident.num_branch class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 500 if is_train else 300 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = True image_roi = 128 fg_fraction = 0.5 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 128 batch_image = General.batch_image * Trident.num_branch class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = 16 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol( backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch=Trident.num_branch, scaleaware=Trident.train_scaleaware) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head, Trident.num_branch) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch=Trident.num_branch) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-%s" % BackboneParam.depth epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 5 class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.0 iter = 3000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestParam: min_det_score = 0.001 max_det_per_image = 100 process_roidb = lambda x: x if Trident.test_scaleaware: process_output = lambda x, y: process_branch_outputs( x, Trident.num_branch, Trident.valid_ranges, Trident. valid_ranges_on_origin) else: process_output = lambda x, y: x process_rpn_output = lambda x, y: process_branch_rpn_outputs( x, Trident.num_branch) class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) class ResizeParam: short = 800 long = 1200 if is_train else 2000 class PadParam: short = 800 long = 1200 if is_train else 2000 max_num_gt = 100 class ScaleRange: valid_ranges = Trident.valid_ranges cal_on_origin = Trident.valid_ranges_on_origin # True: valid_ranges on origin image scale / valid_ranges on resized image scale class AnchorTarget2DParam: class generate: short = 800 // 16 long = 1200 // 16 stride = 16 scales = (2, 4, 8, 16, 32) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class trident: invalid_anchor_threshd = 0.3 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.tridentnet.input import ScaleAwareRange, TridentAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), ScaleAwareRange(ScaleRange), TridentAnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] if Trident.train_scaleaware: data_name.append("valid_ranges") label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: num_class = 1 + 80 fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0)) ratio = (0.5, 1.0, 2.0) stride = (8, 16, 32, 64, 128) max_side = 1440 class anchor_assign: allowed_border = 9999 bbox_thr = 0.6 pre_anchor_top_n = 50 class head: conv_channel = 256 mean = (.0, .0, .0, .0) std = (0.1, 0.1, 0.2, 0.2) class proposal: pre_nms_top_n = 1000 post_nms_top_n = None nms_thr = None min_bbox_side = None class subsample_proposal: proposal_wo_gt = None image_roi = None fg_fraction = None fg_thr = None bg_thr_hi = None bg_thr_lo = None class bbox_target: num_reg_class = None class_agnostic = None weight = None mean = None std = None class focal_loss: alpha = 0.5 gamma = 2.0 class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = None image_roi = None batch_image = None class regress_target: class_agnostic = None mean = None std = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = None stride = None class DatasetParam: if is_train: image_set = ("coco_train2017", ) else: image_set = ("coco_val2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-50" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class OptimizeParam: class optimizer: type = "sgd" lr = 0.005 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 35 class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.005 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3 iter = 500 class TestParam: min_det_score = 0.05 # filter appended boxes max_det_per_image = 100 def process_roidb(x): return x # noqa: E704 def process_output(x, y): return x # noqa: E704 class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord from models.retinanet.input import Norm2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric pos_loss = metric.ScalarLoss("PosLoss", ["positive_loss_output"], []) neg_loss = metric.ScalarLoss("NegLoss", ["negative_loss_output"], []) metric_list = [pos_loss, neg_loss] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: # number of iteration for print the metrics to stdout log_frequency = 10 # the directory name for the experiment, the default is the name of config name = __name__.rsplit("/")[-1].rsplit(".")[-1] # batch size per GPU batch_image = 2 if is_train else 1 # use FP16 for weight and activation # recommend to toggle when you are training on Volta or later GPUs fp16 = False # number of threads used for the data loader # this term affects both the CPU utilization and the MEM usage # lower this if you are training on Desktop loader_worker = 8 class KvstoreParam: # the type of communicator used to sync model parameters kvstore = "nccl" # "local", "aggregated" batch_image = General.batch_image # GPUs to use gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # the type of normalizer used for network # see also ModelParam.pretrain.fixed_param for the freeze of gamma/beta normalizer = normalizer_factory(type="fixbn") # freeze bn stats normalizer = normalizer_factory( type="localbn") # use bn stats in one GPU normalizer = normalizer_factory( type="syncbn", ndev=len(KvstoreParam.gpus)) # use bn stats across GPUs normalizer = normalizer_factory(type="gn") # use GroupNorm class BackboneParam: # you can control the FP16 option and normalizer for each individual component fp16 = General.fp16 normalizer = NormalizeParam.normalizer # some backbone component accept additional configs, like the depth for ResNet depth = 50 class NeckParam: # you can control the FP16 option and normalizer for each individual component fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: # you can control the FP16 option and normalizer for each individual component fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image # use ONNX-compatible proposal operator instead of the one written in C++/CUDA nnvm_proposal = True # use in-network rpn target operator instead of the label generated by data loader # if your network is quite fast, the CPU might not feed the labels fast enough # else you can offload the rpn target generation to CPU to save GPU resources nnvm_rpn_target = False # anchor grid generated are used in the rpn target assign and proposal decoding class anchor_generate: scale = (8, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) # to avoid generate the same anchor grid more than once # we cache an anchor grid in the arg_params # max_side specify the max side of resized input image # 3000 is a safe bet, increase it if necessary max_side = 1400 # valid when use nnvm_rpn_target, controls the rpn target assign class anchor_assign: # number of pixels the anchor box could extend out of the image border allowed_border = 0 # iou lower bound with groundtruth box for foreground anchor pos_thr = 0.7 # iou upper bound with groundtruth box for background anchor neg_thr = 0.3 # every groundtruth box will match the anchors overlaps most with it by default # increase the threshold to avoid matching low quality anchors min_pos_thr = 0.0 # number of anchors per image image_anchor = 256 # fraction of foreground anchors per image pos_fraction = 0.5 # rpn head structure class head: # number of channels for the 3x3 conv in rpn head conv_channel = 256 # mean and std for rpn regression target mean = (0, 0, 0, 0) std = (1, 1, 1, 1) # the proposal generation for RCNN class proposal: # number of top-scored proposals to take before NMS pre_nms_top_n = 2000 if is_train else 1000 # number of top-scored proposals to take after NMS post_nms_top_n = 2000 if is_train else 1000 # proposal NMS threshold nms_thr = 0.7 # min proposal box to keep, 0 means keep all min_bbox_side = 0 # the proposal sampling for RCNN during training class subsample_proposal: # add gt to proposals proposal_wo_gt = False # number of proposals sampled per image during training image_roi = 512 # the maxinum fraction of foreground proposals fg_fraction = 0.25 # iou lower bound with gt bbox for foreground proposals fg_thr = 0.5 # iou upper bound with gt bbox for background proposals bg_thr_hi = 0.5 # iou lower bound with gt bbox for background proposals # set to non-zero value could remove some trivial background proposals bg_thr_lo = 0.0 # the target encoding for RCNN bbox head class bbox_target: # 1(background) + num_class # could be num_class if using sigmoid activition instead of softmax one num_reg_class = 1 + 80 # share the regressor for all classes class_agnostic = False # the mean, std, and weight for bbox head regression target weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: # you can control the FP16 option and normalizer for each individual component fp16 = General.fp16 normalizer = NormalizeParam.normalizer # num_class may be different from RpnParam.bbox_target.num_reg_class # if the class_agnostic regressor is adopted num_class = 1 + 80 image_roi = RpnParam.subsample_proposal.image_roi batch_image = General.batch_image class regress_target: class_agnostic = RpnParam.bbox_target.class_agnostic mean = RpnParam.bbox_target.mean std = RpnParam.bbox_target.std class MaskParam: # you can control the FP16 option and normalizer for each individual component fp16 = General.fp16 normalizer = NormalizeParam.normalizer # output resolution of mask head resolution = 28 # number of channels for 3x3 convs in mask head dim_reduced = 256 # mask head only trains on foreground proposals # so we discard all the background proposals to save computation num_fg_roi = int(RpnParam.subsample_proposal.image_roi * RpnParam.subsample_proposal.fg_fraction) class RoiParam: # you can control the FP16 option and normalizer for each individual component fp16 = General.fp16 normalizer = NormalizeParam.normalizer # Each RoI is pooled into an out_size x out_size fixed-length representation out_size = 7 # the total stride of the feature map to pool from stride = (4, 8, 16, 32) # FPN specific configs # objects of size in [224^2, 448^2) will be assgin to P4 roi_canonical_scale = 224 roi_canonical_level = 4 class MaskRoiParam: # you can control the FP16 option and normalizer for each individual component fp16 = General.fp16 normalizer = NormalizeParam.normalizer # Each RoI is pooled into an out_size x out_size fixed-length representation out_size = 14 # the total stride of the feature map to pool from stride = (4, 8, 16, 32) # FPN specific configs # objects of size in [224^2, 448^2) will be assgin to P4 roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: # specify the roidbs to read for training/validation if is_train: # == coco_train2017 image_set = ("coco_train2014", "coco_valminusminival2014") else: # == coco_val2017 image_set = ("coco_minival2014", ) class OptimizeParam: class optimizer: type = "sgd" # learning rate will automaticly adapt to different batch size # the base learning rate is 0.02 for 16 images lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: # correspond to the 1x, 2x, ... training schedule mult = 2 begin_epoch = 0 end_epoch = 6 * mult lr_mode = "step" # or "cosine" # lr step factor lr_factor = 0.1 # lr step iterations if mult <= 1: lr_iter = [ 60000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] else: # follow the practice in arXiv:1811.08883 # reduce the lr in the last 60k and 20k iterations lr_iter = [ -60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), -20000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] # follow the practice in arXiv:1706.02677 class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3 iter = 500 class TestParam: # detection below min_det_score will be removed in the evaluation min_det_score = 0.05 # only the top max_det_per_image detecitons will be evaluated max_det_per_image = 100 # callback, useful in multi-scale testing process_roidb = lambda x: x # callback, useful in scale-aware post-processing process_output = lambda x, y: process_output(x, y) # the model name and epoch used during test # by default the last checkpoint is employed # user can override this with --epoch N when invoking script class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" # or "softnms" thr = 0.5 # we make use of the coco test toolchain # if no coco format annotation file is specified # test script will generate one on the fly from roidb class coco: annotation = "data/coco/annotations/instances_minival2014.json" # compose the components to for a detector backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam, MaskParam) roi_extractor = RoiExtractor(RoiParam) mask_roi_extractor = RoiExtractor(MaskRoiParam) bbox_head = BboxHead(BboxParam) mask_head = MaskHead(BboxParam, MaskParam, MaskRoiParam) bbox_post_processer = BboxPostProcessor(TestParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, mask_roi_extractor, bbox_head, mask_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, mask_roi_extractor, bbox_head, mask_head, bbox_post_processer) class ModelParam: train_symbol = train_sym test_symbol = test_sym # training model from scratch from_scratch = False # use random seed when initializating random = True # sublinear memory checkpointing memonger = False # checkpointing up to a layer # recompute early stage of a network is cheaper memonger_until = "stage3_unit21_plus" class pretrain: # the model name and epoch used for initialization prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 # any params partially match the fixed_param will be fixed # fixed params will not be updated fixed_param = ["conv0", "stage1", "gamma", "beta"] # any params partially match the excluded_param will not be fixed excluded_param = ["mask_fcn"] # callback, useful for adding cached anchor or complex initialization def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) # data processing class NormParam: # mean/std for input image mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) # data processing class ResizeParam: # the input is resized to a short side not exceeding short # and a long side not exceeding long short = 800 long = 1333 # SimpleDet is written in MXNet symbolic API which features the fastest # execution while requires static input shape # All the inputs are padded to the maximum shape item on the dataset class PadParam: # the resized input is padded to short x long with 0 in bottom-right corner short = 800 long = 1333 max_num_gt = 100 max_len_gt_poly = 2500 # this control the rpn target generation offloaded to CPU data loader # refer to RpnParam.anchor_generate for more infos class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) # the shorts and longs have to be pre-computed since the # loader knows nothing of the network # the downsampled side can be calculated as ceil(side / 2) self.short = (200, 100, 50, 25, 13) self.long = (334, 167, 84, 42, 21) scales = (8, ) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 # align blobs name between loader and network class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.maskrcnn.input import PreprocessGtPoly, EncodeGtPoly, \ Resize2DImageBboxMask, Flip2DImageBboxMask, Pad2DImageBboxMask from models.FPN.input import PyramidAnchorTarget2D # modular data augmentation design if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), PreprocessGtPoly(), Resize2DImageBboxMask(ResizeParam), Flip2DImageBboxMask(), EncodeGtPoly(PadParam), Pad2DImageBboxMask(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["im_info", "gt_bbox", "gt_poly"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric from models.maskrcnn.metric import SigmoidCELossMetric from mxboard import SummaryWriter # summary writer logs metric to tensorboard for a better track of training sw = SummaryWriter(logdir="./tflogs", flush_secs=5) rpn_acc_metric = metric.AccWithIgnore( name="RpnAcc", output_names=["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], label_names=[], summary=sw) rpn_l1_metric = metric.L1( name="RpnL1", output_names=["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], label_names=[], summary=sw) box_acc_metric = metric.AccWithIgnore( name="RcnnAcc", output_names=["bbox_cls_loss_output", "bbox_label_blockgrad_output"], label_names=[], summary=sw) box_l1_metric = metric.L1( name="RcnnL1", output_names=["bbox_reg_loss_output", "bbox_label_blockgrad_output"], label_names=[], summary=sw) mask_cls_metric = SigmoidCELossMetric(name="MaskCE", output_names=["mask_loss_output"], label_names=[], summary=sw) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric, mask_cls_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False long_side = 1280 short_side = 960 class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 50 class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image nnvm_proposal = True nnvm_rpn_target = False use_symbolic_proposal = None class anchor_generate: scale = (8, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) image_anchor = 256 max_side = 1280 class anchor_assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.3 image_anchor = 256 pos_fraction = 0.5 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 2000 if is_train else 1000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.5 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 54 image_roi = 512 batch_image = General.batch_image stage = "1st" loss_weight = 1.0 class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.6 bg_thr_hi = 0.6 bg_thr_lo = 0.6 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam2nd: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 54 image_roi = 512 batch_image = General.batch_image stage = "2nd" loss_weight = 0.5 class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.05, 0.05, 0.1, 0.1) class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.7 bg_thr_hi = 0.7 bg_thr_lo = 0.7 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.05, 0.05, 0.1, 0.1) class BboxParam3rd: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 54 image_roi = 512 batch_image = General.batch_image stage = "3rd" loss_weight = 0.25 class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.033, 0.033, 0.067, 0.067) class subsample_proposal: proposal_wo_gt = None image_roi = None fg_fraction = None fg_thr = None bg_thr_hi = None bg_thr_lo = None class bbox_target: num_reg_class = None class_agnostic = None weight = None mean = None std = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("vending_train", ) else: image_set = ("coco_val2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) bbox_head_2nd = BboxHead(BboxParam2nd) bbox_head_3rd = BboxHead(BboxParam3rd) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head, bbox_head_2nd, bbox_head_3rd) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head, bbox_head_2nd, bbox_head_3rd) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-50" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class QuantizeTrainingParam: quantize_flag = False # quantized_op = ("Convolution", "FullyConnected", "Deconvolution","Concat", "Pooling", "add_n", "elemwise_add") quantized_op = ("Convolution", "FullyConnected", "Deconvolution") class WeightQuantizeParam: delay_quant = 0 ema_decay = 0.99 grad_mode = "ste" is_weight = True is_weight_perchannel = False quant_mode = "minmax" class ActQuantizeParam: delay_quant = 0 ema_decay = 0.99 grad_mode = "ste" is_weight = False is_weight_perchannel = False quant_mode = "minmax" class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.6 class coco: annotation = "data/coco/annotations/instances_val2017.json" # data processing class NormParam: mean = (123.675, 116.28, 103.53) # RGB order std = (58.395, 57.12, 57.375) # data processing class ResizeParam: short = 960 long = 1280 class PadParam: short = 960 long = 1280 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() self.mean = (0, 0, 0, 0) self.std = (1, 1, 1, 1) class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (240, 120, 60, 30, 15) self.long = (320, 160, 80, 40, 20) scales = (8, ) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.3 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.FPN.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []) # for bbox, the label is generated in network so it is an output # stage1 metric box_acc_metric_1st = metric.AccWithIgnore( "RcnnAcc_1st", ["bbox_cls_loss_1st_output", "bbox_label_blockgrad_1st_output"], []) box_l1_metric_1st = metric.L1( "RcnnL1_1st", ["bbox_reg_loss_1st_output", "bbox_label_blockgrad_1st_output"], []) # stage2 metric box_acc_metric_2nd = metric.AccWithIgnore( "RcnnAcc_2nd", ["bbox_cls_loss_2nd_output", "bbox_label_blockgrad_2nd_output"], []) box_l1_metric_2nd = metric.L1( "RcnnL1_2nd", ["bbox_reg_loss_2nd_output", "bbox_label_blockgrad_2nd_output"], []) # stage3 metric box_acc_metric_3rd = metric.AccWithIgnore( "RcnnAcc_3rd", ["bbox_cls_loss_3rd_output", "bbox_label_blockgrad_3rd_output"], []) box_l1_metric_3rd = metric.L1( "RcnnL1_3rd", ["bbox_reg_loss_3rd_output", "bbox_label_blockgrad_3rd_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric_1st, box_l1_metric_1st, box_acc_metric_2nd, box_l1_metric_2nd, box_acc_metric_3rd, box_l1_metric_3rd ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = True loader_worker = 24 class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 50 class NeckParam: fp16 = General.fp16 normalizer = normalizer_factory(type="localbn") class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image nnvm_proposal = True nnvm_rpn_target = False class anchor_generate: scale = (8, ) ratio = (0.5, 1, 2) stride = (4, 8, 16, 32, 64) max_side = 1450 class anchor_assign: allowed_border = 1000 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 image_anchor = 256 pos_fraction = 0.5 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 3000 if is_train else 2000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 2 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.5 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = False weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 1 image_roi = 512 batch_image = General.batch_image repeat_time = 4 refine_mode = True class regress_target: class_agnostic = False mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("crowdhuman_train", ) else: image_set = ("crowdhuman_val", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = [ "conv0", "stage1", "bn_gamma", "bn_beta", "bn0", "bn1", "bn2", "bn3", "bn4" ] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 10 lr_iter = [ 14960 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 17765 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 300 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "set_nms" thr = 0.5 class coco: annotation = "/mnt/truenas/scratch/czh/data/crowdhuman/annotations/annotation_val.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) # data processing class ResizeParam: short = 800 long = 1400 class PadParam: short = 800 long = 1400 max_num_gt = 500 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (200, 100, 50, 25, 13) self.long = (350, 175, 88, 44, 22) scales = (8) aspects = RpnParam.anchor_generate.ratio class assign: allowed_border = 1000 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.crowdhuman.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []) emd_metric = metric.ScalarLoss("emd", [ 'cls_reg_loss_output', ], []) refine_emd_metric = metric.ScalarLoss("refine_emd", [ 'refine_cls_reg_loss_output', ], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, emd_metric, refine_emd_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list