def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 1 if is_train else 1 fp16 = False class Trident: num_branch = 3 if is_train else 1 train_scaleaware = False test_scaleaware = False branch_ids = range(num_branch) if is_train else [1] branch_dilates = [1, 2, 3] if is_train else [2] valid_ranges = [(0, -1), (0, -1), (0, -1)] if is_train else [(0, -1)] valid_ranges_on_origin = True branch_bn_shared = True branch_conv_shared = True branch_deform = False assert num_branch == len(branch_ids) assert num_branch == len(valid_ranges) class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 101 num_branch = Trident.num_branch branch_ids = Trident.branch_ids branch_dilates = Trident.branch_dilates branch_bn_shared = Trident.branch_bn_shared branch_conv_shared = Trident.branch_conv_shared branch_deform = Trident.branch_deform class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image * Trident.num_branch class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 500 if is_train else 300 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = True image_roi = 128 fg_fraction = 0.5 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 128 batch_image = General.batch_image * Trident.num_branch class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = 16 class DatasetParam: if is_train: image_set = ("coco_train2017", ) else: image_set = ("coco_val2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol( backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch=Trident.num_branch, scaleaware=Trident.train_scaleaware) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head, Trident.num_branch) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch=Trident.num_branch) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 5 class schedule: begin_epoch = 0 end_epoch = 12 lr_iter = [ 120000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 160000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.0 iter = 3000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestParam: min_det_score = 0.001 max_det_per_image = 100 process_roidb = lambda x: x if Trident.test_scaleaware: process_output = lambda x, y: process_branch_outputs( x, Trident.num_branch, Trident.valid_ranges, Trident. valid_ranges_on_origin) else: process_output = lambda x, y: x process_rpn_output = lambda x, y: process_branch_rpn_outputs( x, Trident.num_branch) class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) class ResizeParam: short = 800 long = 1200 if is_train else 2000 class PadParam: short = 800 long = 1200 if is_train else 2000 max_num_gt = 100 class ScaleRange: valid_ranges = Trident.valid_ranges cal_on_origin = Trident.valid_ranges_on_origin # True: valid_ranges on origin image scale / valid_ranges on resized image scale class AnchorTarget2DParam: class generate: short = 800 // 16 long = 1200 // 16 stride = 16 scales = (2, 4, 8, 16, 32) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class trident: invalid_anchor_threshd = 0.3 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.tridentnet.input import ScaleAwareRange, TridentAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), ScaleAwareRange(ScaleRange), TridentAnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] if Trident.train_scaleaware: data_name.append("valid_ranges") label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: num_class = 1 + 80 fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0)) ratio = (0.5, 1.0, 2.0) stride = (8, 16, 32, 64, 128) image_anchor = None class head: conv_channel = 256 mean = None std = None class proposal: pre_nms_top_n = 1000 post_nms_top_n = None nms_thr = None min_bbox_side = None min_det_score = 0.05 # filter score in network class subsample_proposal: proposal_wo_gt = None image_roi = None fg_fraction = None fg_thr = None bg_thr_hi = None bg_thr_lo = None class bbox_target: num_reg_class = None class_agnostic = None weight = None mean = None std = None class focal_loss: alpha = 0.25 gamma = 2.0 class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = None image_roi = None batch_image = None class regress_target: class_agnostic = None mean = None std = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = None stride = None class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-101" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.005 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.005 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3 iter = 500 class TestParam: min_det_score = 0 # filter appended boxes max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.short = (100, 50, 25, 13, 7) self.long = (167, 84, 42, 21, 11) self.stride = (8, 16, 32, 64, 128) scales = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0)) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 9999 pos_thr = 0.5 neg_thr = 0.4 min_pos_thr = 0.0 class sample: image_anchor = None pos_fraction = None class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord from models.retinanet.input import PyramidAnchorTarget2D, Norm2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), PyramidAnchorTarget2D(AnchorTarget2DParam()), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] from models.retinanet import metric rpn_acc_metric = metric.FGAccMetric("FGAcc", ["cls_loss_output"], ["rpn_cls_label"]) metric_list = [rpn_acc_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (8, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) image_anchor = 256 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 2000 if is_train else 1000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 81 class_agnostic = False weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = False mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-101" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) # data processing class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (200, 100, 50, 25, 13) self.long = (334, 167, 84, 42, 21) scales = (8) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.FPN.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), PyramidAnchorTarget2D(AnchorTarget2DParam()), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 3 if is_train else 1 fp16 = True class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="syncbn", ndev=8, wd_mult=1.0) class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 50 class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer dim_reduced = 384 num_stage = 3 S0_kernel = 1 class RpnParam: num_class = 1 + 80 fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image sync_loss = True class anchor_generate: scale = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0)) ratio = (0.5, 1.0, 2.0) stride = (8, 16, 32, 64, 128) image_anchor = None class head: conv_channel = 256 mean = None std = None class proposal: pre_nms_top_n = 1000 post_nms_top_n = None nms_thr = None min_bbox_side = None min_det_score = 0.05 # filter score in network class focal_loss: alpha = 0.25 gamma = 2.0 class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = None image_roi = None batch_image = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = None stride = None class DatasetParam: if is_train: image_set = ("coco_train2017", "coco_val2017") else: image_set = ("coco_test-dev2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage4_unit3_relu" class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = ["conv0"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 25 lr_iter = [ 15272 * 15 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 15272 * 20 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.001 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image iter = 15272 * 1 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestParam: min_det_score = 0 # filter appended boxes max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_val2017.json" # data processing class NormParam: mean = (123.688, 116.779, 103.939) # RGB order std = (58.393, 57.12, 57.375) class ResizeParam: short = 1280 long = 1280 scale_min = 0.8 scale_max = 1.2 class PadParam: short = ResizeParam.short long = ResizeParam.long max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.short = (160, 80, 40, 20, 10) self.long = (160, 80, 40, 20, 10) self.stride = (8, 16, 32, 64, 128) scales = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0)) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 9999 pos_thr = 0.5 neg_thr = 0.5 min_pos_thr = 0.0 class sample: image_anchor = None pos_fraction = None class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord from models.NASFPN.input import RandResizeCrop2DImageBbox, ResizeCrop2DImageBbox from models.retinanet.input import PyramidAnchorTarget2D, Norm2DImage, \ AverageFgCount if is_train: transform = { "sample": [ ReadRoiRecord(None), Norm2DImage(NormParam), RandResizeCrop2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), PyramidAnchorTarget2D(AnchorTarget2DParam()), RenameRecord(RenameParam.mapping) ], "batch": [AverageFgCount("rpn_fg_count")] } data_name = ["data"] label_name = [ "rpn_cls_label", "rpn_fg_count", "rpn_reg_target", "rpn_reg_weight" ] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), ResizeCrop2DImageBbox(ResizeParam), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] from models.retinanet import metric rpn_acc_metric = metric.FGAccMetric("FGAcc", ["cls_loss_output"], ["rpn_cls_label"]) metric_list = [rpn_acc_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = True loader_worker = 24 class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 50 class NeckParam: fp16 = General.fp16 normalizer = normalizer_factory(type="localbn") class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image nnvm_proposal = True nnvm_rpn_target = False class anchor_generate: scale = (8, ) ratio = (0.5, 1, 2) stride = (4, 8, 16, 32, 64) max_side = 1450 class anchor_assign: allowed_border = 1000 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 image_anchor = 256 pos_fraction = 0.5 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 3000 if is_train else 2000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 2 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.5 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = False weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 1 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = False mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("crowdhuman_train", ) else: image_set = ("crowdhuman_val", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = [ "conv0", "stage1", "bn_gamma", "bn_beta", "bn0", "bn1", "bn2", "bn3", "bn4" ] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 10 lr_iter = [ 14960 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 17765 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 300 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "/mnt/truenas/scratch/czh/data/crowdhuman/annotations/annotation_val.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) # data processing class ResizeParam: short = 800 long = 1400 class PadParam: short = 800 long = 1400 max_num_gt = 500 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (200, 100, 50, 25, 13) self.long = (350, 175, 88, 44, 22) scales = (8) aspects = RpnParam.anchor_generate.ratio class assign: allowed_border = 1000 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.crowdhuman.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []) emd_metric = metric.ScalarLoss("emd", [ 'cls_reg_loss_output', ], []) metric_list = [rpn_acc_metric, rpn_l1_metric, emd_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 8 if is_train else 1 fp16 = True loader_worker = 8 class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="localbn", ndev=len(KvstoreParam.gpus)) # normalizer = normalizer_factory(type="gn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image nnvm_proposal = True nnvm_rpn_target = False class anchor_generate: scale = (4, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) image_anchor = 256 max_side = 700 class anchor_assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 image_anchor = 256 pos_fraction = 0.5 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 2000 if is_train else 1000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 81 class_agnostic = False weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = False mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("coco_train2017", ) total_image = 82783 + 35504 else: image_set = ("coco_val2017", ) total_image = 5000 backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = True random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = None epoch = 0 fixed_param = [] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 1e-4 clip_gradient = None class schedule: mult = 6 begin_epoch = 0 end_epoch = 6 * mult if mult <= 2: lr_iter = [ 60000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * mult * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] else: # follow the setting in Rethinking ImageNet Pre-training # reduce the lr in the last 60k and 20k iterations lr_iter = [ (DatasetParam.total_image * 2 // 16 * end_epoch - 60000) * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), (DatasetParam.total_image * 2 // 16 * end_epoch - 20000) * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) # data processing class ResizeParam: short = 400 long = 600 class PadParam: short = 400 long = 600 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (100, 50, 25, 13, 7) self.long = (150, 75, 38, 19, 10) scales = (4) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.FPN.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=8, wd_mult=1.0) normalizer = normalizer_factory(type="gn") class BackboneParam: fp16 = General.fp16 # normalizer = NormalizeParam.normalizer normalizer = normalizer_factory(type="fixbn") class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class HeadParam: num_class = 1 + 80 fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class point_generate: num_points = 9 scale = 4 stride = (8, 16, 32, 64, 128) # transform = "minmax" transform = "moment" class head: conv_channel = 256 point_conv_channel = 256 mean = None std = None class proposal: pre_nms_top_n = 1000 post_nms_top_n = None nms_thr = None min_bbox_side = None class point_target: target_scale = 4 num_pos = 1 class bbox_target: pos_iou_thr = 0.5 neg_iou_thr = 0.5 min_pos_iou = 0.0 class focal_loss: alpha = 0.25 gamma = 2.0 class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = None image_roi = None batch_image = None class regress_target: class_agnostic = None mean = None std = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = None stride = None class DatasetParam: if is_train: image_set = ("coco_train2017", ) else: image_set = ("coco_val2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) head = Head(HeadParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-50" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] excluded_param = ["gn"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.005 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.005 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3 iter = 500 class TestParam: min_det_score = 0.05 # filter appended boxes max_det_per_image = 100 def process_roidb(x): return x def process_output(x, y): return x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord from models.retinanet.input import Norm2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] from models.retinanet import metric as cls_metric import core.detection_metric as box_metric cls_acc_metric = cls_metric.FGAccMetric( "FGAcc", ["cls_loss_output", "point_refine_labels_output"], []) box_init_l1_metric = box_metric.L1( "InitL1", ["pts_init_loss_output", "points_init_labels_output"], []) box_refine_l1_metric = box_metric.L1( "RefineL1", ["pts_refine_loss_output", "point_refine_labels_output"], []) metric_list = [cls_acc_metric, box_init_l1_metric, box_refine_l1_metric] return General, KvstoreParam, HeadParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") transform = [ ReadRoiRecord(None), RandResizeCrop2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), AnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] DEBUG = True with open("data/cache/coco_val2017.roidb", "rb") as fin: roidb = pkl.load(fin) roidb = [rec for rec in roidb if rec["gt_bbox"].shape[0] > 0] roidb = [roidb[i] for i in np.random.choice(len(roidb), 20, replace=False)] print(roidb[0]) flipped_roidb = [] for rec in roidb:
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = True image_roi = 256 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 256 batch_image = General.batch_image class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = 16 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-50" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 35 class schedule: begin_epoch = 0 end_epoch = 12 lr_iter = [ 120000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 160000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.0 iter = 3000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestScaleParam: short_ranges = [600, 800, 1000, 1200] long_ranges = [2000, 2000, 2000, 2000] @staticmethod def add_resize_info(roidb): ms_roidb = [] for r_ in roidb: for short, long in zip(TestScaleParam.short_ranges, TestScaleParam.long_ranges): r = r_.copy() r["resize_long"] = long r["resize_short"] = short ms_roidb.append(r) return ms_roidb class TestParam: min_det_score = 0.001 max_det_per_image = 0 process_roidb = TestScaleParam.add_resize_info process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class ResizeParam: short = 800 long = 1200 if is_train else 2000 class RandResizeParam: short = None # generate on the fly long = None short_ranges = [600, 800, 1000, 1200] long_ranges = [2000, 2000, 2000, 2000] class RandCropParam: mode = "center" # random or center short = 800 long = 1200 class PadParam: short = 800 long = 1200 if is_train else 2000 max_num_gt = 100 class AnchorTarget2DParam: class generate: short = 800 // 16 long = 1200 // 16 stride = 16 scales = (2, 4, 8, 16, 32) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, RandResize2DImageBbox, RandCrop2DImageBbox, \ Resize2DImageBboxByRoidb, ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, AnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), RandResize2DImageBbox(RandResizeParam), RandCrop2DImageBbox(RandCropParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), AnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), #Resize2DImageBbox(ResizeParam), Resize2DImageBboxByRoidb(), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 20 depth = 101 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 3 if is_train else 1 fp16 = True class Trident: num_branch = 3 train_scaleaware = True test_scaleaware = True branch_ids = range(num_branch) branch_dilates = [1, 2, 3] valid_ranges = [(0, 150), (50, 270), (150, -1)] valid_ranges_on_origin = False branch_bn_shared = False branch_conv_shared = True branch_deform = True class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) # normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 depth = General.depth normalizer = NormalizeParam.normalizer num_branch = Trident.num_branch branch_ids = Trident.branch_ids branch_dilates = Trident.branch_dilates branch_bn_shared = Trident.branch_bn_shared branch_conv_shared = Trident.branch_conv_shared branch_deform = Trident.branch_deform class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image * Trident.num_branch class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 500 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = True image_roi = 128 fg_fraction = 0.5 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 128 batch_image = General.batch_image * Trident.num_branch class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = 16 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014", "coco_minival2014") else: image_set = ("coco_test-dev2017", ) # image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol( backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch=Trident.num_branch, scaleaware=Trident.train_scaleaware) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol( backbone, neck, rpn_head, roi_extractor, bbox_head, num_branch=Trident.num_branch) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = True memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-%d" % General.depth epoch = 0 fixed_param = [] def process_weight(sym, arg_params, aux_params): import re import logging logger = logging.getLogger() # for trident non-shared initialization for k in sym.list_arguments(): branch_name = re.sub('_branch\d+', '', k) if k != branch_name and branch_name in arg_params: arg_params[k] = arg_params[branch_name] logger.info('init arg {} with {}'.format(k, branch_name)) for k in sym.list_auxiliary_states(): branch_name = re.sub('_branch\d+', '', k) if k != branch_name and branch_name in aux_params: aux_params[k] = aux_params[branch_name] logger.info('init aux {} with {}'.format(k, branch_name)) class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 5 class schedule: begin_epoch = 0 end_epoch = 18 lr_iter = [180000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 240000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)] class warmup: type = "gradual" lr = 0.0 iter = 3000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestScaleParam: short_ranges = [600, 800, 1000, 1200] long_ranges = [2000, 2000, 2000, 2000] @staticmethod def add_resize_info(roidb): ms_roidb = [] for r_ in roidb: for short, long in zip(TestScaleParam.short_ranges, TestScaleParam.long_ranges): r = r_.copy() r["resize_long"] = long r["resize_short"] = short ms_roidb.append(r) return ms_roidb class TestParam: min_det_score = 0.001 max_det_per_image = 100 process_roidb = TestScaleParam.add_resize_info if Trident.test_scaleaware: process_output = lambda x, y: process_branch_outputs( x, Trident.num_branch, Trident.valid_ranges, Trident.valid_ranges_on_origin) else: process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: from operator_py.nms import cython_soft_nms_wrapper type = cython_soft_nms_wrapper thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class ResizeParam: short = 800 long = 1200 if is_train else 2000 class RandResizeParam: short = None # generate on the fly long = None short_ranges = [600, 800, 1000, 1200] long_ranges = [2000, 2000, 2000, 2000] class RandCropParam: mode = "center" # random or center short = 800 long = 1200 class PadParam: short = 800 long = 1200 if is_train else 2000 max_num_gt = 100 class ScaleRange: valid_ranges = Trident.valid_ranges cal_on_origin = Trident.valid_ranges_on_origin # True: valid_ranges on origin image scale / valid_ranges on resized image scale class AnchorTarget2DParam: class generate: short = 800 // 16 long = 1200 // 16 stride = 16 scales = (2, 4, 8, 16, 32) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class trident: invalid_anchor_threshd = 0.3 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, RandResize2DImageBbox, RandCrop2DImageBbox, Resize2DImageBboxByRoidb, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord from models.tridentnet.input import ScaleAwareRange, TridentAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), RandResize2DImageBbox(RandResizeParam), RandCrop2DImageBbox(RandCropParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), ScaleAwareRange(ScaleRange), TridentAnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] if Trident.train_scaleaware: data_name.append("valid_ranges") label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Resize2DImageBboxByRoidb(), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"] ) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"] ) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], [] ) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], [] ) metric_list = [rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: num_class = 1 + 80 fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (4 * 2**0, 4 * 2**(1.0 / 3.0), 4 * 2**(2.0 / 3.0)) ratio = (0.5, 1.0, 2.0) stride = (8, 16, 32, 64, 128) max_side = 1440 class anchor_assign: allowed_border = 9999 bbox_thr = 0.6 pre_anchor_top_n = 50 class head: conv_channel = 256 mean = (.0, .0, .0, .0) std = (0.1, 0.1, 0.2, 0.2) class proposal: pre_nms_top_n = 1000 post_nms_top_n = None nms_thr = None min_bbox_side = None class subsample_proposal: proposal_wo_gt = None image_roi = None fg_fraction = None fg_thr = None bg_thr_hi = None bg_thr_lo = None class bbox_target: num_reg_class = None class_agnostic = None weight = None mean = None std = None class focal_loss: alpha = 0.5 gamma = 2.0 class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = None image_roi = None batch_image = None class regress_target: class_agnostic = None mean = None std = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = None stride = None class DatasetParam: if is_train: image_set = ("coco_train2017", ) else: image_set = ("coco_val2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head) test_sym = None else: train_sym = None test_sym = detector.get_test_symbol(backbone, neck, rpn_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-101" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class OptimizeParam: class optimizer: type = "sgd" lr = 0.005 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 35 class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.005 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3 iter = 1000 class TestParam: min_det_score = 0.05 # filter appended boxes max_det_per_image = 100 def process_roidb(x): return x # noqa: E704 def process_output(x, y): return x # noqa: E704 class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord from models.retinanet.input import Norm2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric pos_loss = metric.ScalarLoss("PosLoss", ["positive_loss_output"], []) neg_loss = metric.ScalarLoss("NegLoss", ["negative_loss_output"], []) metric_list = [pos_loss, neg_loss] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: use_groupsoftmax = True log_frequency = 20 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = True class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: if is_train: normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) else: normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = normalizer_factory( type="fixbn") # old model does not use BN in RPN head batch_image = General.batch_image use_groupsoftmax = General.use_groupsoftmax num_class = (1 + 2) if use_groupsoftmax else 2 class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = True image_roi = 256 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 83 image_roi = 256 batch_image = General.batch_image use_groupsoftmax = General.use_groupsoftmax class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = 16 class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014", "cctsdb_train") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-101" epoch = 0 fixed_param = [] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 5 class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.0 iter = 3000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "/ws/data/opendata/coco/annotations/instances_minival2014.json" # data processing class GroupParam: # box 83 classes boxv0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, \ 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, \ 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83], dtype=np.float32) #COCO benchmark boxv1 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, \ 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, \ 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, 0, 0 ], dtype=np.float32) #CCTSDB benchmark boxv2 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 82, 83], dtype=np.float32) rpnv0 = np.array([0, 1, 2], dtype=np.float32) # rpn 3 classes rpnv1 = np.array([0, 1, 0], dtype=np.float32) # COCO benchmark rpnv2 = np.array([0, 0, 2], dtype=np.float32) # CCTSDB benchmark rpn_groups = [rpnv0, rpnv1, rpnv2] box_groups = [boxv0, boxv1, boxv2] class ResizeParam: short = 800 long = 1200 if is_train else 2000 class PadParam: short = 800 long = 1200 max_num_gt = 100 class AnchorTarget2DParam: class generate: short = 800 // 16 long = 1200 // 16 stride = 16 scales = (2, 4, 8, 16, 32) aspects = (0.5, 1.0, 2.0) use_groupsoftmax = General.use_groupsoftmax class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 def gtclass2rpn(gtclass): class_gap = 80 gtclass[gtclass > class_gap] = -1 gtclass[gtclass > 0] = 1 gtclass[gtclass < 0] = 2 return gtclass class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, AnchorTarget2D, GroupRead if is_train: transform = [ ReadRoiRecord(None), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), AnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] if General.use_groupsoftmax: data_name.append("rpn_group") data_name.append("box_group") transform.append(GroupRead(GroupParam)) else: transform = [ ReadRoiRecord(None), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], []) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 20 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False loader_worker = 4 loader_collector = 2 profile = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: normalizer = normalizer_factory(type="fix") class BboxParam: pass class RoiParam: pass class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_thresh = 0.05 pre_nms_top_n = 1000 post_nms_top_n = 1000 fpn_box_max_n = 100 nms_thr = 0.6 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class loss_setting: focal_loss_alpha = 0.25 focal_loss_gamma = 2.0 ignore_label = -1 ignore_offset = -1 class FCOSParam: num_classifier = 81 - 1 # COCO: 80 object + 1 background stride = (8, 16, 32, 64, 128) # data processing class NormParam: mean = (122.7717, 115.9465, 102.9801) # RGB order std = (1.0, 1.0, 1.0) class ResizeParam: short = 800 long = 1333 class PadParam: short = 800 long = 1333 max_num_gt = 100 class FCOSFPNAssignParam: stages = [ [-1, 64], [64, 128], [128, 256], [256, 512], [512, INF], ] stride = (8, 16, 32, 64, 128) num_classifier = 81 - 1 # COCO: 80 object + 1 background ignore_label = RpnParam.loss_setting.ignore_label ignore_offset = RpnParam.loss_setting.ignore_offset data_size = [PadParam.short, PadParam.long] class RenameParam: mapping = dict(image="data") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = normalizer_factory(type="dummy") class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) # throw out param used as custom op's input global throwout_param throwout_param = FCOSFPNAssignParam # This line MUST be in front of rpn_head = RpnHead(RpnParam) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head) rpn_test_sym = None test_sym = None else: rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) train_sym = None class ModelParam: train_symbol = train_sym test_symbol = rpn_test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-101" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 16 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 16 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.6 class coco: annotation = "data/coco/annotations/instances_minival2014.json" from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info"] label_name = ["gt_bbox"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import models.FCOS.metric as metric centerness_loss_metric = metric.LossMeter(RpnParam.FCOSParam.stride, pred_id_start=0, pred_id_end=1, name='centernessloss_meter') cls_loss_metric = metric.LossMeter(RpnParam.FCOSParam.stride, pred_id_start=1, pred_id_end=2, name='clsloss_meter') reg_loss_metric = metric.LossMeter(RpnParam.FCOSParam.stride, pred_id_start=2, pred_id_end=3, name='offsetloss_meter') metric_list = [centerness_loss_metric, cls_loss_metric, reg_loss_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False class KvstoreParam: kvstore = "nccl" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class BboxParam: pass class RoiParam: pass class DatasetParam: if is_train: image_set = ("coco_train2014", "coco_valminusminival2014") else: image_set = ("coco_minival2014", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = None class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-50" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 35 class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.0 iter = 3000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class ResizeParam: short = 800 long = 1200 if is_train else 2000 class PadParam: short = 800 long = 1200 if is_train else 2000 max_num_gt = 100 class AnchorTarget2DParam: class generate: short = 800 // 16 long = 1200 // 16 stride = 16 scales = (2, 4, 8, 16, 32) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, AnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), AnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"]) rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"]) metric_list = [rpn_acc_metric, rpn_l1_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False long_side = 1200 short_side = 800 class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 50 class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image use_symbolic_proposal = None class anchor_generate: scale = (2, 4, 8, 16, 32) ratio = (0.5, 1.0, 2.0) stride = 16 max_side = General.long_side image_anchor = 256 class head: conv_channel = 512 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 12000 if is_train else 6000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.0 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 80 image_roi = 512 batch_image = General.batch_image class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = 16 class DatasetParam: if is_train: image_set = ("coco_train2017", ) else: image_set = ("coco_minival2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" process_weight = lambda sym, arg, aux: \ add_anchor_to_arg( sym, arg, aux, RpnParam.anchor_generate.max_side, RpnParam.anchor_generate.stride,RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class pretrain: prefix = "pretrain_model/resnet%s_v1b" % BackboneParam.depth epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] class QuantizeTrainingParam: quantize_flag = False # quantized_op = ("Convolution", "FullyConnected", "Deconvolution","Concat", "Pooling", "add_n", "elemwise_add") quantized_op = ("Convolution", "FullyConnected", "Deconvolution") class WeightQuantizeParam: delay_quant = 0 ema_decay = 0.99 grad_mode = "ste" is_weight = True is_weight_perchannel = False quant_mode = "minmax" class ActQuantizeParam: delay_quant = 0 ema_decay = 0.99 grad_mode = "ste" is_weight = False is_weight_perchannel = False quant_mode = "minmax" class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = 35 class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)] class warmup: type = "gradual" lr = 0.0 iter = 750 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.5 class coco: annotation = "data/coco/annotations/instances_minival2014.json" # data processing class NormParam: mean = tuple(i * 255 for i in (0.485, 0.456, 0.406)) # RGB order std = tuple(i * 255 for i in (0.229, 0.224, 0.225)) class ResizeParam: short = General.short_side long = General.long_side class PadParam: short = ResizeParam.short long = ResizeParam.long max_num_gt = 100 class AnchorTarget2DParam: class generate: short = ResizeParam.short // RpnParam.anchor_generate.stride long = ResizeParam.long // RpnParam.anchor_generate.stride stride = RpnParam.anchor_generate.stride scales = RpnParam.anchor_generate.scale aspects = RpnParam.anchor_generate.ratio class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.0 class sample: image_anchor = RpnParam.anchor_generate.image_anchor pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, AnchorTarget2D, Norm2DImage, Pad2DImage if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), AnchorTarget2D(AnchorTarget2DParam), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "gt_bbox"] label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Pad2DImage(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output"], ["rpn_cls_label"] ) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output"], ["rpn_cls_label"] ) # for bbox, the label is generated in network so it is an output box_acc_metric = metric.AccWithIgnore( "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], [] ) box_l1_metric = metric.L1( "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], [] ) metric_list = [rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list
def get_config(is_train): class General: log_frequency = 10 name = __name__.rsplit("/")[-1].rsplit(".")[-1] batch_image = 2 if is_train else 1 fp16 = False long_side = 1280 short_side = 960 class KvstoreParam: kvstore = "local" batch_image = General.batch_image gpus = [0, 1, 2, 3, 4, 5, 6, 7] fp16 = General.fp16 class NormalizeParam: # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus)) normalizer = normalizer_factory(type="fixbn") class BackboneParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer depth = 50 class NeckParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer class RpnParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer batch_image = General.batch_image nnvm_proposal = True nnvm_rpn_target = False use_symbolic_proposal = None class anchor_generate: scale = (8, ) ratio = (0.5, 1.0, 2.0) stride = (4, 8, 16, 32, 64) image_anchor = 256 max_side = 1280 class anchor_assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.3 image_anchor = 256 pos_fraction = 0.5 class head: conv_channel = 256 mean = (0, 0, 0, 0) std = (1, 1, 1, 1) class proposal: pre_nms_top_n = 2000 if is_train else 1000 post_nms_top_n = 2000 if is_train else 1000 nms_thr = 0.7 min_bbox_side = 0 class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.5 bg_thr_hi = 0.5 bg_thr_lo = 0.5 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 54 image_roi = 512 batch_image = General.batch_image stage = "1st" loss_weight = 1.0 class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.6 bg_thr_hi = 0.6 bg_thr_lo = 0.6 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.1, 0.1, 0.2, 0.2) class BboxParam2nd: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 54 image_roi = 512 batch_image = General.batch_image stage = "2nd" loss_weight = 0.5 class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.05, 0.05, 0.1, 0.1) class subsample_proposal: proposal_wo_gt = False image_roi = 512 fg_fraction = 0.25 fg_thr = 0.7 bg_thr_hi = 0.7 bg_thr_lo = 0.7 class bbox_target: num_reg_class = 2 class_agnostic = True weight = (1.0, 1.0, 1.0, 1.0) mean = (0.0, 0.0, 0.0, 0.0) std = (0.05, 0.05, 0.1, 0.1) class BboxParam3rd: fp16 = General.fp16 normalizer = NormalizeParam.normalizer num_class = 1 + 54 image_roi = 512 batch_image = General.batch_image stage = "3rd" loss_weight = 0.25 class regress_target: class_agnostic = True mean = (0.0, 0.0, 0.0, 0.0) std = (0.033, 0.033, 0.067, 0.067) class subsample_proposal: proposal_wo_gt = None image_roi = None fg_fraction = None fg_thr = None bg_thr_hi = None bg_thr_lo = None class bbox_target: num_reg_class = None class_agnostic = None weight = None mean = None std = None class RoiParam: fp16 = General.fp16 normalizer = NormalizeParam.normalizer out_size = 7 stride = (4, 8, 16, 32) roi_canonical_scale = 224 roi_canonical_level = 4 class DatasetParam: if is_train: image_set = ("vending_train", ) else: image_set = ("coco_val2017", ) backbone = Backbone(BackboneParam) neck = Neck(NeckParam) rpn_head = RpnHead(RpnParam) roi_extractor = RoiExtractor(RoiParam) bbox_head = BboxHead(BboxParam) bbox_head_2nd = BboxHead(BboxParam2nd) bbox_head_3rd = BboxHead(BboxParam3rd) detector = Detector() if is_train: train_sym = detector.get_train_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head, bbox_head_2nd, bbox_head_3rd) rpn_test_sym = None test_sym = None else: train_sym = None rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head) test_sym = detector.get_test_symbol(backbone, neck, rpn_head, roi_extractor, bbox_head, bbox_head_2nd, bbox_head_3rd) class ModelParam: train_symbol = train_sym test_symbol = test_sym rpn_test_symbol = rpn_test_sym from_scratch = False random = True memonger = False memonger_until = "stage3_unit21_plus" class pretrain: prefix = "pretrain_model/resnet-v1-50" epoch = 0 fixed_param = ["conv0", "stage1", "gamma", "beta"] def process_weight(sym, arg, aux): for stride in RpnParam.anchor_generate.stride: add_anchor_to_arg(sym, arg, aux, RpnParam.anchor_generate.max_side, stride, RpnParam.anchor_generate.scale, RpnParam.anchor_generate.ratio) class QuantizeTrainingParam: quantize_flag = False # quantized_op = ("Convolution", "FullyConnected", "Deconvolution","Concat", "Pooling", "add_n", "elemwise_add") quantized_op = ("Convolution", "FullyConnected", "Deconvolution") class WeightQuantizeParam: delay_quant = 0 ema_decay = 0.99 grad_mode = "ste" is_weight = True is_weight_perchannel = False quant_mode = "minmax" class ActQuantizeParam: delay_quant = 0 ema_decay = 0.99 grad_mode = "ste" is_weight = False is_weight_perchannel = False quant_mode = "minmax" class OptimizeParam: class optimizer: type = "sgd" lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image momentum = 0.9 wd = 0.0001 clip_gradient = None class schedule: begin_epoch = 0 end_epoch = 6 lr_iter = [ 60000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 * 16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image) ] class warmup: type = "gradual" lr = 0.01 / 8 * len( KvstoreParam.gpus) * KvstoreParam.batch_image / 3.0 iter = 500 class TestParam: min_det_score = 0.05 max_det_per_image = 100 process_roidb = lambda x: x process_output = lambda x, y: x class model: prefix = "experiments/{}/checkpoint".format(General.name) epoch = OptimizeParam.schedule.end_epoch class nms: type = "nms" thr = 0.6 class coco: annotation = "data/coco/annotations/instances_val2017.json" # data processing class NormParam: mean = (123.675, 116.28, 103.53) # RGB order std = (58.395, 57.12, 57.375) # data processing class ResizeParam: short = 960 long = 1280 class PadParam: short = 960 long = 1280 max_num_gt = 100 class AnchorTarget2DParam: def __init__(self): self.generate = self._generate() self.mean = (0, 0, 0, 0) self.std = (1, 1, 1, 1) class _generate: def __init__(self): self.stride = (4, 8, 16, 32, 64) self.short = (240, 120, 60, 30, 15) self.long = (320, 160, 80, 40, 20) scales = (8, ) aspects = (0.5, 1.0, 2.0) class assign: allowed_border = 0 pos_thr = 0.7 neg_thr = 0.3 min_pos_thr = 0.3 class sample: image_anchor = 256 pos_fraction = 0.5 class RenameParam: mapping = dict(image="data") from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \ ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \ RenameRecord, Norm2DImage from models.FPN.input import PyramidAnchorTarget2D if is_train: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), Flip2DImageBbox(), Pad2DImageBbox(PadParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data"] label_name = ["gt_bbox", "im_info"] if not RpnParam.nnvm_rpn_target: transform.append(PyramidAnchorTarget2D(AnchorTarget2DParam())) label_name += ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"] else: transform = [ ReadRoiRecord(None), Norm2DImage(NormParam), Resize2DImageBbox(ResizeParam), ConvertImageFromHwcToChw(), RenameRecord(RenameParam.mapping) ] data_name = ["data", "im_info", "im_id", "rec_id"] label_name = [] import core.detection_metric as metric rpn_acc_metric = metric.AccWithIgnore( "RpnAcc", ["rpn_cls_loss_output", "rpn_cls_label_blockgrad_output"], []) rpn_l1_metric = metric.L1( "RpnL1", ["rpn_reg_loss_output", "rpn_cls_label_blockgrad_output"], []) # for bbox, the label is generated in network so it is an output # stage1 metric box_acc_metric_1st = metric.AccWithIgnore( "RcnnAcc_1st", ["bbox_cls_loss_1st_output", "bbox_label_blockgrad_1st_output"], []) box_l1_metric_1st = metric.L1( "RcnnL1_1st", ["bbox_reg_loss_1st_output", "bbox_label_blockgrad_1st_output"], []) # stage2 metric box_acc_metric_2nd = metric.AccWithIgnore( "RcnnAcc_2nd", ["bbox_cls_loss_2nd_output", "bbox_label_blockgrad_2nd_output"], []) box_l1_metric_2nd = metric.L1( "RcnnL1_2nd", ["bbox_reg_loss_2nd_output", "bbox_label_blockgrad_2nd_output"], []) # stage3 metric box_acc_metric_3rd = metric.AccWithIgnore( "RcnnAcc_3rd", ["bbox_cls_loss_3rd_output", "bbox_label_blockgrad_3rd_output"], []) box_l1_metric_3rd = metric.L1( "RcnnL1_3rd", ["bbox_reg_loss_3rd_output", "bbox_label_blockgrad_3rd_output"], []) metric_list = [ rpn_acc_metric, rpn_l1_metric, box_acc_metric_1st, box_l1_metric_1st, box_acc_metric_2nd, box_l1_metric_2nd, box_acc_metric_3rd, box_l1_metric_3rd ] return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \ ModelParam, OptimizeParam, TestParam, \ transform, data_name, label_name, metric_list