Пример #1
0
def get_config(is_train):
    """Build the configuration namespaces, symbols, transforms and metrics.

    The nested classes are never instantiated here; they are used as plain
    attribute namespaces. Their bodies run once per call, in order, so a
    class may only reference classes defined above it.

    Args:
        is_train: truthy selects the training setup (train symbol, training
            image set, flip/pad/anchor-target transforms, larger proposal
            counts); falsy selects the test setup (RPN-test and test symbols,
            validation image set, no labels).

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``. The remaining parameter
        classes (NormalizeParam, BackboneParam, NeckParam, ResizeParam,
        PadParam, AnchorTarget2DParam, RenameParam) are only consumed inside
        this function.
    """
    # Settings shared by the parameter classes below.
    class General:
        log_frequency = 10
        # Text after the last "/" and then after the last "." of __name__.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = False

    class KvstoreParam:
        kvstore = "local"
        batch_image = General.batch_image
        # Eight device ids; len(gpus) feeds the lr and schedule scaling below.
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # Alternative kept for reference: sync BN across all devices.
        # normalizer = normalizer_factory(type="syncbn", ndev=len(KvstoreParam.gpus))
        normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        batch_image = General.batch_image

        # 5 scales x 3 ratios at a single stride of 16.
        class anchor_generate:
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            stride = 16
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        # Larger proposal budgets are used when training than when testing.
        class proposal:
            pre_nms_top_n = 12000 if is_train else 6000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = True
            image_roi = 256
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # 81 classes in total, written as 1 + 80.
        num_class = 1 + 80
        image_roi = 256
        batch_image = General.batch_image

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = 16

    class DatasetParam:
        if is_train:
            image_set = ("coco_train2017", )
        else:
            image_set = ("coco_val2017", )

    # Network components, each built from its parameter namespace.
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()
    # Only the symbols needed for the selected mode are built; the rest are None.
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head,
                                              roi_extractor, bbox_head)
        rpn_test_sym = None
        test_sym = None
    else:
        train_sym = None
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head,
                                            roi_extractor, bbox_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-50"
            epoch = 0
            # Presumably name patterns of parameters kept frozen -- confirm
            # against the consumer of this list.
            fixed_param = ["conv0", "stage1", "gamma", "beta"]

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # 0.01 / 8 per image, times the number of images per step
            # (len(gpus) * batch_image).
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 35

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # Two entries: 60000 and 80000, each multiplied by 16 and
            # floor-divided by the number of images per step.
            lr_iter = [
                60000 * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 *
                16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            lr = 0.0
            # 3000 scaled the same way as the lr_iter entries above.
            iter = 3000 * 16 // (len(KvstoreParam.gpus) *
                                 KvstoreParam.batch_image)

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # Identity hooks: both return their first argument unchanged.
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # Test uses the checkpoint of the last scheduled epoch.
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            # NOTE(review): the file name says minival2014 while the test
            # image_set in DatasetParam is coco_val2017 -- confirm they match.
            annotation = "data/coco/annotations/instances_minival2014.json"

    # data processing
    class ResizeParam:
        short = 800
        long = 1200 if is_train else 2000

    class PadParam:
        short = 800
        long = 1200 if is_train else 2000
        max_num_gt = 100

    class AnchorTarget2DParam:
        class generate:
            # 800 and 1200 (the training resize sizes above) divided by the
            # stride of 16.
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        # Renames the "image" field to "data", the first entry of data_name.
        mapping = dict(image="data")


    # Imported at call time, inside the function, rather than at module level.
    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, AnchorTarget2D

    if is_train:
        # Training pipeline adds flipping, padding and anchor-target steps.
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            AnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "gt_bbox"]
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
    else:
        # Test pipeline: read, resize, layout conversion and rename only.
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    # Both RPN metrics are given "rpn_cls_label" as their label name.
    rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"],
                                          ["rpn_cls_label"])
    rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"],
                              ["rpn_cls_label"])
    # for bbox, the label is generated in network so it is an output
    box_acc_metric = metric.AccWithIgnore(
        "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], [])
    box_l1_metric = metric.L1(
        "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], [])

    metric_list = [
        rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric
    ]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
def get_config(is_train):
    """Build the group-softmax variant of the detection configuration.

    The nested classes are never instantiated here; they are used as plain
    attribute namespaces. Their bodies run once per call, in order, so a
    class may only reference classes defined above it. This variant sets
    ``General.use_groupsoftmax`` and, when it is enabled in training, adds
    the "rpn_group" / "box_group" inputs and a ``GroupRead`` transform.

    Args:
        is_train: truthy selects the training setup (train symbol, training
            image sets, syncbn normalizer, flip/pad/anchor-target
            transforms); falsy selects the test setup (RPN-test and test
            symbols, fixbn normalizer, no labels).

    Returns:
        A 13-tuple ``(General, KvstoreParam, RpnParam, RoiParam, BboxParam,
        DatasetParam, ModelParam, OptimizeParam, TestParam, transform,
        data_name, label_name, metric_list)``. The remaining parameter
        classes are only consumed inside this function.
    """
    # Settings shared by the parameter classes below.
    class General:
        use_groupsoftmax = True
        log_frequency = 20
        # Text after the last "/" and then after the last "." of __name__.
        name = __name__.rsplit("/")[-1].rsplit(".")[-1]
        batch_image = 2 if is_train else 1
        fp16 = True

    class KvstoreParam:
        kvstore = "local"
        batch_image = General.batch_image
        # Eight device ids; len(gpus) feeds syncbn, lr and schedule scaling.
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]
        fp16 = General.fp16

    class NormalizeParam:
        # syncbn over all devices when training, fixbn when testing.
        if is_train:
            normalizer = normalizer_factory(type="syncbn",
                                            ndev=len(KvstoreParam.gpus))
        else:
            normalizer = normalizer_factory(type="fixbn")

    class BackboneParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class NeckParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer

    class RpnParam:
        fp16 = General.fp16
        # Always fixbn here, independent of NormalizeParam.
        normalizer = normalizer_factory(
            type="fixbn")  # old model does not use BN in RPN head
        batch_image = General.batch_image
        use_groupsoftmax = General.use_groupsoftmax
        # 3 classes with group softmax, otherwise 2.
        num_class = (1 + 2) if use_groupsoftmax else 2

        # 5 scales x 3 ratios at a single stride of 16.
        class anchor_generate:
            scale = (2, 4, 8, 16, 32)
            ratio = (0.5, 1.0, 2.0)
            stride = 16
            image_anchor = 256

        class head:
            conv_channel = 512
            mean = (0, 0, 0, 0)
            std = (1, 1, 1, 1)

        # Larger proposal budgets are used when training than when testing.
        class proposal:
            pre_nms_top_n = 12000 if is_train else 6000
            post_nms_top_n = 2000 if is_train else 1000
            nms_thr = 0.7
            min_bbox_side = 0

        class subsample_proposal:
            proposal_wo_gt = True
            image_roi = 256
            fg_fraction = 0.25
            fg_thr = 0.5
            bg_thr_hi = 0.5
            bg_thr_lo = 0.0

        class bbox_target:
            num_reg_class = 2
            class_agnostic = True
            weight = (1.0, 1.0, 1.0, 1.0)
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class BboxParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        # 84 classes in total, written as 1 + 83; matches the 84-entry
        # box group arrays in GroupParam below.
        num_class = 1 + 83
        image_roi = 256
        batch_image = General.batch_image
        use_groupsoftmax = General.use_groupsoftmax

        class regress_target:
            class_agnostic = True
            mean = (0.0, 0.0, 0.0, 0.0)
            std = (0.1, 0.1, 0.2, 0.2)

    class RoiParam:
        fp16 = General.fp16
        normalizer = NormalizeParam.normalizer
        out_size = 7
        stride = 16

    class DatasetParam:
        # Training mixes two COCO sets with a CCTSDB set; testing uses COCO
        # minival only.
        if is_train:
            image_set = ("coco_train2014", "coco_valminusminival2014",
                         "cctsdb_train")
        else:
            image_set = ("coco_minival2014", )

    # Network components, each built from its parameter namespace.
    backbone = Backbone(BackboneParam)
    neck = Neck(NeckParam)
    rpn_head = RpnHead(RpnParam)
    roi_extractor = RoiExtractor(RoiParam)
    bbox_head = BboxHead(BboxParam)
    detector = Detector()
    # Only the symbols needed for the selected mode are built; the rest are None.
    if is_train:
        train_sym = detector.get_train_symbol(backbone, neck, rpn_head,
                                              roi_extractor, bbox_head)
        rpn_test_sym = None
        test_sym = None
    else:
        train_sym = None
        rpn_test_sym = detector.get_rpn_test_symbol(backbone, neck, rpn_head)
        test_sym = detector.get_test_symbol(backbone, neck, rpn_head,
                                            roi_extractor, bbox_head)

    class ModelParam:
        train_symbol = train_sym
        test_symbol = test_sym
        rpn_test_symbol = rpn_test_sym

        from_scratch = False
        random = True
        memonger = False
        memonger_until = "stage3_unit21_plus"

        class pretrain:
            prefix = "pretrain_model/resnet-101"
            epoch = 0
            # Empty list here; presumably no parameters are frozen -- confirm
            # against the consumer of this list.
            fixed_param = []

    class OptimizeParam:
        class optimizer:
            type = "sgd"
            # 0.01 / 8 per image, times the number of images per step
            # (len(gpus) * batch_image).
            lr = 0.01 / 8 * len(KvstoreParam.gpus) * KvstoreParam.batch_image
            momentum = 0.9
            wd = 0.0001
            clip_gradient = 5

        class schedule:
            begin_epoch = 0
            end_epoch = 6
            # Two entries: 60000 and 80000, each multiplied by 16 and
            # floor-divided by the number of images per step.
            lr_iter = [
                60000 * 16 //
                (len(KvstoreParam.gpus) * KvstoreParam.batch_image), 80000 *
                16 // (len(KvstoreParam.gpus) * KvstoreParam.batch_image)
            ]

        class warmup:
            type = "gradual"
            lr = 0.0
            # 3000 scaled the same way as the lr_iter entries above.
            iter = 3000 * 16 // (len(KvstoreParam.gpus) *
                                 KvstoreParam.batch_image)

    class TestParam:
        min_det_score = 0.05
        max_det_per_image = 100

        # Identity hooks: both return their first argument unchanged.
        process_roidb = lambda x: x
        process_output = lambda x, y: x

        class model:
            prefix = "experiments/{}/checkpoint".format(General.name)
            # Test uses the checkpoint of the last scheduled epoch.
            epoch = OptimizeParam.schedule.end_epoch

        class nms:
            type = "nms"
            thr = 0.5

        class coco:
            # Absolute, machine-specific path.
            annotation = "/ws/data/opendata/coco/annotations/instances_minival2014.json"

    # data processing
    # Label maps handed to GroupRead: one float32 array per group, indexed
    # by class id. How GroupRead picks a group per record is not visible
    # here.
    class GroupParam:
        # box 83 classes
        # Identity map over ids 0..83.
        boxv0 = np.array([0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, \
                              31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, \
                              61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83], dtype=np.float32)
        # COCO benchmark
        # Keeps ids 0..80 and maps the last three entries to 0.
        boxv1 = np.array([0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, \
                              31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, \
                              61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0,  0,  0 ], dtype=np.float32)
        # CCTSDB benchmark
        # Zero everywhere except the last three entries, which keep 81, 82, 83.
        boxv2 = np.array([0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  \
                              0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  \
                              0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  81, 82, 83], dtype=np.float32)

        rpnv0 = np.array([0, 1, 2], dtype=np.float32)  # rpn 3 classes
        rpnv1 = np.array([0, 1, 0], dtype=np.float32)  # COCO benchmark
        rpnv2 = np.array([0, 0, 2], dtype=np.float32)  # CCTSDB benchmark

        # Same group order in both lists: all classes, COCO, CCTSDB.
        rpn_groups = [rpnv0, rpnv1, rpnv2]
        box_groups = [boxv0, boxv1, boxv2]

    class ResizeParam:
        short = 800
        long = 1200 if is_train else 2000

    class PadParam:
        short = 800
        # Fixed at 1200 regardless of is_train; Pad2DImageBbox is only part
        # of the training transform below.
        long = 1200
        max_num_gt = 100

    class AnchorTarget2DParam:
        class generate:
            # 800 and 1200 (the training resize sizes above) divided by the
            # stride of 16.
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)
            use_groupsoftmax = General.use_groupsoftmax

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

        # Plain function stored on the class (no self): callable as
        # AnchorTarget2DParam.gtclass2rpn(arr). It rewrites `gtclass` IN
        # PLACE and returns the same object:
        #   values > 80        -> 2  (via a temporary -1)
        #   values in (0, 80]  -> 1
        #   values < 0 on entry also end up as 2; zeros are left unchanged.
        # Requires boolean-mask assignment, e.g. a numpy array -- presumably
        # the ground-truth class ids; confirm against the caller.
        def gtclass2rpn(gtclass):
            class_gap = 80
            gtclass[gtclass > class_gap] = -1
            gtclass[gtclass > 0] = 1
            gtclass[gtclass < 0] = 2
            return gtclass

    class RenameParam:
        # Renames the "image" field to "data", the first entry of data_name.
        mapping = dict(image="data")


    # Imported at call time, inside the function, rather than at module level.
    from core.detection_input import ReadRoiRecord, Resize2DImageBbox, \
        ConvertImageFromHwcToChw, Flip2DImageBbox, Pad2DImageBbox, \
        RenameRecord, AnchorTarget2D, GroupRead

    if is_train:
        # Training pipeline adds flipping, padding and anchor-target steps.
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            Flip2DImageBbox(),
            Pad2DImageBbox(PadParam),
            ConvertImageFromHwcToChw(),
            AnchorTarget2D(AnchorTarget2DParam),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "gt_bbox"]
        label_name = ["rpn_cls_label", "rpn_reg_target", "rpn_reg_weight"]
        # Group softmax adds two data inputs and a final GroupRead step,
        # appended after RenameRecord.
        if General.use_groupsoftmax:
            data_name.append("rpn_group")
            data_name.append("box_group")
            transform.append(GroupRead(GroupParam))
    else:
        # Test pipeline: read, resize, layout conversion and rename only.
        transform = [
            ReadRoiRecord(None),
            Resize2DImageBbox(ResizeParam),
            ConvertImageFromHwcToChw(),
            RenameRecord(RenameParam.mapping)
        ]
        data_name = ["data", "im_info", "im_id", "rec_id"]
        label_name = []

    import core.detection_metric as metric

    # Both RPN metrics are given "rpn_cls_label" as their label name.
    rpn_acc_metric = metric.AccWithIgnore("RpnAcc", ["rpn_cls_loss_output"],
                                          ["rpn_cls_label"])
    rpn_l1_metric = metric.L1("RpnL1", ["rpn_reg_loss_output"],
                              ["rpn_cls_label"])
    # for bbox, the label is generated in network so it is an output
    box_acc_metric = metric.AccWithIgnore(
        "RcnnAcc", ["bbox_cls_loss_output", "bbox_label_blockgrad_output"], [])
    box_l1_metric = metric.L1(
        "RcnnL1", ["bbox_reg_loss_output", "bbox_label_blockgrad_output"], [])

    metric_list = [
        rpn_acc_metric, rpn_l1_metric, box_acc_metric, box_l1_metric
    ]

    return General, KvstoreParam, RpnParam, RoiParam, BboxParam, DatasetParam, \
           ModelParam, OptimizeParam, TestParam, \
           transform, data_name, label_name, metric_list
Пример #3
0
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    class RenameParam:
        mapping = dict(image="data")

    transform = [
        ReadRoiRecord(None),
        RandResizeCrop2DImageBbox(ResizeParam),
        Flip2DImageBbox(),
        Pad2DImageBbox(PadParam),
        ConvertImageFromHwcToChw(),
        AnchorTarget2D(AnchorTarget2DParam),
        RenameRecord(RenameParam.mapping)
    ]

    DEBUG = True

    with open("data/cache/coco_val2017.roidb", "rb") as fin:
        roidb = pkl.load(fin)
        roidb = [rec for rec in roidb if rec["gt_bbox"].shape[0] > 0]
        roidb = [roidb[i] for i in np.random.choice(len(roidb), 20, replace=False)]

        print(roidb[0])
        flipped_roidb = []
        for rec in roidb:
            new_rec = rec.copy()
            new_rec["flipped"] = True
Пример #4
0
def test_rpn_target():
    """Print RPN target statistics from three implementations for comparison.

    Builds one anchor grid and a small batch of ground-truth boxes, then runs
    `_rpn_target_batch`, `_fpn_rpn_target_batch` (with single-element lists)
    and `AnchorTarget2D.apply`, printing for each the number of labels equal
    to 0, the indices of positive labels, the indices of positive regression
    weights and the regression targets at those indices. Nothing is
    asserted; the outputs are meant to be compared by eye.

    Relies on `mx`, `_rpn_target_batch` and `_fpn_rpn_target_batch` being
    available at module level.
    """
    import numpy as np

    # anchor generation
    stride = 16
    aspects = (0.5, 1.0, 2.0)
    scales = (2, 4, 8, 16, 32)
    max_side = 1200
    # NOTE(review): feat_h / feat_w only shape cls_prob below (75 x 50); the
    # anchor grid itself is (max_side // stride) squared, i.e. 75 x 75.
    feat_h = 75
    feat_w = 50

    # Base box of one stride cell: [0, 0, 15, 15] with inclusive corners.
    base_anchor = np.array([0, 0, stride - 1, stride - 1])
    w = base_anchor[2] - base_anchor[0] + 1
    h = base_anchor[3] - base_anchor[1] + 1
    x_ctr = base_anchor[0] + 0.5 * (w - 1)
    y_ctr = base_anchor[1] + 0.5 * (h - 1)
    # One width/height pair per aspect ratio, rounded to integers.
    w_ratios = np.round(np.sqrt(w * h / aspects))
    h_ratios = np.round(w_ratios * aspects)
    # Every ratio combined with every scale: len(aspects) * len(scales) = 15.
    ws = (np.outer(w_ratios, scales)).reshape(-1)
    hs = (np.outer(h_ratios, scales)).reshape(-1)
    # (15, 4) boxes as [x1, y1, x2, y2], centred on the base cell centre.
    base_anchor = np.stack([
        x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), x_ctr + 0.5 *
        (ws - 1), y_ctr + 0.5 * (hs - 1)
    ],
                           axis=1)

    # Shift the base anchors to every stride-spaced grid position.
    shift_x = np.arange(0, max_side // stride, dtype=np.float32) * stride
    shift_y = np.arange(0, max_side // stride, dtype=np.float32) * stride
    grid_x, grid_y = np.meshgrid(shift_x, shift_y)
    grid_x, grid_y = grid_x.reshape(-1), grid_y.reshape(-1)
    grid = np.stack([grid_x, grid_y, grid_x, grid_y], axis=1)
    # (positions, 1, 4) + (1, 15, 4) broadcasts to (positions, 15, 4).
    all_anchor = grid[:, None, :] + base_anchor[None, :, :]
    # Final shape (1, 1, 75, 75, 60): 15 anchors x 4 coordinates per cell.
    all_anchor = all_anchor.reshape(1, 1, max_side // stride,
                                    max_side // stride, -1)
    anchors = mx.nd.array(all_anchor, dtype="float32")
    # Random scores with one channel per anchor.
    cls_prob = mx.nd.random_normal(
        0, 1, shape=[1, len(aspects) * len(scales), feat_h, feat_w])
    # Two images with three box rows each; the all -1 rows are presumably
    # padding -- confirm against the target functions.
    gt_bboxes = mx.nd.array([
        [200, 200, 300, 300],
        [300, 300, 500, 500],
        [-1, -1, -1, -1],
        [200, 200, 300, 300],
        [400, 300, 500, 500],
        [-1, -1, -1, -1],
    ]).reshape(2, 3, 4)
    # One row per image; presumably (height, width, scale) -- TODO confirm.
    im_infos = mx.nd.array([[1200, 800, 2], [1200, 800, 2]]).reshape(2, 3)

    # Single-level target computation. The meaning of the positional
    # arguments is defined by _rpn_target_batch, which is not visible here.
    rpn_cls_label, rpn_reg_target, rpn_reg_weight = _rpn_target_batch(
        mx.ndarray, cls_prob, anchors, gt_bboxes, im_infos, 2, 15, max_side,
        stride, 0, 256, 0.5, 0.7, 0.3)
    # Only the second image (index 1) is inspected.
    print(len(np.where(rpn_cls_label[1].asnumpy() == 0)[0]))
    print(np.where(rpn_cls_label[1].asnumpy() > 0))
    print(np.where(rpn_reg_weight[1].asnumpy() > 0))
    print(rpn_reg_target[1][np.where(rpn_reg_weight[1].asnumpy() > 0)])

    # Same inputs through the FPN variant, wrapped as single-element lists.
    rpn_cls_label, rpn_reg_target, rpn_reg_weight = _fpn_rpn_target_batch(
        mx.ndarray, [cls_prob], [anchors], gt_bboxes, im_infos, 2, 15,
        max_side, [stride], 0, 256, 0.5, 0.7, 0.3)
    print(len(np.where(rpn_cls_label[1].asnumpy() == 0)[0]))
    print(np.where(rpn_cls_label[1].asnumpy() > 0))
    print(np.where(rpn_reg_weight[1].asnumpy() > 0))
    print(rpn_reg_target[1][np.where(rpn_reg_weight[1].asnumpy() > 0)])

    from core.detection_input import AnchorTarget2D

    # Parameters mirroring the constants used above (stride, scales, aspects,
    # thresholds 0.7 / 0.3, 256 anchors with 0.5 positive fraction).
    class AnchorTarget2DParam:
        class generate:
            short = 800 // 16
            long = 1200 // 16
            stride = 16
            scales = (2, 4, 8, 16, 32)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5

    anchor_target = AnchorTarget2D(AnchorTarget2DParam)

    # Per-record path: feed the second image's info and boxes as numpy arrays.
    record = {
        "im_info": im_infos.asnumpy()[1],
        "gt_bbox": gt_bboxes.asnumpy()[1]
    }
    # The prints below read "rpn_cls_label", "rpn_reg_target" and
    # "rpn_reg_weight" from `record`, so apply() is expected to add them.
    anchor_target.apply(record)
    print(len(np.where(record["rpn_cls_label"] == 0)[0]))
    print(np.where(record["rpn_cls_label"] > 0))
    print(np.where(record["rpn_reg_weight"] > 0))
    print(record["rpn_reg_target"][np.where(record["rpn_reg_weight"] > 0)])

    # Redefinition that shadows the class above; it is not used in the
    # visible part of this function. Unlike the first version it needs an
    # instance: `generate` is created in __init__ with per-level tuples.
    class AnchorTarget2DParam:
        def __init__(self):
            self.generate = self._generate()

        class _generate:
            def __init__(self):
                self.stride = (4, 8, 16, 32, 64)
                self.short = (200, 100, 50, 25, 13)
                self.long = (334, 167, 84, 42, 21)

            # NOTE(review): (8) is the int 8, not a one-element tuple; a
            # tuple would be written (8,). Confirm which the consumer expects.
            scales = (8)
            aspects = (0.5, 1.0, 2.0)

        class assign:
            allowed_border = 0
            pos_thr = 0.7
            neg_thr = 0.3
            min_pos_thr = 0.0

        class sample:
            image_anchor = 256
            pos_fraction = 0.5