Example #1
    def get_output(self, conv_fpn_feat):
        if self.cls_logit_dict is not None and self.bbox_delta_dict is not None:
            return self.cls_logit_dict, self.bbox_delta_dict

        p = self.p
        num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
        conv_channel = p.head.conv_channel

        # FPN RPN share weight
        rpn_conv_weight = X.var('rpn_conv_weight', init=X.gauss(0.01))
        rpn_conv_bias = X.var('rpn_conv_bias', init=X.zero_init())
        rpn_conv_cls_weight = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
        rpn_conv_cls_bias = X.var('rpn_conv_cls_bias', init=X.zero_init())
        rpn_conv_bbox_weight = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
        rpn_conv_bbox_bias = X.var('rpn_conv_bbox_bias', init=X.zero_init())

        cls_logit_dict = {}
        bbox_delta_dict = {}

        for stride in p.anchor_generate.stride:
            rpn_conv = X.conv(
                conv_fpn_feat['stride%s' % stride],
                kernel=3,
                filter=conv_channel,
                name="rpn_conv_3x3_%s" % stride,
                no_bias=False,
                weight=rpn_conv_weight,
                bias=rpn_conv_bias
            )
            rpn_relu = X.relu(rpn_conv, name='rpn_relu_%s' % stride)
            if p.fp16:
                rpn_relu = X.to_fp32(rpn_relu, name="rpn_relu_%s_fp32" % stride)

            cls_logit = X.conv(
                rpn_relu,
                filter=2 * num_base_anchor,
                name="rpn_cls_score_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_cls_weight,
                bias=rpn_conv_cls_bias
            )

            bbox_delta = X.conv(
                rpn_relu,
                filter=4 * num_base_anchor,
                name="rpn_bbox_pred_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_bbox_weight,
                bias=rpn_conv_bbox_bias
            )

            cls_logit_dict[stride]  = cls_logit
            bbox_delta_dict[stride] = bbox_delta

        self.cls_logit_dict = cls_logit_dict
        self.bbox_delta_dict = bbox_delta_dict

        return self.cls_logit_dict, self.bbox_delta_dict
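
The weight sharing across FPN levels works because the same X.var handles are passed to every level's convolution. A minimal sketch of the mechanism in plain MXNet (mxnext's X.conv builds on mx.sym.Convolution; names below are illustrative):

    import mxnet as mx

    shared_w = mx.sym.Variable("rpn_conv_weight")
    shared_b = mx.sym.Variable("rpn_conv_bias")

    outs = []
    for stride in (4, 8, 16, 32, 64):
        feat = mx.sym.Variable("stride%s" % stride)
        outs.append(mx.sym.Convolution(
            feat, weight=shared_w, bias=shared_b,
            kernel=(3, 3), pad=(1, 1), num_filter=256,
            name="rpn_conv_3x3_%s" % stride))

    # a single weight/bias pair serves all five levels
    print([a for a in mx.sym.Group(outs).list_arguments()
           if "rpn_conv" in a])
    # ['rpn_conv_weight', 'rpn_conv_bias']
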
Example #2
    def _bbox_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class,
                     stride):
        p = self.p
        norm = p.normalizer

        # regression subnet
        bbox_conv1 = X.conv(data=conv_feat,
                            kernel=3,
                            filter=conv_channel,
                            weight=self.bbox_conv1_weight,
                            bias=self.bbox_conv1_bias,
                            no_bias=False,
                            name="bbox_conv1")
        bbox_conv1 = norm(bbox_conv1, name="bbox_conv1_bn_s{}".format(stride))
        bbox_conv1_relu = X.relu(bbox_conv1)
        bbox_conv2 = X.conv(data=bbox_conv1_relu,
                            kernel=3,
                            filter=conv_channel,
                            weight=self.bbox_conv2_weight,
                            bias=self.bbox_conv2_bias,
                            no_bias=False,
                            name="bbox_conv2")
        bbox_conv2 = norm(bbox_conv2, name="bbox_conv2_bn_s{}".format(stride))
        bbox_conv2_relu = X.relu(bbox_conv2)
        bbox_conv3 = X.conv(data=bbox_conv2_relu,
                            kernel=3,
                            filter=conv_channel,
                            weight=self.bbox_conv3_weight,
                            bias=self.bbox_conv3_bias,
                            no_bias=False,
                            name="bbox_conv3")
        bbox_conv3 = norm(bbox_conv3, name="bbox_conv3_bn_s{}".format(stride))
        bbox_conv3_relu = X.relu(bbox_conv3)
        bbox_conv4 = X.conv(data=bbox_conv3_relu,
                            kernel=3,
                            filter=conv_channel,
                            weight=self.bbox_conv4_weight,
                            bias=self.bbox_conv4_bias,
                            no_bias=False,
                            name="bbox_conv4")
        bbox_conv4 = norm(bbox_conv4, name="bbox_conv4_bn_s{}".format(stride))
        bbox_conv4_relu = X.relu(bbox_conv4)

        if p.fp16:
            bbox_conv4_relu = X.to_fp32(bbox_conv4_relu,
                                        name="bbox_conv4_fp32")

        output_channel = num_base_anchor * 4
        output = X.conv(data=bbox_conv4_relu,
                        kernel=3,
                        filter=output_channel,
                        weight=self.bbox_pred_weight,
                        bias=self.bbox_pred_bias,
                        no_bias=False,
                        name="bbox_pred")

        return output
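
The four conv/norm/relu blocks above are identical apart from their weight handles; a hypothetical loop-based equivalent (assuming the same self.bbox_convN_weight / self.bbox_convN_bias attributes) builds the same graph:

    def _bbox_tower(self, conv_feat, conv_channel, stride):
        # sketch only: same graph as bbox_conv1..bbox_conv4 above
        p = self.p
        x = conv_feat
        for i in range(1, 5):
            x = X.conv(data=x,
                       kernel=3,
                       filter=conv_channel,
                       weight=getattr(self, "bbox_conv%d_weight" % i),
                       bias=getattr(self, "bbox_conv%d_bias" % i),
                       no_bias=False,
                       name="bbox_conv%d" % i)
            x = p.normalizer(x, name="bbox_conv{}_bn_s{}".format(i, stride))
            x = X.relu(x)
        return x
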
Example #3
    def _cls_subnet(self, conv_feat, conv_channel, num_base_anchor, num_class,
                    stride):
        p = self.p
        norm = p.normalizer

        # classification subnet
        cls_conv1 = X.conv(data=conv_feat,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv1_weight,
                           bias=self.cls_conv1_bias,
                           no_bias=False,
                           name="cls_conv1")
        cls_conv1 = norm(cls_conv1, name="cls_conv1_bn_s{}".format(stride))
        cls_conv1_relu = X.relu(cls_conv1)
        cls_conv2 = X.conv(data=cls_conv1_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv2_weight,
                           bias=self.cls_conv2_bias,
                           no_bias=False,
                           name="cls_conv2")
        cls_conv2 = norm(cls_conv2, name="cls_conv2_bn_s{}".format(stride))
        cls_conv2_relu = X.relu(cls_conv2)
        cls_conv3 = X.conv(data=cls_conv2_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv3_weight,
                           bias=self.cls_conv3_bias,
                           no_bias=False,
                           name="cls_conv3")
        cls_conv3 = norm(cls_conv3, name="cls_conv3_bn_s{}".format(stride))
        cls_conv3_relu = X.relu(cls_conv3)
        cls_conv4 = X.conv(data=cls_conv3_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv4_weight,
                           bias=self.cls_conv4_bias,
                           no_bias=False,
                           name="cls_conv4")
        cls_conv4 = norm(cls_conv4, name="cls_conv4_bn_s{}".format(stride))
        cls_conv4_relu = X.relu(cls_conv4)

        if p.fp16:
            cls_conv4_relu = X.to_fp32(cls_conv4_relu, name="cls_conv4_fp32")

        output_channel = num_base_anchor * (num_class - 1)
        output = X.conv(data=cls_conv4_relu,
                        kernel=3,
                        filter=output_channel,
                        weight=self.cls_pred_weight,
                        bias=self.cls_pred_bias,
                        no_bias=False,
                        name="cls_pred")
        return output
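
The num_base_anchor * (num_class - 1) output width reflects RetinaNet-style sigmoid classification, which predicts foreground classes only. A worked example with typical COCO-style settings (values assumed for illustration):

    num_base_anchor = 3 * 3                             # 3 ratios x 3 scales
    num_class = 81                                      # 80 foreground + 1 background
    output_channel = num_base_anchor * (num_class - 1)  # 720 logits per location
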
Example #4
    def get_output(self, conv_feat):
        if self._cls_logit is not None and self._bbox_delta is not None:
            return self._cls_logit, self._bbox_delta

        p = self.p
        num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
        conv_channel = p.head.conv_channel

        if p.normalizer.__name__ == "fix_bn":
            conv = X.convrelu(
                conv_feat,
                kernel=3,
                filter=conv_channel,
                name="rpn_conv_3x3",
                no_bias=False,
                init=X.gauss(0.01)
            )
        elif p.normalizer.__name__ in ["sync_bn", "gn"]:
            conv = X.convnormrelu(
                p.normalizer,
                conv_feat,
                kernel=3,
                filter=conv_channel,
                name="rpn_conv_3x3",
                no_bias=False,
                init=X.gauss(0.01)
            )
        else:
            raise NotImplementedError("Unsupported normalizer: {}".format(p.normalizer.__name__))

        if p.fp16:
            conv = X.to_fp32(conv, name="rpn_conv_3x3_fp32")

        cls_logit = X.conv(
            conv,
            filter=2 * num_base_anchor,
            name="rpn_cls_logit",
            no_bias=False,
            init=X.gauss(0.01)
        )

        bbox_delta = X.conv(
            conv,
            filter=4 * num_base_anchor,
            name="rpn_bbox_delta",
            no_bias=False,
            init=X.gauss(0.01)
        )

        self._cls_logit = cls_logit
        self._bbox_delta = bbox_delta

        return self._cls_logit, self._bbox_delta
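
The branch on p.normalizer.__name__ assumes the config stores a callable whose __name__ names the normalizer. A minimal sketch of such a callable (illustrative; the real helpers live in the config layer):

    import mxnet as mx

    def fix_bn(data, name=None):
        # frozen BatchNorm: running statistics are used, never updated
        return mx.sym.BatchNorm(data, name=name, use_global_stats=True,
                                fix_gamma=False, eps=1e-5)

    # with p.normalizer = fix_bn, p.normalizer.__name__ == "fix_bn"
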
Example #5
    def resnet_trident_stage(cls, data, name, num_block, filter, stride,
                             dilate, norm_type, norm_mom, ndev, num_branch,
                             branch_ids, branch_bn_shared, branch_conv_shared,
                             branch_deform):
        """
        One ResNet stage is composed of multiple ResNet units. Refer to the depth config for more information.
        :param data:
        :param name:
        :param num_block:
        :param filter:
        :param stride:
        :param dilate:
        :param norm_type:
        :param norm_mom:
        :param ndev:
        :param num_branch:
        :param branch_ids:
        :param branch_bn_shared:
        :param branch_conv_shared:
        :param branch_deform:
        :return:
        """
        assert isinstance(dilate, list) and len(dilate) == num_branch, \
            'dilate should be a list with num_branch items.'

        d = [(d, d) for d in dilate]

        data = cls.resnet_unit(data, "{}_unit1".format(name), filter, stride,
                               1, True, norm_type, norm_mom, ndev)
        data = [data] * num_branch
        for i in range(2, num_block + 1):
            if branch_deform and i >= num_block - 2:
                unit_deform = True
            else:
                unit_deform = False
            # cast back to fp32 as deformable conv is not optimized for fp16
            if unit_deform and i == num_block - 2:
                for j in range(num_branch):
                    data[j] = X.to_fp32(data[j], name="deform_to32")
            data = cls.resnet_trident_unit(data,
                                           "{}_unit{}".format(name, i),
                                           filter, (1, 1),
                                           d,
                                           False,
                                           norm_type,
                                           norm_mom,
                                           ndev,
                                           branch_ids,
                                           branch_bn_shared,
                                           branch_conv_shared,
                                           branch_deform=unit_deform)

        return data
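
A hypothetical invocation for a 3-branch trident stage (parameter values are illustrative and follow the usual TridentNet setup; Builder stands for the class defining this method):

    c4 = Builder.resnet_trident_stage(
        data=c3,
        name="stage3",
        num_block=6,             # e.g. ResNet-50 conv4 has 6 units
        filter=1024,
        stride=2,
        dilate=[1, 2, 3],        # one dilation rate per branch
        norm_type="fixbn",
        norm_mom=0.9,
        ndev=8,
        num_branch=3,
        branch_ids=[0, 1, 2],
        branch_bn_shared=True,
        branch_conv_shared=True,
        branch_deform=False)
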
Example #6
    def get_output(self, conv_feat):
        p = self.p
        num_class = p.num_class
        num_reg_class = 2 if p.regress_target.class_agnostic else num_class

        head_feat = self._get_bbox_head_logit(conv_feat)

        if not isinstance(head_feat, dict):
            head_feat = dict(classification=head_feat, regression=head_feat)

        if p.fp16:
            head_feat["classification"] = X.to_fp32(
                head_feat["classification"], name="bbox_cls_head_to_fp32")
            head_feat["regression"] = X.to_fp32(head_feat["regression"],
                                                name="bbox_reg_head_to_fp32")

        cls_logit = X.fc(head_feat["classification"],
                         filter=num_class,
                         name='bbox_cls_logit1',
                         init=X.gauss(0.01))

        cls_sec_logit = X.fc(head_feat["classification"],
                             filter=num_class,
                             name='bbox_cls_logit2',
                             init=X.gauss(0.01))

        bbox_delta = X.fc(head_feat["regression"],
                          filter=4 * num_reg_class,
                          name='bbox_reg_delta1',
                          init=X.gauss(0.001))

        bbox_sec_delta = X.fc(head_feat["regression"],
                              filter=4 * num_reg_class,
                              name='bbox_reg_delta2',
                              init=X.gauss(0.001))

        return (cls_logit, bbox_delta, cls_sec_logit, bbox_sec_delta,
                head_feat["regression"])  # NOTE
Example #7
    def get_roi_feature(self, conv_fpn_feat, proposal):
        p = self.p
        rcnn_stride = p.stride
        roi_canonical_scale = p.roi_canonical_scale
        roi_canonical_level = p.roi_canonical_level

        group = mx.symbol.Custom(
            op_type="assign_layer_fpn",
            rois=proposal,
            rcnn_stride=rcnn_stride,
            roi_canonical_scale=roi_canonical_scale,
            roi_canonical_level=roi_canonical_level,
            name="assign_layer_fpn"
        )
        proposal_fpn = dict()
        for i, stride in enumerate(rcnn_stride):
            proposal_fpn["stride%s" % stride] = group[i]

        if p.fp16:
            for stride in rcnn_stride:
                conv_fpn_feat["stride%s" % stride] = X.to_fp32(
                    conv_fpn_feat["stride%s" % stride],
                    name="fpn_stride%s_to_fp32"
                )

        fpn_roi_feats = list()
        for stride in rcnn_stride:
            feat_lvl = conv_fpn_feat["stride%s" % stride]
            proposal_lvl = proposal_fpn["stride%s" % stride]
            roi_feat = X.roi_align(
                feat_lvl,
                rois=proposal_lvl,
                out_size=p.out_size,
                stride=stride,
                name="roi_align"
            )
            roi_feat = X.reshape(
                data=roi_feat,
                shape=(-3, -2),
                name='roi_feat_reshape'
            )
            fpn_roi_feats.append(roi_feat)
        roi_feat = X.add_n(*fpn_roi_feats)

        if p.fp16:
            roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

        return roi_feat
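
The reshape with shape=(-3, -2) relies on MXNet's special reshape codes: -3 merges the first two axes into one, -2 copies the remaining axes unchanged. A quick ndarray check:

    import mxnet as mx

    x = mx.nd.zeros((2, 4, 7, 7))
    print(mx.nd.reshape(x, shape=(-3, -2)).shape)   # (8, 7, 7)
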
Example #8
    def _get_mask_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        up_stride = int(self.pMask.resolution // self.pMaskRoi.out_size)
        dim_reduced = self.pMask.dim_reduced

        msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)

        current = conv_feat
        for i in range(4):
            current = X.conv(
                current,
                name="mask_fcn_conv{}".format(i + 1),
                filter=dim_reduced,
                kernel=3,
                no_bias=False,
                init=msra_init
            )
            current = self.add_norm(current)
            current = X.relu(current)

        mask_up = current
        for i in range(up_stride // 2):
            weight = X.var(
                name="mask_up{}_weight".format(i),
                init=msra_init,
                lr_mult=1,
                wd_mult=1)
            mask_up = mx.sym.Deconvolution(
                mask_up,
                kernel=(2, 2),
                stride=(2, 2),
                num_filter=dim_reduced,
                no_bias=False,
                weight=weight,
                name="mask_up{}".format(i)
            )
            mask_up = X.relu(
                mask_up,
                name="mask_up{}_relu".format(i))

        mask_up = X.to_fp32(mask_up, name='mask_up_to_fp32')
        self._head_feat = mask_up

        return self._head_feat
Example #9
    def get_roi_feature(self, rcnn_feat, proposal):
        p = self.p

        if p.fp16:
            rcnn_feat = X.to_fp32(rcnn_feat, "rcnn_feat_to_fp32")

        roi_feat = X.roi_align(rcnn_feat,
                               rois=proposal,
                               out_size=p.out_size,
                               stride=p.stride,
                               name="roi_align")

        if p.fp16:
            roi_feat = X.to_fp16(roi_feat, "roi_feat_to_fp16")

        roi_feat = X.reshape(roi_feat, (-3, -2))

        return roi_feat
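
The cast-down/cast-up pattern above runs an op without an fp16 kernel (ROIAlign here) in fp32 and then returns to fp16 for the rest of the network. Assuming X.to_fp32/X.to_fp16 are thin wrappers over mx.sym.Cast, it amounts to:

    import mxnet as mx

    feat = mx.sym.Variable("rcnn_feat")
    feat = mx.sym.Cast(feat, dtype="float32", name="rcnn_feat_to_fp32")
    # ... ROIAlign runs here in fp32 ...
    feat = mx.sym.Cast(feat, dtype="float16", name="roi_feat_to_fp16")
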
Example #10
    def get_output(self, conv_feat):
        p = self.p
        num_class = p.num_class
        num_reg_class = 2 if p.regress_target.class_agnostic else num_class

        head_feat = self._get_bbox_head_logit(conv_feat)

        if p.fp16:
            head_feat = X.to_fp32(head_feat, name="bbox_head_to_fp32")

        cls_logit = X.fc(head_feat,
                         filter=num_class,
                         name='bbox_cls_logit',
                         init=X.gauss(0.01))
        bbox_delta = X.fc(head_feat,
                          filter=4 * num_reg_class,
                          name='bbox_reg_delta',
                          init=X.gauss(0.001))

        return cls_logit, bbox_delta
Example #11
    def get_output(self, conv_feat):
        pBbox = self.pBbox
        num_class = pBbox.num_class

        head_feat = self._get_mask_head_logit(conv_feat)

        msra_init = mx.init.Xavier(rnd_type="gaussian", factor_type="out", magnitude=2)

        if self.pMask.fp16:
            head_feat = X.to_fp32(head_feat, name="mask_head_to_fp32")

        mask_fcn_logit = X.conv(
            head_feat,
            filter=num_class,
            name="mask_fcn_logit",
            no_bias=False,
            init=msra_init
        )

        return mask_fcn_logit
Example #12
    def _get_bbox_head_logit(self, conv_feat):
        if self._head_feat is not None:
            return self._head_feat

        from mxnext.backbone.resnet_v1 import Builder

        unit = Builder.resnet_stage(conv_feat,
                                    name="stage4",
                                    num_block=3,
                                    filter=2048,
                                    stride=1,
                                    dilate=1,
                                    norm_type=self.p.normalizer,
                                    norm_mom=0.9,
                                    ndev=8)
        unit = X.to_fp32(unit, name='c5_to_fp32')
        pool1 = X.pool(unit, global_pool=True, name='pool1')

        self._head_feat = pool1

        return self._head_feat
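
The global pooling collapses the spatial dims of C5, so each RoI enters the fc heads as a 2048-d vector. A shape check with plain MXNet (X.pool is assumed to wrap mx.sym.Pooling):

    import mxnet as mx

    c5 = mx.sym.Variable("c5")
    gp = mx.sym.Pooling(c5, kernel=(1, 1), pool_type="avg", global_pool=True)
    _, out_shapes, _ = gp.infer_shape(c5=(128, 2048, 7, 7))
    print(out_shapes)   # [(128, 2048, 1, 1)]
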
Example #13
    def _cls_subnet(self, conv_feat, stride):
        p = self.p
        norm = p.normalizer
        conv_channel = p.head.conv_channel

        # classification subnet
        cls_conv1 = X.conv(data=conv_feat,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv1_weight,
                           bias=self.cls_conv1_bias,
                           no_bias=False,
                           name="cls_conv1")
        cls_conv1 = norm(cls_conv1, name="cls_conv1_bn_s{}".format(stride))
        cls_conv1_relu = X.relu(cls_conv1)
        cls_conv2 = X.conv(data=cls_conv1_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv2_weight,
                           bias=self.cls_conv2_bias,
                           no_bias=False,
                           name="cls_conv2")
        cls_conv2 = norm(cls_conv2, name="cls_conv2_bn_s{}".format(stride))
        cls_conv2_relu = X.relu(cls_conv2)
        cls_conv3 = X.conv(data=cls_conv2_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.cls_conv3_weight,
                           bias=self.cls_conv3_bias,
                           no_bias=False,
                           name="cls_conv3")
        cls_conv3 = norm(cls_conv3, name="cls_conv3_bn_s{}".format(stride))
        cls_conv3_relu = X.relu(cls_conv3)

        if p.fp16:
            cls_conv3_relu = X.to_fp32(cls_conv3_relu, name="cls_conv3_fp32")

        return cls_conv3_relu
Example #14
    def _reg_subnet(self, conv_feat, stride):
        p = self.p
        norm = p.normalizer
        conv_channel = p.head.conv_channel

        # regression subnet
        reg_conv1 = X.conv(data=conv_feat,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.reg_conv1_weight,
                           bias=self.reg_conv1_bias,
                           no_bias=False,
                           name="reg_conv1")
        reg_conv1 = norm(reg_conv1, name="reg_conv1_bn_s{}".format(stride))
        reg_conv1_relu = X.relu(reg_conv1)
        reg_conv2 = X.conv(data=reg_conv1_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.reg_conv2_weight,
                           bias=self.reg_conv2_bias,
                           no_bias=False,
                           name="reg_conv2")
        reg_conv2 = norm(reg_conv2, name="reg_conv2_bn_s{}".format(stride))
        reg_conv2_relu = X.relu(reg_conv2)
        reg_conv3 = X.conv(data=reg_conv2_relu,
                           kernel=3,
                           filter=conv_channel,
                           weight=self.reg_conv3_weight,
                           bias=self.reg_conv3_bias,
                           no_bias=False,
                           name="reg_conv3")
        reg_conv3 = norm(reg_conv3, name="reg_conv3_bn_s{}".format(stride))
        reg_conv3_relu = X.relu(reg_conv3)

        if p.fp16:
            reg_conv3_relu = X.to_fp32(reg_conv3_relu, name="reg_conv3_fp32")

        return reg_conv3_relu
Example #15
    def _bbox_subnet(self,
                     conv_feat,
                     conv_channel,
                     num_base_anchor,
                     num_class,
                     stride,
                     nb_conv=0):
        p = self.p
        if nb_conv <= 0:
            bbox_conv4_relu = conv_feat
            if p.fp16:
                bbox_conv4_relu = X.to_fp32(bbox_conv4_relu,
                                            name="bbox_conv4_fp32")
            output_channel = num_base_anchor * 4
            output = X.conv(data=bbox_conv4_relu,
                            kernel=3,
                            filter=output_channel,
                            weight=self.bbox_pred_weight,
                            bias=self.bbox_pred_bias,
                            no_bias=False,
                            name="bbox_pred")
            return output
        return super()._bbox_subnet(conv_feat, conv_channel, num_base_anchor,
                                    num_class, stride)
Example #16
    def get_roi_feature(self, conv_fpn_feat, proposal):
        p = self.p
        rcnn_stride = p.stride

        group = mx.symbol.Custom(rois=proposal, op_type='assign_layer_fpn')
        proposal_fpn = dict()
        proposal_fpn["stride4"] = group[1]
        proposal_fpn["stride8"] = group[2]
        proposal_fpn["stride16"] = group[3]
        proposal_fpn["stride32"] = group[4]

        if p.fp16:
            for stride in rcnn_stride:
                conv_fpn_feat["stride%s" % stride] = X.to_fp32(
                    conv_fpn_feat["stride%s" % stride],
                    name="fpn_stride%s_to_fp32")

        fpn_roi_feats = list()
        for stride in rcnn_stride:
            feat_lvl = conv_fpn_feat["stride%s" % stride]
            proposal_lvl = proposal_fpn["stride%s" % stride]
            roi_feat = X.roi_align(feat_lvl,
                                   rois=proposal_lvl,
                                   out_size=p.out_size,
                                   stride=stride,
                                   name="roi_align")
            roi_feat = X.reshape(data=roi_feat,
                                 shape=(-3, -2),
                                 name='roi_feat_reshape')
            fpn_roi_feats.append(roi_feat)
        roi_feat = X.add_n(*fpn_roi_feats)

        if p.fp16:
            roi_feat = X.to_fp16(roi_feat, name="roi_feat_to_fp16")

        return roi_feat
Example #17
    def fpn_conv_down(self, data):
        if self.fpn_feat:
            return self.fpn_feat

        c2, c3, c4, c5 = data

        if self.p.fp16:
            c2 = X.to_fp32(c2, name="c2_to_fp32")
            c3 = X.to_fp32(c3, name="c3_to_fp32")
            c4 = X.to_fp32(c4, name="c4_to_fp32")
            c5 = X.to_fp32(c5, name="c5_to_fp32")

        xavier_init = mx.init.Xavier(factor_type="in",
                                     rnd_type="uniform",
                                     magnitude=3)

        # P5
        p5 = X.conv(data=c5,
                    filter=256,
                    no_bias=False,
                    weight=X.var(name="P5_lateral_weight", init=xavier_init),
                    bias=X.var(name="P5_lateral_bias", init=X.zero_init()),
                    name="P5_lateral")
        p5_conv = X.conv(data=p5,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P5_conv_weight", init=xavier_init),
                         bias=X.var(name="P5_conv_bias", init=X.zero_init()),
                         name="P5_conv")

        # P4
        p5_up = mx.sym.UpSampling(p5,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P5_upsampling",
                                  num_args=1)
        p4_la = X.conv(data=c4,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P4_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P4_lateral_bias", init=X.zero_init()),
                       name="P4_lateral")
        p5_clip = mx.sym.Crop(*[p5_up, p4_la], name="P4_clip")
        p4 = mx.sym.ElementWiseSum(*[p5_clip, p4_la], name="P4_sum")

        p4_conv = X.conv(data=p4,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P4_conv_weight", init=xavier_init),
                         bias=X.var(name="P4_conv_bias", init=X.zero_init()),
                         name="P4_conv")

        # P3
        p4_up = mx.sym.UpSampling(p4,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P4_upsampling",
                                  num_args=1)
        p3_la = X.conv(data=c3,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P3_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P3_lateral_bias", init=X.zero_init()),
                       name="P3_lateral")
        p4_clip = mx.sym.Crop(*[p4_up, p3_la], name="P3_clip")
        p3 = mx.sym.ElementWiseSum(*[p4_clip, p3_la], name="P3_sum")

        p3_conv = X.conv(data=p3,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P3_conv_weight", init=xavier_init),
                         bias=X.var(name="P3_conv_bias", init=X.zero_init()),
                         name="P3_conv")

        # P2
        p3_up = mx.sym.UpSampling(p3,
                                  scale=2,
                                  sample_type="nearest",
                                  name="P3_upsampling",
                                  num_args=1)
        p2_la = X.conv(data=c2,
                       filter=256,
                       no_bias=False,
                       weight=X.var(name="P2_lateral_weight",
                                    init=xavier_init),
                       bias=X.var(name="P2_lateral_bias", init=X.zero_init()),
                       name="P2_lateral")
        p3_clip = mx.sym.Crop(*[p3_up, p2_la], name="P2_clip")
        p2 = mx.sym.ElementWiseSum(*[p3_clip, p2_la], name="P2_sum")

        p2_conv = X.conv(data=p2,
                         kernel=3,
                         filter=256,
                         no_bias=False,
                         weight=X.var(name="P2_conv_weight", init=xavier_init),
                         bias=X.var(name="P2_conv_bias", init=X.zero_init()),
                         name="P2_conv")

        # P6
        p6 = X.pool(p5_conv,
                    name="P6_subsampling",
                    kernel=1,
                    stride=2,
                    pad=0,
                    pool_type='max')
        if self.p.fp16:
            p6 = X.to_fp16(p6, name="p6_to_fp16")
            p5_conv = X.to_fp16(p5_conv, name="p5_conv_to_fp16")
            p4_conv = X.to_fp16(p4_conv, name="p4_conv_to_fp16")
            p3_conv = X.to_fp16(p3_conv, name="p3_conv_to_fp16")
            p2_conv = X.to_fp16(p2_conv, name="p2_conv_to_fp16")

        conv_fpn_feat = dict()
        conv_fpn_feat.update({
            "stride64": p6,
            "stride32": p5_conv,
            "stride16": p4_conv,
            "stride8": p3_conv,
            "stride4": p2_conv
        })

        self.fpn_feat = conv_fpn_feat
        return self.fpn_feat
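
The Crop after each UpSampling exists because nearest upsampling exactly doubles H and W, which can overshoot the lateral map by one pixel when the coarser level came from an odd-sized input. A shape check (sizes are illustrative):

    import mxnet as mx

    c5 = mx.sym.Variable("c5")
    c4 = mx.sym.Variable("c4")
    up = mx.sym.UpSampling(c5, scale=2, sample_type="nearest", num_args=1)
    clip = mx.sym.Crop(up, c4)   # crop the upsampled map to c4's spatial size
    _, out_shapes, _ = clip.infer_shape(c5=(1, 256, 13, 13), c4=(1, 256, 25, 25))
    print(out_shapes)            # [(1, 256, 25, 25)]: 26x26 cropped to 25x25
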
Example #18
    def get_output(self, fpn_conv_feats, roi_feat, rois, is_train):
        '''
        Args:
            fpn_conv_feats: dict of FPN features, each [batch_image, in_channels, fh, fw]
            roi_feat: [batch_image * image_roi, 256, roi_size, roi_size]
            rois: [batch_image, image_roi, 4]
            is_train: boolean
        Returns:
            cls_logit: [batch_image * image_roi, num_class]
            bbox_delta: [batch_image * image_roi, num_class * 4]
            tsd_cls_logit: [batch_image * image_roi, num_class]
            tsd_bbox_delta: [batch_image * image_roi, num_class * 4]
            delta_c: [batch_image * image_roi, 2*roi_size*roi_size, 1, 1]
            delta_r: [batch_image * image_roi, 2, 1, 1]
        '''
        xavier_init = mx.init.Xavier(factor_type="in",
                                     rnd_type="uniform",
                                     magnitude=3)
        # roi_feat: [batch_roi, 256, 7, 7]

        flatten = X.reshape(
            roi_feat, shape=(0, -1, 1, 1),
            name="bbox_feat_reshape")  # [batch_roi, 256*7*7, 1, 1]

        x1 = flatten
        x2 = X.relu(X.conv(data=x1,
                           kernel=1,
                           filter=256,
                           name="delta_shared_fc1",
                           no_bias=False),
                    name="delta_shared_fc1_relu")  # [batch_roi, 256, 1, 1]

        delta_c = X.relu(X.conv(x2,
                                filter=256,
                                name="delta_c_fc1",
                                init=X.gauss(0.01)),
                         name="delta_c_fc1_relu")  # [batch_roi, 256, 1, 1]
        delta_c = X.conv(delta_c,
                         filter=2 * self.p.roi_size**2,
                         name="delta_c_fc2",
                         init=X.gauss(0.01))  # [batch_roi, 2*7*7, 1, 1]

        delta_r = X.relu(X.conv(x2,
                                filter=256,
                                name="delta_r_fc1",
                                init=X.gauss(0.01)),
                         name="delta_r_fc1_relu")  # [batch_roi, 256, 1, 1]
        delta_r = X.conv(delta_r,
                         filter=2,
                         name="delta_r_fc2",
                         init=X.gauss(0.01))  # [batch_roi, 2, 1, 1]

        image_roi = self.p.image_roi if is_train else 1000
        batch_image = self.p.batch_image

        TSD_cls_feats = self.delta_c_pool.get_roi_feature(
            fpn_conv_feats,
            rois,
            delta_c,
            image_rois=image_roi,
            batch_image=batch_image)  # [batch_roi, 256, 7, 7]
        TSD_loc_feats = self.delta_r_pool.get_roi_feature(
            fpn_conv_feats,
            rois,
            delta_r,
            image_rois=image_roi,
            batch_image=batch_image)  # [batch_roi, 256, 7, 7]

        TSD_x_cls = self._convs_and_fcs(
            TSD_cls_feats,
            self.p.TSD.num_shared_convs,
            self.p.TSD.num_shared_fcs,
            name='TSD_pc',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_channel]
        TSD_x_reg = self._convs_and_fcs(
            TSD_loc_feats,
            self.p.TSD.num_shared_convs,
            self.p.TSD.num_shared_fcs,
            name='TSD_pr',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_channel]

        TSD_x_cls = self._convs_and_fcs(
            TSD_x_cls,
            0,
            self.p.TSD.num_cls_fcs,
            name='TSD_cls',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_channel]
        TSD_x_reg = self._convs_and_fcs(
            TSD_x_reg,
            0,
            self.p.TSD.num_reg_fcs,
            name='TSD_reg',
            conv_init=xavier_init,
            fc_init=X.gauss(0.01))  # [batch_roi, fc_channel]

        num_class = self.p.num_class
        num_reg_class = 2 if self.p.regress_target.class_agnostic else num_class

        tsd_cls_logit = X.fc(TSD_x_cls,
                             filter=num_class,
                             name='tsd_cls_logit',
                             init=X.gauss(0.01))
        tsd_bbox_delta = X.fc(TSD_x_reg,
                              filter=4 * num_reg_class,
                              name='tsd_reg_delta',
                              init=X.gauss(0.01))

        x = self._convs_and_fcs(roi_feat,
                                self.p.TSD.num_shared_convs,
                                self.p.TSD.num_shared_fcs,
                                name='shared_fc',
                                conv_init=xavier_init,
                                fc_init=X.gauss(0.01))
        x_cls = x
        x_reg = x
        x_cls = self._convs_and_fcs(x_cls,
                                    0,
                                    self.p.TSD.num_cls_fcs,
                                    name='cls',
                                    conv_init=xavier_init,
                                    fc_init=X.gauss(0.01))
        x_reg = self._convs_and_fcs(x_reg,
                                    0,
                                    self.p.TSD.num_reg_fcs,
                                    name='reg',
                                    conv_init=xavier_init,
                                    fc_init=X.gauss(0.01))
        cls_logit = X.fc(x_cls,
                         filter=num_class,
                         name='bbox_cls_logit',
                         init=X.gauss(0.01))
        bbox_delta = X.fc(x_reg,
                          filter=4 * num_reg_class,
                          name='bbox_reg_delta',
                          init=X.gauss(0.01))

        if self.p.fp16:
            cls_logit = X.to_fp32(cls_logit, name="cls_logits_fp32")
            bbox_delta = X.to_fp32(bbox_delta, name="bbox_delta_fp32")
            tsd_cls_logit = X.to_fp32(tsd_cls_logit, name="tsd_cls_logit_fp32")
            tsd_bbox_delta = X.to_fp32(tsd_bbox_delta,
                                       name="tsd_bbox_delta_fp32")
            delta_c = X.to_fp32(delta_c, name="delta_c_fp32")
            delta_r = X.to_fp32(delta_r, name="delta_r_fp32")

        return cls_logit, bbox_delta, tsd_cls_logit, tsd_bbox_delta, delta_c, delta_r
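
Channel bookkeeping for the two delta branches, with roi_size assumed to be 7 as in the shape comments above: delta_c predicts a 2-d offset per cell of the pooled grid, delta_r a single 2-d offset for the whole box:

    roi_size = 7
    delta_c_channels = 2 * roi_size ** 2   # 98: one (dx, dy) per 7x7 cell
    delta_r_channels = 2                   # a single (dx, dy) per RoI
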
Example #19
    def get_output(self, conv_fpn_feat):
        if self.cls_logit_dict is not None and self.bbox_delta_dict is not None:
            return self.cls_logit_dict, self.bbox_delta_dict

        p = self.p
        num_base_anchor = len(p.anchor_generate.ratio) * len(p.anchor_generate.scale)
        conv_channel = p.head.conv_channel

        # FPN RPN share weight
        rpn_conv_weight = X.var('rpn_conv_weight', init=X.gauss(0.01))
        rpn_conv_bias = X.var('rpn_conv_bias', init=X.zero_init())
        rpn_conv_gamma = X.var('rpn_conv_gamma')
        rpn_conv_beta = X.var('rpn_conv_beta')
        rpn_conv_mmean = X.var('rpn_conv_moving_mean')
        rpn_conv_mvar = X.var('rpn_conv_moving_var')
        rpn_conv_cls_weight = X.var('rpn_conv_cls_weight', init=X.gauss(0.01))
        rpn_conv_cls_bias = X.var('rpn_conv_cls_bias', init=X.zero_init())
        rpn_conv_bbox_weight = X.var('rpn_conv_bbox_weight', init=X.gauss(0.01))
        rpn_conv_bbox_bias = X.var('rpn_conv_bbox_bias', init=X.zero_init())

        cls_logit_dict = {}
        bbox_delta_dict = {}

        for stride in p.anchor_generate.stride:
            rpn_conv = X.conv(
                conv_fpn_feat['stride%s' % stride],
                kernel=3,
                filter=conv_channel,
                name="rpn_conv_3x3_%s" % stride,
                no_bias=False,
                weight=rpn_conv_weight,
                bias=rpn_conv_bias
            )

            if p.normalizer.__name__ == "fix_bn":
                pass
            elif p.normalizer.__name__ == "sync_bn":
                rpn_conv = p.normalizer(
                    rpn_conv,
                    gamma=rpn_conv_gamma,
                    beta=rpn_conv_beta,
                    moving_mean=rpn_conv_mmean,
                    moving_var=rpn_conv_mvar,
                    name="rpn_conv_3x3_bn_%s" % stride
                )
            elif p.normalizer.__name__ == "gn":
                rpn_conv = p.normalizer(
                    rpn_conv,
                    gamma=rpn_conv_gamma,
                    beta=rpn_conv_beta,
                    name="rpn_conv_3x3_gn_%s" % stride
                )
            else:
                raise NotImplementedError("Unsupported normalizer {}".format(p.normalizer.__name__))

            rpn_relu = X.relu(rpn_conv, name='rpn_relu_%s' % stride)
            if p.fp16:
                rpn_relu = X.to_fp32(rpn_relu, name="rpn_relu_%s_fp32" % stride)
            cls_logit = X.conv(
                rpn_relu,
                filter=2 * num_base_anchor,
                name="rpn_cls_score_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_cls_weight,
                bias=rpn_conv_cls_bias
            )

            bbox_delta = X.conv(
                rpn_relu,
                filter=4 * num_base_anchor,
                name="rpn_bbox_pred_stride%s" % stride,
                no_bias=False,
                weight=rpn_conv_bbox_weight,
                bias=rpn_conv_bbox_bias
            )

            cls_logit_dict[stride]  = cls_logit
            bbox_delta_dict[stride] = bbox_delta

        self.cls_logit_dict = cls_logit_dict
        self.bbox_delta_dict = bbox_delta_dict

        return self.cls_logit_dict, self.bbox_delta_dict