def __init__(self,
                 num_classes,
                 levels=3,
                 num_channels=128,
                 model_name='efficientdet-d0',
                 is_training=True,
                 threshold=0.5):
        super(EfficientDet, self).__init__()
        self.efficientnet = EfficientNet.from_pretrained(MODEL_MAP[model_name])
        self.is_training = is_training
        self.BIFPN = BIFPN(in_channels=self.efficientnet.get_list_features()[2:],
                                out_channels=256,
                                num_outs=5)
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.threshold = threshold

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        prior = 0.01
        
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0-prior)/prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()
    def __init__(self,
                 num_classes,
                 network='efficientdet-d0',
                 D_bifpn=3,
                 W_bifpn=88,
                 D_class=3,
                 is_training=True,
                 threshold=0.5,
                 iou_threshold=0.5,
                 gpu=1):
        super(EfficientDet, self).__init__()
        self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network])
        self.is_training = is_training
        self.neck = BIFPN(in_channels=self.backbone.get_list_features()[-5:],
                          out_channels=W_bifpn,
                          stack=D_bifpn,
                          num_outs=5)
        self.bbox_head = RetinaHead(num_classes=num_classes,
                                    in_channels=W_bifpn)

        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.threshold = threshold
        self.iou_threshold = iou_threshold
        self.gpu = gpu
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        self.freeze_bn()
        self.criterion = FocalLoss()
def detection(classification, regression, inputs, score_thresh=0.01, iou_thresh=0.5):
    anchors = Anchors()
    anchors = anchors(inputs)

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, inputs)

    scores = torch.max(classification, dim=2, keepdim=True)[0]

    over_score_thresh_idx = (scores > score_thresh)[0, :, 0]

    classfiy_over_thresh = classification[:, over_score_thresh_idx, :]
    anchors_over_thresh = transformed_anchors[:, over_score_thresh_idx, :]
    scores_over_thresh = scores[:, over_score_thresh_idx, :]

    # nms
    score = scores_over_thresh[0, :, :]
    anchors_score = torch.cat([anchors_over_thresh[0, :, :], score], dim=1)
    anchors_nms_idx, _ = soft_nms(anchors_score, score_threshold=iou_thresh)

    nms_scores, nms_class = classfiy_over_thresh[0, anchors_nms_idx, :].max(
        dim=1)
    nms_anchors = anchors_over_thresh[0, anchors_nms_idx, :]
    return [nms_scores, nms_class, nms_anchors]
    def __init__(self,
                 num_classes,
                 network='efficientdet-d0',
                 D_bifpn=3,
                 W_bifpn=88):
        super(EfficientDetBiFPN, self).__init__()
        self.backbone = EfficientNet.get_network_from_name(MODEL_MAP[network])
        self.neck = BIFPN(in_channels=self.backbone.get_list_features()[-5:],
                          out_channels=W_bifpn,
                          stack=D_bifpn,
                          num_outs=5)

        self.bbox_head = RetinaHead(num_classes=num_classes,
                                    in_channels=W_bifpn)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        self.freeze_bn()
Exemplo n.º 5
0
 def __init__(self,
              num_classes=21,
              levels=3,
              num_channels=128,
              model_name='efficientnet-b0',
              is_training=True):
     super(EfficientDet, self).__init__()
     self.efficientnet = EfficientNet.from_pretrained(model_name)
     self.is_training = is_training
     self.BIFPN = BIFPN(in_channels=[40, 80, 112, 192, 320],
                        out_channels=256,
                        num_outs=5)
     self.regressionModel = RegressionModel(256)
     self.classificationModel = ClassificationModel(256,
                                                    num_classes=num_classes)
     self.anchors = Anchors()
     self.regressBoxes = BBoxTransform()
     self.clipBoxes = ClipBoxes()
    def __init__(self,
                 num_classes,
                 network='efficientdet-d0',
                 D_bifpn=3,
                 W_bifpn=88,
                 D_class=3,
                 is_training=True,
                 threshold=0.01,
                 iou_threshold=0.5):
        super(EfficientDet, self).__init__()
        # self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network])
        self.backbone = EfficientNet.get_network_from_name(MODEL_MAP[network])

        # print backbone parameters
        # params = list(self.backbone.named_parameters())
        # for param_key, param_value in params:
        #     print("{},   {}".format(param_key, param_value.shape))
        #
        # for features in self.backbone.get_list_features():
        #     print(features)

        self.is_training = is_training
        self.neck = BIFPN(in_channels=self.backbone.get_list_features()[-5:],
                          out_channels=W_bifpn,
                          stack=D_bifpn,
                          num_outs=5)

        self.bbox_head = RetinaHead(num_classes=num_classes,
                                    in_channels=W_bifpn)

        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.threshold = threshold
        self.iou_threshold = iou_threshold
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        self.freeze_bn()
        self.criterion = FocalLoss()
Exemplo n.º 7
0
    def __init__(self,
                 num_classes,
                 network='efficientdet-d0',
                 D_bifpn=3,
                 W_bifpn=88,
                 D_class=3,
                 is_training=True,
                 threshold=0.5,
                 iou_threshold=0.5):
        super(EfficientDet, self).__init__()
        # self.efficientnet = EfficientNet.from_pretrained(MODEL_MAP[network])
        self.efficientnet = EfficientNet.from_name(
            MODEL_MAP[network], override_params={'num_classes': num_classes})
        self.is_training = is_training
        self.BIFPN = BIFPN(
            in_channels=self.efficientnet.get_list_features()[-5:],
            out_channels=W_bifpn,
            stack=D_bifpn,
            num_outs=5)
        self.regressionModel = RegressionModel(W_bifpn)
        self.classificationModel = ClassificationModel(W_bifpn,
                                                       num_classes=num_classes)
        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.threshold = threshold
        self.iou_threshold = iou_threshold

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()