Example #1
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = jt.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.shape[0]
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None: in_channels = 3
            elif cfg.fpn is not None: in_channels = cfg.fpn.num_features
            else: in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(
                src_channels[layer_idx],
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent,
                index=idx)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv(src_channels[0],
                                             cfg.num_classes - 1,
                                             kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,
                             conf_thresh=cfg.nms_conf_thresh,
                             nms_thresh=cfg.nms_thresh)
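
Example #1 calls self.freeze_bn() when cfg.freeze_bn is set but does not show its body. A minimal sketch of what such a helper usually does (an assumption written in plain PyTorch for concreteness, not the code of this Jittor port): put every BatchNorm layer in eval mode and stop its gradients so the pretrained running statistics stay fixed.

import torch.nn as nn

def freeze_bn_sketch(module: nn.Module):
    # Hypothetical helper: freeze all BatchNorm layers in a module tree.
    for m in module.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()                      # use stored running mean/var, not batch statistics
            for p in m.parameters():      # freeze the affine weight and bias
                p.requires_grad = False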
Example #2
    def __init__(self,
                 in_channels,
                 out_channels=1024,
                 aspect_ratios=[[1]],
                 scales=[1],
                 parent=None,
                 index=0):
        super().__init__()

        self.num_classes = cfg.num_classes
        self.mask_dim = cfg.mask_dim  # Defined by Yolact
        self.num_priors = sum(len(x) * len(scales) for x in aspect_ratios)
        self.parent = [parent]  # Don't include this in the state dict
        self.index = index
        self.num_heads = cfg.num_heads  # Defined by Yolact

        if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
            self.mask_dim = self.mask_dim // self.num_heads

        if cfg.mask_proto_prototypes_as_features:
            in_channels += self.mask_dim

        if parent is None:
            if cfg.extra_head_net is None:
                out_channels = in_channels
            else:
                self.upfeature, out_channels = make_net(
                    in_channels, cfg.extra_head_net)

            if cfg.use_prediction_module:
                self.block = Bottleneck(out_channels, out_channels // 4)
                self.conv = nn.Conv(out_channels,
                                    out_channels,
                                    kernel_size=1,
                                    bias=True)
                self.bn = nn.BatchNorm(out_channels)

            self.bbox_layer = nn.Conv(out_channels, self.num_priors * 4,
                                      **cfg.head_layer_params)
            self.conf_layer = nn.Conv(out_channels,
                                      self.num_priors * self.num_classes,
                                      **cfg.head_layer_params)
            self.mask_layer = nn.Conv(out_channels,
                                      self.num_priors * self.mask_dim,
                                      **cfg.head_layer_params)

            if cfg.use_mask_scoring:
                self.score_layer = nn.Conv(out_channels, self.num_priors,
                                           **cfg.head_layer_params)

            if cfg.use_instance_coeff:
                self.inst_layer = nn.Conv(
                    out_channels, self.num_priors * cfg.num_instance_coeffs,
                    **cfg.head_layer_params)

            # What is this ugly lambda doing in the middle of all this clean prediction module code?
            def make_extra(num_layers):
                if num_layers == 0:
                    return lambda x: x
                else:
                    # Looks more complicated than it is. This just creates an array of num_layers alternating conv-relu
                    return nn.Sequential(*sum([[
                        nn.Conv(out_channels,
                                out_channels,
                                kernel_size=3,
                                padding=1),
                        nn.ReLU()
                    ] for _ in range(num_layers)], []))

            self.bbox_extra, self.conf_extra, self.mask_extra = [
                make_extra(x) for x in cfg.extra_layers
            ]

            if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:
                self.gate_layer = nn.Conv(out_channels,
                                          self.num_priors * self.mask_dim,
                                          kernel_size=3,
                                          padding=1)

        self.aspect_ratios = aspect_ratios
        self.scales = scales

        self.priors = None
        self.last_conv_size = None
        self.last_img_size = None
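
The anchor count per feature-map cell comes from sum(len(x) * len(scales) for x in aspect_ratios). With the yolact_plus-style values quoted in Example #4 (one group of three aspect ratios and three scales per head) this evaluates to 9; a quick check, with the values assumed from those comments:

aspect_ratios = [[1, 1/2, 2]]                      # one group of three ratios
scales = [24 * 2 ** (j / 3.0) for j in range(3)]   # three scales for the first head
num_priors = sum(len(x) * len(scales) for x in aspect_ratios)
print(num_priors)  # 3 ratios * 3 scales = 9 anchors per cell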
Example #3
    def __init__(self):
        super().__init__()
        input_channels = 1
        last_layer = [(cfg.num_classes - 1, 1, {})]
        self.maskiou_net, _ = make_net(input_channels, cfg.maskiou_net + last_layer, include_last_relu=True)
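
make_net consumes a list of (out_channels, kernel_size, kwargs) layer specs (the same format as cfg.mask_proto_net in Examples #5 and #7), and FastMaskIoUNet simply appends one (num_classes - 1, 1, {}) layer so the net ends with one channel per foreground class. A sketch of the concatenation with a hypothetical base spec (the actual cfg.maskiou_net value is not shown in these examples):

num_classes = 81                                               # COCO: 80 classes + background
maskiou_net = [(8, 3, {'stride': 2}), (16, 3, {'stride': 2})]  # hypothetical base spec
last_layer = [(num_classes - 1, 1, {})]                        # 1x1 conv, one channel per class
print(maskiou_net + last_layer)                                # full spec handed to make_net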
Example #4
    def __init__(self,
                 in_channels,
                 out_channels=1024,
                 aspect_ratios=[[1]],
                 scales=[1],
                 parent=None,
                 index=0):
        #               256,         256,               [[1, 1/2, 2]],      [24*2**0 , 24*2**(1/3) , 24*2**(2/3) ], parent, 0
        #               256,         256,               [[1, 1/2, 2]],      [48*2**0 , 48*2**(1/3) , 48*2**(2/3) ], parent, 1
        #               256,         256,               [[1, 1/2, 2]],      [96*2**0 , 96*2**(1/3) , 96*2**(2/3) ], parent, 2
        #               256,         256,               [[1, 1/2, 2]],      [192*2**0, 192*2**(1/3), 192*2**(2/3)], parent, 3
        #               256,         256,               [[1, 1/2, 2]],      [384*2**0, 384*2**(1/3), 384*2**(2/3)], parent, 4

        # 2**0 = 1,  2**(1/3) = 1.26,  2**(2/3) = 1.587
        # This looks like a device for producing more evenly spaced scales.
        super().__init__()
        self.num_classes = cfg.num_classes  # len(coco2017_dataset.class_names) + 1,
        self.mask_dim = cfg.mask_dim  # Defined by Yolact #fpn -> 32
        self.num_priors = sum(len(x) * len(scales)
                              for x in aspect_ratios)  # =9
        self.parent = [parent]  # Don't include this in the state dict
        self.index = index
        self.num_heads = cfg.num_heads  # Defined by Yolact -> 5

        #False
        if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
            self.mask_dim = self.mask_dim // self.num_heads

        #False
        if cfg.mask_proto_prototypes_as_features:
            in_channels += self.mask_dim

        # The first prediction layer has no parent (parent is None).
        if parent is None:
            if cfg.extra_head_net is None:
                out_channels = in_channels
            else:  # 'extra_head_net': [(256, 3, {'padding': 1})],
                self.upfeature, out_channels = make_net(
                    in_channels,
                    cfg.extra_head_net)  # channel count unchanged; conv(kernel_size=3, padding=1)
                # out_channels = 256.

            #False
            if cfg.use_prediction_module:
                self.block = Bottleneck(out_channels, out_channels // 4)
                self.conv = nn.Conv2d(out_channels,
                                      out_channels,
                                      kernel_size=1,
                                      bias=True)
                self.bn = nn.BatchNorm2d(out_channels)

            #cfg  'head_layer_params': {'kernel_size': 3, 'padding': 1},
            self.bbox_layer = nn.Conv2d(
                out_channels, self.num_priors * 4,
                **cfg.head_layer_params)  #out channel:(9*4)
            self.conf_layer = nn.Conv2d(
                out_channels, self.num_priors * self.num_classes,
                **cfg.head_layer_params)  #out channel:(9*c)
            self.mask_layer = nn.Conv2d(
                out_channels, self.num_priors * self.mask_dim,
                **cfg.head_layer_params)  #out channel:(9*32) - coefficient

            # False
            if cfg.use_mask_scoring:
                self.score_layer = nn.Conv2d(out_channels, self.num_priors,
                                             **cfg.head_layer_params)

            # False // 'num_instance_coeffs': 64 -> but unused
            #   Used when you want a loss on the coefficients rather than on bbox IoU.
            if cfg.use_instance_coeff:
                self.inst_layer = nn.Conv2d(
                    out_channels, self.num_priors * cfg.num_instance_coeffs,
                    **cfg.head_layer_params)

            # What is this ugly lambda doing in the middle of all this clean prediction module code?
            def make_extra(num_layers):
                if num_layers == 0:
                    return lambda x: x
                else:
                    # Looks more complicated than it is. This just creates an array of num_layers alternating conv-relu
                    return nn.Sequential(*sum([[
                        nn.Conv2d(out_channels,
                                  out_channels,
                                  kernel_size=3,
                                  padding=1),
                        nn.ReLU(inplace=True)
                    ] for _ in range(num_layers)], []))

            # Add extra layers between the backbone and the network heads
            # The order is (bbox, conf, mask)
            # 'extra_layers': (0, 0, 0) -> #cw i.e., with the default settings nothing is added.
            self.bbox_extra, self.conf_extra, self.mask_extra = [
                make_extra(x) for x in cfg.extra_layers
            ]

            if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:  #cw True and False -> branch not taken
                self.gate_layer = nn.Conv2d(out_channels,
                                            self.num_priors * self.mask_dim,
                                            kernel_size=3,
                                            padding=1)

        self.aspect_ratios = aspect_ratios
        self.scales = scales

        self.priors = None
        self.last_conv_size = None
        self.last_img_size = None
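
The per-head scale triples in the comments at the top of this example come from the config expression also quoted in Example #7; evaluating it reproduces the 1 / 1.26 / 1.587 spacing noted above:

base_sizes = [24, 48, 96, 192, 384]
pred_scales = [[i * 2 ** (j / 3.0) for j in range(3)] for i in base_sizes]
print(pred_scales[0])  # [24.0, ~30.24, ~38.10]: factors 2**0 = 1, 2**(1/3) ~ 1.26, 2**(2/3) ~ 1.587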
Example #5
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(
            cfg.backbone)  #resnet101_dcn_inter3_backbone

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2

        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:  #False
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            #cw yolact_plus default:0
            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None: in_channels = 3  #cw  0 != None
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features  #cw fpn.num_features -- default:'num_features': 256,
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids  #cw (256 + 0)

            #TODO#Fig. 3 PART
            # The include_last_relu=false here is because we might want to change it to another function
            # 'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})],
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)
            # 256 in; six layers in the spec (five convs and one bilinear upsample)
            #cw make_net returns, as its second value, the channel count of the final output after in_channels passes through cfg.mask_proto_net,
            #   so that final channel count becomes cfg.mask_dim.

            if cfg.mask_proto_bias:  #False
                cfg.mask_dim += 1
            #   cfg.mask_dim = 32

        self.selected_layers = cfg.backbone.selected_layers  #cw yp -- [1, 2, 3]
        src_channels = self.backbone.channels  #src_channels = [256, 512, 1024, 2048]

        #True #TODO#
        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        # 'fpn': fpn_base.copy({
        #     'use_conv_downsample': True,
        #     'num_downsample': 2,
        # }),

        #TODO#
        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers
                            ])  # passes [512, 1024, 2048]
            self.selected_layers = list(
                range(len(self.selected_layers) +
                      cfg.fpn.num_downsample))  #cw range(3 + 2)
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        # src_channels = [256, 256, 256, 256, 256]
        # selected_layers : [0, 1, 2, 3, 4]

        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # 5; used by PredictionModule.

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            #True
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]
            #cw src_channels originally holds the channel count of each resnet layer_idx.
            #   In other words, bboxes are predicted from the selected layers.
            #   Each pred built here is appended to prediction_layers (one per selected layer).
            pred = PredictionModule(
                src_channels[layer_idx],
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent,
                index=idx)
            self.prediction_layers.append(pred)

        #False
        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)
        #True
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0],
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,
                             conf_thresh=cfg.nms_conf_thresh,
                             nms_thresh=cfg.nms_thresh)
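
The FPN rewiring in this example can be traced with plain lists: the backbone's selected layers [1, 2, 3] feed the FPN, and afterwards selected_layers and src_channels describe five 256-channel FPN levels (three backbone levels plus num_downsample = 2 extra ones). A small sketch using the channel counts quoted in the comments above:

backbone_channels = [256, 512, 1024, 2048]
selected_layers = [1, 2, 3]
num_downsample, num_features = 2, 256                                  # yolact++ fpn settings

fpn_inputs = [backbone_channels[i] for i in selected_layers]           # [512, 1024, 2048]
selected_layers = list(range(len(selected_layers) + num_downsample))   # [0, 1, 2, 3, 4]
src_channels = [num_features] * len(selected_layers)                   # [256, 256, 256, 256, 256]
print(fpn_inputs, selected_layers, src_channels)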
Example #6
    def __init__(self, in_channels, out_channels=1024, aspect_ratios=[[1]], scales=[1], parent=None, index=0):
        super().__init__()

        self.num_classes = cfg.num_classes
        self.mask_dim    = cfg.mask_dim # Defined by Yolact

        # for yolact_base this is 3 (3 aspect ratios x 1 scale per head)
        self.num_priors  = sum( len(x)*len(scales) for x in aspect_ratios )

        self.parent      = [parent] # Don't include this in the state dict
        self.index       = index
        self.num_heads   = cfg.num_heads # Defined by Yolact

        # 'mask_proto_split_prototypes_by_head': False for coco yolact
        if cfg.mask_proto_split_prototypes_by_head and cfg.mask_type == mask_type.lincomb:
            self.mask_dim = self.mask_dim // self.num_heads

        # mask_proto_prototypes_as_features False for coco yolact
        if cfg.mask_proto_prototypes_as_features:
            in_channels += self.mask_dim

        ### ________________________________ making prediction head ____________________________________________________
        if parent is None:
            # yolact     'extra_head_net': [(256, 3, {'padding': 1})]
            if cfg.extra_head_net is None:
                out_channels = in_channels
            else:
                self.upfeature, out_channels = make_net(in_channels, cfg.extra_head_net)

            # 'use_prediction_module': False,
            if cfg.use_prediction_module:
                self.block = Bottleneck(out_channels, out_channels // 4)
                self.conv = nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=True)
                self.bn = nn.BatchNorm2d(out_channels)


            # 'head_layer_params': {'kernel_size': 3, 'padding': 1}
            # 36 x 4 = 144
            self.bbox_layer = nn.Conv2d(out_channels, self.num_priors * 4,                **cfg.head_layer_params)
            # 36 x 81 (for coco) = 2916
            self.conf_layer = nn.Conv2d(out_channels, self.num_priors * self.num_classes, **cfg.head_layer_params)
            # mask_dim = 32 for lincomb masks (set by Yolact via make_net)
            self.mask_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim,    **cfg.head_layer_params)

            # 'use_mask_scoring': False,
            if cfg.use_mask_scoring:
                self.score_layer = nn.Conv2d(out_channels, self.num_priors, **cfg.head_layer_params)

            # 'use_instance_coeff': False,
            if cfg.use_instance_coeff:
                self.inst_layer = nn.Conv2d(out_channels, self.num_priors * cfg.num_instance_coeffs, **cfg.head_layer_params)
            
            # What is this ugly lambda doing in the middle of all this clean prediction module code?
            def make_extra(num_layers):
                if num_layers == 0:
                    return lambda x: x
                else:
                    # Looks more complicated than it is. This just creates an array of num_layers alternating conv-relu
                    return nn.Sequential(*sum([[
                        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                        nn.ReLU(inplace=True)
                    ] for _ in range(num_layers)], []))

            # 'extra_layers': (0, 0, 0), so these layers basically doing nothing
            self.bbox_extra, self.conf_extra, self.mask_extra = [make_extra(x) for x in cfg.extra_layers]

            # 'mask_proto_coeff_gate': False, so ... doing nothing
            if cfg.mask_type == mask_type.lincomb and cfg.mask_proto_coeff_gate:
                self.gate_layer = nn.Conv2d(out_channels, self.num_priors * self.mask_dim, kernel_size=3, padding=1)
        ### ____________________________________________________________________________________________________________
        self.aspect_ratios = aspect_ratios
        self.scales = scales

        self.priors = None
        self.last_conv_size = None  # cached so priors are only rebuilt when the feature-map size changes
        self.last_img_size = None
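
With 'extra_layers': (0, 0, 0) each of bbox_extra / conf_extra / mask_extra is the identity lambda, so nothing sits between the shared head features and the final conv layers; with a positive count, make_extra stacks that many conv-relu pairs. The same helper pulled out of the class so its two branches are easy to poke at (a standalone sketch, assuming out_channels = 256):

import torch.nn as nn

out_channels = 256

def make_extra(num_layers):
    if num_layers == 0:
        return lambda x: x  # identity: no extra layers
    # Alternating conv-relu blocks, num_layers of each
    return nn.Sequential(*sum([[
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True)
    ] for _ in range(num_layers)], []))

print(type(make_extra(0)))  # <class 'function'> (identity)
print(make_extra(2))        # Sequential: conv-relu-conv-relu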
Example #7
    def __init__(self):
        super().__init__()

        # yolact++ cfg.backbone =
        # 'backbone': resnet101_dcn_inter3_backbone.copy({
        #     'selected_layers': list(range(1, 4)),
        #
        #     'pred_aspect_ratios': [[[1, 1 / 2, 2]]] * 5,
        #     'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]],
        #     'use_pixel_scales': True,
        #     'preapply_sqrt': False,
        #     'use_square_anchors': False,
        # })
        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
                            # cfg.mask_size = 16, so 16^2 = 256
            cfg.mask_dim = cfg.mask_size**2

        elif cfg.mask_type == mask_type.lincomb:

            # mask_proto_use_grid ALWAYS false ??
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            # yolact uses 0 here
            self.proto_src = cfg.mask_proto_src
            
            if self.proto_src is None: in_channels = 3
            elif cfg.fpn is not None: in_channels = cfg.fpn.num_features
            else: in_channels = self.backbone.channels[self.proto_src]

            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            # yolact ++ proto net
            #     'mask_proto_net': [(256, 3, {'padding': 1})] * 3
            #     + [(None, -2, {}), (256, 3, {'padding': 1})]
            #     + [(32, 1, {})],
            self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        ## end of mask type if else ______________________________________________]

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN(
                # yolact++ 101 selected layers = 1,2,3
                # 2nd  128x4
                # 3rd  256x4
                # 4th  512x4
                [src_channels[i] for i in self.selected_layers]
            )

            self.selected_layers = list( # selected_layers = 0,1,2,3,4
                range(
                    # yolact++
                    # 1 , 2 , 3                               2
                    len(self.selected_layers) + cfg.fpn.num_downsample)
            )

                            # num features = 256  x  5
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)


        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # --> 5 heads with the FPN

        # This builds one PredictionModule per selected layer (5 of them)
        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None

            # yolact++ share_prediction_module always True
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(
                                    # in_channels=
                                    src_channels[layer_idx],
                                    # out_channels=
                                    src_channels[layer_idx],
                                    # 'pred_scales': [[1]] * 6
                                    #  'pred_aspect_ratios': [[[0.66685089, 1.7073535, 0.87508774, 1.16524493,
                                    #                            0.49059086]]] * 6
                                    aspect_ratios = cfg.backbone.pred_aspect_ratios[idx],
                                    scales        = cfg.backbone.pred_scales[idx],
                                    parent        = parent,
                                    index         = idx)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        # always False ??
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

        # yolact always True ??
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,     #'nms_top_k': 200,
                             conf_thresh=cfg.nms_conf_thresh,     #'nms_conf_thresh': 0.05
                             nms_thresh=cfg.nms_thresh      #'nms_thresh': 0.5
                             )
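
The in_channels fed to the proto net depends on mask_proto_src and whether an FPN is configured; the three branches above can be checked in isolation with a small sketch (hypothetical helper name, channel counts taken from the comments in these examples):

def proto_in_channels(proto_src, fpn_num_features, backbone_channels, num_grids=0):
    # Mirrors the branch in Yolact.__init__: raw image, FPN feature map, or raw backbone feature map.
    if proto_src is None:
        in_channels = 3                       # prototypes computed straight from the input image
    elif fpn_num_features is not None:
        in_channels = fpn_num_features        # prototypes from an FPN level (256 here)
    else:
        in_channels = backbone_channels[proto_src]
    return in_channels + num_grids

print(proto_in_channels(0, 256, [256, 512, 1024, 2048]))      # 256: proto_src = 0 with an FPN
print(proto_in_channels(None, None, [256, 512, 1024, 2048]))  # 3: prototypes from the raw image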