Example #1
    def __init__(self, cfg):
        super().__init__()

        self.cfg = cfg
        self.backbone = construct_backbone(cfg.backbone, cfg.net_in_channels)
        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure DVIS's constructor is called early!
        if cfg.fpn is not None:
            in_channels = cfg.fpn.num_features
        else:
            in_channels = self.backbone.channels[0]

        src_channels = self.backbone.channels
        self.selected_layers = cfg.backbone.selected_layers
        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN(cfg, [src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        # The include_last_relu=false here is because we might want to change it to another function
        self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)
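Every variant above relies on make_net returning both the module and its final channel count, which is how cfg.mask_dim gets filled in. Below is a minimal, hedged sketch of that contract, assuming the (out_channels, kernel_size, conv_kwargs) tuple format visible in the configs quoted later in Examples #4 and #6; the real make_net in YOLACT also handles transposed convolutions and other cases.

import torch.nn as nn

def make_net_sketch(in_channels, conf, include_last_relu=True):
    # Sketch of make_net's contract: each entry is
    # (out_channels, kernel_size, conv_kwargs); a None channel count with a
    # negative kernel size stands for bilinear upsampling by -kernel_size.
    layers = []
    for out_channels, kernel_size, kwargs in conf:
        if out_channels is None and kernel_size < 0:
            # Upsample; the channel count is unchanged.
            layers.append(nn.Upsample(scale_factor=-kernel_size, mode='bilinear',
                                      align_corners=False))
        else:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, **kwargs))
            in_channels = out_channels
        layers.append(nn.ReLU(inplace=True))
    if not include_last_relu:
        layers = layers[:-1]
    # The second return value is why callers can write
    # `self.proto_net, cfg.mask_dim = make_net(...)`.
    return nn.Sequential(*layers), in_channels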
Example #2
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None:
                in_channels = 3
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        self.prediction_layers = nn.ModuleList()

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(
                src_channels[layer_idx],
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0],
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=200,
                             conf_thresh=0.05,
                             nms_thresh=0.5)
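The "hacky rewiring" around the FPN is pure index arithmetic: after the FPN every pyramid level has cfg.fpn.num_features channels, and num_downsample extra levels are appended. A worked example, assuming the usual ResNet-101 values quoted later in Examples #4 and #6 (selected_layers=[1, 2, 3], num_features=256, num_downsample=2):

selected_layers = [1, 2, 3]            # C3, C4, C5 of the backbone
src_channels = [256, 512, 1024, 2048]  # backbone.channels for ResNet-101
num_features, num_downsample = 256, 2  # cfg.fpn values

# The FPN consumes [src_channels[i] for i in selected_layers] == [512, 1024, 2048]
# and emits a uniform pyramid: three lateral levels plus num_downsample extras.
selected_layers = list(range(len(selected_layers) + num_downsample))
src_channels = [num_features] * len(selected_layers)

assert selected_layers == [0, 1, 2, 3, 4]
assert src_channels == [256] * 5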
Example #3
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src
            self.interpolation_mode = cfg.fpn.interpolation_mode

            if self.proto_src is None:
                in_channels = 3
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        self.pred_scales = cfg.backbone.pred_scales
        self.pred_aspect_ratios = cfg.backbone.pred_aspect_ratios
        self.num_priors = len(self.pred_scales[0])
        src_channels = self.backbone.channels

        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])

            if cfg.backbone_C2_as_features:
                self.selected_layers = list(
                    range(1, len(self.selected_layers) + cfg.fpn.num_downsample))
                src_channels = [cfg.fpn.num_features] * (len(self.selected_layers) + 1)
            else:
                self.selected_layers = list(
                    range(len(self.selected_layers) + cfg.fpn.num_downsample))
                src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        # prediction layers for loc, conf, mask
        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # yolact++
        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent, parent_t = None, None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule_FC(
                src_channels[layer_idx],
                src_channels[layer_idx],
                deform_groups=1,
                pred_aspect_ratios=self.pred_aspect_ratios[idx],
                pred_scales=self.pred_scales[idx],
                parent=parent)

            self.prediction_layers.append(pred)

        # parameters in temporal correlation net
        if cfg.temporal_fusion_module:
            corr_channels = 2 * in_channels + cfg.correlation_patch_size**2
            self.TemporalNet = TemporalNet(corr_channels, cfg.mask_proto_n)
            self.correlation_selected_layer = cfg.correlation_selected_layer

            # evaluation for frame-level tracking
            self.Detect_TF = Detect_TF(cfg.num_classes,
                                       bkg_label=0,
                                       top_k=cfg.nms_top_k,
                                       conf_thresh=cfg.nms_conf_thresh,
                                       nms_thresh=cfg.nms_thresh)
            self.Track_TF = Track_TF()

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0],
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,
                             conf_thresh=cfg.nms_conf_thresh,
                             nms_thresh=cfg.nms_thresh)
        self.Track = Track()
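The parent mechanism used by PredictionModule and PredictionModule_FC is how share_prediction_module works: only the module built with parent=None owns parameters, and every later head routes its forward pass through it. A minimal sketch of the pattern with a hypothetical SharedHead (the real modules are much larger, but the list-wrapping trick that keeps the parent out of the child's state dict is the same):

import torch
import torch.nn as nn

class SharedHead(nn.Module):
    # Modules built with parent=None own the weights; children route their
    # forward pass through the parent, so all heads share one parameter set.
    def __init__(self, in_channels, parent=None):
        super().__init__()
        self.parent = [parent]  # a plain list so it isn't registered as a submodule
        if parent is None:
            self.conv = nn.Conv2d(in_channels, in_channels, 3, padding=1)

    def forward(self, x):
        src = self if self.parent[0] is None else self.parent[0]
        return src.conv(x)

heads = nn.ModuleList()
for idx in range(5):
    parent = heads[0] if idx > 0 else None
    heads.append(SharedHead(256, parent=parent))

x = torch.randn(1, 256, 69, 69)
assert all(torch.equal(heads[0](x), h(x)) for h in heads)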
Example #4
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)  # resnet101_dcn_inter3_backbone

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2

        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:  #False
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            # yolact_plus default: 0
            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None:  # proto_src is 0 here, and 0 != None
                in_channels = 3
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features  # default 'num_features': 256
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids  # 256 + 0

            # TODO: Fig. 3 part (the protonet)
            # The include_last_relu=false here is because we might want to change it to another function
            # 'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})],
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)
            # 256 channels in; six layers: five convs plus a bilinear upsample
            # make_net returns, as its second value, the channel count of the final
            # output once in_channels has passed through cfg.mask_proto_net, so that
            # final channel count becomes cfg.mask_dim.

            if cfg.mask_proto_bias:  # False
                cfg.mask_dim += 1
            # cfg.mask_dim == 32

        self.selected_layers = cfg.backbone.selected_layers  # yolact++: [1, 2, 3]
        src_channels = self.backbone.channels  # [256, 512, 1024, 2048]

        # True in this config
        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        # 'fpn': fpn_base.copy({
        #     'use_conv_downsample': True,
        #     'num_downsample': 2,
        # }),

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])  # passes [512, 1024, 2048]
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))  # range(3 + 2)
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        # src_channels = [256, 256, 256, 256, 256]
        # selected_layers : [0, 1, 2, 3, 4]

        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # 5; used inside PredictionModule

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            # share_prediction_module is True here
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]
            # src_channels originally held the ResNet channel counts per layer_idx;
            # each selected layer predicts boxes, and the resulting pred module is
            # appended to prediction_layers (one per selected layer).
            pred = PredictionModule(
                src_channels[layer_idx],
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent,
                index=idx)
            self.prediction_layers.append(pred)

        # False in this config
        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)
        # True in this config
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0],
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,
                             conf_thresh=cfg.nms_conf_thresh,
                             nms_thresh=cfg.nms_thresh)
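For reference, the 'mask_proto_net' spec quoted in the comments above decodes as follows; pushing 256 channels through it is what makes make_net return mask_dim = 32, matching the Sequential dumped in the comments of Example #7. The annotations are mine, inferred from the tuple format sketched after Example #1:

mask_proto_net = ([(256, 3, {'padding': 1})] * 3   # three 3x3 convs, 256 -> 256
                  + [(None, -2, {}),               # bilinear upsample x2, channels unchanged
                     (256, 3, {'padding': 1})]     # one more 3x3 conv, 256 -> 256
                  + [(32, 1, {})])                 # final 1x1 conv, 256 -> 32

# With in_channels = 256, the last layer outputs 32 channels, so
# cfg.mask_dim == 32 (33 if mask_proto_bias were enabled).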
Example #5
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        #Fusion FPN
        self.fusion_layers = cfg.fusion_layers
        self.fusion_dim = cfg.fusion_dim

        # Compute mask_dim here and add it back to the config.
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None:
                in_channels = 3
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            if cfg.proto_coordconv:
                in_channels += 2
            elif cfg.fpn_fusion:
                in_channels = self.fusion_dim

            # The include_last_relu=False here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels
        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        if cfg.fpn_fusion is True:
            self.fusion_module = FusionModule(src_channels[0],
                                              self.fusion_layers,
                                              out_dim=self.fusion_dim)

        if cfg.ins_coordconv or cfg.sem_coordconv or cfg.proto_coordconv:
            self.addcoords = AddCoords()

        self.prediction_layers = nn.ModuleList()

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred_in_ch = src_channels[layer_idx] + (2 if cfg.ins_coordconv else 0)
            pred = PredictionModule(
                pred_in_ch,
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.cross_attention_fusion:
            self.CALayer = CAModule(src_channels[0], share_conv=False)

        if cfg.use_semantic_segmentation_loss:
            sem_in_ch = None
            if cfg.sem_src_fusion is True:
                sem_in_ch = self.fusion_dim
            elif cfg.sem_lincomb is True:
                sem_in_ch = src_channels[0]
            else:  # normal semantic segmentation head
                sem_in_ch = src_channels[-1]

            if cfg.sem_coordconv:
                sem_in_ch += 2

            # Panoptic FPN Fusion Version
            if cfg.sem_src_fusion is True:
                self.semantic_seg_conv = nn.Sequential(
                    nn.Conv2d(sem_in_ch,
                              cfg.stuff_num_classes,
                              kernel_size=(1, 1)))

            elif cfg.sem_lincomb is True:
                self.semantic_seg_conv = nn.Sequential(
                    nn.Conv2d(sem_in_ch, 256, kernel_size=3),
                    # nn.BatchNorm2d(256),
                    nn.GroupNorm(32, 256),
                    nn.ReLU(True),
                    nn.Conv2d(256, (cfg.stuff_num_classes) * cfg.mask_dim,
                              kernel_size=1),
                    nn.Tanh())
            else:
                self.semantic_seg_conv = nn.Sequential(
                    nn.Conv2d(sem_in_ch,
                              cfg.stuff_num_classes,
                              kernel_size=(1, 1)))

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=200,
                             conf_thresh=0.05,
                             nms_thresh=0.5)
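The various "+ 2" channel adjustments guarded by the coordconv flags come from AddCoords concatenating two normalized coordinate channels onto the feature map. A minimal sketch of that behavior, assuming CoordConv-style [-1, 1] normalization (the real AddCoords may differ in details):

import torch
import torch.nn as nn

class AddCoordsSketch(nn.Module):
    # Appends two channels holding the normalized x and y position of each
    # pixel, which is why downstream heads use in_channels += 2.
    def forward(self, x):
        b, _, h, w = x.shape
        ys = torch.linspace(-1, 1, h, device=x.device).view(1, 1, h, 1).expand(b, 1, h, w)
        xs = torch.linspace(-1, 1, w, device=x.device).view(1, 1, 1, w).expand(b, 1, h, w)
        return torch.cat([x, xs, ys], dim=1)

feat = torch.randn(2, 256, 34, 34)
assert AddCoordsSketch()(feat).shape == (2, 258, 34, 34)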
Example #6
    def __init__(self):
        super().__init__()

        # yolact++ cfg.backbone =
        # 'backbone': resnet101_dcn_inter3_backbone.copy({
        #     'selected_layers': list(range(1, 4)),
        #
        #     'pred_aspect_ratios': [[[1, 1 / 2, 2]]] * 5,
        #     'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]],
        #     'use_pixel_scales': True,
        #     'preapply_sqrt': False,
        #     'use_square_anchors': False,
        # })
        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            # e.g. mask_size = 16 gives mask_dim = 16**2 = 256
            cfg.mask_dim = cfg.mask_size**2

        elif cfg.mask_type == mask_type.lincomb:

            # mask_proto_use_grid is False in the stock configs
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            # yolact uses 0
            self.proto_src = cfg.mask_proto_src
            
            if self.proto_src is None:
                in_channels = 3
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features
            else:
                in_channels = self.backbone.channels[self.proto_src]

            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            # yolact ++ proto net
            #     'mask_proto_net': [(256, 3, {'padding': 1})] * 3
            #     + [(None, -2, {}), (256, 3, {'padding': 1})]
            #     + [(32, 1, {})],
            self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        # end of the mask_type if/else

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN(
                # yolact++ 101 selected layers = 1,2,3
                # 2nd  128x4
                # 3rd  256x4
                # 4th  512x4
                [src_channels[i] for i in self.selected_layers]
            )

            # yolact++: len([1, 2, 3]) selected layers + num_downsample (2)
            self.selected_layers = list(  # becomes [0, 1, 2, 3, 4]
                range(len(self.selected_layers) + cfg.fpn.num_downsample))

            # num_features = 256, repeated 5 times
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)


        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # 5 heads, one per selected layer

        # builds one prediction module per selected layer (5 here)
        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None

            # yolact++ share_prediction_module always True
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(
                src_channels[layer_idx],  # in_channels
                src_channels[layer_idx],  # out_channels
                # 'pred_scales': [[1]] * 6
                # 'pred_aspect_ratios': [[[0.66685089, 1.7073535, 0.87508774,
                #                          1.16524493, 0.49059086]]] * 6
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent,
                index=idx)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        # False in the default configs
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

        # True in the default yolact configs
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,     #'nms_top_k': 200,
                             conf_thresh=cfg.nms_conf_thresh,     #'nms_conf_thresh': 0.05
                             nms_thresh=cfg.nms_thresh      #'nms_thresh': 0.5
                             )
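Every variant calls freeze_bn right after constructing the backbone. A hedged sketch of what such a method typically does in YOLACT-style code: put BatchNorm layers in eval mode so their running statistics stop updating, and freeze their affine parameters.

import torch.nn as nn

def freeze_bn_sketch(model: nn.Module):
    # Sketch: freeze every BatchNorm2d so fine-tuning with small batches
    # doesn't corrupt the pretrained running mean/variance.
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()                        # stop updating running stats
            m.weight.requires_grad = False  # freeze affine scale
            m.bias.requires_grad = False    # freeze affine shift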
Example #7
    def __init__(self):
        # super(): call the base class's __init__
        super().__init__()
        print('initializing net...\n')

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()


        # sets self.proto_net and cfg.mask_dim
        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                # cfg.mask_proto_grid_file: data/grid.npy (.npy is a numpy array file)
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src  # 0

            if self.proto_src is None:
                in_channels = 3
            elif cfg.fpn is not None:  # cfg.fpn is an object here, not None
                in_channels = cfg.fpn.num_features
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        #        self.proto_net
        #        Sequential(
        #          (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (1): ReLU(inplace)
        #          (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (3): ReLU(inplace)
        #          (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (5): ReLU(inplace)
        #          (6): InterpolateModule()
        #          (7): ReLU(inplace)
        #          (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (9): ReLU(inplace)
        #          (10): Conv2d(256, 32, kernel_size=(1, 1), stride=(1, 1))
        #)
        #
        #        self.fpn
        #        FPN(
        #          (lat_layers): _ConstModuleList(
        #            (0): WeakScriptModuleProxy()
        #            (1): WeakScriptModuleProxy()
        #            (2): WeakScriptModuleProxy()
        #          )
        #          (pred_layers): _ConstModuleList(
        #            (0): WeakScriptModuleProxy()
        #            (1): WeakScriptModuleProxy()
        #            (2): WeakScriptModuleProxy()
        #          )
        #          (downsample_layers): _ConstModuleList(
        #            (0): WeakScriptModuleProxy()
        #            (1): WeakScriptModuleProxy()
        #          )
        #        )

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        self.prediction_layers = nn.ModuleList()

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(src_channels[layer_idx],
                                    src_channels[layer_idx],
                                    aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                                    scales=cfg.backbone.pred_scales[idx],
                                    parent=parent)

            self.prediction_layers.append(pred)

        # False in this config
        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)
        
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.2, nms_thresh=0.5)
        self.tmp = 1
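Finally, the Detect module constructed at the end of each variant performs eval-time filtering with the parameters shown (conf_thresh, nms_thresh, top_k). A rough sketch of that filtering step using torchvision's NMS; detect_sketch is a hypothetical stand-in, since the real Detect also decodes priors and handles per-class scores:

import torch
from torchvision.ops import nms

def detect_sketch(boxes, scores, conf_thresh=0.05, nms_thresh=0.5, top_k=200):
    # Drop low-confidence candidates, suppress overlaps above the IoU
    # threshold, and keep at most top_k detections.
    keep = scores > conf_thresh
    boxes, scores = boxes[keep], scores[keep]
    order = nms(boxes, scores, nms_thresh)[:top_k]
    return boxes[order], scores[order]

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
scores = torch.tensor([0.90, 0.80, 0.03])
kept_boxes, kept_scores = detect_sketch(boxes, scores)
# The second box is suppressed by NMS (IoU ~ 0.68 with the first) and the
# third falls below conf_thresh, so only the first box survives.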