Example #1
    def __init__(self,
                 encoder: nn.Module,
                 n_classes,
                 final_bias=0.,
                 chs=256,
                 n_anchors=9,
                 flatten=True):
        super().__init__()
        self.n_classes, self.flatten = n_classes, flatten
        imsize = (256, 256)
        # Record the encoder layers where the feature-map size changes.
        sfs_szs = model_sizes(encoder, size=imsize)
        sfs_idxs = list(reversed(_get_sz_change_idxs(sfs_szs)))
        self.sfs = hook_outputs([encoder[i] for i in sfs_idxs])
        self.encoder = encoder
        # FPN top levels: lateral 1x1 conv for P5, strided convs for P6 and P7.
        self.c5top5 = conv2d(sfs_szs[-1][1], chs, ks=1, bias=True)
        self.c5top6 = conv2d(sfs_szs[-1][1], chs, stride=2, bias=True)
        self.p6top7 = nn.Sequential(nn.ReLU(),
                                    conv2d(chs, chs, stride=2, bias=True))
        # Top-down pathway: merge upsampled features with the hooked encoder maps.
        self.merges = nn.ModuleList([
            LateralUpsampleMerge(chs, sfs_szs[idx][1], hook)
            for idx, hook in zip(sfs_idxs[-2:-4:-1], self.sfs[-2:-4:-1])
        ])
        # 3x3 convs to smooth each merged pyramid level.
        self.smoothers = nn.ModuleList(
            [conv2d(chs, chs, 3, bias=True) for _ in range(3)])
        # Shared heads: classification subnet and box-regression subnet.
        self.classifier = self._head_subnet(n_classes,
                                            n_anchors,
                                            final_bias,
                                            chs=chs)
        self.box_regressor = self._head_subnet(4, n_anchors, 0., chs=chs)
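A minimal usage sketch for this constructor. Hedged: the class name RetinaNet and the backbone cut are assumptions not shown in the snippet, and the helpers (conv2d, model_sizes, hook_outputs, _head_subnet, LateralUpsampleMerge) come from the surrounding fastai-style codebase:

import torch
from fastai.vision.all import resnet50, create_body

# `RetinaNet` is an assumed name for the nn.Module this __init__ belongs to.
encoder = create_body(resnet50, pretrained=False, cut=-2)
model = RetinaNet(encoder, n_classes=21, final_bias=-4.)
x = torch.randn(2, 3, 256, 256)  # imsize is hard-coded to (256, 256) above
preds = model(x)                 # output layout depends on forward(), not shown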
Example #2
    def __init__(self,
                 encoder,
                 n_classes,
                 img_size,
                 blur=False,
                 blur_final=True,
                 self_attention=False,
                 y_range=None,
                 bottle=False,  # unused in this no-skip variant
                 act_cls=defaults.activation,
                 init=nn.init.kaiming_normal_,
                 norm_type=None,
                 include_encoder=True,
                 include_middle_conv=True,
                 **kwargs):
        imsize = img_size
        sizes = model_sizes(encoder, size=imsize)
        sz_chg_idxs = list(reversed(_get_sz_change_idxs(sizes)))
        # No hooks: this variant builds the decoder without skip connections.
        # self.sfs = hook_outputs([encoder[i] for i in sz_chg_idxs], detach=False)
        # Trace a dummy batch through the encoder to track feature shapes.
        x = dummy_eval(encoder, imsize).detach()

        layers = []
        if include_encoder:
            layers.append(encoder)

        if include_middle_conv:
            # Bottleneck at the lowest resolution: expand then squeeze channels.
            ni = sizes[-1][1]
            middle_conv = nn.Sequential(
                ConvLayer(ni,
                          ni * 2,
                          act_cls=act_cls,
                          norm_type=norm_type,
                          **kwargs),
                ConvLayer(ni * 2,
                          ni,
                          act_cls=act_cls,
                          norm_type=norm_type,
                          **kwargs)).eval()
            x = middle_conv(x)
            layers += [BatchNorm(ni), nn.ReLU(), middle_conv]

        # Decoder: one upsampling block per encoder size change, with no skip
        # connections back into the encoder (hence "NoSkip").
        for i, idx in enumerate(sz_chg_idxs):
            not_final = (i != len(sz_chg_idxs) - 1)
            up_in_c = int(x.shape[1])
            do_blur = blur and (not_final or blur_final)
            sa = self_attention and (i == len(sz_chg_idxs) - 3)
            noskip_unet_block = NoSkipUnetBlock(up_in_c,
                                                final_div=not_final,
                                                blur=do_blur,
                                                self_attention=sa,
                                                act_cls=act_cls,
                                                init=init,
                                                norm_type=norm_type,
                                                **kwargs).eval()
            layers.append(noskip_unet_block)
            x = noskip_unet_block(x)

        ni = x.shape[1]
        # Upsample once more if the decoder output is still smaller than the input.
        if imsize != sizes[0][-2:]:
            layers.append(
                PixelShuffle_ICNR(ni, act_cls=act_cls, norm_type=norm_type))

        layers += [
            ConvLayer(ni,
                      n_classes,
                      ks=1,
                      act_cls=None,
                      norm_type=norm_type,
                      **kwargs)
        ]

        if include_middle_conv:
            # Note: these indices assume the default layout; with
            # include_encoder=True, layers[2] is the (parameter-free) ReLU,
            # otherwise it is the middle conv itself.
            apply_init(nn.Sequential(layers[3], layers[-2]), init)
            apply_init(nn.Sequential(layers[2]), init)

        if y_range is not None:
            layers.append(SigmoidRange(*y_range))
        super().__init__(*layers)
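A minimal usage sketch for this variant, under the assumption that it is the __init__ of an nn.Sequential subclass; the name NoSkipDynamicUnet is illustrative, inferred from the NoSkipUnetBlock it uses:

import torch
from fastai.vision.all import resnet34, create_body

# `NoSkipDynamicUnet` is an assumed class name.
encoder = create_body(resnet34, n_in=3, pretrained=False, cut=-2)
model = NoSkipDynamicUnet(encoder, n_classes=2, img_size=(128, 128))
with torch.no_grad():
    out = model(torch.randn(1, 3, 128, 128))
print(out.shape)  # expected: torch.Size([1, 2, 128, 128])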
Example #3
    def __init__(self,
                 arch=resnet50,
                 n_classes=32,
                 img_size=(96, 128),
                 blur=False,
                 blur_final=True,
                 y_range=None,
                 last_cross=True,
                 bottle=False,
                 init=nn.init.kaiming_normal_,
                 norm_type=None,
                 self_attention=None,
                 act_cls=defaults.activation,
                 n_in=3,
                 cut=None,
                 **kwargs):
        meta = model_meta.get(arch, _default_meta)
        # Cut the classification head off the backbone to get the encoder.
        encoder = create_body(arch,
                              n_in,
                              pretrained=False,
                              cut=ifnone(cut, meta["cut"]))
        imsize = img_size

        sizes = model_sizes(encoder, size=imsize)
        sz_chg_idxs = list(reversed(_get_sz_change_idxs(sizes)))
        # Hook the layers whose outputs feed the U-Net skip connections.
        self.sfs = hook_outputs([encoder[i] for i in sz_chg_idxs],
                                detach=False)
        x = dummy_eval(encoder, imsize).detach()

        # Bottleneck at the lowest resolution: expand then squeeze channels.
        ni = sizes[-1][1]
        middle_conv = nn.Sequential(
            ConvLayer(ni,
                      ni * 2,
                      act_cls=act_cls,
                      norm_type=norm_type,
                      **kwargs),
            ConvLayer(ni * 2,
                      ni,
                      act_cls=act_cls,
                      norm_type=norm_type,
                      **kwargs),
        ).eval()
        x = middle_conv(x)
        layers = [encoder, BatchNorm(ni), nn.ReLU(), middle_conv]

        # Decoder: one UnetBlock per size change, fed by the matching skip hook.
        for i, idx in enumerate(sz_chg_idxs):
            not_final = i != len(sz_chg_idxs) - 1
            up_in_c, x_in_c = int(x.shape[1]), int(sizes[idx][1])
            do_blur = blur and (not_final or blur_final)
            sa = self_attention and (i == len(sz_chg_idxs) - 3)
            unet_block = UnetBlock(up_in_c,
                                   x_in_c,
                                   self.sfs[i],
                                   final_div=not_final,
                                   blur=do_blur,
                                   self_attention=sa,
                                   act_cls=act_cls,
                                   init=init,
                                   norm_type=norm_type,
                                   **kwargs).eval()
            layers.append(unet_block)
            x = unet_block(x)

        ni = x.shape[1]
        if imsize != sizes[0][-2:]:
            layers.append(
                PixelShuffle_ICNR(ni, act_cls=act_cls, norm_type=norm_type))
        layers.append(ResizeToOrig())  # crop/resize back to the input size
        if last_cross:
            # Final dense cross connection with the network input.
            layers.append(MergeLayer(dense=True))
            ni += in_channels(encoder)
            layers.append(
                ResBlock(1,
                         ni,
                         ni // 2 if bottle else ni,
                         act_cls=act_cls,
                         norm_type=norm_type,
                         **kwargs))
        layers += [
            ConvLayer(ni,
                      n_classes,
                      ks=1,
                      act_cls=None,
                      norm_type=norm_type,
                      **kwargs)
        ]
        apply_init(nn.Sequential(layers[3], layers[-2]), init)
        # apply_init(nn.Sequential(layers[2]), init)
        if y_range is not None:
            layers.append(SigmoidRange(*y_range))
        super().__init__(*layers)
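A usage sketch, assuming this is the __init__ of a self-contained, nn.Sequential-based U-Net class that builds its own encoder; the class name DynamicUnet echoes fastai's, but whether this snippet is that exact class is an assumption:

import torch

# `DynamicUnet` is an assumed class name; all arguments have defaults above.
model = DynamicUnet(arch=resnet50, n_classes=32, img_size=(96, 128))
with torch.no_grad():
    masks = model(torch.randn(1, 3, 96, 128))
print(masks.shape)  # expected: torch.Size([1, 32, 96, 128])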