Example #1
def test_abn_repr():
    """Checks that activation param is present in repr"""
    l = modules.bn_from_name("frozen_abn")(10)
    expected = "ABN(10, eps=1e-05, momentum=0.1, affine=True, activation=ACT.LEAKY_RELU[0.01], frozen=True)"
    assert repr(l) == expected

    l2 = modules.bn_from_name("estimated_abn")(10, activation="relu")
    expected2 = "ABN(10, eps=1e-05, momentum=0.1, affine=True, activation=ACT.RELU, estimated_stats=True)"
    assert repr(l2) == expected2
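
These tests exercise `bn_from_name`, which resolves a string key such as "frozen_abn" or "estimated_abn" to an activated-normalization class. The actual registry in pytorch-tools is not shown in these snippets, so the following is only a minimal sketch of the lookup pattern, with plain `torch.nn` layers standing in for ABN and friends:

```python
import torch.nn as nn

# Minimal sketch of a name -> norm-layer registry (an assumption about how
# bn_from_name works; the real pytorch-tools implementation may differ).
_NORM_LAYERS = {
    "abn": nn.BatchNorm2d,  # stand-in for ABN (activated batch norm)
    "none": nn.Identity,    # "no norm": Identity ignores any constructor args
}

def bn_from_name(name: str):
    """Return the norm-layer class registered under `name`."""
    key = name.lower().replace("_", "")  # so "frozen_abn" and "frozenabn" map to the same key
    if key not in _NORM_LAYERS:
        raise ValueError(f"Unknown norm layer: {name!r}")
    return _NORM_LAYERS[key]

print(bn_from_name("abn")(10))   # BatchNorm2d(10, eps=1e-05, momentum=0.1, ...)
print(bn_from_name("none")(10))  # Identity()
```

Both spellings "frozen_abn" (Example #1) and "frozenabn" (Example #21) appear in these snippets, which suggests the real lookup normalizes the name in a similar way.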
Example #2
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        num_classes=1,
        drop_rate=0,
        decoder_attention_type=None,
        encoder_norm_layer="abn",
        encoder_norm_act="relu",
        decoder_norm_layer="abn",
        decoder_norm_act="relu",
        **encoder_params,
    ):
        encoder = get_encoder(
            encoder_name,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )

        decoder = LinknetDecoder(
            encoder_channels=encoder.out_shapes,
            prefinal_channels=32,
            final_channels=num_classes,
            drop_rate=drop_rate,
            attn_type=decoder_attention_type,
            norm_layer=bn_from_name(decoder_norm_layer),
            norm_act=decoder_norm_act,
        )

        super().__init__(encoder, decoder)

        self.name = f"link-{encoder_name}"
Example #3
    def __init__(
        self,
        layers,
        pretrained=None,  # not used. here for proper signature.
        num_classes=1000,
        in_channels=3,
        encoder=False,
        antialias=False,
        norm_layer="abn",
        norm_act="relu",
    ):

        super(VGG, self).__init__()
        self.in_channels = in_channels
        self.norm_act = norm_act
        self.norm_layer = bn_from_name(norm_layer)
        self.encoder = encoder
        self.antialias = antialias
        self.features = self._make_layers(layers)
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        if not encoder:
            self.classifier = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, num_classes),
            )
        else:
            self.forward = self.encoder_features

        initialize(self)
Example #4
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        decoder_channels=(256, 128, 64, 32, 16),
        num_classes=1,
        center=False,  # useful for VGG models
        drop_rate=0,
        norm_layer="abn",
        norm_act="relu",
        **encoder_params,
    ):
        encoder = get_encoder(
            encoder_name,
            norm_layer=norm_layer,
            norm_act=norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        decoder = UnetDecoder(
            encoder_channels=encoder.out_shapes,
            decoder_channels=decoder_channels,
            final_channels=num_classes,
            center=center,
            drop_rate=drop_rate,
            norm_layer=bn_from_name(norm_layer),
            norm_act=norm_act,
        )

        super().__init__(encoder, decoder)
        self.name = f"u-{encoder_name}"
Example #5
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        num_classes=1,
        norm_layer="abn",
        norm_act="relu",
        **encoder_params,
    ):
        encoder = get_encoder(
            encoder_name,
            norm_layer=norm_layer,
            norm_act=norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )

        decoder = LinknetDecoder(
            encoder_channels=encoder.out_shapes,
            prefinal_channels=32,
            final_channels=num_classes,
            norm_layer=bn_from_name(norm_layer),
            norm_act=norm_act,
        )

        super().__init__(encoder, decoder)

        self.name = f"link-{encoder_name}"
Example #6
    def __init__(
        self,
        pretrained="coco",  # not used here for proper signature
        encoder_name="resnet50",
        encoder_weights="imagenet",
        pyramid_channels=256,
        num_classes=80,
        # drop_connect_rate=0, # TODO: add
        encoder_norm_layer="abn",
        encoder_norm_act="relu",
        decoder_norm_layer="none",  # None by default to match detectron & mmdet versions
        decoder_norm_act="relu",
        **encoder_params,
    ):
        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        norm_layer = bn_from_name(decoder_norm_layer)
        self.pyramid6 = nn.Sequential(
            conv3x3(self.encoder.out_shapes[0], pyramid_channels, 2,
                    bias=True),
            norm_layer(pyramid_channels, activation="identity"),
        )
        self.pyramid7 = nn.Sequential(
            conv3x3(pyramid_channels, pyramid_channels, 2, bias=True),
            norm_layer(pyramid_channels, activation="identity"),
        )
        self.fpn = FPN(self.encoder.out_shapes[:-2],
                       pyramid_channels=pyramid_channels)

        def make_final_convs():
            layers = []
            for _ in range(4):
                layers += [
                    conv3x3(pyramid_channels, pyramid_channels, bias=True)
                ]
                # Norm here is fine for GroupNorm, but for BN it should be implemented differently
                # (see EffDet for an example). This implementation may need to change to align with EffDet.
                layers += [
                    norm_layer(pyramid_channels, activation=decoder_norm_act)
                ]
            return nn.Sequential(*layers)

        anchors_per_location = 9
        self.cls_convs = make_final_convs()
        self.cls_head_conv = conv3x3(pyramid_channels,
                                     num_classes * anchors_per_location,
                                     bias=True)
        self.box_convs = make_final_convs()
        self.box_head_conv = conv3x3(pyramid_channels,
                                     4 * anchors_per_location,
                                     bias=True)
        self.num_classes = num_classes
        self._initialize_weights()
Example #7
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        pyramid_channels=256,
        num_fpn_layers=1,
        segmentation_channels=128,
        num_classes=1,
        merge_policy="add",
        last_upsample=True,
        output_stride=32,
        drop_rate=0,
        norm_layer="abn",
        norm_act="relu",
        **encoder_params,
    ):
        super().__init__()
        if output_stride != 32:
            encoder_params["output_stride"] = output_stride
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=norm_layer,
            norm_act=norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )

        bn_args = {
            "norm_layer": bn_from_name(norm_layer),
            "norm_act": norm_act
        }

        self.fpn = self.__class__.FEATURE_PYRAMID(
            self.encoder.out_shapes[:-1],  # only want features from 1/4 to 1/32
            pyramid_channels=pyramid_channels,
            num_layers=num_fpn_layers,
            output_stride=output_stride,
            **bn_args,
        )

        self.decoder = PanopticDecoder(
            pyramid_channels=pyramid_channels,
            segmentation_channels=segmentation_channels,
            merge_policy=merge_policy,
            upsamples=[2, 2, 1, 0] if output_stride == 16 else [3, 2, 1, 0],
            **bn_args,
        )
        if merge_policy == "cat":
            segmentation_channels *= 4

        self.dropout = nn.Dropout2d(drop_rate, inplace=True)
        self.segm_head = conv1x1(segmentation_channels, num_classes)
        self.upsample = nn.Upsample(
            scale_factor=4,
            mode="bilinear") if last_upsample else nn.Identity()
        self.name = f"segm-fpn-{encoder_name}"
Example #8
def test_frozen_abn():
    l = modules.bn_from_name("frozen_abn")(10)
    assert list(l.parameters()) == []
    l = modules.ABN(10, frozen=True)
    assert list(l.parameters()) == []
    # check that passing tensor through frozen ABN won't update stats
    running_mean_original = l.running_mean.clone()
    running_var_original = l.running_var.clone()
    l(INP)
    assert torch.allclose(running_mean_original, l.running_mean)
    assert torch.allclose(running_var_original, l.running_var)
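
The test above pins down what `frozen=True` means: the layer exposes no trainable parameters and its running statistics never change during forward passes. Below is a self-contained sketch of such a frozen batch norm; the class is hypothetical, and the activation fused into the real ABN is omitted:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Hypothetical FrozenBN2d: a sketch of what `frozen=True` appears to mean in
# the test above (no trainable parameters, statistics never updated).
class FrozenBN2d(nn.Module):
    def __init__(self, num_features, eps=1e-5):
        super().__init__()
        self.eps = eps
        # buffers, not parameters -> list(module.parameters()) == []
        self.register_buffer("weight", torch.ones(num_features))
        self.register_buffer("bias", torch.zeros(num_features))
        self.register_buffer("running_mean", torch.zeros(num_features))
        self.register_buffer("running_var", torch.ones(num_features))

    def forward(self, x):
        # training=False -> always normalize with the stored statistics, never update them
        return F.batch_norm(
            x, self.running_mean, self.running_var, self.weight, self.bias,
            training=False, eps=self.eps,
        )

bn = FrozenBN2d(10)
assert list(bn.parameters()) == []
inp = torch.randn(2, 10, 8, 8)
mean_before = bn.running_mean.clone()
bn(inp)
assert torch.allclose(mean_before, bn.running_mean)  # stats untouched, as in the test
```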
Example #9
    def __init__(
        self,
        encoder_name="efficientnet_b0",
        encoder_weights="imagenet",
        pyramid_channels=128,
        head_channels=256,
        num_classes=1,
        last_upsample=True,
        encoder_norm_layer="abn",
        encoder_norm_act="swish",
        decoder_norm_layer="abn",
        decoder_norm_act="swish",
        **encoder_params,
    ):
        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        norm_layer = bn_from_name(decoder_norm_layer)
        bn_args = dict(norm_layer=norm_layer, norm_act=decoder_norm_act)

        self.bifpn = BiFPN(
            # pass P2-P5
            encoder_channels=self.encoder.out_shapes[:-1],
            pyramid_channels=pyramid_channels,
            num_layers=3,  # hardcode num_fpn_layers=3
            **bn_args,
        )

        self.cls_head_conv = nn.Sequential(
            DepthwiseSeparableConv(pyramid_channels, head_channels, **bn_args),
            DepthwiseSeparableConv(head_channels, head_channels, **bn_args),
            DepthwiseSeparableConv(head_channels, num_classes, use_norm=False),
        )

        self.upsample = nn.Upsample(
            scale_factor=4,
            mode="bilinear") if last_upsample else nn.Identity()

        self.num_classes = num_classes

        patch_bn_mom(self, 0.01)
        # set last layer bias for better convergence with sigmoid loss
        # -4.59 = -np.log((1 - 0.01) / 0.01)
        nn.init.constant_(self.cls_head_conv[-1][1].bias, -4.59)
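
The `-4.59` constant is the usual prior-probability initialization for a sigmoid classification head (popularized by the RetinaNet/focal-loss paper): setting the last bias to `-log((1 - p) / p)` with `p = 0.01` makes the head output roughly 1% positives at the start of training, which stabilizes early optimization with a sigmoid loss. A quick numeric check:

```python
import math

p = 0.01
bias = -math.log((1 - p) / p)
print(bias)                       # ~ -4.595, rounded to -4.59 in the code above
print(1 / (1 + math.exp(-bias)))  # 0.01 -> the initial sigmoid output equals the chosen prior
```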
Example #10
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        pyramid_channels=256,
        num_classes=80,
        norm_layer="abn",
        norm_act="relu",
        **encoder_params,
    ):
        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=norm_layer,
            norm_act=norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        norm_layer = bn_from_name(norm_layer)
        self.pyramid6 = conv3x3(256, 256, 2, bias=True)
        self.pyramid7 = conv3x3(256, 256, 2, bias=True)
        self.fpn = FPN(
            self.encoder.out_shapes[:-2],
            pyramid_channels=pyramid_channels,
        )

        def make_head(out_size):
            layers = []
            for _ in range(4):
                # some implementations don't use BN here but I think it's needed
                # TODO: test how it affects results
                layers += [
                    nn.Conv2d(256, 256, 3, padding=1),
                    norm_layer(256, activation=norm_act)
                ]
                # layers += [nn.Conv2d(256, 256, 3, padding=1), nn.ReLU()]

            layers += [nn.Conv2d(256, out_size, 3, padding=1)]
            return nn.Sequential(*layers)

        self.ratios = [1.0, 2.0, 0.5]
        self.scales = [4 * 2**(i / 3) for i in range(3)]
        anchors = len(self.ratios) * len(self.scales)  # 9

        self.cls_head = make_head(num_classes * anchors)
        self.box_head = make_head(4 * anchors)
Example #11
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        decoder_channels=(256, 128, 64, 32, 16),
        num_classes=1,
        center=False,  # useful for VGG models
        output_stride=32,
        drop_rate=0,
        decoder_attention_type=None,
        encoder_norm_layer="abn",
        encoder_norm_act="relu",
        decoder_norm_layer="abn",
        decoder_norm_act="relu",
        sigmoid_init=True,
        **encoder_params,
    ):
        if output_stride != 32:
            encoder_params["output_stride"] = output_stride
        encoder = get_encoder(
            encoder_name,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        decoder = UnetDecoder(
            encoder_channels=encoder.out_shapes,
            decoder_channels=decoder_channels,
            final_channels=num_classes,
            center=center,
            drop_rate=drop_rate,
            output_stride=output_stride,
            attn_type=decoder_attention_type,
            norm_layer=bn_from_name(decoder_norm_layer),
            norm_act=decoder_norm_act,
        )

        super().__init__(encoder, decoder)
        self.name = f"u-{encoder_name}"
        if sigmoid_init:
            # set last layer bias for better convergence with sigmoid loss
            # -4.59 = -np.log((1 - 0.01) / 0.01)
            nn.init.constant_(self.decoder.final_conv.bias, -4.59)
Example #12
    def __init__(
        self,
        encoder_name="resnet34",
        encoder_weights="imagenet",
        num_classes=1,
        last_upsample=True,
        aspp_dilation_rates=[6, 12, 18],
        output_stride=16,
        drop_rate=0,
        encoder_norm_layer="abn",
        encoder_norm_act="relu",
        decoder_norm_layer="abn",
        decoder_norm_act="relu",
        **encoder_params,
    ):

        encoder = get_encoder(
            encoder_name,
            encoder_weights=encoder_weights,
            output_stride=output_stride,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            **encoder_params,
        )

        decoder = DeepLabHead(
            encoder_channels=encoder.out_shapes,
            num_classes=num_classes,
            dilation_rates=aspp_dilation_rates,
            output_stride=output_stride,
            drop_rate=drop_rate,
            norm_layer=bn_from_name(decoder_norm_layer),
            norm_act=decoder_norm_act,
        )

        super().__init__(encoder, decoder)
        self.upsample = nn.Upsample(
            scale_factor=4,
            mode="bilinear") if last_upsample else nn.Identity()
        self.name = f"deeplabv3plus-{encoder_name}"
Example #13
    def __init__(
        self,
        block=None,
        layers=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        use_se=False,
        groups=1,
        base_width=64,
        deep_stem=False,
        output_stride=32,
        norm_layer="abn",
        norm_act="relu",
        antialias=False,
        encoder=False,
        drop_rate=0.0,
        drop_connect_rate=0.0,
        global_pool="avg",
        init_bn0=True,
    ):

        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.inplanes = stem_width
        self.num_classes = num_classes
        self.groups = groups
        self.base_width = base_width
        self.block = block
        self.expansion = block.expansion
        self.norm_act = norm_act
        self.block_idx = 0
        self.num_blocks = sum(layers)
        self.drop_connect_rate = drop_connect_rate
        super(ResNet, self).__init__()

        if deep_stem:
            self.conv1 = nn.Sequential(
                conv3x3(in_channels, stem_width // 2, 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width // 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width),
            )
        else:
            self.conv1 = nn.Conv2d(in_channels,
                                   stem_width,
                                   kernel_size=7,
                                   stride=2,
                                   padding=3,
                                   bias=False)
        self.bn1 = norm_layer(stem_width, activation=norm_act)
        self.maxpool = nn.MaxPool2d(
            kernel_size=3,
            stride=2,
            padding=0 if use_se else 1,
            ceil_mode=True if use_se else False,
        )
        if output_stride not in [8, 16, 32]:
            raise ValueError("Output stride should be in [8, 16, 32]")
        if output_stride == 8:
            stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
        elif output_stride == 16:
            stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
        elif output_stride == 32:
            stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1
        largs = dict(use_se=use_se,
                     norm_layer=norm_layer,
                     norm_act=norm_act,
                     antialias=antialias)
        self.layer1 = self._make_layer(64, layers[0], stride=1, **largs)
        self.layer2 = self._make_layer(128, layers[1], stride=2, **largs)
        self.layer3 = self._make_layer(256,
                                       layers[2],
                                       stride=stride_3,
                                       dilation=dilation_3,
                                       **largs)
        self.layer4 = self._make_layer(512,
                                       layers[3],
                                       stride=stride_4,
                                       dilation=dilation_4,
                                       **largs)
        self.global_pool = GlobalPool2d(global_pool)
        self.num_features = 512 * self.expansion
        self.encoder = encoder
        if not encoder:
            self.dropout = nn.Dropout(p=drop_rate, inplace=True)
            self.last_linear = nn.Linear(
                self.num_features * self.global_pool.feat_mult(), num_classes)
        else:
            self.forward = self.encoder_features

        self._initialize_weights(init_bn0)
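
The `output_stride` branch trades stride for dilation in the last two stages: the stem and max-pool already reduce resolution by 4 and `layer2` always has stride 2, so the overall downsampling is `4 * 2 * stride_3 * stride_4`, while the matching dilations keep the effective receptive field comparable. A quick check of that arithmetic:

```python
# total downsampling = stem (2) * maxpool (2) * layer2 (2) * stride_3 * stride_4
configs = {
    8:  dict(stride_3=1, stride_4=1, dilation_3=2, dilation_4=4),
    16: dict(stride_3=2, stride_4=1, dilation_3=1, dilation_4=2),
    32: dict(stride_3=2, stride_4=2, dilation_3=1, dilation_4=1),
}
for output_stride, cfg in configs.items():
    total = 2 * 2 * 2 * cfg["stride_3"] * cfg["stride_4"]
    assert total == output_stride, (output_stride, total)
    print(output_stride, cfg)
```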
Example #14
    def __init__(
        self,
        encoder_name="hrnet_w18",
        encoder_weights="imagenet",
        pretrained=None,  # not used 
        num_classes=1,
        last_upsample=True,
        OCR=False,
        drop_rate=0,
        norm_layer="inplace_abn",  # use memory efficient by default
        norm_act="leaky_relu",
        **encoder_params,
    ):

        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            encoder_weights=encoder_weights,
            norm_layer=norm_layer,
            norm_act=norm_act,
            **encoder_params,
        )
        norm_layer = bn_from_name(norm_layer)
        final_channels = sum(self.encoder.out_shapes[:4])

        self.OCR = OCR
        if OCR:
            self.conv3x3 = nn.Sequential(
                conv3x3(final_channels, 512, bias=True),
                norm_layer(512, activation=norm_act),
            )
            self.ocr_gather_head = SpatialOCR_Gather()
            self.ocr_distri_head = SpatialOCR(in_channels=512,
                                              key_channels=256,
                                              out_channels=512,
                                              norm_layer=norm_layer,
                                              norm_act=norm_act)
            self.head = conv1x1(512, num_classes, bias=True)
            self.aux_head = nn.Sequential(  # in OCR first conv is 3x3
                conv3x3(final_channels, final_channels, bias=True),
                norm_layer(final_channels, activation=norm_act),
                conv1x1(final_channels, num_classes, bias=True),
            )
        else:
            self.head = nn.Sequential(
                conv1x1(final_channels, final_channels, bias=True),
                norm_layer(final_channels, activation=norm_act),
                conv1x1(final_channels, num_classes, bias=True),
            )

        up_kwargs = dict(mode="bilinear", align_corners=True)
        self.up_x2 = nn.Upsample(scale_factor=2, **up_kwargs)
        self.up_x4 = nn.Upsample(scale_factor=4, **up_kwargs)
        self.up_x8 = nn.Upsample(scale_factor=8, **up_kwargs)
        self.last_upsample = nn.Upsample(
            scale_factor=4, **up_kwargs) if last_upsample else nn.Identity()
        self.dropout = nn.Dropout2d(
            drop_rate)  # can't use inplace. it would raise a backprop error
        self.name = f"segm-{encoder_name}"
        # use lower momentum
        patch_bn_mom(self)
        self._init_weights()
Example #15
def test_agn_repr():
    """Checks that repr for AGN includes number of groups"""
    l = modules.bn_from_name("agn")(10, num_groups=2, activation="leaky_relu")
    expected = "AGN(10, num_groups=2, eps=1e-05, affine=True, activation=ACT.LEAKY_RELU[0.01])"
    assert repr(l) == expected
Example #16
    def __init__(
        self,
        block=None,
        layers=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        attn_type=None,
        groups=1,
        base_width=64,
        stem_type="",
        output_stride=32,
        norm_layer="abn",
        norm_act="relu",
        antialias=False,
        encoder=False,
        drop_rate=0.0,
        drop_connect_rate=0.0,
        init_bn0=True,
    ):

        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.inplanes = stem_width
        self.num_classes = num_classes
        self.groups = groups
        self.base_width = base_width
        self.block = block
        self.expansion = block.expansion
        self.norm_act = norm_act
        self.block_idx = 0
        self.num_blocks = sum(layers)
        self.drop_connect_rate = drop_connect_rate
        super(ResNet, self).__init__()

        # move stem creation in separate function for simplicity
        self._make_stem(stem_type, stem_width, in_channels, norm_layer,
                        norm_act)

        if output_stride not in [8, 16, 32]:
            raise ValueError("Output stride should be in [8, 16, 32]")
        if output_stride == 8:
            stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
        elif output_stride == 16:
            stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
        elif output_stride == 32:
            stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1
        largs = dict(attn_type=attn_type,
                     norm_layer=norm_layer,
                     norm_act=norm_act,
                     antialias=antialias)
        self.layer1 = self._make_layer(64, layers[0], stride=1, **largs)
        self.layer2 = self._make_layer(128, layers[1], stride=2, **largs)
        self.layer3 = self._make_layer(256,
                                       layers[2],
                                       stride=stride_3,
                                       dilation=dilation_3,
                                       **largs)
        self.layer4 = self._make_layer(512,
                                       layers[3],
                                       stride=stride_4,
                                       dilation=dilation_4,
                                       **largs)
        self.global_pool = FastGlobalAvgPool2d()
        self.num_features = 512 * self.expansion
        self.encoder = encoder
        if not encoder:
            self.dropout = nn.Dropout(p=drop_rate, inplace=True)
            self.last_linear = nn.Linear(self.num_features, num_classes)
        else:
            self.forward = self.encoder_features

        self._initialize_weights(init_bn0)
Example #17
def test_estimated_abn():
    """Checks that init works and output is the same in eval mode"""
    est_bn = modules.bn_from_name("estimated_abn")(10).eval()
    bn = modules.bn_from_name("estimated_abn")(10).eval()
    est_bn.load_state_dict(bn.state_dict())
    assert torch.allclose(est_bn(INP), bn(INP))
Example #18
    def __init__(
        self,
        layers=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        width_factor=1.0,
        output_stride=32,
        norm_layer="inplaceabn",
        norm_act="leaky_relu",
        encoder=False,
        drop_rate=0.0,
        drop_connect_rate=0.0,
    ):
        nn.Module.__init__(self)
        stem_width = int(64 * width_factor)
        norm_layer = bn_from_name(norm_layer)
        self.inplanes = stem_width
        self.num_classes = num_classes
        self.groups = 1  # not really used but needed inside _make_layer
        self.base_width = 64  # used inside _make_layer
        self.norm_act = norm_act
        self.block_idx = 0
        self.num_blocks = sum(layers)
        self.drop_connect_rate = drop_connect_rate

        self._make_stem("space2depth", stem_width, in_channels, norm_layer,
                        norm_act)

        if output_stride not in [8, 16, 32]:
            raise ValueError("Output stride should be in [8, 16, 32]")
        # TODO add OS later
        # if output_stride == 8:
        # stride_3, stride_4, dilation_3, dilation_4 = 1, 1, 2, 4
        # elif output_stride == 16:
        # stride_3, stride_4, dilation_3, dilation_4 = 2, 1, 1, 2
        # elif output_stride == 32:
        stride_3, stride_4, dilation_3, dilation_4 = 2, 2, 1, 1

        largs = dict(attn_type="se",
                     norm_layer=norm_layer,
                     norm_act=norm_act,
                     antialias=True)
        self.block = TBasicBlock
        self.expansion = TBasicBlock.expansion
        self.layer1 = self._make_layer(stem_width,
                                       layers[0],
                                       stride=1,
                                       **largs)
        self.layer2 = self._make_layer(stem_width * 2,
                                       layers[1],
                                       stride=2,
                                       **largs)

        self.block = TBottleneck  # first 2 - Basic, last 2 - Bottleneck
        self.expansion = TBottleneck.expansion
        self.layer3 = self._make_layer(stem_width * 4,
                                       layers[2],
                                       stride=stride_3,
                                       dilation=dilation_3,
                                       **largs)
        largs.update(attn_type=None)  # no se in last layer
        self.layer4 = self._make_layer(stem_width * 8,
                                       layers[3],
                                       stride=stride_4,
                                       dilation=dilation_4,
                                       **largs)
        self.global_pool = FastGlobalAvgPool2d(flatten=True)
        self.num_features = stem_width * 8 * self.expansion
        self.encoder = encoder
        if not encoder:
            self.dropout = nn.Dropout(p=drop_rate, inplace=True)
            self.last_linear = nn.Linear(self.num_features, num_classes)
        else:
            self.forward = self.encoder_features

        self._initialize_weights(init_bn0=True)
Example #19
def test_frozen_abn():
    l = modules.bn_from_name("frozen_abn")(10)
    assert list(l.parameters()) == []
    l = modules.ABN(10, frozen=True)
    assert list(l.parameters()) == []
Example #20
def test_abcn():
    """Check that abcn init and forward works"""
    l = modules.bn_from_name("abcn")(10, num_groups=2)
Example #21
    def __init__(
        self,
        pretrained="coco",  # Not used. here for proper signature
        encoder_name="efficientnet_b0",
        encoder_weights="imagenet",
        pyramid_channels=64,
        num_fpn_layers=3,
        num_head_repeats=3,
        num_classes=90,
        encoder_norm_layer="frozenabn",
        encoder_norm_act="swish",
        decoder_norm_layer="abn",
        decoder_norm_act="swish",
        match_tf_same_padding=False,
        anchors_per_location=9,
        **encoder_params,
    ):
        super().__init__()
        self.encoder = get_encoder(
            encoder_name,
            norm_layer=encoder_norm_layer,
            norm_act=encoder_norm_act,
            encoder_weights=encoder_weights,
            **encoder_params,
        )
        norm_layer = bn_from_name(decoder_norm_layer)
        bn_args = dict(norm_layer=norm_layer, norm_act=decoder_norm_act)
        self.pyramid6 = nn.Sequential(
            conv1x1(self.encoder.out_shapes[0], pyramid_channels, bias=True),
            norm_layer(pyramid_channels, activation="identity"),
            nn.MaxPool2d(3, stride=2, padding=1),
        )
        self.pyramid7 = nn.MaxPool2d(
            3, stride=2, padding=1)  # in EffDet it's a simple maxpool

        self.bifpn = BiFPN(
            encoder_channels=(pyramid_channels, ) * 2 +
            self.encoder.out_shapes[:-2],
            pyramid_channels=pyramid_channels,
            num_layers=num_fpn_layers,
            **bn_args,
        )

        def make_head(out_size):
            layers = []
            for _ in range(num_head_repeats):
                layers += [
                    DepthwiseSeparableConv(pyramid_channels,
                                           pyramid_channels,
                                           use_norm=False)
                ]
            return nn.ModuleList(layers)

        # The convolution layers in the head are shared among all levels, but
        # each level has its own batch normalization to capture the statistical
        # differences between levels.
        def make_head_norm():
            return nn.ModuleList([
                nn.ModuleList([
                    norm_layer(pyramid_channels, activation=decoder_norm_act)
                    for _ in range(num_head_repeats)
                ]) for _ in range(5)
            ])

        self.cls_convs = make_head(num_classes * anchors_per_location)
        self.cls_head_conv = DepthwiseSeparableConv(pyramid_channels,
                                                    num_classes *
                                                    anchors_per_location,
                                                    use_norm=False)
        self.cls_norms = make_head_norm()

        self.box_convs = make_head(4 * anchors_per_location)
        self.box_head_conv = DepthwiseSeparableConv(pyramid_channels,
                                                    4 * anchors_per_location,
                                                    use_norm=False)
        self.box_norms = make_head_norm()

        self.num_classes = num_classes
        self.num_head_repeats = num_head_repeats

        patch_bn_tf(self)
        self._initialize_weights()
        if match_tf_same_padding:
            conv_to_same_conv(self)
            maxpool_to_same_maxpool(self)
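
The comments in the head construction describe the EfficientDet pattern of sharing the head convolutions across pyramid levels while keeping a separate set of norm layers per level. The forward pass is not shown in this snippet, so the sketch below is only an assumption about how `cls_convs`/`cls_norms` (and the box counterparts) are likely combined, with plain `Conv2d`/`BatchNorm2d` standing in for the real depthwise-separable blocks:

```python
import torch
import torch.nn as nn

def shared_head_forward(features, convs, norms, head_conv, act=nn.SiLU()):
    """Apply convolutions shared across levels; each level uses its own norms.

    features:  list of feature maps, one per pyramid level (e.g. P3..P7)
    convs:     ModuleList of repeat convs, shared by all levels
    norms:     ModuleList of per-level ModuleLists of norm layers
    head_conv: final conv producing the per-anchor predictions
    """
    outputs = []
    for level, feat in enumerate(features):
        for conv, norm in zip(convs, norms[level]):
            feat = act(norm(conv(feat)))  # same conv weights, level-specific norm
        outputs.append(head_conv(feat))
    return outputs

# toy check: 5 levels, 3 head repeats, 8 channels
convs = nn.ModuleList([nn.Conv2d(8, 8, 3, padding=1) for _ in range(3)])
norms = nn.ModuleList(
    [nn.ModuleList([nn.BatchNorm2d(8) for _ in range(3)]) for _ in range(5)]
)
head_conv = nn.Conv2d(8, 4 * 9, 3, padding=1)
features = [torch.randn(2, 8, s, s) for s in (64, 32, 16, 8, 4)]
outs = shared_head_forward(features, convs, norms, head_conv)
print([tuple(o.shape) for o in outs])  # spatial size preserved per level
```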
Example #22
    def __init__(
            self,
            growth_rate=None,
            block_config=None,
            pretrained=None,  # not used. here for proper signature
            num_classes=1000,
            drop_rate=0.0,
            in_channels=3,
            norm_layer="abn",
            norm_act="relu",
            deep_stem=False,
            stem_width=64,
            encoder=False,
            global_pool="avg",
            memory_efficient=True,
            output_stride=32,  # not used! only here to allow using as encoder
    ):

        super(DenseNet, self).__init__()
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        if deep_stem:
            self.conv0 = nn.Sequential(
                conv3x3(in_channels, stem_width // 2, 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width // 2),
                norm_layer(stem_width // 2, activation=norm_act),
                conv3x3(stem_width // 2, stem_width, 2),
            )
        else:
            self.conv0 = nn.Conv2d(in_channels,
                                   stem_width,
                                   kernel_size=7,
                                   stride=2,
                                   padding=3,
                                   bias=False)

        self.norm0 = norm_layer(stem_width, activation=norm_act)
        self.pool0 = nn.MaxPool2d(kernel_size=3,
                                  stride=2,
                                  padding=1,
                                  ceil_mode=False)

        largs = dict(
            growth_rate=growth_rate,
            drop_rate=drop_rate,
            memory_efficient=memory_efficient,
            norm_layer=norm_layer,
            norm_act=norm_act,
        )
        in_planes = stem_width
        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers, in_planes, **largs)
            setattr(self, f"denseblock{i+1}", block)
            in_planes += num_layers * growth_rate
            if i != len(block_config) - 1:
                trans = _Transition(in_planes=in_planes,
                                    out_planes=in_planes // 2,
                                    norm_layer=norm_layer,
                                    norm_act=norm_act)
                setattr(self, f"transition{i+1}", trans)
                in_planes //= 2

        # Final normalization
        self.norm5 = nn.BatchNorm2d(in_planes)

        # Linear layer
        self.encoder = encoder
        if not encoder:
            self.global_pool = GlobalPool2d(global_pool)
            self.classifier = nn.Linear(in_planes, num_classes)
        else:
            assert len(block_config) == 4, "Need 4 blocks to use as encoder"
            self.forward = self.encoder_features
Example #23
    def __init__(
            self,
            stage_fns=None,  # list of nn.Module
            block_fns=None,  # list of nn.Module
            stage_args=None,  # list of dicts
            layers=None,  # num layers in each block
            channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
            # pretrained=None,  # not used. here for proper signature
            num_classes=1000,
            in_channels=3,
            norm_layer="abn",
            norm_act="leaky_relu",
            head_norm_act="leaky_relu",  # activation in head
            stem_type="default",
            # antialias=False,
            # encoder=False,
            # drop_rate=0.0,
            drop_connect_rate=0.0,
            head_width=2048,
            stem_width=64,
            head_type="default",  # type of head
    ):
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        self.norm_act = norm_act
        self.block_idx = 0  # for drop connect
        self.drop_connect_rate = drop_connect_rate
        super().__init__()

        first_norm = nn.Identity() if block_fns[0].startswith(
            "Pre") else norm_layer(stem_width, activation=norm_act)
        if stem_type == "default":
            self.stem_conv1 = nn.Sequential(
                conv3x3(in_channels, stem_width, stride=2), first_norm)
        elif stem_type == "s2d":
            # instead of the default stem this uses Space2Depth followed by conv; no norm here because
            # DarkStage starts with one (the non-PreAct version does add a norm, see first_norm above)
            self.stem_conv1 = nn.Sequential(
                SpaceToDepth(block_size=2),
                conv3x3(in_channels * 4, stem_width),
                first_norm,
            )
        else:
            raise ValueError(f"Stem type `{stem_type}` is not supported")

        bn_args = dict(norm_layer=norm_layer, norm_act=norm_act)
        block_name_to_module = {
            "XX": SimpleBasicBlock,
            "Pre_XX": SimplePreActBasicBlock,
            "Pre_XX_Res2": SimplePreActRes2BasicBlock,
            "Btl": SimpleBottleneck,
            "Pre_Btl": SimplePreActBottleneck,
            "IR": SimpleInvertedResidual,
            "Pre_IR": SimplePreActInvertedResidual,
            "Sep2": SimpleSeparable_2,
            "Pre_Sep2": SimplePreActSeparable_2,
            "Sep3": SimpleSeparable_3,
            "Pre_Custom_2": PreBlock_2,
        }
        stage_name_to_module = {"simpl": SimpleStage}
        # set stride=2 for all blocks
        # using **{**bn_args, **stage_args} to allow updating norm layer for particular stage
        self.layer1 = stage_name_to_module[stage_fns[0]](
            block_fn=block_name_to_module[block_fns[0]],
            in_chs=stem_width,
            out_chs=channels[0],
            num_blocks=layers[0],
            stride=2,
            **{
                **bn_args,
                **stage_args[0]
            },
        )
        self.layer2 = stage_name_to_module[stage_fns[1]](
            block_fn=block_name_to_module[block_fns[1]],
            in_chs=channels[0],
            out_chs=channels[1],
            num_blocks=layers[1],
            stride=2,
            **{
                **bn_args,
                **stage_args[1]
            },
        )
        self.layer3 = stage_name_to_module[stage_fns[2]](
            block_fn=block_name_to_module[block_fns[2]],
            in_chs=channels[1],
            out_chs=channels[2],
            num_blocks=layers[2],
            stride=2,
            **{
                **bn_args,
                **stage_args[2]
            },
        )
        extra_stage3_filters = stage_args[2].get("filter_steps",
                                                 0) * (layers[2] - 1)
        self.layer4 = stage_name_to_module[stage_fns[3]](
            block_fn=block_name_to_module[block_fns[3]],
            in_chs=channels[2] + extra_stage3_filters,
            out_chs=channels[3],
            num_blocks=layers[3],
            stride=2,
            **{
                **bn_args,
                **stage_args[3]
            },
        )
        extra_stage4_filters = stage_args[3].get("filter_steps",
                                                 0) * (layers[3] - 1)
        channels[3] += extra_stage4_filters  # TODO: rewrite this more cleanly instead of mutating in place
        last_norm = norm_layer(channels[3],
                               activation=norm_act) if block_fns[0].startswith(
                                   "Pre") else nn.Identity()
        if head_type == "mobilenetv3":
            self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mobilenetv3_norm":  # mobilenet with last norm
            self.head = nn.Sequential(  # Mbln v3 head. GAP first, then expand convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width),
                nn.BatchNorm1d(head_width),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "default":
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], head_width),
                norm_layer(head_width, activation=head_norm_act),
                FastGlobalAvgPool2d(flatten=True),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "default_nonorm":  # if used in angular losses don't want norm
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], head_width,
                        bias=True),  # need bias because not followed by norm
                FastGlobalAvgPool2d(flatten=True),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_bn_fc_bn":
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], channels[3]),
                FastGlobalAvgPool2d(flatten=True),
                nn.BatchNorm1d(channels[3]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(channels[3], head_width, bias=False),
                nn.BatchNorm1d(head_width, affine=False),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_bn_fc":  # same as above but without last BN
            self.head = nn.Sequential(
                last_norm,
                conv1x1(channels[3], channels[3]),
                FastGlobalAvgPool2d(flatten=True),
                nn.BatchNorm1d(channels[3]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(channels[3], head_width, bias=False),
            )
            self.last_linear = nn.Linear(head_width, num_classes)
        elif head_type == "mlp_2":
            assert isinstance(head_width, (tuple, list)), head_width
            self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width[0]),
                nn.BatchNorm1d(head_width[0]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[0], head_width[1]),
                nn.BatchNorm1d(head_width[1]),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width[1], num_classes)
        elif head_type == "mlp_3":
            assert isinstance(head_width, (tuple, list)), head_width
            self.head = nn.Sequential(  # like Mbln v3 head. GAP first, then MLP convs
                last_norm,
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(channels[3], head_width[0]),
                nn.BatchNorm1d(head_width[0]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[0], head_width[1]),
                nn.BatchNorm1d(head_width[1]),
                pt.modules.activations.activation_from_name(head_norm_act),
                nn.Linear(head_width[1], head_width[2]),
                nn.BatchNorm1d(head_width[2]),
                pt.modules.activations.activation_from_name(head_norm_act),
            )
            self.last_linear = nn.Linear(head_width[2], num_classes)
        else:
            raise ValueError(f"Head type: {head_type} is not supported!")
        initialize(self)
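
The `**{**bn_args, **stage_args[i]}` construction used for every stage is a plain dict merge in which the per-stage dict wins on key collisions; that is what the comment means by "allow updating norm layer for particular stage". A tiny illustration (the values are made up):

```python
bn_args = {"norm_layer": "abn", "norm_act": "leaky_relu"}
stage_args = [{}, {}, {"norm_act": "relu"}, {"filter_steps": 16}]

# later keys win, so stage-specific values override the shared defaults
merged_stage3 = {**bn_args, **stage_args[2]}
print(merged_stage3)  # {'norm_layer': 'abn', 'norm_act': 'relu'}

merged_stage4 = {**bn_args, **stage_args[3]}
print(merged_stage4)  # {'norm_layer': 'abn', 'norm_act': 'leaky_relu', 'filter_steps': 16}
```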
Example #24
    def __init__(
        self,
        blocks_args=None,
        width_multiplier=None,
        depth_multiplier=None,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        output_stride=32,
        encoder=False,
        drop_rate=0,
        drop_connect_rate=0,
        stem_size=32,
        norm_layer="abn",
        norm_act="swish",
        match_tf_same_padding=False,
    ):
        super().__init__()
        norm_layer = bn_from_name(norm_layer)
        self.norm_layer = norm_layer
        self.norm_act = norm_act
        self.width_multiplier = width_multiplier
        self.depth_multiplier = depth_multiplier
        stem_size = make_divisible(stem_size * width_multiplier)
        self.conv_stem = conv3x3(in_channels, stem_size, stride=2)
        self.bn1 = norm_layer(stem_size, activation=norm_act)
        in_channels = stem_size
        self.blocks = nn.ModuleList([])
        # modify block args to account for output_stride strategy
        blocks_args = _patch_block_args(blocks_args, output_stride)
        for block_idx, block_arg in enumerate(blocks_args):
            block = []
            block_arg["in_channels"] = make_divisible(block_arg["in_channels"] * self.width_multiplier)
            block_arg["out_channels"] = make_divisible(block_arg["out_channels"] * self.width_multiplier)
            block_arg["norm_layer"] = norm_layer
            block_arg["norm_act"] = norm_act
            # linearly scale keep prob
            block_arg["keep_prob"] = 1 - drop_connect_rate * block_idx / len(blocks_args)
            repeats = block_arg.pop("num_repeat")
            repeats = int(math.ceil(repeats * self.depth_multiplier))
            # when dilating conv with stride 2 we want it to have dilation // 2
            # it prevents checkerboard artifacts with OS=16 and OS=8
            dilation = block_arg.get("dilation", 1)  # save block values
            if block_arg.pop("no_first_dilation", False):
                block_arg["dilation"] = max(1, block_arg["dilation"] // 2)
            block.append(InvertedResidual(**block_arg))
            # only first layer in block is strided
            block_arg["stride"] = 1
            block_arg["dilation"] = dilation
            block_arg["in_channels"] = block_arg["out_channels"]
            for _ in range(repeats - 1):
                block.append(InvertedResidual(**block_arg))

            self.blocks.append(nn.Sequential(*block))

        # Head

        if encoder:
            self.forward = self.encoder_features
        else:
            out_channels = block_arg["out_channels"]
            num_features = make_divisible(1280 * width_multiplier)
            self.conv_head = conv1x1(out_channels, num_features)
            self.bn2 = norm_layer(num_features, activation=norm_act)
            self.global_pool = nn.AdaptiveAvgPool2d(1)
            self.dropout = nn.Dropout(drop_rate, inplace=True)
            self.classifier = nn.Linear(num_features, num_classes)

        patch_bn(self)  # adjust epsilon
        initialize(self)
        if match_tf_same_padding:
            conv_to_same_conv(self)
            maxpool_to_same_maxpool(self)
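
Two small pieces do most of the scaling in this constructor: `make_divisible` rounds channel counts after the width multiplier is applied, and the drop-connect keep probability decays linearly with the block index. The rounding rule below is an assumption (the common EfficientNet/MobileNet "nearest multiple of 8, never shrink by more than 10%" helper), not necessarily the exact pytorch-tools version:

```python
def make_divisible(value, divisor=8):
    # assumed rule: round to the nearest multiple of `divisor`,
    # but never round down by more than 10%
    new_value = max(divisor, int(value + divisor / 2) // divisor * divisor)
    if new_value < 0.9 * value:
        new_value += divisor
    return new_value

width_multiplier, drop_connect_rate, num_blocks = 1.1, 0.2, 7
print(make_divisible(32 * width_multiplier))  # 35.2 -> 32

# keep_prob per block, mirroring the linear scaling in the loop above
for block_idx in range(num_blocks):
    keep_prob = 1 - drop_connect_rate * block_idx / num_blocks
    print(block_idx, round(keep_prob, 3))  # 1.0, 0.971, ..., 0.829
```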
Example #25
    def __init__(
        self,
        stage_fn=None,
        block_fn=None,
        layers=None,  # num layers in each block
        channels=None,  # it's actually output channels. 256, 512, 1024, 2048 for R50
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        attn_type=None,
        # base_width=64,
        stem_type="default",
        norm_layer="abn",
        norm_act="leaky_relu",
        antialias=False,
        # encoder=False,
        bottle_ratio=0.25,  # how much to shrink channels in bottleneck layer
        no_first_csp=False,  # make first stage a Simple Stage
        drop_rate=0.0,
        drop_connect_rate=0.0,
        expand_before_head=True,  # add an additional conv from 512 -> 2048 to avoid a representational bottleneck
        mobilenetv3_head=False,  # put GAP first, then expand convs
        **block_kwargs,
    ):

        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.num_classes = num_classes
        self.norm_act = norm_act
        self.block_idx = 0  # for drop connect
        self.drop_connect_rate = drop_connect_rate
        super().__init__()

        if block_fn != SimplePreActBottleneck:
            stem_norm = norm_layer(stem_width, activation=norm_act)
        else:
            stem_norm = nn.Identity()
        if stem_type == "default":
            self.stem_conv1 = nn.Sequential(
                nn.Conv2d(3,
                          stem_width,
                          kernel_size=7,
                          stride=2,
                          padding=3,
                          bias=False),
                stem_norm,
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            )
            first_stride = 1
        elif stem_type == "s2d":
            # instead of the default stem this uses Space2Depth followed by conv; no norm here because
            # DarkStage starts with one (the non-PreAct version does add a norm, see stem_norm above)
            self.stem_conv1 = nn.Sequential(
                SpaceToDepth(block_size=2),
                conv3x3(in_channels * 4, stem_width),
                stem_norm,
                # nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            )
            first_stride = 2

        # blocks
        largs = dict(
            stride=2,
            bottle_ratio=bottle_ratio,
            block_fn=block_fn,
            attn_type=attn_type,
            norm_layer=norm_layer,
            norm_act=norm_act,
            # antialias=antialias,
            **block_kwargs,
        )
        first_stage_fn = SimpleStage if no_first_csp else stage_fn
        # fmt: off
        self.layer1 = first_stage_fn(
            in_chs=stem_width,
            out_chs=channels[0],
            num_blocks=layers[0],
            keep_prob=self.keep_prob,
            **{
                **largs, "stride": first_stride
            },  # overwrite default stride
        )
        # **{**largs, "antialias": False} # antialias in first stage is too expensive
        self.layer2 = stage_fn(in_chs=channels[0],
                               out_chs=channels[1],
                               num_blocks=layers[1],
                               keep_prob=self.keep_prob,
                               **largs)
        self.layer3 = stage_fn(in_chs=channels[1],
                               out_chs=channels[2],
                               num_blocks=layers[2],
                               keep_prob=self.keep_prob,
                               **largs)
        self.layer4 = stage_fn(in_chs=channels[2],
                               out_chs=channels[3],
                               num_blocks=layers[3],
                               keep_prob=self.keep_prob,
                               **largs)
        # fmt: on

        # self.global_pool = FastGlobalAvgPool2d(flatten=True)
        # self.dropout = nn.Dropout(p=drop_rate, inplace=True)
        head_layers = []
        # this is a very dirty if but i don't care for now
        if mobilenetv3_head:
            head_layers.append(FastGlobalAvgPool2d(flatten=True))
            if channels[3] < 2048 and expand_before_head:
                head_layers.append(
                    nn.Linear(channels[3], 2048)
                )  # no norm here, as in the original MobileNetV3 from Google
                head_layers.append(
                    pt.modules.activations.activation_from_name(norm_act))
            head_layers.append(
                nn.Linear(2048 if expand_before_head else channels[3],
                          num_classes))
        else:
            if channels[3] < 2048 and expand_before_head:
                if block_fn == SimplePreActBottleneck:  # for PreAct add additional BN here
                    head_layers.append(
                        norm_layer(channels[3], activation=norm_act))
                head_layers.extend([
                    conv1x1(channels[3], 2048),
                    norm_layer(2048, activation=norm_act)
                ])
            head_layers.extend([
                FastGlobalAvgPool2d(flatten=True),
                nn.Linear(2048 if expand_before_head else channels[3],
                          num_classes)
            ])
        # self.head = nn.Sequential(
        #     conv1x1(channels[3], 2048),
        #     norm_layer(activation=norm_act),
        #     # norm_layer(1024, activation=norm_act),
        #     FastGlobalAvgPool2d(flatten=True),
        #     nn.Linear(2048, num_classes),
        # )
        self.head = nn.Sequential(*head_layers)
        initialize(self)
Example #26
    def __init__(
        self,
        width=18,
        small=False,
        pretrained=None,  # not used. here for proper signature
        num_classes=1000,
        in_channels=3,
        norm_layer="abn",
        norm_act="relu",
        encoder=False,
    ):
        super(HighResolutionNet, self).__init__()
        stem_width = 64
        norm_layer = bn_from_name(norm_layer)
        self.bn_args = bn_args = {
            "norm_layer": norm_layer,
            "norm_act": norm_act
        }
        self.conv1 = conv3x3(in_channels, stem_width, stride=2)
        self.bn1 = norm_layer(stem_width, activation=norm_act)

        self.conv2 = conv3x3(stem_width, stem_width, stride=2)
        self.bn2 = norm_layer(stem_width, activation=norm_act)

        channels = [width, width * 2, width * 4, width * 8]
        n_blocks = [2 if small else 4] * 4

        self.layer1 = make_layer(stem_width, stem_width, n_blocks[0],
                                 **bn_args)

        self.transition1 = TransitionBlock([stem_width * Bottleneck.expansion],
                                           channels[:2], **bn_args)
        self.stage2 = self._make_stage(n_modules=1,
                                       n_branches=2,
                                       n_blocks=n_blocks[:2],
                                       n_chnls=channels[:2])

        self.transition2 = TransitionBlock(channels[:2], channels[:3],
                                           **bn_args)
        self.stage3 = self._make_stage(  # 3 if small else 4
            n_modules=(4, 3)[small],
            n_branches=3,
            n_blocks=n_blocks[:3],
            n_chnls=channels[:3])

        self.transition3 = TransitionBlock(channels[:3], channels, **bn_args)
        self.stage4 = self._make_stage(  # 2 if small else 3
            n_modules=(3, 2)[small],
            n_branches=4,
            n_blocks=n_blocks,
            n_chnls=channels,
        )

        self.encoder = encoder
        if encoder:
            self.forward = self.encoder_features
        else:
            # Classification Head
            self.cls_head = HRClassificationHead(channels, **bn_args)
            self.global_pool = nn.AdaptiveAvgPool2d(1)
            self.last_linear = nn.Linear(2048, num_classes)
        # initialize weights
        initialize(self)
Example #27
def test_no_norm():
    """check that can init without params"""
    modules.bn_from_name("none")(10)
    modules.bn_from_name("none")()