def build_resnet_backbone(cfg):
    """Construct a ResNetBackbone from a config node.

    Reads the backbone, recognizer and attention options under ``cfg.MODEL``,
    obtains the conv/norm/act layers from ``get_conv``/``get_norm``/``get_act``
    and unpacks the ``arch_settings`` entry selected by ``BACKBONE.ARCH``.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE``, ``MODEL.RECOGNIZER``
            and ``MODEL.ATTENTION``.

    Returns:
        The ResNetBackbone built from those options.
    """
    backbone_cfg = cfg.MODEL.BACKBONE
    arch_name = backbone_cfg.ARCH

    params = {
        'in_channels': backbone_cfg.IN_PLANES,
        'base_channels': backbone_cfg.BASE_PLANES,
        'layer_channels': backbone_cfg.LAYER_PLANES,
        'down_samples': backbone_cfg.DOWNSAMPLES,
        'conv_layer': get_conv(cfg),
        'norm_layer': get_norm(cfg),
        'act_layer': get_act(cfg),
        'zero_init_residual': cfg.MODEL.RECOGNIZER.ZERO_INIT_RESIDUAL,
    }

    # Attention options.
    attention_cfg = cfg.MODEL.ATTENTION
    params['with_attentions'] = attention_cfg.WITH_ATTENTIONS
    params['reduction'] = attention_cfg.REDUCTION
    params['attention_type'] = attention_cfg.ATTENTION_TYPE

    # Each arch entry unpacks to (block, blocks per layer, groups, width per group).
    block, blocks_per_layer, num_groups, group_width = arch_settings[arch_name]
    params.update(
        block_layer=block,
        layer_blocks=blocks_per_layer,
        groups=num_groups,
        width_per_group=group_width,
    )
    return ResNetBackbone(**params)
def build_mnasnet(cfg):
    """Construct an MNASNetBackbone from a config node.

    The stage layout is taken from ``arch_settings[cfg.MODEL.BACKBONE.ARCH]``;
    the output channel count is fixed at 1280.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE``, ``MODEL.COMPRESSION``
            and ``MODEL.ATTENTION``.

    Returns:
        The MNASNetBackbone built from those options.
    """
    backbone_cfg = cfg.MODEL.BACKBONE
    arch_name = backbone_cfg.ARCH
    input_planes = backbone_cfg.IN_PLANES
    norm = get_norm(cfg)

    # Compression options.
    compression_cfg = cfg.MODEL.COMPRESSION
    width_mult = compression_cfg.WIDTH_MULTIPLIER
    nearest = compression_cfg.ROUND_NEAREST

    # Attention options.
    attention_cfg = cfg.MODEL.ATTENTION
    use_attention = attention_cfg.WITH_ATTENTION
    attn_reduction = attention_cfg.REDUCTION
    attn_type = attention_cfg.ATTENTION_TYPE

    conv = get_conv(cfg)
    act = get_act(cfg)

    stages = arch_settings[arch_name]

    return MNASNetBackbone(
        in_channels=input_planes,
        out_channels=1280,
        stage_setting=stages,
        width_multiplier=width_mult,
        round_nearest=nearest,
        with_attention=use_attention,
        reduction=attn_reduction,
        attention_type=attn_type,
        conv_layer=conv,
        norm_layer=norm,
        act_layer=act,
    )
def build_sfv2_backbone(cfg):
    """Construct a ShuffleNetV2Backbone from a config node.

    The ``arch_settings`` entry selected by ``cfg.MODEL.BACKBONE.ARCH`` supplies
    the block class, per-stage channels, per-stage block counts and the output
    channel count. The base, stage and output channel values are each passed
    through ``make_divisible`` with ``cfg.MODEL.COMPRESSION.ROUND_NEAREST``.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE`` and
            ``MODEL.COMPRESSION``.

    Returns:
        The ShuffleNetV2Backbone built from those options.
    """
    arch = cfg.MODEL.BACKBONE.ARCH
    in_channels = cfg.MODEL.BACKBONE.IN_PLANES
    base_channels = cfg.MODEL.BACKBONE.BASE_PLANES
    round_nearest = cfg.MODEL.COMPRESSION.ROUND_NEAREST

    block_layer, stage_channels, stage_blocks, out_channels = arch_settings[arch]

    base_channels = make_divisible(base_channels, round_nearest)
    # Build a fresh list instead of assigning into the sequence owned by
    # arch_settings: writing in place would alter the shared table that later
    # builds read (and would fail outright if the entry were a tuple).
    stage_channels = [
        make_divisible(channels, round_nearest) for channels in stage_channels
    ]
    out_channels = make_divisible(out_channels, round_nearest)

    down_samples = cfg.MODEL.BACKBONE.DOWNSAMPLES
    conv_layer = get_conv(cfg)
    norm_layer = get_norm(cfg)
    act_layer = get_act(cfg)

    return ShuffleNetV2Backbone(
        in_channels=in_channels,
        base_channels=base_channels,
        out_channels=out_channels,
        stage_channels=stage_channels,
        stage_blocks=stage_blocks,
        downsamples=down_samples,
        block_layer=block_layer,
        conv_layer=conv_layer,
        norm_layer=norm_layer,
        act_layer=act_layer,
    )
def build_sfv1_backbone(cfg):
    """Construct a ShuffleNetV1Backbone from a config node.

    The ``arch_settings`` entry selected by ``cfg.MODEL.BACKBONE.ARCH`` supplies
    the block class, group count, per-stage channels, per-stage block counts
    and a width multiplier. The base and stage channel values are multiplied
    by that width multiplier and passed through ``make_divisible`` with
    ``cfg.MODEL.COMPRESSION.ROUND_NEAREST``.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE`` and
            ``MODEL.COMPRESSION``.

    Returns:
        The ShuffleNetV1Backbone built from those options.
    """
    arch = cfg.MODEL.BACKBONE.ARCH
    round_nearest = cfg.MODEL.COMPRESSION.ROUND_NEAREST

    in_channels = cfg.MODEL.BACKBONE.IN_PLANES
    base_channels = cfg.MODEL.BACKBONE.BASE_PLANES
    downsamples = cfg.MODEL.BACKBONE.DOWNSAMPLES
    with_groups = cfg.MODEL.BACKBONE.WITH_GROUPS

    conv_layer = get_conv(cfg)
    norm_layer = get_norm(cfg)
    act_layer = get_act(cfg)

    (block_layer, groups, stage_channels, layer_blocks,
     width_multiplier) = arch_settings[arch]

    base_channels = make_divisible(base_channels * width_multiplier,
                                   round_nearest)
    # Build a fresh list instead of assigning into the sequence owned by
    # arch_settings: writing the scaled values back in place would make every
    # later build of the same arch apply width_multiplier again on top of the
    # already-scaled channels (and would fail if the entry were a tuple).
    stage_channels = [
        make_divisible(channels * width_multiplier, round_nearest)
        for channels in stage_channels
    ]

    return ShuffleNetV1Backbone(
        in_channels=in_channels,
        base_channels=base_channels,
        groups=groups,
        stage_channels=stage_channels,
        stage_blocks=layer_blocks,
        downsamples=downsamples,
        with_groups=with_groups,
        block_layer=block_layer,
        conv_layer=conv_layer,
        norm_layer=norm_layer,
        act_layer=act_layer,
    )
def build_mbv3_backbone(cfg):
    """Construct a MobileNetV3Backbone from a config node.

    The ``arch_settings`` entry selected by ``cfg.MODEL.BACKBONE.ARCH`` is
    unpacked into four values; only the first two (base channels and feature
    dims) are forwarded to the backbone.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE``, ``MODEL.COMPRESSION``,
            ``MODEL.ATTENTION`` and ``MODEL.ACT``.

    Returns:
        The MobileNetV3Backbone built from those options.
    """
    arch_name = cfg.MODEL.BACKBONE.ARCH
    input_channels = cfg.MODEL.BACKBONE.IN_PLANES
    norm = get_norm(cfg)

    # Compression options.
    compression_cfg = cfg.MODEL.COMPRESSION
    width_mult = compression_cfg.WIDTH_MULTIPLIER
    nearest = compression_cfg.ROUND_NEAREST

    # Attention options.
    attention_cfg = cfg.MODEL.ATTENTION
    use_attention = attention_cfg.WITH_ATTENTION
    attn_reduction = attention_cfg.REDUCTION
    attn_type = attention_cfg.ATTENTION_TYPE

    conv = get_conv(cfg)

    # Activation options.
    act = get_act(cfg)
    sigmoid_kind = cfg.MODEL.ACT.SIGMOID_TYPE

    # NOTE(review): the last two fields of the arch entry (inner dims and layer
    # setting) are unpacked but never passed on -- confirm this is intended.
    stem_channels, feature_dims, _inner_dims, _layer_setting = arch_settings[arch_name]

    return MobileNetV3Backbone(
        in_channels=input_channels,
        base_channels=stem_channels,
        out_channels=feature_dims,
        width_multiplier=width_mult,
        round_nearest=nearest,
        with_attention=use_attention,
        reduction=attn_reduction,
        attention_type=attn_type,
        conv_layer=conv,
        norm_layer=norm,
        act_layer=act,
        sigmoid_type=sigmoid_kind,
    )
def build_resnet3d_backbone(cfg):
    """Construct a ResNet3DBackbone from a config node.

    Stem, pooling, layer and inflation options come from
    ``cfg.MODEL.BACKBONE``; the block class, per-layer block counts, groups
    and width per group come from ``arch_settings[ARCH]``. When
    ``cfg.MODEL.RECOGNIZER.TORCHVISION_PRETRAINED`` is truthy, a state dict is
    fetched with ``load_state_dict_from_url(model_urls[ARCH])`` and passed as
    ``state_dict_2d``; otherwise ``state_dict_2d`` is None.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE`` and
            ``MODEL.RECOGNIZER``.

    Returns:
        The ResNet3DBackbone built from those options.
    """
    backbone_cfg = cfg.MODEL.BACKBONE
    arch_name = backbone_cfg.ARCH
    use_pretrained = cfg.MODEL.RECOGNIZER.TORCHVISION_PRETRAINED

    kwargs = {
        'conv_layer': get_conv(cfg),
        'norm_layer': get_norm(cfg),
        'act_layer': get_act(cfg),
        'zero_init_residual': cfg.MODEL.RECOGNIZER.ZERO_INIT_RESIDUAL,
        # Backbone options.
        'in_planes': backbone_cfg.IN_PLANES,
        'base_planes': backbone_cfg.BASE_PLANES,
        'conv1_kernel': backbone_cfg.CONV1_KERNEL,
        'conv1_stride': backbone_cfg.CONV1_STRIDE,
        'conv1_padding': backbone_cfg.CONV1_PADDING,
        'pool1_kernel': backbone_cfg.POOL1_KERNEL,
        'pool1_stride': backbone_cfg.POOL1_STRIDE,
        'pool1_padding': backbone_cfg.POOL1_PADDING,
        'with_pool2': backbone_cfg.WITH_POOL2,
        'layer_planes': backbone_cfg.LAYER_PLANES,
        'down_samples': backbone_cfg.DOWNSAMPLES,
        'temporal_strides': backbone_cfg.TEMPORAL_STRIDES,
        'inflate_list': backbone_cfg.INFLATE_LIST,
        'inflate_style': backbone_cfg.INFLATE_STYLE,
    }

    block, blocks_per_layer, num_groups, group_width = arch_settings[arch_name]
    kwargs.update(
        block_layer=block,
        layer_blocks=blocks_per_layer,
        groups=num_groups,
        width_per_group=group_width,
    )

    # Only fetch the pretrained weights when the config asks for them.
    if use_pretrained:
        kwargs['state_dict_2d'] = load_state_dict_from_url(
            model_urls[arch_name], progress=True)
    else:
        kwargs['state_dict_2d'] = None

    return ResNet3DBackbone(**kwargs)
def test_resnet_gn():
    """Exercise ResNet with the norm layer selected by NORM.TYPE = 'GroupNorm'.

    Sets ``cfg.MODEL.NORM.TYPE`` on the module-level ``cfg`` (the value is not
    restored afterwards), then builds a resnet50 and a resnext50_32x4d with
    1000 classes and hands each to ``test_data`` with an input shape of
    ``(batch, 3, 224, 224)`` and an expected shape of ``(batch, 1000)``.
    """
    cfg.MODEL.NORM.TYPE = 'GroupNorm'
    gn_layer = get_norm(cfg)
    print(gn_layer)

    # (arch name, batch size): the custom resnet50 first, then the
    # resnext50_32x4d variant.
    cases = (
        ("resnet50", 1),
        ("resnext50_32x4d", 3),
    )
    for arch_name, batch in cases:
        net = ResNet(arch=arch_name, num_classes=1000, norm_layer=gn_layer)
        print(net)
        test_data(net, (batch, 3, 224, 224), (batch, 1000))
def build_torchvision_mbv2(cfg):
    """Construct a TorchvisionMobileNetV2 from a config node.

    Args:
        cfg: config object exposing ``MODEL.RECOGNIZER``, ``MODEL.NORM``,
            ``MODEL.HEAD`` and ``MODEL.COMPRESSION``.

    Returns:
        The TorchvisionMobileNetV2 built from those options.
    """
    recognizer_cfg = cfg.MODEL.RECOGNIZER
    use_pretrained = recognizer_cfg.TORCHVISION_PRETRAINED
    pretrained_classes = recognizer_cfg.PRETRAINED_NUM_CLASSES

    norm_cfg = cfg.MODEL.NORM
    freeze_bn = norm_cfg.FIX_BN
    freeze_partial_bn = norm_cfg.PARTIAL_BN

    head_classes = cfg.MODEL.HEAD.NUM_CLASSES
    norm = get_norm(cfg)
    width_mult = cfg.MODEL.COMPRESSION.WIDTH_MULTIPLIER

    return TorchvisionMobileNetV2(
        num_classes=head_classes,
        torchvision_pretrained=use_pretrained,
        pretrained_num_classes=pretrained_classes,
        width_multiplier=width_mult,
        fix_bn=freeze_bn,
        partial_bn=freeze_partial_bn,
        norm_layer=norm,
    )
def build_mbv2_backbone(cfg):
    """Construct a MobileNetV2Backbone from a config node.

    The base channels, the channel column of the built-in inverted-residual
    table and the output channels (``cfg.MODEL.HEAD.FEATURE_DIMS``) are each
    multiplied by ``COMPRESSION.WIDTH_MULTIPLIER`` and passed through
    ``make_divisible`` with ``COMPRESSION.ROUND_NEAREST``.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE``, ``MODEL.HEAD`` and
            ``MODEL.COMPRESSION``.

    Returns:
        The MobileNetV2Backbone built from those options.
    """
    input_channels = cfg.MODEL.BACKBONE.IN_PLANES
    stem_channels = cfg.MODEL.BACKBONE.BASE_PLANES
    feature_channels = cfg.MODEL.HEAD.FEATURE_DIMS
    nearest = cfg.MODEL.COMPRESSION.ROUND_NEAREST
    width_mult = cfg.MODEL.COMPRESSION.WIDTH_MULTIPLIER

    conv = get_conv(cfg)
    norm = get_norm(cfg)
    act = get_act(cfg)

    # Unscaled table; columns are t, c, n, s.
    base_setting = (
        (1, 16, 1, 1),
        (6, 24, 2, 2),
        (6, 32, 3, 2),
        (6, 64, 4, 2),
        (6, 96, 3, 1),
        (6, 160, 3, 2),
        (6, 320, 1, 1),
    )

    stem_channels = make_divisible(stem_channels * width_mult, nearest)
    # Only the channel column (c) is scaled; rows stay lists as before.
    scaled_setting = [
        [t, make_divisible(c * width_mult, nearest), n, s]
        for t, c, n, s in base_setting
    ]
    feature_channels = make_divisible(feature_channels * width_mult, nearest)

    return MobileNetV2Backbone(
        in_channels=input_channels,
        out_channels=feature_channels,
        base_channels=stem_channels,
        inverted_residual_setting=scaled_setting,
        conv_layer=conv,
        norm_layer=norm,
        act_layer=act,
    )
def build_mbv1_backbone(cfg):
    """Construct a MobileNetV1Backbone from a config node.

    The base planes and every entry of ``BACKBONE.LAYER_PLANES`` are
    multiplied by ``COMPRESSION.WIDTH_MULTIPLIER`` and passed through
    ``make_divisible`` with ``COMPRESSION.ROUND_NEAREST``.

    Args:
        cfg: config object exposing ``MODEL.BACKBONE`` and
            ``MODEL.COMPRESSION``.

    Returns:
        The MobileNetV1Backbone built from those options.
    """
    backbone_cfg = cfg.MODEL.BACKBONE
    input_planes = backbone_cfg.IN_PLANES
    stem_planes = backbone_cfg.BASE_PLANES
    per_layer_planes = backbone_cfg.LAYER_PLANES
    layer_strides = backbone_cfg.STRIDES

    conv = get_conv(cfg)
    norm = get_norm(cfg)
    act = get_act(cfg)

    width_mult = cfg.MODEL.COMPRESSION.WIDTH_MULTIPLIER
    nearest = cfg.MODEL.COMPRESSION.ROUND_NEAREST

    def _scaled(planes):
        # Apply the width multiplier, then the divisibility rounding.
        return make_divisible(planes * width_mult, nearest)

    stem_planes = _scaled(stem_planes)
    per_layer_planes = [_scaled(planes) for planes in per_layer_planes]

    return MobileNetV1Backbone(
        in_channels=input_planes,
        base_channels=stem_planes,
        layer_channels=per_layer_planes,
        strides=layer_strides,
        conv_layer=conv,
        norm_layer=norm,
        act_layer=act,
    )