def __init__(self, cfg, input_shape):
    """
    Build the ROI pooler, box transform, res5 stage and box predictor.

    Args:
        cfg: config node; values are read from ``MODEL.ROI_BOX_HEAD`` and
            ``MODEL.BUA``.
        input_shape: forwarded unchanged to the parent constructor.

    NOTE(review): ``self.in_features``, ``self.feature_strides``,
    ``self.num_classes`` and ``self.cls_agnostic_bbox_reg`` are read below
    but never assigned in this method -- presumably the parent constructor
    provides them; confirm.
    """
    super().__init__(cfg, input_shape)
    assert len(self.in_features) == 1

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    bua_cfg = cfg.MODEL.BUA

    # Pooler settings. There is exactly one input feature map (asserted
    # above), so the scales tuple has a single entry: 1 / stride.
    resolution = box_head_cfg.POOLER_RESOLUTION
    kind = box_head_cfg.POOLER_TYPE
    scales = (1.0 / self.feature_strides[self.in_features[0]],)
    ratio = box_head_cfg.POOLER_SAMPLING_RATIO

    # BUA-specific options.
    self.resnet_version = bua_cfg.RESNET_VERSION
    self.attr_on = bua_cfg.ATTRIBUTE_ON
    self.extract_on = bua_cfg.EXTRACT_FEATS
    self.num_attr_classes = bua_cfg.ATTRIBUTE.NUM_CLASSES
    self.extractor_mode = bua_cfg.EXTRACTOR.MODE

    self.pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    self.box2box_transform = BUABox2BoxTransform(
        weights=box_head_cfg.BBOX_REG_WEIGHTS
    )

    self.res5, out_channels = self._build_res5_block(cfg)
    # Version 2 gets an extra batch norm sized to the res5 output channels.
    if self.resnet_version == 2:
        self.res5_bn = BatchNorm2d(out_channels, eps=2e-5)

    self.box_predictor = BUACaffeFastRCNNOutputLayers(
        out_channels,
        self.num_classes,
        self.cls_agnostic_bbox_reg,
        attr_on=self.attr_on,
        num_attr_classes=self.num_attr_classes,
    )
def __init__(self, in_channels=3, out_channels=64, norm="BN"):
    """
    Create the stem: a batch norm over the input channels and a 7x7,
    stride-2 convolution that carries its own batch norm.

    Args:
        in_channels (int): channel count given to the input batch norm and
            to the convolution's input side.
        out_channels (int): channel count produced by the convolution and
            given to its batch norm.
        norm (str or callable): a callable that takes the number of
            channels and return a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}). This constructor does not
            read the argument; both norms are always
            ``BatchNorm2d(..., eps=2e-5)``.
    """
    super().__init__()

    bn_eps = 2e-5
    self.norm = BatchNorm2d(in_channels, eps=bn_eps)

    conv_norm = BatchNorm2d(out_channels, eps=bn_eps)
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
        norm=conv_norm,
    )

    # Only the convolution receives the MSRA fill; self.norm is left with
    # whatever initialisation BatchNorm2d gives it.
    weight_init.c2_msra_fill(self.conv1)
def _shrunk_batchnorm(src_norm, num_channels):
    """Return a BatchNorm2d holding the first ``num_channels`` channels of ``src_norm``.

    Weight, bias and running statistics are copied (cloned and detached).
    The source ``eps`` is carried over when the source exposes one, so the
    shrunk layer normalises with the same constant as the layer it replaces
    instead of silently falling back to the BatchNorm2d default.
    """
    eps = getattr(src_norm, "eps", None)
    if eps is None:
        norm = BatchNorm2d(num_channels)
    else:
        norm = BatchNorm2d(num_channels, eps=eps)
    norm.weight = nn.Parameter(src_norm.weight[:num_channels].clone().detach())
    norm.bias = nn.Parameter(src_norm.bias[:num_channels].clone().detach())
    norm.running_mean = src_norm.running_mean[:num_channels].clone().detach()
    norm.running_var = src_norm.running_var[:num_channels].clone().detach()
    return norm


def shrink_layer(layer, Fi, Fo):
    """Shrink ``layer`` to ``Fi`` input and ``Fo`` output channels.

    Dimension 1 of ``layer.in_shape`` / ``layer.out_shape`` is overwritten
    with ``Fi`` / ``Fo``. Then, depending on ``layer.base_type``:

    * ``'Conv2d'``: a new ``Conv2d`` is built from the leading slice of the
      base weights (and bias / norm, when present) and installed with
      ``replace_layer``.
    * ``'Linear'``: a new ``nn.Linear`` is built the same way and installed
      with ``replace_layer``.
    * anything else: ``init_layer_rep(layer)`` is called.

    Args:
        layer: wrapper exposing ``in_shape``, ``out_shape``, ``base`` and
            ``base_type``.
        Fi (int): new number of input channels / features.
        Fo (int): new number of output channels / features.
    """
    in_shape = list(layer.in_shape)
    in_shape[1] = Fi
    layer.in_shape = torch.Size(in_shape)

    out_shape = list(layer.out_shape)
    out_shape[1] = Fo
    layer.out_shape = torch.Size(out_shape)

    b = layer.base
    if layer.base_type == 'Conv2d':
        groups = b.groups
        # A conv with groups == in_channels == out_channels stays fully
        # grouped after shrinking, provided both sides shrink to the same
        # width.
        if (groups == b.in_channels and b.in_channels == b.out_channels
                and Fi == Fo):
            groups = Fi

        norm = None
        if b.norm is not None:
            norm = _shrunk_batchnorm(b.norm, Fo)

        conv = Conv2d(Fi, Fo, b.kernel_size, stride=b.stride,
                      padding=b.padding, dilation=b.dilation, groups=groups,
                      bias=(b.bias is not None), norm=norm,
                      activation=b.activation)
        # Conv weights have in_channels // groups entries along dim 1.
        conv.weight = nn.Parameter(
            b.weight[:Fo, :(Fi // groups)].clone().detach())
        if b.bias is not None:
            conv.bias = nn.Parameter(b.bias[:Fo].clone().detach())
        replace_layer(layer, conv)
    elif layer.base_type == 'Linear':
        ln = nn.Linear(Fi, Fo, bias=(b.bias is not None))
        ln.weight = nn.Parameter(b.weight[:Fo, :Fi].clone().detach())
        if b.bias is not None:
            ln.bias = nn.Parameter(b.bias[:Fo].clone().detach())
        replace_layer(layer, ln)
    else:
        init_layer_rep(layer)
def __init__(self, cfg, input_shape):
    """
    Configure the ROI head: feature bookkeeping, test-time thresholds,
    pooler, box transform, res5 stage and box predictor.

    Args:
        cfg: config node; values are read from ``MODEL.ROI_HEADS``,
            ``MODEL.ROI_BOX_HEAD``, ``MODEL.BUA`` and ``TEST``.
        input_shape (dict): maps feature name to an object with a
            ``stride`` attribute.

    NOTE(review): ``self.num_classes`` is read below but never assigned in
    this method -- presumably the parent constructor sets it; confirm.
    """
    # The parent is initialised from cfg alone; input_shape is consumed
    # here to build the stride table.
    super().__init__(cfg)

    roi_heads_cfg = cfg.MODEL.ROI_HEADS
    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    bua_cfg = cfg.MODEL.BUA

    self.in_features = roi_heads_cfg.IN_FEATURES
    self.feature_strides = {
        name: shape.stride for name, shape in input_shape.items()
    }
    self.cls_agnostic_bbox_reg = box_head_cfg.CLS_AGNOSTIC_BBOX_REG
    self.smooth_l1_beta = box_head_cfg.SMOOTH_L1_BETA
    assert len(self.in_features) == 1

    # Pooler settings. A single input feature map (asserted above) means a
    # single scale: 1 / stride.
    resolution = box_head_cfg.POOLER_RESOLUTION
    kind = box_head_cfg.POOLER_TYPE
    scales = (1.0 / self.feature_strides[self.in_features[0]],)
    ratio = box_head_cfg.POOLER_SAMPLING_RATIO

    # BUA-specific options.
    self.resnet_version = bua_cfg.RESNET_VERSION
    self.attr_on = bua_cfg.ATTRIBUTE_ON
    self.extract_on = bua_cfg.EXTRACT_FEATS
    self.num_attr_classes = bua_cfg.ATTRIBUTE.NUM_CLASSES
    self.extractor_mode = bua_cfg.EXTRACTOR.MODE

    # Test-time settings.
    self.test_score_thresh = roi_heads_cfg.SCORE_THRESH_TEST
    self.test_nms_thresh = roi_heads_cfg.NMS_THRESH_TEST
    self.test_detections_per_img = cfg.TEST.DETECTIONS_PER_IMAGE

    self.pooler = ROIPooler(
        output_size=resolution,
        scales=scales,
        sampling_ratio=ratio,
        pooler_type=kind,
    )

    self.box2box_transform = BUABox2BoxTransform(
        weights=box_head_cfg.BBOX_REG_WEIGHTS
    )

    self.res5, out_channels = self._build_res5_block(cfg)
    # Version 2 gets an extra batch norm sized to the res5 output channels.
    if self.resnet_version == 2:
        self.res5_bn = BatchNorm2d(out_channels, eps=2e-5)

    self.box_predictor = BUACaffeFastRCNNOutputLayers(
        out_channels,
        self.num_classes,
        self.cls_agnostic_bbox_reg,
        attr_on=self.attr_on,
        num_attr_classes=self.num_attr_classes,
    )
def replace_resnet(model, arch):
    """Return a deep copy of ``model`` whose bottom-up backbone is ``arch``.

    After swapping the backbone, every entry of
    ``model.backbone.lateral_convs`` is rebuilt so that its input width
    matches the corresponding output layer of ``arch``. The output width of
    each lateral conv is kept; weights, bias and norm statistics are sliced
    from the existing conv.

    Args:
        model: model exposing ``backbone.bottom_up`` and
            ``backbone.lateral_convs``. It is not modified; a deep copy is.
        arch: replacement bottom-up network exposing ``V`` and
            ``out_layers``. It is installed as-is (not copied).

    Returns:
        The modified deep copy of ``model``.
    """
    model = copy.deepcopy(model)
    model.backbone.bottom_up = arch

    # Channel count (dim 1 of out_shape) of each output layer of arch,
    # reversed to line up with the order of lateral_convs.
    in_channels = [arch.V[i].out_shape[1] for i in arch.out_layers][::-1]

    for i, Fi in enumerate(in_channels):
        b = model.backbone.lateral_convs[i]
        Fo = b.out_channels

        # Same shrinking logic as shrink_layer, kept inline here.
        groups = b.groups
        if (groups == b.in_channels and b.in_channels == b.out_channels
                and Fi == Fo):
            groups = Fi

        norm = None
        if b.norm is not None:
            # Carry the source eps over when it exists, so the rebuilt norm
            # does not silently fall back to the BatchNorm2d default.
            eps = getattr(b.norm, "eps", None)
            if eps is None:
                norm = BatchNorm2d(Fo)
            else:
                norm = BatchNorm2d(Fo, eps=eps)
            norm.weight = nn.Parameter(b.norm.weight[:Fo].clone().detach())
            norm.bias = nn.Parameter(b.norm.bias[:Fo].clone().detach())
            norm.running_mean = b.norm.running_mean[:Fo].clone().detach()
            norm.running_var = b.norm.running_var[:Fo].clone().detach()

        conv = Conv2d(Fi, Fo, b.kernel_size, stride=b.stride,
                      padding=b.padding, dilation=b.dilation, groups=groups,
                      bias=(b.bias is not None), norm=norm,
                      activation=b.activation)
        # Conv weights have in_channels // groups entries along dim 1.
        conv.weight = nn.Parameter(
            b.weight[:Fo, :(Fi // groups)].clone().detach())
        if b.bias is not None:
            conv.bias = nn.Parameter(b.bias[:Fo].clone().detach())

        # NOTE(review): if lateral_convs is a plain Python list whose
        # modules are also registered on the backbone under other attribute
        # names, this assignment does not update those registrations --
        # confirm against the backbone class.
        model.backbone.lateral_convs[i] = conv

    return model
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    stride_in_1x1=False,
    dilation=1,
):
    """
    Build a bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions, an optional
    1x1 shortcut convolution, and a batch norm over the input channels.

    Args:
        in_channels (int): channels entering the block.
        out_channels (int): channels leaving the block.
        bottleneck_channels (int): channels used between the three convs.
        stride (int): stride of the block; applied by the shortcut and by
            either the first 1x1 or the 3x3 conv (see ``stride_in_1x1``).
        num_groups (int): ``groups`` of the 3x3 convolution.
        norm (str or callable): a callable that takes the number of
            channels and return a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}). This constructor does not
            read the argument; every norm it creates is
            ``BatchNorm2d(..., eps=2e-5)``.
        stride_in_1x1 (bool): when stride==2, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
        dilation (int): dilation (and padding factor) of the 3x3 conv.
    """
    super().__init__(in_channels, out_channels, stride)

    # A projection shortcut is only needed when the channel count changes.
    self.shortcut = (
        Conv2dv2(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=None,
        )
        if in_channels != out_channels
        else None
    )

    # The original MSRA ResNet models have stride in the first 1x1 conv.
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations
    # have stride in the 3x3 conv.
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    bn_eps = 2e-5

    self.conv1 = Conv2dv2(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=None,
    )

    self.conv2 = Conv2dv2(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        norm=BatchNorm2d(bottleneck_channels, eps=bn_eps),
        activation=F.relu_,
    )

    # NOTE(review): this norm is sized with bottleneck_channels (the conv's
    # input width), not out_channels -- consistent only if Conv2dv2 applies
    # norm/activation before the convolution; confirm.
    self.conv3 = Conv2dv2(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=BatchNorm2d(bottleneck_channels, eps=bn_eps),
        activation=F.relu_,
    )

    # MSRA-fill every convolution that exists, in this fixed order.
    for conv in (self.conv1, self.conv2, self.conv3, self.shortcut):
        if conv is None:  # shortcut can be None
            continue
        weight_init.c2_msra_fill(conv)

    self.norm = BatchNorm2d(in_channels, eps=bn_eps)