def __init__(self, cfg, in_channels):
    """Create the upsampling layer and the per-class mask logits layer.

    Args:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and, from
            ``MODEL.ROI_MASK_HEAD``, ``CONV_LAYERS[-1]``, ``DUAL_MODAL`` and
            ``USE_GN``. The node itself is kept on ``self.cfg``.
        in_channels: channel count of the incoming feature map.
    """
    super(MaskRCNNC4Predictor, self).__init__()
    out_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    mask_head_cfg = cfg.MODEL.ROI_MASK_HEAD
    reduced_dim = mask_head_cfg.CONV_LAYERS[-1]

    self.dual_modal = mask_head_cfg.DUAL_MODAL
    self.use_gn = mask_head_cfg.USE_GN

    if not self.dual_modal:
        # Transposed conv with kernel 2, stride 2, no padding.
        self.conv5_mask = ConvTranspose2d(in_channels, reduced_dim, 2, 2, 0)
    else:
        # 3x3 conv producing 4x the channels; PixelShuffle(2) is created
        # alongside it so the channel count can be folded back to reduced_dim.
        self.conv5_mask = Conv2d(in_channels, reduced_dim * 4, 3, padding=1)
        self.pixel_shuffle = nn.PixelShuffle(2)

    self.mask_fcn_final = Conv2d(reduced_dim, out_classes, 1, 1, 0)
    self.cfg = cfg

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, inplanes, planes, stride=1):
    """Build a 1x1 -> 3x3 -> 1x1 bottleneck with frozen batch norm.

    Args:
        inplanes: input channel count.
        planes: channel count of the two inner convolutions; the block's
            output has ``planes * self.expansion`` channels.
        stride: stride applied by the 3x3 convolution and, when present,
            by the shortcut projection.

    Note:
        ``self.downsample`` is only created when the input and output channel
        counts differ; otherwise the attribute is left undefined.
    """
    super(Bottleneck, self).__init__()
    self.inplanes = inplanes
    self.planes = planes
    expanded = planes * self.expansion

    self.conv1 = Conv2d(inplanes, planes, kernel_size=1, bias=False)
    self.bn1 = FrozenBatchNorm2d(planes)

    self.conv2 = Conv2d(
        planes, planes, kernel_size=3, stride=stride, padding=1, bias=False
    )
    self.bn2 = FrozenBatchNorm2d(planes)

    self.conv3 = Conv2d(planes, expanded, kernel_size=1, bias=False)
    self.bn3 = FrozenBatchNorm2d(expanded)

    self.relu = nn.ReLU(inplace=True)

    if inplanes != expanded:
        # Projection shortcut so the residual matches the block output.
        projection = Conv2d(
            inplanes, expanded, kernel_size=1, stride=stride, bias=False
        )
        self.downsample = nn.Sequential(projection, FrozenBatchNorm2d(expanded))
def __init__(self, cfg):
    """Create the mask (and optional overlap) prediction layers.

    Args:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``,
            ``MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]``,
            ``MODEL.ROI_HEADS.USE_FPN``, ``MODEL.ROI_MASK_HEAD.OVERLAP`` and,
            when FPN is off, ``MODEL.RESNETS.RES2_OUT_CHANNELS``.
    """
    super(MaskRCNNC4Predictor, self).__init__()
    out_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    reduced_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    in_planes = reduced_dim
    if not cfg.MODEL.ROI_HEADS.USE_FPN:
        # Without FPN the input width is the res2 width scaled by
        # 2 ** (stage_index - 1) with stage_index = 4.
        stage_index = 4
        in_planes = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * 2 ** (stage_index - 1)

    self.conv5_mask = ConvTranspose2d(in_planes, reduced_dim, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(reduced_dim, out_classes, 1, 1, 0)

    if cfg.MODEL.ROI_MASK_HEAD.OVERLAP:
        # Parallel branch with a single output channel.
        self.conv5_overlap = ConvTranspose2d(in_planes, reduced_dim, 2, 2, 0)
        self.overlap_fcn_logits = Conv2d(reduced_dim, 1, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg):
    """Build four 3x3 convolutions and two linear layers, then initialise them.

    Args:
        cfg: config node; only ``MODEL.ACTIVATION_FUNCTION`` is read and it
            is stored on ``self.acf``.
    """
    super(MaskIoUFeatureExtractor, self).__init__()
    # NOTE(review): 257 is presumably 256 feature channels plus one mask
    # channel -- confirm against the forward pass.
    in_planes = 257

    self.maskiou_fcn1 = Conv2d(in_planes, 256, 3, 1, 1)
    self.maskiou_fcn2 = Conv2d(256, 256, 3, 1, 1)
    self.maskiou_fcn3 = Conv2d(256, 256, 3, 1, 1)
    # The last convolution uses stride 2.
    self.maskiou_fcn4 = Conv2d(256, 256, 3, 2, 1)
    self.maskiou_fc1 = nn.Linear(256 * 7 * 7, 1024)
    self.maskiou_fc2 = nn.Linear(1024, 1024)
    self.acf = cfg.MODEL.ACTIVATION_FUNCTION

    conv_layers = (
        self.maskiou_fcn1,
        self.maskiou_fcn2,
        self.maskiou_fcn3,
        self.maskiou_fcn4,
    )
    for conv in conv_layers:
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)

    for fc in (self.maskiou_fc1, self.maskiou_fc2):
        nn.init.kaiming_uniform_(fc.weight, a=1)
        nn.init.constant_(fc.bias, 0)
def __init__(self, cfg):
    """Build the conv stack; the tail depends on ``EXTRACTOR_CHANNEL``.

    Args:
        cfg: config node; reads ``MODEL.RELATION_MASK.EXTRACTOR_CHANNEL``.
            When it equals 1, ``conv5_mask`` is a transposed convolution
            followed by a one-channel ``mask_fcn_logits``. Otherwise
            ``conv5_mask`` is a 3x3 convolution with 16 output channels and
            ``mask_fcn_logits`` is ``None``.
    """
    super(RoiAlignMaskFeatureExtractor, self).__init__()
    in_planes = 257
    self.mask_fcn1 = Conv2d(in_planes, 256, 3, 1, 1)
    self.mask_fcn2 = Conv2d(256, 256, 3, 1, 1)
    self.mask_fcn3 = Conv2d(256, 256, 3, 1, 1)
    to_init = [self.mask_fcn1, self.mask_fcn2, self.mask_fcn3]

    if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL == 1:
        self.conv5_mask = ConvTranspose2d(256, 256, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(256, 1, 3, 1, 1)
        to_init += [self.conv5_mask, self.mask_fcn_logits]
    else:
        self.mask_fcn_logits = None
        self.conv5_mask = Conv2d(256, 16, 3, 1, 1)
        to_init.append(self.conv5_mask)

    # Same initialisation for every layer that was actually created.
    for layer in to_init:
        nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(layer.bias, 0)
def __init__(self, inp, oup, stride, expand_ratio):
    """Build the inverted-residual convolution stack.

    Args:
        inp: input channel count.
        oup: output channel count.
        stride: stride of the depthwise 3x3 convolution; must be 1 or 2.
        expand_ratio: multiplier giving the hidden width
            ``round(inp * expand_ratio)``. When it equals 1 the leading
            pointwise expansion is omitted.
    """
    super(InvertedResidual, self).__init__()
    self.stride = stride
    assert stride in [1, 2]

    hidden_dim = round(inp * expand_ratio)
    # A residual connection is only flagged when the shapes are unchanged.
    self.use_res_connect = self.stride == 1 and inp == oup

    layers = []
    if expand_ratio != 1:
        # pw
        layers += [
            Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
            BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
        ]
    # dw
    layers += [
        Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
        BatchNorm2d(hidden_dim),
        nn.ReLU6(inplace=True),
    ]
    # pw-linear
    layers += [
        Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
        BatchNorm2d(oup),
    ]
    self.conv = nn.Sequential(*layers)
def __init__(self, cfg):
    """Create the mask layers and, when enabled, the sequence predictor.

    Args:
        cfg: config node. Reads ``MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]``,
            ``MODEL.ROI_HEADS.USE_FPN``, ``MODEL.ROI_MASK_HEAD.MIX_OPTION``
            (FPN only), ``MODEL.RESNETS.RES2_OUT_CHANNELS`` (non-FPN only)
            and ``SEQUENCE.SEQ_ON``.
    """
    super(SeqMaskRCNNC4Predictor, self).__init__()
    out_classes = 1
    reduced_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    if not cfg.MODEL.ROI_HEADS.USE_FPN:
        stage_index = 4
        in_planes = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * 2 ** (stage_index - 1)
    else:
        mix_option = cfg.MODEL.ROI_MASK_HEAD.MIX_OPTION
        if mix_option == 'CAT':
            # One extra input channel.
            in_planes = reduced_dim + 1
        elif mix_option in ('MIX', 'ATTENTION_CHANNEL'):
            # Twice the input channels.
            in_planes = reduced_dim * 2
        else:
            in_planes = reduced_dim

    self.conv5_mask = ConvTranspose2d(in_planes, reduced_dim, 2, 2, 0)
    # The logits layer is the same whether or not the sequence branch is on.
    self.mask_fcn_logits = Conv2d(reduced_dim, out_classes, 1, 1, 0)
    if cfg.SEQUENCE.SEQ_ON:
        self.seq = make_roi_seq_predictor(cfg, reduced_dim)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, inplanes, planes, stride=1, dilation=1, batch_norm=FrozenBatchNorm2d, **_):
    """Build two 3x3 convolutions, each followed by a normalisation layer.

    Args:
        inplanes: input channel count.
        planes: output channel count of both convolutions.
        stride: stride of the first convolution (the second uses 1).
        dilation: dilation of both convolutions; also used as their padding.
        batch_norm: callable that builds a normalisation layer from a
            channel count.
        **_: extra keyword arguments are accepted and ignored.
    """
    super(DlaBasic, self).__init__()
    # Settings common to both 3x3 convolutions.
    shared = dict(kernel_size=3, padding=dilation, bias=False, dilation=dilation)

    self.conv1 = Conv2d(inplanes, planes, stride=stride, **shared)
    self.bn1 = batch_norm(planes)
    self.relu = nn.ReLU(inplace=True)
    self.conv2 = Conv2d(planes, planes, stride=1, **shared)
    self.bn2 = batch_norm(planes)
    self.stride = stride
def __init__(self, C):
    """Build the gating pipeline stored on ``self.op``.

    Args:
        C: channel count of the input and of the produced gate. The hidden
            width is ``C // self.reduction`` with a floor of 8.
    """
    super(SEModule, self).__init__()
    squeezed = max(C // self.reduction, 8)
    reduce_conv = Conv2d(C, squeezed, 1, 1, 0)
    expand_conv = Conv2d(squeezed, C, 1, 1, 0)
    # global average pool -> 1x1 reduce -> ReLU -> 1x1 expand -> sigmoid
    stages = [
        nn.AdaptiveAvgPool2d(1),
        reduce_conv,
        nn.ReLU(inplace=True),
        expand_conv,
        nn.Sigmoid(),
    ]
    self.op = nn.Sequential(*stages)
def __init__(
    self,
    in_channels,
    bottleneck_channels,
    out_channels,
    num_groups,
    stride_in_1x1,
    stride,
    dilation,
    norm_func
):
    """Build a bottleneck block with optional groups and dilation.

    Args:
        in_channels: input channel count.
        bottleneck_channels: channel count of the inner convolutions.
        out_channels: output channel count.
        num_groups: groups of the 3x3 convolution.
        stride_in_1x1: if true the stride goes on the first 1x1 convolution,
            otherwise on the 3x3 convolution.
        stride: block stride; forced to 1 when ``dilation > 1``.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_func: callable that builds a normalisation layer from a
            channel count.
    """
    super(Bottleneck, self).__init__()

    if in_channels == out_channels:
        self.downsample = None
    else:
        # The shortcut only strides when the block is not dilated.
        shortcut_stride = 1 if dilation != 1 else stride
        shortcut_conv = Conv2d(
            in_channels, out_channels,
            kernel_size=1, stride=shortcut_stride, bias=False
        )
        self.downsample = nn.Sequential(shortcut_conv, norm_func(out_channels))

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels, bottleneck_channels,
        kernel_size=1, stride=stride_1x1, bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    self.conv2 = Conv2d(
        bottleneck_channels, bottleneck_channels,
        kernel_size=3, stride=stride_3x3, padding=dilation,
        bias=False, groups=num_groups, dilation=dilation
    )
    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(
        bottleneck_channels, out_channels, kernel_size=1, bias=False
    )
    self.bn3 = norm_func(out_channels)
def __init__(self, kernel_size, inchannel, outchannel, stride=1):
    """Build a depthwise convolution followed by a 1x1 pointwise convolution.

    Args:
        kernel_size: kernel size of the depthwise convolution.
        inchannel: input channel count (also the depthwise group count).
        outchannel: output channel count of the pointwise convolution.
        stride: stride of the depthwise convolution.
    """
    super(dwconv, self).__init__()
    half_kernel = (kernel_size - 1) // 2
    self.depthwise = Conv2d(
        inchannel,
        inchannel,
        kernel_size=kernel_size,
        stride=stride,
        padding=half_kernel,
        groups=inchannel,
    )
    self.pointwise = Conv2d(inchannel, outchannel, kernel_size=1, stride=1)
    # Initialisation is delegated to reset_parameters (defined elsewhere).
    self.reset_parameters()
def _make_fuse_layers(self):
    """Build the cross-branch fusion modules.

    Reads ``self.num_branches``, ``self.num_inchannels`` and
    ``self.multi_scale_output``.

    Returns:
        ``None`` when there is a single branch. Otherwise an ``nn.ModuleList``
        of ``nn.ModuleList`` rows, where entry ``[i][j]`` maps branch ``j``
        to the width of branch ``i``: ``None`` for ``j == i``, a 1x1 conv +
        norm + nearest upsample for ``j > i``, and a chain of stride-2 3x3
        convs for ``j < i``.
    """
    if self.num_branches == 1:
        return None

    branch_count = self.num_branches
    widths = self.num_inchannels
    row_count = branch_count if self.multi_scale_output else 1

    rows = []
    for dst in range(row_count):
        row = []
        for src in range(branch_count):
            if src == dst:
                row.append(None)
            elif src > dst:
                # 1x1 conv to the target width, then upsample by 2**(src - dst).
                row.append(
                    nn.Sequential(
                        Conv2d(widths[src], widths[dst], 1, 1, 0, bias=False),
                        FrozenBatchNorm2d(widths[dst]),
                        nn.Upsample(scale_factor=2**(src - dst), mode='nearest')))
            else:
                # One stride-2 3x3 conv per step; only the last step switches
                # to the target width and it carries no ReLU.
                steps = dst - src
                chain = []
                for step in range(steps):
                    is_last = step == steps - 1
                    out_width = widths[dst] if is_last else widths[src]
                    stage = [
                        Conv2d(widths[src], out_width, 3, 2, 1, bias=False),
                        FrozenBatchNorm2d(out_width),
                    ]
                    if not is_last:
                        stage.append(nn.ReLU(True))
                    chain.append(nn.Sequential(*stage))
                row.append(nn.Sequential(*chain))
        rows.append(nn.ModuleList(row))

    return nn.ModuleList(rows)
def __init__(self, in_channels, bottleneck_channels, out_channels, num_groups=1, stride_in_1x1=True, stride=1, activation_function=F.relu_):
    """Build a bottleneck block with frozen batch norm.

    Args:
        in_channels: input channel count.
        bottleneck_channels: channel count of the inner convolutions.
        out_channels: output channel count.
        num_groups: groups of the 3x3 convolution.
        stride_in_1x1: if true the stride goes on the first 1x1 convolution,
            otherwise on the 3x3 convolution.
        stride: block stride (also used by the shortcut projection).
        activation_function: callable stored on ``self.acf``.
    """
    super(BottleneckWithFixedBatchNorm, self).__init__()

    if in_channels == out_channels:
        self.downsample = None
    else:
        shortcut_conv = Conv2d(
            in_channels, out_channels, kernel_size=1, stride=stride, bias=False
        )
        self.downsample = nn.Sequential(
            shortcut_conv, FrozenBatchNorm2d(out_channels)
        )

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels, bottleneck_channels,
        kernel_size=1, stride=stride_1x1, bias=False,
    )
    self.bn1 = FrozenBatchNorm2d(bottleneck_channels)
    # TODO: specify init for the above

    self.conv2 = Conv2d(
        bottleneck_channels, bottleneck_channels,
        kernel_size=3, stride=stride_3x3, padding=1,
        bias=False, groups=num_groups,
    )
    self.bn2 = FrozenBatchNorm2d(bottleneck_channels)

    self.conv3 = Conv2d(
        bottleneck_channels, out_channels, kernel_size=1, bias=False
    )
    self.bn3 = FrozenBatchNorm2d(out_channels)

    self.acf = activation_function
def __init__(self, C_in, C_out, stride):
    """Build two cascaded 3x3 convolutions and hand them to the parent.

    Args:
        C_in: input channel count (also the width after the first conv).
        C_out: output channel count.
        stride: stride of the first convolution; must be 1 or 2.
    """
    assert stride in [1, 2]
    first_stage = [
        Conv2d(C_in, C_in, 3, stride, 1, bias=False),
        BatchNorm2d(C_in),
        nn.ReLU(inplace=True),
    ]
    second_stage = [
        Conv2d(C_in, C_out, 3, 1, 1, bias=False),
        BatchNorm2d(C_out),
    ]
    super(CascadeConv3x3, self).__init__(*(first_stage + second_stage))
    # Residual is only flagged when the spatial size and width are unchanged.
    self.res_connect = (stride == 1) and (C_in == C_out)
def __init__(self, cfg):
    """Build the eight-conv trunk, four fixed-size poolers and two post convs.

    Args:
        cfg: config node; only
            ``MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO`` is used. Pooler
            output size is fixed at 25x25 and each pooler has its own
            hard-coded scale (1, 0.5, 0.25, 0.125), so the configured
            resolution and scales are not read.
    """
    super(PRCNNFeatureExtractor, self).__init__()
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO

    # Trunk: pairs of 3x3 convolutions at widths 32, 64, 128 and 256.
    self.conv1 = Conv2d(3, 32, 3, stride=1, padding=1)
    self.conv2 = Conv2d(32, 32, 3, stride=1, padding=1)
    self.conv3 = Conv2d(32, 64, 3, stride=1, padding=1)
    self.conv4 = Conv2d(64, 64, 3, stride=1, padding=1)
    self.conv5 = Conv2d(64, 128, 3, stride=1, padding=1)
    self.conv6 = Conv2d(128, 128, 3, stride=1, padding=1)
    self.conv7 = Conv2d(128, 256, 3, stride=1, padding=1)
    self.conv8 = Conv2d(256, 256, 3, stride=1, padding=1)

    # One pooler per feature scale; the scale halves after every max-pool.
    self.pooler1 = Pooler(
        output_size=(25, 25),
        scales=(1., ),
        sampling_ratio=sampling_ratio,
    )
    self.p1 = nn.MaxPool2d(3, 2, 1)
    self.pooler2 = Pooler(
        output_size=(25, 25),
        scales=(0.5, ),
        sampling_ratio=sampling_ratio,
    )
    self.p2 = nn.MaxPool2d(3, 2, 1)
    self.pooler3 = Pooler(
        output_size=(25, 25),
        scales=(0.25, ),
        sampling_ratio=sampling_ratio,
    )
    self.p3 = nn.MaxPool2d(3, 2, 1)
    self.pooler4 = Pooler(
        output_size=(25, 25),
        scales=(0.125, ),
        sampling_ratio=sampling_ratio,
    )

    # NOTE(review): 480 presumably equals 32 + 64 + 128 + 256, i.e. the
    # pooled features of all four scales concatenated -- confirm in forward.
    self.posconv1 = Conv2d(480, 256, 3, stride=1, padding=1)
    self.posconv2 = Conv2d(256, 32, 3, stride=1, padding=1)

    conv_layers = (
        self.conv1, self.conv2, self.conv3, self.conv4,
        self.conv5, self.conv6, self.conv7, self.conv8,
        self.posconv1, self.posconv2,
    )
    for layer in conv_layers:
        nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(layer.bias, 0)
def __init__(self, inplanes, outplanes, stride=1, dilation=1, scale=4, cardinality=8, base_width=4, batch_norm=FrozenBatchNorm2d):
    """Build a multi-scale bottleneck: 1x1 conv, per-scale 3x3 convs, 1x1 conv.

    Args:
        inplanes: input channel count.
        outplanes: output channel count.
        stride: stride of the per-scale 3x3 convolutions; when greater than
            1 an average-pool layer with the same stride is also created.
        dilation: dilation (and padding) of the 3x3 convolutions.
        scale: number of width-wise splits; ``max(1, scale - 1)`` 3x3
            convolutions are created.
        cardinality: group count of the 3x3 convolutions.
        base_width: base width used to derive the per-split width.
        batch_norm: callable that builds a normalisation layer from a
            channel count.
    """
    super(DlaBottle2neck, self).__init__()
    self.is_first = stride > 1
    self.scale = scale

    grouped_width = int(
        math.floor(outplanes * (base_width / 64)) * cardinality)
    split_width = grouped_width // self.expansion
    self.width = split_width

    self.conv1 = Conv2d(inplanes, split_width * scale, kernel_size=1, bias=False)
    self.bn1 = batch_norm(split_width * scale)

    # One grouped 3x3 convolution (plus norm) per processed split.
    scale_convs, scale_norms = [], []
    for _ in range(max(1, scale - 1)):
        scale_convs.append(
            Conv2d(
                split_width, split_width,
                kernel_size=3, stride=stride,
                padding=dilation, dilation=dilation,
                groups=cardinality, bias=False,
            )
        )
        scale_norms.append(batch_norm(split_width))
    self.convs = nn.ModuleList(scale_convs)
    self.bns = nn.ModuleList(scale_norms)

    if self.is_first:
        self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)

    self.conv3 = Conv2d(split_width * scale, outplanes, kernel_size=1, bias=False)
    self.bn3 = batch_norm(outplanes)
    self.relu = nn.ReLU(inplace=True)
def __init__(self, cfg):
    """Build the level mapper and a stack of 3x3 convolutions.

    Arguments:
        cfg: config node. Reads ``MODEL.ROI_TEXTSNAKE_HEAD.POOLER_SCALES``,
            ``MODEL.BACKBONE.OUT_CHANNELS`` and
            ``MODEL.ROI_TEXTSNAKE_HEAD.CONV_LAYERS`` (one output width per
            convolution). The convolutions are registered as
            ``mask_fcn1``, ``mask_fcn2``, ... and their names are collected
            in ``self.blocks``.
    """
    super(TextsnakeFPNFeatureExtractor, self).__init__()
    head_cfg = cfg.MODEL.ROI_TEXTSNAKE_HEAD
    mapper = LevelMapper(scales=head_cfg.POOLER_SCALES)
    prev_width = cfg.MODEL.BACKBONE.OUT_CHANNELS
    self.level_mapper = mapper

    self.blocks = []
    for idx, width in enumerate(head_cfg.CONV_LAYERS, 1):
        name = "mask_fcn{}".format(idx)
        conv = Conv2d(prev_width, width, 3, stride=1, padding=1)
        # Caffe2 implementation uses MSRAFill, which in fact
        # corresponds to kaiming_normal_ in PyTorch
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        prev_width = width
def __init__(self, cfg):
    """Build the ROI pooler and a stack of 3x3 convolutions.

    Arguments:
        cfg: YACS config node. Reads ``POOLER_RESOLUTION``,
            ``POOLER_SCALES``, ``POOLER_SAMPLING_RATIO`` and ``CONV_LAYERS``
            from ``MODEL.VLINE_HEAD`` plus
            ``MODEL.RESNETS.BACKBONE_OUT_CHANNELS``. The convolutions are
            registered as ``vp_mask_fcn1``, ``vp_mask_fcn2``, ... and their
            names are collected in ``self.blocks``.
    """
    super(VLineFPNFeatureExtractor, self).__init__()
    head_cfg = cfg.MODEL.VLINE_HEAD
    side = head_cfg.POOLER_RESOLUTION
    pooler_scales = head_cfg.POOLER_SCALES
    ratio = head_cfg.POOLER_SAMPLING_RATIO
    prev_width = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    conv_widths = head_cfg.CONV_LAYERS

    self.pooler = Pooler(
        output_size=(side, side),
        scales=pooler_scales,
        sampling_ratio=ratio,
    )

    self.blocks = []
    for idx, width in enumerate(conv_widths, 1):
        name = "vp_mask_fcn{}".format(idx)
        conv = Conv2d(prev_width, width, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        prev_width = width
def _init_adaptor(self, s_channel, t_channel):
    """Create and initialise a 1x1 convolution from ``s_channel`` to ``t_channel``.

    Args:
        s_channel: input channel count.
        t_channel: output channel count.

    Returns:
        The convolution, with Kaiming-normal weights and zero bias.
    """
    # NOTE(review): the positional arguments are kernel_size=1, stride=1,
    # padding=1. Padding a 1x1 kernel grows each spatial dimension by 2;
    # confirm this is intended before relying on the output size.
    layer = Conv2d(s_channel, t_channel, 1, 1, 1)
    nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
    nn.init.constant_(layer.bias, 0)
    return layer
def __init__(self, cfg, norm_func):
    """Build a single 3x3 stride-1 stem convolution with normalisation.

    Args:
        cfg: config node; reads ``MODEL.RESNETS.STEM_OUT_CHANNELS`` and
            ``MODEL.RESNETS.USE_MISH`` (selects ``Mish`` over ``nn.ReLU``).
        norm_func: callable that builds a normalisation layer from a
            channel count.
    """
    super(RootStem, self).__init__()
    stem_width = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

    if cfg.MODEL.RESNETS.USE_MISH:
        self.activation = Mish()
    else:
        self.activation = nn.ReLU()

    self.conv1 = Conv2d(
        3, stem_width, kernel_size=3, stride=1, padding=1, bias=False
    )
    self.bn1 = norm_func(stem_width)

    # Only conv1 exists; earlier revisions had further stem convolutions
    # (conv2/conv3) that are currently disabled.
    nn.init.kaiming_uniform_(self.conv1.weight, a=1)
def __init__(self, in_channels):
    """Build five 3x3 convolutions with widths 256, 128, 64, 32 and 16.

    Args:
        in_channels: channel count of the input feature map.

    The convolutions are registered as ``conv_refinenet1`` ...
    ``conv_refinenet5`` (names collected in ``self.blocks``) and keep their
    default initialisation. ``self.out_channels`` is the last width.
    No pooler is created here.
    """
    super(KeypointRCNNFeatureExtractor, self).__init__()
    widths = (256, 128, 64, 32, 16)

    self.blocks = []
    prev_width = in_channels
    for idx, width in enumerate(widths, 1):
        name = "conv_refinenet{}".format(idx)
        self.add_module(name, Conv2d(prev_width, width, 3, stride=1, padding=1))
        self.blocks.append(name)
        prev_width = width
    self.out_channels = width
def __init__(self, cfg, in_channels):
    """Build the ROI pooler and a stack of 3x3 convolutions.

    Args:
        cfg: config node; reads ``POOLER_RESOLUTION``, ``POOLER_SCALES``,
            ``POOLER_SAMPLING_RATIO`` and ``CONV_LAYERS`` from
            ``MODEL.ROI_BB8KEYPOINT_HEAD``.
        in_channels: channel count of the input feature map.

    The convolutions are registered as ``conv_fcn1``, ``conv_fcn2``, ...
    (names collected in ``self.blocks``). ``self.out_channels`` is the width
    of the last one.
    """
    super(BB8KeypointRCNNFeatureExtractor, self).__init__()
    head_cfg = cfg.MODEL.ROI_BB8KEYPOINT_HEAD
    side = head_cfg.POOLER_RESOLUTION
    pooler_scales = head_cfg.POOLER_SCALES
    ratio = head_cfg.POOLER_SAMPLING_RATIO
    self.pooler = Pooler(
        output_size=(side, side),
        scales=pooler_scales,
        sampling_ratio=ratio,
    )

    conv_widths = head_cfg.CONV_LAYERS
    self.blocks = []
    prev_width = in_channels
    for idx, layer_features in enumerate(conv_widths, 1):
        name = "conv_fcn{}".format(idx)
        conv = Conv2d(prev_width, layer_features, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        prev_width = layer_features
    self.out_channels = layer_features
def __init__(self, cfg):
    """Build three 3x3 convs, a transposed conv and a one-channel logits conv.

    Args:
        cfg: config node; accepted for interface compatibility and not read.

    Fixes over the previous version:
      * The parent initialiser is called first. ``torch.nn.Module`` refuses
        submodule assignment before ``Module.__init__`` has run.
      * The init loop no longer references ``self.mask_fcn4``, which this
        constructor never creates and which raised ``AttributeError``.
    """
    # NOTE(review): the enclosing class is not visible here; it is assumed
    # to derive from nn.Module like the sibling extractors -- confirm.
    super().__init__()
    input_channels = 256
    self.mask_fcn1 = Conv2d(input_channels, 256, 3, 1, 1)
    self.mask_fcn2 = Conv2d(256, 256, 3, 1, 1)
    self.mask_fcn3 = Conv2d(256, 256, 3, 1, 1)
    self.conv5_mask = ConvTranspose2d(256, 256, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(256, 1, 1, 1, 0)

    # Initialise exactly the layers created above.
    created_layers = (
        self.mask_fcn1,
        self.mask_fcn2,
        self.mask_fcn3,
        self.conv5_mask,
        self.mask_fcn_logits,
    )
    for layer in created_layers:
        nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(layer.bias, 0)
def __init__(self, cfg):
    """Build the 7x7 stride-2 stem convolution and its frozen batch norm.

    Args:
        cfg: config node; reads ``MODEL.RESNETS.STEM_OUT_CHANNELS``.
    """
    super(StemWithFixedBatchNorm, self).__init__()
    stem_width = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

    # 3 input channels, no bias.
    self.conv1 = Conv2d(
        3,
        stem_width,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
    )
    self.bn1 = FrozenBatchNorm2d(stem_width)
def make_conv3x3_with_norm_func(in_channels, out_channels, dilation=1, stride=1, norm_func=None, use_relu=False, kaiming_init=True, Conv2d=Conv2d):
    """Build a 3x3 convolution, optionally followed by a norm layer and ReLU.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        dilation: dilation of the convolution; also used as its padding.
        stride: stride of the convolution.
        norm_func: optional callable that builds a normalisation layer from
            a channel count. When given, the convolution has no bias.
        use_relu: append ``nn.ReLU()`` when true.
        kaiming_init: Kaiming-normal weight init when true, otherwise
            normal init with ``std=0.01``.
        Conv2d: convolution class to instantiate.

    Returns:
        The bare convolution when nothing is appended, otherwise an
        ``nn.Sequential`` of conv, optional norm and optional ReLU.

    Note:
        The convolution used to be created with ``bias=False``
        unconditionally while its bias was still zero-initialised whenever
        ``norm_func`` was ``None``, so the default call failed on a ``None``
        bias. A bias is now created exactly when no norm layer follows.
    """
    has_norm = norm_func is not None
    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=not has_norm,
    )
    if kaiming_init:
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
    else:
        torch.nn.init.normal_(conv.weight, std=0.01)
    if not has_norm:
        nn.init.constant_(conv.bias, 0)

    module = [conv]
    if has_norm:
        module.append(norm_func(out_channels))
    if use_relu:
        module.append(nn.ReLU())
    if len(module) > 1:
        return nn.Sequential(*module)
    return conv
def __init__(self, cfg):
    """Build the ROI pooler and a stack of 3x3 convolutions.

    Arguments:
        cfg: config node. Reads ``POOLER_RESOLUTION``, ``POOLER_SCALES``,
            ``POOLER_SAMPLING_RATIO`` and ``CONV_LAYERS`` from
            ``MODEL.ROI_MASK_HEAD`` plus ``MODEL.BACKBONE.OUT_CHANNELS``.
            The convolutions are registered as ``mask_fcn1``,
            ``mask_fcn2``, ... and their names are collected in
            ``self.blocks``.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    head_cfg = cfg.MODEL.ROI_MASK_HEAD
    side = head_cfg.POOLER_RESOLUTION
    pooler_scales = head_cfg.POOLER_SCALES
    ratio = head_cfg.POOLER_SAMPLING_RATIO
    roi_pooler = Pooler(
        output_size=(side, side),
        scales=pooler_scales,
        sampling_ratio=ratio,
    )
    prev_width = cfg.MODEL.BACKBONE.OUT_CHANNELS
    self.pooler = roi_pooler

    self.blocks = []
    for idx, width in enumerate(head_cfg.CONV_LAYERS, 1):
        name = "mask_fcn{}".format(idx)
        conv = Conv2d(prev_width, width, 3, stride=1, padding=1)
        # Caffe2 implementation uses MSRAFill, which in fact
        # corresponds to kaiming_normal_ in PyTorch
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        prev_width = width
def make_conv(
    in_channels, out_channels, kernel_size, stride=1, dilation=1
):
    """Build a convolution, optionally followed by group norm and ReLU.

    ``use_gn`` and ``use_relu`` are not parameters; they are taken from the
    surrounding scope, which is not visible here.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        kernel_size: kernel size; padding is
            ``dilation * (kernel_size - 1) // 2``.
        stride: stride of the convolution.
        dilation: dilation of the convolution.

    Returns:
        The bare convolution when neither option is active, otherwise an
        ``nn.Sequential``.
    """
    pad = dilation * (kernel_size - 1) // 2
    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=pad,
        dilation=dilation,
        bias=not use_gn,  # no bias when a norm layer follows
    )
    # Caffe2 implementation uses XavierFill, which in fact
    # corresponds to kaiming_uniform_ in PyTorch
    nn.init.kaiming_uniform_(conv.weight, a=1)
    if not use_gn:
        nn.init.constant_(conv.bias, 0)

    if not (use_gn or use_relu):
        return conv

    stages = [conv]
    if use_gn:
        stages.append(group_norm(out_channels))
    if use_relu:
        stages.append(nn.ReLU(inplace=True))
    return nn.Sequential(*stages)
def make_conv3x3(
    in_channels,
    out_channels,
    dilation=1,
    stride=1,
    use_gn=False,
    use_relu=False,
    kaiming_init=True
):
    """Build a 3x3 convolution, optionally followed by group norm and ReLU.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        dilation: dilation of the convolution; also used as its padding.
        stride: stride of the convolution.
        use_gn: append a group-norm layer and drop the convolution bias.
        use_relu: append an in-place ReLU.
        kaiming_init: Kaiming-normal weight init when true, otherwise
            normal init with ``std=0.01``.

    Returns:
        The bare convolution when nothing is appended, otherwise an
        ``nn.Sequential``.
    """
    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=not use_gn,
    )

    if not kaiming_init:
        torch.nn.init.normal_(conv.weight, std=0.01)
    else:
        nn.init.kaiming_normal_(
            conv.weight, mode="fan_out", nonlinearity="relu"
        )
    if not use_gn:
        nn.init.constant_(conv.bias, 0)

    if not (use_gn or use_relu):
        return conv

    stages = [conv]
    if use_gn:
        stages.append(group_norm(out_channels))
    if use_relu:
        stages.append(nn.ReLU(inplace=True))
    return nn.Sequential(*stages)
def make_conv1x1(
    in_channels,
    out_channels,
    use_gn=False,
    use_relu=False,
    use_bias=True,
    kaiming_init=True,
    adaptive_group_norm=False,
):
    """Build a 1x1 convolution, optionally followed by group norm and ReLU.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        use_gn: append a group-norm layer; this also disables the bias.
        use_relu: append an in-place ReLU.
        use_bias: give the convolution a bias (ignored when ``use_gn``).
        kaiming_init: Kaiming-normal weight init when true, otherwise
            normal init with ``std=0.01``.
        adaptive_group_norm: forwarded to ``group_norm`` as ``adaptive``.

    Returns:
        The bare convolution when nothing is appended, otherwise an
        ``nn.Sequential``.
    """
    has_bias = bool(use_bias) and not use_gn
    conv = Conv2d(in_channels, out_channels, kernel_size=1, bias=has_bias)

    if not kaiming_init:
        torch.nn.init.normal_(conv.weight, std=0.01)
    else:
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
    if has_bias:
        nn.init.constant_(conv.bias, 0)

    if not (use_gn or use_relu):
        return conv

    stages = [conv]
    if use_gn:
        stages.append(group_norm(out_channels, adaptive=adaptive_group_norm))
    if use_relu:
        stages.append(nn.ReLU(inplace=True))
    return nn.Sequential(*stages)
def __init__(self, C_in, C_out, expansion, stride):
    """Build pointwise -> shift -> pointwise-linear and hand it to the parent.

    Args:
        C_in: input channel count.
        C_out: output channel count.
        expansion: multiplier for the hidden width, which is
            ``_get_divisible_by(C_in * expansion, 8, 8)``.
        stride: stride passed to the shift layer; must be 1 or 2.
    """
    assert stride in [1, 2]
    # Set before the parent initialiser runs, as in the original ordering.
    self.res_connect = (stride == 1) and (C_in == C_out)

    hidden = _get_divisible_by(C_in * expansion, 8, 8)

    # pw
    expand = [
        Conv2d(C_in, hidden, 1, 1, 0, bias=False),
        BatchNorm2d(hidden),
        nn.ReLU(inplace=True),
    ]
    # shift
    shift = [Shift(hidden, 5, stride, 2)]
    # pw-linear
    project = [
        Conv2d(hidden, C_out, 1, 1, 0, bias=False),
        BatchNorm2d(C_out),
    ]

    super(ShiftBlock5x5, self).__init__(*(expand + shift + project))