def __init__(self, cfg, norm_func):
        super(BaseStem, self).__init__()

        out_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

        self.conv1 = Conv2d(3,
                            out_channels,
                            kernel_size=7,
                            stride=2,
                            padding=3,
                            bias=False)
        self.bn1 = norm_func(out_channels)

        for l in [
                self.conv1,
        ]:
            nn.init.kaiming_uniform_(l.weight, a=1)
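For context: kaiming_uniform_ with a=1 reproduces Caffe2's XavierFill, while kaiming_normal_ with mode="fan_out" reproduces MSRAFill. A minimal, self-contained sketch of the stem above plus its usual forward pass, assuming plain torch.nn layers in place of the project's Conv2d and norm_func:

import torch
import torch.nn.functional as F
from torch import nn

class StemSketch(nn.Module):
    # Hypothetical standalone version of BaseStem: cfg lookups are replaced
    # by plain arguments, and norm_func defaults to nn.BatchNorm2d.
    def __init__(self, out_channels=64, norm_func=nn.BatchNorm2d):
        super().__init__()
        self.conv1 = nn.Conv2d(3, out_channels, kernel_size=7,
                               stride=2, padding=3, bias=False)
        self.bn1 = norm_func(out_channels)
        nn.init.kaiming_uniform_(self.conv1.weight, a=1)

    def forward(self, x):
        # 7x7/2 conv + norm + ReLU + 3x3/2 max pool: 4x downsampling overall.
        x = F.relu(self.bn1(self.conv1(x)))
        return F.max_pool2d(x, kernel_size=3, stride=2, padding=1)

# StemSketch()(torch.randn(1, 3, 224, 224)).shape -> torch.Size([1, 64, 56, 56])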
Example #2
    def __init__(self, cfg):
        """
        Arguments:
            num_classes (int): number of output classes
            input_size (int): number of channels of the input once it's flattened
            representation_size (int): size of the intermediate representation
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        # resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        if cfg.MODEL.CHAR_MASK_ON:
            resolution_h = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION_H
            resolution_w = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION_W
        else:
            resolution_h = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
            resolution_w = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution_h, resolution_w),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = pooler

        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS

        next_feature = input_size
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = Conv2d(next_feature,
                            layer_features,
                            3,
                            stride=1,
                            padding=1)
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(module.weight,
                                    mode="fan_out",
                                    nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
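The matching forward in this style of extractor simply walks self.blocks by name; a sketch of the usual pattern (method shown on its own for brevity):

import torch.nn.functional as F

def forward(self, x, proposals):
    # Pool ROI features, then apply each registered conv + ReLU in order.
    x = self.pooler(x, proposals)
    for layer_name in self.blocks:
        x = F.relu(getattr(self, layer_name)(x))
    return x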
Example #3
    def __init__(self, cfg, in_channels):
        super(MaskRCNN_panet_Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        num_inputs = in_channels
        self.cfg = cfg

        self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0)

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param,
                                        mode="fan_out",
                                        nonlinearity="relu")
Example #4
    def __init__(self, cfg):
        super(StemWithSyncBN, self).__init__()

        out_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

        self.conv1 = Conv2d(
            3,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
        )
        self.bn1 = SyncBatchNorm2d(out_channels)

        for l in [
                self.conv1,
        ]:
            nn.init.kaiming_uniform_(l.weight, a=1)
Example #5
    def __init__(self, cfg, in_channels):
        super(DepthMaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_DEPTHMASK_HEAD.CONV_LAYERS[-1]
        num_inputs = in_channels

        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param,
                                        mode="fan_out",
                                        nonlinearity="relu")
Example #6
    def make_conv(
            in_channels, out_channels, kernel_size, stride=1, dilation=1
    ):
        if use_deconv:
            conv = Deconv(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=dilation * (kernel_size - 1) // 2,
                dilation=dilation,
                bias=True,
                block=block,
                sampling_stride=sampling_stride,
                sync=sync,
                norm_type=norm_type,
            )
        else:
            conv = Conv2d(
                in_channels,
                out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=dilation * (kernel_size - 1) // 2,
                dilation=dilation,
                bias=False if use_gn else True,
            )
        # Caffe2 implementation uses XavierFill, which in fact
        # corresponds to kaiming_uniform_ in PyTorch
        nn.init.kaiming_uniform_(conv.weight, a=1)
        if (not (use_gn or use_gw)) or use_deconv:
            nn.init.constant_(conv.bias, 0)
        module = [conv]
        if not use_deconv:
            if use_gn:
                module.append(group_norm(out_channels))
            if use_gw:
                module.append(Whitening_IGWItN(out_channels))
        if use_relu:
            module.append(nn.ReLU(inplace=True))
        if len(module) > 1:
            return nn.Sequential(*module)
        return conv
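make_conv closes over flags such as use_deconv, use_gn, use_gw and use_relu from an enclosing factory that is not shown here. The padding rule it relies on, dilation * (kernel_size - 1) // 2, preserves spatial size for odd kernels at stride 1; a quick standalone check:

import torch
from torch import nn

for kernel_size, dilation in [(3, 1), (3, 2), (5, 3)]:
    conv = nn.Conv2d(8, 8, kernel_size, stride=1,
                     padding=dilation * (kernel_size - 1) // 2,
                     dilation=dilation)
    # Effective kernel extent is dilation * (kernel_size - 1) + 1,
    # so this padding keeps H and W unchanged.
    assert conv(torch.randn(1, 8, 32, 32)).shape[-2:] == (32, 32)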
Example #7
    def __init__(self, cfg, in_channels):
        super(MaskRCNNC4Predictor_Upsample, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
        num_inputs = in_channels

        self.conv5_mask = nn.Sequential(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(num_inputs, dim_reduced, 3, 1, 1),
        )
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
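Replacing the 2x transposed convolution with bilinear upsampling followed by a 3x3 conv, as this predictor does, is a common way to avoid the checkerboard artifacts that deconvolutions can introduce.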
Example #8
    def __init__(self, cfg, norm_func):
        super(BaseStem, self).__init__()

        out_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS
        stride = cfg.MODEL.RESNETS.STEM_STRIDE

        self.conv1 = Conv2d(3,
                            out_channels,
                            kernel_size=7,
                            stride=stride,
                            padding=3,
                            bias=False)
        self.bn1 = norm_func(out_channels)

        for l in [
                self.conv1,
        ]:
            nn.init.kaiming_uniform_(l.weight, a=1)

        self.remove_max_pooling = cfg.MODEL.RESNETS.REMOVE_STEM_POOL  # added by hui
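A sketch of the matching forward, gating the stem pooling on the flag added above:

import torch.nn.functional as F

def forward(self, x):
    x = F.relu_(self.bn1(self.conv1(x)))
    if not self.remove_max_pooling:
        x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
    return x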
Example #9
    def __init__(self, cfg, norm_func):
        super(BaseStem, self).__init__()

        out_channels = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS
        in_channels = cfg.MODEL.RESNETS.STEM_IN_CHANNELS
        # if cfg.MODEL.RGB_ON and cfg.MODEL.DEPTH_ON:
        #     in_channels = cfg.MODEL.RESNETS.STEM_RGBDIN_CHANNELS
        # if isrgb:#cfg.MODEL.RGB_ON and not cfg.MODEL.DEPTH_ON:#if isrgb:
        #     in_channels = cfg.MODEL.RESNETS.STEM_IN_CHANNELS

        self.conv1 = Conv2d(in_channels,
                            out_channels,
                            kernel_size=7,
                            stride=2,
                            padding=3,
                            bias=False)
        self.bn1 = norm_func(out_channels)

        for l in [
                self.conv1,
        ]:
            nn.init.kaiming_uniform_(l.weight, a=1)
Example #10
    def _make_conv_level(self,
                         inplanes,
                         planes,
                         convs,
                         stride=1,
                         dilation=1,
                         batch_norm=FrozenBatchNorm2d):
        modules = []
        for i in range(convs):
            modules.extend([
                Conv2d(inplanes,
                       planes,
                       kernel_size=3,
                       stride=stride if i == 0 else 1,
                       padding=dilation,
                       bias=False,
                       dilation=dilation),
                batch_norm(planes),
                nn.ReLU(inplace=True)
            ])
            inplanes = planes
        return nn.Sequential(*modules)
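A self-contained equivalent for experimentation, with FrozenBatchNorm2d swapped for nn.BatchNorm2d so the sketch runs standalone; note that only the first conv in a level strides:

import torch
from torch import nn

def make_conv_level(inplanes, planes, convs, stride=1, dilation=1):
    modules = []
    for i in range(convs):
        modules.extend([
            nn.Conv2d(inplanes, planes, kernel_size=3,
                      stride=stride if i == 0 else 1,
                      padding=dilation, dilation=dilation, bias=False),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True),
        ])
        inplanes = planes
    return nn.Sequential(*modules)

# A 2-conv level at stride 2 halves H and W exactly once.
level = make_conv_level(32, 64, convs=2, stride=2)
assert level(torch.randn(1, 32, 56, 56)).shape == (1, 64, 28, 28)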
Example #11
    def __init__(self, cfg, in_channels):
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES  # 81
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]  # 256
        num_inputs = in_channels  # 256

        # Transposed conv, upsamples 2x: 14 --> 28
        self.conv5_mask = ConvTranspose2d(in_channels=num_inputs,
                                          out_channels=dim_reduced,
                                          kernel_size=2,
                                          stride=2,
                                          padding=0)
        # 1x1 conv
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                nn.init.kaiming_normal_(param,
                                        mode="fan_out",
                                        nonlinearity="relu")
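The forward for this predictor is typically two steps; a sketch:

import torch.nn.functional as F

def forward(self, x):
    # The deconv doubles resolution (14 --> 28), then the 1x1 conv
    # emits one mask logit map per class.
    x = F.relu(self.conv5_mask(x))
    return self.mask_fcn_logits(x)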
Example #12
    def __init__(self, cfg):
        super(MaskRCNNC4Predictor, self).__init__()
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

        if cfg.MODEL.ROI_HEADS.USE_FPN:
            num_inputs = dim_reduced
        else:
            stage_index = 4
            stage2_relative_factor = 2 ** (stage_index - 1)
            res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
            num_inputs = res2_out_channels * stage2_relative_factor

        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

        for name, param in self.named_parameters():
            if "bias" in name:
                nn.init.constant_(param, 0)
            elif "weight" in name:
                # Caffe2 implementation uses MSRAFill, which in fact
                # corresponds to kaiming_normal_ in PyTorch
                nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
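For the non-FPN branch, the input width follows ResNet's per-stage channel doubling; with the default RES2_OUT_CHANNELS of 256 (an assumption), stage 4 emits 2048 channels:

stage_index = 4
res2_out_channels = 256  # default RES2_OUT_CHANNELS
num_inputs = res2_out_channels * 2 ** (stage_index - 1)
assert num_inputs == 2048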
Example #13
def make_conv3x3(
    in_channels,
    out_channels,
    dilation=1,
    stride=1,
    use_gn=False,
    use_relu=False,
    use_bias=True,
    kaiming_init=True,
    adaptive_group_norm=False,
):
    conv = Conv2d(in_channels,
                  out_channels,
                  kernel_size=3,
                  stride=stride,
                  padding=dilation,
                  dilation=dilation,
                  bias=False if not use_bias or use_gn else True)
    if kaiming_init:
        nn.init.kaiming_normal_(conv.weight,
                                mode="fan_out",
                                nonlinearity="relu")
    else:
        torch.nn.init.normal_(conv.weight, std=0.01)
    if not use_gn and use_bias:
        nn.init.constant_(conv.bias, 0)
    module = [
        conv,
    ]
    if use_gn:
        module.append(group_norm(out_channels, adaptive=adaptive_group_norm))
    if use_relu:
        module.append(nn.ReLU(inplace=True))
    if len(module) > 1:
        return nn.Sequential(*module)
    return conv
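A runnable stand-in for make_conv3x3, with the project's group_norm helper replaced by nn.GroupNorm using 32 groups (a common default, assumed here):

import torch
from torch import nn

def conv3x3(in_ch, out_ch, use_gn=False, use_relu=False):
    conv = nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=not use_gn)
    nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
    if conv.bias is not None:
        nn.init.constant_(conv.bias, 0)
    layers = [conv]
    if use_gn:
        layers.append(nn.GroupNorm(32, out_ch))
    if use_relu:
        layers.append(nn.ReLU(inplace=True))
    return nn.Sequential(*layers) if len(layers) > 1 else conv

block = conv3x3(64, 64, use_gn=True, use_relu=True)
assert block(torch.randn(1, 64, 14, 14)).shape == (1, 64, 14, 14)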
Example #14
    def __init__(self, cfg, in_channels):

        super(KeypointRCNNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        input_features = in_channels

        layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
        next_feature = input_features
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "conv_fcn{}".format(layer_idx)
            module = Conv2d(next_feature,
                            layer_features,
                            3,
                            stride=1,
                            padding=1)
            nn.init.kaiming_normal_(module.weight,
                                    mode="fan_out",
                                    nonlinearity="relu")

            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)

        self.out_channels = layer_features
Example #15
    def __init__(self,
                 levels,
                 channels,
                 num_classes=1000,
                 in_chans=3,
                 cardinality=1,
                 base_width=64,
                 block=DlaBottle2neck,
                 residual_root=False,
                 linear_root=False,
                 batch_norm=FrozenBatchNorm2d,
                 drop_rate=0.0,
                 global_pool='avg',
                 feature_only=True,
                 dcn_config=(False, )):
        super(DLA, self).__init__()
        self.channels = channels
        self.num_classes = num_classes
        self.cardinality = cardinality
        self.base_width = base_width
        self.drop_rate = drop_rate

        # check whether deformable conv config is right
        if len(dcn_config) != 6:
            raise ValueError("Deformable configuration is not correct, "
                             "every level should specify a configuration.")

        self.base_layer = nn.Sequential(
            Conv2d(in_chans,
                   channels[0],
                   kernel_size=7,
                   stride=1,
                   padding=3,
                   bias=False), batch_norm(channels[0]), nn.ReLU(inplace=True))
        self.level0 = self._make_conv_level(channels[0],
                                            channels[0],
                                            levels[0],
                                            batch_norm=batch_norm)
        self.level1 = self._make_conv_level(channels[0],
                                            channels[1],
                                            levels[1],
                                            stride=2,
                                            batch_norm=batch_norm)
        cargs = dict(cardinality=cardinality,
                     base_width=base_width,
                     root_residual=residual_root,
                     batch_norm=batch_norm)
        self.level2 = DlaTree(levels[2],
                              block,
                              channels[1],
                              channels[2],
                              2,
                              level_root=False,
                              with_dcn=dcn_config[2],
                              **cargs)
        self.level3 = DlaTree(levels[3],
                              block,
                              channels[2],
                              channels[3],
                              2,
                              level_root=True,
                              with_dcn=dcn_config[3],
                              **cargs)
        self.level4 = DlaTree(levels[4],
                              block,
                              channels[3],
                              channels[4],
                              2,
                              level_root=True,
                              with_dcn=dcn_config[4],
                              **cargs)
        self.level5 = DlaTree(levels[5],
                              block,
                              channels[4],
                              channels[5],
                              2,
                              level_root=True,
                              with_dcn=dcn_config[5],
                              **cargs)

        if not feature_only:
            self.num_features = channels[-1]
            self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
            self.fc = nn.Conv2d(self.num_features *
                                self.global_pool.feat_mult(),
                                num_classes,
                                1,
                                bias=True)
Example #16
    def __init__(self,
                 levels,
                 block,
                 in_channels,
                 out_channels,
                 stride=1,
                 dilation=1,
                 cardinality=1,
                 base_width=64,
                 level_root=False,
                 root_dim=0,
                 root_kernel_size=1,
                 root_residual=False,
                 batch_norm=FrozenBatchNorm2d,
                 with_dcn=False):
        super(DlaTree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels
        cargs = dict(dilation=dilation,
                     cardinality=cardinality,
                     base_width=base_width,
                     batch_norm=batch_norm,
                     with_dcn=with_dcn)
        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride, **cargs)
            self.tree2 = block(out_channels, out_channels, 1, **cargs)
        else:
            cargs.update(
                dict(root_kernel_size=root_kernel_size,
                     root_residual=root_residual))
            self.tree1 = DlaTree(levels - 1,
                                 block,
                                 in_channels,
                                 out_channels,
                                 stride,
                                 root_dim=0,
                                 **cargs)
            self.tree2 = DlaTree(levels - 1,
                                 block,
                                 out_channels,
                                 out_channels,
                                 root_dim=root_dim + out_channels,
                                 **cargs)
        if levels == 1:
            self.root = DlaRoot(root_dim,
                                out_channels,
                                root_kernel_size,
                                root_residual,
                                batch_norm=batch_norm)
        self.level_root = level_root
        self.root_dim = root_dim
        self.downsample = nn.MaxPool2d(stride,
                                       stride=stride) if stride > 1 else None
        self.project = None
        if in_channels != out_channels:
            self.project = nn.Sequential(
                Conv2d(in_channels,
                       out_channels,
                       kernel_size=1,
                       stride=1,
                       bias=False), batch_norm(out_channels))
        self.levels = levels
Example #17
    def __init__(self, in_channels, bottleneck_channels, out_channels,
                 num_groups, stride_in_1x1, stride, dilation, norm_func,
                 dcn_config, dw_config):
        super(Bottleneck, self).__init__()

        self.downsample = None
        if in_channels != out_channels:
            down_stride = stride if dilation == 1 else 1
            self.downsample = nn.Sequential(
                Conv2d(in_channels,
                       out_channels,
                       kernel_size=1,
                       stride=down_stride,
                       bias=False),
                norm_func(out_channels),
            )
            for modules in [
                    self.downsample,
            ]:
                for l in modules.modules():
                    if isinstance(l, Conv2d):
                        nn.init.kaiming_uniform_(l.weight, a=1)

        if dilation > 1:
            stride = 1  # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels)
        # TODO: specify init for the above
        with_dcn = dcn_config.get("stage_with_dcn", False)
        if with_dcn:
            deformable_groups = dcn_config.get("deformable_groups", 1)
            with_modulated_dcn = dcn_config.get("with_modulated_dcn", False)
            self.conv2 = DFConv2d(bottleneck_channels,
                                  bottleneck_channels,
                                  with_modulated_dcn=with_modulated_dcn,
                                  kernel_size=3,
                                  stride=stride_3x3,
                                  groups=num_groups,
                                  dilation=dilation,
                                  deformable_groups=deformable_groups,
                                  bias=False)
        else:
            self.conv2 = Conv2d(bottleneck_channels,
                                bottleneck_channels,
                                kernel_size=3,
                                stride=stride_3x3,
                                padding=dilation,
                                bias=False,
                                groups=num_groups,
                                dilation=dilation)
            nn.init.kaiming_uniform_(self.conv2.weight, a=1)

        self.bn2 = norm_func(bottleneck_channels)

        self.conv3 = Conv2d(bottleneck_channels,
                            out_channels,
                            kernel_size=1,
                            bias=False)
        self.bn3 = norm_func(out_channels)
        self.with_dw = dw_config.get("stage_with_dw", False)
        if self.with_dw:
            self.insert_pos = dw_config.get('insert_pos', 'after1x1')
            assert self.insert_pos in ['after1x1', 'after3x3', 'afterAdd']
            if self.insert_pos == 'afterAdd':
                dw_block = DynamicWeightsCat11
                dw_channels = out_channels
            elif self.insert_pos == 'after3x3':
                dw_block = ReDynamicWeightsCat33  #ReDynamicWeightsCat33, DeformDGMN
                dw_channels = bottleneck_channels
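            # NOTE: insert_pos == 'after1x1' passes the assert above but has
            # no branch here, leaving dw_block and dw_channels undefined.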
            dw_group = dw_config.get('group', 1)
            dw_kernel = dw_config.get('kernel', 3)
            dw_dilation = dw_config.get('dilation', (1, 4, 8, 12))
            dw_shuffle = dw_config.get('shuffle', False)
            dw_deform = dw_config.get('deform', 'none')
            self.dw_block = dw_block(channels=dw_channels,
                                     group=dw_group,
                                     kernel=dw_kernel,
                                     dilation=dw_dilation,
                                     shuffle=dw_shuffle,
                                     deform=dw_deform)
        else:
            self.dw_block = None

        for l in [
                self.conv1,
                self.conv3,
        ]:
            nn.init.kaiming_uniform_(l.weight, a=1)
Example #18
def conv_1x1_bn(inp, oup):
    return nn.Sequential(Conv2d(inp, oup, 1, 1, 0, bias=False),
                         BatchNorm2d(oup), nn.ReLU6(inplace=True))
Example #19
def conv_bn(inp, oup, stride):
    return nn.Sequential(Conv2d(inp, oup, 3, stride, 1, bias=False),
                         BatchNorm2d(oup), nn.ReLU6(inplace=True))
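These two helpers are the stock MobileNetV2 building blocks; a quick standalone shape check of the stem, assuming Conv2d and BatchNorm2d are the plain torch.nn layers:

import torch
from torch import nn

stem = nn.Sequential(nn.Conv2d(3, 32, 3, 2, 1, bias=False),
                     nn.BatchNorm2d(32), nn.ReLU6(inplace=True))
# conv_bn(3, 32, stride=2) builds exactly this: 3 -> 32 channels, 2x downsample.
assert stem(torch.randn(1, 3, 224, 224)).shape == (1, 32, 112, 112)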
Example #20
    def __init__(self, cfg):
        super(Panoptic_FPN_Segmentation_Branch, self).__init__()
        self.cfg = cfg.clone()
        assert 'FPN' in cfg.MODEL.BACKBONE.CONV_BODY, 'Segmentation branch must be built on an FPN backbone'
        # Resnet backbone has 4 stages
        self.upsample_level1 = nn.Sequential(
            Conv2d(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                   cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL, 3, 1, 1),
            nn.GroupNorm(num_groups=32,
                         num_channels=cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL),
            nn.ReLU(True))

        self.upsample_level2 = nn.Sequential(
            Conv2d(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                   cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL, 3, 1, 1),
            nn.GroupNorm(num_groups=32,
                         num_channels=cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL),
            nn.ReLU(True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))

        self.upsample_level3 = nn.Sequential(
            Conv2d(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                   cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL, 3, 1, 1),
            nn.GroupNorm(num_groups=32,
                         num_channels=cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL),
            nn.ReLU(True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            Conv2d(cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL,
                   cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL, 3, 1, 1),
            nn.GroupNorm(num_groups=32,
                         num_channels=cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL),
            nn.ReLU(True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))

        self.upsample_level4 = nn.Sequential(
            Conv2d(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                   cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL, 3, 1, 1),
            nn.GroupNorm(num_groups=32,
                         num_channels=cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL),
            nn.ReLU(True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            Conv2d(cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL,
                   cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL, 3, 1, 1),
            nn.GroupNorm(num_groups=32,
                         num_channels=cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL),
            nn.ReLU(True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
            Conv2d(cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL,
                   cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL, 3, 1, 1),
            nn.GroupNorm(num_groups=32,
                         num_channels=cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL),
            nn.ReLU(True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))

        if cfg.MODEL.SEG_BRANCH.MERGE_OP == "add":
            self.to_segment_conv = Conv2d(cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL,
                                          cfg.MODEL.SEG_BRANCH.CLS_NUM, 1, 1)
        else:
            self.to_segment_conv = Conv2d(
                cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL * 4,
                cfg.MODEL.SEG_BRANCH.CLS_NUM, 1, 1)

        self.to_segment_upsample = nn.Upsample(scale_factor=4,
                                               mode='bilinear',
                                               align_corners=True)
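A hedged sketch of how such a branch is typically wired in forward; the level ordering (fine to coarse) and the merge logic are assumptions, not taken from the source:

import torch

def forward(self, fpn_features):
    # Each upsample_levelN path brings its FPN level to 1/4 input scale.
    p2, p3, p4, p5 = fpn_features[:4]
    levels = [self.upsample_level1(p2), self.upsample_level2(p3),
              self.upsample_level3(p4), self.upsample_level4(p5)]
    if self.cfg.MODEL.SEG_BRANCH.MERGE_OP == "add":
        merged = sum(levels)
    else:
        merged = torch.cat(levels, dim=1)  # matches the *4 input width above
    return self.to_segment_upsample(self.to_segment_conv(merged))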
Example #21
    def __init__(
        self,
        in_channels,
        bottleneck_channels,
        out_channels,
        num_groups,
        stride_in_1x1,
        stride,
        dilation,
        norm_func
    ):
        super(Bottleneck, self).__init__()

        self.downsample = None
        if in_channels != out_channels:
            down_stride = stride if dilation == 1 else 1
            self.downsample = nn.Sequential(
                Conv2d(
                    in_channels, out_channels,
                    kernel_size=1, stride=down_stride, bias=False
                ),
                norm_func(out_channels),
            )
            for modules in [self.downsample, ]:
                for l in modules.modules():
                    if isinstance(l, Conv2d):
                        nn.init.kaiming_uniform_(l.weight, a=1)

        if dilation > 1:
            stride = 1  # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels)
        # TODO: specify init for the above

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation
        )
        self.bn2 = norm_func(bottleneck_channels)

        self.conv3 = Conv2d(
            bottleneck_channels, out_channels, kernel_size=1, bias=False
        )
        self.bn3 = norm_func(out_channels)

        for l in [self.conv1, self.conv2, self.conv3, ]:
            nn.init.kaiming_uniform_(l.weight, a=1)

        self.layers_ = out_channels

        #         if out_channels != 256 and cfg.SNL_ATTENTION:
        if cfg.SNL_ATTENTION:
            if cfg.ADD_C2:
                self.Wk_layer = nn.Conv2d(out_channels, 1, kernel_size=1, stride=1)
                self.Wv_layer = nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1)

                for l in [self.Wk_layer, self.Wv_layer]:
                    nn.init.kaiming_uniform_(l.weight, a=1)
            elif out_channels != 256:
                self.Wk_layer = nn.Conv2d(out_channels, 1, kernel_size=1, stride=1)
                self.Wv_layer = nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1)

                for l in [self.Wk_layer, self.Wv_layer]:
                    nn.init.kaiming_uniform_(l.weight, a=1)

        if cfg.GC_ATTENTION:
            if cfg.ADD_C2:
                self.ratios = 4
                self.planes = out_channels // self.ratios
                self.Wk_layer = nn.Conv2d(out_channels, 1, kernel_size=1, stride=1)
                self.softmax = nn.Softmax(dim=2)
                self.channel_add_conv = nn.Sequential(
                    nn.Conv2d(out_channels, self.planes, kernel_size=1),
                    nn.LayerNorm([self.planes, 1, 1]),
                    nn.ReLU(inplace=True),  # yapf: disable
                    nn.Conv2d(self.planes, out_channels, kernel_size=1)
                )

                for l in [self.Wk_layer]:
                    nn.init.kaiming_uniform_(l.weight, a=1)

                for l in [self.channel_add_conv[0], self.channel_add_conv[-1]]:
                    nn.init.constant_(l.weight, 0)
                    nn.init.constant_(l.bias, 0)
            elif out_channels != 256:
                self.ratios = 4
                self.planes = out_channels // self.ratios
                self.Wk_layer = nn.Conv2d(out_channels, 1, kernel_size=1, stride=1)
                self.softmax = nn.Softmax(dim=2)
                self.channel_add_conv = nn.Sequential(
                    nn.Conv2d(out_channels, self.planes, kernel_size=1),
                    nn.LayerNorm([self.planes, 1, 1]),
                    nn.ReLU(inplace=True),  # yapf: disable
                    nn.Conv2d(self.planes, out_channels, kernel_size=1)
                )

                for l in [self.Wk_layer]:
                    nn.init.kaiming_uniform_(l.weight, a=1)

                for l in [self.channel_add_conv[0], self.channel_add_conv[-1]]:
                    nn.init.constant_(l.weight, 0)
                    nn.init.constant_(l.bias, 0)
Example #22
    def __init__(self,
                 in_channels,
                 bottleneck_channels,
                 out_channels,
                 num_groups,
                 stride_in_1x1,
                 stride,
                 dilation,
                 norm_func,
                 reduction=16):
        super(SEBottleneck, self).__init__()

        self.downsample = None
        if in_channels != out_channels:
            down_stride = stride if dilation == 1 else 1
            self.downsample = nn.Sequential(
                Conv2d(in_channels,
                       out_channels,
                       kernel_size=1,
                       stride=down_stride,
                       bias=False),
                norm_func(out_channels),
            )
            for modules in [
                    self.downsample,
            ]:
                for l in modules.modules():
                    if isinstance(l, Conv2d):
                        nn.init.kaiming_uniform_(l.weight, a=1)

        if dilation > 1:
            stride = 1  # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels)
        # TODO: specify init for the above

        self.conv2 = Conv2d(bottleneck_channels,
                            bottleneck_channels,
                            kernel_size=3,
                            stride=stride_3x3,
                            padding=dilation,
                            bias=False,
                            groups=num_groups,
                            dilation=dilation)
        self.bn2 = norm_func(bottleneck_channels)

        self.conv3 = Conv2d(bottleneck_channels,
                            out_channels,
                            kernel_size=1,
                            bias=False)
        self.bn3 = norm_func(out_channels)

        for l in [
                self.conv1,
                self.conv2,
                self.conv3,
        ]:
            nn.init.kaiming_uniform_(l.weight, a=1)

        self.relu = nn.ReLU(inplace=True)
        self.se = SELayer(out_channels, reduction)
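The usual residual forward with the SE gate applied before the skip addition; a sketch (the exact placement of self.se is an assumption consistent with SE-ResNet):

def forward(self, x):
    identity = x if self.downsample is None else self.downsample(x)
    out = self.relu(self.bn1(self.conv1(x)))
    out = self.relu(self.bn2(self.conv2(out)))
    out = self.se(self.bn3(self.conv3(out)))  # channel-wise reweighting
    return self.relu(out + identity)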
Example #23
    def __init__(
        self,
        in_channels,
        bottleneck_channels,
        out_channels,
        num_groups,
        stride_in_1x1,
        stride,
        dilation,
        norm_func,
        use_dcn
    ):
        super(Bottleneck, self).__init__()

        self.downsample = None
        if in_channels != out_channels:
            down_stride = stride if dilation == 1 else 1
            self.downsample = nn.Sequential(
                Conv2d(
                    in_channels, out_channels,
                    kernel_size=1, stride=down_stride, bias=False
                ),
                norm_func(out_channels),
            )
            for modules in [self.downsample,]:
                for l in modules.modules():
                    if isinstance(l, Conv2d):
                        nn.init.kaiming_uniform_(l.weight, a=1)

        if dilation > 1:
            stride = 1 # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.use_dcn = use_dcn

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels)
        # TODO: specify init for the above

        # DCN
        if not self.use_dcn:
            # pdb.set_trace()
            # print("stride_3x3: {}".format(stride_3x3))
            self.conv2 = Conv2d(
                bottleneck_channels,
                bottleneck_channels,
                kernel_size=3,
                stride=stride_3x3,
                padding=dilation,
                bias=False,
                groups=num_groups,
                dilation=dilation
            )
        ########################## DCN ##########################
        elif self.use_dcn:
            deformable_groups = 1
            offset_channels = 18
            conv_op = DeformConv
            self.conv2_offset = nn.Conv2d(
                bottleneck_channels,
                deformable_groups * offset_channels,
                kernel_size=3,
                stride=stride_3x3,
                padding=dilation,
                bias=False,
                groups=num_groups,
                dilation=dilation)
            self.conv2 = conv_op(
                bottleneck_channels,
                bottleneck_channels,
                kernel_size=3,
                stride=stride_3x3,
                padding=dilation,
                dilation=dilation,
                deformable_groups=deformable_groups,
                bias=False)
            # pdb.set_trace()
        else:
            # with_modulated_dcn
            pass
        ########################## DCN ##########################
        self.bn2 = norm_func(bottleneck_channels)

        self.conv3 = Conv2d(
            bottleneck_channels, out_channels, kernel_size=1, bias=False
        )
        self.bn3 = norm_func(out_channels)

        for l in [self.conv1, self.conv2, self.conv3,]:
            nn.init.kaiming_uniform_(l.weight, a=1)
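One thing the init loop above does not cover: conv2_offset keeps PyTorch's default init. Deformable-conv implementations commonly zero the offset weights so the layer starts out as a plain 3x3 conv; a hedged addition for the end of this __init__:

        # Not in the snippet above: zero offsets make the deformable conv
        # initially equivalent to its regular counterpart.
        if self.use_dcn:
            nn.init.constant_(self.conv2_offset.weight, 0)  # bias=False, so weight only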
Example #24
    def __init__(self, cfg):
        super(VLineHead, self).__init__()
        self.feature_extractor = make_vline_feature_extractor(cfg)
        self.detections_per_img = cfg.MODEL.VLINE_HEAD.DETECTIONS_PER_IMG

        num_backbon_feats_dim = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
        num_extract_feats_dim = cfg.MODEL.VLINE_HEAD.NUM_EXTRACT_FEATS_DIM
        num_feats_linear = cfg.MODEL.VLINE_HEAD.NUM_FIRST_LINEAR
        self.use_first_linear = cfg.MODEL.VLINE_HEAD.USE_FIRST_LINEAR
        num_feats_global = 256
        num_linear_another = 512

        num_vlines = cfg.MODEL.VLINE_HEAD.BINS if not cfg.MODEL.VLINE_HEAD.USE_FBP else 20
        # print("num_vlines: ", num_vlines)
        self.num_boundary = cfg.MODEL.VLINE_HEAD.NUM_BOUDARY
        self.use_stack = cfg.MODEL.VLINE_HEAD.USE_STACK
        self.use_eye = cfg.MODEL.VLINE_HEAD.USE_EYE
        self.use_global = cfg.MODEL.VLINE_HEAD.USE_GLOBAL
        self.train_nonrf = cfg.MODEL.VLINE_HEAD.TRAIN_NONRF

        if self.use_stack:
            print("using self.use_stack!!!!!!!!!!!!")
            print("using self.use_stack!!!!!!!!!!!!")
            print("using self.use_stack!!!!!!!!!!!!")
            num_stack_out = 256
            if self.use_first_linear:
                self.stack_mean = nn.Linear(num_feats_linear * 2,
                                            num_stack_out)
                self.stack_mean_vert = nn.Linear(num_feats_linear * 2,
                                                 num_stack_out)
                self.stack_max = nn.Linear(num_feats_linear * 2, num_stack_out)
                self.stack_max_vert = nn.Linear(num_feats_linear * 2,
                                                num_stack_out)
            else:
                self.stack_mean = nn.Linear(num_extract_feats_dim * 2,
                                            num_stack_out)
                self.stack_mean_vert = nn.Linear(num_extract_feats_dim * 2,
                                                 num_stack_out)
                self.stack_max = nn.Linear(num_extract_feats_dim * 2,
                                           num_stack_out)
                self.stack_max_vert = nn.Linear(num_extract_feats_dim * 2,
                                                num_stack_out)

        self.use_indsgroupmap = cfg.MODEL.VLINE_HEAD.USE_INDSGROUPMAP
        if self.use_indsgroupmap:
            self.vline_pooling_mean = VLinePooling().apply
            self.vline_pooling_max = VLinePooling4().apply
        else:
            self.vline_pooling_mean = VLinePooling2()
            self.vline_pooling_max = VLinePooling3().apply
        num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
        num_vline_classes = 1 if cfg.MODEL.CLS_AGNOSTIC_VLINE_REG else num_classes

        if self.use_first_linear:
            d_vline_feats = num_feats_linear * 4
            d_vline_feats2 = num_feats_linear * 2
            self.linear = nn.Linear(num_extract_feats_dim, num_feats_linear)
            self.linear_max = nn.Linear(num_extract_feats_dim,
                                        num_feats_linear)
            self.linear_vert = nn.Linear(num_extract_feats_dim,
                                         num_feats_linear)
            self.linear_vert_max = nn.Linear(num_extract_feats_dim,
                                             num_feats_linear)
        else:
            d_vline_feats = num_extract_feats_dim * 4
            d_vline_feats2 = num_extract_feats_dim * 2

        if self.use_eye:
            self.eye = torch.tensor(np.eye(num_vlines)).float()
            d_vline_feats += num_vlines
            d_vline_feats2 += num_vlines
        if self.use_global:
            d_vline_feats += num_feats_global
            d_vline_feats2 += num_feats_global
        self.classifier = nn.Linear(d_vline_feats, num_linear_another)
        self.classifier_vert = nn.Linear(d_vline_feats, num_linear_another)
        self.classifier_another = nn.Linear(
            num_linear_another, self.num_boundary * num_vline_classes)
        self.classifier_vert_another = nn.Linear(
            num_linear_another, self.num_boundary * num_vline_classes)

        self.classifier_mean = nn.Linear(d_vline_feats2,
                                         self.num_boundary * num_vline_classes)
        self.classifier_mean_vert = nn.Linear(
            d_vline_feats2, self.num_boundary * num_vline_classes)
        self.classifier_max = nn.Linear(d_vline_feats2,
                                        self.num_boundary * num_vline_classes)
        self.classifier_max_vert = nn.Linear(
            d_vline_feats2, self.num_boundary * num_vline_classes)

        self.loss_evaluator = make_vline_loss_evaluator(cfg)
        self.post_processor = make_vline_post_processor(cfg)

        input_size = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
        next_feature = input_size
        self.blocks = []

        # TODO(H): Is it possible to remove these convs when extracting feats?
        # Or maybe we should use the ones trained on mask?
        layers = cfg.MODEL.VLINE_HEAD.CONV_LAYERS
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "vp_mask_fcn{}".format(layer_idx)
            module = Conv2d(next_feature,
                            layer_features,
                            3,
                            stride=1,
                            padding=1)
            nn.init.kaiming_normal_(module.weight,
                                    mode="fan_out",
                                    nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)

        self.logger = logging.getLogger("maskrcnn_benchmark.trainer")
        self.logger.info("Get logger in model")

        num_pooler_resol = cfg.MODEL.VLINE_HEAD.POOLER_RESOLUTION
        # Global: Choice of 1 channel filter
        # self.conv_global = Conv2d(num_backbon_feats_dim, 1, 1)
        # self.linear_global = nn.Linear(num_pooler_resol*num_pooler_resol, num_feats_global)

        # Global: Choice of pooling (same as box regre head)
        # self.avgpool = nn.AdaptiveAvgPool2d(1)
        # self.linear_global = nn.Linear(num_backbon_feats_dim, num_feats_global)

        # Global: Choice of pooling (same as shape head)
        ker_size = 8
        strid = 8
        self.avgpool = nn.AvgPool2d(kernel_size=ker_size, stride=strid)
        num_inputs_global = int(np.floor(
            (num_pooler_resol - ker_size) / strid)) + 1
        self.linear_global = nn.Linear(
            num_inputs_global * num_inputs_global * num_backbon_feats_dim,
            num_feats_global)

        # nn.init.normal_(self.linear_global.weight, mean=0, std=0.01)
        # nn.init.constant_(self.linear_global.bias, 0)
        self.softmax = nn.Softmax(dim=1)
Example #25
    def __init__(self, cfg):
        """
        Arguments:
            num_classes (int): number of output classes
            input_size (int): number of channels of the input once it's flattened
            representation_size (int): size of the intermediate representation
        """
        super(PANetMaskBranch, self).__init__()

        pooler = make_mask_pooler(cfg)
        input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = pooler


        parallel_layers = (256, 256, 256, 256)
        common_fcn_layers = (256, 256)

        # Parallel Block: 4 parallel fcn1
        self.parallel_block = []
        for layer_idx, layer_features in enumerate(parallel_layers, 1):
            layer_name = "mask_fcn_parallel{}".format(layer_idx)
            module = Conv2d(input_size, layer_features, 3, stride=1, padding=1)
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            self.parallel_block.append(layer_name)
        # Common Block: fcn2, fcn3
        self.common_blocks = []
        for layer_idx, layer_features in enumerate(common_fcn_layers, 2):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = Conv2d(layer_features, layer_features, 3, stride=1, padding=1)
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            self.common_blocks.append(layer_name)

        layer_features = 256
        # FCN branch: fcn4 + original_mask_predictor
        self.mask_fcn_4 = Conv2d(layer_features, layer_features, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(self.mask_fcn_4.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(self.mask_fcn_4.bias, 0)

        self.mask_predictor = make_roi_mask_predictor(cfg)

        # FC Branch: conv4_fc, conv5_fc, fc
        # fc_fcn_layers = (256, 128)
        # self.fc_blocks = []
        # for layer_idx, layer_features in enumerate(fc_fcn_layers, 4):
        #     layer_name = "mask_fc_fcn{}".format(layer_idx)
        #     module = Conv2d(layer_features, layer_features, 3, stride=1, padding=1)
        #     # Caffe2 implementation uses MSRAFill, which in fact
        #     # corresponds to kaiming_normal_ in PyTorch
        #     nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
        #     nn.init.constant_(module.bias, 0)
        #     self.add_module(layer_name, module)
        #     self.fc_blocks.append(layer_name)

        # FC Branch: conv4_fc, conv5_fc, fc
        self.mask_fc_fcn_4 = Conv2d(256, 256, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(self.mask_fc_fcn_4.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(self.mask_fc_fcn_4.bias, 0)

        self.mask_fc_fcn_5 = Conv2d(256, 128, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(self.mask_fc_fcn_5.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(self.mask_fc_fcn_5.bias, 0)

        fc_layer = 128
        self.pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        self.mask_resolution = cfg.MODEL.ROI_MASK_HEAD.RESOLUTION
        self.fc = nn.Linear(fc_layer * self.pooler_resolution * self.pooler_resolution,
                            self.mask_resolution * self.mask_resolution,
                            bias=True)

        nn.init.kaiming_uniform_(self.fc.weight, a=1)
        nn.init.constant_(self.fc.bias, 0)
Example #26
    def __init__(self, cfg):
        super(SampleBlock, self).__init__()
        out_ch = cfg.MODEL.SAMPLE_STREAM.OUT_CHANNELS

        self.sup_conv_d1 = Conv2d(out_ch,
                                  out_ch // 2,
                                  kernel_size=(3, 3),
                                  stride=1,
                                  groups=1,
                                  dilation=1,
                                  padding=1)
        self.sup_conv_d2 = Conv2d(out_ch,
                                  out_ch // 4,
                                  kernel_size=(3, 3),
                                  stride=1,
                                  groups=1,
                                  dilation=2,
                                  padding=2)
        self.sup_conv_d3 = Conv2d(out_ch,
                                  out_ch // 4,
                                  kernel_size=(3, 3),
                                  stride=1,
                                  groups=1,
                                  dilation=3,
                                  padding=3)

        self.offset_lateral = Conv2d(2 * out_ch,
                                     out_ch,
                                     kernel_size=(3, 3),
                                     stride=1,
                                     groups=1,
                                     dilation=1,
                                     padding=1)
        self.offset_pred = Conv2d(out_ch,
                                  73,
                                  kernel_size=(3, 3),
                                  stride=1,
                                  groups=1,
                                  dilation=1,
                                  padding=1)

        self.sample_conv = DeformConv(out_ch,
                                      out_ch,
                                      kernel_size=(3, 3),
                                      stride=1,
                                      groups=1,
                                      dilation=1,
                                      padding=1,
                                      deformable_groups=4,
                                      bias=False)

        # self.sample_conv_1x1 = Conv2d(3*256, 256, kernel_size=(1, 1), stride=1, groups=1, dilation=1, padding=0)

        # Initialization
        for modules in [
                self.sup_conv_d1, self.sup_conv_d2, self.sup_conv_d3,
                self.offset_lateral, self.offset_pred
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.kaiming_uniform_(l.weight, a=1)
                    torch.nn.init.constant_(l.bias, 0)
Example #27
    def __init__(self, cfg, in_channels):
        super(KeypointRCNNPredictor, self).__init__()
        self.in_channels = in_channels
        self.num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
        self.num_convs = 4
        self.point_feat_channels = 32
        self.conv_out_channels = self.point_feat_channels * self.num_keypoints
        conv_kernel_size = 3
        conv_kernel_size1 = 5
        deconv_kernel_size = 4
        # deconv_kernel = 4
        # self.kps_score_lowres = layers.ConvTranspose2d(
        #     input_features,
        #     num_keypoints,
        #     deconv_kernel,
        #     stride=2,
        #     padding=deconv_kernel // 2 - 1,
        # )
        # nn.init.kaiming_normal_(
        #     self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu"
        # )
        # nn.init.constant_(self.kps_score_lowres.bias, 0)
        # self.up_scale = 2
        # self.out_channels = num_keypoints

        self.convs = []
        for i in range(self.num_convs):
            _in_channels = (self.in_channels
                            if i == 0 else self.conv_out_channels)
            strides = 1
            padding = (conv_kernel_size - 1) // 2
            self.convs.append(
                nn.Sequential(
                    Conv2d(_in_channels, self.conv_out_channels,
                           conv_kernel_size, strides, padding),
                    GroupNorm(32, self.conv_out_channels),
                    nn.ReLU(inplace=True)))
        self.convs = nn.Sequential(*self.convs)

        # self.convs1 = []
        # for i in range(self.num_convs):
        #     _in_channels = (
        #         self.in_channels if i == 0 else self.conv_out_channels)
        #     strides = 1
        #     padding = (conv_kernel_size1 - 1) // 2
        #     self.convs1.append(
        #         nn.Sequential(
        #             Conv2d(
        #                 _in_channels,
        #                 self.conv_out_channels,
        #                 conv_kernel_size1,
        #                 strides,
        #                 padding),
        #             GroupNorm(32, self.conv_out_channels),
        #             nn.ReLU(inplace=True)))
        # self.convs1 = nn.Sequential(*self.convs1)

        # self.convs2 = []
        # for i in range(self.num_convs):
        #     _in_channels = (
        #         self.in_channels if i == 0 else self.conv_out_channels)
        #     strides = 1
        #     padding = (conv_kernel_size1 - 1) // 2
        #     self.convs2.append(
        #         nn.Sequential(
        #             Conv2d(
        #                 _in_channels,
        #                 self.conv_out_channels,
        #                 conv_kernel_size1,
        #                 strides,
        #                 padding),
        #             GroupNorm(32, self.conv_out_channels),
        #             nn.ReLU(inplace=True)))
        # self.convs2 = nn.Sequential(*self.convs2)

        # self.updeconv1_1 = ConvTranspose2d(
        #     self.conv_out_channels,
        #     self.conv_out_channels // 2,
        #     kernel_size=deconv_kernel_size,
        #     stride=2,
        #     padding=(deconv_kernel_size - 2) // 2,
        #     groups=self.num_keypoints // 2)
        # self.norm1 = GroupNorm(self.num_keypoints // 2, self.conv_out_channels // 2)
        # self.updeconv1_2 = ConvTranspose2d(
        #     self.conv_out_channels,
        #     self.conv_out_channels // 2,
        #     kernel_size=deconv_kernel_size,
        #     stride=2,
        #     padding=(deconv_kernel_size - 2) // 2,
        #     groups=self.num_keypoints // 2)
        # self.norm2 = GroupNorm(self.num_keypoints // 2, self.conv_out_channels // 2)
        # self.updeconv2_1 = ConvTranspose2d(
        #     self.conv_out_channels // 2,
        #     self.num_keypoints // 2,
        #     kernel_size=deconv_kernel_size,
        #     stride=2,
        #     padding=(deconv_kernel_size - 2) // 2,
        #     groups=self.num_keypoints // 2)
        # self.updeconv2_2 = ConvTranspose2d(
        #     self.conv_out_channels // 2,
        #     self.num_keypoints // 2,
        #     kernel_size=deconv_kernel_size,
        #     stride=2,
        #     padding=(deconv_kernel_size - 2) // 2,
        #     groups=self.num_keypoints // 2)

        self.updeconv1_ = ConvTranspose2d(self.conv_out_channels,
                                          self.conv_out_channels,
                                          kernel_size=deconv_kernel_size,
                                          stride=2,
                                          padding=(deconv_kernel_size - 2) //
                                          2,
                                          groups=self.num_keypoints)
        self.norm1 = GroupNorm(self.num_keypoints, self.conv_out_channels)

        self.updeconv2_ = ConvTranspose2d(self.conv_out_channels,
                                          self.num_keypoints,
                                          kernel_size=deconv_kernel_size,
                                          stride=2,
                                          padding=(deconv_kernel_size - 2) //
                                          2,
                                          groups=self.num_keypoints)

        #        self.conv_guide = Conv2d(
        #            self.conv_out_channels,
        #            self.conv_out_channels,
        #            3,
        #            1,
        #            1)

        #        self.dcn = DFConv2d_guide(self.conv_out_channels,
        #            self.num_keypoints,
        #            groups=self.num_keypoints)

        # self.norm2 = GroupNorm(self.num_keypoints, self.conv_out_channels)
        # self.final_conv = Conv2d(
        #                 self.conv_out_channels,
        #                 self.num_keypoints,
        #                 1,
        #                 1,
        #                 0,
        #                 groups=self.num_keypoints)
        # self.conv_offset = Conv2d(
        #                 self.conv_out_channels,
        #                 self.num_keypoints * 2,
        #                 1,
        #                 1,
        #                 0,
        #                 groups=self.num_keypoints)

        # self.convs_1 = []
        # for i in range(self.num_convs):
        #     _in_channels = (
        #         self.in_channels if i == 0 else self.conv_out_channels)
        #     strides = 1
        #     padding = (conv_kernel_size - 1) // 2
        #     self.convs_1.append(
        #         nn.Sequential(
        #             Conv2d(
        #                 _in_channels,
        #                 self.conv_out_channels,
        #                 conv_kernel_size,
        #                 strides,
        #                 padding),
        #             GroupNorm(36, self.conv_out_channels),
        #             nn.ReLU(inplace=True)))
        # self.convs_1 = nn.Sequential(*self.convs_1)

        # self.updeconv1_1 = ConvTranspose2d(
        #     self.conv_out_channels,
        #     self.conv_out_channels,
        #     kernel_size=deconv_kernel_size,
        #     stride=2,
        #     padding=(deconv_kernel_size - 2) // 2,
        #     groups=self.num_keypoints)
        # self.norm1_1 = GroupNorm(self.num_keypoints, self.conv_out_channels)
        # self.updeconv2_1 = ConvTranspose2d(
        #     self.conv_out_channels,
        #     self.num_keypoints,
        #     kernel_size=deconv_kernel_size,
        #     stride=2,
        #     padding=(deconv_kernel_size - 2) // 2,
        #     groups=self.num_keypoints)

        # #TODO 20201015
        # self.neighbor_points = []
        # grid_size = 3
        # for i in range(grid_size):  # i-th column
        #     for j in range(grid_size):  # j-th row
        #         neighbors = []
        #         if i > 0:  # left: (i - 1, j)
        #             neighbors.append((i - 1) * grid_size + j)
        #         if j > 0:  # up: (i, j - 1)
        #             neighbors.append(i * grid_size + j - 1)
        #         if j < grid_size - 1:  # down: (i, j + 1)
        #             neighbors.append(i * grid_size + j + 1)
        #         if i < grid_size - 1:  # right: (i + 1, j)
        #             neighbors.append((i + 1) * grid_size + j)
        #         self.neighbor_points.append(tuple(neighbors))

        # self.forder_trans = nn.ModuleList()  # first-order feature transition
        # self.sorder_trans = nn.ModuleList()  # second-order feature transition
        # for neighbors in self.neighbor_points:
        #     fo_trans = nn.ModuleList()
        #     so_trans = nn.ModuleList()
        #     for _ in range(len(neighbors)):
        #         # each transition module consists of a 5x5 depth-wise conv and
        #         # 1x1 conv.
        #         fo_trans.append(
        #             nn.Sequential(
        #                 Conv2d(
        #                     self.point_feat_channels,
        #                     self.point_feat_channels,
        #                     5,
        #                     stride=1,
        #                     padding=2,
        #                     groups=self.point_feat_channels),
        #                 Conv2d(self.point_feat_channels,
        #                           self.point_feat_channels, 1)))
        #         so_trans.append(
        #             nn.Sequential(
        #                 Conv2d(
        #                     self.point_feat_channels,
        #                     self.point_feat_channels,
        #                     5,
        #                     1,
        #                     2,
        #                     groups=self.point_feat_channels),
        #                 Conv2d(self.point_feat_channels,
        #                           self.point_feat_channels, 1)))
        #     self.forder_trans.append(fo_trans)
        #     self.sorder_trans.append(so_trans)

        # representation_size = 14 * 14 * 288
        # self.keypoints_weight = nn.Linear(representation_size, self.num_keypoints)
        # nn.init.normal_(self.cls_score.weight, std=0.01)

        # Default init: Kaiming-normal for every conv/linear weight with zero
        # bias; the grouped transposed convs get a small-std normal init instead.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                nn.init.kaiming_normal_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
        for m in self.modules():
            if isinstance(m, nn.ConvTranspose2d):
                nn.init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
        # nn.init.constant_(self.final_conv.bias,-np.log(0.99/0.01))
        # nn.init.constant_(self.dcn.bias,-np.log(0.99/0.01))
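        # Bias the final logits so the initial sigmoid response is ~0.01:
        # sigmoid(-log(0.99 / 0.01)) == 0.01, the prior-probability trick
        # used for rare positives (cf. RetinaNet's focal-loss init).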
        nn.init.constant_(self.updeconv2_.bias, -np.log(0.99 / 0.01))

    def _init_adaptor(self, s_channel, t_channel):
        adaptor = Conv2d(s_channel, t_channel, 1, 1, 0)
        nn.init.kaiming_uniform_(adaptor.weight, a=1)
        nn.init.constant_(adaptor.bias, 0)
        return adaptor
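    # _init_adaptor builds a 1x1 projection from s_channel to t_channel maps,
    # e.g. to align a student feature map with a teacher's for feature
    # distillation. A minimal usage sketch (the channel counts below are
    # illustrative assumptions, not values from this snippet):
    #
    #   adaptor = self._init_adaptor(s_channel=256, t_channel=1024)
    #   aligned = adaptor(student_feat)  # (N, 256, H, W) -> (N, 1024, H, W)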
Example #29
    def __init__(self, in_channels, bottleneck_channels, out_channels,
                 num_groups, stride_in_1x1, stride, dilation, norm_func,
                 dcn_config):
        super(Bottleneck, self).__init__()

        self.downsample = None
        if in_channels != out_channels:
            down_stride = stride if dilation == 1 else 1
            self.downsample = nn.Sequential(
                Conv2d(in_channels,
                       out_channels,
                       kernel_size=1,
                       stride=down_stride,
                       bias=False),
                norm_func(out_channels),
            )
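            # Projection shortcut: a (possibly strided) 1x1 conv + norm that
            # matches the identity branch to the residual branch's channel
            # count and resolution.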
            for modules in [
                    self.downsample,
            ]:
                for l in modules.modules():
                    if isinstance(l, Conv2d):
                        nn.init.kaiming_uniform_(l.weight, a=1)

        if dilation > 1:
            stride = 1  # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels)
        # TODO: specify init for the above
        with_dcn = dcn_config.get("stage_with_dcn", False)
        if with_dcn:
            deformable_groups = dcn_config.get("deformable_groups", 1)
            with_modulated_dcn = dcn_config.get("with_modulated_dcn", False)
            self.conv2 = DFConv2d(bottleneck_channels,
                                  bottleneck_channels,
                                  with_modulated_dcn=with_modulated_dcn,
                                  kernel_size=3,
                                  stride=stride_3x3,
                                  groups=num_groups,
                                  dilation=dilation,
                                  deformable_groups=deformable_groups,
                                  bias=False)
        else:
            self.conv2 = Conv2d(bottleneck_channels,
                                bottleneck_channels,
                                kernel_size=3,
                                stride=stride_3x3,
                                padding=dilation,
                                bias=False,
                                groups=num_groups,
                                dilation=dilation)
            nn.init.kaiming_uniform_(self.conv2.weight, a=1)

        self.bn2 = norm_func(bottleneck_channels)

        self.conv3 = Conv2d(bottleneck_channels,
                            out_channels,
                            kernel_size=1,
                            bias=False)
        self.bn3 = norm_func(out_channels)

        for l in [
                self.conv1,
                self.conv3,
        ]:
            nn.init.kaiming_uniform_(l.weight, a=1)
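
    # Only __init__ is shown above; a minimal sketch of the standard residual
    # forward it implies (an assumption, not part of the original snippet;
    # assumes torch.nn.functional is imported as F):
    def forward(self, x):
        identity = x

        out = F.relu_(self.bn1(self.conv1(x)))    # 1x1 reduce
        out = F.relu_(self.bn2(self.conv2(out)))  # 3x3, plain or deformable
        out = self.bn3(self.conv3(out))           # 1x1 expand

        if self.downsample is not None:
            identity = self.downsample(x)         # projection shortcut

        return F.relu_(out + identity)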
Example #30
    def __init__(
        self,
        in_channels,
        bottleneck_channels,
        out_channels,
        num_groups,
        stride_in_1x1,
        stride,
        dilation,
        norm_func,
        scale=4
    ):
        super(Bottle2neck, self).__init__()

        self.downsample = None
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)
        if in_channels != out_channels:
            down_stride = stride if dilation == 1 else 1
            self.downsample = nn.Sequential(
                Conv2d(
                    in_channels, out_channels,
                    kernel_size=1, stride=down_stride, bias=False
                ),
                norm_func(out_channels),
            )
            for modules in [self.downsample,]:
                for l in modules.modules():
                    if isinstance(l, Conv2d):
                        nn.init.kaiming_uniform_(l.weight, a=1)
            self.stype = 'stage'
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride_3x3, padding=dilation)
        else:
            self.stype = 'normal'
        if dilation > 1:
            stride = 1  # reset to be 1

        # The original MSRA ResNet models have stride in the first 1x1 conv
        # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
        # stride in the 3x3 conv
        
        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels*scale,
            kernel_size=1,
            stride=stride_1x1,
            bias=False,
        )
        self.bn1 = norm_func(bottleneck_channels*scale)

        if scale == 1:
            self.nums = 1
        else:
            self.nums = scale - 1
        convs = []
        bns = []
        for i in range(self.nums):
            convs.append(nn.Conv2d(bottleneck_channels,
                                   bottleneck_channels,
                                   kernel_size=3,
                                   stride=stride_3x3,
                                   padding=dilation,
                                   groups=num_groups,
                                   dilation=dilation,
                                   bias=False))
            bns.append(norm_func(bottleneck_channels))
        self.convs = nn.ModuleList(convs)
        self.bns = nn.ModuleList(bns)

        self.conv3 = Conv2d(
            bottleneck_channels*scale, out_channels, kernel_size=1, bias=False
        )
        self.bn3 = norm_func(out_channels)

        self.scale = scale
        self.width = bottleneck_channels

        for l in [self.conv1, self.conv3]:
            nn.init.kaiming_uniform_(l.weight, a=1)
        for l in self.convs:
            nn.init.kaiming_uniform_(l.weight, a=1)
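
    # Only __init__ is shown above as well; a minimal sketch of the
    # Res2Net-style hierarchical forward this layout implies (an assumption,
    # following the reference Res2Net bottleneck; assumes torch and
    # torch.nn.functional as F are imported):
    def forward(self, x):
        identity = x

        out = F.relu_(self.bn1(self.conv1(x)))

        # Split the widened 1x1 output into `scale` groups of `width` channels;
        # each group (after the first) is summed with the previous group's
        # output before its 3x3 conv, growing the receptive field in-block.
        spx = torch.split(out, self.width, 1)
        for i in range(self.nums):
            sp = spx[i] if (i == 0 or self.stype == 'stage') else sp + spx[i]
            sp = F.relu_(self.bns[i](self.convs[i](sp)))
            out = sp if i == 0 else torch.cat((out, sp), 1)
        # The last split passes through unchanged ('normal') or average-pooled
        # down to the strided resolution ('stage').
        if self.scale != 1 and self.stype == 'normal':
            out = torch.cat((out, spx[self.nums]), 1)
        elif self.scale != 1 and self.stype == 'stage':
            out = torch.cat((out, self.pool(spx[self.nums])), 1)

        out = self.bn3(self.conv3(out))

        if self.downsample is not None:
            identity = self.downsample(x)

        return F.relu_(out + identity)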