示例#1
0
    def __init__(self, cfg):
        """Create the conv layers, the four ROI poolers and the two post-convs.

        Arguments:
            cfg: config node; only ``cfg.MODEL.ROI_MASK_HEAD`` is read.
        """
        super(PRCNNFeatureExtractor, self).__init__()
        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        # Resolution and scales are read but not used below: every pooler is
        # built with a fixed 25x25 output and its own single scale.
        _resolution = mask_cfg.POOLER_RESOLUTION
        _scales = mask_cfg.POOLER_SCALES
        sampling_ratio = mask_cfg.POOLER_SAMPLING_RATIO

        def conv3x3(c_in, c_out):
            # 3x3 convolution, stride 1, padding 1.
            return Conv2d(c_in, c_out, 3, stride=1, padding=1)

        def roi_pooler(scale):
            # Pooler with a fixed 25x25 output at a single scale.
            return Pooler(
                output_size=(25, 25),
                scales=(scale, ),
                sampling_ratio=sampling_ratio,
            )

        self.conv1 = conv3x3(3, 32)
        self.conv2 = conv3x3(32, 32)
        self.conv3 = conv3x3(32, 64)
        self.conv4 = conv3x3(64, 64)
        self.conv5 = conv3x3(64, 128)
        self.conv6 = conv3x3(128, 128)
        self.conv7 = conv3x3(128, 256)
        self.conv8 = conv3x3(256, 256)

        # Poolers and max-pools are registered alternately; the pooler scale
        # halves from one pooler to the next.
        self.pooler1 = roi_pooler(1.)
        self.p1 = nn.MaxPool2d(3, 2, 1)
        self.pooler2 = roi_pooler(0.5)
        self.p2 = nn.MaxPool2d(3, 2, 1)
        self.pooler3 = roi_pooler(0.25)
        self.p3 = nn.MaxPool2d(3, 2, 1)
        self.pooler4 = roi_pooler(0.125)

        # 480 == 32 + 64 + 128 + 256; presumably the channel count of the
        # concatenated pooled features -- confirm against forward().
        self.posconv1 = conv3x3(480, 256)
        self.posconv2 = conv3x3(256, 32)

        all_convs = (
            self.conv1, self.conv2, self.conv3, self.conv4,
            self.conv5, self.conv6, self.conv7, self.conv8,
            self.posconv1, self.posconv2,
        )
        for conv in all_convs:
            nn.init.kaiming_normal_(conv.weight,
                                    mode="fan_out",
                                    nonlinearity="relu")
            nn.init.constant_(conv.bias, 0)
    def __init__(self, cfg, in_channels):
        """Set up the ROI pooler and two stride-2 ``ORConv2d`` layers.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_BOX_HEAD`` and
                ``MODEL.OR_RESNETS.ORIENTATION``.
            in_channels (int): channel count used to size ``orn6``.
        """
        super(FPN2ORNFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )
        hidden_dim = box_cfg.MLP_HEAD_DIM
        _use_gn = box_cfg.USE_GN  # read from the config but not used here
        orientations = cfg.MODEL.OR_RESNETS.ORIENTATION

        # Channel counts passed to ORConv2d are divided by the orientation
        # count.
        orn6_in = int(in_channels / orientations)
        orn6_out = int(in_channels * 4 / orientations)
        orn7_out = int(hidden_dim * 2 / orientations)

        self.pooler = roi_pooler
        self.orn6 = ORConv2d(orn6_in,
                             orn6_out,
                             arf_config=orientations,
                             kernel_size=3,
                             stride=2)
        self.orn7 = ORConv2d(orn6_out,
                             orn7_out,
                             arf_config=orientations,
                             kernel_size=3,
                             stride=2)

        for orn in (self.orn6, self.orn7):
            nn.init.kaiming_uniform_(orn.weight, a=1)
            nn.init.constant_(orn.bias, 0)

        self.out_channels = hidden_dim
示例#3
0
    def __init__(self, cfg):
        """Create the ROI pooler followed by a two-layer MLP (fc6, fc7).

        Arguments:
            cfg: config node; reads the ``MODEL.ROI_BOX_HEAD`` pooler
                settings and ``MLP_HEAD_DIM``, plus
                ``MODEL.BACKBONE.OUT_CHANNELS``.
        """
        super(FPN2MLPFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
            drop_last=True,
        )
        # fc6 input size: backbone channels times the pooled side squared.
        flat_features = cfg.MODEL.BACKBONE.OUT_CHANNELS * side ** 2
        hidden_dim = box_cfg.MLP_HEAD_DIM

        self.pooler = roi_pooler
        self.fc6 = nn.Linear(flat_features, hidden_dim)
        self.fc7 = nn.Linear(hidden_dim, hidden_dim)

        for fc in (self.fc6, self.fc7):
            nn.init.kaiming_uniform_(fc.weight, a=1)
            nn.init.constant_(fc.bias, 0)
示例#4
0
    def __init__(self, config, pretrained=None):
        """Build the ROI pooler and a single-stage ResNet head.

        Arguments:
            config: config node; reads the ``MODEL.ROI_BOX_HEAD`` pooler
                settings and the ``MODEL.RESNETS`` block settings.
            pretrained: optional checkpoint location handed to
                ``torch.load``. When truthy, the loaded state dict is
                applied to the head with ``strict=False``.
        """
        super(ResNet50Conv5ROIFeatureExtractor, self).__init__()

        resolution = config.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        scales = config.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        sampling_ratio = config.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
            drop_last=False,
        )

        stage = resnet.StageSpec(index=5, block_count=3, return_features=False)
        head = resnet.ResNetHead(
            block_module=config.MODEL.RESNETS.TRANS_FUNC,
            stages=(stage,),
            num_groups=config.MODEL.RESNETS.NUM_GROUPS,
            width_per_group=config.MODEL.RESNETS.WIDTH_PER_GROUP,
            stride_in_1x1=config.MODEL.RESNETS.STRIDE_IN_1X1,
            stride_init=None,
        )

        if pretrained:
            # Map tensors to CPU while loading so that a checkpoint saved
            # from a GPU can also be read on a machine without CUDA; the
            # freshly built head lives on the CPU at this point anyway.
            # NOTE: torch.load unpickles the file -- only load checkpoints
            # that come from a trusted source.
            state_dict = torch.load(pretrained, map_location="cpu")
            load_state_dict(head, state_dict, strict=False)

        self.pooler = pooler
        self.head = head
示例#5
0
    def __init__(self, cfg):
        """Build the ROI pooler and a stack of 3x3 convolutions.

        Arguments:
            cfg: YACS config node; reads ``MODEL.VLINE_HEAD`` and
                ``MODEL.RESNETS.BACKBONE_OUT_CHANNELS``.
        """
        super(VLineFPNFeatureExtractor, self).__init__()

        head_cfg = cfg.MODEL.VLINE_HEAD
        side = head_cfg.POOLER_RESOLUTION
        pooler_scales = head_cfg.POOLER_SCALES
        pooler_ratio = head_cfg.POOLER_SAMPLING_RATIO
        channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
        conv_dims = head_cfg.CONV_LAYERS
        self.pooler = Pooler(output_size=(side, side),
                             scales=pooler_scales,
                             sampling_ratio=pooler_ratio)

        # Names of the registered conv modules, in creation order.
        self.blocks = []

        for position, width in enumerate(conv_dims, 1):
            name = "vp_mask_fcn{}".format(position)
            conv = Conv2d(channels, width, 3, stride=1, padding=1)
            nn.init.kaiming_normal_(conv.weight,
                                    mode="fan_out",
                                    nonlinearity="relu")
            nn.init.constant_(conv.bias, 0)
            self.add_module(name, conv)
            self.blocks.append(name)
            # The next conv consumes what this one produces.
            channels = width
示例#6
0
    def __init__(self, cfg, in_channels, half_out=False, cat_all_levels=False):
        """Create the ROI pooler and the fc6/fc7 layers.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_BOX_HEAD``.
            in_channels (int): channel count forwarded to the pooler and
                used to size ``fc6``.
            half_out (bool): when true, ``fc7`` outputs half of
                ``MLP_HEAD_DIM`` (truncated to an int).
            cat_all_levels (bool): forwarded unchanged to ``Pooler``.
        """
        super(FPN2MLPFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
            in_channels=in_channels,
            cat_all_levels=cat_all_levels,
        )
        flat_features = in_channels * side**2
        hidden_dim = box_cfg.MLP_HEAD_DIM
        use_gn = box_cfg.USE_GN

        self.pooler = roi_pooler
        self.fc6 = make_fc(flat_features, hidden_dim, use_gn)

        out_dim = int(hidden_dim / 2) if half_out else hidden_dim

        self.fc7 = make_fc(hidden_dim, out_dim, use_gn)
        self.out_channels = out_dim
    def __init__(self, cfg):
        """Build the re-id projection layer and its optional sub-modules.

        Arguments:
            cfg: config node. ``REID.OUT_CHANNELS`` sizes the ``fc`` layer;
                ``REID.USE_DIFF_FEAT`` adds ``fc6``/``fc7``;
                ``MODEL.RETINANET_ON`` adds an ROI pooler.
        """
        super(FPN2MLPFeatureExtractor, self).__init__()
        self.cfg = cfg
        self.in_channels = 1024
        self.out_channels = cfg.REID.OUT_CHANNELS
        self.fc = make_fc(self.in_channels, self.out_channels)

        if self.cfg.REID.USE_DIFF_FEAT:
            resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
            in_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
            input_size = in_channels * resolution ** 2
            representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
            use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
            self.fc6 = make_fc(input_size, representation_size, use_gn)
            self.fc7 = make_fc(representation_size, representation_size, use_gn)

        if self.cfg.MODEL.RETINANET_ON:
            # Read the resolution in this branch as well: it used to be bound
            # only inside the USE_DIFF_FEAT branch, so enabling RETINANET_ON
            # on its own failed with an unbound local variable.
            resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
            scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
            sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
            pooler = Pooler(
                output_size=(resolution, resolution),
                scales=scales,
                sampling_ratio=sampling_ratio,
            )
            self.pooler = pooler
示例#8
0
    def __init__(self, config):
        """Build the ROI pooler and a single-stage ResNet head.

        Arguments:
            config: config node; reads the ``MODEL.ROI_BOX_HEAD`` pooler
                settings and the ``MODEL.RESNETS`` block settings.
        """
        super(ResNet50Conv5ROIFeatureExtractor, self).__init__()

        box_cfg = config.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )

        # The head consists of exactly one stage spec (index 4, 3 blocks).
        head_stage = resnet.StageSpec(index=4, block_count=3, return_features=False)
        resnets_cfg = config.MODEL.RESNETS
        resnet_head = resnet.ResNetHead(
            block_module=resnets_cfg.TRANS_FUNC,
            stages=(head_stage, ),
            num_groups=resnets_cfg.NUM_GROUPS,
            width_per_group=resnets_cfg.WIDTH_PER_GROUP,
            stride_in_1x1=resnets_cfg.STRIDE_IN_1X1,
            stride_init=None,
            res2_out_channels=resnets_cfg.RES2_OUT_CHANNELS,
            dilation=resnets_cfg.RES5_DILATION)

        self.pooler = roi_pooler
        self.head = resnet_head
    def __init__(self, cfg, in_channels):
        """Build the pooler, the recognition conv tower and the predictor.

        Arguments:
            cfg: config node; reads ``MODEL.ALIGN``.
            in_channels (int): number of channels of the input feature

        Raises:
            ValueError: if ``cfg.MODEL.ALIGN.PREDICTOR`` is neither
                ``"ctc"`` nor ``"attention"``.
        """
        super(AlignHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        resolution = cfg.MODEL.ALIGN.POOLER_RESOLUTION
        canonical_scale = cfg.MODEL.ALIGN.POOLER_CANONICAL_SCALE

        self.scales = cfg.MODEL.ALIGN.POOLER_SCALES
        self.pooler = Pooler(
            output_size=resolution,
            scales=self.scales,
            sampling_ratio=1,
            canonical_scale=canonical_scale,
            mode='bezier')

        # Each listed head gets a "<head>_tower" module made of NUM_CONVS
        # channel-preserving conv blocks.
        for head in ['rec']:
            conv_block = conv_with_kaiming_uniform(
                True, True, False, False)
            tower = [
                conv_block(in_channels, in_channels, 3, 1)
                for _ in range(cfg.MODEL.ALIGN.NUM_CONVS)
            ]
            self.add_module('{}_tower'.format(head),
                            nn.Sequential(*tower))

        self.predict_type = cfg.MODEL.ALIGN.PREDICTOR
        if self.predict_type == "ctc":
            self.predictor = CTCPredictor(cfg, in_channels)
        elif self.predict_type == "attention":
            self.predictor = ATTPredictor(cfg, in_channels)
        else:
            # Raising a bare string is itself a TypeError ("exceptions must
            # derive from BaseException"); raise a real exception so the
            # intended message reaches the caller.
            raise ValueError("Unknown recognition predictor.")
    def __init__(self, cfg, in_channels):
        """Build the ROI pooler and a stack of 3x3 convolutions.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_BB8KEYPOINT_HEAD``.
            in_channels (int): channel count fed to the first convolution.
        """
        super(BB8KeypointRCNNFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        layers = cfg.MODEL.ROI_BB8KEYPOINT_HEAD.CONV_LAYERS
        next_feature = in_channels
        # Names of the registered conv modules, in creation order.
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "conv_fcn{}".format(layer_idx)
            module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")

            nn.init.constant_(module.bias, 0)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)

        # next_feature equals the width of the last conv, and falls back to
        # in_channels when CONV_LAYERS is empty (the loop variable would be
        # unbound in that case).
        self.out_channels = next_feature
示例#11
0
    def __init__(self, cfg):
        """Build the mask-head ROI pooler and its stack of 3x3 convolutions.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_MASK_HEAD`` and
                ``MODEL.BACKBONE.OUT_CHANNELS``.
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        side = mask_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=mask_cfg.POOLER_SCALES,
            sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
        )
        channels = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = roi_pooler

        conv_dims = mask_cfg.CONV_LAYERS

        # Names of the registered conv modules, in creation order.
        self.blocks = []
        for position, width in enumerate(conv_dims, 1):
            name = "mask_fcn{}".format(position)
            conv = Conv2d(channels, width, 3, stride=1, padding=1)
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
            nn.init.constant_(conv.bias, 0)
            self.add_module(name, conv)
            self.blocks.append(name)
            # The next conv consumes what this one produces.
            channels = width
    def __init__(self, cfg, in_channels):
        """Build the mask-head ROI pooler and its stack of 3x3 conv blocks.

        Arguments:
            cfg: config node; ``cfg.MODEL.ROI_MASK_HEAD`` is handed to
                ``Pooler`` as a whole and also supplies ``USE_GN``,
                ``CONV_LAYERS`` and ``DILATION``.
            in_channels (int): channel count fed to the first conv block.
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        pooler = Pooler(cfg.MODEL.ROI_MASK_HEAD)

        self.pooler = pooler

        use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
        dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION

        next_feature = in_channels
        # Names of the registered conv modules, in creation order.
        self.blocks = []
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            self.add_module(layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # next_feature equals the width of the last block, and falls back to
        # in_channels when CONV_LAYERS is empty (the loop variable would be
        # unbound in that case).
        self.out_channels = next_feature
示例#13
0
    def __init__(self, config, in_channels, RCNN_top=None):
        """Build the pooler, a ResNet head, a global average pool and an FC.

        Arguments:
            config: config node; reads ``MODEL.ROI_BOX_HEAD`` and
                ``MODEL.RESNETS``.
            in_channels: accepted but not used in this constructor.
            RCNN_top: accepted but not used in this constructor.
        """
        super(ResNet50Conv5ROIFeatureExtractorFlatten, self).__init__()

        box_cfg = config.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )

        # The head consists of exactly one stage spec (index 4, 3 blocks).
        head_stage = resnet.StageSpec(index=4, block_count=3, return_features=False)
        resnets_cfg = config.MODEL.RESNETS
        resnet_head = resnet.ResNetHead(
            block_module=resnets_cfg.TRANS_FUNC,
            stages=(head_stage, ),
            num_groups=resnets_cfg.NUM_GROUPS,
            width_per_group=resnets_cfg.WIDTH_PER_GROUP,
            stride_in_1x1=resnets_cfg.STRIDE_IN_1X1,
            stride_init=None,
            res2_out_channels=resnets_cfg.RES2_OUT_CHANNELS,
            dilation=resnets_cfg.RES5_DILATION)

        self.pooler = roi_pooler
        self.head = resnet_head

        head_width = resnet_head.out_channels
        use_gn = box_cfg.USE_GN
        self.out_channels = box_cfg.MLP_HEAD_DIM
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # FC from the head's channel count to MLP_HEAD_DIM, followed by ReLU.
        projection = make_fc(head_width, self.out_channels, use_gn)
        self.fc = nn.Sequential(projection, nn.ReLU())
    def __init__(self, cfg):
        """Build the mask-head ROI pooler and its stack of 3x3 conv blocks.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_MASK_HEAD`` and
                ``MODEL.BACKBONE.OUT_CHANNELS``.
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        side = mask_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=mask_cfg.POOLER_SCALES,
            sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
        )
        channels = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = roi_pooler

        use_gn = mask_cfg.USE_GN
        conv_dims = mask_cfg.CONV_LAYERS
        dilation = mask_cfg.DILATION

        # Names of the registered conv modules, in creation order.
        self.blocks = []
        for position, width in enumerate(conv_dims, 1):
            name = "mask_fcn{}".format(position)
            conv = make_conv3x3(channels,
                                width,
                                dilation=dilation,
                                stride=1,
                                use_gn=use_gn)
            self.add_module(name, conv)
            self.blocks.append(name)
            # The next block consumes what this one produces.
            channels = width
    def __init__(self, cfg, in_channels):
        """Create the (optionally cascaded) ROI pooler and fc6/fc7.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_BOX_HEAD`` and
                ``MODEL.ROI_HEADS.USE_CASCADE_POOLING``.
            in_channels (int): channel count used to size ``fc6``.
        """
        super(FPN2MLPFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        pooler_scales = box_cfg.POOLER_SCALES
        pooler_ratio = box_cfg.POOLER_SAMPLING_RATIO

        # Both pooler classes are called with the same keyword arguments;
        # only the class differs.
        if cfg.MODEL.ROI_HEADS.USE_CASCADE_POOLING:
            pooler_cls = CascadePooler
        else:
            pooler_cls = Pooler
        roi_pooler = pooler_cls(
            output_size=(side, side),
            scales=pooler_scales,
            sampling_ratio=pooler_ratio,
        )

        flat_features = in_channels * side**2
        hidden_dim = box_cfg.MLP_HEAD_DIM
        use_gn = box_cfg.USE_GN

        self.pooler = roi_pooler
        self.fc6 = make_fc(flat_features, hidden_dim, use_gn)
        self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
        self.out_channels = hidden_dim
示例#16
0
    def __init__(self, cfg):
        """Build the pooler and two Linear + BatchNorm1d layers.

        Arguments:
            cfg: config node; a clone is stored on ``self.cfg``. Reads
                ``MODEL.ROI_CAR_CLS_ROT_HEAD`` and
                ``MODEL.BACKBONE.OUT_CHANNELS``.
        """
        super().__init__()
        self.cfg = cfg.clone()

        head_cfg = cfg.MODEL.ROI_CAR_CLS_ROT_HEAD
        side = head_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=head_cfg.POOLER_SCALES,
            sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
        )
        # fc6 input size: backbone channels times the pooled side squared.
        flat_features = cfg.MODEL.BACKBONE.OUT_CHANNELS * side**2
        hidden_dim = head_cfg.MLP_HEAD_DIM

        self.pooler = roi_pooler
        self.fc6 = nn.Linear(flat_features, hidden_dim)
        self.fc6_bn = nn.BatchNorm1d(hidden_dim)
        self.fc7 = nn.Linear(hidden_dim, hidden_dim)
        self.fc7_bn = nn.BatchNorm1d(hidden_dim)

        for bn in (self.fc6_bn, self.fc7_bn):
            nn.init.constant_(bn.weight, 1.0)

        # Linear weights go through the XavierFill helper (defined outside
        # this method); biases are zeroed.
        for fc in (self.fc6, self.fc7):
            XavierFill(fc.weight)
            nn.init.constant_(fc.bias, 0)
示例#17
0
    def __init__(self, cfg):
        """Create the ROI pooler and the fc6/fc7 layers via ``make_fc``.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_BOX_HEAD`` and
                ``MODEL.BACKBONE.OUT_CHANNELS``.
        """
        super(FPN2MLPFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )
        # fc6 input size: backbone channels times the pooled side squared.
        flat_features = cfg.MODEL.BACKBONE.OUT_CHANNELS * side**2
        hidden_dim = box_cfg.MLP_HEAD_DIM
        use_gn = box_cfg.USE_GN

        self.pooler = roi_pooler
        # make_fc replaces the earlier hand-initialised nn.Linear layers.
        self.fc6 = make_fc(flat_features, hidden_dim, use_gn)
        self.fc7 = make_fc(hidden_dim, hidden_dim, use_gn)
    def __init__(self, cfg, in_channels):
        """Build the pooler and two conv layers registered as fc6 and fc7.

        Arguments:
            cfg: config node; ``cfg.MODEL.ROI_BOX_HEAD`` is handed to
                ``Pooler`` as a whole and also supplies
                ``POOLER_RESOLUTION`` and ``USE_GN``.
            in_channels (int): input channel count; both layers output
                four times as many channels.
        """
        super(FPNDetNetFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION

        roi_pooler = Pooler(cfg.MODEL.ROI_BOX_HEAD)

        _flat_features = in_channels * side**2  # computed but not used
        self.use_gn = box_cfg.USE_GN
        self.pooler = roi_pooler

        wide = in_channels * 4
        # fc6 is a convolution whose kernel and stride both equal the pooled
        # resolution; the conv bias is disabled whenever group norm is used.
        self.fc6 = nn.Conv2d(
            in_channels,
            wide,
            kernel_size=side,
            stride=side,
            padding=0,
            bias=not self.use_gn,
        )
        if self.use_gn:
            self.gn6 = group_norm(wide)
        # fc7 is a 1x1 convolution that keeps the channel count.
        self.fc7 = nn.Conv2d(
            wide,
            wide,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=not self.use_gn,
        )
        if self.use_gn:
            self.gn7 = group_norm(wide)
        self.out_channels = wide
    def __init__(self, cfg):
        """Build the mask-head pooler and conv stack with deconv options.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_MASK_HEAD``,
                ``MODEL.BACKBONE.OUT_CHANNELS`` and ``MODEL.DECONV``.
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        side = mask_cfg.POOLER_RESOLUTION
        roi_pooler = Pooler(
            output_size=(side, side),
            scales=mask_cfg.POOLER_SCALES,
            sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
        )
        channels = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.pooler = roi_pooler

        use_gn = mask_cfg.USE_GN
        use_gw = mask_cfg.USE_GW

        conv_dims = mask_cfg.CONV_LAYERS
        dilation = mask_cfg.DILATION
        use_deconv = mask_cfg.USE_DECONV
        deconv_cfg = cfg.MODEL.DECONV
        block = deconv_cfg.BLOCK

        # Enabling deconv switches both normalisation flags off.
        if use_deconv:
            use_gn = use_gw = False

        # Names of the registered conv modules, in creation order.
        self.blocks = []

        # With layer-wise norm the configured type is passed to every conv
        # block. Otherwise the blocks get 'none', and one LayerNorm module is
        # created when the configured type is 'layernorm'.
        layerwise = deconv_cfg.LAYERWISE_NORM
        norm_type = deconv_cfg.MASK_NORM_TYPE if layerwise else 'none'
        if not layerwise and deconv_cfg.MASK_NORM_TYPE == 'layernorm':
            self.mask_norm = LayerNorm(eps=deconv_cfg.EPS)

        for position, width in enumerate(conv_dims, 1):
            name = "mask_fcn{}".format(position)
            conv = make_conv3x3(channels,
                                width,
                                dilation=dilation,
                                stride=1,
                                use_gn=use_gn,
                                use_gw=use_gw,
                                use_deconv=use_deconv,
                                block=block,
                                sampling_stride=deconv_cfg.STRIDE,
                                sync=deconv_cfg.SYNC,
                                norm_type=norm_type)
            self.add_module(name, conv)
            self.blocks.append(name)
            # The next block consumes what this one produces.
            channels = width
示例#20
0
    def __init__(self, cfg, in_channels):
        """Build the pooler, a stack of 3x3 convs and a single FC layer.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_BOX_HEAD`` and
                ``MODEL.USE_WS``. It is also forwarded to ``Pooler``.
            in_channels (int): channel count fed to the first convolution.
        """
        super(FPNXconv1fcFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        pooler_scales = box_cfg.POOLER_SCALES
        pooler_ratio = box_cfg.POOLER_SAMPLING_RATIO
        self.pooler = Pooler(output_size=(side, side),
                             scales=pooler_scales,
                             sampling_ratio=pooler_ratio,
                             cfg=cfg)

        use_gn = box_cfg.USE_GN
        conv_head_dim = box_cfg.CONV_HEAD_DIM
        num_stacked_convs = box_cfg.NUM_STACKED_CONVS
        dilation = box_cfg.DILATION

        use_ws = cfg.MODEL.USE_WS

        # Each stage is conv -> (group norm) -> ReLU. Padding equals the
        # dilation, and the conv bias is dropped whenever group norm follows.
        stack = []
        channels = in_channels
        for _ in range(num_stacked_convs):
            conv_cls = Conv2dWS if use_ws else nn.Conv2d
            stack.append(
                conv_cls(channels,
                         conv_head_dim,
                         kernel_size=3,
                         stride=1,
                         padding=dilation,
                         dilation=dilation,
                         bias=not use_gn))
            channels = conv_head_dim
            if use_gn:
                stack.append(group_norm(channels))
            stack.append(nn.ReLU(inplace=True))

        self.add_module("xconvs", nn.Sequential(*stack))
        for layer in self.xconvs.modules():
            if not isinstance(layer, nn.Conv2d):
                continue
            torch.nn.init.normal_(layer.weight, std=0.01)
            if not use_gn:
                torch.nn.init.constant_(layer.bias, 0)

        flat_features = conv_head_dim * side**2
        hidden_dim = box_cfg.MLP_HEAD_DIM
        self.fc6 = make_fc(flat_features, hidden_dim, use_gn=False)
        self.out_channels = hidden_dim
    def __init__(self, cfg, in_channels, architecture=None):
        """Build the mask-head pooler and one block per configured layer.

        Arguments:
            cfg: config node; reads ``MODEL.ROI_MASK_HEAD``.
            in_channels (int): channel count fed to the first block.
            architecture: optional sequence of indices into
                ``head_ss_keys``, one per entry of ``CONV_LAYERS``. When it
                is ``None`` every layer is a ``ModuleList`` holding one
                candidate per key; otherwise every layer is the single
                selected op.

        Raises:
            AssertionError: if the configured extractor name contains
                'AutoPanoptic' but not 'search' and ``architecture`` is
                missing or its length differs from ``CONV_LAYERS``.
        """
        super(AutoPanoptic_MaskRCNNFPNFeatureExtractor, self).__init__()
        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        # NOTE(review): USE_GN and DILATION from the config are not consulted
        # here; every layer is built with relu=False, gn=True.
        layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS

        next_feature = in_channels
        self.blocks = []

        self.architecture = None

        if 'AutoPanoptic' in cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR and \
            'search' not in cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR:
            assert architecture is not None, 'architecture not specified in AutoPanoptic mask head'
            assert len(architecture) == len(
                cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS)
            self.architecture = architecture

        for layer_idx, layer_features in enumerate(layers, 1):
            if architecture is None:
                # No architecture given: keep one candidate op per key.
                _ops = nn.ModuleList()
                for i in range(len(head_ss_keys)):
                    _ops.append(
                        make_layer(head_ss_keys[i],
                                   next_feature,
                                   layer_features,
                                   relu=False,
                                   gn=True))
            else:
                # Architecture given: build only the selected op.
                _ops = make_layer(head_ss_keys[architecture[layer_idx - 1]],
                                  next_feature,
                                  layer_features,
                                  relu=False,
                                  gn=True)
            next_feature = layer_features
            self.blocks.append(_ops)
            self.add_module(
                'AutoPanoptic_mask_fcn_{}'.format(layer_idx), _ops
            )  # inconsistent module name between search and single model can incur problem in model reloading

        # next_feature equals the width of the last layer, and falls back to
        # in_channels when CONV_LAYERS is empty (the loop variable would be
        # unbound in that case).
        self.out_channels = next_feature
    def __init__(self, cfg):
        """Create a single-scale ROI pooler.

        Arguments:
            cfg: config node; reads ``POOLER_RESOLUTION`` and
                ``POOLER_SAMPLING_RATIO`` from ``MODEL.ROI_BOX3D_HEAD``.
        """
        super(Box3dPCFeatureExtractor, self).__init__()

        head_cfg = cfg.MODEL.ROI_BOX3D_HEAD
        side = head_cfg.POOLER_RESOLUTION
        # The scale is fixed at 1.0 rather than taken from the config.
        fixed_scales = (1., )
        pooler_ratio = head_cfg.POOLER_SAMPLING_RATIO
        self.pooler = Pooler(
            output_size=(side, side),
            scales=fixed_scales,
            sampling_ratio=pooler_ratio,
        )
示例#23
0
    def __init__(self, cfg):
        """Create the ROI pooler from the box-head config.

        Arguments:
            cfg: config node; reads the ``MODEL.ROI_BOX_HEAD`` pooler
                resolution, scales and sampling ratio.
        """
        super(make_roi_box_feature_extractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        pooler_scales = box_cfg.POOLER_SCALES
        pooler_ratio = box_cfg.POOLER_SAMPLING_RATIO
        self.pooler = Pooler(
            output_size=(side, side),
            scales=pooler_scales,
            sampling_ratio=pooler_ratio,
        )
示例#24
0
 def init_pooler(self):
     """Build the ROI pooler from hard-coded settings.

     Sets ``self.collector``, ``self.num_levels`` and ``self.output_size``.
     The constants stand in for the POOLER_SCALES, POOLER_SAMPLING_RATIO and
     POOLER_RESOLUTION entries of benchmark_cfg.MODEL.ROI_MASK_HEAD.
     """
     side = 14
     level_scales = (0.25, 0.125, 0.0625, 0.03125)
     out_hw = (side, side)
     self.collector = Pooler(
         output_size=out_hw,
         scales=level_scales,
         sampling_ratio=2,
     )
     # One level per pooler held by the collector.
     self.num_levels = len(self.collector.poolers)
     self.output_size = out_hw
示例#25
0
 def __init__(self, cfg, in_channels, RCNN_top=None):
     """Create the ROI pooler and record the output channel count.

     Arguments:
         cfg: config node; reads ``MODEL.ROI_BOX_HEAD``.
         in_channels: accepted but not used in this constructor.
         RCNN_top: accepted but not used in this constructor.
     """
     super(BottomUpFeatureExtractor, self).__init__()
     box_cfg = cfg.MODEL.ROI_BOX_HEAD
     side = box_cfg.POOLER_RESOLUTION
     roi_pooler = Pooler(
         output_size=(side, side),
         scales=box_cfg.POOLER_SCALES,
         sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
     )
     hidden_dim = box_cfg.MLP_HEAD_DIM
     self.pooler = roi_pooler
     self.out_channels = hidden_dim
示例#26
0
    def __init__(self, cfg, in_channels):
        """Create the pooler; the channel count is passed through unchanged.

        Args:
            cfg: config node; the ``POOLER_*`` options under
                ``MODEL.ROI_BOX_HEAD`` are read.
            in_channels: stored as ``self.out_channels``.
        """
        super(FPN2ROIFeatureExtractor, self).__init__()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        # Values noted by the original author for these options:
        # resolution 7, scales (0.25, 0.125, 0.0625, 0.03125), ratio 2.
        side = box_cfg.POOLER_RESOLUTION
        self.pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )
        self.out_channels = in_channels
示例#27
0
    def __init__(self, cfg):
        """Keep a copy of the config and build the pooling layers.

        Args:
            cfg: config node; it is cloned into ``self.cfg`` and the
                ``POOLER_*`` options under ``MODEL.ROI_BOX_HEAD`` are read.
        """
        # NOTE(review): no base-class __init__ call is made here. If the
        # enclosing class derives from nn.Module, one is probably required
        # before submodules are assigned -- confirm against the class header.
        self.cfg = cfg.clone()

        box_cfg = cfg.MODEL.ROI_BOX_HEAD
        side = box_cfg.POOLER_RESOLUTION
        self.pooler = Pooler(
            output_size=(side, side),
            scales=box_cfg.POOLER_SCALES,
            sampling_ratio=box_cfg.POOLER_SAMPLING_RATIO,
        )
        # Kernel and stride both equal the pooler resolution.
        self.avgpool = nn.AvgPool2d(kernel_size=side, stride=side)
    def __init__(self, cfg):
        """Build the pooler and the ``mask_fcn{i}`` layers of the mask head.

        One layer is created per entry of ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``
        and registered as ``mask_fcn1``, ``mask_fcn2``, ...; the names are
        also collected, in order, in ``self.blocks``. When attention is
        enabled (``MODEL.ROI_MASK_HEAD.ATTN`` is not the empty string) a
        ``RoIAttnModule`` named ``mask_attn2`` is registered just before
        ``mask_fcn2``.

        Args:
            cfg: config node; ``MODEL.ROI_MASK_HEAD`` and
                ``MODEL.BACKBONE.OUT_CHANNELS`` are read.
        """
        super(MaskRCNNFPNFeatureExtractor, self).__init__()

        mask_cfg = cfg.MODEL.ROI_MASK_HEAD
        resolution = mask_cfg.POOLER_RESOLUTION
        self.pooler = Pooler(
            output_size=(resolution, resolution),
            scales=mask_cfg.POOLER_SCALES,
            sampling_ratio=mask_cfg.POOLER_SAMPLING_RATIO,
        )
        conv_widths = mask_cfg.CONV_LAYERS
        in_features = cfg.MODEL.BACKBONE.OUT_CHANNELS
        self.blocks = []
        self.use_attn = not (mask_cfg.ATTN == "")

        # Upsampling is enabled only when the mask resolution is exactly four
        # times the pooler resolution. (A disabled earlier variant in the
        # original instead tested RESOLUTION / (2.0 * resolution) == 2.0 and
        # upsampled on every odd-numbered layer.)
        use_upsample = bool((mask_cfg.RESOLUTION / resolution) == 4.0)

        for idx, out_features in enumerate(conv_widths, 1):
            # Only the third layer is ever a transposed convolution.
            if use_upsample and idx == 3:
                conv = ConvTranspose2d(in_features, out_features, 2, 2, 0)
            else:
                conv = Conv2d(in_features, out_features, 3, 1, 1)

            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(
                conv.weight, mode="fan_out", nonlinearity="relu"
            )
            nn.init.constant_(conv.bias, 0)

            # The attention module is registered ahead of the layer it
            # accompanies, so it precedes that layer in registration order.
            if self.use_attn and idx == 2:
                attn_shape = (out_features, resolution, resolution)
                self.add_module(
                    "mask_attn{}".format(idx), RoIAttnModule(cfg, attn_shape)
                )

            block_name = "mask_fcn{}".format(idx)
            self.add_module(block_name, conv)
            in_features = out_features
            self.blocks.append(block_name)
    def __init__(self, cfg, in_channels):
        """Build the pooler and the two fully connected layers.

        Args:
            cfg: config node; ``MODEL.ROI_BOX_HEAD`` is handed to ``Pooler``
                as a whole, and its ``POOLER_RESOLUTION``, ``MLP_HEAD_DIM``
                and ``USE_GN`` options are read.
            in_channels: channel count used to size the input of ``fc6``.
        """
        super(FPN2MLPFeatureExtractor, self).__init__()

        side = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION

        roi_pooler = Pooler(cfg.MODEL.ROI_BOX_HEAD)

        # fc6 input size: in_channels times the squared pooler resolution.
        flat_dim = in_channels * side**2
        hidden_dim = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM
        with_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN

        self.pooler = roi_pooler
        self.fc6 = make_fc(flat_dim, hidden_dim, with_gn)
        self.fc7 = make_fc(hidden_dim, hidden_dim, with_gn)
        self.out_channels = hidden_dim
示例#30
0
    def __init__(self, cfg, in_channels):
        super(SupportFPN2ROIFeatureExtractor, self).__init__()

        resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        scales = cfg.MODEL.ROI_BOX_HEAD.POOLER_SCALES
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler = Pooler(
            output_size=(resolution, resolution),
            scales=scales,
            sampling_ratio=sampling_ratio,
        )
        input_size = in_channels * resolution**2
        representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM  # default to 1024
        use_gn = cfg.MODEL.ROI_BOX_HEAD.USE_GN
        self.pooler = pooler