Exemplo n.º 1
0
    def __init__(self, cfg, in_channels):
        """
        Build the conv stack and the two fully connected layers of this
        feature extractor.

        Arguments:
            cfg: configuration node; this method reads
                MODEL.ROI_MASKIOU_HEAD.CONV_LAYERS and
                MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
            in_channels (int): number of channels of the incoming feature
                map. The first conv is built with ``in_channels + 1`` input
                channels (presumably one extra channel for the mask that is
                concatenated in forward -- confirm against forward)
        """
        super(MaskIoUFeatureExtractor, self).__init__()

        layers = cfg.MODEL.ROI_MASKIOU_HEAD.CONV_LAYERS
        # Only the last conv is built with stride 2, so the FC layers are
        # sized for a map of half the pooler resolution.
        resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION // 2
        # Size the first FC layer from the LAST conv's output channels: the
        # convs are chained, so layers[-1] is what leaves the stack.
        # (The previous code used layers[0], which only matches when all
        # entries of CONV_LAYERS are equal.)
        # NOTE(review): assumes forward flattens the last conv's output
        # straight into maskiou_fc1 -- confirm.
        fc_input_size = layers[-1] * resolution * resolution

        # Names of the conv sub-modules, in application order.
        self.blocks = []
        num_layers = len(layers)
        input_features = in_channels + 1
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "maskiou_fcn{}".format(layer_idx)
            # Stride 1 everywhere except the final layer.
            stride = 2 if layer_idx == num_layers else 1
            module = make_conv3x3(input_features,
                                  layer_features,
                                  stride=stride)
            # Register through setattr so the conv becomes a named attribute
            # of the module; self.blocks only stores the attribute names.
            setattr(self, layer_name, module)
            input_features = layer_features
            self.blocks.append(layer_name)

        self.maskiou_fc1 = nn.Linear(fc_input_size, 1024)
        self.maskiou_fc2 = nn.Linear(1024, 1024)

        for fc in (self.maskiou_fc1, self.maskiou_fc2):
            nn.init.kaiming_uniform_(fc.weight, a=1)
            nn.init.constant_(fc.bias, 0)
    def __init__(self, cfg, in_channels):
        """
        Build the pooler, the spatial attention module and the stack of
        conv layers of this feature extractor.

        Arguments:
            cfg: configuration node; this method reads MODEL.MASKIOU_ON and,
                under MODEL.ROI_MASK_HEAD, POOLER_RESOLUTION, POOLER_SCALES,
                POOLER_SAMPLING_RATIO, LEVEL_MAP_FUNCTION, USE_GN,
                CONV_LAYERS and DILATION
            in_channels (int): number of input channels of the first conv
                layer
        """
        super(MaskRCNNFPNSpatialAttentionFeatureExtractor, self).__init__()

        mask_head_cfg = cfg.MODEL.ROI_MASK_HEAD
        resolution = mask_head_cfg.POOLER_RESOLUTION
        scales = mask_head_cfg.POOLER_SCALES

        self.maskiou = cfg.MODEL.MASKIOU_ON
        self.pooler = Pooler(output_size=(resolution, resolution),
                             scales=scales,
                             sampling_ratio=mask_head_cfg.POOLER_SAMPLING_RATIO,
                             lvl_map_func=mask_head_cfg.LEVEL_MAP_FUNCTION)

        use_gn = mask_head_cfg.USE_GN
        layers = mask_head_cfg.CONV_LAYERS
        dilation = mask_head_cfg.DILATION

        # spatial attention module
        self.spatialAtt = SpatialAttention()
        # One entry per pooler scale.
        self.num_pooler = len(scales)

        # Names of the conv sub-modules, in application order.
        self.blocks = []
        next_feature = in_channels
        for layer_idx, layer_features in enumerate(layers, 1):
            layer_name = "mask_fcn{}".format(layer_idx)
            module = make_conv3x3(next_feature,
                                  layer_features,
                                  dilation=dilation,
                                  stride=1,
                                  use_gn=use_gn)
            # Register through setattr so the conv becomes a named attribute
            # of the module; self.blocks only stores the attribute names.
            setattr(self, layer_name, module)
            next_feature = layer_features
            self.blocks.append(layer_name)
        # next_feature equals the last layer's width after the loop, and
        # stays at in_channels when CONV_LAYERS is empty. Reading the loop
        # variable here instead would raise UnboundLocalError in that case.
        self.out_channels = next_feature