def __init__(self, feature_dim=256, output_dims=(256, 256), downsample_rate=16, mode=None):
        super().__init__()
        self.pool_size = 7
        self.feature_dim = feature_dim
        self.output_dims = output_dims
        self.downsample_rate = downsample_rate
        self.mode = mode


        self.object_roi_pool = RoIAlign(self.pool_size, 1.0 / self.downsample_rate, -1)
        self.object_feature_fc = nn.Sequential(nn.ReLU(True), nn.Linear(output_dims[0] * self.pool_size ** 2, output_dims[0]))

#         self.context_roi_pool = RoIAlign(self.pool_size, 1.0 / self.downsample_rate, -1)
#         self.context_feature_extract = nn.Conv2d(feature_dim, feature_dim, 1)
#         self.object_feature_fuse = nn.Conv2d(feature_dim * 2, output_dims[0], 1)

        if mode=="spatial":
            self.relation_roi_pool = RoIAlign(self.pool_size, 1.0 / self.downsample_rate, -1)
            self.relation_feature_extract = nn.Conv2d(feature_dim, feature_dim // 2 * 3, 1)
            self.relation_feature_fuse = nn.Conv2d(feature_dim // 2 * 3 + output_dims[0] * 2, output_dims[1], 1)
            self.relation_feature_fc = nn.Sequential(nn.ReLU(True), nn.Linear(output_dims[1] * self.pool_size ** 2, output_dims[1]))

        # This will change for models with multiple objects in the future;
        # in that case, it will pick up the pretrained weights.
        if mode == "node":
            self.reset_parameters()

        if self.mode == "spatial":
            print("freezing scene graph feature extractor")
            self.set_parameter_requires_grad()
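
A minimal smoke test for the object branch above, assuming torchvision's RoIAlign and dummy sizes (the shapes here are hypothetical, not from the original repo):

import torch
from torchvision.ops import RoIAlign

feats = torch.randn(1, 256, 32, 32)                # stride-16 features for a 512x512 image
rois = torch.tensor([[0., 64., 64., 192., 192.]])  # (batch_idx, x1, y1, x2, y2) in image coords
pool = RoIAlign(7, spatial_scale=1.0 / 16, sampling_ratio=-1)
crop = pool(feats, rois)                           # -> (1, 256, 7, 7)
flat = crop.view(rois.size(0), -1)                 # -> (1, 256 * 7 * 7), input to object_feature_fc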
Example No. 2
    def __init__(self, num_classes=0, input_dim=3, ch=64):
        super(ResnetDiscriminator128_app, self).__init__()
        self.num_classes = num_classes

        self.block1 = OptimizedBlock(input_dim, ch, downsample=True)  # input_dim, not a hardcoded 3
        self.block2 = ResBlock(ch, ch * 2, downsample=True)
        self.block3 = ResBlock(ch * 2, ch * 4, downsample=True)
        self.block4 = ResBlock(ch * 4, ch * 8, downsample=True)
        self.block5 = ResBlock(ch * 8, ch * 16, downsample=True)
        self.block6 = ResBlock(ch * 16, ch * 16, downsample=False)
        self.l7 = nn.utils.spectral_norm(nn.Linear(ch * 16, 1))
        self.activation = nn.ReLU()

        self.roi_align_s = RoIAlign((8, 8), 1.0 / 4.0, 0)
        self.roi_align_l = RoIAlign((8, 8), 1.0 / 8.0, 0)

        self.block_obj3 = ResBlock(ch * 2, ch * 4, downsample=False)
        self.block_obj4 = ResBlock(ch * 4, ch * 8, downsample=False)
        self.block_obj5 = ResBlock(ch * 8, ch * 16, downsample=True)
        self.l_obj = nn.utils.spectral_norm(nn.Linear(ch * 16, 1))
        self.l_y = nn.utils.spectral_norm(nn.Embedding(num_classes, ch * 16))
        # appearance discriminator
        self.app_conv = ResBlock(ch * 8, ch * 8, downsample=False)
        self.l_y_app = nn.utils.spectral_norm(nn.Embedding(
            num_classes, ch * 8))
        self.app = nn.utils.spectral_norm(nn.Linear(ch * 16, 1))
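
A sketch of how the two aligners above pair with different pyramid levels; the routing and channel counts are assumptions for illustration only:

import torch
from torchvision.ops import RoIAlign

feat_s = torch.randn(2, 128, 64, 64)  # stride-4 map (ch * 2 for ch=64)
feat_l = torch.randn(2, 256, 32, 32)  # stride-8 map (ch * 4 for ch=64)
rois = torch.tensor([[0., 10., 10., 50., 50.],
                     [1., 0., 0., 100., 80.]])
roi_align_s = RoIAlign((8, 8), 1.0 / 4.0, 0)
roi_align_l = RoIAlign((8, 8), 1.0 / 8.0, 0)
obj_s = roi_align_s(feat_s, rois)     # (2, 128, 8, 8)
obj_l = roi_align_l(feat_l, rois)     # (2, 256, 8, 8)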
Example No. 3
 def __init__(self):
     self.valid_anchor_boxes = self.get_valid_anchor_boxes().astype(np.float32)
     # extracting RoI of size 56x56 on 224x224
     self.scale_base_roi_align = RoIAlign((56, 56), spatial_scale=1.0, sampling_ratio=2)  # base map is of size 56x56
     self.scale_1_roi_align = RoIAlign((28, 28), spatial_scale=1.0, sampling_ratio=2)     # will look at 28x28 on the 112x112 image
     self.scale_2_roi_align = RoIAlign((14, 14), spatial_scale=1.0, sampling_ratio=2)     # will look at 14x14 on the 56x56 image
     self.scale_3_roi_align = RoIAlign((7, 7), spatial_scale=1.0, sampling_ratio=2)      # will look at 7x7 on the 28x28 image
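
Since every aligner here uses spatial_scale=1.0, the boxes passed in must already be in each map's own coordinate system. A quick check against the base map (dummy data):

import torch
from torchvision.ops import RoIAlign

base_map = torch.randn(1, 64, 56, 56)         # hypothetical 56x56 base map
roi = torch.tensor([[0., 0., 0., 55., 55.]])  # whole map, in feature coordinates
out = RoIAlign((56, 56), spatial_scale=1.0, sampling_ratio=2)(base_map, roi)
# out: (1, 64, 56, 56)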
Example No. 4
    def roi_feature_transform(self, blobs_in, rpn_ret, blob_rois='rois', method='RoIPoolF',
                              resolution=7, spatial_scale=1. / 16., sampling_ratio=0):
        """Add the specified RoI pooling method. The sampling_ratio argument
        is supported for some, but not all, RoI transform methods.

        RoIFeatureTransform abstracts away:
          - Use of FPN or not
          - Specifics of the transform method
        """
        assert method in {'RoIPoolF', 'RoIAlign'}, \
            'Unknown pooling method: {}'.format(method)

        if isinstance(blobs_in, list):
            # FPN case: add RoIFeatureTransform to each FPN level
            device_id = blobs_in[0].get_device()
            k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
            k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
            assert len(blobs_in) == k_max - k_min + 1
            bl_out_list = []
            for lvl in range(k_min, k_max + 1):
                bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
                sc = spatial_scale[k_max - lvl]  # in reversed order
                bl_rois = blob_rois + '_fpn' + str(lvl)
                if len(rpn_ret[bl_rois]):
                    rois = Variable(torch.from_numpy(rpn_ret[bl_rois])).cuda(device_id)
                    if method == 'RoIPoolF':
                        # Warning: not verified against the Detectron implementation
                        xform_out = RoIPool((resolution, resolution), sc)(bl_in, rois)
                    elif method == 'RoIAlign':
                        xform_out = RoIAlign(
                            (resolution, resolution), sc, sampling_ratio)(bl_in, rois)
                    bl_out_list.append(xform_out)

            # The pooled features from all levels are concatenated along the
            # batch dimension into a single 4D tensor.
            xform_shuffled = torch.cat(bl_out_list, dim=0)

            # Unshuffle to match rois from dataloader
            device_id = xform_shuffled.get_device()
            restore_bl = rpn_ret[blob_rois + '_idx_restore_int32']
            restore_bl = Variable(
                torch.from_numpy(restore_bl.astype('int64', copy=False))).cuda(device_id)
            xform_out = xform_shuffled[restore_bl]
        else:
            # Single feature level
            # rois: holds R regions of interest, each is a 5-tuple
            # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
            # rectangle (x1, y1, x2, y2)
            device_id = blobs_in.get_device()
            rois = Variable(torch.from_numpy(rpn_ret[blob_rois])).cuda(device_id)
            if method == 'RoIPoolF':
                xform_out = RoIPool((resolution, resolution), spatial_scale)(blobs_in, rois)
            elif method == 'RoIAlign':
                xform_out = RoIAlign(
                    (resolution, resolution), spatial_scale, sampling_ratio)(blobs_in, rois)

        return xform_out
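
The "unshuffle" at the end is worth isolating: per-level pooled features are concatenated level-major, then reindexed back to the dataloader's roi order with the precomputed restore index. A toy illustration (shapes and index are made up):

import torch

pooled_lvl2 = torch.randn(3, 256, 7, 7)  # rois assigned to the finest level
pooled_lvl3 = torch.randn(2, 256, 7, 7)  # rois assigned to the next level
shuffled = torch.cat([pooled_lvl2, pooled_lvl3], dim=0)  # level-major order
restore = torch.tensor([2, 0, 4, 1, 3])  # stand-in for *_idx_restore_int32
restored = shuffled[restore]             # back to the original roi order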
Example No. 5
 def get_roi_features(self, features, rois):
     """
     Gets ROI features
     :param features: [batch_size, dim, IM_SIZE/4, IM_SIZE/4] (features at level p2)
     :param rois: [num_rois, 5] array of [img_num, x0, y0, x1, y1].
     :return: [num_rois, #dim] array
     """
     feature_pool = RoIAlign((self.pooling_size, self.pooling_size), spatial_scale=1 / 16, sampling_ratio=-1)(
         features, rois)
     return self.roi_fmap_obj(feature_pool.view(rois.size(0), -1))
Example No. 6
 def obj_feature_map(self, features, rois):
     """
     Gets the ROI features
     :param features: [batch_size, dim, IM_SIZE/4, IM_SIZE/4] (features at level p2)
     :param rois: [num_rois, 5] array of [img_num, x0, y0, x1, y1].
     :return: [num_rois, #dim] array
     """
     feature_pool = RoIAlign((self.pooling_size, self.pooling_size), spatial_scale=1 / 16, sampling_ratio=-1)(
         self.compress(features) if self.use_resnet else features, rois)
     return self.roi_fmap(feature_pool.view(rois.size(0), -1))
Example No. 7
    def forward(self, out):
        # outs
        out1, out2, out3 = out['out1'], out['out2'], out['out3']

        # spatial
        batch_size, c, h, w = list(out1.size())

        # forward prop
        layer1_out = self.leaky_relu(self.conv_layer1(out1))
        layer2_out = self.leaky_relu(self.conv_layer2(layer1_out))
        layer3_out = self.leaky_relu(self.conv_layer3(layer2_out))
        roi_align_out = RoIAlign(
            (h // 2, w // 2), spatial_scale=1.0,
            sampling_ratio=2)(layer2_out,
                              get_rois(batch_size,
                                       [0, 0, h - 1, w - 1]).to(device))
        aux_out = self.leaky_relu(self.conv_aux_out(self.dropout(layer3_out)))
        block_out1 = torch.add(roi_align_out, aux_out)

        intermediate_curr_block = torch.cat([layer1_out, layer2_out], dim=1)
        block_out2 = self.leaky_relu(self.conv_block2(intermediate_curr_block))
        block_out2 = self.dropout(block_out2)

        prev_block = torch.cat([out2, out3], dim=1)
        prev_block_concat = self.leaky_relu(self.conv_prev_block(prev_block))
        out1_moveaxis = out1.permute(0, 2, 3, 1).view(batch_size, -1, c)

        # dense layers - time distributed
        attn_weights = []
        for item in range(out1_moveaxis.size(1)):
            attn_weights.append(self.linear_units[item](
                out1_moveaxis[:, item, :]))

        formatted = torch.stack(attn_weights).permute(
            1, 2, 0)  # h*w elements of shape (batch_size, 1)
        softmax_formatted = nn.Softmax(dim=-1)(formatted)

        softmax_attn_matrix = softmax_formatted.view(batch_size, 1, h, w)

        # get the attention tensor for Out1 from the previous block.
        attn_out = torch.mul(prev_block_concat, softmax_attn_matrix)
        block_out3 = RoIAlign(
            (h // 2, w // 2), spatial_scale=1.0,
            sampling_ratio=2)(attn_out,
                              get_rois(batch_size,
                                       [0, 0, h - 1, w - 1]).to(device))

        return {
            'out1': torch.add(torch.add(block_out1, block_out2), block_out3),
            'out2': block_out2,
            'out3': block_out3
        }
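
get_rois is not defined in this snippet; a plausible helper consistent with the calls above builds one full-map box per batch element, taking the list as (x1, y1, x2, y2) per the torchvision convention (an assumption):

import torch

def get_rois(batch_size, box):
    # box assumed to be [x1, y1, x2, y2]; called above with [0, 0, h - 1, w - 1]
    idx = torch.arange(batch_size, dtype=torch.float32).unsqueeze(1)
    coords = torch.tensor(box, dtype=torch.float32).repeat(batch_size, 1)
    return torch.cat([idx, coords], dim=1)  # (batch_size, 5)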
Example No. 8
    def get_roi_features_depth(self, features, rois):
        """
        Gets ROI features (depth)
        :param features: [batch_size, dim, IM_SIZE/4, IM_SIZE/4] (features at level p2)
        :param rois: [num_rois, 5] array of [img_num, x0, y0, x1, y1].
        :return: [num_rois, #dim] array
        """
        feature_pool = RoIAlign((self.pooling_size, self.pooling_size), spatial_scale=1 / 16, sampling_ratio=-1)(
            features, rois)

        # -- Flatten the layer if the model is not RESNET/SQZNET
        if self.depth_model not in ('resnet18', 'resnet50', 'sqznet'):
            feature_pool = feature_pool.view(rois.size(0), -1)

        return self.depth_rel_head(feature_pool)
Example No. 9
    def __init__(self, config):
        super(Faster_Rcnn, self).__init__()
        self.config = config
        self.Mean = torch.tensor(config.Mean, dtype=torch.float32)
        self.num_anchor = len(config.anchor_scales) * len(config.anchor_ratios)
        self.anchors = get_anchors(np.ceil(self.config.img_max / 16 + 1),
                                   self.config.anchor_scales,
                                   self.config.anchor_ratios)

        self.PC = ProposalCreator(nms_thresh=config.roi_nms_thresh,
                                  n_train_pre_nms=config.roi_train_pre_nms,
                                  n_train_post_nms=config.roi_train_post_nms,
                                  n_test_pre_nms=config.roi_test_pre_nms,
                                  n_test_post_nms=config.roi_test_post_nms,
                                  min_size=config.roi_min_size)

        self.features = vgg16().features[:-1]
        self.rpn = RPN_net(512, self.num_anchor)
        self.roialign = RoIAlign((7, 7), 1 / 16., 2)
        self.fast = Fast_net(config.num_cls, 512 * 7 * 7, 2048)
        self.fast_2 = Fast_net(config.num_cls, 512 * 7 * 7, 2048)
        self.fast_3 = Fast_net(config.num_cls, 512 * 7 * 7, 2048)

        self.loc_std1 = [1. / 10, 1. / 10, 1. / 5, 1. / 5]
        self.loc_std2 = [1. / 20, 1. / 20, 1. / 10, 1. / 10]
        self.loc_std3 = [1. / 30, 1. / 30, 1. / 15, 1. / 15]
        self.weights = [1.0, 1.0, 1.0]
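
The three loc_std lists suggest per-stage target normalization for the three heads; the usual decoding step they imply (assumed, not shown in this snippet) is to unnormalize the regression deltas before applying them to the rois:

import torch

deltas = torch.randn(4, 4)                     # raw head outputs (dx, dy, dw, dh)
loc_std1 = torch.tensor([0.1, 0.1, 0.2, 0.2])  # stage-1 stds from above
decoded = deltas * loc_std1                    # unnormalize, then apply to the boxes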
Example No. 10
def union_boxes(fmap, rois, union_inds, pooling_size=14, stride=16):
    """
    :param fmap: (batch_size, d, IM_SIZE/stride, IM_SIZE/stride)
    :param rois: (num_rois, 5) with [im_ind, x1, y1, x2, y2]
    :param union_inds: (num_urois, 2) with [roi_ind1, roi_ind2]
    :param pooling_size: we'll resize to this
    :param stride:
    :return:
    """
    assert union_inds.size(1) == 2
    im_inds = rois[:, 0][union_inds[:, 0]]
    assert (im_inds.data == rois.data[:, 0][union_inds[:, 1]]
            ).sum() == union_inds.size(0)
    union_rois = torch.cat((
        im_inds[:, None],
        torch.min(rois[:, 1:3][union_inds[:, 0]], rois[:, 1:3][union_inds[:, 1]]),
        torch.max(rois[:, 3:5][union_inds[:, 0]], rois[:, 3:5][union_inds[:, 1]]),
    ), 1)

    # (num_urois, d, pooling_size, pooling_size)
    union_pools = RoIAlign((pooling_size, pooling_size),
                           spatial_scale=1 / stride,
                           sampling_ratio=-1)(fmap, union_rois)
    return union_pools
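
Usage sketch for union_boxes with dummy inputs; pairs (0, 1) and (0, 2) of the rois are unioned and pooled:

import torch

fmap = torch.randn(1, 256, 38, 38)  # e.g. a 608x608 image at stride 16
rois = torch.tensor([[0., 10., 10., 60., 60.],
                     [0., 40., 40., 120., 100.],
                     [0., 200., 200., 300., 260.]])
union_inds = torch.tensor([[0, 1], [0, 2]])
pools = union_boxes(fmap, rois, union_inds, pooling_size=14, stride=16)
# pools: (2, 256, 14, 14)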
Example No. 11
    def __init__(self, classes0, classes1, class_agnostic, use_share_regress=False, use_progress=False):
        super(_fasterRCNN, self).__init__()
        self.classes0 = classes0
        self.classes1 = classes1
        self.n_classes0 = len(classes0)
        self.n_classes1 = len(classes1)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn0 = _RPN(self.dout_base_model)
        self.RCNN_rpn1 = _RPN(self.dout_base_model)
        self.RCNN_proposal_target0 = _ProposalTargetLayer(self.n_classes0)
        self.RCNN_proposal_target1 = _ProposalTargetLayer(self.n_classes1)

        self.use_share_regress = use_share_regress
        self.use_progress = use_progress

        self.RCNN_roi_align = RoIAlign((cfg.POOLING_SIZE, cfg.POOLING_SIZE), spatial_scale=1.0/16.0, sampling_ratio=0)
        if self.use_share_regress:
            self.RCNN_share_regress = nn.Linear(2048, 1)

        if self.use_progress:
            self.fc_progress = nn.Linear(1024, 3)
Example No. 12
    def __init__(self, cfg):
        super(Pool, self).__init__()
        self.cfg = cfg
        # output size of roi_align
        self.resolution = self.cfg.ROI_BOX.RESOLUTION
        # downsampling factors of the backbone outputs
        ratios = self.cfg.ROI_BOX.RATIOS
        # number of sampling points used during interpolation
        sample_ratio = self.cfg.ROI_BOX.SAMPLE_RATIO

        # use a separate RoIAlign for each downsampling factor
        for index, ratio in enumerate(ratios):
            name = 'roi_align_{}'.format(index)
            roi_align = RoIAlign(
                (self.resolution, self.resolution),
                ratio,
                sample_ratio
            )
            self.add_module(name, roi_align)

        self.num_level = len(ratios)
        self.level_min = -torch.log2(torch.tensor(ratios[0])).item()
        self.level_max = -torch.log2(torch.tensor(ratios[-1])).item()
        self.base_size = 224
        self.base_level = 4
        self.eps = 1e-6
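
level_min/level_max together with base_size=224 and base_level=4 point at the standard FPN level-assignment rule, k = floor(k0 + log2(sqrt(wh) / 224)). A sketch of that mapping under those assumptions (the forward itself is not shown here):

import torch

def map_rois_to_levels(rois, base_size=224, base_level=4, lvl_min=2, lvl_max=5, eps=1e-6):
    w = rois[:, 3] - rois[:, 1]
    h = rois[:, 4] - rois[:, 2]
    target = torch.floor(base_level + torch.log2(torch.sqrt(w * h) / base_size + eps))
    return target.clamp(min=lvl_min, max=lvl_max).long()  # index of roi_align_{k - lvl_min}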
Example No. 13
    def __init__(self, base_filters=16):

        super(DiscriminatorROI, self).__init__()

        self.conv_layers = Sequential(
            ZeroPad2d((1, 2, 1, 2)),
            Conv2d(3, base_filters, kernel_size=4, stride=2, bias=False),
            LeakyReLU(0.2, inplace=True), ZeroPad2d((1, 2, 1, 2)),
            Conv2d(base_filters,
                   2 * base_filters,
                   kernel_size=4,
                   stride=2,
                   bias=False), BatchNorm2d(2 * base_filters, momentum=0.8),
            LeakyReLU(0.2, inplace=True), ZeroPad2d((1, 2, 1, 2)),
            Conv2d(2 * base_filters,
                   4 * base_filters,
                   kernel_size=4,
                   stride=2,
                   bias=False), BatchNorm2d(4 * base_filters, momentum=0.8),
            LeakyReLU(0.2, inplace=True))

        self.roi_pool = RoIAlign(output_size=(3, 3),
                                 spatial_scale=0.125,
                                 sampling_ratio=-1)

        self.classifier = Sequential(
            Conv2d(4 * base_filters, 1, kernel_size=3, padding=0, bias=False))
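
End-to-end shapes for this discriminator at base_filters=16: 128x128 inputs leave conv_layers at stride 8 with 64 channels, which is what spatial_scale=0.125 expects. A dummy pass, assuming the snippet's own imports (torch.nn layers, torchvision's RoIAlign) are in scope:

import torch

disc = DiscriminatorROI()
imgs = torch.randn(2, 3, 128, 128)
rois = torch.tensor([[0., 8., 8., 72., 72.],
                     [1., 16., 16., 96., 96.]])
feats = disc.conv_layers(imgs)      # (2, 64, 16, 16)
crops = disc.roi_pool(feats, rois)  # (2, 64, 3, 3)
scores = disc.classifier(crops)     # (2, 1, 1, 1)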
Example No. 14
    def __init__(self, num_classes, backbone=None):
        super(M_ROI_CLASSIFIER, self).__init__()

        self.num_classes = num_classes
        M_backbone = backbone
        if M_backbone is None:
            M_backbone = resnet50(
                pretrained=True,
                replace_stride_with_dilation=[False, True, True])

        M_conv_ = torch.nn.Conv2d(2048,
                                  512,
                                  kernel_size=(1, 1),
                                  stride=(1, 1),
                                  bias=False)
        M_batchn_ = torch.nn.BatchNorm2d(512,
                                         eps=1e-05,
                                         momentum=0.1,
                                         affine=True,
                                         track_running_stats=True)
        M_relu_ = torch.nn.ReLU(inplace=True)
        M_custom_layer = torch.nn.Sequential(M_conv_, M_batchn_, M_relu_)
        self.M_custom_backbone = torch.nn.Sequential(
            M_backbone.conv1, M_backbone.bn1, M_backbone.relu,
            M_backbone.maxpool, M_backbone.layer1, M_backbone.layer2,
            M_backbone.layer3, M_backbone.layer4, M_custom_layer)

        self.M_roi_align = RoIAlign(output_size=(5, 5),
                                    spatial_scale=1,
                                    sampling_ratio=-1)
        self.M_flatten = torch.nn.Flatten()
        self.M_classifier = torch.nn.Linear(in_features=12800,
                                            out_features=self.num_classes,
                                            bias=True)
Example No. 15

    def __init__(self, classes0, classes1, class_agnostic, use_share_regress=False, use_progress=False):
        super(_FPN, self).__init__()
        self.classes0 = classes0
        self.classes1 = classes1
        self.n_classes0 = len(classes0)
        self.n_classes1 = len(classes1)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        self.maxpool2d = nn.MaxPool2d(1, stride=2)
        # define rpn
        self.RCNN_rpn0 = _RPN_FPN(self.dout_base_model)
        self.RCNN_rpn1 = _RPN_FPN(self.dout_base_model)
        self.RCNN_proposal_target0 = _ProposalTargetLayer(self.n_classes0)
        self.RCNN_proposal_target1 = _ProposalTargetLayer(self.n_classes1)

        self.use_share_regress = use_share_regress
        self.use_progress = use_progress

        # NOTE: the original paper used pool_size = 7 for the cls branch and 14 for the mask
        # branch. To save computation time, we first pool at 14, then do stride-2 pooling for
        # the cls branch.
        # self.RCNN_roi_crop = _RoICrop()
        # self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        # self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RCNN_roi_align = RoIAlign((cfg.POOLING_SIZE, cfg.POOLING_SIZE), spatial_scale=1.0/16.0, sampling_ratio=0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE

        if self.use_share_regress:
            self.RCNN_share_regress = nn.Linear(1024, 1)

        if self.use_progress:
            self.fc_progress = nn.Linear(256*5, 3)
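
The NOTE above describes pooling once at 14x14 and deriving the 7x7 cls input with a stride-2 pool; isolated, the trick is just:

import torch
import torch.nn as nn

pooled14 = torch.randn(8, 256, 14, 14)          # shared 14x14 RoIAlign output
cls_feat = nn.MaxPool2d(1, stride=2)(pooled14)  # (8, 256, 7, 7) for the cls branch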
Example No. 16
    def __init__(self, base_model, in_channel=2048, out_channel=512,
                 nclass=174, dropout=0.3, nrois=10,
                 freeze_bn=True, freeze_bn_affine=True,
                 roi_size=7
                 ):
        super(STRG, self).__init__()
        self.base_model = base_model
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.nclass = nclass
        self.nrois = nrois

        self.freeze_bn = freeze_bn
        self.freeze_bn_affine = freeze_bn_affine

        self.base_model.fc = nn.Identity()
        self.base_model.avgpool = nn.Identity()
        if False:  # disabled stride surgery on the 3D backbone
            self.base_model.maxpool.stride = (1,2,2)
            self.base_model.layer3[0].conv2.stride=(1,2,2)
            self.base_model.layer3[0].downsample[0].stride=(1,2,2)
            self.base_model.layer4[0].conv2.stride=(1,1,1)
            self.base_model.layer4[0].downsample[0].stride=(1,1,1)

        self.reducer = nn.Conv3d(self.in_channel, self.out_channel,1)
        self.classifier = nn.Linear(2*self.out_channel, nclass)
        self.avg_pool = nn.Sequential(
            nn.AdaptiveAvgPool3d(1),
            nn.Dropout(p=dropout)
        )
        self.max_pool = nn.AdaptiveAvgPool2d(1)  # note: average pooling despite the name

        self.strg_gcn = RGCN()
        self.roi_align = RoIAlign((roi_size,roi_size), 1/8, -1, aligned=True)
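
aligned=True (used above) shifts the sampling coordinates by half a pixel to remove the quantization bias of the legacy behavior; it changes where the bilinear samples are taken, not the output shape:

import torch
from torchvision.ops import RoIAlign

feats = torch.randn(1, 512, 28, 28)
rois = torch.tensor([[0., 8., 8., 120., 120.]])
legacy = RoIAlign((7, 7), 1 / 8, -1, aligned=False)(feats, rois)  # (1, 512, 7, 7)
fixed = RoIAlign((7, 7), 1 / 8, -1, aligned=True)(feats, rois)    # same shape, shifted samples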
Example No. 17
    def build_roi_layers(self, layer_cfg, featmap_strides):
        """Build RoI operator to extract feature from each level feature map.

        Args:
            layer_cfg (dict): Dictionary to construct and config RoI layer
                operation. Options are modules under ``mmcv/ops`` such as
                ``RoIAlign``.
            featmap_strides (List[int]): The strides of the input feature maps
                w.r.t. the original image size, used to scale RoI coordinates
                (in the original image coordinate system) to the feature
                coordinate system.
        Returns:
            nn.ModuleList: The RoI extractor modules for each level feature
                map.
        """

        cfg = layer_cfg.copy()

        roi_layers = nn.ModuleList([
            RoIAlign((cfg['output_size'], cfg['output_size']),
                     1.0 / s,
                     cfg['sampling_ratio'],
                     aligned=True) for s in featmap_strides
        ])

        return roi_layers
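
A usage sketch with hypothetical config values, one RoIAlign per pyramid stride:

layer_cfg = dict(output_size=7, sampling_ratio=0)
featmap_strides = [4, 8, 16, 32]
# roi_layers = self.build_roi_layers(layer_cfg, featmap_strides)
# -> nn.ModuleList of four RoIAlign ops with spatial_scale 1/4, 1/8, 1/16, 1/32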
Example No. 18
    def forward(self, input):
        # spatial
        batch_size, c, h, w = list(input.size())

        layer1_out = self.leaky_relu(self.conv1(input))
        layer2_out = self.leaky_relu(self.conv2(layer1_out))
        block_out1 = RoIAlign(
            (h // 2, w // 2), spatial_scale=1.0,
            sampling_ratio=2)(layer2_out,
                              get_rois(batch_size,
                                       [0, 0, h - 1, w - 1]).to(device))

        # aux connection from layer 2
        aux_from_layer2 = self.leaky_relu(self.conv_aux_layer2_1(layer2_out))
        aux_layer2 = self.leaky_relu(self.conv_aux_layer2_2(aux_from_layer2))
        block_out2 = self.dropout(aux_layer2)

        aux_from_layer1 = torch.add(layer1_out, self.dropout(aux_from_layer2))
        block_out3 = self.leaky_relu(self.conv_block3(aux_from_layer1))

        return {
            'out1': torch.add(torch.add(block_out1, block_out2), block_out3),
            'out2': block_out2,
            'out3': block_out3
        }
Example No. 19
    def __init__(self,
                 classes,
                 class_agnostic,
                 pooling_size=7,
                 pooling_mode="align",
                 thresh=0.05,
                 relation_module=False):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        self.thresh = thresh
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.bbox_normalize_targets_precomputed = True
        self.bbox_normalize_means = [0.0, 0.0, 0.0, 0.0]
        self.bbox_normalize_stds = [0.1, 0.1, 0.2, 0.2]
        self.RCNN_proposal_target = _ProposalTargetLayer(
            self.n_classes,
            bbox_normalize_targets_precomputed=self.bbox_normalize_targets_precomputed,
            bbox_normalize_means=self.bbox_normalize_means,
            bbox_normalize_stds=self.bbox_normalize_stds)
        self.pooling_mode = pooling_mode

        self.RCNN_roi_pool = RoIPool((pooling_size, pooling_size), 1.0 / 16.0)
        self.RCNN_roi_align = RoIAlign((pooling_size, pooling_size),
                                       1.0 / 16.0, 0)

        self.relation_module = relation_module  # whether add relation network
Example No. 20

	def __init__(self, feature_extractor, pool_output_size, character_classifier, spatial_scale, dropout_rate=0):
		super(StackedFeatureExtractorAndCharacterClassifier, self).__init__()
		self.FeatureExtractor = nn.parallel.DataParallel(feature_extractor)
		self.Dropout = nn.Dropout(p = dropout_rate)
		self.PoolOutputSize = pool_output_size
		self.RegionPool = RoIAlign(pool_output_size, spatial_scale, 1)

		self.CharacterClassifier = character_classifier
Example No. 21

    def __init__(self, config):
        super(Mask_Rcnn, self).__init__()
        self.config = config
        self.Mean = torch.tensor(config.Mean, dtype=torch.float32)
        self.anchors = []
        self.num_anchor = []
        for i in range(5):
            self.num_anchor.append(
                len(config.anchor_scales[i]) * len(config.anchor_ratios[i]))
            stride = 4 * 2**i
            print(stride, self.config.anchor_scales[i],
                  self.config.anchor_ratios[i])
            anchors = get_anchors(np.ceil(self.config.img_max / stride + 1),
                                  self.config.anchor_scales[i],
                                  self.config.anchor_ratios[i],
                                  stride=stride)
            print(anchors.shape)
            self.anchors.append(anchors)
        self.ATC = AnchorTargetCreator(
            n_sample=config.rpn_n_sample,
            pos_iou_thresh=config.rpn_pos_iou_thresh,
            neg_iou_thresh=config.rpn_neg_iou_thresh,
            pos_ratio=config.rpn_pos_ratio)
        self.PC = ProposalCreator(nms_thresh=config.roi_nms_thresh,
                                  n_train_pre_nms=config.roi_train_pre_nms,
                                  n_train_post_nms=config.roi_train_post_nms,
                                  n_test_pre_nms=config.roi_test_pre_nms,
                                  n_test_post_nms=config.roi_test_post_nms,
                                  min_size=config.roi_min_size)
        self.PTC = ProposalTargetCreator(
            n_sample=config.fast_n_sample,
            pos_ratio=config.fast_pos_ratio,
            pos_iou_thresh=config.fast_pos_iou_thresh,
            neg_iou_thresh_hi=config.fast_neg_iou_thresh_hi,
            neg_iou_thresh_lo=config.fast_neg_iou_thresh_lo)

        self.features = resnet101()
        self.fpn = FPN_net([256, 512, 1024, 2048],
                           256,
                           extra_blocks=LastLevelMaxPool())
        self.rpn = RPN_net(256, self.num_anchor[0])
        self.roialign_7 = MultiScaleRoIAlign(
            ['feat0', 'feat1', 'feat2', 'feat3'], 7, 2)
        self.roialign_14 = MultiScaleRoIAlign(
            ['feat0', 'feat1', 'feat2', 'feat3'], 14, 2)
        self.roialign_28 = RoIAlign((28, 28), 1.0, 2)
        self.fast = Fast_net(config.num_cls, 256 * 7 * 7, 1024)
        self.mask_net = Mask_net(256, config.num_cls)
        self.a = 0
        self.b = 0
        self.c = 0
        self.d = 0
        self.fast_num = 0
        self.fast_num_P = 0
Example No. 22

    def __init__(self, config):
        super(Faster_Rcnn, self).__init__()
        self.config = config
        self.Mean = torch.tensor(config.Mean, dtype=torch.float32)
        self.num_anchor = len(config.anchor_scales) * len(config.anchor_ratios)
        self.anchors = get_anchors(np.ceil(self.config.img_max / 16 + 1),
                                   self.config.anchor_scales,
                                   self.config.anchor_ratios)
        self.ATC = AnchorTargetCreator(
            n_sample=config.rpn_n_sample,
            pos_iou_thresh=config.rpn_pos_iou_thresh,
            neg_iou_thresh=config.rpn_neg_iou_thresh,
            pos_ratio=config.rpn_pos_ratio)
        self.PC = ProposalCreator(nms_thresh=config.roi_nms_thresh,
                                  n_train_pre_nms=config.roi_train_pre_nms,
                                  n_train_post_nms=config.roi_train_post_nms,
                                  n_test_pre_nms=config.roi_test_pre_nms,
                                  n_test_post_nms=config.roi_test_post_nms,
                                  min_size=config.roi_min_size)
        self.PTC_1 = ProposalTargetCreator(
            n_sample=config.fast_n_sample,
            pos_ratio=config.fast_pos_ratio,
            pos_iou_thresh=config.fast_pos_iou_thresh,
            neg_iou_thresh_hi=config.fast_neg_iou_thresh_hi,
            neg_iou_thresh_lo=config.fast_neg_iou_thresh_lo)
        self.PTC_2 = ProposalTargetCreator(
            n_sample=config.fast_n_sample,
            pos_ratio=config.fast_pos_ratio,
            pos_iou_thresh=0.6,
            neg_iou_thresh_hi=0.6,
            neg_iou_thresh_lo=config.fast_neg_iou_thresh_lo)
        self.PTC_3 = ProposalTargetCreator(
            n_sample=config.fast_n_sample,
            pos_ratio=config.fast_pos_ratio,
            pos_iou_thresh=0.7,
            neg_iou_thresh_hi=0.7,
            neg_iou_thresh_lo=config.fast_neg_iou_thresh_lo)

        self.features = vgg16().features[:-1]
        self.rpn = RPN_net(512, self.num_anchor)
        self.roialign = RoIAlign((7, 7), 1 / 16., 2)
        self.fast = Fast_net(config.num_cls, 512 * 7 * 7, 4096)
        self.fast_2 = Fast_net(config.num_cls, 512 * 7 * 7, 4096)
        self.fast_3 = Fast_net(config.num_cls, 512 * 7 * 7, 4096)
        self.a = 0
        self.b = 0
        self.c = 0
        self.d = 0
        self.fast_num = 0
        self.fast_num_P = 0

        self.loc_std1 = [1. / 10, 1. / 10, 1. / 5, 1. / 5]
        self.loc_std2 = [1. / 20, 1. / 20, 1. / 10, 1. / 10]
        self.loc_std3 = [1. / 30, 1. / 30, 1. / 15, 1. / 15]
        self.loss_weights = [1.0, 0.5, 0.25]
Example No. 23
    def __init__(self, num_classes):
        super(FasterRCNN, self).__init__()
        self.strides = [16]
        self.frozen_layer_num = 4

#         self.backbone = vgg16(pretrained=True, frozen_layer_num=self.frozen_layer_num)
        self.rpn_head = RPNHead(self.strides)

        self.roi_pool = RoIAlign(output_size=(7, 7), spatial_scale=1.0/self.strides[0], sampling_ratio=-1)
        self.bbox_head = BBoxHead(num_classes=num_classes, pretrained='vgg16')
        
        ### DEBUG ####
        import torchvision.models as models
        vgg = models.vgg16()
        model_path = '/home/zzy/Projects/faster-rcnn.pytorch/data/pretrained_model/vgg16_caffe.pth'
        print("Loading pretrained weights from %s" %(model_path))
        state_dict = torch.load(model_path)
        vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()})
        self.backbone = nn.Sequential(*list(vgg.features._modules.values())[:-1])
        # Fix the layers before conv3:
        for layer in range(10):
            for p in self.backbone[layer].parameters():
                p.requires_grad = False
        self.bbox_head.shared_layers = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])
        ####
        
        #### RPN ####
        self.train_before_rpn_proposal_num = 12000
        self.train_after_rpn_proposal_num = 2000
        self.test_before_rpn_proposal_num = 6000  # 6000
        self.test_after_rpn_proposal_num = 300  # 300

        self.pos_iou_thr = 0.5
        self.neg_iou_thr = 0.5

        self.roi_num_per_img = 128  # 512
        self.pos_sample_rate = 0.25

        self.rpn_nms_thr_iou = 0.7
        self.rpn_min_size = 16
        

        #### RCNN ####
        self.bbox_nms_thr_iou = 0.5
        self.bbox_nms_score_thr = 0.05
        # target normalize
        self.target_mean = [0.0, 0.0, 0.0, 0.0]
        self.target_std = [0.1, 0.1, 0.2, 0.2]
        
        # load pretrained
#         load_vgg_fc(self, 'vgg16')
        self._init_weights()
Example No. 24
    def __init__(self, n_class, roi_size, spatial_scale, classifier):

        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIAlign(self.roi_size, self.spatial_scale, 1)  # pooling; output: Tensor[K, C, output_size[0], output_size[1]]
Example No. 25

    def __init__(self):
        super().__init__()
        self.image_size = 256
        self.feature_dim = 256
        self.output_dim = 256
        self.pool_size = 7
        self.downsample_rate = 16

        self.roi_align = RoIAlign(self.pool_size, 1.0 / self.downsample_rate, -1)
        self.context_feature_extract = nn.Conv2d(self.feature_dim, self.feature_dim, 1)
        self.object_feature_fuse = nn.Conv2d(self.feature_dim * 2, self.output_dim, 1)
        self.object_feature_fc = nn.Sequential(nn.ReLU(True),
                                               nn.Linear(self.output_dim * self.pool_size ** 2, self.output_dim))

        self.resnet = resnet34(pretrained=True)
        self.resnet_feature_extractor = nn.Sequential(*list(self.resnet.children())[:-3])
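
children()[:-3] drops layer4, avgpool and fc, so the extractor ends after layer3 of resnet34: 256 channels at stride 16, matching feature_dim and downsample_rate above. A shape check:

import torch
import torch.nn as nn
from torchvision.models import resnet34

extractor = nn.Sequential(*list(resnet34(pretrained=True).children())[:-3])
feats = extractor(torch.randn(1, 3, 256, 256))  # (1, 256, 16, 16)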
Example No. 26
    def forward(self, imgs, bboxes):
        feature_maps = self.resnet(imgs)
        fa = RoIAlign(output_size=(7, 7),
                      spatial_scale=1 / 8,
                      sampling_ratio=2,
                      aligned=True)
        ya = fa(feature_maps, bboxes)
        y = self.last_block(ya)
        y = self.conv_last(y)
        y = self.relu(y)
        y = self.dropout(y)
        y = y.view(1, -1)
        y = self.FC(y)
        y = self.act(y)

        return y
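
Building a fresh RoIAlign module inside forward works, but torchvision's functional form is equivalent and avoids re-instantiating the op on every call (feature_maps and bboxes as in the snippet):

from torchvision.ops import roi_align

ya = roi_align(feature_maps, bboxes, output_size=(7, 7),
               spatial_scale=1 / 8, sampling_ratio=2, aligned=True)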
Example No. 27
    def __init__(self):
        super(ModelBuilder, self).__init__()

        # build backbone
        self.backbone = get_backbone(cfg.BACKBONE.TYPE, **cfg.BACKBONE.KWARGS)

        # build adjust layer
        self.neck = get_neck(cfg.ADJUST.TYPE, **cfg.ADJUST.KWARGS)

        # build non-local layer
        # self.non_local = get_nonlocal(cfg.NONLOCAL.TYPE,
        #                               **cfg.NONLOCAL.KWARGS)

        # roi align for cropping center
        self.roi_align = RoIAlign((7, 7), 1.0 / cfg.ANCHOR.STRIDE, 1)

        # build rpn head
        self.rpn_head = get_rpn_head(cfg.RPN.TYPE, **cfg.RPN.KWARGS)
Example No. 28
    def __init__(self, opt):
        super(PersonDetector, self).__init__(opt)
        self.frame_counter = 0
        self.feature_map = None

        def get(module, input, output):
            self.feature_map = input[0]  # input is a tuple; not sure why

        self.model.hm.register_forward_hook(get)
        mod = DHN.Munkrs(1, 256, 1, True, 1, False, False)
        weights = torch.load(
            '/home/wanghao/github/deepmot/model_weights/DHN.pth')
        mod.load_state_dict(weights)
        self.tracker = tracker.Tracker(mod)
        self.video_out = cv2.VideoWriter(
            'centernet.mp4', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 25,
            (960, 540))
        self.roi = RoIAlign([1, 1], opt.down_ratio, 6)
        self.kcfs = None
Example No. 29
    def __init__(self, num_classes=0, input_dim=3, ch=64):
        super(ResnetDiscriminator64, self).__init__()
        self.num_classes = num_classes

        self.block1 = OptimizedBlock(input_dim, ch, downsample=False)
        self.block2 = ResBlock(ch, ch * 2, downsample=False)
        self.block3 = ResBlock(ch * 2, ch * 4, downsample=True)
        self.block4 = ResBlock(ch * 4, ch * 8, downsample=True)
        self.block5 = ResBlock(ch * 8, ch * 16, downsample=True)
        self.l_im = nn.utils.spectral_norm(nn.Linear(ch * 16, 1))
        self.activation = nn.ReLU()

        # object path
        self.roi_align = RoIAlign((8, 8), 1.0 / 2.0, 0)
        self.block_obj4 = ResBlock(ch * 4, ch * 8, downsample=True)
        self.l_obj = nn.utils.spectral_norm(nn.Linear(ch * 8, 1))
        self.l_y = nn.utils.spectral_norm(nn.Embedding(num_classes, ch * 8))

        self.init_parameter()
Example No. 30
    def test_build_complex_head(self):
        """
        Test complex ResNetRoIHead.
        """
        # ROI layer configs
        resolution = (10, 15)
        spatial_scale = 1.0 / 5.0
        sampling_ratio = 0
        roi_layer = RoIAlign(resolution,
                             spatial_scale=spatial_scale,
                             sampling_ratio=sampling_ratio)

        for input_dim, output_dim in itertools.product((4, 8), (4, 8, 16)):

            model = ResNetRoIHead(
                proj=nn.Linear(input_dim, output_dim),
                activation=nn.Softmax(),
                pool=nn.AdaptiveAvgPool3d(1),
                pool_spatial=nn.MaxPool2d(resolution, stride=1),
                roi_layer=roi_layer,
                dropout=nn.Dropout(0.5),
                output_pool=nn.AdaptiveAvgPool3d(1),
            )

            # Test forwarding.
            for (input_tensor,
                 bboxes) in TestRoIHeadHelper._get_inputs(input_dim=input_dim):
                if input_tensor.shape[1] != input_dim:
                    with self.assertRaises(Exception):
                        output_tensor = model(input_tensor, bboxes)
                    continue
                output_tensor = model(input_tensor, bboxes)

                bboxes_shape = bboxes.shape
                output_shape = output_tensor.shape
                output_shape_gt = (bboxes_shape[0], output_dim)
                self.assertEqual(
                    output_shape,
                    output_shape_gt,
                    "Output shape {} is different from expected shape {}".
                    format(output_shape, output_shape_gt),
                )