Example #1
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
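
RoIAlignAvg here pools each proposal to a cfg.POOLING_SIZE x cfg.POOLING_SIZE grid at 1/16 of the input resolution (a stride-16 backbone). A minimal, hedged sketch of the same operation using torchvision.ops.RoIAlign as a stand-in (shapes and coordinates below are made up):

import torch
from torchvision.ops import RoIAlign

pooler = RoIAlign(output_size=(7, 7), spatial_scale=1.0 / 16.0, sampling_ratio=2)

feat = torch.randn(1, 512, 38, 50)                  # backbone feature map (stride 16)
rois = torch.tensor([[0., 16., 16., 160., 240.]])   # (batch_idx, x1, y1, x2, y2) in image coords
pooled = pooler(feat, rois)                         # -> (1, 512, 7, 7)
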
Example #2
    def __init__(self, classes, class_agnostic):
        super(_FPN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # NOTE: the original paper uses pool_size = 7 for the cls branch and 14 for the mask branch.
        # To save computation time we first pool at 14 and then apply stride-2 pooling for the cls branch.
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
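
The NOTE above pools every RoI once at 14x14 and then downsamples to 7x7 for the classification branch; a small sketch of that stride-2 pooling step, with assumed shapes:

import torch
import torch.nn.functional as F

roi_feat = torch.randn(8, 256, 14, 14)                       # shared 14x14 RoI features
cls_feat = F.max_pool2d(roi_feat, kernel_size=2, stride=2)   # -> (8, 256, 7, 7) for the cls branch
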
Example #3
    def __init__(self, classes, class_agnostic, tb=None):
        super(_OICR, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        self.param_groups = [[], [], [], []]
        self.OICR_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 8.0)
        self.OICR_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 8.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.OICR_roi_crop = _RoICrop()
        self.ic_layers = []
        self.tb = tb

        # for RPN
        self.dout_base_model = 512
        self.OICR_rpn = _RPN(self.dout_base_model)
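
The four empty param_groups lists suggest that parameters are later bucketed for per-group optimizer settings; a purely hypothetical illustration of how such lists are typically handed to torch.optim.SGD (the grouping and learning rates below are invented):

import torch
import torch.nn as nn

layers = [nn.Linear(8, 8) for _ in range(4)]
param_groups = [list(l.parameters()) for l in layers]   # stand-in for the four groups
optimizer = torch.optim.SGD(
    [{'params': g, 'lr': 0.001 * (i + 1)} for i, g in enumerate(param_groups)],
    lr=0.001, momentum=0.9)
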
Example #4
    def init_modules(self):
        self.feature_extractor = FeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = GateRPNModel(self.rpn_config)
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                        1.0 / 16.0)
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(2048, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
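
The reduce=False arguments used for the losses above are deprecated in current PyTorch; the equivalent spelling uses reduction='none':

import functools
import torch.nn as nn
import torch.nn.functional as F

rcnn_cls_loss = functools.partial(F.cross_entropy, reduction='none')   # per-RoI loss values
rcnn_bbox_loss = nn.SmoothL1Loss(reduction='none')
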
Example #5
    def __init__(self, main_classes, sub_classes, class_agnostic, casecade_type='add_score', alpha=0.5):
        super(_hierarchyCasecadeFasterRCNN, self).__init__()
        #self.classes = classes

        # casecade_type options: add_score, add_prob, mul_score, mul_prob
        self.casecade_type = casecade_type
        self.alpha = alpha

        self.main_classes = main_classes
        self.sub_classes = sub_classes

        self.n_sub_classes = len(sub_classes)
        self.n_main_classes = len(main_classes)

        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_sub_classes)
        self.RCNN_roi_pool = _RoIPooling(
            cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(
            cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        self.RFCN_psroi_pool = None

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        self.main2sub_idx_dict = defaultdict(list)
        for key, val in sub2main_dict.items():
            try:
                # not all classes in the dict are present in this imdb
                self.main2sub_idx_dict[self.main_classes.index(
                    val)].append(self.sub_classes.index(key))
            except ValueError:
                print("key:{}, val:{} may not be in this imdb".format(key, val))
Example #6
    def __init__(self, classes, class_agnostic, lc, gc):
        super(_fasterRCNNAttention, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0
        self.lc = lc
        self.gc = gc
        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, 1024)

        self.nongt_dim = 300 if self.training else cfg.TEST.RPN_POST_NMS_TOP_N
        self.attention_1 = attention_module_multi_head(
            nongt_dim=self.nongt_dim,
            fc_dim=16,
            feat_dim=1024,
            index=1,
            group=16,
            dim=(1024, 1024, 1024))

        self.attention_2 = attention_module_multi_head(
            nongt_dim=self.nongt_dim,
            fc_dim=16,
            feat_dim=1024,
            index=2,
            group=16,
            dim=(1024, 1024, 1024))
Example #7
    def _init_modules(self):

        # define backbone
        if self.backbone_type == 'vgg':
            pass  # the vgg backbone is not handled in this snippet
        elif self.backbone_type == 'res101':
            backbone_net = resnet_backbone(num_layers=101,
                                           pretrained=self.pretrained)
        self.RCNN_base1, self.RCNN_base2, self.RCNN_top = backbone_net.init_modules()
        self.dout_base_model = backbone_net.dout_base_model

        # cross domain classifier
        self.netD_pixel = netD_pixel(input_dim=256,
                                     output_dim=128,
                                     context=self.lc)
        self.netD = netD(input_dim=1024, context=self.gc)

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        # define RCNN pred
        feat_d = 2048
        if self.lc:
            feat_d += 128
        if self.gc:
            feat_d += 128
        self.RCNN_cls_score = nn.Linear(feat_d, self.n_classes)
        if self.class_agnostic:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4)
        else:
            self.RCNN_bbox_pred = nn.Linear(feat_d, 4 * self.n_classes)
Example #8
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        # Added by Jie: flag to use MobileNetV2 as the backbone network for feature extraction.
        self.dlb = False
        self.neg_rate = 1  # For resample
Example #9
    def __init__(self, classes, class_agnostic, pooling_size, teaching=False):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.teaching = teaching
        self.pooling_size = pooling_size
        self.RCNN_rpn = _RPN(self.dout_base_model, teaching=self.teaching)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(self.pooling_size, self.pooling_size,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                          1.0 / 16.0)

        self.grid_size = self.pooling_size * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else self.pooling_size
        self.RCNN_roi_crop = _RoICrop()
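
grid_size doubles the pooling size when cfg.CROP_RESIZE_WITH_MAX_POOL is set because the RoI-crop path usually samples each RoI at twice the target resolution and then max-pools it back down; a sketch of that pattern with assumed shapes (not the repo's exact forward code):

import torch
import torch.nn.functional as F

pooling_size = 7
grid_size = pooling_size * 2                               # 14
cropped = torch.randn(4, 512, grid_size, grid_size)        # output of the grid-sample / RoI-crop step
pooled = F.max_pool2d(cropped, kernel_size=2, stride=2)    # -> (4, 512, 7, 7)
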
Example #10
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        
        if cfg.GCN.RE_CLASS:
            self.Class_GCN = CGCN(cfg.GCN.N_FEAT, cfg.GCN.N_HID, cfg.GCN.DROPOUT, self.n_classes, t = 0.05, adj_file = cfg.GCN.ADJ_FILE)
Example #11
    def __init__(self,
                 classes,
                 class_agnostic,
                 lighthead=False,
                 compact_mode=False):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        self.lighthead = lighthead

        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define Large Separable Convolution Layer
        if self.lighthead:
            self.lh_mode = 'S' if compact_mode else 'L'
            self.lsconv = LargeSeparableConv2d(self.dout_lh_base_model,
                                               bias=False,
                                               bn=False,
                                               setting=self.lh_mode)
            self.lh_relu = nn.ReLU(inplace=True)

        # define rpn
        if lighthead:
            self.dout_base_model = self.dout_lh_base_model
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
        self.rpn_time = None
        self.pre_roi_time = None
        self.roi_pooling_time = None
        self.subnet_time = None
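
LargeSeparableConv2d follows the Light-Head R-CNN idea of a large-kernel separable convolution (a k x 1 conv followed by a 1 x k conv, in two parallel branches that are summed). A rough sketch under assumed channel widths; the repo's 'S'/'L' settings may differ:

import torch
import torch.nn as nn

class LargeSeparableSketch(nn.Module):
    def __init__(self, c_in, c_mid=256, c_out=490, k=15):
        super().__init__()
        p = k // 2
        self.branch1 = nn.Sequential(
            nn.Conv2d(c_in, c_mid, (k, 1), padding=(p, 0)),
            nn.Conv2d(c_mid, c_out, (1, k), padding=(0, p)))
        self.branch2 = nn.Sequential(
            nn.Conv2d(c_in, c_mid, (1, k), padding=(0, p)),
            nn.Conv2d(c_mid, c_out, (k, 1), padding=(p, 0)))

    def forward(self, x):
        return self.branch1(x) + self.branch2(x)

out = LargeSeparableSketch(2048)(torch.randn(1, 2048, 10, 10))   # -> (1, 490, 10, 10)
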
Example #12
    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        # self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # some 3d statistic
        # some 2d points projected from 3d

        self.rcnn_3d_pred = nn.Linear(in_channels, 3 + 4)

        self.rcnn_3d_loss = OrientationLoss(split_loss=True)
Example #13
    def __init__(self, classes, class_agnostic, meta_train, meta_test=None, meta_loss=None):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        self.meta_train = meta_train
        self.meta_test = meta_test
        self.meta_loss = meta_loss
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0
        self.edge_loss = nn.BCELoss()

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
        self.num_layers_g = 3
Example #14
    def init_modules(self):
        self.feature_extractor = feature_extractors_builder.build(
            self.feature_extractor_config)
        # self.feature_extractor = ResNetFeatureExtractor(
        # self.feature_extractor_config)
        # self.feature_extractor = MobileNetFeatureExtractor(
        # self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        if self.use_self_attention:
            self.rcnn_cls_pred = nn.Linear(self.ndin, self.n_classes)
        else:
            self.rcnn_cls_pred = nn.Conv2d(self.ndin, self.n_classes, 3, 1, 1)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(self.ndin, 4)
            # self.rcnn_bbox_pred = nn.Conv2d(2048,4,3,1,1)
        else:
            self.rcnn_bbox_pred = nn.Linear(self.ndin, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # attention
        if self.use_self_attention:
            self.spatial_attention = nn.Conv2d(self.ndin, 1, 3, 1, 1)
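
The single-channel spatial_attention conv is typically squashed to a [0, 1] weight map and multiplied back onto the features; a hypothetical illustration (self.ndin is assumed to be 1024 here, and the repo's forward pass may differ):

import torch
import torch.nn as nn

feat = torch.randn(2, 1024, 7, 7)
spatial_attention = nn.Conv2d(1024, 1, 3, 1, 1)
attn = torch.sigmoid(spatial_attention(feat))   # (2, 1, 7, 7) weights in [0, 1]
attended = feat * attn                          # broadcast over channels
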
Example #15
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN_seg, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model + self.n_classes)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        self.criterion = CrossEntropyLoss2d(size_average=False,
                                            ignore_index=255).cuda()
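
In current PyTorch nn.CrossEntropyLoss accepts dense (N, C, H, W) logits directly, so the CrossEntropyLoss2d criterion above can be expressed as follows (size_average=False corresponds to reduction='sum'):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=255)
logits = torch.randn(2, 21, 32, 32)          # per-pixel class scores
target = torch.randint(0, 21, (2, 32, 32))   # per-pixel labels, 255 = ignore
loss = criterion(logits, target)
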
Example #16
    def __init__(self, block, layers, num_acts=1):
        self.inplanes = 64
        self.num_acts = num_acts
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3,
                                    stride=2,
                                    padding=0,
                                    ceil_mode=True)  # change
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        # setting stride = 1 here is slightly more accurate but slower
        self.layer4 = self._make_layer(block, 512, layers[3], stride=1)

        self.RCNN_roi_align = RoIAlignAvg(7, 7, 1.0 / 16.0)

        self.fc8 = nn.Linear(2048, 4096)
        self.fc = nn.Linear(4096, self.num_acts)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                #m.weight.data.zero_()
                m.bias.data.zero_()
                m.weight.data.normal_(0, 0.01)
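
The manual Conv2d initialization above (std = sqrt(2 / n) with n = k * k * out_channels) is He/Kaiming initialization in fan-out mode; the same loop can be written with torch.nn.init:

import torch.nn as nn

def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        nn.init.constant_(m.bias, 0)

# model.apply(init_weights)
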
Example #17
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)
        self.RCNN_deform_roi_pool_1 = DeformRoIFunction(pool_height=7,
                                                        pool_width=7,
                                                        spatial_scale=1.0 / 16.0,
                                                        no_trans=True,
                                                        trans_std=0.1,
                                                        sample_per_part=4,
                                                        output_dim=256,
                                                        group_size=1,
                                                        part_size=7)
        self.RCNN_deform_roi_pool_2 = DeformRoIFunction(pool_height=7,
                                                        pool_width=7,
                                                        spatial_scale=1.0 / 16.0,
                                                        no_trans=False,
                                                        trans_std=0.1,
                                                        sample_per_part=4,
                                                        output_dim=256,
                                                        group_size=1,
                                                        part_size=7)
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
Example #18
    def __init__(self, output_size, feat_size, up_scale, use_cpu=False):
        super(RoI_op, self).__init__()
        self.output_size = output_size
        self.feat_size = feat_size
        self.up_scale = up_scale
        self.use_cpu = use_cpu

        # roi pooling
        self.roi_pool = _RoIPooling(self.output_size, self.output_size,
                                    1.0 / self.feat_size)

        # roi align
        self.RoIAlignMax = RoIAlignMax(self.output_size, self.output_size,
                                       1.0 / self.feat_size)
        self.RoIAlignAvg = RoIAlignAvg(self.output_size, self.output_size,
                                       1.0 / self.feat_size)

        # roi warp
        self.grid_size = self.output_size * 2
        self.roi_crop = _RoICrop()

        # roi refine
        self.up_sample = torch.nn.UpsamplingBilinear2d(scale_factor=up_scale)
Example #19
    def __init__(self, pretrained_model_path=None, num_class=20):
        super(WSDDN_VGG16, self).__init__()
        vgg = torchvision.models.vgg16()
        if pretrained_model_path is None:
            print("Create WSDDN_VGG16 without pretrained weights")
        else:
            print("Loading pretrained VGG16 weights from %s" %
                  (pretrained_model_path))
            state_dict = torch.load(pretrained_model_path)
            vgg.load_state_dict(
                {k: v
                 for k, v in state_dict.items() if k in vgg.state_dict()})

        self.base = nn.Sequential(*list(vgg.features._modules.values())[:-1])
        self.top = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])
        self.num_classes = num_class

        self.fc8c = nn.Linear(4096, self.num_classes)
        self.fc8d = nn.Linear(4096, self.num_classes)
        self.roi_pooling = _RoIPooling(7, 7, 1.0 / 16.0)
        self.roi_align = RoIAlignAvg(7, 7, 1.0 / 16.0)
        self.num_classes = self.num_classes
        self._init_weights()
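
fc8c and fc8d are the classification and detection streams of WSDDN; a sketch of the standard two-stream combination they usually feed into (not necessarily the repo's exact forward pass):

import torch
import torch.nn.functional as F

num_rois, num_classes = 300, 20
feat = torch.randn(num_rois, 4096)          # per-RoI features from the shared top layers
fc8c = torch.nn.Linear(4096, num_classes)
fc8d = torch.nn.Linear(4096, num_classes)

cls_scores = F.softmax(fc8c(feat), dim=1)   # softmax over classes
det_scores = F.softmax(fc8d(feat), dim=0)   # softmax over proposals
roi_scores = cls_scores * det_scores        # (num_rois, num_classes)
image_scores = roi_scores.sum(dim=0)        # image-level scores for the weakly supervised loss
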
Example #20
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        """
        Faster RCNN Architecture
            IMAGE->ConvLayer->conv feature maps->RPN->proposals
                                               |                \
                                               |                  RoI Polling->classifier
                                               |________________/
       Mask RCNN      
            conv feature maps->RPN->proposals               classifier -> box    
                             |                \            /          \    
                             |                  RoI Allign              class
                             |________________/            \
                                                             conv->conv-> matrix
        
        
        """
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.MaskRCNN = _Mask(self.n_classes)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
Example #21
    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # decouple cls and bbox
        self.rcnn_conv = nn.Conv2d(1024, 512, 3, 1, 1, bias=True)
        self.rcnn_pooled_feat_cls = nn.Conv2d(512, 1024, 1, 1, 0)
        self.rcnn_pooled_feat_bbox = nn.Conv2d(512, 1024, 1, 1, 0)
Example #22
    def __init__(self, classes, class_agnostic, cls_a_prob, cls_r_prob,
                 modules_size):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0
        # modules size and exist
        self.modules_size = modules_size
        self.module_exist = [x != 0 for x in modules_size]

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        # define three modules
        if self.module_exist[0]:
            self.one_Know_Rout_mod_a = Know_Rout_mod(2048, modules_size[0])
            self.gt_adj_a = nn.Parameter(torch.from_numpy(cls_a_prob),
                                         requires_grad=False)
        if self.module_exist[1]:
            self.one_Know_Rout_mod_r = Know_Rout_mod(2048, modules_size[1])
            self.gt_adj_r = nn.Parameter(torch.from_numpy(cls_r_prob),
                                         requires_grad=False)
        if self.module_exist[2]:
            self.one_Know_Rout_mod_s = Know_Rout_mod_im(
                10, 2048, modules_size[2])
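
gt_adj_a and gt_adj_r are fixed tensors stored as non-trainable nn.Parameters; an alternative sketch using register_buffer, which keeps such constants out of the optimizer while still moving with .to()/.cuda():

import numpy as np
import torch
import torch.nn as nn

class AdjHolder(nn.Module):
    def __init__(self, cls_a_prob):
        super().__init__()
        self.register_buffer('gt_adj_a', torch.from_numpy(cls_a_prob).float())

m = AdjHolder(np.eye(4, dtype=np.float32))
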
Example #23
    def __init__(self,
                 classes_num,
                 ANCHOR_SCALES,
                 ANCHOR_RATIOS,
                 class_agnostic=True,
                 base_feature_mean=False,
                 is_add_rpnconv=False,
                 is_pool=True):
        super(my_faster_rcnn, self).__init__()
        self.n_classes = classes_num
        self.ANCHOR_SCALES = ANCHOR_SCALES
        self.ANCHOR_RATIOS = ANCHOR_RATIOS
        self.class_agnostic = class_agnostic
        self.base_feature_mean = base_feature_mean
        self.is_add_rpnconv = is_add_rpnconv
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_loc = 0
        self.is_pool = is_pool

        # define rpn
        self.RCNN_rpn = _RPN(
            self.dout_base_model,
            ANCHOR_SCALES=ANCHOR_SCALES,
            ANCHOR_RATIOS=ANCHOR_RATIOS,
            is_add_rpnconv=self.is_add_rpnconv)  # output channels of the features fed to the RPN

        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
Example #24
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_proposal_target_for_target = _ProposalTargetLayerForTarget(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        # local discriminator
        from model.faster_rcnn.vgg16 import vgg16
        from model.faster_rcnn.resnet import resnet101
        if isinstance(self, vgg16):
            self.local_discriminator = LocalDiscriminator(256)
        else:
            self.local_discriminator = LocalDiscriminator(512)
        self.local_loss_layer = LossForLocal()

        # cls entropy minimization (CLS training)
        self.loss_rpn_cls_layer = LossForRPNCLS(5)

        # generate rpn target for pl box (RPN training)
        self.rpn_training_target = RPN_training_target()

        # minimize discrepancy (MCD)
        self.loss_for_discrepancy = LossForDiscrepancy()
Example #25
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)
        self.RCNN_ocr_roi_pooling = roi_pooling(2)  # ocr roi_pooling

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
        # RNN initialization, 256 hidden units
        nh = 256
        self.rnn = nn.Sequential(BidirectionalLSTM(512, nh, nh),
                                 BidirectionalLSTM(nh, nh, nh))
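
The BidirectionalLSTM stack consumes RoI features as a sequence; a sketch of the usual CRNN-style reshaping that precedes such an RNN (shapes are assumed, and the repo's forward pass may differ):

import torch

conv_feat = torch.randn(4, 512, 1, 26)        # (B, C, H=1, W) after pooling the height to 1
seq = conv_feat.squeeze(2).permute(2, 0, 1)   # -> (W, B, C) = (26, 4, 512), ready for self.rnn
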
Example #26
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        ################
        in_channels = 256
        use_bias = True
        use_dropout = False
        e1_conv = nn.Conv2d(in_channels,
                            512,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=use_bias)
        e1_norm = nn.BatchNorm2d(512)  # inner_nc
        e1_relu = nn.LeakyReLU(0.2, True)
        self.e1 = nn.Sequential(e1_conv, e1_norm, e1_relu)

        e2_conv = nn.Conv2d(512,
                            1024,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=use_bias)
        e2_norm = nn.BatchNorm2d(1024)
        e2_relu = nn.LeakyReLU(0.2, True)
        self.e2 = nn.Sequential(e2_conv, e2_norm, e2_relu)

        e3_conv = nn.Conv2d(1024,
                            2048,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=use_bias)
        # e3_norm = nn.BatchNorm2d(2048)
        e3_relu = nn.LeakyReLU(0.2, True)
        self.e3 = nn.Sequential(e3_conv, e3_relu)

        self.d1_deconv = nn.ConvTranspose2d(2048,
                                            1024,
                                            kernel_size=4,
                                            stride=2,
                                            padding=1,
                                            bias=use_bias)
        # d1_deconv_pad = d1_deconv(output_size=10)
        d1_norm = nn.BatchNorm2d(1024)
        d1_relu = nn.LeakyReLU(True)
        if use_dropout:
            self.d1 = nn.Sequential(d1_norm, nn.Dropout(0.5), d1_relu)
        else:
            self.d1 = nn.Sequential(d1_norm, d1_relu)

        self.d2_deconv = nn.ConvTranspose2d(2048,
                                            512,
                                            kernel_size=4,
                                            stride=2,
                                            padding=1,
                                            bias=use_bias)
        d2_norm = nn.BatchNorm2d(512)
        d2_relu = nn.LeakyReLU(True)
        if use_dropout:
            self.d2 = nn.Sequential(d2_norm, nn.Dropout(0.5), d2_relu)
        else:
            self.d2 = nn.Sequential(d2_norm, d2_relu)

        self.d3_deconv = nn.ConvTranspose2d(1024,
                                            256,
                                            kernel_size=4,
                                            stride=2,
                                            padding=1,
                                            bias=use_bias)
        d3_norm = nn.BatchNorm2d(256)
        d3_relu = nn.LeakyReLU(True)
        if use_dropout:
            self.d3 = nn.Sequential(d3_norm, nn.Dropout(0.5), d3_relu)
        else:
            self.d3 = nn.Sequential(d3_norm, d3_relu)

        d4_conv = nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1)
        d4_relu = nn.ReLU(True)
        self.d4 = nn.Sequential(d4_conv, d4_relu)
Example #27
    def __init__(self,
                 pretrained_model_path=None,
                 num_class=20,
                 pooling_method='roi_pooling',
                 share_level=2,
                 mil_topk=1,
                 num_group=1,
                 attention_lr=1.0):
        super(SM_MCL_TDET_VGG16, self).__init__()
        assert 0 <= share_level <= 2
        self.num_classes = num_class
        self.mil_topk = mil_topk
        self.num_group = num_group
        self.attention_layer = AttentionLayer(num_group, num_class)
        self.attention_lr = attention_lr

        vgg = torchvision.models.vgg16()
        if pretrained_model_path is None:
            print("Create WSDDN_VGG16 without pretrained weights")
        else:
            print("Loading pretrained VGG16 weights from %s" %
                  (pretrained_model_path))
            state_dict = torch.load(pretrained_model_path)
            vgg.load_state_dict(
                {k: v
                 for k, v in state_dict.items() if k in vgg.state_dict()})

        self.base = nn.Sequential(*list(vgg.features._modules.values())[:-1])
        top = list()
        if share_level >= 1:
            top.append(vgg.classifier[0])
            top.append(nn.ReLU(True))

        if share_level == 2:
            top.append(vgg.classifier[3])
            top.append(nn.ReLU(True))

        self.top = nn.Sequential(*top)

        cls = list()
        det = list()

        if share_level == 0:
            cls.append(copy.deepcopy(vgg.classifier[0]))
            cls.append(nn.ReLU(True))
            det.append(copy.deepcopy(vgg.classifier[0]))
            det.append(nn.ReLU(True))

        if share_level <= 1:
            cls.append(copy.deepcopy(vgg.classifier[3]))
            cls.append(nn.ReLU(True))
            det.append(copy.deepcopy(vgg.classifier[3]))
            det.append(nn.ReLU(True))

        cls.append(nn.Linear(4096, self.num_classes))

        det.append(nn.Linear(4096, num_group))

        self.cls_layer = nn.Sequential(*cls)
        self.det_layer = nn.Sequential(*det)
        self.back_layer = nn.Linear(4096, 1)

        if pooling_method == 'roi_pooling':
            self.region_pooling = _RoIPooling(7, 7, 1.0 / 16.0)
        elif pooling_method == 'roi_align':
            self.region_pooling = RoIAlignAvg(7, 7, 1.0 / 16.0)
        else:
            raise Exception('Undefined pooling method')

        # if layers are added here, get_optimizer must be updated accordingly
        self._init_weights()
Example #28
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        ################
        use_bias = True
        use_dropout = False
        lrelu = nn.LeakyReLU(0.1, True)

        conv3_1 = nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=use_bias)
        conv3_2 = nn.Conv2d(256,
                            512,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=use_bias)
        self.conv3 = nn.Sequential(conv3_1, nn.BatchNorm2d(256),
                                   lrelu, conv3_2,
                                   nn.BatchNorm2d(512, affine=False), lrelu)

        conv4_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=use_bias)
        conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=use_bias)
        self.conv4 = nn.Sequential(conv4_1, nn.BatchNorm2d(512),
                                   lrelu, conv4_2,
                                   nn.BatchNorm2d(512, affine=False), lrelu)

        conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=use_bias)
        conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=use_bias)
        self.conv5 = nn.Sequential(conv5_1, nn.BatchNorm2d(512),
                                   lrelu, conv5_2,
                                   nn.BatchNorm2d(512, affine=False), lrelu)
        # 16x
        e1_conv = nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=use_bias)
        self.e1 = nn.Sequential(e1_conv, nn.BatchNorm2d(512), lrelu)

        # 32x
        e2_conv = nn.Conv2d(512,
                            1024,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=use_bias)
        self.e2 = nn.Sequential(e2_conv, nn.BatchNorm2d(1024), lrelu)

        # 64x
        e3_conv = nn.Conv2d(1024,
                            2048,
                            kernel_size=4,
                            stride=2,
                            padding=1,
                            bias=use_bias)
        self.e3 = nn.Sequential(e3_conv, nn.BatchNorm2d(2048), lrelu)

        # # 128x
        # e4_conv = nn.Conv2d(2048, 4096, kernel_size=4, stride=2, padding=1, bias=use_bias)
        # self.e4 = nn.Sequential(e3_conv, nn.BatchNorm2d(4096), lrelu)
        # e4_conv = nn.Conv2d(4096, 4096, kernel_size=3, padding=1, bias=use_bias)
        # self.e4 = nn.Sequential(e4_conv, nn.BatchNorm2d(4096), lrelu)

        self.d1_deconv = nn.ConvTranspose2d(2048,
                                            1024,
                                            kernel_size=4,
                                            stride=2,
                                            padding=1,
                                            bias=use_bias)
        if use_dropout:
            self.d1 = nn.Sequential(nn.BatchNorm2d(1024), nn.Dropout(0.5),
                                    lrelu)
        else:
            self.d1 = nn.Sequential(nn.BatchNorm2d(1024), lrelu)

        self.d2_deconv = nn.ConvTranspose2d(2048,
                                            512,
                                            kernel_size=4,
                                            stride=2,
                                            padding=1,
                                            bias=use_bias)
        if use_dropout:
            self.d2 = nn.Sequential(nn.BatchNorm2d(512), nn.Dropout(0.5),
                                    lrelu)
        else:
            self.d2 = nn.Sequential(nn.BatchNorm2d(512), lrelu)

        d3_conv = nn.Conv2d(1024, 512, kernel_size=3, padding=1, bias=use_bias)
        if use_dropout:
            self.d3 = nn.Sequential(d3_conv, nn.BatchNorm2d(512),
                                    nn.Dropout(0.5), lrelu)
        else:
            self.d3 = nn.Sequential(d3_conv, nn.BatchNorm2d(512), lrelu)

        # d4_conv = nn.Conv2d(1024, 512, kernel_size=3, padding=1, bias=use_bias)
        # if use_dropout:
        #   self.d4 = nn.Sequential(d4_conv, nn.BatchNorm2d(512), nn.Dropout(0.5), lrelu)
        # else:
        #   self.d4 = nn.Sequential(d4_conv, nn.BatchNorm2d(512), lrelu)

        d4_conv = nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=use_bias)
        self.d4 = nn.Sequential(d4_conv, nn.ReLU(True))
Example #29
    def __init__(self, baseModels, obj_classes, att_classes, rel_classes,
                 dout_base_model, pooled_feat_dim):

        super(_ISGG, self).__init__()
        self.obj_classes = obj_classes
        self.n_obj_classes = len(obj_classes)

        self.att_classes = att_classes
        self.n_att_classes = 0 if att_classes is None else len(att_classes)

        self.rel_classes = rel_classes
        self.n_rel_classes = 0 if rel_classes is None else len(rel_classes)

        # define base model
        self.RCNN_base_model = baseModels

        # define rpn
        self.RCNN_rpn = _RPN(dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(
            self.n_obj_classes, self.n_att_classes, self.n_rel_classes)
        self.RCNN_proposal_target_msdn = _ProposalTargetLayer_MSDN(
            self.n_obj_classes, self.n_att_classes, self.n_rel_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        if cfg.HAS_RELATIONS:
            self.RELPN_rpn = _RelPN(pooled_feat_dim, self.n_obj_classes)
            self.RELPN_proposal_target = _RelProposalTargetLayer(
                self.n_rel_classes)

            self.RELPN_roi_pool = _RoIPooling(cfg.POOLING_SIZE,
                                              cfg.POOLING_SIZE, 1.0 / 16.0)
            self.RELPN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE,
                                               cfg.POOLING_SIZE, 1.0 / 16.0)
            self.RELPN_grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
            self.RELPN_roi_crop = _RoICrop()

        reduced_pooled_feat_dim = pooled_feat_dim

        # define mps
        nhidden = 512
        dropout = False
        gate_width = 1
        use_kernel_function = False

        self.imp = _IMP(nhidden,
                        dropout,
                        gate_width=gate_width,
                        use_kernel_function=use_kernel_function
                        )  # the hierarchical message passing structure
        network.weights_normal_init(self.imp, 0.01)

        # self.fc4obj = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
        # self.fc4att = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
        # self.fc4rel = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)

        # self.RCNN_gcn_obj_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_obj_classes)
        # self.RCNN_gcn_att_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_att_classes)
        # self.RCNN_gcn_rel_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_rel_classes)

        if cfg.GCN_LAYERS > 0:
            if cfg.GCN_ON_SCORES:
                self.GRCNN_gcn_score = _GCN_1(self.n_obj_classes,
                                              self.n_att_classes,
                                              self.n_rel_classes)

            if cfg.GCN_ON_FEATS and not cfg.GCN_SHARE_FEAT_PARAMS:
                self.GRCNN_gcn_feat = _GCN_2(reduced_pooled_feat_dim)

            if cfg.GCN_ON_FEATS and cfg.GCN_SHARE_FEAT_PARAMS:
                self.GRCNN_gcn_feat = _GCN_3(reduced_pooled_feat_dim)

            if cfg.GCN_ON_FEATS and cfg.GCN_LOW_RANK_PARAMS:
                self.GRCNN_gcn_feat = _GCN_4(reduced_pooled_feat_dim)

        if cfg.GCN_HAS_ATTENTION:
            self.GRCNN_gcn_att1 = _GCN_ATT(self.n_obj_classes)
            self.GRCNN_gcn_att2 = _GCN_ATT(self.n_obj_classes)

        self.RCNN_loss_obj_cls = 0
        self.RCNN_loss_att_cls = 0
        self.RCNN_loss_rel_cls = 0
        self.RCNN_loss_bbox = 0
Example #30
    def __init__(self,
                 classes,
                 num_layers=101,
                 pretrained=False,
                 class_agnostic=False,
                 b_save_mid_convs=False):
        super(_fasterRCNN, self).__init__(classes, num_layers, pretrained,
                                          class_agnostic)
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        self.b_save_mid_convs = b_save_mid_convs
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
        self.Conv_feat_track = None
        self.rpn_rois = None

        if cfg.RESNET.CORE_CHOICE.USE == cfg.RESNET.CORE_CHOICE.FASTER_RCNN:
            print('RCNN uses Faster RCNN core.')
        elif cfg.RESNET.CORE_CHOICE.USE == cfg.RESNET.CORE_CHOICE.RFCN_LIGHTHEAD:
            print('RCNN uses RFCN Light Head core.')
            # The input channel is set manually since we use resnet101 only.
            # c_out is set to 10*ps*ps. c_mid is set to 256.
            self.relu = nn.ReLU()
            core_depth = cfg.RESNET.GLOBAL_CONTEXT_OUT_DEPTH
            ctx_size = cfg.RESNET.GLOBAL_CONTEXT_RANGE
            self.g_ctx = _global_context_layer(
                2048, core_depth * cfg.POOLING_SIZE * cfg.POOLING_SIZE, 256,
                ctx_size)
            self.RCNN_psroi_pool = PSRoIPool(cfg.POOLING_SIZE,
                                             cfg.POOLING_SIZE, 1.0 / 16.0,
                                             cfg.POOLING_SIZE, core_depth)
            # fc layer for roi-wise prediction.
            # roi_mid_c in the original paper is 2048.
            roi_mid_c = 2048
            self.fc_roi = nn.Linear(
                core_depth * cfg.POOLING_SIZE * cfg.POOLING_SIZE, roi_mid_c)
        elif cfg.RESNET.CORE_CHOICE.USE == cfg.RESNET.CORE_CHOICE.RFCN:
            print('RCNN uses R-FCN core.')
            # define extra convolution layers for psroi input.
            tmp_c_in = 2048
            self.rfcn_cls = nn.Conv2d(tmp_c_in,
                                      self.n_classes * cfg.POOLING_SIZE *
                                      cfg.POOLING_SIZE,
                                      kernel_size=1)
            if self.class_agnostic:
                self.rfcn_bbox = nn.Conv2d(tmp_c_in,
                                           4 * cfg.POOLING_SIZE *
                                           cfg.POOLING_SIZE,
                                           kernel_size=1)
            else:
                # Need to remove the background class for bbox regression.
                # Other circumstances are handled by torch.gather op later.
                self.rfcn_bbox = nn.Conv2d(tmp_c_in,
                                           4 * (self.n_classes) *
                                           cfg.POOLING_SIZE * cfg.POOLING_SIZE,
                                           kernel_size=1)
            # define psroi layers
            self.RCNN_psroi_score = PSRoIPool(cfg.POOLING_SIZE,
                                              cfg.POOLING_SIZE, 1.0 / 16.0,
                                              cfg.POOLING_SIZE, self.n_classes)
            if self.class_agnostic:
                self.RCNN_psroi_bbox = PSRoIPool(cfg.POOLING_SIZE,
                                                 cfg.POOLING_SIZE, 1.0 / 16.0,
                                                 cfg.POOLING_SIZE, 4)
            else:
                self.RCNN_psroi_bbox = PSRoIPool(cfg.POOLING_SIZE,
                                                 cfg.POOLING_SIZE, 1.0 / 16.0,
                                                 cfg.POOLING_SIZE,
                                                 4 * (self.n_classes))
            # define ave_roi_pooling layers.
            self.ave_pooling_bbox = nn.AvgPool2d(cfg.POOLING_SIZE,
                                                 stride=cfg.POOLING_SIZE)
            self.ave_pooling_cls = nn.AvgPool2d(cfg.POOLING_SIZE,
                                                stride=cfg.POOLING_SIZE)
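
A shape walk-through of the R-FCN classification branch defined above, under assumed sizes (cfg.POOLING_SIZE = 7, 21 classes):

import torch
import torch.nn as nn

ps, n_classes = 7, 21
rfcn_cls = nn.Conv2d(2048, n_classes * ps * ps, kernel_size=1)
score_maps = rfcn_cls(torch.randn(1, 2048, 38, 50))   # (1, 21*7*7, 38, 50) position-sensitive maps
# PSRoIPool then assembles, for each RoI, one ps x ps grid per class -> (num_rois, 21, 7, 7),
# and the 7x7 average pooling reduces it to a per-RoI vector of 21 class scores.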