def __init__(self, classes, class_agnostic, feat_name, feat_list=('conv4',), pretrained=True):
    """Build the Faster R-CNN layers on top of the parent's feature extractor.

    All arguments are forwarded unchanged to the parent constructor. The
    extractor ``self.FeatExt`` is then probed once to discover its output
    channel count, which sizes the RPN.

    Args:
        classes: forwarded to the parent constructor.
        class_agnostic: forwarded to the parent constructor.
        feat_name: forwarded to the parent constructor.
        feat_list: forwarded to the parent constructor.
        pretrained: forwarded to the parent constructor.
    """
    super(fasterRCNN, self).__init__(classes, class_agnostic, feat_name, feat_list, pretrained)

    # Important: put the extractor in eval mode for the probe, then restore
    # train mode. The probe input is zero-filled rather than uninitialised
    # memory (``torch.Tensor(...)``), so arbitrary values (possibly NaN/Inf)
    # are never pushed through the network, and ``no_grad`` avoids building
    # an autograd graph for a throw-away forward pass. Only the channel
    # dimension of the output is used.
    self.FeatExt.eval()
    with torch.no_grad():
        probe_feat = self.FeatExt(torch.zeros(1, 3, 224, 224))
    self.FeatExt.train()
    self.dout_base_model = probe_feat.size(1)

    # loss values start at zero
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    self.RCNN_rpn = _RPN(self.dout_base_model,
                         anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                         anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                         feat_stride=cfg.RCNN_COMMON.FEAT_STRIDE[0])

    # self.n_classes is read here but not assigned in this method
    # (presumably set by the parent constructor -- confirm).
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pool_size = (cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pool_size, 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign(pool_size, 1.0 / 16.0, 0)

    # Crop grid is twice the pooling size when a max-pool follows the crop.
    if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.RCNN_COMMON.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.RCNN_COMMON.POOLING_SIZE

    self.iter_counter = 0
def __init__(self, classes, class_agnostic, in_channel=4096):
    """Create the RPN, RoI layers and the domain-adaptation modules.

    ``self.dout_base_model`` is read but not assigned here, so it must
    already be available on the instance when this constructor runs.

    Args:
        classes: sized collection of classes; its length becomes ``n_classes``.
        class_agnostic: stored as ``self.class_agnostic``.
        in_channel: passed to ``_InstanceDA``.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pool_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pool_size, 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign(pool_size, 1.0 / 16.0, 0)

    # Crop grid is twice the pooling size when a max-pool follows the crop.
    self.grid_size = (
        cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    )

    self.RCNN_imageDA = _ImageDA(self.dout_base_model)
    self.RCNN_instanceDA = _InstanceDA(in_channel)
    # reduction="sum" is the supported spelling of the deprecated
    # size_average=False (summed, un-averaged squared error).
    self.consistency_loss = torch.nn.MSELoss(reduction="sum")
    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
def __init__(self, classes, n_way=2, n_shot=5):
    """Set up the RPN, the RoI layers and the few-shot R-CNN head.

    ``self.dout_base_model`` is read (not assigned) to size the RPN.
    """
    super(_fgnRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # proposal-target matching
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align use the same output size and scale
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # few shot rcnn head
    self.avgpool_rpn = nn.AvgPool2d(20)
    self.avgpool_rcnn = nn.AvgPool2d(14, stride=1)
    self.cls_conv1 = nn.Conv2d(2048, 512, 3, padding=0, bias=False)
    self.bn1 = nn.BatchNorm2d(512)
    self.cls_conv2 = nn.Conv2d(512, 128, 3, padding=0, bias=False)
    self.bn2 = nn.BatchNorm2d(128)

    # few shot settings
    self.n_way = n_way
    self.n_shot = n_shot
def __init__(self, classes: Tuple[str], class_agnostic: bool):
    """Create the shared one-shot detector layers.

    Builds the match block, RPN, proposal-target layer, RoI pool/align
    layers and the margin ranking loss, and registers empty placeholder
    modules for the backbone and heads. ``self.dout_base_model`` is read
    but not assigned here.
    """
    super(OneShotBase, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.match_net = match_block(self.dout_base_model)

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)

    # Empty placeholders; the inheriting class is expected to implement them.
    for placeholder in ("RCNN_base", "RCNN_top", "RCNN_cls_score", "RCNN_bbox_pred"):
        setattr(self, placeholder, nn.Module())
def __init__(self, classes, class_agnostic):
    """Create the RPN, RoI layers and the DCR proposal/target/pooling layers.

    ``self.dout_base_model`` is read but not assigned here. An attention
    loss module is added only when ``cfg.TRAIN.ATTENTION_MODEL`` is truthy.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # DCR branch: proposals, targets and a 56x56 RoI align at scale 1/4
    self.DCR_proposal = _DCRProposalLayer(self.class_agnostic)
    self.DCR_target_proposal = _DCRTargetLayer(self.n_classes, self.class_agnostic)
    self.DCR_roi_pool = ROIAlign((56, 56), 1.0 / 4.0, 0)

    if cfg.TRAIN.ATTENTION_MODEL:
        self.levelattentionLoss = Attention_loss()
def __init__(self, classes, attention_type, rpn_reduce_dim, rcnn_reduce_dim, n_way=2, n_shot=5, pos_encoding=True):
    """Create the RoI layers, attention projections and scoring head.

    The RPN and the R-CNN transform layer are only created when
    ``attention_type`` is ``'concat'`` or ``'product'``.

    NOTE(review): the nesting of the ``attention_type`` and
    ``pos_encoding`` branches was reconstructed from a source that had
    lost its indentation -- confirm against the original file.
    """
    super(_MAML_CISA, self).__init__()

    def _linear(n_in, n_out):
        # Linear layer with normal(std=0.01) weights and a zero bias.
        layer = nn.Linear(n_in, n_out)
        init.normal_(layer.weight, std=0.01)
        init.constant_(layer.bias, 0)
        return layer

    self.classes = classes
    self.n_classes = len(classes)
    self.n_way = n_way
    self.n_shot = n_shot
    self.attention_type = attention_type
    self.unary_gamma = 0.1
    self.rpn_reduce_dim = rpn_reduce_dim
    self.rcnn_reduce_dim = rcnn_reduce_dim

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # pooling or align
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # few shot rcnn head
    self.pool_feat_dim = 1024
    self.rcnn_dim = 64
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    # Layers are created and initialised one at a time, in this order.
    self.rpn_unary_layer = _linear(dim_in, 1)
    self.rcnn_unary_layer = _linear(dim_in, 1)
    self.rpn_adapt_q_layer = _linear(dim_in, rpn_reduce_dim)
    self.rpn_adapt_k_layer = _linear(dim_in, rpn_reduce_dim)
    self.rcnn_adapt_q_layer = _linear(dim_in, rcnn_reduce_dim)
    self.rcnn_adapt_k_layer = _linear(dim_in, rcnn_reduce_dim)

    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Input width of the RPN / transform layer depends on the attention type.
    if self.attention_type == 'concat':
        fused_dim = 2048
    elif self.attention_type == 'product':
        fused_dim = 1024
    else:
        fused_dim = None
    if fused_dim is not None:
        self.RCNN_rpn = _RPN(fused_dim)
        self.rcnn_transform_layer = nn.Linear(fused_dim, self.rcnn_dim)

    self.output_score_layer = FFN(64 * 49, dim_in)

    # positional encoding
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()
        self.rpn_pos_encoding_layer = PositionalEncoding(max_len=400)
def __init__(self, classes, n_head, d_feat=64, n_way=2, n_shot=5, pos_encoding=True):
    """Create the RoI layers, multi-head attention module, RPN and FFN head.

    Layer widths are chosen from ``n_head``; only 4, 2 and 1 are accepted.

    Raises:
        Exception: if ``n_head`` is not 4, 2 or 1.

    NOTE(review): the extent of the ``if pos_encoding`` body was
    reconstructed from a source that had lost its indentation -- confirm.
    """
    super(_multiRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # for proposal-target matching
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # pooling or align
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # few shot rcnn head
    self.global_relation = True
    self.local_correlation = True
    self.pool_feat_dim = 1024
    self.soft_gamma = 10
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    # (rpn input, rpn hidden, rcnn hidden, ffn hidden) widths per head count
    self.h = n_head
    if self.h == 4:
        d_rpn_in, d_rpn_hidden, d_rcnn_hidden, d_ffn_hidden = 1024, 512, 64, 1024
    elif self.h == 2:
        d_rpn_in, d_rpn_hidden, d_rcnn_hidden, d_ffn_hidden = 512, 256, 64, 1024
    elif self.h == 1:
        d_rpn_in, d_rpn_hidden, d_rcnn_hidden, d_ffn_hidden = 256, 128, 32, 512
    else:
        raise Exception(f'{self.h}')

    self.d_feat = d_feat
    self.multihead_attention_layer = MultiheadAttentionModule(dim_in, d_feat=self.d_feat, h=self.h)
    self.rpn_attention_linear = nn.Linear(400 * self.h, d_rpn_in)
    self.RCNN_rpn = _MultiheadRPN(d_rpn_in, d_rpn_hidden)
    self.rcnn_attention_linear = nn.Linear(49 * self.h, d_rcnn_hidden)
    self.rcnn_ffn_layer = FFN(49 * d_rcnn_hidden, d_ffn_hidden)

    # positional encoding
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()
        self.rpn_pos_encoding_layer = PositionalEncoding(max_len=400)

    # few shot settings
    self.n_way = n_way
    self.n_shot = n_shot
def __init__(self, classes, n_head, n_way=2, n_shot=5, pos_encoding=True):
    """Create the RoI layers, per-head Q/K/V projections, RPN and scorer.

    One Q, K and V linear projection is created per head; the projections
    are kept both as plain lists and as ``nn.ModuleList`` attributes.

    NOTE(review): this block was reconstructed from a source that had lost
    its indentation. It is assumed that both ``*_multihead_layer`` modules
    belong to the ``if n_head != 1`` body and that ``output_score_layer``
    is always created -- confirm against the original file.
    """
    super(_multiheadAttentionRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.n_way = n_way
    self.n_shot = n_shot
    self.n_head = n_head

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # pooling or align
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # few shot rcnn head
    self.pool_feat_dim = 1024
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    self.d_k = 64
    q_heads, k_heads, v_heads = [], [], []
    for _head in range(n_head):
        # Create all three projections first, then initialise them in
        # Q, K, V order (normal(std=0.01) weights, zero bias).
        q_proj, k_proj, v_proj = (nn.Linear(dim_in, self.d_k) for _unused in range(3))
        for proj in (q_proj, k_proj, v_proj):
            init.normal_(proj.weight, std=0.01)
            init.constant_(proj.bias, 0)
        q_heads.append(q_proj)
        k_heads.append(k_proj)
        v_heads.append(v_proj)
    self.Q_weight_list = q_heads
    self.K_weight_list = k_heads
    self.V_weight_list = v_heads
    self.Q_layers = nn.ModuleList(self.Q_weight_list)
    self.K_layers = nn.ModuleList(self.K_weight_list)
    self.V_layers = nn.ModuleList(self.V_weight_list)

    if n_head != 1:
        self.rpn_multihead_layer = nn.Linear(n_head * 400, 400)
        self.rcnn_multihead_layer = nn.Linear(n_head * self.d_k * 49, self.d_k * 49)
    self.output_score_layer = FFN(self.d_k * 49, dim_in)

    # rpn
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_rpn = _BipathRPN(400, 256)

    # positional encoding
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()
        self.rpn_pos_encoding_layer = PositionalEncoding(max_len=400)
def __init__(self, classes, class_agnostic,lc,gc):
    """Store the configuration flags and create the RPN and RoI layers.

    ``lc`` and ``gc`` are stored as given. ``self.dout_base_model`` is
    read but not assigned here.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    self.lc = lc
    self.gc = gc

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)
def __init__(self, classes, class_agnostic=False):
    """Create the RPN, proposal-target layer and RoI pool/align layers.

    ``self.feature_out_dim`` is read (not assigned) to size the RPN.
    """
    super(Faster_RCNN, self).__init__()
    self.n_classes = len(classes)
    self.classes = classes
    self.class_agnostic = class_agnostic

    # RCNN loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    self.RCNN_rpn = RPN(self.feature_out_dim)
    self.RCNN_proposal_target = ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)
def __init__(self, classes, n_way=2, n_shot=5, pos_encoding=True):
    """Create the RPN, RoI layers, single-head Q/K/V projections and FFN.

    ``self.dout_base_model`` is read (not assigned) to size the RPN. The
    positional-encoding layer is only created when ``pos_encoding`` is
    truthy.
    """
    super(_qkvRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # for proposal-target matching
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # pooling or align
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # few shot rcnn head
    self.global_relation = True
    self.local_correlation = True
    self.pool_feat_dim = 1024
    self.soft_gamma = 10
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    # Create the three projections first, then initialise them in
    # Q, K, V order (normal(std=0.01) weights, zero bias).
    self.d_k = 64
    self.Q_weight = nn.Linear(dim_in, self.d_k)
    self.K_weight = nn.Linear(dim_in, self.d_k)
    self.V_weight = nn.Linear(dim_in, self.d_k)
    for proj in (self.Q_weight, self.K_weight, self.V_weight):
        init.normal_(proj.weight, std=0.01)
        init.constant_(proj.bias, 0)
    self.ffn_layer = FFN(self.d_k * 49, dim_in)

    # positional encoding
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()

    # few shot settings
    self.n_way = n_way
    self.n_shot = n_shot
def __init__(self, classes, n_way=2, n_shot=5):
    """Create the RPN, proposal-target layer and RoI pool/align layers.

    ``self.dout_base_model`` is read (not assigned) to size the RPN.
    """
    super(_metaRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # for proposal-target matching
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # pooling or align
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # few shot settings
    self.n_way = n_way
    self.n_shot = n_shot
# Constructor for a `_fasterRCNN` variant whose attention module is selected by `fusion`:
# 'query' creates `attention(self.dout_base_model)` and 'attention' creates
# `attention_early_fusion_multi_query(self.dout_base_model)`. It also builds a 1x1
# projection conv (2048 -> 1024 channels, Xavier-uniform weights), the RPN, the
# proposal-target layer, the RoI pool/align layers and a margin ranking loss stored as
# `triplet_loss`. `self.dout_base_model` is read but not assigned here.
# NOTE(review): this definition has lost its line breaks and indentation, so it cannot be
# determined from here whether `self.projection` (and its init) belongs inside the
# `elif fusion == 'attention'` branch or follows the whole if/elif -- confirm against the
# original file before re-indenting. `model_type` is accepted but never read in the
# visible code.
def __init__(self, classes, class_agnostic, model_type="attention", fusion='query'): super(_fasterRCNN, self).__init__() self.classes = classes self.n_classes = len(classes) self.class_agnostic = class_agnostic conv_nd = nn.Conv2d self.fusion = fusion if fusion == 'query': self.attention_net = attention(self.dout_base_model) elif fusion == 'attention': self.attention_net = attention_early_fusion_multi_query( self.dout_base_model) self.projection = conv_nd(in_channels=1024 * 2, out_channels=1024, kernel_size=1, stride=1, padding=0, bias=False) nn.init.xavier_uniform_(self.projection.weight) # loss self.RCNN_loss_cls = 0 self.RCNN_loss_bbox = 0 # define rpn self.RCNN_rpn = _RPN(self.dout_base_model) self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) # self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) # self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) self.RCNN_roi_pool = ROIPool((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0) self.RCNN_roi_align = ROIAlign((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0, 0) self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)
def __init__(self, classes, class_agnostic):
    """Create the FPN RPN, proposal-target layer and RoI pool/align layers.

    ``self.dout_base_model`` is read (not assigned) to size the RPN.
    """
    super(_FPN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: the original paper used pool_size = 7 for cls branch, and 14 for
    # mask branch, to save the computation time, we first use 14 as the
    # pool_size, and then do stride=2 pooling for cls branch.
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(roi_size, 1.0 / 16.0)
    # NOTE(review): ROIAlign receives a single extra argument (0) here,
    # unlike ROIPool above which receives a 1/16 spatial scale -- confirm
    # this matches the signature of the ROIAlign this module imports.
    self.RCNN_roi_align = ROIAlign(roi_size, 0)
def __init__(self, classes, class_agnostic, feat_name, feat_list=('conv2', 'conv3', 'conv4', 'conv5'), pretrained=True):
    """Build the FPN detector layers on top of the parent's feature extractor.

    All arguments are forwarded unchanged to the parent constructor. The
    extractor ``self.FeatExt`` is probed once to record the channel count
    of every feature map it returns, then the RPN and one RoI align / RoI
    pool layer per entry of ``cfg.RCNN_COMMON.FEAT_STRIDE`` are created.

    Args:
        classes: forwarded to the parent constructor.
        class_agnostic: forwarded to the parent constructor.
        feat_name: forwarded to the parent constructor.
        feat_list: forwarded to the parent constructor.
        pretrained: forwarded to the parent constructor.
    """
    super(FPN, self).__init__(classes, class_agnostic, feat_name, feat_list, pretrained)

    # Important: put the extractor in eval mode for the probe, then restore
    # train mode. The probe input is zero-filled rather than uninitialised
    # memory (``torch.Tensor(...)``), so arbitrary values (possibly NaN/Inf)
    # are never pushed through the network, and ``no_grad`` avoids building
    # an autograd graph for a throw-away forward pass. Only channel counts
    # are read from the outputs.
    self.FeatExt.eval()
    with torch.no_grad():
        probe_feats = self.FeatExt(torch.zeros(1, 3, 224, 224))
    self.FeatExt.train()
    self.n_channels = [feat.size(1) for feat in probe_feats]
    self.dout_base_model = 256

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    self._num_pyramid_layers = len(cfg.RCNN_COMMON.FEAT_STRIDE)
    self.RCNN_rpn = _RPN(self.dout_base_model,
                         anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                         anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                         feat_stride=cfg.RCNN_COMMON.FEAT_STRIDE)

    # One align/pool layer per stride, each with spatial scale 1/stride.
    self.RCNN_roi_aligns = nn.ModuleList()
    self.RCNN_roi_pools = nn.ModuleList()
    pool_size = (cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE)
    for stride in cfg.RCNN_COMMON.FEAT_STRIDE:
        scale = 1.0 / float(stride)
        self.RCNN_roi_aligns.append(ROIAlign(pool_size, scale, 0))
        self.RCNN_roi_pools.append(ROIPool(pool_size, scale))

    # self.n_classes is read here but not assigned in this method
    # (presumably set by the parent constructor -- confirm).
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.iter_counter = 0
def __init__(self, classes, class_agnostic):
    """Create the RPN, proposal-target layer and RoI pool/align layers.

    ``self.dout_base_model`` is read but not assigned here.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # define rpn (it receives the feature map produced by the base_model
    # feature-extraction network)
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # two ways of cropping the feature map
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)
def __init__(self, classes, class_agnostic, loss_type):
    """Create the RPN, proposal-target layer and RoI pool/align layers.

    ``self.dout_base_model`` is read but not assigned here.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    # NOTE(review): n_classes stores ``classes`` itself, not len(classes);
    # presumably callers pass a class count here -- confirm.
    self.n_classes = classes
    self.class_agnostic = class_agnostic
    self.loss_type = loss_type

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model, self.n_classes)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)
def __init__(self, classes, class_agnostic, lc, gc, da_use_contex, in_channel=4096):
    """Create the RPN, RoI layers, class conv/pool and the instance DA head.

    When ``da_use_contex`` is truthy, ``in_channel`` grows by 128 for each
    of ``lc`` and ``gc`` that is truthy before ``_InstanceDA`` is built.
    ``self.dout_base_model`` is read but not assigned here.

    NOTE(review): the nesting of the ``lc``/``gc`` checks under
    ``da_use_contex`` was reconstructed from a source that had lost its
    indentation -- confirm against the original file.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    self.lc = lc
    self.gc = gc

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # Crop grid is twice the pooling size when a max-pool follows the crop.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE

    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    self.da_use_contex = da_use_contex
    if self.da_use_contex:
        if self.lc:
            in_channel += 128
        if self.gc:
            in_channel += 128
    self.RCNN_instanceDA = _InstanceDA(in_channel)
def __init__(self, classes, class_agnostic, rpn_batchsize):
    """Create the RPN, proposal-target layer and RoI pool/align layers.

    RoI layers use separate height and width pooling sizes
    (``cfg.POOLING_SIZE_H`` / ``cfg.POOLING_SIZE_W``).
    ``self.dout_base_model`` is read but not assigned here.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.rpn_batchsize = rpn_batchsize

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model, self.rpn_batchsize)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # RoICrop removed from pytorch-1.0 branch
    # NOTE(review): the original text ends here inside an unterminated
    # triple-quoted block; whatever followed it is not visible.
def __init__(self, classes, class_agnostic):
    """Create the RPN, proposal-target layer, RoI layers and extension layer.

    ``self.dout_base_model`` is read but not assigned here.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # RPN layer (also compute the cls loss and bbox loss for RPN layer)
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # RCNN gt labels layer (produces gt labels for final cls and bbox regression)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # ROIPooling or ROIAlign layer
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # new layer
    self.extension_layer = extension_layers.extension_layer()
def __init__(self, classes, class_agnostic, in_channel=4096):
    """Create the RPN, RoI layers, DA modules and the contrastive head.

    ``self.dout_base_model`` is read but not assigned here, so it must
    already be available on the instance when this constructor runs.

    Args:
        classes: sized collection of classes; its length becomes ``n_classes``.
        class_agnostic: stored as ``self.class_agnostic``.
        in_channel: passed to ``_InstanceDA``.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pool_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pool_size, 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign(pool_size, 1.0 / 16.0, 0)

    # Crop grid is twice the pooling size when a max-pool follows the crop.
    self.grid_size = (
        cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    )

    self.RCNN_imageDA = _ImageDA(self.dout_base_model, self.n_classes)
    self.RCNN_instanceDA = _InstanceDA(in_channel)
    # reduction="sum" is the supported spelling of the deprecated
    # size_average=False (summed, un-averaged squared error).
    self.consistency_loss = torch.nn.MSELoss(reduction="sum")
    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    # projection MLP, zf
    self.s_l1 = nn.Linear(self.dout_base_model, self.dout_base_model)
    self.s_l2 = nn.Linear(self.dout_base_model, 128)
    self.s_nt_xent_criterion = NTXentLoss(batch_size=self.n_classes - 1,
                                          temperature=0.5,
                                          use_cosine_similarity=True)
    self.contra_softmax = nn.Softmax(dim=1)
def __init__(self, classes, class_agnostic, lc, gc):
    """Create the RPN, RoI layers and the per-class conv / average pool.

    ``lc`` and ``gc`` are stored as given. ``self.dout_base_model`` is
    read but not assigned here.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    self.lc = lc
    self.gc = gc

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # Crop grid is twice the pooling size when a max-pool follows the crop.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE

    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
def __init__(self, classes, class_agnostic):
    """Create the match block, RPN, RoI layers and margin ranking loss.

    ``self.dout_base_model`` is read but not assigned here.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.match_net = match_block(self.dout_base_model)

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network and proposal-target layer
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)
# Constructor for a `_fasterRCNN` variant configured by `model_type`. It prints the class
# list, creates `match_block(self.dout_base_model)` when `model_type` is "match_net" and
# `attention(self.dout_base_model)` when it is "attention", builds a 1x1 projection conv
# (2048 -> 1024 channels, Xavier-uniform weights), then the RPN, the proposal-target
# layer, the RoI pool/align layers and a margin ranking loss stored as `triplet_loss`.
# `self.dout_base_model` is read but not assigned here.
# NOTE(review): this definition has lost its line breaks and indentation, so it cannot be
# determined from here whether `self.projection` (and its init) belongs inside the
# `if self.model_type == "attention":` body or is created unconditionally -- confirm
# against the original file before re-indenting.
def __init__(self, classes, class_agnostic, model_type): super(_fasterRCNN, self).__init__() self.classes = classes self.n_classes = len(classes) self.model_type = model_type print(self.classes) self.class_agnostic = class_agnostic conv_nd = nn.Conv2d if self.model_type in ["match_net"]: self.match_net = match_block(self.dout_base_model) if self.model_type == "attention": self.attention_net = attention(self.dout_base_model) self.projection = conv_nd(in_channels=1024 * 2, out_channels=1024, kernel_size=1, stride=1, padding=0, bias=False) nn.init.xavier_uniform_(self.projection.weight) # loss self.RCNN_loss_cls = 0 self.RCNN_loss_bbox = 0 # define rpn self.RCNN_rpn = _RPN(self.dout_base_model) self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) self.RCNN_roi_pool = ROIPool((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0) self.RCNN_roi_align = ROIAlign((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0, 0) self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)
def roi_feature_transform(self, blobs_in, rpn_ret, blob_rois='rois', method='RoIPoolF',
                          resolution=7, spatial_scale=1. / 16., sampling_ratio=0,
                          query_blobs_in=None):
    """Add the specified RoI pooling method.

    The sampling_ratio argument is supported for some, but not all, RoI
    transform methods.

    RoIFeatureTransform abstracts away:
      - Use of FPN or not
      - Specifics of the transform method

    Args:
        blobs_in: a single feature tensor, or a list of per-level feature
            tensors (FPN case, stored in reversed level order).
        rpn_ret: mapping from blob names to numpy arrays of RoIs; in the
            FPN case it also holds the ``<blob_rois>_fpn<lvl>`` and
            ``<blob_rois>_idx_restore_int32`` entries.
        blob_rois: base name of the RoI entry in ``rpn_ret``.
        method: ``'RoIPoolF'`` or ``'RoIAlign'``.
        resolution: output height and width of the pooled features.
        spatial_scale: a scalar, or in the FPN case a sequence indexed
            like ``blobs_in``.
        sampling_ratio: forwarded to ``ROIAlign``.
        query_blobs_in: optional query features, laid out like
            ``blobs_in``. Only used when ``blob_rois`` is ``'rois'`` or
            ``'mask_rois'``.

    Returns:
        The pooled features, or a ``(pooled, query)`` tuple when query
        features are used. The query features are gathered per RoI by the
        RoI's batch index and resized to ``resolution`` x ``resolution``.

    Raises:
        AssertionError: if ``method`` is not supported, or (FPN case) the
            number of levels does not match the configured range.
    """
    # 'RoICrop' has no implementation in this function. Accepting it used
    # to leave ``xform_out`` unassigned (or silently reuse the value from a
    # previous FPN level), so it is rejected up front.
    assert method in {'RoIPoolF', 'RoIAlign'}, \
        'Unknown pooling method: {}'.format(method)

    # Query features are only returned for these two RoI blobs.
    use_query = (blob_rois == 'rois' or blob_rois == 'mask_rois') and query_blobs_in is not None

    if isinstance(blobs_in, list):
        # FPN case: add RoIFeatureTransform to each FPN level
        device_id = blobs_in[0].get_device()
        k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
        k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
        assert len(blobs_in) == k_max - k_min + 1
        bl_out_list = []
        query_bl_out_list = []
        for lvl in range(k_min, k_max + 1):
            bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
            sc = spatial_scale[k_max - lvl]  # in reversed order
            bl_rois = blob_rois + '_fpn' + str(lvl)
            if len(rpn_ret[bl_rois]):
                rois = Variable(torch.from_numpy(rpn_ret[bl_rois])).cuda(device_id)
                # first RoI column is the image index within the batch
                batch_idxs = rois[:, 0].long()
                if method == 'RoIPoolF':
                    # Warning!: Not check if implementation matches Detectron
                    xform_out = ROIPool((resolution, resolution), sc)(bl_in, rois)
                elif method == 'RoIAlign':
                    xform_out = ROIAlign(
                        (resolution, resolution), sc, sampling_ratio)(bl_in, rois)
                if use_query:
                    query_bl_in = query_blobs_in[k_max - lvl]
                    query_bl_in = query_bl_in[batch_idxs]
                    query_bl_in = F.interpolate(
                        query_bl_in, size=[resolution, resolution], mode="bilinear")
                    query_bl_out_list.append(query_bl_in)
                bl_out_list.append(xform_out)

        # The pooled features from all levels are concatenated along the
        # batch dimension into a single 4D tensor.
        xform_shuffled = torch.cat(bl_out_list, dim=0)

        # Unshuffle to match rois from dataloader
        device_id = xform_shuffled.get_device()
        restore_bl = rpn_ret[blob_rois + '_idx_restore_int32']
        restore_bl = Variable(
            torch.from_numpy(restore_bl.astype('int64', copy=False))).cuda(device_id)
        xform_out = xform_shuffled[restore_bl]
        if use_query:
            query_xform_shuffled = torch.cat(query_bl_out_list, dim=0)
            query_xform_out = query_xform_shuffled[restore_bl]
            return xform_out, query_xform_out
        return xform_out

    # Single feature level
    # rois: holds R regions of interest, each is a 5-tuple
    # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
    # rectangle (x1, y1, x2, y2)
    device_id = blobs_in.get_device()
    rois = Variable(torch.from_numpy(rpn_ret[blob_rois])).cuda(device_id)
    batch_idxs = rois[:, 0].long()
    if method == 'RoIPoolF':
        xform_out = ROIPool((resolution, resolution), spatial_scale)(blobs_in, rois)
    elif method == 'RoIAlign':
        xform_out = ROIAlign(
            (resolution, resolution), spatial_scale, sampling_ratio)(blobs_in, rois)
    if use_query:
        query_blobs_in = query_blobs_in[batch_idxs]
        query_blobs_in = F.interpolate(
            query_blobs_in, size=[resolution, resolution], mode="bilinear")
        return xform_out, query_blobs_in
    return xform_out
def roi_feature_transform(self, blobs_in, rpn_ret, blob_rois='rois', method='RoIPoolF',
                          resolution=7, spatial_scale=1. / 16., sampling_ratio=0):
    """Add the specified RoI pooling method.

    The sampling_ratio argument is supported for some, but not all, RoI
    transform methods.

    RoIFeatureTransform abstracts away:
      - Use of FPN or not
      - Specifics of the transform method

    ``blobs_in`` is either one feature tensor or a list of per-level
    feature tensors (FPN case). RoIs are read from ``rpn_ret`` as numpy
    arrays and moved to the CUDA device of the features.
    """
    assert method in {'RoIPoolF', 'RoIAlign'}, \
        'Unknown pooling method: {}'.format(method)

    out_size = (resolution, resolution)

    if not isinstance(blobs_in, list):
        # Single feature level
        # rois: holds R regions of interest, each is a 5-tuple
        # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
        # rectangle (x1, y1, x2, y2)
        device_id = blobs_in.get_device()
        rois = Variable(torch.from_numpy(rpn_ret[blob_rois])).cuda(device_id)
        if method == 'RoIPoolF':
            xform_out = ROIPool(out_size, spatial_scale)(blobs_in, rois)
        elif method == 'RoIAlign':
            xform_out = ROIAlign(out_size, spatial_scale, sampling_ratio)(blobs_in, rois)
        return xform_out

    # FPN case: add RoIFeatureTransform to each FPN level
    device_id = blobs_in[0].get_device()
    coarsest = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
    finest = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == coarsest - finest + 1

    pooled_per_level = []
    for level in range(finest, coarsest + 1):
        slot = coarsest - level  # blobs_in and spatial_scale are in reversed order
        level_feats = blobs_in[slot]
        level_scale = spatial_scale[slot]
        rois_key = blob_rois + '_fpn' + str(level)
        if len(rpn_ret[rois_key]):
            rois = Variable(torch.from_numpy(rpn_ret[rois_key])).cuda(device_id)
            if method == 'RoIPoolF':
                # Warning!: Not check if implementation matches Detectron
                xform_out = ROIPool(out_size, level_scale)(level_feats, rois)
            elif method == 'RoIAlign':
                xform_out = ROIAlign(out_size, level_scale, sampling_ratio)(level_feats, rois)
            pooled_per_level.append(xform_out)

    # The pooled features from all levels are concatenated along the
    # batch dimension into a single 4D tensor.
    xform_shuffled = torch.cat(pooled_per_level, dim=0)

    # Unshuffle to match rois from dataloader
    device_id = xform_shuffled.get_device()
    restore_idx = rpn_ret[blob_rois + '_idx_restore_int32']
    restore_idx = Variable(
        torch.from_numpy(restore_idx.astype('int64', copy=False))).cuda(device_id)
    xform_out = xform_shuffled[restore_idx]
    return xform_out
def __init__(self, classes, n_way=2, n_shot=5, g=True, l=True, p=True):
    """Create the RPN, RoI layers and the optional relation heads.

    ``g``, ``l`` and ``p`` switch on the global-relation,
    local-correlation and patch-relation heads respectively; each head's
    layers are only created when its flag is truthy.
    ``self.dout_base_model`` is read (not assigned) to size the RPN.
    """
    super(_reweight_RCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)

    # loss values start at zero
    self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

    # region proposal network
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # for proposal-target matching
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # pooling or align
    roi_size = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(roi_size, roi_scale)
    self.RCNN_roi_align = ROIAlign(roi_size, roi_scale, 0)

    # few shot rcnn head
    self.global_relation = g
    self.local_correlation = l
    self.patch_relation = p
    self.pool_feat_dim = 1024
    self.soft_gamma = 1e1
    self.avgpool = nn.AvgPool2d(14, stride=1)
    self.avgpool_fc = nn.AvgPool2d(7)
    self.patch_avgpool = nn.AvgPool2d(kernel_size=3, stride=1)
    dim_in = self.pool_feat_dim

    if self.global_relation:
        self.global_fc_1 = nn.Linear(dim_in * 2, dim_in)
        self.global_fc_2 = nn.Linear(dim_in, dim_in)
        self.global_cls_score = nn.Linear(dim_in, 2)
        # normal(std=0.01) weights and zero bias, in creation order
        for fc in (self.global_fc_1, self.global_fc_2, self.global_cls_score):
            init.normal_(fc.weight, std=0.01)
            init.constant_(fc.bias, 0)

    if self.local_correlation:
        self.corr_conv = nn.Conv2d(dim_in, dim_in, 1, padding=0, bias=False)
        self.corr_cls_score = nn.Linear(dim_in, 2)
        for module in (self.corr_conv, self.corr_cls_score):
            init.normal_(module.weight, std=0.01)
        init.constant_(self.corr_cls_score.bias, 0)

    if self.patch_relation:
        quarter_dim = int(dim_in / 4)
        self.patch_conv_1 = nn.Conv2d(2 * dim_in, quarter_dim, 1, padding=0, bias=False)
        self.patch_conv_2 = nn.Conv2d(quarter_dim, quarter_dim, 3, padding=0, bias=False)
        self.patch_conv_3 = nn.Conv2d(quarter_dim, dim_in, 1, padding=0, bias=False)
        self.patch_cls_score = nn.Linear(dim_in, 2)
        for module in (self.patch_conv_1, self.patch_conv_2,
                       self.patch_conv_3, self.patch_cls_score):
            init.normal_(module.weight, std=0.01)
        init.constant_(self.patch_cls_score.bias, 0)

    # few shot settings
    self.n_way = n_way
    self.n_shot = n_shot
!git checkout pytorch-1.0 !pip install -r requirements.txt cd lib pwd !python setup.py build develop import sys sys.path.append("/content/faster-rcnn.pytorch/lib") from model.roi_layers import ROIPool roi_pool = ROIPool((2,2), 1) from __future__ import print_function, division import os import torch import pandas as pd from skimage import io, transform import numpy as np import matplotlib.pyplot as plt from torch.utils.data import Dataset, DataLoader from torchvision import transforms, utils import numpy as np import torchvision from torchvision import datasets, models, transforms import matplotlib.pyplot as plt import matplotlib.image as mpimg