def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Set up the base anchors, the proposal layer and the three conv layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1`` and input channels of
            the ``score`` and ``loc`` layers.
        ratios: Forwarded to ``generate_anchor_base``. ``None`` selects
            the default ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``. ``None``
            selects the default ``[8, 16, 32]``.
        feat_stride: Stored unchanged as ``self.feat_stride``.
        proposal_creator_params: Keyword arguments forwarded to
            ``ProposalCreator``. ``None`` means no extra arguments.
    """
    super(RegionProposalNetwork, self).__init__()

    # Build the defaults per call: list/dict literals in the signature are
    # evaluated once and would be shared by every instance.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    # Original author's note: the base anchors are corner coordinates relative
    # to the top-left corner of the 16x16 image cell behind one feature-map
    # point, kept so all anchors on the resized image can be computed later.
    # NOTE(review): generate_anchor_base is defined elsewhere -- confirm there.
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    # The proposal creator receives this module itself as its first argument;
    # every other setting comes from proposal_creator_params.
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    n_anchor = self.anchor_base.shape[0]
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # Two score channels and four location channels per base anchor.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    # presumably (mean=0, std=0.01) -- confirm against normal_init
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Create the RPN's anchor base, proposal layer and convolution layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Resolve the defaults here so no list/dict object is shared between
    # instances through the function signature.
    ratios = [0.5, 1, 2] if ratios is None else ratios
    anchor_scales = [8, 16, 32] if anchor_scales is None else anchor_scales
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    # One row of anchor_base per base anchor.
    n_anchor = self.anchor_base.shape[0]
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    # Same initialisation call for all three layers, in registration order.
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Initialise the region proposal network layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``. Original author's note: these are edge
            lengths, not areas; square them (8^2, 16^2, 32^2) to get the
            anchor area. NOTE(review): confirm in generate_anchor_base.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Defaults are created per call rather than in the signature, where a
    # single list/dict object would be shared by all instances.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)
    n_anchor = self.anchor_base.shape[0]

    # First conv layer, applied before the two RPN heads.
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # 1x1 conv producing the scores (2 channels per base anchor).
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    # 1x1 conv producing the bbox regression (4 channels per base anchor).
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Build the anchor base, the ROI-producing layer and the RPN convs.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride`` (the original comment
            calls it the down-sampling factor).
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Avoid mutable objects in the signature: they are created once and
    # shared across every call.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    # Generate the base anchors.
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    # Instantiate the object that produces the ROIs.
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    # Number of base anchors.
    n_anchor = self.anchor_base.shape[0]
    # 3x3 convolution.
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # RPN classification layer.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    # RPN regression layer.
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    # Initialise the parameters of the 3x3 conv, then the classification
    # layer, then the regression layer.
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Construct the RPN: base anchors, proposal layer, conv1/score/loc.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # None sentinels replace the former list/dict defaults so that each
    # instance gets its own objects.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    # The heads emit 2 (score) and 4 (loc) channels per row of anchor_base.
    n_anchor = self.anchor_base.shape[0]
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Create the base anchors, the proposal layer and the RPN conv layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Mutable defaults in a signature are shared between calls; create
    # them here instead.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    # Original comment says 9 with the default scales and ratios
    # -- TODO confirm against generate_anchor_base.
    num_anchor_base = self.anchor_base.shape[0]

    self.conv1 = torch.nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # Two-way classification per base anchor (object / no object).
    self.score = torch.nn.Conv2d(
        mid_channels, num_anchor_base * 2, kernel_size=1, stride=1, padding=0)
    # Coordinate regression: four values per base anchor.
    self.loc = torch.nn.Conv2d(
        mid_channels, num_anchor_base * 4, kernel_size=1, stride=1, padding=0)

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Initialise the anchor base, proposal layer and the RPN conv layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Per-call defaults; list/dict defaults in the signature would be one
    # shared object for all instances.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    # Original author's note: anchor_base is a (9, 4) ndarray -- 3 scales
    # times 3 ratios gives 9 rows, and 4 columns for the box values.
    # NOTE(review): shape and column meaning come from generate_anchor_base,
    # which is not visible here -- confirm.
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    # The original author left a note wondering what this object is for; it
    # is built from this module plus proposal_creator_params.
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    n_anchor = self.anchor_base.shape[0]
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # 2k outputs: what the paper calls the cls layer (per the original note).
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    # 4k outputs: what the paper calls the reg layer (per the original note).
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    # Initialisation of all three layers.
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Build the RPN variant that also owns an activation quantiser.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(fixed_RegionProposalNetwork, self).__init__()

    # Create the defaults per call instead of sharing one list/dict object
    # through the signature.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)
    n_anchor = self.anchor_base.shape[0]

    # presumably an 8-bit linear activation quantiser -- confirm against qt.
    # Kept ahead of the conv layers, as in the original attribute order.
    self.act_quant = qt.activation_quantization(8, qt.Quant.linear)

    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Set up the RPN's anchors, proposal layer and its three convolutions.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # The signature uses None sentinels so that no mutable default object is
    # shared between instances.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)
    # 9 with the defaults, per the original comment -- TODO confirm.
    n_anchor = self.anchor_base.shape[0]

    # Padded 3x3 convolution: spatial size is kept, so with the default
    # channel counts the output is still (N, 512, h, w).
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # Two 1x1 branches follow (numbers below assume 9 base anchors):
    #   score: 18 channels -> (N, 18, h, w), the two-class scores of every
    #          anchor (original note: h * w is about 2400, so roughly 20000
    #          anchors in total).
    #   loc:   36 channels -> (N, 36, h, w), the regression parameters of
    #          every anchor.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    # presumably draws weights from a normal distribution (mean 0, std 0.01)
    # -- confirm against normal_init.
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Create the anchor base, the conv layers and then the proposal layer.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride`` (original comment: the
            feature map is 1/16 of the input image).
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Defaults are materialised per call; mutable defaults in the signature
    # would be shared by every instance.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    # Original comment: relative coordinates of the 9 anchors centred on a
    # cell, shape (9, 4). NOTE(review): confirm in generate_anchor_base.
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    # Number of anchors attached to each feature-map point.
    n_anchor = self.anchor_base.shape[0]
    self.feat_stride = feat_stride

    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # Foreground/background head: 2 channels per anchor.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    # Regression head: 4 channels per anchor.
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    # Built after the conv layers, as in the original ordering. The original
    # comment says it outputs 2000 ROIs -- that depends on ProposalCreator's
    # own settings, TODO confirm.
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    # NOTE(review): the original comment called this "normalisation"; the
    # call name suggests normal-distribution weight init -- confirm.
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(self, in_channels=512, mid_channels=512, ratios=None,
             anchor_scales=None, feat_stride=16,
             proposal_creator_params=None):
    """Build the RPN's base anchors, proposal layer and conv layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Resolve None to fresh objects: list/dict defaults written in the
    # signature are evaluated once and shared across calls.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    # TODO: (carried over from the original) why is `self` passed in here?
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    # 9 with the defaults, per the original comment -- TODO confirm.
    n_anchor = self.anchor_base.shape[0]
    # 3x3 convolution.
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # 1x1 convolution: object / not-object two-way classification.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    # 1x1 convolution: the 4 box-regression coordinates.
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    # TODO: (carried over from the original) why this initialisation, and
    # why is the bias set to zero?
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(self, feature_channels=512, mid_channels=512, ratios=OPT.ratios,
             anchor_scales=OPT.anchor_scales, sub_sample=OPT.sub_sample,
             proposal_creator_params=None):
    """Build the RPN from the project-wide ``OPT`` defaults.

    Args:
        feature_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``.
        anchor_scales: Forwarded to ``generate_anchor_base``.
        sub_sample: Stored as ``self.sub_sample`` (original comment: the
            down-sampling factor, set by the feature-extraction network).
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # A dict default in the signature is one object shared by all calls;
    # create it here instead. The OPT.* defaults are left untouched so they
    # are still bound when the function is defined.
    if proposal_creator_params is None:
        proposal_creator_params = {}

    # Original comment: the anchor boxes generated at the first anchor point.
    self.anchor_base = generate_anchor_base(ratios=ratios, anchor_scales=anchor_scales)
    self.sub_sample = sub_sample
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    # Number of anchor boxes at a single anchor point.
    n_anchor = self.anchor_base.shape[0]

    # First convolution of the RPN.
    self.conv1 = nn.Conv2d(
        in_channels=feature_channels,
        out_channels=mid_channels,
        kernel_size=3,
        stride=1,
        padding=1,
    )
    # Background/foreground classification layer.
    self.score = nn.Conv2d(
        in_channels=mid_channels,
        out_channels=n_anchor * 2,
        kernel_size=1,
        stride=1,
        padding=0,
    )
    # Predicted anchor-box offsets.
    self.loc = nn.Conv2d(
        in_channels=mid_channels,
        out_channels=n_anchor * 4,
        kernel_size=1,
        stride=1,
        padding=0,
    )

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Create the RPN head layer plus its classification and regression layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # Fresh default objects per call (no shared mutable defaults).
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)
    n_anchor = self.anchor_base.shape[0]

    # Channel counts quoted below are the original author's figures for the
    # default arguments (512 channels, 9 base anchors) -- TODO confirm.
    # Head layer of the RPN: 3x3 kernel, 512 channels.
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # Classification layer: 1x1 kernel, 18 output channels.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    # Regression layer: 1x1 kernel, 36 output channels.
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Initialise base anchors, the ROI proposal layer and the conv layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # The former list/dict defaults are rebuilt on every call so instances
    # never share them.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    # Original comment: 9 base anchors of sizes [128, 256, 512] and ratios
    # [0.5, 1, 2], as a [9, 4] array.
    # NOTE(review): sizes and shape depend on generate_anchor_base -- confirm.
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    # proposal_layer is used to generate the ROIs.
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    n_anchor = self.anchor_base.shape[0]
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    proposal_creator_params=None,
):
    """Set up the anchor base, proposal layer and the three RPN conv layers.

    Args:
        in_channels: Input channels of ``conv1``.
        mid_channels: Output channels of ``conv1``; input channels of
            ``score`` and ``loc``.
        ratios: Forwarded to ``generate_anchor_base``; ``None`` means
            ``[0.5, 1, 2]``.
        anchor_scales: Forwarded to ``generate_anchor_base``; ``None``
            means ``[8, 16, 32]``.
        feat_stride: Stored as ``self.feat_stride``.
        proposal_creator_params: Extra keyword arguments for
            ``ProposalCreator``; ``None`` means none.
    """
    super(RegionProposalNetwork, self).__init__()

    # None sentinels instead of mutable defaults, which would be shared by
    # all instances.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    if proposal_creator_params is None:
        proposal_creator_params = {}

    # Original comment: an (R, 4) array with
    # R = len(anchor_scales) * len(ratios) = 9 and rows laid out as
    # [y_min, x_min, y_max, x_max].
    # NOTE(review): layout comes from generate_anchor_base -- confirm.
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(self, **proposal_creator_params)

    n_anchor = self.anchor_base.shape[0]
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    # The factor 2 stands for background and foreground.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, kernel_size=1, stride=1, padding=0)
    # The factor 4 stands for the box coordinates.
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, kernel_size=1, stride=1, padding=0)

    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
shift_y = torch.arange(0, height * feat_stride, feat_stride) shift_x = torch.arange(0, width * feat_stride, feat_stride) shift_x, shift_y = np.meshgrid(shift_x, shift_y) #产生x,y坐标网格 shift = np.stack( (shift_y.ravel(), shift_x.ravel(), shift_y.ravel(), shift_x.ravel()), axis=1) #产生坐标偏移矩阵(w*h, 4) A = anchor_base.shape[0] #特征图上每一个点产生anchor数目,9 K = shift.shape[0] #坐标偏移矩阵行数(即特征图的像素点个数, w*h) #(1, A ,4) + (K, 1, 4) = (K, A, 4) anchor = anchor_base.reshape(1, A, 4) + shift.reshape((1, K, 4)).transpose( (1, 0, 2)) anchor = anchor.reshape((K * A, 4)).astype(np.float32) #修改尺寸为(K * A, 4) return anchor if __name__ == '__main__': anchor_base = generate_anchor_base(anchor_scales=[8], ratios=[0.5, 1, 2]) print(anchor_base) feat_stride = [4, 8, 16, 32, 64] height = [200, 100, 50, 25, 12] width = [200, 100, 50, 25, 12] all_anchor = list() for i in range(5): anchor = _enumerate_shifted_anchor(anchor_base, feat_stride[i], height[i], width[i]) all_anchor.append(anchor) all_anchor = np.concatenate(all_anchor, axis=0) print(all_anchor)
order = score.ravel().argsort()[::-1] #分数从大到小排列 if n_pre_nms > 0: order = order[:n_pre_nms] #train时从20000中取前12000个rois,test取前6000个 roi = roi[order, :] #使用nms过一遍排序后的roi keep = non_maximum_suppression(cp.ascontiguousarray(cp.asarray(roi)), thresh=self.nms_thresh) if n_post_nms > 0: keep = keep[:n_post_nms] roi = roi[keep] return roi if __name__ == '__main__': from model.region_proposal_network import _enumerate_shifted_anchor from model.utils.bbox_tools import generate_anchor_base anchor_base = generate_anchor_base() anchor = _enumerate_shifted_anchor(anchor_base, 16, 60, 35) bbox = np.array([[100, 100, 200, 200], [400, 400, 650, 500]]) anchor_target = AnchorTargetCreator() loc, label = anchor_target.__call__(bbox, anchor, (960, 560)) roi = np.zeros((2000, 4), dtype=float) label = np.zeros((2000, ), dtype=int) proposal = ProposalTargetCreator() sample_roi, gt_roi_loc, gt_roi_label = proposal.__call__(roi, bbox, label)
def forward(self, feature_maps, img_size, scale=1.):
    """Run the RPN head on every feature map and collect region proposals.

    Args:
        feature_maps: Indexable collection of feature tensors, one per
            level. Each tensor's ``.shape`` is unpacked into four values
            ``(batch_size, channels, hh, ww)``.
        img_size: Passed through unchanged to ``self.proposal_layer``.
        scale: Passed through to ``self.proposal_layer`` as ``scale``.

    Returns:
        A 5-tuple ``(rpn_locs, rpn_scores, rois, roi_indices, anchors)``:
        the per-level location and score tensors concatenated along
        dimension 1 (last dimension 4 and 2 respectively), the proposals
        of every batch element concatenated along axis 0, an ``int32``
        array holding the batch index of each proposal, and the anchors
        of every level concatenated along axis 0.
    """
    # Anchor scale used for levels 0..3.
    level_scales = (4, 8, 16, 32)

    anchors_per_level = []
    locs_per_level = []
    fg_scores_per_level = []
    scores_per_level = []

    for level in range(len(feature_maps)):
        fmap = feature_maps[level]
        # Original note: batch size is 1 in this version of the code and the
        # channel count is 512; hh and ww are the feature map's spatial size.
        batch_size, _, hh, ww = fmap.shape

        # Levels beyond the table keep the anchor base built for the
        # previous level, exactly as the original if-chain did.
        if level < len(level_scales):
            anchor_base = generate_anchor_base(
                anchor_scales=[level_scales[level]], ratios=self.ratios)

        level_anchor = _enumerate_shifted_anchor(
            np.array(anchor_base), self.feat_stride[level], hh, ww)
        anchors_per_level.append(level_anchor)
        anchors_per_cell = level_anchor.shape[0] // (hh * ww)

        hidden = functional.relu(self.conv1(fmap), inplace=True)

        # (batch, A*4, hh, ww) -> (batch, hh, ww, A*4) -> (batch, hh*ww*A, 4)
        locs = self.loc(hidden)
        locs = locs.permute(0, 2, 3, 1).contiguous()
        locs = locs.view(batch_size, -1, 4)

        # (batch, A*2, hh, ww) -> (batch, hh, ww, A*2)
        scores = self.score(hidden)
        scores = scores.permute(0, 2, 3, 1).contiguous()

        # Softmax over the trailing pair of every anchor; index 1 of that
        # pair is taken as the foreground probability.
        paired = scores.view(batch_size, hh, ww, anchors_per_cell, 2)
        probs = functional.softmax(paired, dim=4)
        fg_scores = probs[:, :, :, :, 1].contiguous()
        fg_scores = fg_scores.view(batch_size, -1)

        scores = scores.view(batch_size, -1, 2)

        locs_per_level.append(locs)
        fg_scores_per_level.append(fg_scores)
        scores_per_level.append(scores)

    all_rpn_locs = torch.cat(locs_per_level, 1)
    all_rpn_fg_scores = torch.cat(fg_scores_per_level, 1)
    all_rpn_scores = torch.cat(scores_per_level, 1)
    all_anchors = np.concatenate(anchors_per_level, axis=0)

    rois_per_image = []
    indices_per_image = []
    # batch_size is the value unpacked from the last feature map above.
    for image_idx in range(batch_size):
        image_locs = all_rpn_locs[image_idx].cpu().data.numpy()
        image_fg_scores = all_rpn_fg_scores[image_idx].cpu().data.numpy()
        # Original note: roughly 20000 anchors are scored, the top 12000 are
        # kept and NMS leaves about 2000 ROIs -- these numbers depend on the
        # proposal layer's settings, TODO confirm.
        roi = self.proposal_layer(
            image_locs, image_fg_scores, all_anchors, img_size, scale=scale)
        rois_per_image.append(roi)
        # Tag every ROI with the batch element it came from.
        indices_per_image.append(
            image_idx * np.ones((len(roi), ), dtype=np.int32))

    all_rois = np.concatenate(rois_per_image, axis=0)
    all_roi_indices = np.concatenate(indices_per_image, axis=0)

    return all_rpn_locs, all_rpn_scores, all_rois, all_roi_indices, all_anchors