def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=None,
    anchor_scales=None,
    feat_stride=16,
    mode="training",
):
    """Build the RPN head: a shared 3x3 conv followed by 1x1 classification
    and regression convolutions over the anchor set.

    Args:
        in_channels: channels of the incoming backbone feature map.
        mid_channels: channels produced by the shared 3x3 conv.
        ratios: aspect ratios of the base anchors (default [0.5, 1, 2]).
        anchor_scales: scales of the base anchors (default [8, 16, 32]).
        feat_stride: backbone stride, i.e. the spatial compression factor.
        mode: forwarded to ProposalCreator (e.g. "training").
    """
    super(RegionProposalNetwork, self).__init__()
    # Fix mutable-default-argument pitfall: materialize fresh lists per call.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if anchor_scales is None:
        anchor_scales = [8, 16, 32]
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    # Stride: how much the backbone has downsampled the input.
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(mode)
    # Number of default anchors per feature-map cell.
    n_anchor = self.anchor_base.shape[0]
    # Shared 3x3 convolution applied first.
    self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
    # Classification head: predicts whether each anchor contains an object.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
    # Regression head: predicts 4 adjustment offsets per anchor.
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
    normal_init(self.conv1, 0, 0.01)
    normal_init(self.score, 0, 0.01)
    normal_init(self.loc, 0, 0.01)
def __init__(
    self,
    in_channels=512,
    mid_channels=512,
    ratios=[0.5, 1, 2],
    anchor_scales=[8, 16, 32],
    feat_stride=16,
    mode = "training",
):
    """Region Proposal Network head: one shared 3x3 conv feeding two 1x1
    prediction heads (objectness scores and box offsets)."""
    super(RegionProposalNetwork, self).__init__()
    self.feat_stride = feat_stride
    self.proposal_layer = ProposalCreator(mode)
    # Base anchors for a single cell; shape is [9, 4].
    self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
    num_anchors = self.anchor_base.shape[0]
    # 3x3 conv integrates local features before prediction.
    self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
    # Objectness head: 2 scores (object / not object) per anchor.
    self.score = nn.Conv2d(mid_channels, num_anchors * 2, 1, 1, 0)
    # Refinement head: 4 box offsets per anchor.
    self.loc = nn.Conv2d(mid_channels, num_anchors * 4, 1, 1, 0)
    # Zero-mean Gaussian weight initialization for every RPN layer.
    for layer in (self.conv1, self.score, self.loc):
        normal_init(layer, 0, 0.01)
def __init__(self, in_channels=512, mid_channels=512, ratios=None, scales=None, feat_stride=16):
    """Build a minimal RPN head (no proposal layer in this variant).

    Args:
        in_channels: channels of the incoming feature map.
        mid_channels: channels after the shared 3x3 conv.
        ratios: anchor aspect ratios (default [0.5, 1, 2]).
        scales: anchor scales (default [0.5, 1, 2]).
        feat_stride: backbone stride passed to the anchor generator.
    """
    super(RegionProposalNetwork, self).__init__()
    # Fix mutable-default-argument pitfall: bind fresh lists per call.
    if ratios is None:
        ratios = [0.5, 1, 2]
    if scales is None:
        scales = [0.5, 1, 2]
    # Prepare the anchor base (side_length hard-coded to 16 here).
    self.anchor_base = generate_anchor_base(side_length=16, ratios=ratios, scales=scales, strides=feat_stride)
    self.feat_stride = feat_stride
    # Network parameters: one anchor set per feature-map cell.
    n_anchor = self.anchor_base.shape[0]
    self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
    # 2 objectness scores per anchor.
    self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
    # 4 box offsets per anchor.
    self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
    normal_init(self.conv1, 0, 0.01)
    normal_init(self.score, 0, 0.01)
    normal_init(self.loc, 0, 0.01)
def __init__(self, in_channels=256, mid_channels=256, ratios=[0.5, 1., 2.],
             anchor_scales=[8, 16, 32], feat_stride=16, mode="training"):
    """RPN head: 3x3 feature conv followed by 1x1 cls/reg heads."""
    super(RPN, self).__init__()
    self.base_anchor = generate_anchor_base(ratios=ratios, anchor_scales=anchor_scales)
    # K = number of anchors in one anchor base (per spatial location).
    self.K = self.base_anchor.shape[0]
    self.feat_stride = feat_stride
    self.proposal_layer = region_proposal(mode)
    # Layers applied after the backbone's conv5 feature map.
    self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
    self.cls = nn.Conv2d(mid_channels, self.K * 2, kernel_size=1, stride=1, padding=0)
    self.reg = nn.Conv2d(mid_channels, self.K * 4, kernel_size=1, stride=1, padding=0)
    # Zero-mean Gaussian initialization, std 0.01, for all three layers.
    normal_init(self.conv1, 0, 0.01)
    normal_init(self.cls, 0, 0.01)
    normal_init(self.reg, 0, 0.01)
# Training-setup script fragment (loop body may continue beyond this chunk).
batch_size = 1
start_epoch, max_epoch = 0, 30
# Paths come from the config dict; assumes cfg_net is defined upstream — TODO confirm.
data_root = cfg_net['data_path']
train_file = os.path.join(cfg_net['train_data_path'], "VOC2007_train.txt")
train_transform = Compose([ToTensor(), RandomHorizontalFlip(0.5)])
# step 1: data
train_set = datasets.VOC2007Dataset(data_root, vocfile=train_file, transforms=train_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, collate_fn=collate_fn)
# Anchor configuration: 3 ratios x 3 scales.
base_size = 16
ratios = [0.5, 1, 2]
anchor_scales = [8, 16, 32]
anchors_gen = Anchors.AnchorTargetCreator()
anchor_base = Anchors.generate_anchor_base(base_size=base_size, anchor_scales=anchor_scales, ratios=ratios)
# Backbone and RPN head moved to the target device.
net = mobilenet_v2(num_classes=2, width_mult=0.35, inverted_residual_setting=None, round_nearest=8).to(device)
rpn_net = RPN(1280, 512, 9).to(device)
# inputs = torch.zeros((2, 3, 128, 128))
# cls = net(inputs)
# feature_map = net.features(inputs)
# rpn_class, rpn_prob, rpn_bbox = rpn_net(feature_map)
classify_loss = rpn_loss.ClassifyLoss()
for data in train_loader:
    images, targets = data
    # Spatial (H, W) of the current batch; presumably images is NCHW — verify against loader.
    images_size = list(images.shape[2:])