def __init__(self, config):
    """Build the YOLOv3 body: a Darknet-53 backbone plus three detection heads.

    Args:
        config: Mapping read as ``config["yolo"]["anchors"][i]`` (the anchor
            group for scale ``i`` in 0..2) and ``config["yolo"]["classes"]``
            (the number of classes).
    """
    super(YoloBody, self).__init__()
    self.config = config

    def head_width(scale):
        # Output channels of one head: anchors at this scale * (5 + classes).
        # NOTE(review): the 5 is presumably 4 box terms + 1 objectness score.
        return len(config["yolo"]["anchors"][scale]) * (
            5 + config["yolo"]["classes"])

    # The backbone is instantiated directly here instead of being selected
    # through the config.
    self.backbone = darknet53(None)
    stage_widths = self.backbone.layers_out_filters

    # Head 0: consumes only the last backbone stage.
    head0_width = head_width(0)
    self.last_layer0 = make_last_layers(
        [512, 1024], stage_widths[-1], head0_width)

    # Head 1: a 512 -> 256 conv and a 2x nearest-neighbour upsample feed a
    # head whose input width is the second-to-last stage width plus those
    # 256 channels (original note: 26, 26, 256 after upsampling, then the
    # two feature maps are stacked).
    head1_width = head_width(1)
    self.last_layer1_conv = conv2d(512, 256, 1)
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer1 = make_last_layers(
        [256, 512], stage_widths[-2] + 256, head1_width)

    # Head 2: same pattern one stage earlier, with 128 extra channels.
    head2_width = head_width(2)
    self.last_layer2_conv = conv2d(256, 128, 1)
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer2 = make_last_layers(
        [128, 256], stage_widths[-3] + 128, head2_width)
def __init__(self, config):
    """Assemble the Darknet-53 backbone and the three YOLO prediction branches.

    Args:
        config: Mapping that provides ``config["yolo"]["anchors"]`` (one
            anchor group per branch, indexed 0..2) and
            ``config["yolo"]["classes"]`` (class count).
    """
    super(YoloBody, self).__init__()
    self.config = config

    # Keep the Darknet-53 network on the module as ``self.backbone``.
    self.backbone = darknet53(None)
    # Original note: layers_out_filters = [64, 128, 256, 512, 1024].
    backbone_widths = self.backbone.layers_out_filters

    # ---- branch 0 ----
    # Output channels = anchors * (5 + classes). The original note works
    # this out as 3 * (4 + 1 + 20) = 75 for three anchors and 20 classes.
    anchors0 = len(config["yolo"]["anchors"][0])
    branch0_out = anchors0 * (5 + config["yolo"]["classes"])
    # Original note: make_last_layers is 7 convolutions (5 + 2).
    self.last_layer0 = make_last_layers(
        [512, 1024], backbone_widths[-1], branch0_out)

    # ---- branch 1 ----
    anchors1 = len(config["yolo"]["anchors"][1])
    branch1_out = anchors1 * (5 + config["yolo"]["classes"])
    self.last_layer1_conv = conv2d(512, 256, 1)
    # 2x upsampling; original note: height/width grow to 26x26, giving a
    # 26, 26, 256 feature map.
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    branch1_in = backbone_widths[-2] + 256
    self.last_layer1 = make_last_layers([256, 512], branch1_in, branch1_out)

    # ---- branch 2 ----
    anchors2 = len(config["yolo"]["anchors"][2])
    branch2_out = anchors2 * (5 + config["yolo"]["classes"])
    self.last_layer2_conv = conv2d(256, 128, 1)
    # Original note: 52, 52, 128 after this upsample.
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    # Input width accounts for the stacked feature maps; the head is again
    # described as 7 convolutions (5 + 2) in the original notes.
    branch2_in = backbone_widths[-3] + 128
    self.last_layer2 = make_last_layers([128, 256], branch2_in, branch2_out)
def __init__(self, config):
    """Create the Darknet-53 backbone and the three YOLO head stacks.

    Args:
        config: Mapping with a ``"yolo"`` entry holding ``"anchors"`` (three
            anchor groups, one per output scale) and ``"classes"`` (number
            of object classes).
    """
    super(YoloBody, self).__init__()
    self.config = config

    # ---------------------------------------------------------------- #
    #   Darknet-53 backbone. Per the original notes it yields three
    #   usable feature maps with shapes
    #       52, 52, 256
    #       26, 26, 512
    #       13, 13, 1024
    #   NOTE(review): those sizes presumably assume a 416x416 input.
    # ---------------------------------------------------------------- #
    self.backbone = darknet53(None)
    # Original note: out_filters is [64, 128, 256, 512, 1024].
    widths = self.backbone.layers_out_filters

    # ---------------------------------------------------------------- #
    #   Head output channels = anchors_at_scale * (5 + classes).
    #   Original note: all three equal 75 for the VOC dataset.
    # ---------------------------------------------------------------- #
    yolo_cfg = config["yolo"]

    out_ch0 = len(yolo_cfg["anchors"][0]) * (5 + yolo_cfg["classes"])
    self.last_layer0 = make_last_layers([512, 1024], widths[-1], out_ch0)

    out_ch1 = len(yolo_cfg["anchors"][1]) * (5 + yolo_cfg["classes"])
    self.last_layer1_conv = conv2d(512, 256, 1)
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer1 = make_last_layers(
        [256, 512], widths[-2] + 256, out_ch1)

    out_ch2 = len(yolo_cfg["anchors"][2]) * (5 + yolo_cfg["classes"])
    self.last_layer2_conv = conv2d(256, 128, 1)
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer2 = make_last_layers(
        [128, 256], widths[-3] + 128, out_ch2)
def __init__(self, config):
    """Set up the backbone and the three detection branches of the network.

    Args:
        config: Mapping indexed as ``config["yolo"]["anchors"][k]`` for
            ``k`` in 0..2 and ``config["yolo"]["classes"]``.
    """
    super(YoloBody, self).__init__()
    self.config = config

    def prediction_channels(k):
        # anchors in group k * (5 + classes); the original note gives
        # 3 * (4 + 1 + 20) = 75 as the worked example.
        return len(config["yolo"]["anchors"][k]) * (
            5 + config["yolo"]["classes"])

    # Both upsample layers share the same settings.
    upsample_args = dict(scale_factor=2, mode='nearest')

    # Store the Darknet-53 structure on ``self.backbone``.
    self.backbone = darknet53(None)
    feature_widths = self.backbone.layers_out_filters

    # Branch 0 works on the last backbone stage alone.
    n_out0 = prediction_channels(0)
    self.last_layer0 = make_last_layers(
        [512, 1024], feature_widths[-1], n_out0)

    # Branch 1: the conv + upsample pair produces a 26*26*256 feature map
    # (per the original note), so the head input is widened by 256.
    n_out1 = prediction_channels(1)
    self.last_layer1_conv = conv2d(512, 256, 1)
    self.last_layer1_upsample = nn.Upsample(**upsample_args)
    self.last_layer1 = make_last_layers(
        [256, 512], feature_widths[-2] + 256, n_out1)

    # Branch 2: original note gives 52*52*128 after the upsample; the head
    # input is widened by 128.
    n_out2 = prediction_channels(2)
    self.last_layer2_conv = conv2d(256, 128, 1)
    self.last_layer2_upsample = nn.Upsample(**upsample_args)
    self.last_layer2 = make_last_layers(
        [128, 256], feature_widths[-3] + 128, n_out2)
def __init__(self, anchor, num_classes):
    """Initialise the YOLOv3 network structure.

    Args:
        anchor: Indexable with 0, 1, 2; ``len(anchor[i])`` is the number of
            anchors (detections per grid cell) at output scale ``i``.
        num_classes: Number of object classes.
    """
    super(YoloBody, self).__init__()

    def output_channels(scale_anchors):
        # One grid cell can detect at most len(scale_anchors) objects, each
        # described by 5 + num_classes values.
        return len(scale_anchors) * (5 + num_classes)

    # ---------------------------------------------------------------- #
    #   Darknet-53 backbone, built without loading pretrained weights.
    #   Per the original notes, three feature maps are taken from it:
    #       52, 52, 256
    #       26, 26, 512
    #       13, 13, 1024
    # ---------------------------------------------------------------- #
    self.backbone = darknet53(None)
    # Original note: [64, 128, 256, 512, 1024].
    stage_channels = self.backbone.layers_out_channels

    # Scale 0 (original note: 13 * 13 * out0). The input is the last
    # backbone stage only (1024 channels per the note), no concat needed.
    out0 = output_channels(anchor[0])
    self.final_layer0 = make_last_layers(
        [512, 1024], stage_channels[-1], out0)

    # Scale 1 (original note: 26 * 26 * out1).
    # 1x1 conv: 13*13*512 -> 13*13*256, then upsample to 26*26*256.
    self.layer1_conv = conv2d(512, 256, 1)
    self.layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    out1 = output_channels(anchor[1])
    # Original note: 768 input channels, the size after concatenation.
    concat1 = stage_channels[-2] + 256
    self.final_layer1 = make_last_layers([256, 512], concat1, out1)

    # Scale 2 (original note: 52 * 52 * out2).
    self.layer2_conv = conv2d(256, 128, 1)
    self.layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    out2 = output_channels(anchor[2])
    # Original note: 384 input channels, the size after concatenation.
    concat2 = stage_channels[-3] + 128
    self.final_layer2 = make_last_layers([128, 256], concat2, out2)
def __init__(self, anchors_mask, num_classes, pretrained=False):
    """Build the YOLOv3 body: Darknet-53 backbone plus three detection heads.

    Args:
        anchors_mask: Indexable with 0, 1, 2; ``len(anchors_mask[i])`` is
            used as the number of anchors predicted by head ``i``.
        num_classes: Number of object classes.
        pretrained: When true, backbone weights are loaded from
            ``model_data/darknet53_backbone_weights.pth``.
    """
    super(YoloBody, self).__init__()
    # ---------------------------------------------------------------- #
    #   Darknet-53 backbone. Per the original notes it yields three
    #   usable feature maps with shapes
    #       52, 52, 256
    #       26, 26, 512
    #       13, 13, 1024
    # ---------------------------------------------------------------- #
    self.backbone = darknet53()
    if pretrained:
        # map_location="cpu" lets a checkpoint that was saved from a GPU be
        # read on a host without that device; load_state_dict then copies
        # the values into the backbone's existing parameters.
        backbone_state = torch.load(
            "model_data/darknet53_backbone_weights.pth", map_location="cpu")
        self.backbone.load_state_dict(backbone_state)

    # Original note: out_filters is [64, 128, 256, 512, 1024].
    out_filters = self.backbone.layers_out_filters

    # ---------------------------------------------------------------- #
    #   Head output channels = anchors_at_scale * (num_classes + 5).
    #   Original note: all three equal 75 for the VOC dataset.
    # ---------------------------------------------------------------- #
    self.last_layer0 = make_last_layers(
        [512, 1024], out_filters[-1],
        len(anchors_mask[0]) * (num_classes + 5))

    # Head 1 input = second-to-last stage width + 256 channels coming from
    # the conv/upsample pair below.
    self.last_layer1_conv = conv2d(512, 256, 1)
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer1 = make_last_layers(
        [256, 512], out_filters[-2] + 256,
        len(anchors_mask[1]) * (num_classes + 5))

    # Head 2 input = third-to-last stage width + 128 channels.
    self.last_layer2_conv = conv2d(256, 128, 1)
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.last_layer2 = make_last_layers(
        [128, 256], out_filters[-3] + 128,
        len(anchors_mask[2]) * (num_classes + 5))
def __init__(self, config):
    """Construct the backbone and the three multi-scale prediction heads.

    Args:
        config: Mapping read as ``config["yolo"]["anchors"][i]`` for ``i``
            in 0..2 and ``config["yolo"]["classes"]``.
    """
    super(YoloBody, self).__init__()
    self.config = config

    # The backbone structure obtained from darknet.py is stored on the
    # ``backbone`` attribute.
    self.backbone = darknet53(None)
    filters = self.backbone.layers_out_filters

    # --- Head for the "out5" feature map ---
    # Channels = anchors * (5 + classes); the original note works this out
    # as 3 * (4 + 1 + 20) = 75.
    num_anchors = len(config["yolo"]["anchors"][0])
    per_anchor = 5 + config["yolo"]["classes"]
    pred_channels0 = num_anchors * per_anchor
    # Original note: make_last_layers is seven convolutions, the last two
    # producing the regression and classification predictions.
    self.last_layer0 = make_last_layers(
        [512, 1024], filters[-1], pred_channels0)

    # --- Head for the "out4" feature map ---
    num_anchors = len(config["yolo"]["anchors"][1])
    per_anchor = 5 + config["yolo"]["classes"]
    pred_channels1 = num_anchors * per_anchor
    # 1x1 convolution adjusts the channel count.
    self.last_layer1_conv = conv2d(512, 256, 1)
    # First upsample; original note: a 26, 26, 256 feature map results.
    self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    # Original note: the two scales are stacked in the forward pass (not
    # visible here), hence the +256 on the head's input width.
    stacked_in1 = filters[-2] + 256
    self.last_layer1 = make_last_layers(
        [256, 512], stacked_in1, pred_channels1)

    # --- Head for the "out3" feature map ---
    num_anchors = len(config["yolo"]["anchors"][2])
    per_anchor = 5 + config["yolo"]["classes"]
    pred_channels2 = num_anchors * per_anchor
    # 1x1 convolution adjusts the channel count.
    self.last_layer2_conv = conv2d(256, 128, 1)
    # Second upsample; original note: a 52, 52, 128 feature map results.
    self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    # Same stacking as above, with 128 extra input channels.
    stacked_in2 = filters[-3] + 128
    self.last_layer2 = make_last_layers(
        [128, 256], stacked_in2, pred_channels2)