Exemplo n.º 1
0
    def __init__(self, config):
        """Build the darknet53 backbone and the three YOLO output branches.

        Args:
            config: Nested mapping. ``config["yolo"]["anchors"][i]`` must
                have a length for i in 0..2, and
                ``config["yolo"]["classes"]`` is added to 5 to obtain the
                number of outputs per anchor.
        """
        super(YoloBody, self).__init__()
        self.config = config

        # Backbone: instantiated directly instead of being built from cfg.
        self.backbone = darknet53(None)
        out_filters = self.backbone.layers_out_filters

        yolo_cfg = config["yolo"]
        anchor_groups = yolo_cfg["anchors"]
        outputs_per_anchor = 5 + yolo_cfg["classes"]

        # Branch 0 (last_layer0): consumes the last backbone stage.
        branch0_out = len(anchor_groups[0]) * outputs_per_anchor
        self.last_layer0 = make_last_layers(
            [512, 1024], out_filters[-1], branch0_out)

        # Branch 1 (embedding1): conv2d, then 2x nearest-neighbour upsampling.
        branch1_out = len(anchor_groups[1]) * outputs_per_anchor
        self.last_layer1_conv = conv2d(512, 256, 1)
        # Original note: the upsampled map is 26, 26, 256.
        self.last_layer1_upsample = nn.Upsample(mode='nearest',
                                                scale_factor=2)
        # Input width is the second-to-last backbone stage plus 256
        # (original note: the two previous feature maps are stacked).
        self.last_layer1 = make_last_layers(
            [256, 512], out_filters[-2] + 256, branch1_out)

        # Branch 2 (embedding2): same pattern, one stage shallower.
        branch2_out = len(anchor_groups[2]) * outputs_per_anchor
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(mode='nearest',
                                                scale_factor=2)
        self.last_layer2 = make_last_layers(
            [128, 256], out_filters[-3] + 128, branch2_out)
Exemplo n.º 2
0
    def __init__(self, config):
        """Create the backbone plus one prediction head per anchor group.

        Args:
            config: Nested mapping providing ``config["yolo"]["anchors"]``
                (indexed 0..2, each entry sized) and
                ``config["yolo"]["classes"]``.
        """
        super(YoloBody, self).__init__()
        self.config = config

        def head_channels(scale_idx):
            # anchors at this scale * (5 + classes); the original note gives
            # the example 3 * (5 + 20) = 3 * (4 + 1 + 20) = 75.
            yolo = config["yolo"]
            return len(yolo["anchors"][scale_idx]) * (5 + yolo["classes"])

        # Backbone: obtain the darknet structure and keep it on self.backbone.
        self.backbone = darknet53(None)

        # Original note: layers_out_filters = [64, 128, 256, 512, 1024].
        stage_widths = self.backbone.layers_out_filters

        # last_layer0 -- original note: 7 convolutions (5 + 2).
        width0 = head_channels(0)
        self.last_layer0 = make_last_layers([512, 1024], stage_widths[-1],
                                            width0)

        # embedding1 -- convolution, then upsampling; the original note says
        # height and width are expanded to 26x26, giving 26, 26, 256.
        width1 = head_channels(1)
        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(mode='nearest',
                                                scale_factor=2)
        # Original note: 7 convolutions (5 + 2).
        self.last_layer1 = make_last_layers([256, 512],
                                            stage_widths[-2] + 256, width1)

        # embedding2 -- original note: 52, 52, 128 after upsampling.
        width2 = head_channels(2)
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(mode='nearest',
                                                scale_factor=2)
        # Original note: after stacking, 7 convolutions (5 + 2).
        self.last_layer2 = make_last_layers([128, 256],
                                            stage_widths[-3] + 128, width2)
Exemplo n.º 3
0
    def __init__(self, config):
        """Set up the darknet53 backbone and the three yolo_head branches.

        Args:
            config: Nested mapping; ``config["yolo"]["anchors"][i]`` supplies
                the anchors for head i (0..2) and
                ``config["yolo"]["classes"]`` the class count.
        """
        super(YoloBody, self).__init__()
        self.config = config

        # ----------------------------------------------------------------
        # Build the darknet53 backbone model.
        # Original note: it yields three effective feature layers whose
        # shapes are 52,52,256 / 26,26,512 / 13,13,1024.
        # ----------------------------------------------------------------
        self.backbone = darknet53(None)

        # Original note: out_filters is [64, 128, 256, 512, 1024].
        out_filters = self.backbone.layers_out_filters

        # ----------------------------------------------------------------
        # Output channels of each yolo_head are
        #   (number of anchors for the head) * (5 + number of classes).
        # Original note: for the VOC dataset all three equal 75.
        # ----------------------------------------------------------------
        yolo_section = config["yolo"]
        anchors = yolo_section["anchors"]
        per_anchor = 5 + yolo_section["classes"]

        self.last_layer0 = make_last_layers(
            [512, 1024],
            out_filters[-1],
            len(anchors[0]) * per_anchor,
        )

        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(mode='nearest', scale_factor=2)
        self.last_layer1 = make_last_layers(
            [256, 512],
            out_filters[-2] + 256,
            len(anchors[1]) * per_anchor,
        )

        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(mode='nearest', scale_factor=2)
        self.last_layer2 = make_last_layers(
            [128, 256],
            out_filters[-3] + 128,
            len(anchors[2]) * per_anchor,
        )
Exemplo n.º 4
0
    def __init__(self, config):
        """Instantiate the backbone and the per-scale prediction layers.

        Args:
            config: Nested mapping read at ``config["yolo"]["anchors"]``
                (entries 0..2, each with a length) and
                ``config["yolo"]["classes"]``.
        """
        super(YoloBody, self).__init__()
        self.config = config

        # Backbone: fetch the darknet53 structure and keep it on self.backbone.
        self.backbone = darknet53(None)
        backbone_widths = self.backbone.layers_out_filters

        yolo = config["yolo"]
        anchor_sets = yolo["anchors"]
        # Values predicted per anchor. The original note's example:
        # 3 * (5 + num_classes) = 3 * (5 + 20) = 3 * (4 + 1 + 20) = 75.
        values_per_anchor = 5 + yolo["classes"]

        # last_layer0
        n_out0 = len(anchor_sets[0]) * values_per_anchor
        self.last_layer0 = make_last_layers(
            [512, 1024], backbone_widths[-1], n_out0
        )

        # embedding1
        n_out1 = len(anchor_sets[1]) * values_per_anchor
        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(
            scale_factor=2, mode='nearest'
        )
        # Original note: this yields a 26*26*256 feature layer.
        self.last_layer1 = make_last_layers(
            [256, 512], backbone_widths[-2] + 256, n_out1
        )

        # embedding2
        n_out2 = len(anchor_sets[2]) * values_per_anchor
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(
            scale_factor=2, mode='nearest'
        )
        # Original note: 52*52*128.
        self.last_layer2 = make_last_layers(
            [128, 256], backbone_widths[-3] + 128, n_out2
        )
Exemplo n.º 5
0
    def __init__(self, anchor, num_classes):
        """Initialise the YOLOv3 network structure.

        Args:
            anchor: Indexable with entries 0..2; ``len(anchor[i])`` is the
                number of anchors used by output i.
            num_classes: Class count; each anchor predicts
                ``5 + num_classes`` values.
        """
        super(YoloBody, self).__init__()
        # ----------------------------------------------------------------
        # Build the darknet53 backbone model.
        # Original note: three effective feature layers are taken from the
        # darknet53 output, with shapes 52,52,256 / 26,26,512 / 13,13,1024.
        # ----------------------------------------------------------------
        # None: by default no pretrained model is loaded (original note).
        self.backbone = darknet53(None)

        # Original note: these widths are [64, 128, 256, 512, 1024].
        stage_channels = self.backbone.layers_out_channels
        per_anchor = 5 + num_classes

        # Feature-pyramid sampling and outputs.
        # Output 0 (original note: 13 * 13 * channels). A single grid cell
        # can detect at most len(anchor[0]) objects. Original note:
        # channels 1024 --> output width; no concat needed.
        out0 = len(anchor[0]) * per_anchor
        self.final_layer0 = make_last_layers([512, 1024],
                                             stage_channels[-1], out0)

        # Output 1 (original note: 26 * 26 * channels).
        # Original note: 1*1 conv, 13*13*512 --> 13*13*256.
        self.layer1_conv = conv2d(512, 256, 1)
        # Original note: upsampling, 13*13*256 --> 26*26*256.
        self.layer1_upsample = nn.Upsample(mode='nearest', scale_factor=2)
        out1 = len(anchor[1]) * per_anchor
        # Original note: channels 768 --> output width; 768 is the size
        # after concat.
        self.final_layer1 = make_last_layers([256, 512],
                                             stage_channels[-2] + 256, out1)

        # Output 2 (original note: 52 * 52 * channels).
        self.layer2_conv = conv2d(256, 128, 1)
        self.layer2_upsample = nn.Upsample(mode='nearest', scale_factor=2)
        out2 = len(anchor[2]) * per_anchor
        # Original note: channels 384 --> output width; 384 is the size
        # after concat.
        self.final_layer2 = make_last_layers([128, 256],
                                             stage_channels[-3] + 128, out2)
Exemplo n.º 6
0
    def __init__(self, anchors_mask, num_classes, pretrained=False):
        """Build the darknet53 backbone and the three yolo_head branches.

        Args:
            anchors_mask: Indexable with entries 0..2; ``len(anchors_mask[i])``
                is the number of anchors assigned to head i.
            num_classes: Class count; each anchor predicts
                ``num_classes + 5`` values.
            pretrained: When true, backbone weights are loaded from
                ``model_data/darknet53_backbone_weights.pth`` (resolved
                relative to the current working directory).
        """
        super(YoloBody, self).__init__()
        #---------------------------------------------------#
        #   Build the darknet53 backbone model.
        #   Original note: it yields three effective feature layers whose
        #   shapes are:
        #   52,52,256
        #   26,26,512
        #   13,13,1024
        #---------------------------------------------------#
        self.backbone = darknet53()
        if pretrained:
            # map_location="cpu" lets a checkpoint saved from CUDA tensors
            # load on a host without a GPU. load_state_dict copies the
            # values into the backbone's existing parameters, so the
            # backbone's own device placement is unaffected.
            backbone_state = torch.load(
                "model_data/darknet53_backbone_weights.pth",
                map_location="cpu")
            self.backbone.load_state_dict(backbone_state)

        #---------------------------------------------------#
        #   Original note: out_filters is [64, 128, 256, 512, 1024]
        #---------------------------------------------------#
        out_filters = self.backbone.layers_out_filters

        #------------------------------------------------------------------------#
        #   Output channels of each yolo_head are
        #   (number of anchors for the head) * (num_classes + 5).
        #   Original note: for the VOC dataset all three equal 75.
        #------------------------------------------------------------------------#
        self.last_layer0 = make_last_layers([512, 1024], out_filters[-1],
                                            len(anchors_mask[0]) *
                                            (num_classes + 5))

        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256,
                                            len(anchors_mask[1]) *
                                            (num_classes + 5))

        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128,
                                            len(anchors_mask[2]) *
                                            (num_classes + 5))
Exemplo n.º 7
0
    def __init__(self, config):
        """Construct the backbone and the layers for the three feature scales.

        Args:
            config: Nested mapping; ``config["yolo"]["anchors"][i]`` (i in
                0..2) must have a length and ``config["yolo"]["classes"]``
                is added to 5 to size each anchor's prediction.
        """
        super(YoloBody, self).__init__()
        self.config = config

        def predicted_channels(head_idx):
            # Anchors for this head * (5 + classes). The original note's
            # example: 3 * (5 + 20) = 3 * (4 + 1 + 20) = 75.
            section = config["yolo"]
            anchors_here = section["anchors"][head_idx]
            return len(anchors_here) * (5 + section["classes"])

        # Backbone: the structure obtained from darknet.py is stored on the
        # .backbone attribute (original note).
        self.backbone = darknet53(None)
        filters = self.backbone.layers_out_filters

        # last_layer0 -- original note: this part handles the out5 feature
        # layer; make_last_layers is seven convolutions, the last two being
        # the regression and classification predictions.
        channels0 = predicted_channels(0)
        self.last_layer0 = make_last_layers([512, 1024], filters[-1],
                                            channels0)

        # embedding1 -- original note: this part handles the out4 feature
        # layer.
        channels1 = predicted_channels(1)
        # Original note: a 1*1 convolution adjusts the channel count.
        self.last_layer1_conv = conv2d(512, 256, 1)
        # First upsampling; original note: a 26,26,256 feature layer is
        # obtained at this point.
        self.last_layer1_upsample = nn.Upsample(mode='nearest',
                                                scale_factor=2)
        # Original note: feature layers of two scales are stacked during the
        # forward pass, hence the + 256 on the input width.
        self.last_layer1 = make_last_layers([256, 512], filters[-2] + 256,
                                            channels1)

        # embedding2 -- original note: this part handles the out3 feature
        # layer.
        channels2 = predicted_channels(2)
        # Original note: a 1*1 convolution adjusts the channel count.
        self.last_layer2_conv = conv2d(256, 128, 1)
        # Second upsampling; original note: a 52,52,128 feature layer is
        # obtained at this point.
        self.last_layer2_upsample = nn.Upsample(mode='nearest',
                                                scale_factor=2)
        # Original note: feature layers of two scales are stacked during the
        # forward pass, hence the + 128 on the input width.
        self.last_layer2 = make_last_layers([128, 256], filters[-3] + 128,
                                            channels2)