Пример #1
0
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        print('Loading pretrained weights.')

        model_dict = self.net.state_dict()
        pretrained_dict = torch.load(self.model_path)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
        model_dict.update(pretrained_dict)
        self.net.load_state_dict(model_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finish loading!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(self.anchors[i], len(self.class_names), (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
Пример #2
0
    def generate(self):
        
        self.net = YoloBody(len(self.anchors[0]),len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        state_dict = torch.load(self.model_path)
        self.net.load_state_dict(state_dict)
        
        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
    
        print('Finished!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(DecodeBox(self.anchors[i], len(self.class_names),  (self.model_image_size[1], self.model_image_size[0])))


        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
Пример #3
0
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        # print('Loading Mobilenet weights into state dict...') # Mobilenet 527
        # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # # state dict for backbound
        # backbone_dict ={key: value for key, value in self.net.state_dict().items() if key.split('.')[0] == 'backbone'}
        # state_dict_yolov4 = torch.load(self.model_path, map_location=device)
        # # state dict for head, SPP, PAN
        # switched_state_dict = {key: value for key, value in state_dict_yolov4.items() if key.split('.')[0] != 'backbone'}
        # state_dict = {**backbone_dict, **switched_state_dict}
        # self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
Пример #4
0
    def generate(self):
        #---------------------------------------------------#
        #   建立yolov4模型
        #---------------------------------------------------#
        self.net = YoloBody(len(self.anchors[0]),
                            len(self.class_names),
                            backbone=self.backbone).eval()

        #---------------------------------------------------#
        #   载入yolov4模型的权重
        #---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        print('Finished!')

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        #---------------------------------------------------#
        #   建立三个特征层解码用的工具
        #---------------------------------------------------#
        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
Пример #5
0
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        #替换训练好的backbone
        target_backbone = tail_model_backbone(
            source_model='CSPdarknetGA',
            target_model='CSPDarkNet',
            state_dict_path='D:\\datasets\\saved_model\\prune_baseline.pth',
            device=device)
        self.net.backbone.load_state_dict(target_backbone.state_dict())
        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
Пример #6
0
    def generate(self):
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        # 放到GPU上跑
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # 加载预训练模型参数
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            # 指定使用哪几块GPU  '0, 1, 2'
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            # 等我有多张卡的时候再说  /(ㄒoㄒ)/~~
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        # 实例化三类size的anchor的DecodeBox并存入列表
        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()
        print('Loading pretrained weights.')

        model_dict = self.net.state_dict()
        pretrained_dict = torch.load(self.model_path)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
        model_dict.update(pretrained_dict)
        self.net.load_state_dict(model_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finish loading!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(self.anchors[i], len(self.class_names), (self.input_shape[1], self.input_shape[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
Пример #8
0
    #-----------------------------------#
    train_dataset = YoloDataset(lines[:num_train], input_shape, mosaic=mosaic)
    '''
    it = iter(train_dataset)
    for i in range(24):
        ii = next(it)
        a, b = ii[0], ii[1]
        print(i)
        print(a.shape, b.shape)
    '''
    val_dataset = YoloDataset(lines[num_train:], input_shape, mosaic=False)

    #-----------------------------------#
    #   创建模型、加载预训练权重
    #-----------------------------------#
    model = YoloBody(3, num_classes)
    # 加快模型训练的效率
    print('加载预训练权重……')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('加载完成!')

    #-----------------------------------#
Пример #9
0
class YOLO(object):
    _defaults = {
        #"model_path": 'model_data/yolo4_weights.pth',
        "model_path": 'logs/Epoch14-Total_Loss16.0980-Val_Loss0.0000.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        #"classes_path": 'model_data/coco_classes.txt',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.1,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   获得所有的先验框
    #---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        state_dict = torch.load(self.model_path)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image

        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index,
                                                                     5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.model_image_size[0]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            # top = top - 250
            # left = left - 250
            # bottom = bottom + 250
            # right = right + 250
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5
            # 从左上角开始 剪切 200*200的图片
            img2 = image.crop((left, top, right, bottom))
            img2.save("lena2.jpg")
            top = max(0, np.floor(top).astype('int32'))
            left = max(0, np.floor(left).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right).astype('int32'))

            # 画框框
            # 画框框
            if predicted_class == 'person':
                predicted_class_ch = "Ren"
            elif predicted_class == 'chair':
                predicted_class_ch = "椅子"
            elif predicted_class == 'clock':
                predicted_class_ch = "钟"
            elif predicted_class == 'tie':
                predicted_class_ch = "厂牌吗??"
            elif predicted_class == 'cell phone':
                predicted_class_ch = "手机"
            elif predicted_class == 'laptop':
                predicted_class_ch = "笔记本电脑"
            elif predicted_class == 'QR':
                predicted_class_ch = "2维码"
            else:
                predicted_class_ch = "单号"
            label = '{} {} {:.2f} {}'.format(predicted_class_ch, '置信度', score,
                                             '%')
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 3])

            for i in range(thickness):
                draw.rectangle(  #边框
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[c])
            #draw.rectangle(
            #  [tuple(text_origin), tuple(text_origin)+ label_size],
            #  fill=self.colors[c])Y1909170500-F1-1568720302878.jpg 不行
            # 绘制文本E:\发货单\截图20200727212747.png
            # font = ImageFont.truetype("consola.ttf", 40, encoding="unic")  # 设置字体
            # draw.text((100, 50), u'Hello World', 'fuchsia', font)

            #draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            #del draw
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        return image
Пример #10
0
class YOLO(object):
    _defaults = {
        "model_path": './logs/best110.pth',
        "anchors_path": './data/anchors/yolov4_anchors.txt',
        "classes_path": './data/classes/yolov4_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.6,
        "cuda": False
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   初始化YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   获得所有的分类
    # ---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    # ---------------------------------------------------#
    #   获得所有的先验框
    # ---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   获得所有的分类
    # ---------------------------------------------------#
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')
        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    # ---------------------------------------------------#
    #   检测图片
    # ---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0  # 归一化
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return []

        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index,
                                                                     5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        l = []
        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))
            l.append([left, top, right, bottom, score, predicted_class])

        return l
import torchvision.models as models
from torchvision import transforms as transform
from nets.yolo4 import YoloBody
import torch
import cv2
import numpy as np
import os

model = YoloBody(num_anchors=3, num_classes=2)
model.load_state_dict(
    torch.load(r"logs/Epoch60.pth", map_location=torch.device('cuda')))
model.eval()
model.to(device='cuda')
sample = torch.ones(1, 3, 608, 608).to(device='cuda')
traced_script_module = torch.jit.trace(model, sample)
traced_script_module.save("/home/curious/code/yolo4-cpp/111.pt")
Пример #12
0
class YOLO(object):
    _defaults = {
        "model_path": 'model_data/yolo4_weights.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "model_image_size" : (416, 416, 3),
        "confidence": 0.5,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()
    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names
    
    #---------------------------------------------------#
    #   获得所有的先验框
    #---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1,:,:]

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def generate(self):
        
        self.net = YoloBody(len(self.anchors[0]),len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        state_dict = torch.load(self.model_path)
        self.net.load_state_dict(state_dict)
        
        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()
    
        print('Finished!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(DecodeBox(self.anchors[i], len(self.class_names),  (self.model_image_size[1], self.model_image_size[0])))


        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(letterbox_image(image, (self.model_image_size[0],self.model_image_size[1])))
        photo = np.array(crop_img,dtype = np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)
            
        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                                conf_thres=self.confidence,
                                                nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image
            
        top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
        top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
        top_label = np.array(batch_detections[top_index,-1],np.int32)
        top_bboxes = np.array(batch_detections[top_index,:4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)

        # 去掉灰条
        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)
            
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[self.class_names.index(predicted_class)])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
            del draw
        return image
Пример #13
0
class YOLO(object):
    _defaults = {
        "model_path":
        'model_data/Epoch102-Total_Loss11.0130-Val_Loss8.8086.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/helmet_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": False,
        #---------------------------------------------------------------------#
        #   该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize,
        #   在多次测试后,发现关闭letterbox_image直接resize的效果更好
        #---------------------------------------------------------------------#
        "letterbox_image": False,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   获得所有的先验框
    #---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   生成模型
    #---------------------------------------------------#
    def generate(self):
        #---------------------------------------------------#
        #   建立yolov4模型
        #---------------------------------------------------#
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        #---------------------------------------------------#
        #   载入yolov4模型的权重
        #---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        print('Finished!')

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        #---------------------------------------------------#
        #   建立三个特征层解码用的工具
        #---------------------------------------------------#
        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #   也可以直接resize进行识别
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(
                letterbox_image(
                    image,
                    (self.model_image_size[1], self.model_image_size[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize(
                (self.model_image_size[1], self.model_image_size[0]),
                Image.BICUBIC)
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))

        return output_list
    #------------------------------------------------------#
    #   Yolov4的tricks应用
    #   mosaic 马赛克数据增强 True or False
    #   Cosine_scheduler 余弦退火学习率 True or False
    #   label_smoothing 标签平滑 0.01以下一般 如0.01、0.005
    #------------------------------------------------------#
    mosaic = False
    Cosine_lr = False
    smoooth_label = 0

    #------------------------------------------------------#
    #   创建yolo模型
    #   训练前一定要修改classes_path和对应的txt文件
    #------------------------------------------------------#
    model = YoloBody(len(anchors[0]),
                     num_classes,
                     backbone=backbone,
                     pretrained=pretrained)

    #------------------------------------------------------#
    #   权值文件请看README,百度网盘下载
    #------------------------------------------------------#
    model_path = "model_data/yolov4_mobilenet_v1_map76.62.pth"
    # 加快模型训练的效率
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if np.shape(model_dict[k]) == np.shape(v)
Пример #15
0
    mosaic = False
    Cosine_lr = False
    smoooth_label = 0

    #----------------------------------------------------#
    #   获取classes和anchor
    #----------------------------------------------------#
    class_names = get_classes(classes_path)
    anchors = get_anchors(anchors_path)
    num_classes = len(class_names)

    #------------------------------------------------------#
    #   创建yolo模型
    #   训练前一定要修改classes_path和对应的txt文件
    #------------------------------------------------------#
    model = YoloBody(len(anchors[0]), num_classes)
    weights_init(model)

    #------------------------------------------------------#
    #   权值文件请看README,百度网盘下载
    #------------------------------------------------------#
    model_path = "model_data/yolo4_weights.pth"
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')
Пример #16
0
    #   Dataloder的使用
    #-------------------------------#
    Use_Data_Loader = True

    annotation_path = '2007_train.txt'
    #-------------------------------#
    #   获得先验框和类
    #-------------------------------#
    anchors_path = 'model_data/plane_anchors.txt'
    classes_path = 'model_data/dota2cls.txt'
    class_names = get_classes(classes_path)
    anchors = get_anchors(anchors_path)
    num_classes = len(class_names)

    # 创建模型
    model = YoloBody(len(anchors[0]), num_classes)
    #print(model)
    # model_path = "model_data/yolo4_weights.pth"
    # # 加快模型训练的效率
    # print('Loading weights into state dict...')
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # model_dict = model.state_dict()
    # pretrained_dict = torch.load(model_path, map_location=device)
    # pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
    # model_dict.update(pretrained_dict)
    # model.load_state_dict(model_dict)
    # print('Finished!')

    net = model.train()
    if Cuda:
        net = torch.nn.DataParallel(model)
Пример #17
0
class YOLO(object):
    _defaults = {
        "model_path": 'logs/Epoch100-Train_loss6.1517-Val_Loss4.5064.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/voc_class.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": True
    }

    # classmethod 修饰符对应的函数不需要实例化即可调用
    # 使用特殊参数cls而非self
    @classmethod
    def get_defaults(cls, attribute_name):
        if attribute_name in cls._defaults:
            return cls._defaults[attribute_name]
        else:
            return '没有定义的属性:' + attribute_name

    #---------------------------------------------------#
    #   初始化YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        # 类的静态函数、类函数、普通函数、全局变量以及一些内置的属性都是放在类__dict__里的
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   获得所有的先验框
    #---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def generate(self):
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        # 放到GPU上跑
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # 加载预训练模型参数
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            # 指定使用哪几块GPU  '0, 1, 2'
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            # 等我有多张卡的时候再说  /(ㄒoㄒ)/~~
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        # 实例化三类size的anchor的DecodeBox并存入列表
        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        # 更改格式为float32
        photo = np.array(crop_img, dtype=np.float32)
        # 像素值压缩到0-1之间
        photo /= 255.0
        # 读进来得图片是H*W*C,输入网络时要求C*H*W,因此在这里变换通道
        photo = np.transpose(photo, (2, 0, 1))
        # photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            # 把numpy格式的像素数组转为tensor
            images = torch.from_numpy(images)
            if self.cuda:
                # 把数据放到GPU上
                images = images.cuda()
            # YoloBody得到预测结果
            # self.net == self.net.forword(images)
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            # 用第i个DecodeBox来处理第i个output
            output_list.append(self.yolo_decodes[i](outputs[i]))

        # 将13、26、52的output拼接到一起  bs * 10647 * [4+1+num_classes]
        output = torch.cat(output_list, 1)

        # 使用非极大似然抑制剔除一定区域内的重复框
        # bs * n * [(x1,y1,x2,y2)+obj_conf+class_conf+class_pred]
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

        # 整理检测结果
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image

        # 根据score再筛选一遍,但是在non_max_suppression已经使用score筛选过了为什么还要筛选呢?
        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        # 根据筛选结果得到符合要求的score、label、bboxes
        top_score = batch_detections[top_index,
                                     4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        # 将(x1,y1,x2,y2)分别扩展至n*1维,n为box总数
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条,得到原图上预测框(y1,x1,y2,x2)坐标(top,left,bottom,right)
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        # 绘制检测结果
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))
        # 矩形框四边线条厚度
        # thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]
        thickness = int(
            max(np.ceil(np.shape(image)[0] / self.model_image_size[0]),
                np.ceil(np.shape(image)[1] / self.model_image_size[0]))) + 1

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_score[i]

            top, left, bottom, right = boxes[i]
            # top = top - 5
            # left = left - 5
            # bottom = bottom + 5
            # right = right + 5

            # top = max(0, np.floor(top + 0.5).astype('int32'))
            # left = max(0, np.floor(left + 0.5).astype('int32'))
            # bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            # right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            top = max(0, np.ceil(top).astype('int32'))
            left = max(0, np.ceil(left).astype('int32'))
            bottom = min(np.shape(image)[0], np.ceil(bottom).astype('int32'))
            right = min(np.shape(image)[1], np.ceil(right).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            # 返回使用指定字体对象显示给定字符串所需要的图像尺寸
            label_size = draw.textsize(label, font)
            # label = label.encode('utf-8')
            # print(label)

            # 如果顶部有文本框的空间,文本框放置在预测框左上方的外部
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            # 顶部没有文本框的空间,文本框放置在预测框左上方的内部
            else:
                # text_origin = np.array([left, top + 1])
                text_origin = np.array([left + 1, top + 1])

            # 绘制预测框的空心矩形
            draw.rectangle(
                [left, top, right, bottom],
                outline=self.colors[self.class_names.index(predicted_class)],
                width=thickness)
            # 绘制文本框的实心矩形
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            # 绘制文本框内的文字
            # draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font = font)
            # fill = (0, 0, 0) 文字颜色纯黑
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)

            del draw
        return image
class YOLO4_inference(object):

    # ---------------------------------------------------#
    #   初始化YOLO
    # ---------------------------------------------------#
    def __init__(self, model_path, input_shape=416,confidence=0.5, cuda=True):
        self.class_names = ID2CLASS
        self.anchors = anchors
        self.model_path=model_path
        self.input_shape=(input_shape,input_shape,3)
        self.confidence=confidence
        self.cuda=cuda

        # 画框设置不同的颜色
        self.colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255)]
        self.generate()

    # ---------------------------------------------------#
    #   加载训练好的模型
    # ---------------------------------------------------#
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()
        print('Loading pretrained weights.')

        model_dict = self.net.state_dict()
        pretrained_dict = torch.load(self.model_path)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
        model_dict.update(pretrained_dict)
        self.net.load_state_dict(model_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finish loading!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(self.anchors[i], len(self.class_names), (self.input_shape[1], self.input_shape[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

    def predict(self,image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(letterbox_image(image, (self.input_shape[0], self.input_shape[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)

        batch_detections = batch_detections[0].cpu().numpy()

        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), \
                                                 np.expand_dims(top_bboxes[:, 1], -1), \
                                                 np.expand_dims(top_bboxes[:, 2], -1), \
                                                 np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

        return boxes,top_label,top_conf
    # ---------------------------------------------------#
    #   检测图片
    # ---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(letterbox_image(image, (self.input_shape[0], self.input_shape[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image

        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), np.expand_dims(top_bboxes[:, 1],
                                                                                                      -1), np.expand_dims(
            top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

        font = ImageFont.truetype(font='simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{}: {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[self.class_names.index(predicted_class)])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            del draw
        return image
Пример #19
0
class YOLO(object):
    _defaults = {
        "model_path": 'model_data/yolo4_weights.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   获得所有的先验框
    #---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        # 加快模型训练的效率
        #print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        #print('Finished!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        predict = np.zeros(12)
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output,
                                               len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image, predict

        top_index = batch_detections[:,
                                     4] * batch_detections[:,
                                                           5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index,
                                                                     5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.model_image_size[0]

        predict = self.calsquare(image, boxes, top_label, predict)

        return image, predict

    def calsquare(self, image, boxes, top_label, res):
        dic = {2: 2, 7: 4, 5: 6, 3: 8, 1: 10}
        w, h = image.size
        top_main = h / 8
        bottom_main = 7 * h / 8
        left_main = w / 8
        right_main = 7 * w / 8
        box_main = [top_main, left_main, bottom_main, right_main]
        square_rate = 0
        square1 = 0
        square2 = 0
        pre = []
        for i, c in enumerate(top_label):
            if c not in dic:
                continue
            predicted_class = self.class_names[c]
            top, left, bottom, right = boxes[i]

            square = (bottom - top) * (right - left)
            if square >= 0.4 * w * h:
                continue
            if bottom > h - 30 and left < 30 and right > w - 30:
                continue
            flag = 0
            for j in range(len(pre)):
                temp = abs(pre[j] - boxes[i])
                if sum(temp) < 20:
                    flag = 1
                    break
            if flag:
                continue
            pre.append(list(boxes[i]))

            if self.mat_inter(box_main, boxes[i]):
                top, left, bottom, right = boxes[i]
                w_con = min(right, right_main) - max(left, left_main)
                h_con = min(bottom, bottom_main) - max(top, top_main)
                square_con = w_con * h_con

                square1 = square1 + square_con
                square2 = square2 + (square - square_con)

                if square_con >= 0.5 * square:
                    idx = dic[c]
                    res[idx] = res[idx] + 1
                else:
                    idx = dic[c] + 1
                    res[idx] = res[idx] + 1
            else:
                square2 = square2 + square
                idx = dic[c] + 1
                res[idx] = res[idx] + 1

        res[0] = square1 / (h * w / 4)
        res[1] = square2 / (3 * h * w / 4)

        return res

    def mat_inter(self, box1, box2):
        # 判断两个矩形是否相交
        y01, x01, y02, x02 = box1
        y11, x11, y12, x12 = box2

        lx = abs((x01 + x02) / 2 - (x11 + x12) / 2)
        ly = abs((y01 + y02) / 2 - (y11 + y12) / 2)
        sax = abs(x01 - x02)
        sbx = abs(x11 - x12)
        say = abs(y01 - y02)
        sby = abs(y11 - y12)
        if lx <= (sax + sbx) / 2 and ly <= (say + sby) / 2:
            return True
        else:
            return False
Пример #20
0
classes_path = 'model_data/coco_classes.txt'

# get classes
classes_path = os.path.expanduser(classes_path)
with open(classes_path) as f:
    class_names = f.readlines()
class_names = [c.strip() for c in class_names]

# get anchors
anchors_path = os.path.expanduser(anchors_path)
with open(anchors_path) as f:
    anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
anchors = np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

net = YoloBody(len(anchors[0]), len(class_names)).eval()
print('Loading weights into state dict...')
is_cuda = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(is_cuda)
state_dict = torch.load(model_path, map_location=device)
net.load_state_dict(state_dict)

if is_cuda == "cuda":
    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    net = net.cuda()

print("model set!")

# -------------- export the model
input_names = ["input_0"]
output_names = ["output_0", "output_1", "output_2"]
Пример #21
0
class YOLO(object):
    _defaults = {
        "model_path": 'model_data/yolov4_mobilenet_v1_map76.62.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/voc_classes.txt',
        "backbone": 'mobilenetv1',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "iou": 0.3,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   获得所有的先验框
    #---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   生成模型
    #---------------------------------------------------#
    def generate(self):
        #---------------------------------------------------#
        #   建立yolov4模型
        #---------------------------------------------------#
        self.net = YoloBody(len(self.anchors[0]),
                            len(self.class_names),
                            backbone=self.backbone).eval()

        #---------------------------------------------------#
        #   载入yolov4模型的权重
        #---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        print('Finished!')

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        #---------------------------------------------------#
        #   建立三个特征层解码用的工具
        #---------------------------------------------------#
        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------------#
        #   给图像增加灰条,实现不失真的resize
        #---------------------------------------------------------#
        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        images = [photo]

        with torch.no_grad():
            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))

            #---------------------------------------------------------#
            #   将预测框进行堆叠,然后进行非极大抑制
            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)

            #---------------------------------------------------------#
            #   如果没有检测出物体,返回原图
            #---------------------------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return image

            #---------------------------------------------------------#
            #   对预测框进行得分筛选
            #---------------------------------------------------------#
            top_index = batch_detections[:,
                                         4] * batch_detections[:,
                                                               5] > self.confidence
            top_conf = batch_detections[top_index,
                                        4] * batch_detections[top_index, 5]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            #-----------------------------------------------------------------#
            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
            #   因此生成的top_bboxes是相对于有灰条的图像的
            #   我们需要对其进行修改,去除灰条的部分。
            #-----------------------------------------------------------------#
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0], self.model_image_size[1]]),
                image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
Пример #22
0
#--------------------------------------------#
#   该部分代码只用于看网络结构,并非测试代码
#   map测试请看get_dr_txt.py、get_gt_txt.py
#   和get_map.py
#--------------------------------------------#
import torch
from torchsummary import summary

from nets.CSPdarknet import darknet53
from nets.yolo4 import YoloBody

if __name__ == "__main__":
    # 需要使用device来指定网络在GPU还是CPU运行
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = YoloBody(3, 1).to(device)
    summary(model, input_size=(3, 768, 768))
Пример #23
0
import torch
from torchsummary import summary
from nets.CSPdarknet import darknet53
from nets.yolo4 import YoloBody
import numpy as np

if __name__ == "__main__":
    # 需要使用device来指定网络在GPU还是CPU运行
    if True:
        model_path = './wheat_yolo.pth'
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model = YoloBody(3, 1).to(device)
        model_dict = model.state_dict()
        pretrained_dict = torch.load(model_path, map_location=device)
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items()
            if np.shape(model_dict[k]) == np.shape(v)
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    summary(model, input_size=(3, 416, 416))
Пример #24
0
import torch
from torchsummary import summary

from nets.yolo4 import YoloBody

if __name__ == "__main__":
    # 需要使用device来指定网络在GPU还是CPU运行
    device  = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model   = YoloBody(3,80,backbone="mobilenetv1").to(device)
    summary(model, input_size=(3, 416, 416))
    
    # mobilenetv1-yolov4 40,952,893
    # mobilenetv2-yolov4 39,062,013
    # mobilenetv3-yolov4 39,989,933

    # 修改了panet的mobilenetv1-yolov4 12,692,029
    # 修改了panet的mobilenetv2-yolov4 10,801,149
    # 修改了panet的mobilenetv3-yolov4 11,729,069
Пример #25
0
import torch
from torchsummary import summary
from nets.CSPdarknet import darknet53
from nets.yolo4 import YoloBody

if __name__ == "__main__":
    # 需要使用device来指定网络在GPU还是CPU运行
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = YoloBody(3, 20).to(device)
    summary(model, input_size=(3, 416, 416))
Пример #26
0
    #   Dataloader的使用
    #-------------------------------#
    Use_Data_Loader = True

    annotation_path = '2007_train.txt'
    #-------------------------------#
    #   获得先验框和类
    #-------------------------------#
    anchors_path = './yolo_anchors.txt'
    classes_path = 'model_data/svhn_classes.txt'
    class_names = get_classes(classes_path)
    anchors = get_anchors(anchors_path)
    num_classes = len(class_names)

    # 创建模型
    model = YoloBody(len(anchors[0]), num_classes)
    #-------------------------------------------#
    #   权值文件的下载请看README
    #-------------------------------------------#
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    '''
    model_path = "model_data/yolo4_voc_weights.pth"
    # 加快模型训练的效率
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    '''
Пример #27
0
class YOLO(object):
    _defaults = {
        "model_path": 'logs/Epoch92-Total_Loss2.1432-Val_Loss4.1385.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/voc_classes.txt',
        "model_image_size": (416, 416, 3),
        "confidence": 0.5,
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   初始化YOLO
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    # ---------------------------------------------------#
    #   获得所有的分类
    # ---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    # ---------------------------------------------------#
    #   获得所有的先验框
    # ---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    # ---------------------------------------------------#
    #   获得所有的分类
    # ---------------------------------------------------#
    def generate(self):

        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        # 加快模型训练的效率
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        print('Finished!')

        self.yolo_decodes = []
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(self.anchors[i], len(self.class_names), (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    # ---------------------------------------------------#
    #   检测图片
    # ---------------------------------------------------#
    def detect_image(self, image, aligned_depth_frame=None, color_intrin_part=None, mode=1):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(letterbox_image(image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float32)
        photo /= 255.0
        photo = np.transpose(photo, (2, 0, 1))
        photo = photo.astype(np.float32)
        images = []
        images.append(photo)
        images = np.asarray(images)

        with torch.no_grad():
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=0.3)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except:
            return image

        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), np.expand_dims(top_bboxes[:, 1],
                                                                                                      -1), np.expand_dims(
            top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # 去掉灰条
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))
            # print(np.shape(image)[0], np.shape(image)[1])
            # print("left:{}, top:{}, right:{}, bottom:{}".format(left, top, right, bottom))
            fail = 0
            if (aligned_depth_frame and color_intrin_part):
                ppx = color_intrin_part[0]
                ppy = color_intrin_part[1]
                fx = color_intrin_part[2]
                fy = color_intrin_part[3]
                width = aligned_depth_frame.width
                height = aligned_depth_frame.height

# ----------------------------------------------------------------------------------------------------------------
# 1、取中心点像素深度
# ----------------------------------------------------------------------------------------------------------------
                if mode == 1:
                    center_x = int(round((left + right) / 2))
                    center_y = int(round((top + bottom) / 2))
                    # print("center:", center_x, center_y)
                    # print("depth size:", width, height)
                    center_x = min(max(1, center_x), width - 1)
                    center_y = min(max(1, center_y), height - 1)
                    # print("center_after:", center_x, center_y)
                    # center_x = min(max(0,center_x),width)
                    # center_y = min(max(0,center_y),height)
                    target_xy_pixel = [center_x, center_y]
                    target_depth = aligned_depth_frame.get_distance(target_xy_pixel[0], target_xy_pixel[1])
                    strDistance = "\n%.2f m" % target_depth
                    target_xy_true = [(target_xy_pixel[0] - ppx) * target_depth / fx,
                                      (target_xy_pixel[1] - ppy) * target_depth / fy]

# # ----------------------------------------------------------------------------------------------------------------
# # 2、取box里面所有像素深度值后平均
# # ----------------------------------------------------------------------------------------------------------------
#                 elif mode == 2:
#                     depth = 0
#                     cnt = 0
#                     depth_matrix = np.zeros((width, height))
#                     for x in range(left, right):
#                         for y in range(top, bottom):
#                             depth_matrix[x][y] = aligned_depth_frame.get_distance(x, y)
#                             # print("x:{}, y:{}".format(x,y),depth_matrix[x][y])
#                             depth += depth_matrix[x][y]
#                             cnt += 1
#                     target_depth = depth / cnt
#                     minn = 1000000
#                     pseudo_x = 0
#                     pseudo_y = 0
#                     for x in range(left, right):
#                         for y in range(top, bottom):
#                             if minn > abs(depth_matrix[x][y] - target_depth):
#                                 minn = abs(depth_matrix[x][y] - target_depth)
#                                 pseudo_x = x
#                                 pseudo_y = y
#                     target_xy_pixel = [pseudo_x, pseudo_y]
#                     strDistance = " depth: %.2f m" % target_depth
#                     target_xy_true = [(pseudo_x - ppx) * target_depth / fx,
#                                       (pseudo_y - ppy) * target_depth / fy]
#
# # ----------------------------------------------------------------------------------------------------------------
# # 3、去前后百分之十的极值后再平均
# # ----------------------------------------------------------------------------------------------------------------
#                 elif mode == 3:
#                     depth = 0
#                     cnt = 0
#                     depth_matrix = np.zeros((width, height))
#                     for x in range(left, right):
#                         for y in range(top, bottom):
#                             depth_matrix[x][y] = aligned_depth_frame.get_distance(x, y)
#
#                     depth_matrix_flat = depth_matrix[left:right, top:bottom].reshape((right - left) * (bottom - top), )
#                     matrix_flat_len = depth_matrix_flat.shape[0]
#                     drop_len = int(matrix_flat_len * 0.1)
#                     depth_matrix_flat.sort()
#                     depth_matrix_flat = depth_matrix_flat[drop_len:-drop_len]
#                     depth = depth_matrix_flat.sum()
#
#                     target_depth = depth / (matrix_flat_len - 2 * drop_len)
#                     minn = 1000000
#                     pseudo_x = 0
#                     pseudo_y = 0
#                     for x in range(left, right):
#                         for y in range(top, bottom):
#                             if minn > abs(depth_matrix[x][y] - target_depth):
#                                 minn = abs(depth_matrix[x][y] - target_depth)
#                                 pseudo_x = x
#                                 pseudo_y = y
#                     target_xy_pixel = [pseudo_x, pseudo_y]
#                     strDistance = " depth: %.2f m" % target_depth
#                     target_xy_true = [(pseudo_x - ppx) * target_depth / fx,
#                                       (pseudo_y - ppy) * target_depth / fy]
#
# # ----------------------------------------------------------------------------------------------------------------
# # 4、去掉深度缺失的像素(深度为0)后再平均
# # ----------------------------------------------------------------------------------------------------------------
#                 elif mode == 4:
#                     depth = 0
#                     cnt = 0
#                     depth_matrix = np.zeros((width, height))
#                     for x in range(left, right):
#                         for y in range(top, bottom):
#                             depth_matrix[x][y] = aligned_depth_frame.get_distance(x, y)
#                             if depth_matrix[x][y] > 0:
#                                 depth += depth_matrix[x][y]
#                                 cnt += 1
#                     if cnt == 0:
#                         print("该目标框内所有像素均检测缺失,无法计算深度")
#                         fail = 1
#                     else:
#                         target_depth = depth / cnt
#                         minn = 1000000
#                         pseudo_x = 0
#                         pseudo_y = 0
#                         for x in range(left, right):
#                             for y in range(top, bottom):
#                                 if minn > abs(depth_matrix[x][y] - target_depth):
#                                     minn = abs(depth_matrix[x][y] - target_depth)
#                                     pseudo_x = x
#                                     pseudo_y = y
#                         target_xy_pixel = [pseudo_x, pseudo_y]
#                         strDistance = " depth: %.2f m" % target_depth
#                         target_xy_true = [(pseudo_x - ppx) * target_depth / fx,
#                                           (pseudo_y - ppy) * target_depth / fy]

            else:
                strDistance = "\n 0 m"

            # 画框框----------------------------------------------------------------------------------------------------
            if fail == 0:
                label = '{} {:.2f}'.format(predicted_class, score)
                label = label + strDistance
                draw = ImageDraw.Draw(image)
                label_size = draw.textsize(label, font)
                label = label.encode('utf-8')
                # print(label)
                print('检测出目标:{} ;实际坐标为(m):({:.3f}, {:.3f}, {:.3f}) \n中心点像素坐标(pixel):({}, {}) ;中心点相机坐标(m):({},{});深度: {} m\n'.format(predicted_class,
                                                                                                target_xy_true[0],target_xy_true[1],target_depth,
                                                                                                target_xy_pixel[0],
                                                                                                target_xy_pixel[1],
                                                                                                target_xy_true[0],
                                                                                                target_xy_true[1],
                                                                                                target_depth))

                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])

                for i in range(thickness):
                    draw.rectangle(
                        [left + i, top + i, right - i, bottom - i],
                        outline=self.colors[self.class_names.index(predicted_class)])
                draw.rectangle(
                    [tuple(text_origin), tuple(text_origin + label_size)],
                    fill=self.colors[self.class_names.index(predicted_class)])
                draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
                del draw
        return image
class YOLO(object):
    _defaults = {
        "model_path": 'model_data/yolo4_weights.pth',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "model_image_size":
        (416, 416, 3),  #这里的model_image_size是什么,不会跟图像size产生冲突吗,为什么不可以改????
        "confidence": 0.3,
        "iou": 0.3,
        "cuda": False
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化YOLO
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.generate()

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   获得所有的先验框
    #---------------------------------------------------#
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]

    #---------------------------------------------------#
    #   生成模型
    #---------------------------------------------------#
    def generate(self):
        #---------------------------------------------------#
        #   建立yolov4模型
        #---------------------------------------------------#
        self.net = YoloBody(len(self.anchors[0]), len(self.class_names)).eval()

        #---------------------------------------------------#
        #   载入yolov4模型的权重
        #---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.net.load_state_dict(state_dict)
        print('Finished!')

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

        #---------------------------------------------------#
        #   建立三个特征层解码用的工具
        #---------------------------------------------------#
        self.yolo_decodes = []  # 创建数组,将三个解码器放到数组中
        for i in range(3):
            self.yolo_decodes.append(
                DecodeBox(
                    self.anchors[i], len(self.class_names),
                    (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        # embed()
        image_shape = np.array(np.shape(image)[0:2])
        num_class = len(self.class_names)  # 有80类
        # embed()

        #---------------------------------------------------------#
        #   给图像增加灰条(什么是灰条),实现不失真的resize
        #---------------------------------------------------------#
        # 复制image return new_image
        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[1], self.model_image_size[0])))
        photo = np.array(crop_img, dtype=np.float32) / 255.0  # 归一化?
        photo = np.transpose(
            photo, (2, 0, 1)
        )  # 转置:将Image.open(img)得到的[H,W,C]格式转换permute为pytorch可以处理的[C,H,W]格式
        #---------------------------------------------------------#
        #   添加上batch_size维度
        #---------------------------------------------------------#
        images = [photo]  # 将photo变为list类型

        with torch.no_grad(
        ):  # disabled gradient calculation,reduce memory consumption for computations
            images = torch.from_numpy(
                np.asarray(images)
            )  # Creates a Tensor from a numpy.ndarray,此时images的shape为[1, 3, 416, 416]
            if self.cuda:
                images = images.cuda()

            #---------------------------------------------------------#
            #   将图像输入网络当中进行预测!
            #---------------------------------------------------------#
            # embed()
            # 从这里开始处理
            # 特征提取
            # 输出outputs为tuple,len=3,每个tensor的shape分别为 第一个特征层[1, 255, 13, 13],第二个特征层[1, 255, 26, 26],第三个特征层[1, 255, 52, 52]
            outputs = self.net(images)
            # embed()
            output_list = []
            for i in range(3):  # 为什么是3
                # 有三个特征层,每个特征层对应自己的decode解码器
                output_list.append(self.yolo_decodes[i](
                    outputs[i]))  # 在这里打几个断点看看

            #---------------------------------------------------------#
            #   将预测框进行堆叠,然后进行非极大抑制
            #---------------------------------------------------------#
            # torch.cat()对矩阵按行进行拼接得到向量
            output = torch.cat(output_list, 1)  # 这里也打几个断点
            # output就是predictions,格式为[batch_size, num_anchors, 85]
            batch_detections = non_max_suppression(output,
                                                   len(self.class_names),
                                                   conf_thres=self.confidence,
                                                   nms_thres=self.iou)
            # embed()

            #---------------------------------------------------------#
            #   如果没有检测出物体,返回原图
            #---------------------------------------------------------#
            try:
                batch_detections = batch_detections[0].cpu().numpy()
            except:
                return image

            #---------------------------------------------------------#
            #   对预测框进行得分筛选
            #---------------------------------------------------------#
            # coordinates = []# bboxes的坐标

            top_index = batch_detections[:,
                                         4] * batch_detections[:,
                                                               5] > self.confidence
            top_conf = batch_detections[top_index,
                                        4] * batch_detections[top_index, 5]
            top_label = np.array(batch_detections[top_index, -1], np.int32)
            top_bboxes = np.array(batch_detections[top_index, :4])

            # 得到坐标点
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            # coordinates.append((top_xmin,top_xmax,top_ymin,top_ymax))# 把四个坐标点看做一个整体

            #-----------------------------------------------------------------#
            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
            #   因此生成的top_bboxes是相对于有灰条的图像的
            #   我们需要对其进行修改,去除灰条的部分。
            #-----------------------------------------------------------------#

            # boxes存放各目标的坐标
            boxes = yolo_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([self.model_image_size[0], self.model_image_size[1]]),
                image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max((np.shape(image)[0] + np.shape(image)[1]) //
                        self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            # embed()
            predicted_class = self.class_names[c]
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            # 左上角点的坐标
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            # 右下角点的坐标
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(  # 画框框
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image, boxes  # 将boxes返回
Пример #29
0
import torch
from torchsummary import summary

from nets.yolo4 import YoloBody

if __name__ == "__main__":
    # 需要使用device来指定网络在GPU还是CPU运行
    device  = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model   = YoloBody(3,1,backbone="mobilenetv2").to(device)
    summary(model, input_size=(3, 768, 768))
    
    # mobilenetv1-yolov4 40,952,893
    # mobilenetv2-yolov4 39,062,013
    # mobilenetv3-yolov4 39,989,933

    # 修改了panet的mobilenetv1-yolov4 12,692,029
    # 修改了panet的mobilenetv2-yolov4 10,801,149
    # 修改了panet的mobilenetv3-yolov4 11,729,069
Пример #30
0
    mosaic = True
    # 用于设定是否使用cuda
    Cuda = True
    smoooth_label = 0

    Use_Data_Loader = True

    annotation_path = './data/dataset/train.txt'
    classes_path = './data/classes/yolov4_classes.txt'
    anchors_path = './data/anchors/yolov4_anchors.txt'
    class_names = get_classes(classes_path)
    anchors = get_anchors(anchors_path)
    num_classes = len(class_names)

    # 创建模型
    model = YoloBody(len(anchors[0]), num_classes)
    model_path = "./model_data/yolo4_weights.pth"

    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')

    net = model.train()

    if Cuda:
        net = torch.nn.DataParallel(model)