Example #1
def detect(save_img=False):
    '''Gather the output directory, input source, weights, and other options'''
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)  # choose the device
    # if the device is a GPU, use float16
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    # load the FP32 model and make sure the requested input resolution is
    # divisible by 32 (otherwise it is adjusted to a divisible value and returned)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
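    # (illustrative note, not in the original: with stride 32, an imgsz of 641
    # would be rounded up by check_img_size to 672, the next multiple of 32)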
    if half:
        model.half()  # cast the model to FP16

    # Second-stage classifier
    # second-stage classification, disabled by default
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    # choose the dataloader according to the input source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        view_img = True  # set to True here to display images/video while detecting
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    # get the class names
    names = model.module.names if hasattr(model, 'module') else model.names
    # colors for drawing the boxes
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        # run one dummy forward pass to warm up and sanity-check the model
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()

    """
       path 图片/视频路径
       img 进行resize+pad之后的图片
       img0 原size图片
       cap 当读取图片时为None,读取视频时为视频源
    """
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        # cast the image to FP16 as well when half precision is enabled
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # add a batch dimension if the input has none
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        """
               前向传播 返回pred的shape是(1, num_boxes, 5+num_class)
               h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w
               num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
               pred[..., 0:4]为预测框坐标
               预测框坐标为xywh(中心点+宽长)格式
               pred[..., 4]为objectness置信度
               pred[..., 5:-1]为分类结果
        """

        # Apply NMS
        """
               pred:前向传播的输出
               conf_thres:置信度阈值
               iou_thres:iou阈值
               classes:是否只保留特定的类别
               agnostic:进行nms是否也去除不同类别之间的框
               经过nms之后,预测框格式:xywh-->xyxy(左上角右下角)
               pred是一个列表list[torch.tensor],长度为batch_size
               每一个torch.tensor的shape为(num_boxes, 6),内容为box+conf+cls
        """

        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        # second-stage classification, disabled by default
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        # process each image in the batch
        for i, det in enumerate(pred):  # detections per image
            # with a webcam source batch_size may exceed 1, so take image i from the batch
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            # path for saving the image/video
            save_path = str(save_dir / p.name)  # img.jpg
            # path for the txt file with box coordinates
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            # add the input size (height x width) to the print string
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                # map the box coordinates from the resized + padded image back
                # to the original image size; the coordinates are xyxy here
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
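                # (added note: scale_coords subtracts the letterbox padding and
                # divides by the resize gain, so the boxes land back on the
                # original image's pixel grid)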

                # Print results
                # print the number of detections for each class
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        # convert xyxy (top-left + bottom-right) to xywh (center + width/height),
                        # normalize by image w, h, and convert to a list before saving
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
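                        # (added note: xyxy2xywh maps (x1, y1, x2, y2) to
                        # ((x1+x2)/2, (y1+y2)/2, x2-x1, y2-y1); dividing by
                        # gn = [w, h, w, h] normalizes everything into [0, 1])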
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or view_img:  # Add bbox to image
                        # draw the box on the original image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            # if viewing is enabled, show the image/video stream
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
        # (the saved-image/txt directory could also be opened here, which seems to work only on macOS)
    # print the total elapsed time
    print(f'Done. ({time.time() - t0:.3f}s)')
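
The examples here all read their options from an `opt` namespace. A minimal argparse sketch reconstructed from the attributes the code accesses (flag names are assumptions inferred from those attributes, defaults are illustrative):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov5s.pt')
parser.add_argument('--source', type=str, default='data/images')
parser.add_argument('--img-size', type=int, default=640)
parser.add_argument('--conf-thres', type=float, default=0.25)
parser.add_argument('--iou-thres', type=float, default=0.45)
parser.add_argument('--device', default='')
parser.add_argument('--view-img', action='store_true')
parser.add_argument('--save-txt', action='store_true')
parser.add_argument('--save-conf', action='store_true')
parser.add_argument('--classes', nargs='+', type=int)
parser.add_argument('--agnostic-nms', action='store_true')
parser.add_argument('--augment', action='store_true')
parser.add_argument('--project', default='runs/detect')
parser.add_argument('--name', default='exp')
parser.add_argument('--exist-ok', action='store_true')
opt = parser.parse_args()  # dashes become underscores: opt.img_size, opt.conf_thres, ...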
Example #2
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
Example #3
    def detect(self, opencv_img, data, save_img=False):
        self.weights = os.path.join(package_path, 'yolov5/weights', self.weights)
        self.source = os.path.join(package_path,'yolov5', self.source)
        source, weights, view_img, save_txt, imgsz = self.source, self.weights, self.view_img, self.save_txt, self.img_size
        webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://'))
        self.project = os.path.join(package_path,'yolov5', self.project)
        # Directories
        save_dir = Path(increment_path(Path(self.project) / self.name,
                        exist_ok=self.exist_ok))  # increment run
        
        (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True,
                                                            exist_ok=True)  # make dir

        # Initialize
        set_logging()
        device = select_device(self.device)
        half = device.type != 'cpu'  # half precision only supported on CUDA

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        imgsz = check_img_size(imgsz, s=stride)  # check img_size
        if half:
            model.half()  # to FP16

        # Second-stage classifier
        classify = False
        if classify:
            modelc = load_classifier(name='resnet101', n=2)  # initialize
            modelc.load_state_dict(torch.load(
                'weights/resnet101.pt', map_location=device)['model'])  # load weights
            modelc.to(device).eval()

        # Set Dataloader
        vid_path, vid_writer = None, None
        if webcam:
            view_img = check_imshow()
            cudnn.benchmark = True  # set True to speed up constant image size inference
            dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        else:
            save_img = False  # saving is disabled in this ROS callback version
            dataset = LoadImages(source, img_size=imgsz, stride=stride)

        # Get names and colors
        names = model.module.names if hasattr(model, 'module') else model.names
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

        # Run inference
        if device.type != 'cpu':
            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
        t0 = time.time()

        vid_cap = None
        im0s = opencv_img  # the frame handed in from the ROS callback
        img = letterbox(im0s, 640, stride=stride)[0]
        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
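        # (added note: img[:, :, ::-1] flips BGR to RGB and transpose(2, 0, 1)
        # reorders HWC to CHW, the layout PyTorch expects; ascontiguousarray is
        # needed because the negative-stride slice creates a non-contiguous view)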
        # img = cv2.imread("")

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        # Inference
        t1 = time_synchronized()
        # print("\nhaha: 83945723\n")
        # print(img.shape)
        # print(img)
        # print(self.conf_thres)
        # print(self.iou_thres)
        # print(self.classes)
        # print(self.agnostic_nms)
        # print("\nhaha: 02394857\n")

        pred = model(img, augment=False)[0]
        # Apply NMS
        pred = non_max_suppression(
            pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms)
        t2 = time_synchronized()
        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        detection_results = BoundingBoxes()
        detection_results.header = data.header
        detection_results.image_header = data.header

        for i, det in enumerate(pred):  # detections per image

            im0 = im0s
            p = self.path
            save_path = str(save_dir / 'img.jpg')  # defined so save_img can write
            txt_path = str(save_dir / 'labels' / 'label')  # defined so save_txt can write
            s = ''
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    img.shape[2:], det[:, :4], im0.shape).round()
                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                
                # publish only the first (highest-confidence) detection
                xmin, ymin, xmax, ymax, conf, det_class = det[0]
                detection_msg = BoundingBox()
                detection_msg.xmin = int(xmin)
                detection_msg.xmax = int(xmax)
                detection_msg.ymin = int(ymin)
                detection_msg.ymax = int(ymax)
                detection_msg.probability = float(conf)
                detection_msg.Class = names[int(det_class)]
                detection_results.bounding_boxes.append(detection_msg)
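                # (added note: NMS output rows are sorted by descending
                # confidence, so det[0] above is the single most confident box;
                # the int()/float() casts keep ROS message serialization happy)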

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        # label format
                        line = (cls, *xywh, conf) if self.save_conf else (cls, *xywh)
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label,
                                    color=colors[int(cls)], line_thickness=3)
            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')
            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond
            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)
        self.pub_.publish(detection_results)
        #if save_txt or save_img:
        #    s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
            #print(f"Results saved to {save_dir}{s}")

        print(f'Done. ({time.time() - t0:.3f}s)')
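
A minimal sketch of the node wiring Example #3 assumes (topic names and the message package are assumptions; only rospy/cv_bridge calls known to exist are used):

import rospy
from cv_bridge import CvBridge
from sensor_msgs.msg import Image
# BoundingBox / BoundingBoxes come from whatever msg package the class above
# imports; detection_msgs is only a placeholder name
from detection_msgs.msg import BoundingBoxes

class Yolov5Node:
    def __init__(self):
        rospy.init_node('yolov5_detector')
        self.bridge = CvBridge()
        self.pub_ = rospy.Publisher('/yolov5/detections', BoundingBoxes, queue_size=10)
        rospy.Subscriber('/camera/image_raw', Image, self.callback)

    def callback(self, data):
        # convert the ROS image to an OpenCV array and run the detect() method above
        frame = self.bridge.imgmsg_to_cv2(data, 'bgr8')
        self.detect(frame, data)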
Example #4
def detect(source, device, keys, reporter, save_img=False):
    weights = "yolov5s.pt"
    img_size = 640
    conf_thres = 0.25
    iou_thres = 0.45
    view_img = False
    save_txt = False
    save_conf = False
    classes = None
    agnostic_nms = False
    augment = False
    update = False
    project = 'runs/detect'
    exist_ok = False
    imgsz = img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = Path(project)  # fixed directory; the run is not incremented
    save_dir.mkdir(parents=True, exist_ok=True)  # ensure the report dir exists

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    view_img = check_imshow()
    cudnn.benchmark = True  # set True to speed up constant image size inference
    dataset = LoadStreams(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()

    # whether any detected label is in `keys`
    found = False

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    label = f'{names[int(cls)]} {conf:.2f}'
                    if names[int(cls)] and names[int(cls)] in keys:
                        found = True
                    else:
                        found = False
                    plot_one_box(xyxy,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)],
                                 line_thickness=3)

            # Print time (inference + NMS)
            cprint.info(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if found and reporter.can_report():
                logging.info("get label in keys: " + names[int(cls)])
                file_path = str(save_dir / f'report_{t1}.jpg')
                cv2.imwrite(file_path, im0)
                reporter.report(file_path)

    cprint.info(f'Done to close. ({time.time() - t0:.3f}s)')
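
Example #4 depends on a `reporter` object with can_report() and report(path) methods. A hedged sketch of that interface (the cooldown value and transport are assumptions; the original may do something entirely different):

import time

class Reporter:
    def __init__(self, cooldown_s=30):
        self.cooldown_s = cooldown_s  # minimum seconds between reports
        self._last = 0.0

    def can_report(self):
        # throttle: allow at most one report per cooldown window
        return time.time() - self._last >= self.cooldown_s

    def report(self, file_path):
        self._last = time.time()
        print(f'reported {file_path}')  # placeholder for the real transport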
Example #5
def detect(
        weights='mdp/weights/weights.pt',
        source_address='http://localhost:8008',  # 192.168.15.1
        img_size=416,
        conf_thres=0.01,
        iou_thres=0.5,
        device='',
        classes=None,
        agnostic_nms=False,
        augment=False,
        update=False,
        scale_percent=50):
    source = source_address + '/stream.mjpg'
    label_server = source_address + '/labels'

    predicted_label = None
    imgsz = img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(device)

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    row_num = 0
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].detach().unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    predicted_label = names[int(cls)]
                    if predicted_label:
                        label_id = label_id_mapping.get(predicted_label)
                        if False and conf < confidence_threshold(
                                label_id):  # fine tune for up arrow (white)
                            break
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh

                        print(('%s ' * 5 + '\n') %
                              (label_id, *xywh))  # label format

                        if not image_seen[predicted_label]:
                            # determine image position
                            x, y, w, h = xywh
                            # r = requests.post(label_server, json={'label': label_id, 'x': x, 'y': y})  # send result to rpi
                            # print(r.text)
                            image_seen[predicted_label] = True

                        label = '%s %.2f' % (label_id, conf)
                        good, text = check_bounding_box(
                            xywh, im0.shape[0], im0.shape[1])
                        if not good:
                            break  # drop boxes that fail the bounds check
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                        break
            cv2.imshow(p, im0)

            if cv2.waitKey(1) == ord('q'):  # q to quit
                raise StopIteration
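
Example #5 calls check_bounding_box(xywh, height, width) and expects a (good, text) pair. A hedged sketch of such a helper (the margin threshold is an assumption; the original logic is unknown):

def check_bounding_box(xywh, height, width):
    x, y, w, h = xywh  # normalized center-x, center-y, width, height
    # height/width allow pixel-space checks, e.g. x * width, y * height
    margin = 0.05
    if not (margin < x < 1 - margin and margin < y < 1 - margin):
        return False, 'box center too close to the frame edge'
    return True, ''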
Example #6
def detect(save_img=True):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)

    elif source == 'own_camera':
        pass
        # cap = cv2.VideoCapture(0)
        # while True:
        #     _, frame = cap.read()
        #     img0 = frame.copy()
        #     img_size = 640
        #     img, ratio, (dw, dh) = letterbox(img0, new_shape=img_size)
        #     img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        #     img = np.ascontiguousarray(img)
        #     img = torch.from_numpy(img).to(device)
        #     img = img.half() if half else img.float()  # uint8 to fp16/32
        #     img /= 255.0  # 0 - 255 to 0.0 - 1.0
        #     if img.ndimension() == 3:
        #         img = img.unsqueeze(0)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    if source == 'own_camera':
        # cap = cv2.VideoCapture(0)
        camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())
        # print(type(camera))
        camera.Open()
        converter = pylon.ImageFormatConverter()

        # ========== grab continuously (video) with minimal delay ==========
        camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)

        # ========== convert to OpenCV BGR format ==========
        converter.OutputPixelFormat = pylon.PixelType_BGR8packed
        converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

        img_name = 0

        while camera.IsGrabbing():
            grabResult = camera.RetrieveResult(5000, pylon.TimeoutHandling_ThrowException)

            if grabResult.GrabSucceeded():

                image = converter.Convert(grabResult)
                frame = image.GetArray()
                # frame = cv2.resize(frame, (int(frame.shape[1]/3), int(frame.shape[0]/3)))
                if opt.collect == 'True' and img_name % 5 == 0:
                    cv2.imwrite("./data/pylon/" + str(img_name) + ".jpg", frame)
                img_name += 1

            img0 = frame.copy()
            im0s = img0
            img_size = 640
            img, ratio, (dw, dh) = letterbox(img0, new_shape=img_size)
            img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
            img = np.ascontiguousarray(img)
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes,
                                       agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = Path('0'), '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = Path('0'), '', im0s

                save_path = str(save_dir / p.name)
                # txt_path = str(save_dir / 'labels' / p.stem) + (
                #     '_%g' % dataset.frame if dataset.mode == 'video' else '')
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    classes = [] 
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string
                        s1 = '%g'%(n)
                        classes.append([names[int(c)], s1])

                    df_cls_info = pd.DataFrame(classes, columns=['Class', 'number'])
                    df_cls_info.to_csv('class_num.csv')
                    print("\n number of object : ", len(det[:, -1]))
                    print(df_cls_info)
                    # Write results
                    box_info = []
                    for *xyxy, conf, cls in reversed(det):
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                            # with open(txt_path + '.txt', 'a') as f:
                            #     f.write(('%g ' * len(line)).rstrip() % line + '\n')
                            

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)


                            c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
                            confident = '%g'%(conf)
                            box_info.append([label, c1 ,c2 ,confident])
                        
                    df_box = pd.DataFrame(box_info, columns=['Class', 'top_left', 'bottom_right', 'confidence'])
                    df_box.to_csv('box_info.csv')

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))
                im0 = cv2.resize(im0, (int(im0.shape[1]/1.5),int(im0.shape[0]/1.5)))
                cv2.imshow("asd", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    break
                # Stream results
                # if view_img:
                #     cv2.imshow("asd", im0)
                #     if cv2.waitKey(1) == ord('q'):  # q to quit
                #         raise StopIteration

                # Save results (image with detections)
                # if save_img:
                #     if dataset.mode == 'images':
                #         # cv2.imwrite(save_path, im0)
                #         pass
                #     else:
                #         if vid_path != save_path:  # new video
                #             vid_path = save_path
                #             if isinstance(vid_writer, cv2.VideoWriter):
                #                 vid_writer.release()  # release previous video writer
                #
                #             fourcc = 'mp4v'  # output video codec
                #             fps = vid_cap.get(cv2.CAP_PROP_FPS)
                #             w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                #             h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                #             vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                #         vid_writer.write(im0)

        if save_txt or save_img:
            print('Results saved to %s' % save_dir)

        print('Done. (%.3fs)' % (time.time() - t0))
        return  # the pylon-camera branch ends here; the loop below handles other sources

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()


                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow("asd", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    # cv2.imwrite(save_path, im0)
                    pass
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
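
Examples #3 and #6 call a letterbox helper with the signature img, ratio, (dw, dh) = letterbox(img0, new_shape=...). A sketch of what it is assumed to do (pad color and rounding are illustrative; the real helper can also align padding to the model stride):

import cv2

def letterbox(img, new_shape=640, color=(114, 114, 114), stride=32):
    # stride is accepted for call compatibility; stride-aligned padding is omitted here
    h, w = img.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    r = min(new_shape[0] / h, new_shape[1] / w)         # scale ratio
    new_unpad = (int(round(w * r)), int(round(h * r)))  # resized, unpadded (w, h)
    dw = (new_shape[1] - new_unpad[0]) / 2              # width padding per side
    dh = (new_shape[0] - new_unpad[1]) / 2              # height padding per side
    img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)
    return img, r, (dw, dh)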
Example #7
def detect(save_img=False):
    print_div('INIT')
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    
    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16
    
    # Second-stage classifier
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    
    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    
    # Run inference
    print_div("RUN INFERENCE")
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    video_path = source
    cap = cv2.VideoCapture(video_path)

    print_div('Start Play VIDEO')
    while cap.isOpened():
        ret, frame = cap.read()
        t0 = time.time()

        if not ret:
            print_div('No Frame')
            break

        fps_t1 = time.time()

        img, img0 = img_preprocess(frame)  # img: resized, img0: original
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        
        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS: get the values of each prediction
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier: get the label for those values
        if classify:
            pred = apply_classifier(pred, modelc, img, img0)

        # Draw Box

        for i, det in enumerate(pred):

            s = '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to img0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

        
        # Print results (inference + NMS)
        print_div('%sDone. (%.3fs)' % (s, t2 - t1))

        # Draw image overlay
        x, y, w, h = (img0.shape[1]//4), 25, (img0.shape[1]//2), 30
        cv2.rectangle(img0, (x, 10), (x+w, y+h), (0, 0, 0), -1)

        rescale = 0.5
        re_img0 = (int(img0.shape[1]*rescale), int(img0.shape[0]*rescale))

        cv2.putText(img0, '{} | inference: {:.4f}s | fps: {:.4f}'.format(opt.weights[0], t2-t1, 1/(time.time()-t0)),
                    (x+20, y+20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0))

        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    
    # After break
    cap.release()
    cv2.destroyAllWindows()
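
A plausible sketch of the img_preprocess helper Example #7 relies on (the name and return convention are inferred from how the results are used; letterbox as sketched earlier):

import numpy as np

def img_preprocess(frame, img_size=640):
    img0 = frame.copy()                           # original frame, kept for drawing
    img = letterbox(img0, new_shape=img_size)[0]  # resize + pad
    img = img[:, :, ::-1].transpose(2, 0, 1)      # BGR to RGB, HWC to CHW
    return np.ascontiguousarray(img), img0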
Example #8
def detect(save_img=False):
    warning = 0
    _sum = 0
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://')) or source == 'realsense'

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        if source == '1':
            dataset = LoadRealSense2()  # Intel RealSense stream
        else:
            dataset = LoadStreams(source, img_size=imgsz)

    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                    # raise a warning for any unprotected worker
                    if names[int(c)] in ('no-helmet', 'no-helmets', 'no-mask', 'no-masks'):
                        warning = 1
                        _sum += 1
                    if names[int(c)] == 'person' and n == 1:
                        warning = 1
                        _sum += 1
                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # append this frame's warning flag to the file read by the
            # socket-connection side ('1' = violation, '0' = pass)
            file = open(
                '/home/piai/MinkyuKim/Week6~10_AI/Project/SocketConnection/detect.txt',
                'a')
            if warning == 1:
                file.write('1' + '\n')
                print('stop')
                warning = 0
            else:
                file.write('0' + '\n')
                print('pass')
            file.close()

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)


    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
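
The per-frame '0'/'1' lines Example #8 appends to detect.txt imply a reader on the SocketConnection side. A hedged sketch of such a consumer (the file path and polling interval are assumptions):

import time

def tail_flags(path='detect.txt'):
    # yield True for each '1' (violation) line appended by detect()
    with open(path) as f:
        f.seek(0, 2)  # start at the end of the file
        while True:
            line = f.readline()
            if not line:
                time.sleep(0.1)
                continue
            yield line.strip() == '1'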
Example #9
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = 'data/images', 'yolov5s.pt', False, False, 640  #All manually set
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path('runs/detect') / 'exp',
                                   exist_ok=False))  # increment run adam
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device('')
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=False)[0]  # adam

        # Apply NMS
        pred = non_max_suppression(pred,
                                   0.25,
                                   0.45,
                                   classes=None,
                                   agnostic=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if False else (
                            cls, *xywh)  # label format; save_conf hard-coded to False (adam)
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)  # p is a Path; imshow needs a str window name
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
    return n  # EDIT: count of the last class seen in the last image; unbound if nothing was detected
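The n returned above is the per-class count of whichever class happened to be iterated last, and it is unbound when the final image has no detections. A hedged alternative (count_detections is a hypothetical helper, not part of the source):

def count_detections(pred):
    # pred: list of per-image tensors from non_max_suppression;
    # returns the total number of boxes in the last image, 0 if none
    last = pred[-1]
    return 0 if last is None else len(last)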
Example #10
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, loss))
            #running_loss = 0.0
        test(net,test_loader,labels.shape[0],classes)
    PATH = './soccer_classify.pth'
    torch.save(net.state_dict(), PATH)
    print('Finished Training')



def predict_image(model, image, test_transform):
    # classify a single PIL image and return the predicted class index
    image_tensor = test_transform(image)  # use the transform passed in
    image_tensor.unsqueeze_(0)  # add batch dimension
    image_tensor = image_tensor.to(device)
    output = model(image_tensor)
    index = output.data.cpu().numpy().argmax()
    return index

if __name__=="__main__":
    train=True
    device = select_device("0")
    modelc = load_classifier(name='resnet101', n=4)  # initialize
    train_loader,test_loader,classes=load_dataset("/media/asad/adas_cv_2/caffe/train_classifier","/media/asad/adas_cv_2/caffe/train_classifier")
    modelc.to(device)
    if train:
        training(modelc,train_loader,test_loader,device,classes,epochs=100)
    else:
        test_transforms = transforms.Compose([transforms.Resize((224,224)),transforms.ToTensor(),transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])
        modelc.load_state_dict(torch.load("./soccer_classify.pth"))
        modelc.eval()
        image_pil = Image.open("/media/asad/adas_cv_2/caffe/patches/9000.png")
        cls_idx=predict_image(modelc,image_pil,test_transforms)
Example #11
def detect(save_img=False):
    print_div('INIT')
    out, source, weights, view_img, save_txt, imgsz,count = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size,opt.threads

    # Initialize

    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()


    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    ###############################################################################
    print_div("RUN INFERENCE")
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once to warm up

    video_path = source
    cap = cv2.VideoCapture(video_path)
    threads = []
    q = Queue()
    rescale = 0.5
    time_count = time.time()
    while cap.isOpened():
        #img0, re_img0=multithreading(cap.read(),device,half,names,colors,model)
        key = cv2.waitKey(1)
        if key == ord('q'): return

        for i in range(1):
            ret, frame = cap.read()
            if not ret:
                print_div('No Frame')
                break
            threads.append(
                Thread(target=multithreading,
                       args=(frame, device, half, names, colors, model, q)))
            threads[i].start()
            img0 = q.get()
            re_img0 = (int(img0.shape[1] * rescale),
                       int(img0.shape[0] * rescale))
            cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0))

        for i in range(1):
            if not ret:
                print_div('No Frame')
                break
            threads[i].join()
        threads = []
        if not ret:
            print_div('No Frame')
            break
    # After break
    cap.release()
    cv2.destroyAllWindows()
    print("Spending time: " + str(time.time() - time_count))
Example #12
def custom_detect(
    weights='yolov5l.pt', # model.pt path(s)
    source='data/images', # source folder
    imgsz=640, # inference size (pixels)
    conf_thres=0.5, # object confidence threshold
    iou_thres=0.45, # IOU threshold for NMS
    device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
    view_img=False, # display results
    box_only=False, # output only contains bounding boxes, not original image
    save_txt=False, # save results to *.txt
    save_conf=False, # save confidences in --save-txt labels
    classes=None, # only look for certain classes (a list of class indices, e.g. [0, 2])
    agnostic_nms=False, # class-agnostic NMS
    augment=False, # augmented inference
    update=False, # update all models
    project='runs', # save results to project/name
    name='', # save results to project/name
    exist_ok=False # existing project/name ok, do not increment
    ):


    # Directories
    save_dir = Path(increment_path(Path(project) / name, exist_ok=exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True
    dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, frame = path, '', getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if box_only:
                im0 = np.zeros_like(im0)  # black out the image so only bounding boxes appear in the output

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
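The gn tensor that keeps reappearing in these examples is just (w, h, w, h) of the original image, used to turn pixel xyxy boxes into the normalized xywh label format. The same conversion written out longhand (a plain-Python sketch, not the repo's helper):

def to_normalized_xywh(xyxy, im_w, im_h):
    x1, y1, x2, y2 = xyxy
    return [(x1 + x2) / 2 / im_w,   # x centre
            (y1 + y2) / 2 / im_h,   # y centre
            (x2 - x1) / im_w,       # width
            (y2 - y1) / im_h]       # height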
Example #13
def detect(opt):
    weights, view_img, save_txt, imgsz = opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = True

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name,
                              exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    dataset = LoadNLMFeatures(img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, imgsz, imgsz,
                        dataset.dimensions).to(device).type_as(
                            next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        # img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)

        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image

            p, s, im0, frame = path, '', im0s.copy(), getattr(
                dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt

            # s += f"{img.shape[::]}"  # print string

            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                # det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (
                            names[c]
                            if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # s += f"{len(pred)} table found. "
            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Save results (image with detections)
            if save_img:
                cv2.imwrite(save_path, im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
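Example #13 draws with colors(c, True), the fixed palette helper from newer YOLOv5 releases, where the earlier examples build a random per-class list. A minimal stand-in with the same call shape (an illustration, not the library's implementation; the bgr flag and index-by-class behaviour are the assumptions):

import random

def make_palette(n, seed=0):
    rng = random.Random(seed)
    table = [tuple(rng.randint(0, 255) for _ in range(3)) for _ in range(n)]
    def colors(i, bgr=False):
        c = table[int(i) % n]
        return c if bgr else c[::-1]
    return colors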
Example #14
def detect(save_img=False):
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.device
    source, weights, save_img, save_txt, imgsz = opt.source, opt.weights, opt.save_img, opt.save_txt, opt.img_size

    #save_img = not opt.nosave and not source.endswith('.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir
    print(save_dir)
    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # dataset = LoadImages_panda(source, img_size=imgsz, stride=stride, split_size=[[1000, 1000],[1500, 1500], [3000, 3000],[6000, 6000],[10000,10000]], over_lap=0.3)
    dataset = LoadImages_panda(source,
                               img_size=imgsz,
                               stride=stride,
                               split_size=[[1500, 1500], [3000, 3000],
                                           [6000, 6000], [10000, 10000]],
                               over_lap=0.3)
    #dataset = LoadImages_panda(source, img_size=imgsz, stride=stride,split_size=[], over_lap=0.3)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
    ret_matrix = np.zeros((2, 2))

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    jdict = []
    results = []
    for path, img_list, start_list, split_size_list, boundary_list, img0_list, im0s, vid_cap in dataset:
        det_list = []
        det_scale_dict = {}
        # Inference
        t1 = time_synchronized()
        for img_i in range(len(img_list)):
            img = torch.from_numpy(img_list[img_i]).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred_output = non_max_suppression(pred,
                                              opt.conf_thres,
                                              opt.iou_thres,
                                              classes=opt.classes,
                                              agnostic=opt.agnostic_nms)
            pred_list = pred_output[0]  # this tile's detections after NMS
            pred_out = [pred_list]

            # Apply Classifier
            if classify:
                pred_out = apply_classifier(pred_out, modelc, img,
                                            img0_list[img_i])

            # Process detections
            for i, det in enumerate(pred_out):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, img0_list[img_i][
                        i].copy(), dataset.count
                else:
                    p, s, im0, frame = path, '', img0_list[img_i], getattr(
                        dataset, 'frame', 0)

                p = Path(p)  # to Path
                save_path = str(save_dir / p.name)  # img.jpg
                txt_path = str(save_dir / 'labels' / p.stem) + (
                    '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1,
                                              0]]  # normalization gain whwh

                det = del_more(det,
                               img.shape[2:],
                               big_thres=0.3,
                               boundary=boundary_list[img_i])

                # del_small
                if len(det) != 0:
                    small_thres = 15
                    dets_wh_thres = det[:, 2:4] - det[:, :2]
                    det_thres = torch.minimum(dets_wh_thres[:, 0],
                                              dets_wh_thres[:, 1])
                    if split_size_list[img_i] > 2000:
                        det = det[det_thres > small_thres]

                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()
                    det[..., 0] += start_list[img_i][0]
                    det[..., 2] += start_list[img_i][0]
                    det[..., 1] += start_list[img_i][1]
                    det[..., 3] += start_list[img_i][1]
                    if det_list == []:
                        det_list = det
                    else:
                        det_list = torch.cat((det_list, det), dim=0)

                    if split_size_list[img_i] not in det_scale_dict.keys():
                        det_scale_dict[split_size_list[img_i]] = det
                    else:
                        det_scale_dict[split_size_list[img_i]] = torch.cat(
                            (det_scale_dict[split_size_list[img_i]], det),
                            dim=0)

        det_list = fuse_all_det(det_list[:, :6],
                                im0,
                                conf_thres=opt.conf_thres,
                                nms_thres=opt.iou_thres,
                                method='standard',
                                merge=False)

        scale_key = []
        for key in det_scale_dict.keys():
            scale_key.append(key)
        det_list = WBF_fuse(im0, [det_list, det_scale_dict[scale_key[0]]],
                            weights=[1, 1],
                            iou_thres=0.5,
                            conf_thres=0.5)

        # Print results
        for c in det_list[:, -1].unique():
            n = (det_list[:, -1] == c).sum()  # detections per class
            s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

        # Write results
        frame_id = int(path.split(".")[-2].split("_")[-1])
        id_list = []
        online_tlwhs = []
        for *xyxy, conf, cls in reversed(det_list):
            online_tlwhs.append((float(xyxy[0]), float(xyxy[1]),
                                 float(xyxy[2]) - float(xyxy[0]),
                                 float(xyxy[3]) - float(xyxy[1])))
            id_list.append(conf)
            if save_img:  # Add bbox to image
                plot_one_box(xyxy,
                             im0s,
                             label=None,  # draw the box only, no label text
                             color=colors[int(cls)],
                             line_thickness=3)
        results.append((frame_id, online_tlwhs, id_list))
        # Print time (per image)
        t2 = time_synchronized()
        print(f'{s}Done. ({t2 - t1:.3f}s)')
        sys.stdout.flush()

        # Save results (image with detections)
        if save_img:
            if dataset.mode == 'image':
                cv2.imwrite(save_path, im0s)
            else:  # 'video' or 'stream'
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    if vid_cap:  # video
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:  # stream
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                        save_path += '.mp4'
                    vid_writer = cv2.VideoWriter(
                        save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                        (w, h))
                vid_writer.write(im0)

    result_detection, id_num = write_results(
        opt.source.split("/")[-1] + ".txt", results, "mot")
    print("detection_num", result_detection)
    print("id_num", id_num)
    print(f'Done. ({time.time() - t0:.3f}s)')
    print(f'object num. ({len(jdict)})')
    sys.stdout.flush()
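Example #14's key trick is tiled inference: each split is detected independently, then its boxes are shifted back into full-image coordinates by adding the tile's top-left offset before the per-scale fusion. That shift in isolation (a sketch; boxes are assumed to be an (N, 4+) tensor in xyxy order):

import torch

def shift_boxes_to_global(det, x0, y0):
    det = det.clone()       # keep the tile-local copy intact
    det[:, [0, 2]] += x0    # x1, x2
    det[:, [1, 3]] += y0    # y1, y2
    return det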
Example #15
File: detect.py  Project: Saido07/yolov5
def detect(opt):

    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name,
                              exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        global boyutlarX, boyutlarY
        boyutlarY, boyutlarX = img.shape[2:]  # network input (h, w), kept as globals

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   opt.classes,
                                   opt.agnostic_nms,
                                   max_det=opt.max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (
                            names[c]
                            if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        print(label, " : ", end=" ")
                        global boxY, boxX
                        boxX = xyxy[0] + ((xyxy[2] - xyxy[0]) / 2)
                        boxY = xyxy[1] + ((xyxy[3] - xyxy[1]) / 2)

                        # boxes were rescaled to im0 by scale_coords, so
                        # compare the centre against the original frame size
                        h0, w0 = im0.shape[:2]
                        if w0 * 0.5 < boxX:
                            print("Right", end=" ")
                        elif w0 * 0.5 > boxX:
                            print("Left", end=" ")
                        if h0 * 0.5 < boxY:
                            print("Behind")
                        elif h0 * 0.5 > boxY:
                            print("Front")
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=opt.line_thickness)
                        if opt.save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference + NMS)
            #print(f'{s}Done. ({t2 - t1:.3f}s)')  #***************************************

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")
Example #16
def run(
        weights='yolov5s.pt',  # model.pt path(s)
        source='./test_1',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    desire_param = []
    coor = []
    all_info = []
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet50', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('resnet50.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0, frame = path, '', im0s.copy(), getattr(
                dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg

            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()
                # Print results
                pr = ' '
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string
                    pr += f"{n} {names[int(c)]}{'s' * (n > 1)},"

                desire_param.append({"image_id": p.name, "prediction": pr})

                # for img_name in enumerate(p.name):
                #     if img_name not in desire_param:
                #         add_image = (img_name, "predictioni 0")
                # desire_param.append(add_image)

                # print(desire_param)

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if save_conf else (cls,
                                                         *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference + NMS)
            result = [f'{s}Done. ({t2 - t1:.3f}s)']
            print(result)
            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:
                    pass  # video/stream saving is not handled in this script
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')

    # lst = []
    # for child in desire_param:
    #     info = ["img_name", "prediction"]
    #     lst1 = {k: v for k, v in zip(info, child)}
    #     lst.append(lst1)

    with open('result.json', 'w') as f:
        json.dump(desire_param, f)
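As the commented-out blocks hint, images with no detections never reach desire_param, so result.json silently omits them. One hedged way to close that gap, assuming the caller collects every processed file name into seen_images (a hypothetical list, not in the source):

def fill_missing_predictions(desire_param, seen_images):
    # give every processed image an entry, defaulting to '0' detections
    by_id = {d['image_id']: d['prediction'] for d in desire_param}
    return [{'image_id': im, 'prediction': by_id.get(im, '0')}
            for im in seen_images]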
Example #17
def run(
        weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, pt, onnx = False, w.endswith('.pt'), w.endswith(
        '.onnx')  # inference type
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(
            model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(
                torch.load('resnet50.pt',
                           map_location=device)['model']).to(device).eval()
    elif onnx:
        check_requirements(('onnx', 'onnxruntime'))
        import onnxruntime
        session = onnxruntime.InferenceSession(w, None)
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        if pt:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        elif onnx:
            img = img.astype('float32')
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        # Inference
        t1 = time_sync()
        if pt:
            visualize = increment_path(save_dir / Path(path).stem,
                                       mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
        elif onnx:
            pred = torch.tensor(
                session.run([session.get_outputs()[0].name],
                            {session.get_inputs()[0].name: img}))

        # NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        t2 = time_sync()

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        #xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (
                            cls, *xyxy, conf
                        )  # if save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer[i].write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
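Example #17 picks its backend from the weights suffix: .pt runs through PyTorch, .onnx through onnxruntime. The ONNX path in isolation looks like this (a sketch; the single input/output assumption holds for a standard YOLOv5 export but not for every model):

import numpy as np
import onnxruntime

def onnx_infer(session, img):
    # img: float32 array shaped (1, 3, h, w), already scaled to [0, 1]
    inp = session.get_inputs()[0].name
    out = session.get_outputs()[0].name
    return session.run([out], {inp: img.astype(np.float32)})[0]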
Example #18
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.save_dir, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):  # output dir
        shutil.rmtree(out)  # delete dir
    os.makedirs(out)  # make new dir
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # TODO: merge classes with same coordinates
                bbox_cls = {}
                for *xyxy, conf, cls in reversed(det):
                    xyxy = list(map(float, xyxy))
                    coord_str = ','.join(list(map(str, xyxy)))
                    bbox_cls.setdefault(coord_str, []).append([int(cls), float(conf)])
                # print('\nbefore merge: ', bbox_cls)

                # TODO: merge bboxes based on IOU
                bbox_cls_copy = {}
                while len(bbox_cls.keys()) > 0:
                    keys = list(bbox_cls.keys())
                    merge_lst = []
                    box = list(map(float, keys[0].split(',')))
                    for j in range(1, len(keys)):
                        boxj = list(map(float, keys[j].split(',')))
                        iou = box_iou(torch.Tensor([box]), torch.Tensor([boxj]))
                        if iou > 0.8:
                            merge_lst.append([j, boxj, bbox_cls[keys[j]]])

                    if len(merge_lst) > 0:
                        coords = [box, ]
                        values = bbox_cls[keys[0]]
                        for j, boxj, valj in merge_lst:
                            coords.append(boxj)
                            values.extend(valj)
                            del bbox_cls[keys[j]]

                        new_coord = torch.Tensor(coords).mean(dim=0).tolist()
                        new_key = ','.join(map(str, new_coord))
                        bbox_cls_copy[new_key] = values
                    else:
                        bbox_cls_copy[keys[0]] = bbox_cls[keys[0]]

                    del bbox_cls[keys[0]]
                # print('after merge: ', bbox_cls_copy)

                # TODO: keep at most two classes
                for key, value in bbox_cls_copy.items():
                    xyxy = list(map(float, key.split(',')))
                    value.sort(key=lambda x: x[1], reverse=True)  # based on conf
                    cls1, conf = value[0]
                    cls2 = -1
                    while len(value) > 1:
                        if (cls1 in [0, 1, 2] and value[1][0] in [3, 4, 5]) or \
                                (cls1 in [3, 4, 5] and value[1][0] in [0, 1, 2]):
                            cls2 = value[1][0]
                            break
                        del value[1]

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.Tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls1, cls2, conf, *xywh) if opt.save_conf else (cls1, cls2, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line) + '\n') % line)

                    if save_img or view_img:  # Add bbox to image
                        if cls2 == -1:
                            # cls_str = names[int(cls1)]
                            cls_str = str(cls1)
                        else:
                            # cls_str = ','.join([names[int(cls1)], names[int(cls2)]])
                            cls_str = ','.join(sorted([str(cls1), str(cls2)]))
                        label = cls_str + ' %.2f' % conf
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls1)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))

    print('Done. (%.3fs)' % (time.time() - t0))
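Example #18 merges near-duplicate detections whose IoU exceeds 0.8 and then keeps at most two classes per merged box (one from {0, 1, 2}, one from {3, 4, 5}). Below is a self-contained sketch of the IoU test the merge hinges on; it is a plain re-implementation for illustration, not the repo's box_iou.

def iou_xyxy(a, b):
    """IoU of two boxes given as (x1, y1, x2, y2)."""
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

# Two detections of the same object, possibly with different classes:
box_a = [100.0, 100.0, 200.0, 200.0]
box_b = [102.0, 101.0, 203.0, 199.0]
print(iou_xyxy(box_a, box_b))  # ≈ 0.93 > 0.8, so the two boxes would be merged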
Example #19
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        listOfObjectsDetected = []
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    listOfObjectsDetected.append(names[int(cls)])
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                kitchenObjects, livingroomObjects, bathroomObjects, bedroomObjects = read_rooms(
                )
                kitchen_score = len(
                    set(kitchenObjects) & set(listOfObjectsDetected))
                livingroom_score = len(
                    set(livingroomObjects) & set(listOfObjectsDetected))
                bathroom_score = len(
                    set(bathroomObjects) & set(listOfObjectsDetected))
                bedroom_score = len(
                    set(bedroomObjects) & set(listOfObjectsDetected))

                rooms = {
                    'Kitchen': kitchen_score,
                    'Livingroom': livingroom_score,
                    'Bathroom': bathroom_score,
                    'Bedroom': bedroom_score
                }
                textToOverlay = 'Room Detected: ' + str(
                    max(rooms, key=rooms.get))
                cv2.putText(
                    im0, textToOverlay,
                    (int(im0.shape[1] * 0.7), int(im0.shape[0] * 0.05)),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1, cv2.LINE_AA)
                print(textToOverlay)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
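The room vote in Example #19 is a set intersection between the detected class names and four per-room object lists. read_rooms() itself is not shown in the example; the sketch below substitutes a hypothetical version returning four name lists so the scoring step can be run in isolation.

def read_rooms():
    # Hypothetical stand-in for the example's read_rooms(); the real lists are not shown.
    kitchen = ['refrigerator', 'oven', 'microwave', 'sink']
    livingroom = ['couch', 'tv', 'remote']
    bathroom = ['toilet', 'hair drier']
    bedroom = ['bed', 'clock']
    return kitchen, livingroom, bathroom, bedroom

detected = ['couch', 'tv', 'person']  # names[int(cls)] collected per frame
kitchen, livingroom, bathroom, bedroom = read_rooms()
rooms = {
    'Kitchen': len(set(kitchen) & set(detected)),
    'Livingroom': len(set(livingroom) & set(detected)),
    'Bathroom': len(set(bathroom) & set(detected)),
    'Bedroom': len(set(bedroom) & set(detected)),
}
print('Room Detected:', max(rooms, key=rooms.get))  # -> Livingroom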
Example #20
def detect(source,
           weights,
           conf_thres=0.25,
           imgsz=640,
           iou_thres=0.45,
           classes=None,
           device=''):

    # Initialize
    set_logging()
    device = select_device(device)

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=True)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=True)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):

                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy,
                                 im0,
                                 label=label,
                                 color=colors[int(cls)],
                                 line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

    print('Done. (%.3fs)' % (time.time() - t0))
    return im0
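Unlike the other variants, Example #20 takes its settings as function arguments and returns the final annotated frame instead of writing files, so it can be called directly. A usage sketch with placeholder paths:

import cv2

# Placeholder paths; substitute real weights and an input image.
annotated = detect(source='data/images/bus.jpg',
                   weights='yolov5s.pt',
                   conf_thres=0.25,
                   imgsz=640)
cv2.imwrite('annotated.jpg', annotated)  # the returned frame is a BGR numpy array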
Example #21
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA
    half = False  # force FP32 inference here regardless of device

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # model = Darknet('cfg/prune_0.8_yolov3-spp.cfg', (opt.img_size, opt.img_size)).to(device)
    # initialize_weights(model)
    # model.load_state_dict(torch.load('weights/prune_0.8_yolov3-spp-ultralytics.pt')['model'])
    # model.eval()
    # stride = [8, 16, 32]
    # imgsz = check_img_size(imgsz, s=max(stride))  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    # if webcam:
    #     view_img = True
    #     cudnn.benchmark = True  # set True to speed up constant image size inference
    #     dataset = LoadStreams(source, img_size=imgsz)
    # else:
    #     save_img = True
    #     dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    # names = ['1', '2']
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    videopath_list = (
        # '2020double_company',
        # '2020double_company_1',
        # 'child_79_company',
        # 'child_137_huaxia',
        # 'child_137_huaxia_1',
        # 'double_54_zhuhai',
        # 'double_54_zhuhai_1',
        # 'double_59_huaxiaxueyuan',
        # 'double_59_huaxiaxueyuan_1',
        # 'double_990_close_company',
        # 'double_beijing',
        # 'double_beijing_1',
        'single_28_huaxia',
        # 'single_28_huaxia_2',
        # 'single_897_yinchuan',
        # 'single_897_yinchuan_2',
        # 'single_1000_beijng_shoudu',
        # 'single_1000_guangzhjou',
        # 'single_1000_wuhan',
    )

    video_dir_pass = [
        'single_1000_beijng_shoudu_kuan',
        'single_1000_wuhan_kuan',
    ]
    #     video_path='/home/lishuang/Disk/shengshi_data/video_test_split_all/single_1000_beijng_shoudu_test_frame'
    video_dir_path = '/home/lishuang/Disk/shengshi_data/video_test_split_all'
    video_paths = os.listdir(video_dir_path)
    for video_dir in video_paths:
        if video_dir not in videopath_list:
            print(video_dir, " pass")
            continue
        video_path = os.path.join(video_dir_path, video_dir)
        #         if video_dir !='double_54_zhuhai':
        #             continue

        csv_path = os.path.join(video_dir_path, 'video_test_csv',
                                f'{video_dir}_video_cut.csv')
        # csv_path = os.path.join(os.path.join(videopath, ".."), f'{basedirname}_video_cut.csv')

        video_name = []
        video_name_dic = {}
        with open(csv_path) as f:
            lines = f.readlines()[1:]
            for line in lines:
                line = line.rstrip()
                items = line.split(',')
                video_name.append(items[1])
                video_name_dic[items[1]] = [
                    items[2], items[3], items[4], items[5]
                ]

        if os.path.isdir(video_path):
            video_files = os.listdir(video_path)
            alarmvideo_list = {}
            for video_file in video_files:
                if video_file != '616643FEF1380C0E_2019-10-19-11-37-49-812_passenger_00000061_2.mp4':
                    continue
                if video_file[:-4] not in video_name_dic:
                    continue
                videosource = os.path.join(video_path, video_file)
                # if len(os.listdir(videosource))==0:
                #     continue
                save_img = True
                view_img = True
                videodataset = LoadImages(videosource, img_size=imgsz)
                video_file, extension = os.path.splitext(video_file)
                alarmvideo_list[video_file] = 0
                frame_record = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                frame_num = 0
                outvideo = str(Path(out) / video_dir / video_file)
                x1t, y1t, x2t, y2t = video_name_dic[video_file]
                ratio_width = 1
                ratio_height = 1
                x1t = int(x1t) * ratio_width
                x2t = int(x2t) * ratio_width
                y1t = int(y1t) * ratio_height
                y2t = int(y2t) * ratio_height

                if os.path.exists(outvideo):
                    shutil.rmtree(outvideo)  # delete output folder
                os.makedirs(outvideo)  # make new output folder
                for path, img, im0s, vid_cap in videodataset:  #one video
                    img = torch.from_numpy(img).to(device)
                    img = img.half() if half else img.float(
                    )  # uint8 to fp16/32
                    img /= 255.0  # 0 - 255 to 0.0 - 1.0
                    if img.ndimension() == 3:
                        img = img.unsqueeze(0)

                    # Inference
                    t1 = time_synchronized()
                    pred = model(img, augment=opt.augment)[0]

                    # Apply NMS
                    pred = non_max_suppression(pred,
                                               opt.conf_thres,
                                               opt.iou_thres,
                                               classes=opt.classes,
                                               agnostic=opt.agnostic_nms)
                    t2 = time_synchronized()

                    # Apply Classifier
                    if classify:
                        pred = apply_classifier(pred, modelc, img, im0s)

                    boxnum = 0
                    boxnumbody = 0
                    boxnumhead = 0
                    # Process detections
                    for i, det in enumerate(pred):  # detections per image
                        if webcam:  # batch_size >= 1
                            p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                        else:
                            p, s, im0 = path, '', im0s

                        save_path = str(Path(out) / video_dir / Path(p).name)
                        # txt_path = str(Path(out) /video_dir/video_file/ Path(p).stem) + ('_%g' % videodataset.frame if videodataset.mode == 'video' else '')
                        txt_path = str(
                            Path(out) / video_dir / video_file /
                            str(videodataset.frame))
                        s += '%gx%g ' % img.shape[2:]  # print string
                        gn = torch.tensor(
                            im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

                        if det is not None and len(det):
                            # Rescale boxes from img_size to im0 size
                            det[:, :4] = scale_coords(img.shape[2:],
                                                      det[:, :4],
                                                      im0.shape).round()

                            # Print results
                            for c in det[:, -1].unique():
                                n = (
                                    det[:,
                                        -1] == c).sum()  # detections per class
                                s += '%g %ss, ' % (n, names[int(c)]
                                                   )  # add to string

                            # Write results
                            for *xyxy, conf, cls in det:
                                # if cls == 0:
                                #     # label = 'person'
                                #     boxnum += 1
                                #     boxnumbody += 1
                                # elif cls == 1:
                                #     # label = 'head'
                                #     boxnumhead += 1
                                # if point_in_box(box_center, [x1, y1, x2, y2]):
                                #     boxnumhead += 1 * person_result['class'] == 2
                                #     boxnumbody += 1 * person_result['class'] == 1
                                if save_txt:  # Write to file
                                    xywh = (xyxy2xywh(
                                        torch.tensor(xyxy).view(1, 4)) /
                                            gn).view(
                                                -1).tolist()  # normalized xywh
                                    with open(txt_path + '.txt', 'a') as f:
                                        x0, y0, w0, h0 = xywh
                                        h, w = im0.shape[:2]
                                        x0 *= w
                                        y0 *= h
                                        w0 *= w
                                        h0 *= h
                                        x1 = x0 - w0 / 2
                                        y1 = y0 - h0 / 2
                                        if point_in_box([x0, y0],
                                                        [x1t, y1t, x2t, y2t]):
                                            boxnumhead += 1 * cls == 1
                                            boxnumbody += 1 * cls == 0
                                        f.write(('%s ' + '%.2g ' + '%d ' * 3 +
                                                 '%d' + '\n') %
                                                (names[int(cls)], conf, x1, y1,
                                                 w0, h0))  # label format
                                #                             f.write(('%ss '+'%.2g ' * 5 + '\n') % (names[int(cls)], conf,*xywh))  # label format

                                if save_img or view_img:  # Add bbox to image
                                    label = '%s %.2f' % (names[int(cls)], conf)
                                    plot_one_box(xyxy,
                                                 im0,
                                                 label=label,
                                                 color=colors[int(cls)],
                                                 line_thickness=3)

                        # Print time (inference + NMS)
                        print('%sDone. (%.3fs)' % (s, t2 - t1))

                        # Stream results
                        if view_img:
                            cv2.imshow(p, im0)
                            if cv2.waitKey(1) == ord('q'):  # q to quit
                                raise StopIteration

                        # Save results (image with detections)
                        if save_img:
                            if videodataset.mode == 'images':
                                cv2.imwrite(save_path, im0)
                            else:
                                if vid_path != save_path:  # new video
                                    vid_path = save_path
                                    if isinstance(vid_writer, cv2.VideoWriter):
                                        vid_writer.release(
                                        )  # release previous video writer

                                    fourcc = 'mp4v'  # output video codec
                                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                    w = int(
                                        vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                    h = int(
                                        vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                                    vid_writer = cv2.VideoWriter(
                                        save_path,
                                        cv2.VideoWriter_fourcc(*fourcc), fps,
                                        (w, h))
                                vid_writer.write(im0)
                                image_path = os.path.join(
                                    outvideo,
                                    str(videodataset.frame) + '.jpg')
                                cv2.imwrite(image_path, im0)
                        if boxnumbody > 1 or boxnumhead > 1:
                            frame_record[frame_num % 10] = 1
                        else:
                            frame_record[frame_num % 10] = 0
                        frame_num += 1
                        if alarmvideo_list[video_file] == 0 and sum(
                                frame_record) > 7:
                            alarmvideo_list[video_file] = 1
                            image_path = os.path.join(
                                outvideo,
                                str(videodataset.frame) + '_alarmvideo.jpg')
                            cv2.imwrite(image_path, im0)
            file_data = ""
            for single_video in alarmvideo_list:
                file_data += str(single_video) + ', value: ' + str(
                    alarmvideo_list[single_video]) + '\n'
            with open(
                    f'{os.path.basename(video_path)}_video_result_{opt.conf_thres}.txt',
                    'a') as f:
                f.write(file_data)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
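The alarm test in Example #21 is a ten-slot ring buffer: each frame writes a 0/1 flag at index frame_num % 10, and the alarm latches once more than 7 of the last 10 frames are flagged. Isolated as a minimal sketch:

frame_record = [0] * 10  # flags for the most recent 10 frames
alarm = 0                # mirrors alarmvideo_list[video_file]

def update(frame_num, boxnumbody, boxnumhead):
    global alarm
    frame_record[frame_num % 10] = 1 if (boxnumbody > 1 or boxnumhead > 1) else 0
    if alarm == 0 and sum(frame_record) > 7:
        alarm = 1  # latch the alarm the first time the window trips
    return alarm

for frame_num in range(9):
    update(frame_num, boxnumbody=2, boxnumhead=1)  # 9 crowded frames in a row
print(alarm)  # 1: eight flagged frames already exceeded the threshold of 7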
Example #22
    def detect(self,
               app,
               weights,
               source='data/images',
               image_size=736,
               debug=False):
        """ Detects the image or video of the given source by using the specified weights.

        Parameters
        ----------
        app
            GUI window object whose Label_Bild widget receives each annotated frame.
        weights: str
            Weights path.
        source: str
            Source of the detection. 0 for webcam.
        image_size: int
            Inference size (pixels).
        debug: bool
            Whether the debug mode is on.
        """
        print(f'Detecting using weights: {weights}')
        source = str(source)
        imgsz = image_size
        opt_project = 'runs/detect'
        opt_name = 'exp'
        opt_exist_ok = False
        opt_device = ''
        opt_augment = False
        opt_conf_thres = 0.25
        opt_iou_thres = 0.01
        opt_classes = 0
        opt_agnostic_nms = True
        opt_save_conf = False
        webcam = source.isnumeric() or source.endswith(
            '.txt') or source.lower().startswith(
                ('rtsp://', 'rtmp://', 'http://'))
        if webcam:
            print(source)

        # Initialize
        set_logging()
        device = select_device(opt_device)
        half = device.type != 'cpu'  # half precision only supported on CUDA

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
        if half:
            model.half()  # to FP16

        # Second-stage classifier
        classify = False
        if classify:
            modelc = load_classifier(name='resnet101', n=2)  # initialize
            modelc.load_state_dict(
                torch.load('weights/resnet101.pt',
                           map_location=device)['model']).to(device).eval()

        # Set Dataloader
        vid_path, vid_writer = None, None
        if webcam:
            cudnn.benchmark = True  # set True to speed up constant image size inference
            dataset = LoadStreams(source, img_size=imgsz)
            print("webcam")  # debug
            logging.debug("webcam")
        else:
            dataset = LoadImages(source, img_size=imgsz)
            print("image")  # debug

        # Get names and colors
        names = model.module.names if hasattr(model, 'module') else model.names
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

        # Run inference
        t0 = time.time()
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img
                  ) if device.type != 'cpu' else None  # run once
        logging.debug("start inference")
        for path, img, im0s, vid_cap in dataset:
            if self.detection is False and webcam is True:
                logging.debug("kill thread")
                dataset.kill_thread()
                break
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt_augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred,
                                       opt_conf_thres,
                                       opt_iou_thres,
                                       classes=opt_classes,
                                       agnostic=opt_agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                    ), dataset.count
                else:
                    p, s, im0, frame = path, '', im0s, getattr(
                        dataset, 'frame', 0)

                #p = Path(p)  # to Path
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1,
                                              0]]  # normalization gain whwh
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f'{n} {names[int(c)]}s, '  # add to string

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        logging.debug('Prediction: {}; Confidence {}'.format(
                            f'{names[int(cls)]}', f'{conf:.2f}'))
                        label = ''.join(
                            map(lambda x: x
                                if x.islower() else ' ' + x, names[int(cls)]))
                        label = f'{label} {conf:.2f}' if debug else label
                        if debug:
                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=colors[int(cls)],
                                         line_thickness=3)
                        plot_one_point(xyxy,
                                       im0,
                                       label=label,
                                       color=colors[int(cls)],
                                       point_thickness=None,
                                       r=10)

                app.image = QImage(bytearray(im0), im0.shape[1], im0.shape[0],
                                   QImage.Format_RGB888).rgbSwapped()
                app.Label_Bild.setPixmap(QPixmap(app.image))
                time.sleep(1 / 60)

                # Print time (inference + NMS)
                print(f'{s}Done. ({t2 - t1:.3f}s)')

        logging.debug(f'Done. ({time.time() - t0:.3f}s)')
        print(f'Done. ({time.time() - t0:.3f}s)')
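Example #22 hands each annotated frame to a Qt label by wrapping the BGR numpy array in a QImage and calling rgbSwapped() to get RGB. A minimal sketch of that conversion, assuming PyQt5 and a QLabel like the example's app.Label_Bild:

import numpy as np
from PyQt5.QtGui import QImage, QPixmap

def frame_to_pixmap(im0: np.ndarray) -> QPixmap:
    """Convert an OpenCV BGR frame into a QPixmap for a QLabel."""
    h, w = im0.shape[:2]
    # Passing bytesPerLine=3*w keeps rows aligned even for odd widths.
    qimg = QImage(im0.data, w, h, 3 * w, QImage.Format_RGB888)
    return QPixmap.fromImage(qimg.rgbSwapped())  # swap BGR -> RGB

# Inside the loop: app.Label_Bild.setPixmap(frame_to_pixmap(im0))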
Example #23
def detect(save_img=False):
    '''
    input: save_img flag
    output: detection results written to the output folder
    '''
    # Get the output folder, input path, weights, and other parameters
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    # Select the device
    device = select_device(opt.device)
    # Remove any previous output folder and create a fresh one
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # Use FP16 if the device is a GPU
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    # Load the FP32 model; make sure the requested image size is divisible by the max stride s=32 (adjusted and returned if not)
    '''
    model = Model(
                  (model): Sequential(
                                       (0): Focus(...)
                                       (1): Conv(...)
                                            ...
                                       (24): Detect(...)
                    )
    '''
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # Switch to FP16
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    # Choose the dataloader according to the input source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # Get class names, e.g. names = ['person', 'bicycle', 'car', ..., 'toothbrush']
    names = model.module.names if hasattr(model, 'module') else model.names
    # Pick a random RGB color for each class's boxes, e.g. colors = [[178, 63, 143], [25, 184, 176], ..., [235, 137, 120]]
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    # Run one forward pass with a (1, 3, imgsz, imgsz) tensor to check that inference works
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    """
        path 图片/视频路径  'E:\...\bus.jpg'
        img 进行resize+pad之后的图片   1*3*re_size1*resize2的张量 (3,img_height,img_weight)
        img0 原size图片   (img_height,img_weight,3)          
        cap 当读取图片时为None,读取视频时为视频源   
    """
    for path, img, im0s, vid_cap in dataset:
        print(img.shape)
        img = torch.from_numpy(img).to(device)
        # Cast the image to FP16 as well
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # Add a batch axis at the front if there is none
        if img.ndimension() == 3:
            # (in_channels, size1, size2) to (1, in_channels, img_height, img_width)
            img = img.unsqueeze(0)  # add a dimension at index 0

        # Inference
        t1 = time_synchronized()
        """
        model:
        input: in_tensor (batch_size, 3, img_height, img_weight)
        output: 推理时返回 [z,x]
        z tensor: [small+medium+large_inference]  size=(batch_size, 3 * (small_size1*small_size2 + medium_size1*medium_size2 + large_size1*large_size2), nc)
        x list: [small_forward, medium_forward, large_forward]  eg:small_forward.size=( batch_size, 3种scale框, size1, size2, [xywh,score,num_classes]) 
        '''
               
        前向传播 返回pred[0]的shape是(1, num_boxes, nc)
        h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w
        num_boxes = 3 * h/32 * w/32 + 3 * h/16 * w/16 + 3 * h/8 * w/8
        pred[0][..., 0:4] 预测框坐标为xywh(中心点+宽长)格式
        pred[0][..., 4]为objectness置信度
        pred[0][..., 5:5+nc]为分类结果
        pred[0][..., 5+nc:]为Θ分类结果
        """
        # pred : (batch_size, num_boxes, no)  batch_size=1
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        # Run rotated NMS
        # pred : list[tensor(batch_size, num_conf_nms, [xylsθ, conf, classid])] θ∈[0,179]
        #pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        pred = rotate_non_max_suppression(pred,
                                          opt.conf_thres,
                                          opt.iou_thres,
                                          classes=opt.classes,
                                          agnostic=opt.agnostic_nms,
                                          without_iouthres=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(
                pred
        ):  # i:image index  det:(num_nms_boxes, [xylsθ,conf,classid]) θ∈[0,179]
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)  # output dir + image name
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            #print(txt_path)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :5] = scale_labels(img.shape[2:], det[:, :5],
                                          im0.shape).round()

                # Print results    det: (num_nms_boxes, [xylsθ, conf, classid]) θ∈[0,179]
                for c in det[:, -1].unique():  # unique() returns each detected class once
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string: "count class, "

                # Write results  det: (num_nms_boxes, [xywhθ, conf, classid]) θ∈[0,179]
                for *rbox, conf, cls in reversed(det):  # reversed so classes run from low to high
                    # rbox = [tensor(x), tensor(y), tensor(w), tensor(h), tensor(θ)] θ∈[0,179]
                    # if save_txt:  # Write to file
                    #     xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    #     with open(txt_path + '.txt', 'a') as f:
                    #         f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        classname = '%s' % names[int(cls)]
                        conf_str = '%.3f' % conf
                        rbox2txt(rbox, classname, conf_str,
                                 Path(p).stem,
                                 str(out + '/result_txt/result_before_merge'))
                        #plot_one_box(rbox, im0, label=label, color=colors[int(cls)], line_thickness=2)
                        plot_one_rotated_box(rbox,
                                             im0,
                                             label=label,
                                             color=colors[int(cls)],
                                             line_thickness=1,
                                             pi_format=False)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('   Results saved to %s' % Path(out))

    print('   All Done. (%.3fs)' % (time.time() - t0))
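Example #23 carries rotated boxes as [x, y, long side, short side, θ] with θ ∈ [0, 179]. plot_one_rotated_box is not shown in the example; the sketch below draws such a box with OpenCV's boxPoints as a plausible stand-in, noting that the repo's angle convention may differ.

import cv2
import numpy as np

def draw_rotated_box(img, rbox, color=(0, 255, 0), thickness=1):
    """Draw a rotated box given as (cx, cy, w, h, theta) with theta in degrees."""
    cx, cy, w, h, theta = [float(v) for v in rbox]
    pts = cv2.boxPoints(((cx, cy), (w, h), theta))     # 4 corner points of the rect
    pts = np.int32(pts).reshape((-1, 1, 2))            # polylines wants int32 points
    cv2.polylines(img, [pts], isClosed=True, color=color, thickness=thickness)

canvas = np.zeros((200, 200, 3), dtype=np.uint8)
draw_rotated_box(canvas, (100, 100, 80, 40, 30))  # a 30° box centered at (100, 100)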
Example #24
def detect(save_img=False):
    out, source, weights, imgsz = \
        opt.output, opt.source, opt.weights, opt.img_size
    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None

    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half()
              if half else img)  #if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Write results
                img2 = im0.copy()
                nperson = []
                nname = []
                for *xyxy, conf, cls in reversed(det):
                    if save_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        ########################################################################################################
                        ## create a classes variable (the class name)
                        classes = names[int(cls)]
                        ## pass the classes variable into the plotting call
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3,
                                     classes=classes)
                        ## store the coordinates of every object classified as a person
                        if classes == 'person':
                            nperson.append([
                                int(xyxy[0]),
                                int(xyxy[1]),
                                int(xyxy[2]),
                                int(xyxy[3])
                            ])
                        if classes == 'name_tag':
                            nname.append([
                                int(xyxy[0]),
                                int(xyxy[1]),
                                int(xyxy[2]),
                                int(xyxy[3])
                            ])
                ## if the non-person (name tag) list is non-empty, cover each tag region using the frame copy saved before drawing
                if len(nname) > 0:
                    for ii in range(len(nname)):
                        for pi in range(len(nperson)):
                            if nname[ii][1] >= nperson[pi][1] and nname[ii][
                                    3] <= nperson[pi][3] and nname[ii][
                                        0] >= nperson[pi][0] and nname[ii][
                                            2] <= nperson[pi][2]:
                                proi = img2[nname[ii][1]:nname[ii][3],
                                            nname[ii][0]:nname[ii][2]]
                                cv2.imwrite(
                                    "./temp/{0}_{1}_{2}_{3}.jpg".format(
                                        nname[ii][1], nname[ii][3],
                                        nname[ii][0], nname[ii][2]), proi)
                                roi = img2[nperson[pi][1]:nperson[pi][3],
                                           nperson[pi][0]:nperson[pi][2]]
                                im0[nperson[pi][1]:nperson[pi][3],
                                    nperson[pi][0]:nperson[pi][2]] = roi
                ########################################################################################################
            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Save results (image with detections)
            if save_img:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(
                        './inference/output/output.mp4',
                        cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                vid_writer.write(im0)
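The anonymization in Example #24 is plain numpy slice assignment: a clean copy of the frame is taken before any boxes are drawn, and the person region from that copy is pasted back over the annotated frame. The core move in isolation:

import numpy as np

frame = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
clean = frame.copy()   # img2 in the example: saved before drawing
frame[:, :] = 0        # stand-in for plot_one_box drawing over the frame

x1, y1, x2, y2 = 100, 50, 300, 400            # a person box (x1, y1, x2, y2)
frame[y1:y2, x1:x2] = clean[y1:y2, x1:x2]     # rows are y, columns are x
assert (frame[y1:y2, x1:x2] == clean[y1:y2, x1:x2]).all()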
Example #25
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    print('reading in Deep Sort Readings')
    DSOutput = pd.read_csv('/content/outputs.txt', sep=' ', header=None)

    print('Successfully read in Deep Sort Reading')
    print(DSOutput)

    history = defaultdict(list)

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    # print(imgsz)
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for frameNumber, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        print('pred shape', len(pred))
        # Process detections

        for i, det in enumerate(pred):  # detections per image
            print('det shape', det.shape)
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                num_cat = 6

                classes = (det[:, -1].cpu().numpy()).astype(int)
                one_hot_cats = np.eye(num_cat)[classes].reshape(-1, num_cat)

                counts_per_cat = one_hot_cats.sum(axis=0)
                #print("Countspercat ", counts_per_cat)
                score = counts_per_cat[[1, 3, 5]].sum() / len(det)
                #ConfidenceMetric =

                weighted_counts_per_cat = one_hot_cats.T @ np.asarray(
                    det[:, -2].cpu())
                WeightedCompliance = weighted_counts_per_cat[[
                    1, 3, 5
                ]].sum() / weighted_counts_per_cat.sum()
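                # np.eye(num_cat)[classes] turns the class ids into
                # (N, num_cat) indicator rows, so the column sum gives
                # per-class counts and one_hot.T @ conf gives per-class
                # confidence mass; columns 1, 3 and 5 are the fully-masked
                # categories, so both ratios estimate mask compliance.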

                # print(score)
                # print(WeightedCompliance)

                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()
                # print(det)

                person_coords = DSOutput[DSOutput.iloc[:, 0] ==
                                         frameNumber].values.reshape(-1, 11)

                CurrentFrameDetection = -1 * np.ones(len(det))  # -1 marks "not yet matched to a person"

                if len(person_coords) != 0:
                    for itemp, mask_coord in enumerate(det):

                        # overlaps = [Overlap(mask_coord[:4], person_coord, img.shape[2], img.shape[3]) for person_coord in person_coords[:,2:6]]
                        overlaps = [
                            Overlap(mask_coord[:4].cpu(), person_coord, 10000,
                                    10000)
                            for person_coord in person_coords[:, 2:6]
                        ]

                        best_overlap = np.argmax(overlaps)
                        best_person = person_coords[best_overlap, 1]
                        history[best_person].append(
                            mask_coord[-1].cpu().item())
                        CurrentFrameDetection[itemp] = best_person
                #print(frameNumber , history)
                # Print results
                #0 - no mask
                #1 - non medical full
                #2 - non medical partial
                #3 - medical full
                #4 medical partial
                #5 face shield
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    # print("n: " , n)
                    # print("c: " , c)
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                #save_txt = True
                CurrentFrameDetection = list(reversed(CurrentFrameDetection))
                for mask, (*xyxy, conf, cls) in enumerate(reversed(det)):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        # print(xywh)
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     score,
                                     label=label,
                                     color=colors[int(cls)],
                                     personid=CurrentFrameDetection[mask],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')
            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:

        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    compliance = 0
    total = 0
    for k, v in history.items():
        # 1,3,5 are full
        # 2,4 are partial
        # 0 no
        good_frames = sum(np.array(v) % 2 == 1)
        bad_frames = sum(np.array(v) % 2 == 0)
        if len(v) > 4:
            total += 1
            if good_frames >= bad_frames:
                compliance += 1
                print('Person {} is compliant'.format(k))
            else:
                print('Person {} is not compliant'.format(k))
    if total:
        print('overall compliance', compliance / total)

    print(f'Done. ({time.time() - t0:.3f}s)')
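
Example #25 assigns each mask detection to the best-overlapping Deep SORT track and then takes a per-person majority vote. A hedged sketch of that logic: `Overlap` is not defined in the source, so plain IoU stands in for it here, and the odd class ids (1, 3, 5) are the mask-worn categories per the inline comments above:

import numpy as np
from collections import defaultdict

def iou(a, b):
    # IoU of two [x1, y1, x2, y2] boxes; a stand-in for the undefined Overlap()
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area = lambda r: max(0.0, r[2] - r[0]) * max(0.0, r[3] - r[1])
    union = area(a) + area(b) - inter
    return inter / union if union > 0 else 0.0

history = defaultdict(list)  # track id -> per-frame mask class ids

def assign(mask_boxes, mask_classes, person_boxes, person_ids):
    # greedily attach each mask detection to the best-overlapping track
    for box, cls in zip(mask_boxes, mask_classes):
        best = int(np.argmax([iou(box, pb) for pb in person_boxes]))
        history[person_ids[best]].append(int(cls))

def compliant(classes, min_frames=5):
    # majority vote: odd ids (1, 3, 5) = mask worn, even ids = partial/none
    v = np.asarray(classes)
    return len(v) >= min_frames and (v % 2 == 1).sum() >= (v % 2 == 0).sum()

compliant(history[k]) reproduces the `good_frames >= bad_frames` vote above, with the same five-frame minimum.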
Example #26
0
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # take a pic !!
    img_arr, depth_arr = realsense.get_image(show=False)  # RGB array
    # print('shape=', depth_arr.shape)  # (480, 640)
    print('central depth=', depth_arr[240, 320])  # depth at the image center in mm; indexing is [row, col]
    x1, y1, x2, y2, depth_avg = 0, 0, 0, 0, 0
    xyz_obj = np.array([0, 0, 0])
    K = np.array([[609.674560546875, 0.0, 323.9862365722656],
                  [0.0, 608.5648193359375, 227.5126495361328],
                  [0.0, 0.0, 1.0]])  # camera intrinsic matrix (fx, fy on the diagonal; cx, cy in the last column)

    # convert
    img_pad = letterbox(img_arr[:, :, ::-1],
                        new_shape=imgsz)[0]  # first to BGR and padding
    img_pad = img_pad[:, :, ::-1].transpose(2, 0,
                                            1)  # BGR to RGB, to 3x416x416
    img_pad = np.ascontiguousarray(img_pad)  # make the array contiguous in memory
    # cv2.imwrite('letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1])  # save letterbox image

    realsense_once = 1
    # 1: take an image from realsense and infer without imwrite locally;
    # 0: infer all images from the path, including one taken from realsense
    if realsense_once == 0:
        cv2.imwrite('./inference/images/snap.jpg',
                    cv2.cvtColor(img_arr,
                                 cv2.COLOR_RGB2BGR))  #opencv assume BGR

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference !!
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    # infer one image from realsense
    if realsense_once:
        img = img_pad
        im0s = img_arr[:, :, ::-1]  # BGR
        path = '/home/hanwen/test_ros_ws/src/yolov5_test/scripts/inference/images/snap.jpg'
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # average depth
                    x1, y1, x2, y2 = xyxy[0].cpu().numpy(), xyxy[1].cpu(
                    ).numpy(), xyxy[2].cpu().numpy(), xyxy[3].cpu().numpy()
                    print('x1,y1,x2,y2 = ', x1, y1, x2, y2)
                    xc, yc = (x1 + x2) / 2, (y1 + y2) / 2
                    x1c, x2c, y1c, y2c = (x1 + xc) / 2, (x2 + xc) / 2, (
                        y1 + yc) / 2, (y2 + yc) / 2
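                    # sample depth at the four quarter points between the box
                    # center and its corners; zero readings are invalid and
                    # are dropped before averaging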
                    depth_4samples = [
                        depth_arr[int(y1c), int(x1c)], depth_arr[int(y1c),
                                                                 int(x2c)],
                        depth_arr[int(y2c), int(x1c)], depth_arr[int(y2c),
                                                                 int(x2c)]
                    ]
                    print('depth_4samples:', depth_4samples)
                    depth_validsamples = [b for b in depth_4samples if b > 0]
                    depth_avg = np.mean(depth_validsamples)
                    print('depth for grasping =', depth_avg)
                    xyz_obj = np.dot(np.linalg.inv(K),
                                     depth_avg *
                                     np.array([xc, yc, 1]).transpose()
                                     )  # estimated object center-XYZ (mm)
                    xyz_obj = xyz_obj.transpose()

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        im0 = np.array(
                            im0
                        )  # this fix the error https://github.com/opencv/opencv/issues/18120
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)  # error

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))  #done with one det

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    # realsense_once=0, use the default dataloader
    else:
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(pred,
                                       opt.conf_thres,
                                       opt.iou_thres,
                                       classes=opt.classes,
                                       agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                txt_path = str(Path(out) / Path(p).stem) + (
                    '_%g' % dataset.frame if dataset.mode == 'video' else '')
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1,
                                              0]]  # normalization gain whwh
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        # average depth
                        x1, y1, x2, y2 = xyxy[0].cpu().numpy(), xyxy[1].cpu(
                        ).numpy(), xyxy[2].cpu().numpy(), xyxy[3].cpu().numpy(
                        )
                        print('x1,y1,x2,y2 = ', x1, y1, x2, y2)
                        xc, yc = (x1 + x2) / 2, (y1 + y2) / 2
                        x1c, x2c, y1c, y2c = (x1 + xc) / 2, (x2 + xc) / 2, (
                            y1 + yc) / 2, (y2 + yc) / 2
                        depth_4samples = [
                            depth_arr[int(y1c), int(x1c)], depth_arr[int(y1c),
                                                                     int(x2c)],
                            depth_arr[int(y2c), int(x1c)], depth_arr[int(y2c),
                                                                     int(x2c)]
                        ]
                        print('depth_4samples:', depth_4samples)
                        depth_validsamples = [
                            b for b in depth_4samples if b > 0
                        ]
                        depth_avg = np.mean(depth_validsamples)
                        print('depth for grasping =', depth_avg)
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * 5 + '\n') %
                                        (cls, *xywh))  # label format

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=colors[int(cls)],
                                         line_thickness=3)

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))  #done with one det

                # Stream results
                if view_img:
                    cv2.imshow(p, im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release(
                                )  # release previous video writer

                            fourcc = 'mp4v'  # output video codec
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(
                                save_path, cv2.VideoWriter_fourcc(*fourcc),
                                fps, (w, h))
                        vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('All Done. (%.3fs)' % (time.time() - t0))  #done with all imgs
    return x1, y1, x2, y2, xyz_obj[0], xyz_obj[1], xyz_obj[2], depth_avg
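
The xyz_obj estimate above is the standard pinhole back-projection: with intrinsics K, a pixel (u, v) observed at depth z maps to X = z * K^-1 [u, v, 1]^T in the camera frame. A minimal sketch of just that step, with the K values copied from the example (depth in mm):

import numpy as np

K = np.array([[609.674560546875, 0.0, 323.9862365722656],
              [0.0, 608.5648193359375, 227.5126495361328],
              [0.0, 0.0, 1.0]])

def backproject(u, v, depth_mm, K=K):
    # pixel (u, v) plus depth -> camera-frame XYZ in mm
    return depth_mm * (np.linalg.inv(K) @ np.array([u, v, 1.0]))

print(backproject(320, 240, 500.0))  # e.g. the image center at 500 mm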
Example #27
0
def detect(save_img=False):

    plate_detect_model = os.path.join(pwd, 'models', 'best.pt')
    corner_detect_model = os.path.join(pwd, 'models',
                                       'corner_epoch_227_valoss_0.000117.pt')
    recognition_crnn_model = os.path.join(pwd, 'models',
                                          'checkpoint_123_acc_0.9940.pth')
    corner_model = cornernet()
    corner_model.eval()  # evaluation mode

    out, weights, view_img, save_txt, imgsz = \
        opt.output, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    generate_crnn_trainset = False
    if generate_crnn_trainset:
        source = os.path.join(pwd, 'CCPD2019', 'ccpd_base')
        save_path_train = os.path.join(pwd, 'data', 'crnn', 'warpimg')
        save_path_test = os.path.join(pwd, 'data', 'crnn', 'test')
        ftrain = open(
            os.path.join(pwd, 'CRNN_Chinese_Characters_Rec', 'lib',
                         'train_own.txt'), 'w')
        ftest = open(
            os.path.join(pwd, 'CRNN_Chinese_Characters_Rec', 'lib',
                         'test_own.txt'), 'w')
        if not os.path.exists(save_path_train):
            os.makedirs(save_path_train)
        if not os.path.exists(save_path_test):
            os.makedirs(save_path_test)
        all_length = len(os.listdir(source))
    else:
        # source = os.path.join(pwd, 'data', 'stage1', 'test')
        source = r'/data/CCPD2019/ccpd_challenge'
    print('number of images:', len(os.listdir(source)))
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(plate_detect_model,
                         map_location=device)  # load FP32 model
    corner_model_state = torch.load(corner_detect_model, map_location=device)
    corner_model.load_state_dict(corner_model_state['model_state_dict'])
    print('loaded the plate detected model:', plate_detect_model)
    print('loaded the corner detected model:', corner_detect_model)
    crnn_train_data = r''

    alphabet = '0123456789abcdefghjklmnpqrstuvwxyz云京冀吉宁川新晋桂沪津浙渝湘琼甘皖粤苏蒙西豫贵赣辽鄂闽陕青鲁黑'
    converter = utils.strLabelConverter(alphabet)
    nclass = len(alphabet) + 1
    # crnn_model = crnn.CRNN(32, 1, nclass, 190)
    if not generate_crnn_trainset:
        crnn_model = crnn.CRNN(32, 1, nclass, 256)
        checkpoint = torch.load(recognition_crnn_model)
        print('loaded the corner recognition model:', recognition_crnn_model)
        if 'state_dict' in checkpoint.keys():
            crnn_model.load_state_dict(checkpoint['state_dict'])
        else:
            crnn_model.load_state_dict(checkpoint)
    if torch.cuda.is_available():
        model = model.cuda()
        corner_model = corner_model.cuda()
        if not generate_crnn_trainset:
            crnn_model = crnn_model.cuda()
            crnn_model.eval()

    corner_model.eval()
    model.eval()

    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    count = 0
    n_correct = 0
    false_img = []
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        count += 1
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format
                    c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]),
                                                            int(xyxy[3]))
                    imgple = im0[int(xyxy[1]):int(xyxy[3]),
                                 int(xyxy[0]):int(xyxy[2]), :]
                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label='',
                                     color=colors[int(cls)],
                                     line_thickness=3)
                    warp_img, im0 = corner_detection(corner_model, imgple, im0,
                                                     xyxy, colors, cls, c1, c2)
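                    # Both filename-decoding branches below assume the CCPD
                    # naming scheme: after the replace/split, field 15 indexes
                    # a province-character table and fields 16:-2 index an
                    # alphanumeric table (chinesechar_reverse and
                    # alphaenglish_reverse are defined outside this snippet).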
                    if generate_crnn_trainset:
                        name = save_path.split(os.sep)[-1]
                        string = name.replace('.jpg', '').replace(
                            '.png',
                            '').replace('&', '_').replace('-', '_').split('_')
                        platestring = chinesechar_reverse[int(string[15])]
                        for ij in string[16:-2]:
                            platestring += alphaenglish_reverse[int(ij)]
                        platestring = platestring.upper()
                        if count < 3800:
                            ftest.write(
                                os.path.join(save_path_test, name) + ' ' +
                                platestring + '\n')
                            cv2.imwrite(os.path.join(save_path_test, name),
                                        warp_img)
                        else:
                            ftrain.write(
                                os.path.join(save_path_train, name) + ' ' +
                                platestring + '\n')
                            cv2.imwrite(os.path.join(save_path_train, name),
                                        warp_img)
                    else:
                        try:
                            name = save_path.split(os.sep)[-1].replace(
                                '.jpg', '').replace('.png', '').replace(
                                    '&', '_').replace('-', '_').split('_')
                            platestring = chinesechar_reverse[int(name[15])]
                            for ij in name[16:-2]:
                                platestring += alphaenglish_reverse[int(ij)]
                            platestring = platestring.upper()
                        except:
                            pass
                        #
                        sim_pred = recognition(warp_img, crnn_model, converter)
                        sim_pred = sim_pred.upper()
                        #
                        try:
                            if sim_pred == platestring:
                                print()
                                n_correct += 1
                            else:
                                false_img.append(platestring)
                                cv2.imwrite(save_path, warp_img)
                            # print('gt: ', platestring, ',predict: ', sim_pred, ' ', sim_pred == platestring, ' ', n_correct,
                            #       ' ', count)
                        except:
                            pass
                        #
                        sim_pred = sim_pred[:2] + ' ' + sim_pred[2:]
                        print('predict: ', sim_pred)
                        img_PIL = Image.fromarray(im0).convert('RGB')
                        font = ImageFont.truetype(
                            '/usr/share/fonts/truetype/wqy/wqy-microhei.ttc',
                            50)  #('simsun.ttc', 50)
                        # font color
                        fillColor = (255, 0, 0)
                        draw = ImageDraw.Draw(img_PIL)
                        draw.text((
                            c1[0],
                            c1[1] - 39,
                        ),
                                  sim_pred,
                                  font=font,
                                  fill=fillColor)
                        im0 = np.asarray(img_PIL)
            # Print time (inference + NMS)
            # print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img and (not generate_crnn_trainset):
                if dataset.mode == 'images':
                    # pass
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)
    if generate_crnn_trainset:
        print('Generated CRNN training set size: {}, test set size: {}'.format(
            len(os.listdir(save_path_train)), len(os.listdir(save_path_test))))
    else:
        try:
            print('n_correct: {}, count: {}, test accuracy: {}'.format(
                n_correct, count, n_correct / count))
            print('false_image', false_img)
        except:
            pass
    if generate_crnn_trainset:
        ftrain.close()
        ftest.close()
    print('Done. (%.3fs)' % (time.time() - t0))
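
The `recognition` helper used above is not shown; with `utils.strLabelConverter`, CRNN inference is conventionally a greedy CTC decode: argmax per time step, collapse repeated labels, drop the blank at index 0. A hedged standalone sketch of that decode:

import torch

def ctc_greedy_decode(logits, alphabet):
    # logits: (T, 1, nclass) CRNN output; index 0 is the CTC blank
    preds = logits.argmax(dim=2).squeeze(1)   # best label per time step
    chars, prev = [], 0
    for p in preds.tolist():
        if p != 0 and p != prev:              # skip blanks and repeats
            chars.append(alphabet[p - 1])     # labels are offset by the blank
        prev = p
    return ''.join(chars)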
Example #28
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))
    file_name = []
    file_code = []
    # result = dict()

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    save_dir.mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        x_value = dict()  # center-x -> class id; initialized here so the write below never sees an undefined name
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        x_value[xywh[0]] = int(cls)  # keyed by center-x; sorted later for left-to-right reading

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

        file_name.append(os.path.split(path)[-1])
        res = ''
        for key in sorted(x_value):
            res += str(x_value[key])
        file_code.append(res)
    save_csv_path = os.path.join(os.getcwd(), str(save_dir), 'submission.csv')
    print(save_csv_path)
    sub = pd.DataFrame({"file_name": file_name, 'file_code': file_code})
    sub.to_csv(save_csv_path, index=False)

    print(f'Done. ({time.time() - t0:.3f}s)')
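
Example #28 keys each detection by its normalized center-x and reads the classes back in sorted order, i.e. left to right, to form the submission code. A minimal sketch of that assembly step (names illustrative):

def boxes_to_code(centers_x, classes):
    # sort boxes by center x and concatenate their class ids left to right
    order = sorted(range(len(centers_x)), key=lambda i: centers_x[i])
    return ''.join(str(int(classes[i])) for i in order)

assert boxes_to_code([0.7, 0.2, 0.5], [3, 1, 2]) == '123'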
Example #29
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    if opt.fixed_colors:
        rng = random.Random(1825)  # stdlib random has no RandomState (that is NumPy's API); use a seeded instance
        colors = [[rng.randint(0, 255) for _ in range(3)] for _ in names]
    else:
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = Path(
                    path[i]), '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = Path(path), '', im0s, getattr(
                    dataset, 'frame', 0)

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f'{n} {names[int(c)]}s, '  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=opt.line_thickness)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
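
Example #29's `opt.fixed_colors` branch seeds the RNG so the per-class palette is reproducible across runs. A small sketch of the same idea with an isolated stdlib generator (class count and seed are illustrative):

import random

def class_colors(n, seed=None):
    # one BGR triple per class; a fixed seed yields the same palette every run
    rng = random.Random(seed) if seed is not None else random
    return [[rng.randint(0, 255) for _ in range(3)] for _ in range(n)]

fixed = class_colors(80, seed=1825)   # reproducible across runs
varied = class_colors(80)             # different each run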
Example #30
0
def detect(save_img=False):
    logging.basicConfig(filename='detect.log', level=logging.INFO)
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))
    
    imglist = opt.imlist
    source_list = source.split('\n') 
    
    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
#     save_dir = Path(increment_path(Path(opt.project) / opt.name))  # increment run
#     (save_dir / 'labels' if save_txt else save_dir).mkdir  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = False
        if imglist:
            dataset_list = []
            for i in range(len(source_list)):
                print("source_list : ", source_list[i])
                try:
                    dataset_list.append(LoadImages(source_list[i], img_size=imgsz, stride=stride))
                except:
                    print("error!!!!!: ", source_list[i])

        else:    
            dataset = LoadImages(source, img_size=imgsz, stride=stride)
    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    
     #sujin
    if imglist:
        j = len(dataset_list)
        print("imglist True,  j = ", j)
    else:
        j = 1
        print("imglist False, j = ", j)
        
    
    count = 0
    for k in range(j):

        if imglist:  # note: when imglist is False, the single dataset built above is never iterated
            dataset = dataset_list[k]
            for path, img, im0s, vid_cap in dataset:

                img = torch.from_numpy(img).to(device)
                img = img.half() if half else img.float()  # uint8 to fp16/32
                img /= 255.0  # 0 - 255 to 0.0 - 1.0


                if img.ndimension() == 3:
                    img = img.unsqueeze(0)

                # Inference
                t1 = time_synchronized()
                pred = model(img, augment=opt.augment)[0]

                # Apply NMS
                pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
                t2 = time_synchronized()

                # Apply Classifier
                if classify:
                    pred = apply_classifier(pred, modelc, img, im0s)
                prevTime = 0  # note: reset on every batch, so the first sec/fps_ sample below is not meaningful

                # Process detections
                for i, det in enumerate(pred):  # detections per image

                    curTime = time.time() * 1000
                    sec = curTime - prevTime
                    prevTime = curTime  # store the current time as the previous time

                    if webcam:  # batch_size >= 1
                        p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
                        fps_ = 1/(sec)

                    else:
                        p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

                    p = Path(p)  # to Path
                    print("p.name=", p.name)

                    save_path = str(save_dir / p.name)  # img.jpg
                    txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
                    s += '%gx%g ' % img.shape[2:]  # print string
                    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

                    if len(det):
                        # Rescale boxes from img_size to im0 size
                        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                        # Print results
                        for c in det[:, -1].unique():
                            n = (det[:, -1] == c).sum()  # detections per class
                            s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                        # Write results
                        for *xyxy, conf, cls in reversed(det):
                            if save_txt:  # Write to file
                                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                                line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                                with open(txt_path + '.txt', 'a') as f:
                                    f.write(('%g ' * len(line)).rstrip() % line + '\n')

                            if save_img or view_img:  # Add bbox to image
                                label = f'{names[int(cls)]} {conf:.2f}'
                                pt_start = time.time()*1000

                                plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                                pt_end = time.time()*1000  
                                
                        
                        if not os.path.exists(result_dir):  # result_dir is assumed to be defined globally
                            os.makedirs(result_dir)

                        # note: xyxy and cls here hold only the last box from the loop above
                        with open(result_dir + p.name[4:-4] + '.txt', 'w') as f:
                            xmin=(int(xyxy[0]))
                            ymin=(int(xyxy[1]))
                            xmax=(int(xyxy[2]))
                            ymax=(int(xyxy[3]))
                            h, w, c = im0.shape  # height, width, channels

                            print("c h w = ", c, h, w)
                            absolute_x = xmin + 0.5 * (xmax - xmin)
                            absolute_y = ymin + 0.5 * (ymax - ymin)

                            absolute_width = xmax - xmin
                            absolute_height = ymax - ymin

                            x = str(absolute_x / w)
                            y = str(absolute_y / h)
                            width = str(absolute_width / w)
                            height = str(absolute_height / h)

                            f.write(str(int(cls))+ " " + x + " " + y + " " + width + " " + height)
                            count += 1
                    else:
                        tl = 3  # line/font thickness (the 'or round(...)' fallback was dead code)
                        tf = max(tl - 1, 1)  # font thickness


                    # Print time (inference + NMS)
                    print(f'{s}Done. ({t2 - t1:.3f}s)')


    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
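
Example #30's label write converts absolute corner coordinates to normalized YOLO center/size values. A standalone sketch of the same arithmetic:

def xyxy_to_yolo_line(cls, xmin, ymin, xmax, ymax, img_w, img_h):
    # absolute pixel corners -> 'cls cx cy w h', all normalized to [0, 1]
    cx = (xmin + 0.5 * (xmax - xmin)) / img_w
    cy = (ymin + 0.5 * (ymax - ymin)) / img_h
    w = (xmax - xmin) / img_w
    h = (ymax - ymin) / img_h
    return f'{int(cls)} {cx} {cy} {w} {h}'

print(xyxy_to_yolo_line(0, 10, 20, 110, 70, 640, 480))  # e.g. a 100x50 box at (10, 20)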