Example no. 1
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    # Load model 2
    model1 = attempt_load('gunNduckAugmentFinal.pt',
                          map_location=device)  # load FP32 model

    if half:
        model.half()  # to FP16
        # model 2 to half precision
        model1.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Get names and colors for model 2
    names1 = model1.module.names if hasattr(model1, 'module') else model1.names
    colors1 = [[random.randint(0, 255) for _ in range(3)]
               for _ in range(len(names1))]

    # Run inference
    toReturn = []
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    if device.type != 'cpu':
        _ = model(img.half() if half else img)  # run once to warm up
    # The main loop
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections on model 1
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    toReturn += [
                        names[int(cls)],
                        (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                         gn).view(-1).tolist(),
                        '%.2f' % (conf)
                    ]
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywhs
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
        print('%sDone. (%.3fs)' % (s, t2 - t1))
        if view_img:
            cv2.imshow(p, im0)
            if cv2.waitKey(1) == ord('q'):  # q to quit
                raise StopIteration
        # Inference with model 2
        pred1 = model1(img, augment=opt.augment)[0]
        pred1 = non_max_suppression(pred1,
                                    opt.conf_thres,
                                    opt.iou_thres,
                                    classes=opt.classes,
                                    agnostic=opt.agnostic_nms)
        # Process detections on model 2
        for i, det in enumerate(pred1):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss ' % (n, names1[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    toReturn += [
                        names1[int(cls)],
                        (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                         gn).view(-1).tolist(),
                        '%.2f' % (conf)
                    ]
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywhs
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names1[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors1[int(cls)],
                                     line_thickness=3)

            print('%sDone. (%.3fs)' % (s, t2 - t1))
            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # macOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return toReturn
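
This example reads its settings from a module-level `opt` namespace that is never shown. A minimal argparse sketch of the fields the function touches follows; the flag names mirror the attributes used above, but the defaults are illustrative assumptions, not taken from the original:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov5s.pt')  # detection weights
parser.add_argument('--source', type=str, default='inference/images')  # input path or stream
parser.add_argument('--output', type=str, default='inference/output')  # results folder
parser.add_argument('--img-size', type=int, default=640)
parser.add_argument('--conf-thres', type=float, default=0.4)
parser.add_argument('--iou-thres', type=float, default=0.5)
parser.add_argument('--device', default='')  # cuda device, e.g. 0, or 'cpu'
parser.add_argument('--view-img', action='store_true')
parser.add_argument('--save-txt', action='store_true')
parser.add_argument('--classes', nargs='+', type=int)  # optional class filter
parser.add_argument('--agnostic-nms', action='store_true')
parser.add_argument('--augment', action='store_true')
parser.add_argument('--update', action='store_true')
opt = parser.parse_args()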
Example no. 2
def run(
        weights='yolov5s.pt',  # model.pt path(s)
        source='data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        update=False,  # update all models
        project='runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
):
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check image size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet50', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('resnet50.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if save_conf else (cls,
                                                         *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=line_thickness)
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
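
Unlike Example no. 1, this version takes every setting as a keyword argument, so it can be called directly from Python. A hypothetical invocation (the paths are placeholders, not from the original):

# Detect on a folder of images, saving annotated copies plus
# normalized-xywh label files with confidences appended.
run(weights='yolov5s.pt',
    source='data/images',
    conf_thres=0.4,
    save_txt=True,
    save_conf=True)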
Example no. 3
def detect(save_img=False, write_label=False):
    out, source_type, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source_type, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = None
    if source_type == 'webcam':
        webcam = source.isnumeric() or source.startswith(('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    elif source_type == 'image':
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, rotate=opt.rotate)
    else:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadVideo(source, img_size=imgsz, rotate=opt.rotate)
        vid_path = opt.source

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    # img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # im0s_rotate = np.rot90(im0s, k=opt.rotate, axes=(0, 1)).copy()
        # img0_rotate = copy.deepcopy(np.ascontiguousarray(img0_rotate))

        # img_rotate = np.rot90(img, k=opt.rotate, axes=(0, 1)).copy()
        # img_rotate = copy.deepcopy(np.ascontiguousarray(img_rotate))

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)

            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'image' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, write_label=write_label, color=colors[int(cls)], line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                im0 = cv2.resize(im0, None, fx=opt.stream_scale, fy=opt.stream_scale)
                cv2.imshow(p, im0)
                if dataset.mode == 'video':
                    cv2.waitKey(delay=1)
                elif cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))

    print('Done. (%.3fs)' % (time.time() - t0))
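
Each of these examples writes labels with the same `xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn` idiom. What that normalization does, shown as a standalone sketch using only PyTorch (the helper name here is made up for illustration):

import torch

def xyxy_to_normalized_xywh(xyxy, im_h, im_w):
    """Convert one (x1, y1, x2, y2) pixel box to normalized (xc, yc, w, h)."""
    x1, y1, x2, y2 = xyxy
    gn = torch.tensor([im_w, im_h, im_w, im_h])  # whwh gain, as in the examples
    box = torch.tensor([(x1 + x2) / 2., (y1 + y2) / 2., x2 - x1, y2 - y1])
    return (box / gn).tolist()

# A 100x200 box centered at (320, 240) in a 640x480 image:
print(xyxy_to_normalized_xywh((270, 140, 370, 340), im_h=480, im_w=640))
# -> [0.5, 0.5, 0.15625, 0.4166...]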
def detect_recog():
    source, weights_detect, weights_recog, view_img, save_txt, imgsz_detect, imgsz_recog, save_img = \
        opt.source, opt.weights_detect, opt.weights_recog, opt.view_img, opt.save_txt, \
        opt.img_size_detect, opt.img_size_recog, opt.save_img

    # Set Dataloader
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))
    vid_path, vid_writer = None, None
    shmstream = source.startswith('/tmp/')
    if shmstream:
        source = f"shmsrc socket-path={source} \
                ! video/x-raw, format=BGR, width={int(imgsz_detect*4/3)}, height={imgsz_detect}, pixel-aspect-ratio=1/1, framerate=30/1 \
                ! decodebin \
                ! videoconvert \
                ! appsink"

        dataset = LoadStreamsBuffered(source, img_size=imgsz_detect)
    elif webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz_detect)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz_detect)

    # Directories
    if opt.save_dir == 'runs/exp':
        save_dir = Path(
            increment_path(Path(opt.project) / opt.name,
                           exist_ok=opt.exist_ok))  # increment run
    else:
        save_dir = Path(opt.save_dir)
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model_detect = attempt_load(weights_detect,
                                map_location=device)  # load FP32 model
    model_recog = attempt_load(weights_recog,
                               map_location=device)  # load FP32 model
    imgsz_detect = check_img_size(
        imgsz_detect, s=model_detect.stride.max())  # check img_size
    imgsz_recog = check_img_size(imgsz_recog,
                                 s=model_recog.stride.max())  # check img_size
    if half:
        model_detect.half()  # to FP16
        model_recog.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()
    else:
        modelc = None

    # Get names and colors
    names_detect = model_detect.module.names if hasattr(
        model_detect, 'module') else model_detect.names
    names_recog = model_recog.module.names if hasattr(
        model_recog, 'module') else model_recog.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names_detect]

    t0 = time.time()
    img = torch.zeros((1, 3, imgsz_detect, imgsz_detect),
                      device=device)  # init img
    img_lp = torch.zeros((1, 3, imgsz_recog, imgsz_recog),
                         device=device)  # init img
    if device.type != 'cpu':  # run once
        _ = model_detect(img.half() if half else img)
        _ = model_recog(img.half() if half else img)

    # Run inference
    shmcounter = 0
    for path, img, im0s, vid_cap in dataset:
        if img is None:
            continue

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        t1 = time_synchronized()

        # Inference
        pred = model_detect(img, augment=opt.augment)[0]
        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres_detect,
                                   opt.iou_thres_detect,
                                   classes=opt.classes_detect,
                                   agnostic=opt.agnostic_nms)

        t2 = time_synchronized()
        all_t2_t1 = t2 - t1

        # Print time (inference + NMS)
        print('Done Detection. (%.3fs)' % (all_t2_t1))

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if shmstream:
                p, s, im0 = Path(
                    f"{shmcounter}.jpg"), '%g: ' % i, im0s[i].copy()
                shmcounter += 1
            elif webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')

            s += '%gx%g ' % img.shape[2:]  # print string
            # normalization gain whwh
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    # add to string
                    s += '%g %ss, ' % (n, names_detect[int(c)])

                # Write results
                # But first, Recognition
                all_t2_t1 = recog(det, im0, device, img_lp, imgsz_recog, half,
                                  model_recog, all_t2_t1, classify, modelc,
                                  names_recog, save_txt, gn, txt_path,
                                  save_img, view_img, colors)

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

        # Print time (inference + NMS)
        print('%sDone Recognition. (%.3fs)' % (s, all_t2_t1))

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
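
The `recog()` helper that performs the second-stage recognition is not included in the snippet. A rough sketch of what such a stage typically does — crop each detected box, resize it to the recognition input size, and run the second model — assuming `cv2`, `numpy as np`, and `torch` are imported as in the surrounding code (everything below is a hypothetical illustration, not the original helper):

def recog_sketch(det, im0, model_recog, imgsz_recog, device, half):
    """Run the recognition model on each detected region (illustrative only)."""
    results = []
    for *xyxy, conf, cls in det:
        x1, y1, x2, y2 = (int(v) for v in xyxy)
        crop = im0[y1:y2, x1:x2]  # cut the detection out of the original frame
        crop = cv2.resize(crop, (imgsz_recog, imgsz_recog))
        crop = crop[:, :, ::-1].transpose(2, 0, 1)  # BGR -> RGB, HWC -> CHW
        crop = torch.from_numpy(np.ascontiguousarray(crop)).to(device)
        crop = (crop.half() if half else crop.float()) / 255.0
        pred = model_recog(crop.unsqueeze(0))[0]  # second-stage forward pass
        results.append(pred)
    return results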
Example no. 5
    def pyqt_detect(self, show_frame, tracked_X, tracked_Y):
        self.tracked_X = tracked_X
        self.tracked_Y = tracked_Y
        print("XXXXXXXX", self.tracked_X)
        print("YYYYYYYY", self.tracked_Y)
        source = r'rtsp://' + cam_user + r':' + cam_psw + r'@' + cam_ip + r':' + cam_port + r'/id=1'
        # source=r'rtsp://*****:*****@192.168.1.110/id=1'
        # weights= r'./yolov5x.pt'
        view_img = True
        imgsz = 640
        # = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
        webcam = source.isnumeric() or source.endswith(
            '.txt') or source.lower().startswith(
                ('rtsp://', 'rtmp://', 'http://'))

        # Initialize
        set_logging()
        device = select_device()  # pick the device
        # use float16 if the device is a GPU
        half = device.type != 'cpu'  # half precision only supported on CUDA

        # Load model
        # load the FP32 model; make sure the requested image size is divisible by 32 (adjust it if not)
        model = attempt_load(weights, map_location=device)  # load FP32 model
        stride = int(model.stride.max())  # model stride
        imgsz = check_img_size(imgsz, s=stride)  # check img_size
        if half:
            # switch to float16
            model.half()  # to FP16

        # Set Dataloader
        # choose the dataloader according to the input source
        vid_path, vid_writer = None, None
        if webcam:
            view_img = check_imshow()
            cudnn.benchmark = True  # set True to speed up constant image size inference
            dataset = LoadStreams(source, img_size=imgsz, stride=stride)
        else:
            save_img = True
            # to display the video while detecting, add view_img = True here
            view_img = True
            dataset = LoadImages(source, img_size=imgsz, stride=stride)

        # Get names and colors
        # get the class names
        names = model.module.names if hasattr(model, 'module') else model.names
        # set the bounding-box colors
        colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

        # Run inference
        if device.type != 'cpu':
            model(
                torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                    next(model.parameters())))  # run once
        t0 = time.time()
        # run one forward pass to check that the program works
        """
           path: image/video path
           img: the image after resize + padding
           img0: the image at its original size
           cap: None when reading an image, the video source when reading video
        """
        for path, img, im0s, vid_cap in dataset:
            img = torch.from_numpy(img).to(device)
            # cast the image to float16 as well
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            # if there is no batch dimension, add one at the front
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=False)[0]
            """
                   前向传播 返回pred的shape是(1, num_boxes, 5+num_class)
                   h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w
                   num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
                   pred[..., 0:4]为预测框坐标
                   预测框坐标为xywh(中心点+宽长)格式
                   pred[..., 4]为objectness置信度
                   pred[..., 5:-1]为分类结果
            """

            # Apply NMS
            pred = non_max_suppression(pred,
                                       0.25,
                                       0.45,
                                       classes=None,
                                       agnostic=False)
            t2 = time_synchronized()

            # Process detections
            # process the detections for each frame
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                    ), dataset.count
                else:
                    p, s, im0, frame = path, '', im0s, getattr(
                        dataset, 'frame', 0)

                p = Path(p)  # to Path
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(im0.shape)[[1, 0, 1,
                                              0]]  # normalization gain whwh
                if len(det):
                    # Rescale boxes from img_size to im0 size
                    # map the box coordinates from the resized/padded image back to the original image
                    # the coordinates are in xyxy format at this point
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    # print the number of detections per class
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                    # Write results
                    countCls = 0
                    self.predList.clear()

                    checkTracked = False
                    for *xyxy, conf, cls in reversed(det):
                        # draw the box
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                        c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]),
                                                                int(xyxy[3]))

                        # check whether the tracked target is linked to the previous frame
                        cX = 0
                        cY = 0
                        # print(self.TRACKFLAG)
                        # print(checkTracked)
                        #print(abs(self.tracked_X - cX), abs(self.tracked_Y - cY))
                        print(abs(self.tracked_X - cX),
                              abs(self.tracked_Y - cY))
                        if self.TRACKFLAG:
                            cX = (c1[0] + c2[0]) / 2
                            cY = (c1[1] + c2[1]) / 2
                            # print((c1[0]+c2[0])/2,(c1[1]+c2[1])/2)

                            # if the target is tracked, move the camera by the offset
                            print('loss:', self.tracked_X - self.midX)
                            if (self.tracked_X - self.midX < -10):
                                print('left:', self.tracked_X - self.midX)
                                self.ipcCon.Move(EnumPtzMoveType.moveleft,
                                                 self.cam_speed)
                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                            elif (self.tracked_X - self.midX > 10):
                                print('right:', self.tracked_X - self.midX)
                                self.ipcCon.Move(EnumPtzMoveType.moveright,
                                                 self.cam_speed)
                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                            else:

                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                                # self.TRACKFLAG=False

                            if abs(self.tracked_X -
                                   cX) < 50 and abs(self.tracked_Y - cY) < 50:
                                self.tracked_X = cX
                                self.tracked_Y = cY
                                checkTracked = True
                                break
                                # print('tracked_X',self.tracked_X)
                                # print('tracked_Y',self.tracked_Y)
                            else:
                                checkTracked = False
                                self.ipcCon.Move(EnumPtzMoveType.movestop)
                        # store this frame's boxes to associate them with the next frame
                        predBox = (c1, c2)
                        self.predList.append(predBox)
                        # print(c1)
                        # print("******************")

                        # print("ffffffffffffffffffffffff")
                        # print(self.predList[-1])
                        # print("ffffffffffffffffffffffff")
                        # print(label,c1,c2)

                    if not checkTracked:
                        self.TRACKFLAG = False
                    print(self.TRACKFLAG)
                    # print(checkTracked)

                    # for pred in self.predList:

                    # print(pred[:1])
                # Print time (inference + NMS)
                # print(f'{s}Done. ({t2 - t1:.3f}s)')

                # rearrange the pixel layout (BGR -> RGB) for display
                cv2.cvtColor(im0, cv2.COLOR_BGR2RGB, im0)
                show_frame(im0)
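
The camera-control logic in this example compares the tracked x-coordinate against the frame midpoint and pans only outside a ±10 px deadband, stopping after each nudge. Isolated as a small function (`ipcCon` and `EnumPtzMoveType` are this project's PTZ API; the function itself is a sketch, not the original code):

def pan_toward_target(ipcCon, tracked_x, mid_x, cam_speed, deadband=10):
    """Pan so the tracked target drifts toward the frame center (sketch)."""
    offset = tracked_x - mid_x
    if offset < -deadband:
        ipcCon.Move(EnumPtzMoveType.moveleft, cam_speed)   # target left of center
    elif offset > deadband:
        ipcCon.Move(EnumPtzMoveType.moveright, cam_speed)  # target right of center
    ipcCon.Move(EnumPtzMoveType.movestop)  # always stop after the nudge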
Example no. 6
def run(
        weights=ROOT / 'yolov3.pt',  # model.pt path(s)
        source=ROOT / 'data/images',  # file/dir/URL/glob, 0 for webcam
        imgsz=640,  # inference size (pixels)
        conf_thres=0.25,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=False,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project=ROOT / 'runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        dnn=False,  # use OpenCV DNN for ONNX inference
):
    source = str(source)
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
    is_url = source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://', 'https://'))
    webcam = source.isnumeric() or source.endswith('.txt') or (is_url
                                                               and not is_file)
    if is_url and is_file:
        source = check_file(source)  # download

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=dnn)
    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half
    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
    if pt:
        model.model.half() if half else model.model.float()

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source,
                              img_size=imgsz,
                              stride=stride,
                              auto=pt and not jit)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source,
                             img_size=imgsz,
                             stride=stride,
                             auto=pt and not jit)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, *imgsz).to(device).type_as(
                next(model.model.parameters())))  # warmup
    dt, seen = [0.0, 0.0, 0.0], 0
    for path, im, im0s, vid_cap, s in dataset:
        t1 = time_sync()
        im = torch.from_numpy(im).to(device)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        im /= 255  # 0 - 255 to 0.0 - 1.0
        if len(im.shape) == 3:
            im = im[None]  # expand for batch dim
        t2 = time_sync()
        dt[0] += t2 - t1

        # Inference
        visualize = increment_path(save_dir / Path(path).stem,
                                   mkdir=True) if visualize else False
        pred = model(im, augment=augment, visualize=visualize)
        t3 = time_sync()
        dt[1] += t3 - t2

        # NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        dt[2] += time_sync() - t3

        # Second-stage classifier (optional)
        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # per image
            seen += 1
            if webcam:  # batch_size >= 1
                p, im0, frame = path[i], im0s[i].copy(), dataset.count
                s += f'{i}: '
            else:
                p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # im.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # im.txt
            s += '%gx%g ' % im.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if save_crop else im0  # for save_crop
            annotator = Annotator(im0,
                                  line_width=line_thickness,
                                  example=str(names))
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(im.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if save_conf else (cls,
                                                         *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        annotator.box_label(xyxy, label, color=colors(c, True))
                        if save_crop:
                            save_one_box(xyxy,
                                         imc,
                                         file=save_dir / 'crops' / names[c] /
                                         f'{p.stem}.jpg',
                                         BGR=True)

            # Print time (inference-only)
            LOGGER.info(f'{s}Done. ({t3 - t2:.3f}s)')

            # Stream results
            im0 = annotator.result()
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release()  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer[i].write(im0)

    # Print results
    t = tuple(x / seen * 1E3 for x in dt)  # speeds per image
    LOGGER.info(
        f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}'
        % t)
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}{s}")
    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)
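
This variant keeps per-stage timings in `dt` and reports averaged pre-process/inference/NMS speeds. Note that the warmup line unpacks the size with `*imgsz`, so when calling `run()` directly it is safest to pass a (height, width) tuple rather than the bare int the signature's default suggests. A hypothetical call (the video path is a placeholder):

# FP16 inference on a video, saving a crop of every detected box.
run(weights=ROOT / 'yolov3.pt',
    source='video.mp4',
    imgsz=(640, 640),
    half=True,
    save_crop=True)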
Example no. 7
def Detect(save_img=False):
    weights = 'weights.pt'
    source = './INPUTS/image.jpg'
    out = './OUTPUTS'
    save_txt = './OUTPUTS'
    view_img = False
    imgsz = 640
    device = ''
    augment = False
    conf_thres = 0.5
    iou_thres = 0.6
    classes = None
    agnostic_nms = False
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    set_logging()
    device = select_device(device)
    if os.path.exists(out):
        shutil.rmtree(out)
    os.makedirs(out)
    half = device.type != 'cpu'

    model = attempt_load(weights, map_location=device)
    imgsz = check_img_size(imgsz, s=model.stride.max())
    if half:
        model.half()

    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt', map_location=device)['model'])
        modelc.to(device).eval()

    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)
    _ = model(img.half() if half else img) if device.type != 'cpu' else None
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)
        t2 = time_synchronized()

        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        for i, det in enumerate(pred):
            if webcam:
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()
                    s += '%g %ss, ' % (n, names[int(c)])

                pred_lists = []
                for *xyxy, conf, cls in reversed(det):
                    flag = True
                    if save_txt:
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()
                        pred_str = ('%g ' * 5) % (cls, *xywh)
                        pred_list = pred_str.split(' ')
                        pred_list.pop()
                        pred_list.append('%.2f' % (conf))
                        for item in pred_lists:
                            if abs(float(item[1]) -
                                   float(pred_list[1])) < 0.05 and abs(
                                       float(item[2]) -
                                       float(pred_list[2])) < 0.05 and abs(
                                           float(item[3]) -
                                           float(pred_list[3])) < 0.05 and abs(
                                               float(item[4]) -
                                               float(pred_list[4])) < 0.05:
                                if (float(pred_list[5]) < float(item[5])):
                                    flag = False
                                else:
                                    print(item)
                                    pred_lists.remove(item)
                        if flag:
                            pred_lists.append(pred_list)
                    if (save_img or view_img) and flag:
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                file_list_preds = []
                for item in pred_lists:
                    pred_str = ' '.join(item)
                    file_list_preds.append(pred_str + '\n')
                with open(txt_path + '.txt', 'w') as fp:
                    fp.writelines(file_list_preds)

            print('%sDone. (%.3fs)' % (s, t2 - t1))

            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):
                    raise StopIteration

            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()

                        fourcc = 'mp4v'
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))

    print('Done. (%.3fs)' % (time.time() - t0))
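
The nested loop in this example suppresses near-duplicate label rows: two boxes whose normalized x, y, w, h each differ by less than 0.05 are treated as the same object, and only the higher-confidence row survives. The same idea reads more clearly as a helper working on float rows (a sketch, not the original string-based code):

def dedup_predictions(preds, tol=0.05):
    """Keep the highest-confidence row among near-identical boxes.

    preds: list of [cls, x, y, w, h, conf] rows (floats, xywh normalized).
    """
    kept = []
    for row in preds:
        matched = False
        for i, other in enumerate(kept):
            if all(abs(row[j] - other[j]) < tol for j in range(1, 5)):
                if row[5] > other[5]:
                    kept[i] = row  # the new row wins; replace the old one
                matched = True
                break
        if not matched:
            kept.append(row)
    return kept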
Example no. 8
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz, save_csv = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.save_csv
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    # if os.path.exists(out):
    #     shutil.rmtree(out)  # delete output folder
    # os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # csv output
    csv_output = []
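    # rows accumulate across all images and are written once after the loop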

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        with open(txt_path + '.txt', 'a') as f:
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_csv:  # csv output append
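                        # divide (x1, y1, x2, y2) by (w, h, w, h) so coordinates are normalized to [0, 1]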
                        xyxyNormalized = (torch.tensor(xyxy) / gn).tolist()
                        csv_output.append([
                            Path(p).name,
                            int(cls),
                            float(conf), *xyxyNormalized
                        ])

                    if save_img or view_img:  # Add bbox to image
                        # label = '%s' % (names[int(cls)])
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=2)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)])

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    # write csv output
    if save_csv:
        current_time = datetime.datetime.now(
            pytz.timezone('Asia/Seoul')).strftime('%Y-%m-%d-%H-%M')
        with open(
                f'/DL_data_big/AI_factory_dataset/tracking_recognition/ai_factory_tracking/dataset/yolo_output/output_{current_time}.csv',
                'w',
                newline='') as f:
            f_writer = csv.writer(f)
            f_writer.writerow([
                'ImageID', 'LabelName', 'Conf', 'XMin', 'YMin', 'XMax', 'YMax'
            ])  # column order matches the appended rows: x1, y1, x2, y2
            f_writer.writerows(csv_output)
            print("csv saved")

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
Exemplo n.º 9
0
def detect(opt, dp, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
     opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model

    imgsz = check_img_size(imgsz, s=model.stride.max())
    # check img_size: if it is not a multiple of 32, round up to the next
    # multiple of 32 and print a warning

    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.use_roi:
        # print(dp.cl)
        # print(dp.cl[0], dp.cl[1])
        # cl = opt.control_line
        cl = dp.cl
        roi_in_pixels = np.array([0, cl[0], 1280,
                                  cl[1]])  # two points coor, x1, y1, x2, y2
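        # cl appears to hold two control-line y-values; the ROI spans the full 1280-px width between them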
    else:
        roi_in_pixels = None

    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, roi=roi_in_pixels)

    # Get names and colors
    names = model.module.names if hasattr(
        model, 'module'
    ) else model.names  # models saved on GPU gain a 'module' attribute; unwrap it
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]  # one random color per class in names

    # fix issue: when single cls, names = ['item'] rather than names = ['crosswalk']
    if 'item' in names:
        names = ['crosswalk']

    # prune
    # torch_utils.prune(model, 0.7)
    model.eval()

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once: a dry forward pass to warm up the model

    detected_img_id = 0
    time_list = [None] * len(dataset)
    bar = tqdm(dataset)
    for iii, (path, img, im0s, vid_cap, recover) in enumerate(bar):
        # print(img.shape, im0s.shape, vid_cap)
        # exit()

        # img.shape [3, 384, 640] im0s.shape [720, 1280, 3] None
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # [3, h, w] -> [batch_size, 3, h, w]

        # Inference
        t1 = time_synchronized()
        # print('aug', opt.augment)  # False
        pred = model(img, augment=opt.augment)[0]
        # print(pred.shape) [1, 15120, 25]
        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        infer_time = t2 - t1
        time_list[iii] = t2 - t1

        # print('\n', len(pred), pred, recover)  # pred is a list of length batch_size, one tensor per image;
        # each tensor is [n, 6]: x1, y1, x2, y2, conf, cls

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if opt.use_roi and det is not None:
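                # recover appears to hold (roi_w, roi_h, x_offset, y_offset); shift ROI-relative boxes back to full-frame pixels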
                small_img_shape = torch.from_numpy(
                    np.array([recover[1], recover[0]],
                             dtype=np.float64))  # np.float is deprecated
                det[:, 0], det[:, 2] = det[:, 0] + recover[2], det[:, 2] + recover[2]
                det[:, 1], det[:, 3] = det[:, 1] + recover[3], det[:, 3] + recover[3]
            else:
                small_img_shape = img.shape[2::]
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s  # im0s is the original full-size image

            save_path = str(Path(out) /
                            Path(p).name)  # output/filenamexxxx.jpg
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            # output/filenamexxxx.txt
            s += '%gx%g ' % img.shape[2:]  # print string, 640x640
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            # im0.shape is e.g. [720, 1280, 3]; repeated indexing gives [1280, 720, 1280, 720]
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(
                    small_img_shape, det[:, :4],
                    im0.shape).round()  # convert to x1 y1 x2 y2 in original-image pixels
                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)]
                                       )  # add to string # i.e. 1 crosswalk
                # s += f'{det[:, 4].item():.4f} '
                # print(n)

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            x, y, w, h = xywh
                            string = f"{int(cls)} {conf.item():.4f} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n"
                            f.write(string)  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # print(type(im0), im0.shape) array, 720, 1280, 3
                        if names[int(cls)] in opt.plot_classes:
                            # color = colors[int(cls)]
                            color = (255, 85, 33)
                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=color,
                                         line_thickness=5)

            # Print time (inference + NMS)
            prt_str = '%sDone. (%.5fs)' % (s, t2 - t1)
            # print(prt_str)
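            # per-image timing is appended to the run's log file via the shell rather than Python file I/O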
            os.system(f'echo "{prt_str}" >> {opt.output}/detect.log')

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0 = dp.dmpost(im0,
                                    det,
                                    det_id=detected_img_id,
                                    filename=Path(p).name,
                                    names=names)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    # print(detected_img_id, p, txt_path)
                    tmp_filename = Path(txt_path).stem
                    im0 = dp.dmpost(im0,
                                    det,
                                    det_id=detected_img_id,
                                    filename=tmp_filename,
                                    names=names)
                    vid_writer.write(im0)
            detected_img_id += 1
        bar.set_description(f'inf_time: {infer_time*1000:.2f}ms {prt_str:<40}')

    if save_txt or save_img:
        print('Results saved to %s' % out)
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    time_arr = np.array(time_list)
    prnt = f'Done. Network mean inference time: {np.mean(time_arr)*1000:.2f}ms,  Mean FPS: {1/np.mean(time_arr):.2f}.'
    print(f'\nModel size {opt.img_size} inference {prnt}')
    os.system(f'echo "{prnt}" >> {opt.output}/detect.log')
    os.system(f'echo "useroi {opt.img_size} {prnt}" >> detect2.log')
Exemplo n.º 10
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    if opt.labels:
        names = load_labels(opt.labels)
    else:
        names = model.module.names if hasattr(model, 'module') else model.names

    if opt.color == "same":
        # same color for all
        colors = [[255, 0, 255] for _ in range(len(names))]
    else:
        if opt.color == "det":
            random.seed(2)

        colors = [[random.randint(0, 255) for _ in range(3)]
                  for _ in range(len(names))]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()

    if opt.bboxfilt:
        # Detections filter
        bbox_filter = BboxFilter(30, 5)  # 30x30 pixel grids, 5 deep in time
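        # presumably accumulates boxes on a 30x30-px grid over the last 5 frames and reports temporally stable ones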

    # Object sizes
    # se, comml, jet, heli, drone
    OBj_SIZES = (10.0, 30.0, 10.0, 5.0, 0.3)
    CAM_FOVH = 60  # degree
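    # with a known physical size and the horizontal FOV, distance can be estimated from a box's pixel width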

    quit = False

    for path, img, im0s, vid_cap in dataset:

        if quit: break

        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        #print("pred shape", pred.shape)

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(
                pred):  # detections per image; pred holds one tensor per image, so this loop usually runs once
            #print("i det",i, det)

            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            imh, imw, _ = im0.shape

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):

                    x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(
                        xyxy[2]), int(xyxy[3])

                    if opt.bboxfilt:
                        bbox_filter.add((x1, y1, x2, y2), conf.item(),
                                        int(cls))

                    #print("imw", im0.shape, int(cls))

                    # if int(cls) > len(OBj_SIZES):
                    #     sz = 1.0
                    # else:
                    #     sz = OBj_SIZES[int(cls)]

                    # eul, quat, dist = estimate_yaw_pitch_dist(CAM_FOVH, sz, imw, imh, x1, y1, x2, y2)
                    else:
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf) if opt.save_conf else (
                                cls, *xywh)  # label format
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * len(line)).rstrip() % line +
                                        '\n')

                        if save_img or view_img:  # Add bbox to image
                            label = f'{names[int(cls)]} {conf:.2f}'
                            #     label = '%s %.2f %.1f %.2f %.2f %.2f %.2f' % (names[int(cls)], conf, dist, quat[0], quat[1], quat[2], quat[3])

                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=colors[int(cls)],
                                         line_thickness=2)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            # if view_img:
            #     cv2.imshow(p, im0)
            #     if cv2.waitKey(1) == ord('q'):  # q to quit
            #         raise StopIteration

            # Save results (image with detections)
            # if save_img:
            #     if dataset.mode == 'images':
            #         cv2.imwrite(save_path, im0)
            #     else:
            #         if vid_path != save_path:  # new video
            #             vid_path = save_path
            #             if isinstance(vid_writer, cv2.VideoWriter):
            #                 vid_writer.release()  # release previous video writer

            #             fourcc = 'mp4v'  # output video codec
            #             fps = vid_cap.get(cv2.CAP_PROP_FPS)
            #             w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            #             h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            #         vid_writer.write(im0)

            if opt.bboxfilt:
                print("-----------------BBOX")

                boxes = bbox_filter.get_boxes()
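                # each filtered box unpacks as (xyxy, conf, cls), mirroring the bbox_filter.add() inputs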
                #print("filt boxes",boxes)

                for box in boxes:
                    print("b", box)
                    xyxy, conf, cls = box
                    #     label = '%s %.2f' % (names[cls], conf)
                    #     print(label, conf)
                    #     plot_one_box(b, im0, label=label, color=colors[cls], line_thickness=3)

                    if int(cls) >= len(OBj_SIZES):  # class without a known physical size
                        sz = 1.0
                    else:
                        sz = OBj_SIZES[int(cls)]

                    x1, y1, x2, y2 = (int(v) for v in xyxy)  # use this box's own coordinates
                    eul, quat, dist = estimate_yaw_pitch_dist(
                        CAM_FOVH, sz, imw, imh, x1, y1, x2, y2)

                    if save_img or view_img:  # Add bbox to image
                        # with quaternion
                        # label = '%s %.2f %.1f %.2f %.2f %.2f %.2f' % (names[int(cls)], conf, dist, quat[0], quat[1], quat[2], quat[3])
                        label = '%s %.2f %.1f' % (names[int(cls)], conf, dist)

                        print(label)
                        print("xyxy", xyxy)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=2)

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

            if view_img:
                cv2.imshow(str(p), im0)
                k = cv2.waitKey(1)  # 1 millisecond

                if k == 27:  # ESC
                    quit = True

            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
Exemplo n.º 11
0
def detect(save_img=True):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)

    elif source == 'own_camera':
        pass

    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    if source == 'own_camera':
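        # camera intrinsics (fx, fy, cx, cy) and distortion coefficients, presumably from a prior OpenCV calibration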

        camera_matrix = np.array(
            [[3.50379164e+03, 0.00000000e+00, 7.96197449e+02],
             [0.00000000e+00, 3.49926605e+03, 6.94741115e+02],
             [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
        dist_coefs = np.array([
            -6.93155538e-01, 2.44136043e+00, -8.62122690e-03, 6.01043129e-03,
            -1.64531179e+01
        ])

        camera = pylon.InstantCamera(
            pylon.TlFactory.GetInstance().CreateFirstDevice())

        camera.Open()
        converter = pylon.ImageFormatConverter()
        # camera = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateFirstDevice())

        # ========== Grabbing continuously (video) with minimal delay ==========
        camera.StartGrabbing(pylon.GrabStrategy_LatestImageOnly)
        camera.ExposureTimeAbs = 80000

        # ========== converting to opencv bgr format ==========
        converter.OutputPixelFormat = pylon.PixelType_BGR8packed
        converter.OutputBitAlignment = pylon.OutputBitAlignment_MsbAligned

        pygame.init()
        # clock = pygame.time.Clock()
        display_width = 1920
        display_height = 1080
        screen = pygame.display.get_surface()
        screen = pygame.display.set_mode((display_width, display_height),
                                         pygame.FULLSCREEN)
        modes = pygame.display.list_modes()
        pygame.display.set_mode(max(modes))
        black = (0, 0, 0)
        white = (255, 255, 255)
        # screen.fill(black)  # show black image

        while camera.IsGrabbing():

            # for projector
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    crashed = True

                screen.fill(black)  # show black image

            grabResult = camera.RetrieveResult(
                5000, pylon.TimeoutHandling_ThrowException)

            if grabResult.GrabSucceeded():
                # image = converter.Convert(grabResult)
                image = converter.Convert(grabResult)
                img = image.GetArray()
                width = int(img.shape[1])
                height = int(img.shape[0])
                dim = (width, height)
                newcameramtx, roi = cv2.getOptimalNewCameraMatrix(
                    camera_matrix, dist_coefs, (width, height), 1,
                    (width, height))
                dst = cv2.undistort(img, camera_matrix, dist_coefs, None,
                                    newcameramtx)
                x, y, width, height = roi
                img0 = dst[y:y + height, x:x + width]

                frame = img0

                # frame = cv2.resize(frame, (int(frame.shape[1]/1.5),int(frame.shape[0]/1.5)))

            else:
                continue  # grab failed; skip this frame

            img0 = frame.copy()
            im0s = img0
            img_size = 640
            img, ratio, (dw, dh) = letterbox(img0, new_shape=img_size)
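            # letterbox resizes while keeping the aspect ratio and pads to the target shape; ratio and (dw, dh) describe that transform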
            img = img[:, :, ::-1].transpose(
                2, 0, 1)  # BGR to RGB, HWC to CHW
            img = np.ascontiguousarray(img)
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]
            # Apply NMS
            pred = non_max_suppression(pred,
                                       opt.conf_thres,
                                       opt.iou_thres,
                                       classes=opt.classes,
                                       agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = Path('0'), '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = Path('0'), '', im0s

                save_path = str(save_dir / p.name)
                # txt_path = str(save_dir / 'labels' / p.stem) + (
                #     '_%g' % dataset.frame if dataset.mode == 'video' else '')
                s += '%gx%g ' % img.shape[2:]  # print string
                gn = torch.tensor(img0.shape)[[1, 0, 1,
                                               0]]  # normalization gain whwh
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                              im0.shape).round()

                    # Print results
                    classes = []
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string
                        s1 = '%g' % (n)
                        classes.append([names[int(c)], s1])

                    df_cls_info = pd.DataFrame(classes,
                                               columns=['Class', 'number'])
                    df_cls_info.to_csv('class_num.csv')
                    print("\n number of object : ", len(det[:, -1]))
                    print(df_cls_info)
                    # Write results
                    box_info = []
                    box3D = {}
                    indice_3D = 0
                    for *xyxy, conf, cls in reversed(det):

                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            line = (cls, *xywh, conf) if opt.save_conf else (
                                cls, *xywh)  # label format
                            # with open(txt_path + '.txt', 'a') as f:
                            #     f.write(('%g ' * len(line)).rstrip() % line + '\n')

                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy,
                                         img0,
                                         label=label,
                                         color=colors[int(cls)],
                                         line_thickness=3)

                            c1, c2 = (int(xyxy[0]),
                                      int(xyxy[1])), (int(xyxy[2]),
                                                      int(xyxy[3]))
                            # print("name : ",names[int(cls)])
                            # print("opt",opt.classflash)
                            if names[int(cls)] == opt.classflash:

                                box3D[indice_3D] = (int(xyxy[0]), int(xyxy[1]),
                                                    int(xyxy[2]), int(xyxy[3]))
                            elif opt.classflash == "all":
                                box3D[indice_3D] = (int(xyxy[0]), int(xyxy[1]),
                                                    int(xyxy[2]), int(xyxy[3]))
                            indice_3D += 1
                            confident = '%g' % (conf)
                            box_info.append([label, c1, c2, confident])

                    # print(box3D)

                    if box3D != {}:
                        depth, box_min, dists = D435camera(box3D, img0)
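                        # D435camera presumably reads the RealSense depth stream and returns per-box distances keyed like box3D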
                        # print(depth, box_min, dists)

                        # screen.fill(black)  # show black image
                        if dists != {}:
                            img = drawimage(box3D, dists,
                                            (200, 0, 0))  # set color
                            # _,img = cv2.threshold(img,0,255,cv2.THRESH_BINARY_INV)
                            imgpro = pygame.image.frombuffer(
                                img.tobytes(), img.shape[1::-1],
                                "RGB")  # tostring() is deprecated in NumPy
                            screen.blit(imgpro, (50, 50))
                        else:
                            screen.fill(black)
                    pygame.display.flip()
                    pygame.display.update()

                    # df_box = pd.DataFrame(box_info,columns=['Class','top left','bottom_right','confidence'])
                    # df_box.to_csv('box_info.csv')

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))
                im0 = cv2.resize(
                    im0, (int(im0.shape[1] / 1.5), int(im0.shape[0] / 1.5)))
                cv2.imshow("asd", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    break

            if save_txt or save_img:
                print('Results saved to %s' % save_dir)

            print('Done. (%.3fs)' % (time.time() - t0))

        return  # the 'own_camera' branch never builds `dataset`, so skip the file/stream loop below

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow("asd", im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    # cv2.imwrite(save_path, im0)
                    pass
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))
Exemplo n.º 12
0
def detect(save_img=False):
    atama = 0
    flag1 = False
    flag2 = False
    flag3 = False


    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:        # each frame is processed in this loop
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                sonuc = Tespit()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):   # xyxy are the box corner coordinates
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)


                        # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        (plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3))
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()

                        ab = torch.tensor(xyxy).view(1,4)[0]
                        ab = ab.numpy()

                        isim = f'{names[int(cls)]}'
                        sonuc.label_list.append(isim)
                        sonuc.koordinat_list.append(ab)

                # find the left exit (CIKIS) boxes and the mold (KALIP) box
                i = 0
                cıkıs_list = []
                while i < len(sonuc.label_list):
                    if('CIKIS' == sonuc.label_list[i]):
                        cıkıs_list.append(sonuc.koordinat_list[i])

                    if('KALIP' == sonuc.label_list[i]):
                        kalıp = sonuc.koordinat_list[i]

                    i += 1
                if(len(cıkıs_list) == 2):
                    if(cıkıs_list[0][0] < cıkıs_list[1][0]):
                        sol_cıkıs = cıkıs_list[0]
                    else:
                        sol_cıkıs = cıkıs_list[1]
                elif(len(cıkıs_list) == 1):
                    sol_cıkıs = cıkıs_list[0]
                else:
                    sol_cıkıs = [0,0,0,0]
                sol_alan = (sol_cıkıs[2] - sol_cıkıs[0]) * (sol_cıkıs[3] - sol_cıkıs[1])

                cv2.putText(im0, "SOL ALAN" + str(sol_alan), (800, 500), cv2.FONT_HERSHEY_SIMPLEX, 1,
                            (209, 80, 0, 255), 3)
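                # inferred three-stage sequence: flag1 = mold in position, flag2 = left exit opened, flag3 = left exit fully lowered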

                try:
                    x1 = kalıp[0]
                    x2 = kalıp[2]
                    y1 = kalıp[1]
                    y2 = kalıp[3]

                    print("x1 degeri : " , x1 , " x2 degeri : " , x2 , " y1 degeri : " , y1 , " y2 degeri : " , y2)
                    if ((640 < x1 < 675) and (855 < x2 < 874) and (290 < y1 < 305) and (835 < y2 < 855)) or flag1:
                        flag1 = True
                        print("butun kosullar saglandı")
                        cv2.putText(im0, "UYGUN KONUM" ,(200,200), cv2.FONT_HERSHEY_SIMPLEX, 1, (209,80, 0 ,255), 3)
                        print("sol kapak alanı : " , str(sol_alan))

                        if(2000 < sol_alan < 2400) or flag2:
                            # print("cıkmıs")
                            flag2 = True
                            if (900 < sol_alan < 1200) or flag3:
                                # print("en aşşa indi")

                                flag3 = True
                                if (2000 < sol_alan):
                                    if 'BOS' in sonuc.label_list:
                                        cv2.putText(im0, "SIKINTI YOK", (400, 400), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                                    (209, 80, 0, 255), 3)
                                        print("******************sıkıntı yok***********************")

                                    else:
                                        cv2.putText(im0, "KALIP DUSMEDI", (400, 400), cv2.FONT_HERSHEY_SIMPLEX, 1,
                                                    (209, 80, 0, 255), 3)
                                        print("bunu bi şekilde halletmemiz gerek")

                                    time.sleep(1)
                                    #print("flag yazdırdık: "+ str(flag1)+ " "+ str(flag2)+ " "+ str(flag3))

                    else:
                        print("kosullar saglanmadı")

                    if ((sol_cıkıs[0] > 800) and flag3):
                        flag1 = False
                        flag2 = False
                        flag3 = False

                except IndexError:
                    pass


            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')
            atama += (t2 - t1)
            print("toplam zaman", str(atama))


            # Stream results
            if True:        # was `view_img`; forced to True so images play back like a video
                cv2.imshow("Result", im0)   # reusing one window name keeps a single window; distinct names open separate ones
                cv2.waitKey(1)  # 1 millisecond; 0 would block waiting for a keypress per image -- 1 is enough here

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)
        #print("gecen zaman :" , str(t2-t1))
    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
Exemplo n.º 13
0
def detect(opt):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    save_img = not opt.nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(opt.project) / opt.name,
                              exist_ok=opt.exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = opt.half and device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    names = model.module.names if hasattr(
        model, 'module') else model.names  # get class names
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        max_crop_img_size = 0
        max_crop_img = None
        max_crop_img_path = None
        max_mask_array = None
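        # per-frame trackers for the largest crop of an accepted class; saved after the detection loop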
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   opt.classes,
                                   opt.agnostic_nms,
                                   max_det=opt.max_det)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image

            no_det = None
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            imc = im0.copy() if opt.save_crop else im0  # for opt.save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or opt.save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if opt.hide_labels else (
                            names[c]
                            if opt.hide_conf else f'{names[c]} {conf:.2f}')
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors(c, True),
                                     line_thickness=opt.line_thickness)
                        if opt.save_crop:

                            crop, path, mask = save_one_box(
                                xyxy,
                                imc,
                                file=save_dir / 'crops' / f'{p.stem}.JPEG',
                                BGR=True)

                            # only meaningful when a crop was actually produced
                            crop_x, crop_y, _ = crop.shape
                            crop_img_size = crop_x * crop_y
                            if (names[c] in opt.accepted_names) and (
                                    crop_img_size > max_crop_img_size):
                                max_crop_img = crop
                                max_crop_img_path = path
                                max_crop_img_size = crop_img_size
                                max_mask_array = mask
                                no_det = False
                            else:
                                if no_det is None:
                                    no_det = True

                if max_crop_img_path:
                    max_crop_img_path = max_crop_img_path.split(
                        '.'
                    )[0] + '.JPEG' if '.jpg' in max_crop_img_path else max_crop_img_path
                    cv2.imwrite(max_crop_img_path, max_crop_img)
                    mask_dir = '/'.join(max_crop_img_path.split('/')[:-1])
                    mask_filename = max_crop_img_path.split('/')[-1].split(
                        '.')[0]
                    mask_filepath = f'{mask_dir}/{mask_filename}.npy'
                    np.save(mask_filepath, max_mask_array,
                            allow_pickle=False)  # save the mask of the largest crop
            else:
                if no_det is None:
                    no_det = True

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                    if no_det:
                        full_img_save_dir = '/'.join(
                            save_path.split('/')[:-1]) + '/crops/'
                        if not os.path.isdir(full_img_save_dir):
                            os.mkdir(full_img_save_dir)
                        save_path = '/'.join(
                            save_path.split('/')[:-1] + ['crops'] +
                            [save_path.split('/')[-1]])
                        cv2.imwrite(save_path, im0s)
                else:  # 'video' or 'stream'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
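
The crop/mask saving convention in this example (rewrite a '.jpg' suffix to '.JPEG', then store the mask as a sibling '.npy' with the same stem) is easier to express with pathlib than with repeated string splits. A minimal sketch of the same idea; save_crop_and_mask, its arguments, and the call are our own illustration, not part of the example above:

import cv2
import numpy as np
from pathlib import Path

def save_crop_and_mask(crop_path, crop_img, mask_array):
    p = Path(crop_path)
    if p.suffix.lower() == '.jpg':
        p = p.with_suffix('.JPEG')  # normalize the extension as above
    cv2.imwrite(str(p), crop_img)
    # the mask lives next to the crop: same stem, .npy extension
    np.save(str(p.with_suffix('.npy')), mask_array, allow_pickle=False)
    return str(p)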
Exemplo n.º 14
def detect(save_img=False):
    '''
    input: save_img_flag
    output(result):
    '''
    # unpack the output folder, input source, weights and other options
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    # select the device
    device = select_device(opt.device)
    # remove any previous output folder and create a fresh one
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    # use FP16 when running on a GPU
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    # load the FP32 model; make sure the requested image size is divisible by the maximum stride s=32 (adjusted and returned if not)
    '''
    model = Model(
                  (model): Sequential(
                                       (0): Focus(...)
                                       (1): Conv(...)
                                            ...
                                       (24): Detect(...)
                    )
    '''
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # convert to FP16
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    # choose the dataloader according to the input source
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # class names, e.g. names = ['person', 'bicycle', 'car', ..., 'toothbrush']
    names = model.module.names if hasattr(model, 'module') else model.names
    # random RGB colors for drawing boxes, e.g. colors = [[178, 63, 143], [25, 184, 176], ..., [235, 137, 120]]
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    # run one warm-up forward pass to check that the pipeline works; input shape (1, 3, imgsz, imgsz)
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    """
        path 图片/视频路径  'E:\...\bus.jpg'
        img 进行resize+pad之后的图片   1*3*re_size1*resize2的张量 (3,img_height,img_weight)
        img0 原size图片   (img_height,img_weight,3)          
        cap 当读取图片时为None,读取视频时为视频源   
    """
    for path, img, im0s, vid_cap in dataset:
        print(img.shape)
        img = torch.from_numpy(img).to(device)
        # cast the image to FP16 as well
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        # add a batch axis at the front if it is missing
        if img.ndimension() == 3:
            # (in_channels, size1, size2) -> (1, in_channels, img_height, img_width)
            img = img.unsqueeze(0)  # add a dimension at index 0

        # Inference
        t1 = time_synchronized()
        """
        model:
        input: in_tensor (batch_size, 3, img_height, img_weight)
        output: 推理时返回 [z,x]
        z tensor: [small+medium+large_inference]  size=(batch_size, 3 * (small_size1*small_size2 + medium_size1*medium_size2 + large_size1*large_size2), nc)
        x list: [small_forward, medium_forward, large_forward]  eg:small_forward.size=( batch_size, 3种scale框, size1, size2, [xywh,score,num_classes]) 
        '''
               
        前向传播 返回pred[0]的shape是(1, num_boxes, nc)
        h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w
        num_boxes = 3 * h/32 * w/32 + 3 * h/16 * w/16 + 3 * h/8 * w/8
        pred[0][..., 0:4] 预测框坐标为xywh(中心点+宽长)格式
        pred[0][..., 4]为objectness置信度
        pred[0][..., 5:5+nc]为分类结果
        pred[0][..., 5+nc:]为Θ分类结果
        """
        # pred : (batch_size, num_boxes, no)  batch_size=1
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        # pred : list[tensor(batch_size, num_conf_nms, [xylsθ,conf,classid])] θ∈[0,179]
        #pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        pred = rotate_non_max_suppression(pred,
                                          opt.conf_thres,
                                          opt.iou_thres,
                                          classes=opt.classes,
                                          agnostic=opt.agnostic_nms,
                                          without_iouthres=False)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(
                pred
        ):  # i:image index  det:(num_nms_boxes, [xylsθ,conf,classid]) θ∈[0,179]
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)  # image save path (directory + file name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            #print(txt_path)
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :5] = scale_labels(img.shape[2:], det[:, :5],
                                          im0.shape).round()

                # Print results    det:(num_nms_boxes, [xylsθ,conf,classid]) θ∈[0,179]
                for c in det[:, -1].unique(
                ):  # unique() drops duplicate entries, returning each detected class once
                    n = (det[:, -1] == c
                         ).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)]
                                       )  # add 'count class, ' to the string

                # Write results  det:(num_nms_boxes, [xywhθ,conf,classid]) θ∈[0,179]
                for *rbox, conf, cls in reversed(
                        det):  # reversed so classes come out in ascending order
                    # rbox=[tensor(x),tensor(y),tensor(w),tensor(h),tensor(θ)] θ∈[0,179]
                    # if save_txt:  # Write to file
                    #     xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    #     with open(txt_path + '.txt', 'a') as f:
                    #         f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        classname = '%s' % names[int(cls)]
                        conf_str = '%.3f' % conf
                        rbox2txt(rbox, classname, conf_str,
                                 Path(p).stem,
                                 str(out + '/result_txt/result_before_merge'))
                        #plot_one_box(rbox, im0, label=label, color=colors[int(cls)], line_thickness=2)
                        plot_one_rotated_box(rbox,
                                             im0,
                                             label=label,
                                             color=colors[int(cls)],
                                             line_thickness=1,
                                             pi_format=False)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('   Results saved to %s' % Path(out))

    print('   All Done. (%.3fs)' % (time.time() - t0))
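
The num_boxes formula quoted in the docstring above can be sanity-checked in a few lines. A minimal sketch, assuming the usual YOLOv5 strides (8, 16, 32) and 3 anchors per grid cell; h and w must be multiples of 32:

def num_boxes(h, w, anchors_per_cell=3, strides=(8, 16, 32)):
    # 3 * h/32 * w/32 + 3 * h/16 * w/16 + 3 * h/8 * w/8
    return sum(anchors_per_cell * (h // s) * (w // s) for s in strides)

# a 640x640 input yields 3 * (80*80 + 40*40 + 20*20) = 25200 candidate boxes
assert num_boxes(640, 640) == 25200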
Exemplo n.º 15
def detect(weights='mdp/weights/weights.pt',
           source_address='http://192.168.15.1:8008',
           img_size=416,
           conf_thres=0.01,
           iou_thres=0.5,
           device='',
           classes=None,
           agnostic_nms=False,
           augment=False,
           update=False,
           scale_percent=50):
    source = source_address + '/stream.mjpg'
    # label_server = source_address + '/labels'

    predicted_label = None
    imgsz = img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(device)

    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    if webcam:
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once

    row_num = 0
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes=classes,
                                   agnostic=agnostic_nms)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].detach().unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    predicted_label = names[int(cls)]
                    if predicted_label:
                        if not image_seen[predicted_label]:
                            label_id = label_id_mapping.get(predicted_label)
                            if conf < confidence_threshold(
                                    label_id
                            ):  # fine tune for up arrow (white)
                                break
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            good, text = check_bounding_box(
                                xywh, im0.shape[0], im0.shape[1])
                            if not good:
                                break

                            print(('%s ' * 5 + '\n') %
                                  (label_id, *xywh))  # label format
                            image_seen[predicted_label] = True

                            # determine image position
                            x, y, w, h = xywh
                            # r = requests.post(label_server, json={'label': label_id, 'x': x, 'y': y})  # send result to rpi
                            # print(r.text)

                            label = '%s %.2f' % (label_id, conf)
                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=colors[int(cls)],
                                         line_thickness=3)

                            # percent by which the image is resized
                            # scale_percent = 50
                            # compute the scaled dimensions
                            width = int(im0.shape[1] * scale_percent / 100)
                            height = int(im0.shape[0] * scale_percent / 100)
                            # dsize
                            dsize = (width, height)
                            # resize image
                            im0 = cv2.resize(im0, dsize)

                            # detected_images.append(im0)
                            row_num = append_image(im0, row_num)

                            def vconcat_resize_min(
                                    im_list, interpolation=cv2.INTER_CUBIC):
                                w_min = min(im.shape[1] for im in im_list)
                                im_list_resize = [
                                    cv2.resize(im, (w_min,
                                                    int(im.shape[0] * w_min /
                                                        im.shape[1])),
                                               interpolation=interpolation)
                                    for im in im_list
                                ]
                                return cv2.vconcat(im_list_resize)

                            def hconcat_resize_min(
                                    im_list, interpolation=cv2.INTER_CUBIC):
                                h_min = min(im.shape[0] for im in im_list)
                                im_list_resize = [
                                    cv2.resize(im, (int(im.shape[1] * h_min /
                                                        im.shape[0]), h_min),
                                               interpolation=interpolation)
                                    for im in im_list
                                ]
                                return cv2.hconcat(im_list_resize)

                            def concat_tile_resize(
                                    im_list_2d, interpolation=cv2.INTER_CUBIC):
                                im_list_v = [
                                    hconcat_resize_min(
                                        im_list_h,
                                        interpolation=cv2.INTER_CUBIC)
                                    for im_list_h in im_list_2d
                                ]
                                return vconcat_resize_min(
                                    im_list_v, interpolation=cv2.INTER_CUBIC)

                            im_tile = concat_tile_resize(detected_images)

                            cv2.imshow('ImageWindow', im_tile)
                            cv2.imwrite('result.png', im_tile)
                            break
            if cv2.waitKey(1) == ord('q'):  # q to quit
                raise StopIteration
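
The tiling helpers defined inline above (hconcat_resize_min, vconcat_resize_min, concat_tile_resize) are self-contained, so they could be hoisted to module level and exercised on their own. A minimal usage sketch under that assumption; the two dummy arrays stand in for annotated detection frames:

import cv2
import numpy as np

a = np.full((120, 160, 3), 80, dtype=np.uint8)   # dark frame
b = np.full((90, 200, 3), 160, dtype=np.uint8)   # light frame

# each row is resized to a common height, then the rows to a common width
tile = concat_tile_resize([[a, b], [b, a]])
cv2.imwrite('tile_demo.png', tile)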
Exemplo n.º 16
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    tailgate_img, handle_img, px_ratio = None, None, None  # avoid NameError if these classes never appear
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per img
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                handles_ymax = []
                #handles_xmid = []
                handles_ymid = []
                handle_mids = []

                tailgates_ymin = []
                tailgates_ymax = []
                tailgate_ythird_coord = []

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                det_sorted = sorted(det, key=lambda x: x[
                    -1])  # sort detected items by class index (last column)

                # Write results
                for *xyxy, conf, cls in reversed(
                        det_sorted
                ):  # coords, confidence, class; reversed so the license plate (cls 2) comes first and px_ratio is set before it is used

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if int(cls) == 2:  # license plate
                        license_width = abs(int(xyxy[2]) - int(xyxy[0]))
                        px_ratio = license_width / 12  # number of pixels per inch as license plates are 12"

                    if save_img or view_img:  # Add bbox to image
                        #label = '%s %.2f' % (names[int(cls)], conf) #confidence not needed
                        label = '%s ' % (names[int(cls)])
                        coord1, coord2, dim_label = plot_one_box(
                            xyxy,
                            im0,
                            label=label,
                            color=colors[int(cls)],
                            line_thickness=3,
                            px_ratio=px_ratio)

                        # get important points for line drawing
                        if int(cls) == 1:  #handle
                            ymax = max(coord1[1], coord2[1])
                            handles_ymax.append(ymax)

                            xmid = int((coord1[0] + coord2[0]) / 2)
                            ymid = int((coord1[1] + coord2[1]) / 2)
                            handle_mids.append([xmid, ymid])

                            handle_img = img[:, coord1[0]:coord2[0],
                                             coord1[1]:coord2[1]]

                        elif int(cls) == 0:  #tailgate
                            tailgate_xmin = min(coord1[0], coord2[0])

                            ymax = max(coord1[1], coord2[1])
                            tailgates_ymax.append(ymax)
                            ymin = min(coord1[1], coord2[1])
                            tailgates_ymin.append(ymin)
                            tailgate_ythird = int(
                                abs(coord1[1] - coord2[1]) / 3 + ymin)
                            tailgate_ythird_coord.append(
                                [tailgate_xmin, tailgate_ythird])

                            #tailgate_img = img[coord1[0]:coord2[0], coord1[1]:coord2[1]]
                            #

                # added ability to measure between bottom of handle and bottom of tailgate if handle in top 1/3
                for i, (handle_mid,
                        max_point) in enumerate(zip(handle_mids,
                                                    handles_ymax)):
                    hyps = [
                        hypotenuse(handle_mid, b)
                        for b in tailgate_ythird_coord
                    ]
                    closest_index = np.argmin(hyps)

                    if handle_mid[1] < tailgate_ythird_coord[closest_index][1]:
                        min_dist_tg = min(
                            [int(abs(max_point - x)) for x in tailgates_ymax])
                        start_point = (handle_mid[0], handles_ymax[i])
                        end_point = (handle_mid[0],
                                     handles_ymax[i] + min_dist_tg)
                        cv2.line(im0, start_point, end_point, (100, 100, 0), 4)
                        line_mid = int((start_point[1] + end_point[1]) / 2)
                        label = f'Distance: {((end_point[1] - start_point[1]) / px_ratio):.4f}"'
                        cv2.putText(im0,
                                    label, (start_point[0], line_mid),
                                    0,
                                    1, [0, 0, 0],
                                    thickness=2,
                                    lineType=cv2.LINE_AA)

                ### Previous approach: measuring between the bottom of the handle and the tailgate --- not robust.
                ### Keeping it until it is confirmed to be unneeded.
                # for i, (mid_point, max_point)  in enumerate(zip(handles_ymid, handles_ymax)):
                #     print(f'\nmidpoint: {mid_point}')
                #     min_y_dist = min([int(abs(mid_point - x)) for x in tailgate_ythird]) # gets min distance from handle midpoint to tailgate third
                #     print(f'min y dist: {min_y_dist}')
                #     print(f'tailgate third: {tailgate_ythird}')
                #     min_dist_third = min([x for x in tailgate_ythird if abs(x - min_y_dist) in handles_ymid])
                #     print(f'min_dist_third: {min_dist_third}')
                #     if mid_point < min_dist_third: #handle mid point in top 1/3 of truck
                #         min_dist_tg = min([int(abs(max_point - x)) for x in tailgates_ymax])
                #         print(f'min_dist_tg {min_dist_tg}')
                #         start_point = (handles_xmid[i], handles_ymax[i])
                #         print(f'start point: {start_point}')
                #         end_point = (handles_xmid[i], handles_ymax[i] + min_dist_tg)
                #         print(f'end point: {end_point}')
                #         cv2.line(im0, start_point, end_point, (100,100,0), 4)
                #         label = f'Distance: {min_dist_tg/300:.4f}"L'
                #         line_mid = int((start_point[1] + end_point[1])/2)
                #         cv2.putText(im0, label, (start_point[0], line_mid), 0, 1, [0, 0, 0],
                #                     thickness=2, lineType=cv2.LINE_AA)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return tailgate_img, handle_img
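
The measurement logic in this example hinges on one calibration step: the license plate is a known 12 inches wide, so its pixel width fixes a pixels-per-inch ratio that converts any later pixel distance. A minimal sketch of that conversion on its own; the function names are ours:

PLATE_WIDTH_IN = 12  # US license plates are 12 inches wide

def px_per_inch(plate_xyxy):
    # plate_xyxy = (x1, y1, x2, y2) in image pixels
    return abs(plate_xyxy[2] - plate_xyxy[0]) / PLATE_WIDTH_IN

def px_to_inches(pixels, ratio):
    return pixels / ratio

# a 300 px wide plate gives 25 px/inch, so a 100 px gap measures 4.0 inches
ratio = px_per_inch((100, 50, 400, 120))
assert px_to_inches(100, ratio) == 4.0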
Exemplo n.º 17
def detect(source, opt=opt_, save_img=False):
    weights, view_img, save_txt, imgsz = opt["weights"], opt["view_img"], opt[
        "save_txt"], opt["img_size"]
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt["project"]) / opt["name"],
                       exist_ok=opt["exist_ok"]))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt["device"])
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    text_ret = ""
    im0 = None
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt["augment"])[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt["conf_thres"],
                                   opt["iou_thres"],
                                   classes=opt["classes"],
                                   agnostic=opt["agnostic_nms"])
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt["save_conf"] else (
                            cls, *xywh)  # label format
                        text_ret = ('%g ' * len(line)).rstrip() % line + '\n'
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(text_ret)

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % save_dir)

    print('Done. (%.3fs)' % (time.time() - t0))

    return im0, text_ret
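
Unlike the argparse-driven variants, this example reads its options from a plain dict (opt_) and returns the annotated frame plus the last label line, so it can be called from other code. A sketch of what such a dict might look like; the keys mirror the lookups above, the values are placeholders of our own choosing:

opt_ = {
    'weights': 'yolov5s.pt', 'view_img': False, 'save_txt': True,
    'img_size': 640, 'project': 'runs/detect', 'name': 'exp',
    'exist_ok': False, 'device': '', 'augment': False,
    'conf_thres': 0.25, 'iou_thres': 0.45, 'classes': None,
    'agnostic_nms': False, 'save_conf': False,
}

annotated, last_label = detect('data/images/bus.jpg', opt=opt_)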
Exemplo n.º 18
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    cnt = 0
    mat = [0, 0, 0, 0, 0]  # can pls gls trsh none

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None
    for path, img, im0s, vid_cap in dataset:
        if ARD.readable():  # a value is available on the serial port
            cur = ARD.readline().decode().strip()  # read one line received over serial
            # Classify the current camera frame if the received value is "1" or cnt is non-zero.
            # cnt stays at 0 until a "1" arrives over serial; it is then incremented each frame,
            # and reset to 0 once the incoming object is judged classified.
            if cur == "1" or cnt != 0:

                cnt += 1

                img = torch.from_numpy(img).to(device)
                img = img.half() if half else img.float()  # uint8 to fp16/32
                img /= 255.0  # 0 - 255 to 0.0 - 1.0
                if img.ndimension() == 3:
                    img = img.unsqueeze(0)

                # Inference
                t1 = time_synchronized()
                pred = model(img, augment=opt.augment)[0]

                # Apply NMS
                pred = non_max_suppression(pred,
                                           opt.conf_thres,
                                           opt.iou_thres,
                                           classes=opt.classes,
                                           agnostic=opt.agnostic_nms)
                t2 = time_synchronized()

                # Apply Classifier
                if classify:
                    pred = apply_classifier(pred, modelc, img, im0s)

                # Process detections
                for i, det in enumerate(pred):  # detections per image
                    if webcam:  # batch_size >= 1
                        p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
                    else:
                        p, s, im0 = Path(path), '', im0s

                    save_path = str(save_dir / p.name)
                    txt_path = str(save_dir / 'labels' / p.stem) + (
                        '_%g' % dataset.frame if dataset.mode == 'video' else '')
                    s += '%gx%g ' % img.shape[2:]  # print string
                    # normalization gain whwh
                    gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
                    if len(det):
                        # Rescale boxes from img_size to im0 size
                        det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                                  im0.shape).round()

                        # Print results
                        # use unique() so each class is reported once even if detected several times
                        for c in det[:, -1].unique():
                            n = (det[:, -1] == c).sum()  # detections per class
                            s += '%g %ss, ' % (n, names[int(c)])  # add to string
                            # c is a class detected in the current frame; count it in mat:
                            # index 0 = can, 1 = pls, 2 = gls, 3 = trsh
                            mat[int(c)] += 1
                    else:  # no detections -> increment index 4 (none)
                        mat[4] += 1

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            # label format
                            line = (cls, *xywh, conf) if opt.save_conf else (cls,
                                                                             *xywh)
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * len(line)).rstrip() % line + '\n')

                        # draw the label on the frame, e.g. "can 0.7"
                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy,
                                         im0,
                                         label=label,
                                         color=colors[int(cls)],
                                         line_thickness=3)

                    # Print time (inference + NMS)
                    print('%sDone. (%.3fs)' % (s, t2 - t1))

                    if cnt == 10:  # decide from the results of 10 frames
                        cnt = 0
                        # send whichever of can, pls, gls, (trsh + none) occurred most often over serial
                        max_mat = -1
                        max_value = 0
                        for i in range(3):
                            if mat[i] > max_value:
                                max_value = mat[i]
                                max_mat = i
                        if sum(mat[3:5]) > max_value:
                            max_value = sum(mat[3:5])
                            max_mat = 3  # trsh and none are pooled and treated as 3
                        print(mat)
                        for i in range(5):
                            mat[i] = 0
                        # send max_mat over serial; the Arduino cannot recognize 0,
                        # so 1 is added before sending
                        ARD.write(str(max_mat + 1).encode())
                        print("print", max_mat + 1)

                    # waiting a full 10 frames per object is inefficient, so if any of
                    # indices 0-3 is detected at least 4 times the object is considered
                    # classified as that class and the value is sent immediately
                    elif mat[0] >= 4 or mat[1] >= 4 or mat[2] >= 4 or mat[
                            3] >= 4:
                        cnt = 0
                        max_mat = -1
                        max_value = 0
                        for i in range(4):
                            if mat[i] > max_value:
                                max_value = mat[i]
                                max_mat = i

                        for i in range(5):
                            mat[i] = 0

                        ARD.write(str(max_mat + 1).encode())
                        print("print", max_mat + 1)

                    # Stream results
                    if view_img:
                        cv2.imshow("video", im0)
                        if cv2.waitKey(1) == ord('q'):  # q to quit
                            raise StopIteration

                    # Save results (image with detections)
                    if save_img:
                        if dataset.mode == 'images':
                            cv2.imwrite(save_path, im0)
                        else:
                            if vid_path != save_path:  # new video
                                vid_path = save_path
                                if isinstance(vid_writer, cv2.VideoWriter):
                                    vid_writer.release(
                                    )  # release previous video writer

                                fourcc = 'mp4v'  # output video codec
                                fps = vid_cap.get(cv2.CAP_PROP_FPS)
                                w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                                h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                                vid_writer = cv2.VideoWriter(
                                    save_path, cv2.VideoWriter_fourcc(*fourcc),
                                    fps, (w, h))
                            vid_writer.write(im0)
        else:
            if view_img:
                cv2.imshow("video", im0s[0].copy())
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration
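
The serial protocol here is minimal: the Arduino sends "1" to open a classification window, the script votes over the counts in mat and answers with the winning index plus one (the Arduino cannot handle a 0). A sketch of the voting step in isolation, assuming the same mat layout [can, pls, gls, trsh, none]; the helper name is ours:

def vote(mat):
    # trsh and none are pooled as a single class 3
    counts = [mat[0], mat[1], mat[2], mat[3] + mat[4]]
    winner = max(range(4), key=lambda i: counts[i])
    return winner + 1  # shifted by one before going over serial

assert vote([1, 0, 6, 2, 1]) == 3  # gls wins: class index 2, sent as 3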
Exemplo n.º 19
def detect(save_img=False):
    adet_sayisi = 0  # piece count ("adet sayısı")
    cikissüresi = 0  # exit duration in frames ("çıkış süresi")
    p1 = resultss()
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                sınıf = []  # detected class names ("sınıf" = class)
                kordinat = []  # box coordinates ("kordinat" = coordinate)
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image

                        label = f'{names[int(cls)]} {conf:.2f}'
                        clas = f'{names[int(cls)]}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

                        maps = torch.tensor(xyxy).view(1, 4)[0]
                        konum = maps.numpy()
                        sınıf.append(clas)
                        kordinat.append(konum)
                    # maps=str(maps).strip("tensor")
                    # p1.deniz(clas, konum)

    ####################### find the first CIKIS (exit) and its coordinates #########################
                ilkcikis = 0  # holds the first x value of the first CIKIS
                ilkcikisikincix = 0  # holds the second x value of the first CIKIS

                if len(sınıf) == 4:
                    if (sınıf[0] == "CIKIS" and sınıf[2] == "CIKIS"):
                        if (kordinat[0][0] < kordinat[2][0]):
                            ilkcikis = kordinat[0][0]
                            ilkcikisikincix = kordinat[0][2]
                        else:
                            ilkcikis = kordinat[2][0]
                            ilkcikisikincix = kordinat[2][2]

                    elif (sınıf[1] == "CIKIS" and sınıf[2] == "CIKIS"):
                        if (kordinat[1][0] < kordinat[2][0]):
                            ilkcikis = kordinat[1][0]
                            ilkcikisikincix = kordinat[1][2]
                        else:
                            ilkcikis = kordinat[2][0]
                            ilkcikisikincix = kordinat[2][2]

                    elif (sınıf[0] == "CIKIS" and sınıf[1] == "CIKIS"):
                        if (kordinat[0][0] < kordinat[1][0]):
                            ilkcikis = kordinat[0][0]
                            ilkcikisikincix = kordinat[0][2]
                        else:
                            ilkcikis = kordinat[1][0]
                            ilkcikisikincix = kordinat[1][2]

                    elif (sınıf[0] == "CIKIS" and sınıf[3] == "CIKIS"):
                        if (kordinat[0][0] < kordinat[3][0]):
                            ilkcikis = kordinat[0][0]
                            ilkcikisikincix = kordinat[0][2]
                        else:
                            ilkcikis = kordinat[3][0]
                            ilkcikisikincix = kordinat[3][2]

                    elif (sınıf[1] == "CIKIS" and sınıf[3] == "CIKIS"):
                        if (kordinat[1][0] < kordinat[3][0]):
                            ilkcikis = kordinat[1][0]
                            ilkcikisikincix = kordinat[1][2]
                        else:
                            ilkcikis = kordinat[3][0]
                            ilkcikisikincix = kordinat[3][2]
                    elif (sınıf[2] == "CIKIS" and sınıf[3] == "CIKIS"):
                        if (kordinat[2][0] < kordinat[3][0]):
                            ilkcikis = kordinat[2][0]
                            ilkcikisikincix = kordinat[2][2]
                        else:
                            ilkcikis = kordinat[3][0]
                            ilkcikisikincix = kordinat[3][2]

                elif len(sınıf) == 3:
                    if (sınıf[0] == "CIKIS" and sınıf[2] == "CIKIS"):
                        if (kordinat[0][0] < kordinat[2][0]):
                            ilkcikis = kordinat[0][0]
                            ilkcikisikincix = kordinat[0][2]
                        else:
                            ilkcikis = kordinat[2][0]
                            ilkcikisikincix = kordinat[2][2]

                    elif (sınıf[1] == "CIKIS" and sınıf[2] == "CIKIS"):
                        if (kordinat[1][0] < kordinat[2][0]):
                            ilkcikis = kordinat[1][0]
                            ilkcikisikincix = kordinat[1][2]
                        else:
                            ilkcikis = kordinat[2][0]
                            ilkcikisikincix = kordinat[2][2]
                    elif (sınıf[0] == "CIKIS" and sınıf[1] == "CIKIS"):
                        if (kordinat[0][0] < kordinat[1][0]):
                            ilkcikis = kordinat[0][0]
                            ilkcikisikincix = kordinat[0][2]
                        else:
                            ilkcikis = kordinat[1][0]
                            ilkcikisikincix = kordinat[1][2]

                elif len(sınıf) == 2:
                    if (sınıf[0] == "CIKIS"):
                        ilkcikis = kordinat[0][0]
                        ilkcikisikincix = kordinat[0][2]
                    elif (sınıf[1] == "CIKIS"):
                        ilkcikis = kordinat[1][0]
                        ilkcikisikincix = kordinat[1][2]

                elif (len(sınıf) == 1 and sınıf[0] == "CIKIS"):
                    ilkcikis = kordinat[0][0]
                    ilkcikisikincix = kordinat[0][2]

    ################### check whether the mold has dropped ###################

                if (
                        ilkcikis <= 668
                ):  # when the system rests fully to the left, the first exit's first x value is 667-668
                    cv2.putText(im0, "son noktaya dayali", (10, 700),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)  # overlay text: "resting at the end point"
                    if (ilkcikisikincix >= 703 and cikissüresi < 22
                        ):  # average frame count at the second exit => 22
                        #cv2.putText(im0,"ilk kez cikti", (700,70),cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,255),3)  # "exited for the first time"
                        cikissüresi += 1  # counts how many frames it stays outside during the first exit => 13-20 frames
                        cv2.putText(im0, str(cikissüresi), (10, 400),
                                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                                    3)

                if (cikissüresi >= 22 and ilkcikisikincix >= 703
                        and ilkcikis <= 668
                    ):  # here we check whether the mold dropped on the second exit
                    cikissüresi += 1
                    #cv2.putText(im0,"2. kez cikti", (650,70),cv2.FONT_HERSHEY_SIMPLEX, 2, (0,0,255),3)  # "exited for the second time"
                    cv2.putText(im0, str(cikissüresi), (10, 400),
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 3)
                    if ("KALIP" in sınıf):
                        cv2.putText(im0, "kalip dusmemis", (10, 70),
                                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                                    3)  # overlay text: "mold has not dropped"
                    elif ("BOS" in sınıf):
                        cv2.putText(im0, "kalip dusmus", (10, 70),
                                    cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                                    3)  # overlay text: "mold has dropped"
                if (
                        ilkcikis >= 800 and cikissüresi >= 24
                ):  # while the system moves back => the piece count is incremented and the exit timer is reset
                    cikissüresi = 0
                    adet_sayisi += 1
                    cv2.putText(im0, str(adet_sayisi), (10, 70),
                                cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 3)

                cv2.putText(im0, str(ilkcikis), (800, 700),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                            3)  # to display the first exit's first x value
                cv2.putText(im0, str(ilkcikisikincix), (800, 900),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255),
                            3)  # to display the first exit's second x value

                print(sınıf)  # to inspect the detected class list

                sınıf.clear(
                )  # clear the stored classes and coordinates before moving to the next frame
                kordinat.clear()

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if True:  # display is forced on for every frame; use `view_img` here to honor the flag
                cv2.imshow("result", im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))  # this example saves under `out`; `save_dir` was never defined here

    print(f'Done. ({time.time() - t0:.3f}s)')
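
A compact equivalent of the CIKIS selection ladder above, as referenced there. This is a minimal sketch, assuming sınıf and kordinat are parallel lists where kordinat[i] is the (x1, y1, x2, y2) box for label sınıf[i]; leftmost_exit is a hypothetical helper name, not part of the original code.

# Minimal sketch (assumption): collapse the if/elif ladder into one pass
# over the parallel lists. `leftmost_exit` is a hypothetical helper name.
def leftmost_exit(sinif, kordinat):
    exits = [box for label, box in zip(sinif, kordinat) if label == "CIKIS"]
    if not exits:
        return 0, 0  # the defaults the caller starts from
    box = min(exits, key=lambda b: b[0])  # smallest first x = leftmost box
    return box[0], box[2]  # (ilkcikis, ilkcikisikincix)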
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    time_ls = []
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        fps0 = time.time()
        print('\n\n', path)  # Image path
        print('\n\n', img.shape)  # After Resize
        print('\n\n', im0s.shape)  # Original Size
        print('\n\n', vid_cap)  # if video
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS : get the values of each prediction
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier : get the label for those values
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections :
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            time_ls.append(t2 - t1)

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # PUT TEXT
            x, y, w, h = (im0.shape[1] // 4), 15, (im0.shape[1] // 2) + 180, 80
            cv2.rectangle(im0, (x, 10), (x + w, y + h), (0, 0, 0), -1)
            cv2.putText(
                im0, '{} | inference: {:.4f}s | fps: {:.4f}'.format(
                    opt.weights[0], t2 - t1,
                    1 / (time.time() - fps0)), (x + 20, y + 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

            # Save results (image with detections)
            # if save_img:
            #     if dataset.mode == 'images':
            #         cv2.imwrite(save_path, im0)
            #     else:
            #         if vid_path != save_path:  # new video
            #             vid_path = save_path
            #             if isinstance(vid_writer, cv2.VideoWriter):
            #                 vid_writer.release()  # release previous video writer

            #             fourcc = 'mp4v'  # output video codec
            #             fps = vid_cap.get(cv2.CAP_PROP_FPS)
            #             w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            #             h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            #             vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            #         vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    def print_block(text):
        print('')
        print(text)

    print_block('INFO:')
    print_block("=" * 50)
    print_block(f'* Weights Size: {opt.weights[0][6].upper()}')  # index 6 of e.g. 'yolov5s.pt' -> 'S'; a more defensive variant is sketched after this example
    if torch.cuda.is_available():
        print_block('* GPU: {} - {}MB '.format(
            torch.cuda.get_device_properties(0).name,
            torch.cuda.get_device_properties(0).total_memory / 1024**2))
    else:
        print_block('* GPU: Only CPU !!!')
    print_block('* Infer Time : {:.3f}s'.format(sum(time_ls) / len(time_ls)))
    print_block('* Total Cost: {:.3f}s'.format(time.time() - t0))
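
The INFO block above derives the model size letter by indexing character 6 of the weights filename, which only works for the fixed 'yolov5X.pt' pattern. A minimal, more defensive sketch; weights_size_letter is a hypothetical helper, not part of the original code.

# Minimal sketch (assumption): read the size letter from the filename stem
# instead of the hard-coded index [6] used above.
from pathlib import Path

def weights_size_letter(weights_path):
    stem = Path(weights_path).stem  # e.g. 'yolov5s' -> last char is the size
    return stem[-1].upper() if stem.startswith('yolov5') else '?'

print(weights_size_letter('weights/yolov5s.pt'))  # S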
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    print('\n\n\n', view_img)  # debug: whether results will be displayed
    print(save_img)  # debug: whether results will be saved
    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (
                    det[:, -1] == idx_person
                ).nonzero(as_tuple=False).squeeze(
                    dim=1)  # 1. List of indices with 'person' class detections
                dets_ppl = det[idxs_ppl, :
                               -1]  # 2. Torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format (xyxy -> xywh, sketched after this example)
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    for d in trackers:
                        plot_one_box(d[:-1],
                                     im0,
                                     label='ID' + str(int(d[-1])),
                                     color=colors[1],
                                     line_thickness=1)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
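
As referenced above, Deep SORT consumes center-based xywh boxes, so the code converts YOLO's xyxy corners with xyxy2xywh before calling deepsort.update(). A minimal sketch of that conversion with the same semantics as the YOLOv5 utility.

# Minimal sketch of the xyxy -> xywh conversion fed to the tracker
import torch

def xyxy_to_xywh(xyxy):
    # xyxy: (N, 4) tensor of [x1, y1, x2, y2] corner coordinates
    xywh = xyxy.clone()
    xywh[:, 0] = (xyxy[:, 0] + xyxy[:, 2]) / 2  # center x
    xywh[:, 1] = (xyxy[:, 1] + xyxy[:, 3]) / 2  # center y
    xywh[:, 2] = xyxy[:, 2] - xyxy[:, 0]        # width
    xywh[:, 3] = xyxy[:, 3] - xyxy[:, 1]        # height
    return xywh

print(xyxy_to_xywh(torch.tensor([[0., 0., 10., 20.]])))  # tensor([[ 5., 10., 10., 20.]])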
Exemplo n.º 22
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # ***********************************************************************************************************************
        frena = 0
        # ***********************************************************************************************************************

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')                   
                    '''
                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                    '''

                    # ***********************************************************************************************************************
                    
                    # Determine whether the box is relevant to the car's forward motion

                    x_test2 = [
                                [int(cls), int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])]
                              ]
                    # Run the box features through the already-trained model (a hypothetical training sketch follows this example)
                    y_pred2 = logmodel.predict(x_test2)
                    # Check the result to decide whether to continue or brake

                    # Scan the array for 1s, which indicate the car must brake
                    for i in range(len(y_pred2)):
                      if y_pred2[i] == 1:
                        frena = 1

                    # Call the function that draws the boxes on the images
                    if save_img or view_img:  # Add bbox to image
                        #label = '%s %.2f' % (names[int(cls)], conf)
                        label = f'{names[int(cls)]} {conf:.2f}'
                        #plot_one_box(xyxy, im0, path, label=label, color=colors[int(cls)], line_thickness=3)
                        plot_one_box(xyxy, im0, frena, label=label, color=colors[int(cls)], line_thickness=3)
                    
                  # ***********************************************************************************************************************

            # ***********************************************************************************************************************
            # Select between video and image output
            video = 1
            # -----------------------------------------------------------------------------------------------------------------------
            if video == 0:
              # Print on screen the final command the vehicle must execute - for 416x416 images
              array_imagen = np.asarray(im0)
              # Text options
              posicion    = (100, 400)
              fuente      = cv2.FONT_HERSHEY_TRIPLEX
              escala      = 1
              colorLetras = [225, 255, 255]
              grosor      = 2
              tipoLinea   = cv2.LINE_8

              # Check what frena holds - if it is 1 the car must stop
              if frena == 1:
                cv2.putText(array_imagen, 'Frena', posicion, fuente, escala, colorLetras, grosor, tipoLinea)  # 'Frena' = brake
              else:
                cv2.putText(array_imagen, 'Adelante', posicion, fuente, escala, colorLetras, grosor, tipoLinea)  # 'Adelante' = go ahead
            # -----------------------------------------------------------------------------------------------------------------------
            else:
              # Print on screen the final command the vehicle must execute - for video
              array_imagen = np.asarray(im0)
              # Text options
              posicion    = (550, 650)
              fuente      = cv2.FONT_HERSHEY_TRIPLEX
              escala      = 2
              colorLetras = [225, 255, 255]
              grosor      = 3
              tipoLinea   = cv2.LINE_8

              # Check what frena holds - if it is 1 the car must stop
              if frena == 1:
                cv2.putText(array_imagen, 'Frena', posicion, fuente, escala, colorLetras, grosor, tipoLinea)
              else:
                cv2.putText(array_imagen, 'Adelante', posicion, fuente, escala, colorLetras, grosor, tipoLinea)
            # ***********************************************************************************************************************

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1) # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
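
The logmodel used in this example is loaded elsewhere; from the predict() call on [class_id, x1, y1, x2, y2] rows it behaves like a scikit-learn classifier. A minimal training sketch under that assumption - the real training data and model are not shown in the source.

# Minimal sketch (assumption): a scikit-learn classifier over box features,
# labeled 1 = brake, 0 = continue. Training rows here are made up.
from sklearn.linear_model import LogisticRegression

X = [[0, 120, 80, 300, 260],   # [class_id, x1, y1, x2, y2]
     [2, 10, 400, 60, 430]]
y = [1, 0]                     # 1 = brake, 0 = continue
logmodel = LogisticRegression().fit(X, y)
print(logmodel.predict([[0, 100, 90, 280, 240]]))  # e.g. [1] -> brake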
Exemplo n.º 23
0
def detect(save_img=False,
           o_weights="weights/yolov5s.pt",
           o_source="inference/images",
           o_output="inference/output",
           o_img_size=640,
           o_conf_thres=0.4,
           o_iou_thres=0.5,
           o_fourcc="mp4v",
           o_device='',
           o_view_img=True,
           o_save_txt=False,
           o_classes=None,
           o_agnostic_nms=False,
           o_augment=False):
    p = ''
    c1 = (0, 0)
    c2 = (0, 0)
    label_no_value = ''
    detection_result_list = []

    out, source, weights, view_img, save_txt, imgsz = \
        o_output, o_source, o_weights, o_view_img, o_save_txt, o_img_size
    webcam = source.isnumeric() or source.startswith(
        'rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device('cpu')
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        """
        前向传播 返回pred的shape是(1, num_boxes, 5+num_class)
        h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w
        num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8
        pred[..., 0:4]为预测框坐标
        预测框坐标为xywh(中心点+宽长)格式
        pred[..., 4]为objectness置信度
        pred[..., 5:-1]为分类结果
        """

        pred = model(img, augment=o_augment)[0]
        # print(pred[..., 5:-1])

        # Apply NMS
        pred = non_max_suppression(pred,
                                   o_conf_thres,
                                   o_iou_thres,
                                   classes=o_classes,
                                   agnostic=o_agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                # Map box coordinates from the resized+padded image back to the original image size
                # Coordinates are in xyxy format at this point
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                # for c in det[:, -1].unique():
                #     n = (det[:, -1] == c).sum()  # detections per class
                #     s += '%g %ss, ' % (n, names[int(c)])  # add to string
                #     print(n.numpy())
                #     detection_result_list.append(names[int(c)])

                # Write out the results
                for *xyxy, conf, cls in det:
                    label = '%s: %.2f' % (names[int(cls)], conf)
                    # Example output -> label: car: 0.47
                    print("label: " + label)
                    label_no_value = '%s' % (names[int(cls)])
                    confidences_value = '%.2f' % conf
                    c1, c2 = plot_one_box(xyxy,
                                          im0,
                                          label=label,
                                          color=colors[int(cls)],
                                          line_thickness=3)
                    print(c1, c2, label_no_value)
                    if label_no_value == 'car' or label_no_value == 'truck':
                        print('Car detected! Processing...')
                        # car = Car()
                        # car.last_position = (c1, c2)
                        # car.stop_time = time.time()
                        # car_timing(car)

                        # Save the image
                        img_name = str(time.time()).split('.')[0]
                        img_path = os.path.join(r'vehicle_imgs\cars',
                                                img_name) + '.jpg'
                        print("Saving image:", img_path)
                        frame_name = o_source
                        frame = cv2.imread(frame_name)
                        cv2.imwrite(img_path, frame)
                        detected_cars(time.time(), (c1, c2), img_path)
                    elif label_no_value == 'bicycle' or label_no_value == 'motorcycle':  # was 'motocycle', which never matches a COCO class name
                        print('Two-wheeler detected! Starting timer!')

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # Convert xyxy (top-left + bottom-right) to xywh (center + width/height), normalize by w and h, convert to a list, then save
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') %
                                    (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    return c1, c2, label_no_value
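
A worked example of the num_boxes formula from the docstring in this function, leaving out the per-cell anchor count just as the docstring does (YOLOv5 itself multiplies by 3 anchors).

# num_boxes = h/32*w/32 + h/16*w/16 + h/8*w/8 for a square 640x640 input
h = w = 640
num_boxes = (h // 32) * (w // 32) + (h // 16) * (w // 16) + (h // 8) * (w // 8)
print(num_boxes)  # 8400; with YOLOv5's 3 anchors per cell this becomes 25200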
Exemplo n.º 24
0
def detect(opt, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # initialize deepsort
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = torch.load(weights,
                       map_location=device)['model'].float()  # load to FP32
    model.to(device).eval()
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        view_img = True
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None

    save_path = str(Path(out))
    txt_path = str(Path(out)) + '/results.txt'

    for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            s += '%gx%g ' % img.shape[2:]  # print string
            save_path = str(Path(out) / Path(p).name)

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                bbox_xywh = []
                confs = []
                clses = []
                # Adapt detections to deep sort input format
                for *xyxy, conf, cls in det:
                    x_c, y_c, bbox_w, bbox_h = bbox_rel(*xyxy)
                    obj = [x_c, y_c, bbox_w, bbox_h]
                    bbox_xywh.append(obj)
                    confs.append([conf.item()])
                    clses.append([cls.item()])
                xywhs = torch.Tensor(bbox_xywh)
                confss = torch.Tensor(confs)
                clses = torch.Tensor(clses)
                outputs = deepsort.update(xywhs, confss, clses, im0)

                # draw boxes for visualization
                if len(outputs) > 0:
                    bbox_tlwh = []
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, 4]
                    clses = outputs[:, 5]
                    scores = outputs[:, 6]
                    stays = outputs[:, 7]
                    draw_boxes(im0, bbox_xyxy, [names[i] for i in clses],
                               scores, identities)

                # Write MOT compliant results to file (format sketched after this example)
                if save_txt and len(outputs) != 0:
                    for j, output in enumerate(outputs):
                        bbox_left = output[0]
                        bbox_top = output[1]
                        bbox_w = output[2]
                        bbox_h = output[3]
                        identity = output[-1]
                        with open(txt_path, 'a') as f:
                            f.write(('%g ' * 10 + '\n') %
                                    (frame_idx, identity, bbox_left, bbox_top,
                                     bbox_w, bbox_h, -1, -1, -1,
                                     -1))  # label format

            else:
                deepsort.increment_ages()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                print('saving img!')
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    print('saving video!')
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % (os.getcwd() + os.sep + out))
        if platform == 'darwin':  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
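
The MOT-style line written above packs ten '%g' fields per detection: frame index, track identity, box left/top/width/height, then four -1 placeholders. A small standalone sketch with made-up values.

# Sketch of one MOT-style result line as written by the loop above
frame_idx, identity = 0, 7
bbox_left, bbox_top, bbox_w, bbox_h = 100.0, 50.0, 40.0, 80.0
print(('%g ' * 10 + '\n') %
      (frame_idx, identity, bbox_left, bbox_top, bbox_w, bbox_h, -1, -1, -1, -1))
# -> 0 7 100 50 40 80 -1 -1 -1 -1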
Exemplo n.º 25
0
def object_detection(
        weights='yolov5/runs/train/exp/weights/best.pt',  # model.pt path(s)
        source='yolov5/hanssem/images/val',  # file/dir/URL/glob, 0 for webcam
        imgsz=[640],  # inference size (pixels)
        conf_thres=0.1,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device=None,  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        view_img=False,  # show results
        save_txt=False,  # save results to *.txt
        save_conf=False,  # save confidences in --save-txt labels
        save_crop=True,  # save cropped prediction boxes
        nosave=False,  # do not save images/videos
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        update=False,  # update all models
        project='yolov5/runs/detect',  # save results to project/name
        name='exp',  # save results to project/name
        exist_ok=False,  # existing project/name ok, do not increment
        line_thickness=3,  # bounding box thickness (pixels)
        hide_labels=False,  # hide labels
        hide_conf=False,  # hide confidences
        half=False,  # use FP16 half-precision inference
        tfl_int8=False,  # INT8 quantized TFLite model
        model=None,
        modelc=None):

    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
    save_img = not nosave and not source.endswith(
        '.txt')  # save inference images
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(
            ('rtsp://', 'rtmp://', 'http://', 'https://'))

    # Directories
    save_dir = increment_path(Path(project) / name,
                              exist_ok=exist_ok)  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    # device = select_device(device)
    half &= device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    w = weights[0] if isinstance(weights, list) else weights
    classify, suffix = False, Path(w).suffix.lower()
    pt, onnx, tflite, pb, saved_model = (
        suffix == x for x in ['.pt', '.onnx', '.tflite', '.pb', ''])  # backend
    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
    if pt:
        #model = attempt_load(weights, map_location=device)  # load FP32 model

        stride = int(model.stride.max())  # model stride
        names = model.module.names if hasattr(
            model, 'module') else model.names  # get class names
        if half:
            model.half()  # to FP16
        if classify:  # second-stage classifier
            # modelc = load_classifier(name='resnet50', n=2)  # initialize
            modelc.load_state_dict(
                torch.load('resnet50.pt',
                           map_location=device)['model']).to(device).eval()
    elif onnx:
        check_requirements(('onnx', 'onnxruntime'))
        import onnxruntime
        session = onnxruntime.InferenceSession(w, None)
    else:  # TensorFlow models
        check_requirements(('tensorflow>=2.4.1', ))
        import tensorflow as tf
        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt

            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(
                    lambda: tf.compat.v1.import_graph_def(gd, name=""),
                    [])  # wrapped import
                return x.prune(
                    tf.nest.map_structure(x.graph.as_graph_element, inputs),
                    tf.nest.map_structure(x.graph.as_graph_element, outputs))

            graph_def = tf.Graph().as_graph_def()
            graph_def.ParseFromString(open(w, 'rb').read())
            frozen_func = wrap_frozen_graph(gd=graph_def,
                                            inputs="x:0",
                                            outputs="Identity:0")
        elif saved_model:
            model = tf.keras.models.load_model(w)
        elif tflite:
            interpreter = tf.lite.Interpreter(
                model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Dataloader
    if webcam:
        view_img = check_imshow()
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
        bs = len(dataset)  # batch_size
    else:
        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
        bs = 1  # batch_size
    vid_path, vid_writer = [None] * bs, [None] * bs

    # Run inference
    formatList = list()
    if pt and device.type != 'cpu':
        model(
            torch.zeros(1, 3, *imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        if onnx:
            img = img.astype('float32')
        else:
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
        img = img / 255.0  # 0 - 255 to 0.0 - 1.0
        if len(img.shape) == 3:
            img = img[None]  # expand for batch dim

        # Inference
        t1 = time_sync()
        if pt:
            visualize = increment_path(save_dir / Path(path).stem,
                                       mkdir=True) if visualize else False
            pred = model(img, augment=augment, visualize=visualize)[0]
        elif onnx:
            pred = torch.tensor(
                session.run([session.get_outputs()[0].name],
                            {session.get_inputs()[0].name: img}))
        else:  # tensorflow model (tflite, pb, saved_model)
            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
            if pb:
                pred = frozen_func(x=tf.constant(imn)).numpy()
            elif saved_model:
                pred = model(imn, training=False).numpy()
            elif tflite:
                if tfl_int8:  # INT8 input; see the (de)quantization sketch after this example
                    scale, zero_point = input_details[0]['quantization']
                    imn = (imn / scale + zero_point).astype(np.uint8)
                interpreter.set_tensor(input_details[0]['index'], imn)
                interpreter.invoke()
                pred = interpreter.get_tensor(output_details[0]['index'])
                if tfl_int8:
                    scale, zero_point = output_details[0]['quantization']
                    pred = (pred.astype(np.float32) - zero_point) * scale
            pred[..., 0] *= imgsz[1]  # x
            pred[..., 1] *= imgsz[0]  # y
            pred[..., 2] *= imgsz[1]  # w
            pred[..., 3] *= imgsz[0]  # h
            pred = torch.tensor(pred)

        # NMS
        pred = non_max_suppression(pred,
                                   conf_thres,
                                   iou_thres,
                                   classes,
                                   agnostic_nms,
                                   max_det=max_det)
        t2 = time_sync()

        # Second-stage classifier (optional)
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process predictions
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s.copy(), getattr(
                    dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            save_crop = True
            imc = im0.copy() if save_crop else im0  # for save_crop
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh,
                                conf) if save_conf else (cls,
                                                         *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or save_crop or view_img:  # Add bbox to image
                        c = int(cls)  # integer class
                        label = None if hide_labels else (
                            names[c]
                            if hide_conf else f'{names[c]} {conf:.2f}')
                        im0 = plot_one_box(xyxy,
                                           im0,
                                           label=label,
                                           color=colors(c, True),
                                           line_width=line_thickness)
                        if save_crop:
                            #save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)
                            # print("names : ", names[c])
                            _, imagePath = save_one_box(
                                xyxy,
                                imc,
                                file=save_dir / 'croppedImages' /
                                f'{p.stem}_{names[c]}.jpg',
                                BGR=True)
                            format = save_one_json(xyxy,
                                                   imc,
                                                   tag=names[c],
                                                   imagePath=imagePath)
                            formatList.append(format)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':

                    cv2.imwrite(save_path, im0)
                else:  # 'video' or 'stream'
                    if vid_path[i] != save_path:  # new video
                        vid_path[i] = save_path
                        if isinstance(vid_writer[i], cv2.VideoWriter):
                            vid_writer[i].release(
                            )  # release previous video writer
                        if vid_cap:  # video
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        else:  # stream
                            fps, w, h = 30, im0.shape[1], im0.shape[0]
                            save_path += '.mp4'
                        vid_writer[i] = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                            (w, h))
                    vid_writer[i].write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {colorstr('bold', save_dir)}{s}")

    if update:
        strip_optimizer(weights)  # update model (to fix SourceChangeWarning)

    print(f'Done. ({time.time() - t0:.3f}s)')
    return formatList
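
The .txt label writer above divides each pixel-space xyxy box by the gn (whwh) gain to produce the normalized xywh format YOLO labels use. A minimal, self-contained sketch of that conversion; the helper name is hypothetical and stands in for xyxy2xywh from utils.general:

import torch

def xyxy_to_normalized_xywh(xyxy, im0_shape):
    # xyxy is a pixel-space box (x1, y1, x2, y2); im0_shape is (h, w, channels)
    h, w = im0_shape[:2]
    gn = torch.tensor([w, h, w, h], dtype=torch.float32)  # normalization gain whwh
    x1, y1, x2, y2 = [float(v) for v in xyxy]
    xywh = torch.tensor([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1])
    return (xywh / gn).tolist()  # center-x, center-y, width, height in [0, 1]

print(xyxy_to_normalized_xywh([100, 50, 300, 250], (480, 640, 3)))
# -> [0.3125, 0.3125, 0.3125, 0.4167]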
Exemplo n.º 26
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img
              ) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=3)
                        print(xyxy)  # debug: raw pixel-space box coordinates
            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

    print('Done. (%.3fs)' % (time.time() - t0))
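
Every variant of detect() in these examples starts with the same source-type test to decide between LoadStreams and LoadImages. A small sketch of that check, with a hypothetical helper name:

def is_stream_source(source: str) -> bool:
    return (source.isnumeric()  # webcam index such as '0'
            or source.endswith('.txt')  # text file listing stream URLs
            or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')))

assert is_stream_source('0')
assert is_stream_source('rtsp://camera.local/stream')
assert not is_stream_source('data/images/bus.jpg')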
Exemplo n.º 27
0
def detect(weights: str, src: str):
    device = select_device('')
    half = device.type != 'cpu'

    # Load model
    model = attempt_load(weights, map_location=device)
    stride = int(model.stride.max())
    infer_size = check_img_size(640, s=stride)
    if half:
        model.half()

    cudnn.benchmark = True  # set True to speed up constant image size inference
    dataset = LoadStreams(src, img_size=infer_size, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    np.random.seed(2)
    colors = [[np.random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, infer_size, infer_size).to(device).type_as(
                next(model.parameters())))

    for _, img, im0s, _ in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        pred = model(img)[0]
        # pred, features = model(img)
        # print(pred.shape)
        # print(features[0].shape)
        # print(features[1].shape)
        # print(features[2].shape)

        pred = non_max_suppression(prediction=pred,
                                   conf_thres=0.25,
                                   iou_thres=0.45,
                                   classes=None,
                                   agnostic=False)

        # single-image batch: process only the first stream
        im0 = im0s[0].copy()
        det = pred[0]

        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
        for *xyxy, conf, cls in reversed(det):
            label = f'{names[int(cls)]} {conf:.2f}'
            plot_one_box(xyxy,
                         im0,
                         label=label,
                         color=colors[int(cls)],
                         line_thickness=2)

        cv2.imshow("img", im0)
        if cv2.waitKey(1) == ord("q"):
            cv2.destroyAllWindows()
            break
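
Each example also repeats the same preprocessing of the loader output before inference: the loaders yield a letterboxed CHW uint8 array, which becomes a float tensor in [0, 1] with a batch dimension. A self-contained sketch of that step (the function name is illustrative):

import numpy as np
import torch

def preprocess(img: np.ndarray, half: bool = False) -> torch.Tensor:
    t = torch.from_numpy(img)            # CHW uint8, as produced by the loaders
    t = t.half() if half else t.float()  # uint8 to fp16/32
    t /= 255.0                           # 0 - 255 to 0.0 - 1.0
    if t.ndimension() == 3:
        t = t.unsqueeze(0)               # add batch dimension -> 1xCxHxW
    return t

dummy = np.random.randint(0, 256, (3, 640, 640), dtype=np.uint8)
print(preprocess(dummy).shape)  # torch.Size([1, 3, 640, 640])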
Exemplo n.º 28
0
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.save_dir, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith(
        ('rtsp://', 'rtmp://', 'http://')) or source.endswith('.txt')

    cnt = 0
    mat = [0, 0, 0, 0, 0]  # can pls gls trsh none

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):  # output dir
        shutil.rmtree(out)  # delete dir
    os.makedirs(out)  # make new dir
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load(
            'weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # run once
    _ = model(img.half() if half else img) if device.type != 'cpu' else None
    for path, img, im0s, vid_cap in dataset:
        # when a trigger value is received from the Arduino
        if ARD.readline().decode().strip() == "1" or cnt != 0:

            cnt += 1

            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            t1 = time_synchronized()
            pred = model(img, augment=opt.augment)[0]

            # Apply NMS
            pred = non_max_suppression(
                pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
            t2 = time_synchronized()

            # Apply Classifier
            if classify:
                pred = apply_classifier(pred, modelc, img, im0s)

            # Process detections
            for i, det in enumerate(pred):  # detections per image
                if webcam:  # batch_size >= 1
                    p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
                else:
                    p, s, im0 = path, '', im0s

                save_path = str(Path(out) / Path(p).name)
                txt_path = str(Path(out) / Path(p).stem) + ('_%g' %
                                                            dataset.frame if dataset.mode == 'video' else '')
                s += '%gx%g ' % img.shape[2:]  # print string
                # normalization gain whwh
                gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]
                if det is not None and len(det):
                    # Rescale boxes from img_size to im0 size
                    det[:, :4] = scale_coords(
                        img.shape[2:], det[:, :4], im0.shape).round()

                    # Print results
                    # det[:, -1] is the class column of each detection;
                    # unique() reports each class once even when it is
                    # detected several times in the frame
                    for c in det[:, -1].unique():
                        n = (det[:, -1] == c).sum()  # detections per class
                        s += '%g %ss, ' % (n, names[int(c)])  # add to string
                        # c holds the detected class index; count every
                        # detection: 0 can, 1 pls, 2 gls
                        mat[int(c)] += 1

                    # Write results
                    for *xyxy, conf, cls in reversed(det):
                        if save_txt:  # Write to file
                            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                    gn).view(-1).tolist()  # normalized xywh
                            line = (cls, conf, *xywh) if opt.save_conf else (
                                cls, *xywh)  # label format
                            with open(txt_path + '.txt', 'a') as f:
                                f.write(('%g ' * len(line) + '\n') % line)

                        # overlay the label on the frame, e.g. 'can 0.70'
                        if save_img or view_img:  # Add bbox to image
                            label = '%s %.2f' % (names[int(cls)], conf)
                            plot_one_box(xyxy, im0, label=label,
                                         color=colors[int(cls)], line_thickness=3)
                else:  # det is None or empty -> count as trsh/none
                    mat[4] += 1

                # Print time (inference + NMS)
                print('%sDone. (%.3fs)' % (s, t2 - t1))

                if cnt == 10:
                    cnt = 0
                    # send the most frequent of can, pls, gls, (trsh + none) over serial
                    # TODO: ties between counts should also be handled later
                    max_mat = -1
                    max_value = 0
                    # for i, m in enumerate(mat):
                    #     if m > max_value:
                    #         max_value = m
                    #         max_mat = i
                    for i in range(3):
                        if mat[i] > max_value:
                            max_value = mat[i]
                            max_mat = i
                    if sum(mat[3:5]) > max_value:
                        max_value = sum(mat[3:5])
                        max_mat = 3  # trsh, none은 합쳐서 3으로 취급
                    print(mat)
                    for i in range(5):
                        mat[i] = 0
                    # send max_mat over serial here; the Arduino cannot recognize 0,
                    # so 1 is added before sending
                    ARD.write(str(max_mat+1).encode())
                    print("print", max_mat + 1)

                elif mat[0] >= 4 or mat[1] >= 4 or mat[2] >= 4 or mat[3] >= 4:
                    cnt = 0
                    max_mat = -1
                    max_value = 0
                    for i in range(4):
                        if mat[i] > max_value:
                            max_value = mat[i]
                            max_mat = i

                    for i in range(5):
                        mat[i] = 0

                    ARD.write(str(max_mat+1).encode())
                    print("print", max_mat + 1)

                # Stream results
                if view_img:
                    cv2.imshow("video", im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration

                # Save results (image with detections)
                if save_img:
                    if dataset.mode == 'images':
                        cv2.imwrite(save_path, im0)
                    else:
                        if vid_path != save_path:  # new video
                            vid_path = save_path
                            if isinstance(vid_writer, cv2.VideoWriter):
                                vid_writer.release()  # release previous video writer

                            fourcc = 'mp4v'  # output video codec
                            fps = vid_cap.get(cv2.CAP_PROP_FPS)
                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                            vid_writer = cv2.VideoWriter(
                                save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                        vid_writer.write(im0)
        else:
            if view_img:
                cv2.imshow("video", im0s[0].copy())
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration
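
The ten-frame voting in the example above is easier to follow in isolation. A sketch with hypothetical names that reproduces the selection over the mat counts:

def vote(mat):
    """mat = [can, pls, gls, trsh, none] counts; returns the serial code to send."""
    max_mat, max_value = -1, 0
    for i in range(3):  # can, pls, gls compete individually
        if mat[i] > max_value:
            max_value = mat[i]
            max_mat = i
    if sum(mat[3:5]) > max_value:  # trsh and none are pooled as class 3
        max_mat = 3
    return max_mat + 1  # +1 because the Arduino cannot recognize 0

print(vote([6, 2, 1, 0, 1]))  # 1 -> can
print(vote([1, 1, 0, 4, 4]))  # 4 -> trsh/none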
Exemplo n.º 29
0
def detect(save_img=False):
    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.endswith(
        '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(
        increment_path(Path(opt.project) / opt.name,
                       exist_ok=opt.exist_ok))  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(
        parents=True, exist_ok=True)  # make dir

    # Initialize
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz, stride=stride)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, stride=stride)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    if device.type != 'cpu':
        model(
            torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(
                next(model.parameters())))  # run once
    t0 = time.time()
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred,
                                   opt.conf_thres,
                                   opt.iou_thres,
                                   classes=opt.classes,
                                   agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(
                ), dataset.count
            else:
                p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0)

            p = Path(p)  # to Path
            save_path = str(save_dir / p.name)  # img.jpg
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '' if dataset.mode == 'image' else f'_{frame}')  # img.txt
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1,
                                          0]]  # normalization gain whwh
            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) /
                                gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if opt.save_conf else (
                            cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)],
                                     line_thickness=1)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                cv2.waitKey(1)  # 1 millisecond

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'image':
                    cv2.imwrite(save_path, im0)
                else:  # 'video'
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*fourcc), fps,
                            (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
        print(f"Results saved to {save_dir}{s}")

    print(f'Done. ({time.time() - t0:.3f}s)')
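
The save branch above opens a new cv2.VideoWriter whenever the output path changes and releases the previous one. A compact sketch of that pattern; the class name is hypothetical, and fps falls back to 30 for streams as in the examples:

import cv2

class LazyVideoWriter:
    def __init__(self):
        self.path, self.writer = None, None

    def write(self, frame, save_path, fps=30):
        if self.path != save_path:  # new video
            self.path = save_path
            if isinstance(self.writer, cv2.VideoWriter):
                self.writer.release()  # release previous video writer
            h, w = frame.shape[:2]
            self.writer = cv2.VideoWriter(
                save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
        self.writer.write(frame)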
Exemplo n.º 30
0
from utils.datasets import LoadStreams
import cv2

dataset = LoadStreams('0')

for path, img, im0s, vid_cap in dataset:
    cv2.imshow('original', im0s[-1])
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()

del dataset
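
As a small extension of the viewer above, the same loop can also dump frames to disk; the sampling interval and filename pattern are illustrative, and only the LoadStreams interface shown above is assumed:

import cv2
from utils.datasets import LoadStreams

dataset = LoadStreams('0')
for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset):
    cv2.imshow('original', im0s[-1])
    if frame_idx % 30 == 0:  # save roughly one frame per second at 30 fps
        cv2.imwrite(f'frame_{frame_idx}.jpg', im0s[-1])
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()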