Example #1
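All of the snippets below share a few helpers that appear to come from the eriklindernoren/PyTorch-YOLOv3 codebase: pad_to_square, resize, non_max_suppression (which returns, per image, either None or an (N, 7) tensor of x1, y1, x2, y2, conf, cls_conf, cls_pred), and rescale_boxes. For reference, a minimal sketch of the two preprocessing helpers as that repository defines them; treat the exact padding convention as an assumption:

import torch.nn.functional as F

def pad_to_square(img, pad_value):
    # img: (C, H, W) tensor; pad the shorter side so the result is square.
    c, h, w = img.shape
    dim_diff = abs(h - w)
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # F.pad convention on the last two dims: (left, right, top, bottom)
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    return F.pad(img, pad, "constant", value=pad_value), pad

def resize(image, size):
    # nearest-neighbour resize of a single (C, H, W) image to (C, size, size)
    return F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)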
    def detect(self, img_path):

        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        _, H, W = img.shape
        img, _ = pad_to_square(img, 0)
        imgs = resize(img, 416).unsqueeze(0).to('cuda')

        with torch.no_grad():
            output = self.net(imgs)
            output = non_max_suppression(output, 0.5, 0.5)[0]
        if output is None:
            return []
        output = rescale_boxes(output, 416, (H, W)).cpu().numpy()
        detections = []

        for x1, y1, x2, y2, conf, cls_conf, cls_pred in output:
            class_ = self.get_class(int(cls_pred))
            top_left = (int(x1), int(y1))
            bottom_right = (int(x2), int(y2))
            box_2d = [top_left, bottom_right]
            detections.append(Detection(box_2d, class_))

        return detections
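detect wraps each box in a Detection object that none of these snippets define; a hypothetical minimal container consistent with the Detection(box_2d, class_) call above (the attribute names are assumptions):

class Detection:
    # Hypothetical container; only the constructor arguments are known from the snippet.
    def __init__(self, box_2d, detected_class):
        self.box_2d = box_2d                  # [(x1, y1), (x2, y2)] pixel corners
        self.detected_class = detected_class  # class label string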
Example #2
    def predict(self,
                image,
                device='cuda',
                img_size=416,
                conf_thres=0.6,
                nms_thres=0.2,
                normalize=False,
                normalize_input=True):
        """
        Util method for getting preprocessed bboxes
        
        img - (W,H,3)
        
        returns - bboxes (N, 7)
        """

        if normalize_input:
            img = transforms.ToTensor()(image)
        else:
            img = torch.from_numpy(np.array(image)).float().permute(2, 0, 1)
        padded_img, pad = pad_to_square(img, 0)
        warped_img = F.interpolate(padded_img.unsqueeze(0),
                                   size=img_size,
                                   mode="nearest").squeeze(0)

        if device:
            device = torch.device(device)
            self = self.to(device)
            warped_img = warped_img.to(device)

        result = self(warped_img.unsqueeze(0))
        outputs = non_max_suppression(result,
                                      conf_thres=conf_thres,
                                      nms_thres=nms_thres)

        pred_bboxes = outputs[0]

        if pred_bboxes is None:
            return []

        _, img_h, img_w = img.shape
        _, height, width = warped_img.shape

        # undo the resize: back to [0, 1] coordinates on the padded square
        pred_bboxes[:, [0, 2]] /= width
        pred_bboxes[:, [1, 3]] /= height

        # undo the padding: pad is an F.pad-style (left, right, top, bottom) tuple
        pred_bboxes[:, [0, 2]] -= pad[0] / max(img_w, img_h)
        pred_bboxes[:, [1, 3]] -= pad[2] / max(img_w, img_h)

        if not normalize:
            pred_bboxes[:, [0, 2]] *= max(img_w, img_h)
            pred_bboxes[:, [1, 3]] *= max(img_w, img_h)

        return pred_bboxes
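A hypothetical call to predict, assuming the method lives on a loaded YOLO model object and PIL.Image is imported as in the other snippets; the file name is a placeholder:

image = Image.open("street.jpg").convert("RGB")   # placeholder file name
bboxes = model.predict(image, device="cuda", img_size=416)
for x1, y1, x2, y2, conf, cls_conf, cls_pred in bboxes:
    print(int(cls_pred), float(cls_conf), float(x1), float(y1), float(x2), float(y2))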
Example #3
def infer_video(model,
                video_path,
                save_path,
                iou_thres,
                conf_thres,
                nms_thres,
                img_size,
                batch_size=1):
    model.eval()
    if os.path.isfile(video_path):
        cap = cv2.VideoCapture(video_path)
    else:
        raise FileNotFoundError('no such video: ' + video_path)
    tp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    # 'mp4v' matches the .mp4 container; the timestamp keeps runs from overwriting each other
    stream = cv2.VideoWriter(os.path.join(save_path, tp + '.mp4'),
                             cv2.VideoWriter_fourcc(*'mp4v'), 20.0,
                             (int(cap.get(3)), int(cap.get(4))))
    # NOTE: darknet-style YOLO weights are usually trained on unnormalized [0, 1]
    # input; keep or drop Normalize to match how the model was trained.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # cv2 reads BGR; convert to RGB before feeding the model
            img = transform(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).to(device)
            img, _ = pad_to_square(img, 0)
            img = F.interpolate(img.unsqueeze(0), img_size,
                                mode='nearest').squeeze()
            img = img.unsqueeze(0)
            with torch.no_grad():
                output = model(img)
                output = non_max_suppression(output,
                                             conf_thres=conf_thres,
                                             nms_thres=nms_thres)
                if output[0] is None:
                    # no detections: write the raw frame through
                    stream.write(frame)
                    continue
                output = rescale_boxes(output[0], img_size, frame.shape[:2])
                img = vis_frame(frame, output)
                stream.write(img)
        else:
            break
    cap.release()
    stream.release()
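A hypothetical invocation of infer_video, with the model loaded the same way as in Example #8 below; all paths and thresholds are placeholders:

model = Darknet("config/yolov3.cfg", img_size=416).to(device)
model.load_darknet_weights("weights/yolov3.weights")
infer_video(model, video_path="input.mp4", save_path="./output",
            iou_thres=0.5, conf_thres=0.5, nms_thres=0.4, img_size=416)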
Example #4
def Yolo_detect(model,
                camInputFrame,
                img_size=416,
                conf_thres=0.8,
                nms_thres=0.4):

    img = transforms.ToTensor()(Image.fromarray(camInputFrame))
    # Pad to square resolution
    img, _ = pad_to_square(img, 0)
    # Resize
    img = resize(img, img_size)
    img = img.unsqueeze(0)  # (1, 3, 416, 416)

    input_imgs = img.cuda()
    with torch.no_grad():
        detections = model(input_imgs)
        detections = non_max_suppression(detections, conf_thres, nms_thres)

    # non_max_suppression returns a list with one entry per image; that entry may be None
    detections = detections[0]
    if detections is not None:
        detections = rescale_boxes(detections, img_size,
                                   camInputFrame.shape[:2])
    return detections
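Yolo_detect, like several snippets here, maps boxes from the padded square input back onto the original frame with rescale_boxes. For reference, a sketch of that helper as PyTorch-YOLOv3 defines it:

def rescale_boxes(boxes, current_dim, original_shape):
    # boxes: (N, 7) detections in current_dim x current_dim padded coordinates.
    orig_h, orig_w = original_shape
    # padding added on the shorter side, measured in current_dim pixels
    pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
    pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
    # image size inside the padded square
    unpad_h = current_dim - pad_y
    unpad_w = current_dim - pad_x
    # remove padding, then rescale to original pixel coordinates
    boxes[:, 0] = ((boxes[:, 0] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h
    boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w
    boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h
    return boxes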
Example #5
        ret, frame = input.read()
        nums += 1
        if not ret:
            break

        print("---------------读取第" + str(nums) + "帧")

        frame_start_t = time.time()
        # convert the cv2 BGR frame to a PIL RGB image, then to a tensor
        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        img = torchvision.transforms.ToTensor()(frame_pil)
        # NEW: create an object that can be used to draw on frame_pil
        draw = ImageDraw.Draw(frame_pil)

        input_imgs, _ = pad_to_square(img, 0)
        # Resize
        input_imgs = resize(input_imgs, opt.img_size).unsqueeze(0)

        # Configure input
        input_imgs = Variable(input_imgs.type(Tensor))
        tensor_t = time.time()
        print("tensor conversion took: " + str(tensor_t - frame_start_t))

        # run detection
        with torch.no_grad():
            detections = model(input_imgs.to(device))
            detections = non_max_suppression(detections, opt.conf_thres,
                                             opt.nms_thres)[0]
        detect_t = time.time()
        print("进行物体检测用时:" + str(detect_t - tensor_t))
Example #6
    # the opening of this call was truncated in the source; a plausible reconstruction:
    out = cv2.VideoWriter('out.avi', cv2.VideoWriter_fourcc(*'MJPG'),
                          20.0, (int(cap.get(3)), int(cap.get(4))))
    
    trans = transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.486, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    plt.figure()
    plt.margins(0, 0)
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = Image.fromarray(frame)
            img = trans(frame)

            img, _ = pad_to_square(img, 0)
            img = F.interpolate(img.unsqueeze(0), opt.img_size,
                                mode='bilinear').squeeze()
            img = img.unsqueeze(0).to(device)
            with torch.no_grad():
                detections = model(img)
                detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
                # Bounding-box colors
                cmap = plt.get_cmap("tab20b")
                colors = [cmap(i) for i in np.linspace(0, 1, 20)]
                # Create plot
                fig, ax = plt.subplots(1)
                ax.imshow(np.array(frame))

                # Draw bounding boxes and labels of detections
Example #7
    def test_dataloader_getitem(self):
        unloader = transforms.ToPILImage()

        self.normalized_labels = True
        # img_path =   'e:/ML_data/images/train2014/COCO_train2014_000000000092.jpg'
        # label_path = 'e:/ML_data/labels/train2014/COCO_train2014_000000000092.txt'

        img_path = 'E:/trafficlight_detect/light_img/IMG_20181124_125821.jpg'
        label_path = 'E:/trafficlight_detect/labels/IMG_20181124_125821.txt'

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        fig, ax = plt.subplots(1)
        image_orig = unloader(img)
        ax.imshow(image_orig)
        plt.show()
        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand((3, *img.shape[1:]))

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        image_padded = unloader(img)
        fig, ax = plt.subplots(1)
        ax.imshow(image_padded)
        plt.show()

        boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)

        fig, ax = plt.subplots(1)
        ax.imshow(image_orig)
        for box_i, (xx1, yy1, xx2, yy2) in enumerate(zip(x1, y1, x2, y2)):
            ax.add_patch(
                patches.Rectangle(xy=(xx1.item(), yy1.item()),
                                  width=(xx2 - xx1).item(),
                                  height=(yy2 - yy1).item(),
                                  linewidth=1))
        plt.show()

        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]

        fig, ax = plt.subplots(1)
        ax.imshow(image_padded)
        for box_i, (xx1, yy1, xx2, yy2) in enumerate(zip(x1, y1, x2, y2)):
            ax.add_patch(
                patches.Rectangle(xy=(xx1.item(), yy1.item()),
                                  width=(xx2 - xx1).item(),
                                  height=(yy2 - yy1).item(),
                                  linewidth=1))
        plt.show()

        boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        boxes[:, 3] = boxes[:, 3] * w_factor / padded_w
        boxes[:, 4] = boxes[:, 4] * h_factor / padded_h
        fig, ax = plt.subplots(1)
        ax.imshow(image_padded)
        for box_i in range(len(boxes)):
            # mark each box center on the padded image
            x = boxes[box_i, 1] * padded_w
            y = boxes[box_i, 2] * padded_h
            width = boxes[box_i, 3] * padded_w
            height = boxes[box_i, 4] * padded_h
            ax.add_patch(
                patches.Circle(xy=(x.item(), y.item()), radius=10,
                               linewidth=1))
        plt.show()
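The coordinate gymnastics above are easier to follow with concrete numbers. A small worked example (the numbers are illustrative, not from the source):

# Worked example of the label pipeline above (illustrative numbers).
w, h = 640, 480                       # original frame, w > h
cx, cy, bw, bh = 0.5, 0.5, 0.25, 0.5  # normalized YOLO label
# 1. normalized center format -> pixel corners
x1, y1 = w * (cx - bw / 2), h * (cy - bh / 2)   # (240.0, 120.0)
x2, y2 = w * (cx + bw / 2), h * (cy + bh / 2)   # (400.0, 360.0)
# 2. pad_to_square pads 80 px top and bottom -> pad = (0, 0, 80, 80),
#    so x is unchanged and y shifts by the top pad
y1, y2 = y1 + 80, y2 + 80                       # (200.0, 440.0)
# 3. back to normalized center format on the 640x640 padded image
padded = 640
print((x1 + x2) / 2 / padded,  # cx' = 0.5
      (y1 + y2) / 2 / padded,  # cy' = 0.5
      bw * w / padded,         # w'  = 0.25
      bh * h / padded)         # h'  = 0.375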
Example #8
def YOLO():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_def", type=str, default="config/yolov3.cfg", help="path to model definition file")
    parser.add_argument("--weights_path", type=str, default="weights/yolov3.weights", help="path to weights file")
    parser.add_argument("--class_path", type=str, default="data/coco.names", help="path to class label file")
    parser.add_argument("--conf_thres", type=float, default=0.8, help="object confidence threshold")
    parser.add_argument("--nms_thres", type=float, default=0.2, help="iou thresshold for non-maximum suppression")
    parser.add_argument("--batch_size", type=int, default=1, help="size of the batches")
    parser.add_argument("--img_size", type=int, default=416, help="size of each image dimension")
    parser.add_argument("--video", type=str, required=True, help="input video")
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--output", default="./output", help="output dir")
    opt = parser.parse_args()
    print(opt)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Darknet(opt.model_def, img_size=opt.img_size).to(device)

    if opt.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(opt.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(opt.weights_path))

    model.eval()  # Set in evaluation mode

    classes = load_classes(opt.class_path)  # Extracts class labels from file

    Tensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    #cap = cv2.VideoCapture(0)
    cap = cv2.VideoCapture(opt.video)
    cap.set(3, 1280)
    cap.set(4, 720)
    # out = cv2.VideoWriter(
    #     "output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 10.0,
    #     (darknet.network_width(netMain), darknet.network_height(netMain)))
    print("Starting the YOLO loop...")

    while True:
        try:
            prev_time = time.time()
            ret, frame_read = cap.read()
            if not ret:
                # end of stream: break instead of spinning on decode errors forever
                break

            frame = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)

            # Extract image as PyTorch tensor
            img = transforms.ToTensor()(frame)

            # Pad to square resolution
            img, _ = pad_to_square(img, 0)
            # Resize
            img = resize(img, opt.img_size)
            img = img.unsqueeze(0)
            # Configure input
            input_imgs = img.type(Tensor)  # torch.autograd.Variable is deprecated; a plain tensor suffices
            # Get detections
            with torch.no_grad():
                detections = model(input_imgs)
                detections = non_max_suppression(detections, opt.conf_thres, opt.nms_thres)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            detections = list(filter(lambda x: x is not None, detections))
            if len(detections) > 0:
                # Rescale boxes to original image
                detections = rescale_boxes(detections[0], opt.img_size, frame.shape[:2])
                frame = cvDrawBoxes(frame, detections, classes)
                current_time = datetime.datetime.now()
                if int(time.time() * 10) % 10 == 0:  # save a snapshot roughly once per second
                    str_date = datetime.datetime.strftime(current_time, "%Y%m%d")
                    str_time = datetime.datetime.strftime(current_time, "%Y%m%d%H%M%S")
                    os.makedirs(os.path.join(opt.output, str_date), exist_ok=True)
                    cv2.imwrite(os.path.join(opt.output, str_date, str_time + ".jpg"), frame)
            # print(1/(time.time()-prev_time))
            if opt.display:
                cv2.imshow('Demo', frame)
                cv2.waitKey(3)
        except Exception as e:
            print("fail to detect", e)
    cap.release()
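Finally, a hypothetical entry point for running this example as a script (the file name in the comment is a placeholder):

# python detect_video.py --video input.mp4 --display --output ./output
if __name__ == "__main__":
    YOLO()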