예제 #1
0
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """Detect tables in ``img`` using the module-level ``tableDetectNet``.

    Args:
        img: Image array; the original author notes the channel order as
            "GBR" (presumably BGR -- confirm against the caller).
        sc: Network input size. Only ``sc[0]`` is read and it is used for
            both dimensions.
        thresh: A detection is kept only when its best class score is
            strictly greater than this value. Also forwarded to ``nms_box``
            as ``score_threshold``.
        NMSthresh: Forwarded to ``nms_box`` as ``nms_threshold``.

    Returns:
        ``(boxes, adBoxes, confidences)`` where ``boxes`` and ``adBoxes``
        come from ``fix_table_box_for_table_line`` and ``confidences`` are
        the scores of the kept detections (after ``nms_box`` when at least
        one box was found).
    """
    side = sc[0]
    img_h, img_w = img.shape[:2]

    # NOTE(review): the channels are reversed here and swapRB=True swaps
    # them again below -- confirm this is the order the model expects.
    letterboxed, fx, fy = letterbox_image(img[..., ::-1], (side, side))
    blob = cv2.dnn.blobFromImage(
        letterboxed,
        scalefactor=1.0,
        size=(side, side),
        swapRB=True,
        crop=False,
    )
    tableDetectNet.setInput(blob / 255.0)
    net_outputs = tableDetectNet.forward(
        tableDetectNet.getUnconnectedOutLayersNames()
    )

    def corner_box(det):
        """Turn one detection row into a clamped [xmin, ymin, xmax, ymax]."""
        # The first four entries are treated as centre x/y and width/height;
        # they are multiplied by the network side and divided by the
        # letterbox factors (presumably to map back to the source image).
        cx = int(det[0] * side / fx)
        cy = int(det[1] * side / fy)
        box_w = int(det[2] * side / fx)
        box_h = int(det[3] * side / fy)
        x0 = int(cx - box_w / 2)
        y0 = int(cy - box_h / 2)
        # Clamp to [1, dimension - 1] on each axis.
        return [
            max(x0, 1),
            max(y0, 1),
            min(x0 + box_w, img_w - 1),
            min(y0 + box_h, img_h - 1),
        ]

    kept_boxes = []
    kept_scores = []
    for layer_out in net_outputs:
        for det in layer_out:
            class_scores = det[5:]
            best = np.argmax(class_scores)
            score = class_scores[best]
            if not score > thresh:
                continue
            box = corner_box(det)
            # Only detections whose best class is id 1 are retained.
            if best == 1:
                kept_scores.append(float(score))
                kept_boxes.append(box)

    boxes = np.array(kept_boxes)
    confidences = np.array(kept_scores)
    if len(boxes) > 0:
        boxes, confidences = nms_box(
            boxes,
            confidences,
            score_threshold=thresh,
            nms_threshold=NMSthresh,
        )

    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences
예제 #2
0
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """
    Table detection.

    :param img: image to run detection on; the original author notes the
        channel order as "GBR" (presumably BGR -- confirm)
    :param sc: target size of the preprocessed image; only ``sc[0]`` is read
        and it is used for both width and height
    :param thresh: confidence threshold; only detections scoring strictly
        above it are kept
    :param NMSthresh: threshold handed to non-maximum suppression
    :return: ``(boxes, adBoxes, confidences)`` -- ``boxes`` and ``adBoxes`` as
        returned by ``fix_table_box_for_table_line``, plus the confidences of
        the kept detections
    """
    net_size = sc[0]
    # The first two entries of the shape are the image height and width.
    img_height, img_width = img.shape[:2]

    # letterbox_image returns the resized image plus fx / fy, described by the
    # original author as the new/original width and height ratios.
    # NOTE(review): channels are reversed here and swapped again by
    # swapRB=True below -- confirm the resulting order is intended.
    resized, fx, fy = letterbox_image(img[..., ::-1], (net_size, net_size))

    # Pack the image into a network input blob ([H, W, C] -> [B, C, H, W]).
    blob = cv2.dnn.blobFromImage(
        resized, scalefactor=1.0, size=(net_size, net_size), swapRB=True, crop=False
    )
    # Feed the blob, divided by 255, to the model.
    tableDetectNet.setInput(blob / 255.0)
    # Run a forward pass over the unconnected output layers; the result is a
    # list with one entry per output layer.
    layer_names = tableDetectNet.getUnconnectedOutLayersNames()
    outputs = tableDetectNet.forward(layer_names)

    # Accumulators for the kept boxes and their confidences.
    boxes = []
    confidences = []
    for output in outputs:
        # Each detection row is laid out as
        # [center_x, center_y, w, h, <other>, class1_confidence, class2_confidence].
        for detection in output:
            # Entries from index 5 onward are the per-class confidences.
            scores = detection[5:]
            # The index of the highest score is the predicted class id.
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            # Drop anything that does not clear the confidence threshold.
            if not confidence > thresh:
                continue

            # Map x, y, w, h back onto the original image.
            center_x = int(detection[0] * net_size / fx)
            center_y = int(detection[1] * net_size / fy)
            width = int(detection[2] * net_size / fx)
            height = int(detection[3] * net_size / fy)
            # Top-left corner of the box: left is x, top is y.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)

            # Only class id 1 is kept.
            if class_id != 1:
                continue

            confidences.append(float(confidence))
            # Top-left / bottom-right corners, clamped to [1, dimension - 1].
            boxes.append([
                max(left, 1),
                max(top, 1),
                min(left + width, img_width - 1),
                min(top + height, img_height - 1),
            ])

    boxes = np.array(boxes)
    confidences = np.array(confidences)
    # Filter overlapping boxes with non-maximum suppression.
    if len(boxes) > 0:
        boxes, confidences = nms_box(
            boxes, confidences, score_threshold=thresh, nms_threshold=NMSthresh
        )

    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences