def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """Detect table regions in an image using ``tableDetectNet``.

    :param img: image array whose first two dimensions are (height, width).
        The original docstring labels the channel order "GBR"
        (presumably BGR -- TODO confirm against callers).
    :param sc: network input size; only ``sc[0]`` is read, so the network
        input is always square.
    :param thresh: a detection is kept only if its best class score is
        strictly greater than this value; also forwarded to ``nms_box`` as
        ``score_threshold``.
    :param NMSthresh: forwarded to ``nms_box`` as ``nms_threshold``.
    :return: ``(boxes, adBoxes, confidences)``. ``boxes`` and ``adBoxes``
        are the two values returned by ``fix_table_box_for_table_line``;
        ``confidences`` is the score array after ``nms_box`` (an empty
        array when nothing was detected).
    """
    scale = sc[0]
    img_height, img_width = img.shape[:2]

    # Reverse the last (channel) axis, then letterbox to the square input.
    # fx / fy are used below to map coordinates back onto the source image
    # (presumably the resize ratios -- confirm in letterbox_image).
    resized, fx, fy = letterbox_image(img[..., ::-1], (scale, scale))
    blob = cv2.dnn.blobFromImage(
        resized, scalefactor=1.0, size=(scale, scale), swapRB=True, crop=False
    )
    tableDetectNet.setInput(blob / 255.0)
    output_names = tableDetectNet.getUnconnectedOutLayersNames()
    outputs = tableDetectNet.forward(output_names)

    confidences = []
    boxes = []
    for output in outputs:
        for detection in output:
            # Elements 0-3 are centre x, centre y, width, height; elements
            # from index 5 onward are the per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            # Only class 1 above the threshold is kept (presumably the
            # table class -- confirm against the model's label list).
            # Filtering first avoids computing geometry for rejects.
            if not (confidence > thresh and class_id == 1):
                continue

            # Map the detection back to source-image pixel coordinates.
            center_x = int(detection[0] * scale / fx)
            center_y = int(detection[1] * scale / fy)
            width = int(detection[2] * scale / fx)
            height = int(detection[3] * scale / fy)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)

            # Clamp the corners to stay at least one pixel inside the image.
            xmin = max(left, 1)
            ymin = max(top, 1)
            xmax = min(left + width, img_width - 1)
            ymax = min(top + height, img_height - 1)

            confidences.append(float(confidence))
            boxes.append([xmin, ymin, xmax, ymax])

    boxes = np.array(boxes)
    confidences = np.array(confidences)
    if len(boxes) > 0:
        boxes, confidences = nms_box(
            boxes, confidences, score_threshold=thresh, nms_threshold=NMSthresh
        )

    # Always runs, even with no detections, so adBoxes is always defined.
    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """Detect tables in an image.

    NOTE(review): this file contains an earlier definition of
    ``table_detect`` with the same signature; this later one shadows it.

    :param img: the image to run detection on; the original docstring
        labels the channel order "GBR" (presumably BGR -- TODO confirm).
    :param sc: target size of the pre-processed image. Only ``sc[0]`` is
        used, for both width and height.
    :param thresh: confidence threshold; only detections whose confidence
        is greater than this are kept.
    :param NMSthresh: non-maximum-suppression threshold passed to
        ``nms_box``.
    :return: ``(boxes, adBoxes, confidences)`` where ``boxes`` and
        ``adBoxes`` come from ``fix_table_box_for_table_line`` and
        ``confidences`` are the scores remaining after ``nms_box``.
    """
    scale = sc[0]
    # First two entries of img.shape: image height and width.
    img_height, img_width = img.shape[:2]

    # letterbox_image returns the resized input plus two ratios; per the
    # original comments these are new-width / original-width and
    # new-height / original-height.
    letterboxed, fx, fy = letterbox_image(img[..., ::-1], (scale, scale))

    # Pre-process into a network blob ([H, W, C] -> [B, C, H, W]) with the
    # first and last channels swapped.
    net_input = cv2.dnn.blobFromImage(
        letterboxed,
        scalefactor=1.0,
        size=(scale, scale),
        swapRB=True,
        crop=False,
    )
    # Feed the model, dividing pixel values by 255.
    tableDetectNet.setInput(net_input / 255.0)

    # Names of the unconnected output layers, then a forward pass that
    # yields one result array per named layer.
    out_layer_names = tableDetectNet.getUnconnectedOutLayersNames()
    outputs = tableDetectNet.forward(out_layer_names)

    # Kept confidences and their matching [xmin, ymin, xmax, ymax] boxes.
    confidences = []
    boxes = []

    for output in outputs:
        # Per the original comments, each detection is laid out as
        # [center_x, center_y, w, h, <unused here>, class scores...].
        for detection in output:
            scores = detection[5:]
            # The index of the highest score is the class id.
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            # Keep only class 1 detections above the threshold. Rejecting
            # early skips the coordinate arithmetic for everything else.
            if not (confidence > thresh and class_id == 1):
                continue

            # Restore x, y, w, h to the original image's coordinates.
            center_x = int(detection[0] * scale / fx)
            center_y = int(detection[1] * scale / fy)
            width = int(detection[2] * scale / fx)
            height = int(detection[3] * scale / fy)

            # Top-left corner of the box: left is x, top is y.
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)

            # Top-left and bottom-right corners, clamped to the range
            # [1, width - 1] x [1, height - 1].
            xmin = max(left, 1)
            ymin = max(top, 1)
            xmax = min(left + width, img_width - 1)
            ymax = min(top + height, img_height - 1)

            confidences.append(float(confidence))
            boxes.append([xmin, ymin, xmax, ymax])

    boxes = np.array(boxes)
    confidences = np.array(confidences)

    # Filter overlapping boxes with non-maximum suppression.
    if len(boxes) > 0:
        boxes, confidences = nms_box(
            boxes, confidences, score_threshold=thresh, nms_threshold=NMSthresh
        )

    # Runs on the empty result too, so adBoxes is bound on every path.
    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences