def ocr_image_region(image: np.ndarray, region: TextRegion) -> Tuple[str, float]:
    # -------------------------
    # deproject the region: warp its quadrilateral onto a flat rectangle
    rect = region.vertices  # was `region[0].vertices`; the annotation says region is a TextRegion itself
    warped = perspective.four_point_transform(image, rect)

    # -------------------------
    # OCR the warped patch as a single-channel grayscale blob
    ocr_image = cv.cvtColor(warped, cv.COLOR_RGB2GRAY)
    img_h, img_w = ocr_image.shape[:2]
    tess_rect = Rect(0, 0, img_w, img_h)
    img_bpp = 1  # bytes per pixel (grayscale)
    img_bpl = img_bpp * img_w  # bytes per line
    text, textconf = get_blob_ocr_result(
        (ocr_image.tobytes(), int(img_w), int(img_h), img_bpp, img_bpl),
        tess_rect)

    # second pass on the inverted image; keep whichever result scores higher
    ocr_inv = cv.bitwise_not(ocr_image)
    text_inv, textconf_inv = get_blob_ocr_result(
        (ocr_inv.tobytes(), int(img_w), int(img_h), img_bpp, img_bpl),
        tess_rect)
    if textconf_inv[0] > textconf[0]:
        text, textconf = text_inv, textconf_inv

    return (text, textconf[0])
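

# Usage sketch: feed detections from the EAST-based detector below into the
# OCR pass above. `TextDetector` names the class that owns `detect()` further
# down; that name is an assumption, as the class header is not in this excerpt.
def ocr_all_regions(image: np.ndarray,
                    detector: 'TextDetector') -> List[Tuple[str, float]]:
    texts = []
    for region, _conf in detector.detect(image):
        text, textconf = ocr_image_region(image, region)
        if text.strip():
            texts.append((text, textconf))
    return texts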


def find_rects(image: np.ndarray) -> List[Rect]:
    """Detects rectangular contours in an image.

    @returns list of merged bounding Rects
    """
    gray = cv.cvtColor(image.copy(), cv.COLOR_RGB2GRAY)
    gray = cv.GaussianBlur(gray, (5, 5), 0)
    # cv.Canny(gray, 70, 180) was a hand-picked guess from a few photos;
    # auto_canny derives the thresholds from the image statistics instead
    edged = auto_canny(gray)

    contours = cv.findContours(edged.copy(), cv.RETR_LIST,
                               cv.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values on OpenCV 4.x and 3 on 3.x
    contours = contours[0] if len(contours) == 2 else contours[1]
    contours = sorted(contours, key=cv.contourArea, reverse=True)
    # drop anything below 15px of contour area, i.e. smaller than roughly 4x4
    contours = [cnt for cnt in contours if cv.contourArea(cnt) > 15]

    rects = map(cv.minAreaRect, contours)
    boxes = [Rect.from_cvrect(*r[0], *r[1]) for r in rects]
    boxes = Rect.nms_merge(boxes)
    return boxes or list()
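

# `auto_canny` is defined elsewhere in this repo and not shown in this
# excerpt. A minimal sketch of the usual median-based heuristic it likely
# follows (assumption): choose the Canny thresholds around the image median
# rather than hand-tuned constants.
def auto_canny_sketch(gray: np.ndarray, sigma: float = 0.33) -> np.ndarray:
    v = float(np.median(gray))
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    return cv.Canny(gray, lower, upper)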


def detect_objects(image: np.ndarray,
                   nnconfig: ObjDetectNetConfig,
                   threshold: float = 0.2) -> List[DetectedObject]:
    """Detects all objects in an image.

    Accepts an OpenCV-compatible image and a config holding the NN weight and
    description file paths.
    Returns a list of DetectedObject tuples: (class id, confidence, Rect).
    """
    image_height, image_width, _ = image.shape

    net = cv.dnn.readNetFromTensorflow(nnconfig.model, nnconfig.config)
    net.setInput(
        cv.dnn.blobFromImage(image, 1, (300, 300), (127.5, 127.5, 127.5),
                             swapRB=True, crop=False))
    output = net.forward()

    results = []
    for detection in output[0, 0, :, :]:
        confidence = float(detection[2])
        if confidence < threshold:
            continue
        class_id = int(detection[1])
        # to look only for people, skip everything where class_id != 1 here
        # box coordinates are normalized to [0, 1]; scale to image pixels
        left = detection[3] * image_width
        top = detection[4] * image_height
        right = detection[5] * image_width
        bottom = detection[6] * image_height
        results.append(
            DetectedObject(class_id, confidence,
                           Rect(left, top, right, bottom)))
    return results
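

# Usage sketch: the file names below are placeholders (assumption), not paths
# that necessarily ship with this repo, and the attribute names assume
# DetectedObject is a namedtuple over (class_id, confidence, rect).
def _detect_objects_example() -> None:
    nnconfig = ObjDetectNetConfig(model='frozen_inference_graph.pb',
                                  config='ssd_mobilenet_v2_coco.pbtxt')
    image = cv.imread('photo.jpg')
    for obj in detect_objects(image, nnconfig, threshold=0.5):
        print(obj.class_id, obj.confidence, obj.rect)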


def detect(
        self,
        image: np.ndarray,
        config: TextDetectorConfig = TextDetectorConfig()
) -> List[Tuple[TextRegion, float]]:
    results = []

    # network input sides must be acceptable to EAST (multiples of 32)
    inpWidth = adjust_dimension(image.shape[1])
    inpHeight = adjust_dimension(image.shape[0])
    # ratios to remap detections from network space back to the source image
    # (was inpWidth / inpWidth, i.e. a 1.0 placeholder)
    rW = image.shape[1] / float(inpWidth)
    rH = image.shape[0] / float(inpHeight)

    net = self._makenet()
    confThreshold = config.confThreshold
    nmsThreshold = config.nmsThreshold
    outNames = self._outputs

    # Create a 4D blob from the frame and run the model
    blob = cv.dnn.blobFromImage(image, 1.0, (inpWidth, inpHeight),
                                (123.68, 116.78, 103.94), True, False)
    net.setInput(blob)
    outs = net.forward(outNames)

    # Decode scores and geometry, then suppress overlaps with rotated NMS
    scores = outs[0]
    geometry = outs[1]
    boxes, confidences = decode(scores, geometry, confThreshold)
    indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold,
                                     nmsThreshold)
    for i in indices:
        # 4 corners of the rotated rect, scaled back to source coordinates
        vertices = cv.boxPoints(boxes[i[0]])
        for j in range(4):
            vertices[j][0] *= rW
            vertices[j][1] *= rH
        # axis-aligned bounds of the rotated rect
        xs, ys = vertices[:, 0], vertices[:, 1]
        bounds = Rect(int(xs.min()), int(ys.min()),
                      int(xs.max()), int(ys.max()))
        results.append((TextRegion(vertices, bounds), confidences[i[0]]))
    return results
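

# `adjust_dimension` is not shown in this excerpt. EAST requires input sides
# to be multiples of 32, so a minimal sketch (assumption) rounds the dimension
# to the nearest acceptable size:
def adjust_dimension_sketch(value: int, multiple: int = 32) -> int:
    return max(multiple, int(round(value / multiple)) * multiple)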


def is_rects_overlap(a: Rect, b: Rect) -> bool:
    return a.overlaps(b)
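

# For reference: axis-aligned overlap is interval intersection on both axes.
# A standalone equivalent of `Rect.overlaps` (an assumption about its exact
# boundary semantics; the field names match those used elsewhere in this file):
def rects_overlap_sketch(a: 'Rect', b: 'Rect') -> bool:
    return (a.xmin < b.xmax and b.xmin < a.xmax and
            a.ymin < b.ymax and b.ymin < a.ymax)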


def detect_text_areas(image, model, *args,
                      **kwargs) -> List[Tuple[TextRegion, float]]:
    results = []

    # the image is assumed to be pre-resized to network-friendly dimensions;
    # sourceSize carries the original frame size to remap detections to
    sourceSize = kwargs['sourceSize']
    inpWidth = image.shape[1]
    inpHeight = image.shape[0]
    rW = sourceSize[0] / float(inpWidth)
    rH = sourceSize[1] / float(inpHeight)
    if not kwargs.get('scale', True):
        rW = 1.0
        rH = 1.0

    net = model
    confThreshold = kwargs['confThreshold']
    nmsThreshold = kwargs['nmsThreshold']
    outNames = kwargs['outNames']

    # Create a 4D blob from the frame and run the model
    blob = cv.dnn.blobFromImage(image, 1.0, (inpWidth, inpHeight),
                                (123.68, 116.78, 103.94), True, False)
    net.setInput(blob)
    outs = net.forward(outNames)

    # Decode scores and geometry, then suppress overlaps with rotated NMS
    scores = outs[0]
    geometry = outs[1]
    boxes, confidences = decode(scores, geometry, confThreshold)
    indices = cv.dnn.NMSBoxesRotated(boxes, confidences, confThreshold,
                                     nmsThreshold)
    for i in indices:
        # 4 corners of the rotated rect, scaled back to source coordinates
        vertices = cv.boxPoints(boxes[i[0]])
        for j in range(4):
            vertices[j][0] *= rW
            vertices[j][1] *= rH
        # axis-aligned bounds of the rotated rect
        xs, ys = vertices[:, 0], vertices[:, 1]
        bounds = Rect(int(xs.min()), int(ys.min()),
                      int(xs.max()), int(ys.max()))
        results.append((TextRegion(vertices, bounds), confidences[i[0]]))
    return results
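

# `decode` is referenced by both detectors above but not defined in this
# excerpt. A sketch following the OpenCV EAST text_detection sample, which
# this code appears to be based on (assumption): walk the score map, rebuild
# each rotated rect from the 5-channel geometry, and collect (box, score).
def decode_sketch(scores, geometry, scoreThresh):
    boxes = []
    confidences = []
    height, width = scores.shape[2], scores.shape[3]
    for y in range(height):
        scoresData = scores[0][0][y]
        x0 = geometry[0][0][y]  # distances from the cell to the 4 box sides
        x1 = geometry[0][1][y]
        x2 = geometry[0][2][y]
        x3 = geometry[0][3][y]
        angles = geometry[0][4][y]
        for x in range(width):
            score = scoresData[x]
            if score < scoreThresh:
                continue
            # the feature map is 4x smaller than the network input
            offsetX, offsetY = x * 4.0, y * 4.0
            angle = angles[x]
            cosA, sinA = np.cos(angle), np.sin(angle)
            h = x0[x] + x2[x]
            w = x1[x] + x3[x]
            offset = (offsetX + cosA * x1[x] + sinA * x2[x],
                      offsetY - sinA * x1[x] + cosA * x2[x])
            p1 = (-sinA * h + offset[0], -cosA * h + offset[1])
            p3 = (-cosA * w + offset[0], sinA * w + offset[1])
            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))
            boxes.append((center, (w, h), -angle * 180.0 / np.pi))
            confidences.append(float(score))
    return [boxes, confidences]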


def find_rects_white(image: np.ndarray) -> List[Rect]:
    """Detects rectangular contours that overlap white-ish regions.

    @returns list of merged bounding Rects
    """
    raise NotImplementedError()

    # --- work-in-progress implementation below, kept for reference ---
    gray = cv.cvtColor(image.copy(), cv.COLOR_RGB2GRAY)
    gray = cv.GaussianBlur(gray, (5, 5), 0)
    # cv.Canny(gray, 70, 180) was a hand-picked guess from a few photos
    edged = auto_canny(gray)

    # split to HSV, then keep roughly any white color, zeroing out the rest
    hsv = cv.cvtColor(image.copy(), cv.COLOR_RGB2HSV)
    h, _, _ = cv.split(hsv)
    h[h < 145] = 0
    h[h > 165] = 0
    normed = cv.normalize(h, None, 0, 255, cv.NORM_MINMAX, cv.CV_8UC1)
    kernel = cv.getStructuringElement(shape=cv.MORPH_ELLIPSE, ksize=(5, 5))
    opened = cv.morphologyEx(normed, cv.MORPH_OPEN, kernel)

    # now find the white regions' contours
    whites = cv.findContours(opened, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
    whites = whites[0] if len(whites) == 2 else whites[1]
    whites = sorted(whites, key=cv.contourArea, reverse=True)
    # drop anything below 15px of contour area, i.e. smaller than roughly 4x4
    whites = [cnt for cnt in whites if cv.contourArea(cnt) > 15]

    whiterects = []
    for cnt in whites:
        rect = cv.minAreaRect(cnt)
        rw, rh = rect[1]
        if rw * rh > 150:  # 150px area, or roughly 12x12 pixels
            whiterects.append(rect)
    white_boxes = [Rect.from_cvrect(*r[0], *r[1]) for r in whiterects]

    contours = cv.findContours(edged.copy(), cv.RETR_LIST,
                               cv.CHAIN_APPROX_SIMPLE)
    contours = contours[0] if len(contours) == 2 else contours[1]
    contours = sorted(contours, key=cv.contourArea, reverse=True)
    contours = [cnt for cnt in contours if cv.contourArea(cnt) > 15]

    rects = map(cv.minAreaRect, contours)
    boxes = [Rect.from_cvrect(*r[0], *r[1]) for r in rects]

    # keep only the edge boxes that overlap some white region
    for i in reversed(range(len(boxes))):
        if not any(wbox.overlaps(boxes[i]) for wbox in white_boxes):
            boxes.pop(i)
    boxes = Rect.nms_merge(boxes)

    # debug visualization: boxes over the source next to edges and white mask
    for b in boxes:
        cv.rectangle(image, (int(b.xmin), int(b.ymin)),
                     (int(b.xmax), int(b.ymax)), (0, 255, 255), 2)
    stacked = np.hstack((cv.cvtColor(edged, cv.COLOR_GRAY2RGB),
                         cv.cvtColor(opened, cv.COLOR_GRAY2RGB), image))
    cv.namedWindow('test', 0)
    cv.imshow('test', stacked)
    cv.waitKey()
    cv.imwrite('dump.jpg', stacked)

    return boxes or list()
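

# `Rect.nms_merge` is used by both rect finders above but defined elsewhere.
# A plausible reading (assumption) is an iterative union-merge: collapse any
# pair of overlapping rects into their bounding union until none overlap.
def nms_merge_sketch(boxes: List['Rect']) -> List['Rect']:
    merged = list(boxes)
    done = False
    while not done:
        done = True
        for i in range(len(merged)):
            for j in range(i + 1, len(merged)):
                if merged[i].overlaps(merged[j]):
                    a, b = merged[i], merged[j]
                    merged[i] = Rect(min(a.xmin, b.xmin), min(a.ymin, b.ymin),
                                     max(a.xmax, b.xmax), max(a.ymax, b.ymax))
                    merged.pop(j)
                    done = False
                    break
            if not done:
                break
    return merged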