def tight_word(gray, rect):
    """Shrink a character rectangle to a tighter box around its content.

    The region ``rect`` of ``gray`` is binarised with
    ``utils.custom_threshold`` and narrowed in three passes: rows, then
    columns inside those rows, then rows again inside that column range.
    Each pass keeps the group whose reported width is largest.

    An alternative column-first order for wide rectangles existed in the
    original as commented-out code and remains disabled.

    Args:
        gray: image array indexed as ``gray[row, col]``.
        rect: ``(x, y, w, h)`` of the candidate character inside ``gray``.

    Returns:
        ``(x, y, w, h)`` of the tightened box, in ``gray`` coordinates.

    Side effects:
        Draws the tightened box onto the thresholded crop and always shows
        it in a window named ``tight<x>`` (debug aid).
    """

    def _widest_group(shadow, timg):
        # Only the groups and their widths are needed; the first value
        # returned by the shadow helpers is ignored.
        _, groups, widths = shadow(timg)
        return groups[np.argmax(widths)]

    x, y, w, h = rect
    threshold = utils.custom_threshold(gray[y:y + h, x:x + w])

    # Group bounds are treated as inclusive, hence the "+ 1" in the slices.
    y0, y1 = _widest_group(preprocess.line_shadow, threshold)
    x0, x1 = _widest_group(preprocess.column_shadow,
                           threshold[y0:y1 + 1, 0:w])
    y2, y3 = _widest_group(preprocess.line_shadow,
                           threshold[y0:y1 + 1, x0:x1 + 1])

    # y2 and y3 are both relative to y0, so the box spans rows
    # y0 + y2 .. y0 + y3 of the crop.
    top = y0 + y2
    bottom = y0 + y3

    # Outline the final character box on the debug image.
    cv2.rectangle(threshold, (x0, top), (x1 + 1, bottom + 1),
                  (255, 255, 255), 2)
    cv2.imshow('tight{}'.format(x), threshold)

    return (x0 + x, top + y, x1 - x0 + 1, y3 - y2 + 1)
def rect_boundary(grayImg):
    """Show the central vertical strip of a grayscale image.

    Intended as the first step of digit-area localisation: take a strip of
    up to 100 columns around the horizontal centre, to be projected
    horizontally so the darkest band can be picked out. Only the strip
    extraction and display are implemented in the visible code; nothing is
    returned.

    Args:
        grayImg: image array whose first two dimensions are
            ``(height, width)``.
    """
    # Binarised copy of the input; computed but not used by the visible code.
    thresh = utils.custom_threshold(grayImg)

    height, width = grayImg.shape[:2]

    # Floor division keeps the slice bounds integral, and the left bound is
    # clamped so narrow images (< 100 px wide) do not produce a negative
    # index that would wrap around to the right edge.
    centre = width // 2
    left = max(centre - 50, 0)
    middleImg = grayImg[0:height, left:centre + 50]
    cv2.imshow('middle', middleImg)
def pre_process(img, show=False):
    """Convert a colour image to grayscale and binarise it.

    Args:
        img: colour image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_RGB2GRAY``.
        show: when true, display the binarised image in a window named
            ``thresh``.

    Returns:
        The result of ``utils.custom_threshold`` on the grayscale image.
    """
    # An erode/dilate clean-up pass (4x4 rectangular kernel, two iterations
    # each) was tried after thresholding and is currently disabled.
    binary = utils.custom_threshold(cv2.cvtColor(img, cv2.COLOR_RGB2GRAY))
    if show:
        cv2.imshow("thresh", binary)
    return binary
def pre_process(img, show=False):
    """Convert a colour image to grayscale, smooth it and binarise it.

    Args:
        img: colour image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_RGB2GRAY``.
        show: when true, display the binarised image in a window named
            ``thresh``.

    Returns:
        The result of ``utils.custom_threshold`` on the blurred grayscale
        image.
    """
    # Disabled experiments kept out of the pipeline: converting red to black
    # before grayscale conversion, and an erode/dilate clean-up pass (4x4
    # rectangular kernel, two iterations each) after thresholding.
    grayscale = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (3, 3), 0)  # 3x3 Gaussian filter
    binary = utils.custom_threshold(smoothed)
    if show:
        cv2.imshow("thresh", binary)
    return binary
import cv2
import numpy as np

import utils

# Scratch script: load one test image and display several smoothing / edge
# variants next to the custom threshold for visual comparison.
IMAGE_PATH = './test0309/36.jpg'

img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None
    # instead of raising; fail here with a clear message rather than inside
    # cvtColor.
    raise IOError('cannot read image: {}'.format(IMAGE_PATH))

gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
cv2.imshow('gray', gray)

edge_output = cv2.Canny(gray, 50, 150)
cv2.imshow("Canny Edge", edge_output)

blur = cv2.blur(gray, (3, 3))
cv2.imshow('blur', blur)

gauss = cv2.GaussianBlur(gray, (3, 3), 0)
cv2.imshow('guass', gauss)

# 5x5 averaging kernel; only needed by the disabled cv2.filter2D variant.
kernel = np.ones((5, 5), np.float32) / 25

# The median-filtered image replaces `gray` and feeds the threshold below.
gray = cv2.medianBlur(gray, 5)
threshold = utils.custom_threshold(gray)
cv2.imshow('threshold', threshold)
# NOTE: this window is titled 'filter2D' but shows the median-blurred image.
cv2.imshow('filter2D', gray)

cv2.waitKey(0)
def img_to_words(img, show=False, words=6):
    """Split an image into per-character rectangles.

    Pipeline:
      1. Locate the digit area with ``_rect_digital``, then run it again
         inside that area to refine the location.
      2. Grayscale, 3x3 Gaussian blur, ``utils.custom_threshold``, crop to
         the digit area.
      3. Split the crop into columns with ``column_slit2``.
      4. For each column, split rows with ``line_split`` and trim the sides
         with ``word_column_split``.
      5. Drop candidates shorter than half the mean candidate height.
      6. ``_validate_by_width`` (checks for merged characters).
      7. ``_validate_by_interval`` (checks spacing).
      8. Re-threshold and tighten each rectangle with ``tight_word``.

    Args:
        img: colour image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_RGB2GRAY``.
        show: when true, display intermediate images and draw the final
            rectangles onto ``img`` in place.
        words: expected character count; forwarded to ``column_slit2`` and
            ``_validate_by_width``.

    Returns:
        A list of ``(x, y, w, h)`` tuples in ``img`` coordinates, or an
        empty list when the column split finds nothing.
    """
    # Step 1: coarse digit area, then a second pass inside it.
    (area_x, area_y, area_w, area_h) = _rect_digital(img)
    digital1 = img[area_y:area_y + area_h, area_x:area_x + area_w]
    if show:
        cv2.imshow('first digital area', digital1)

    (inner_x, inner_y, w, h) = _rect_digital(digital1)
    x = area_x + inner_x
    y = area_y + inner_y

    # Step 2: grayscale -> Gaussian blur -> threshold, cropped to the area.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)
    closed = utils.custom_threshold(gray)[y:y + h, x:x + w]

    # Step 3: column split, relative to the digit area.
    xWords = column_slit2(closed, words=words, show=show)
    if show:
        cv2.imshow('closed', closed)
    if len(xWords) == 0:
        # Parenthesised form prints the same text on Python 2 and 3.
        print('column_split return empty')
        return []

    # Step 4: row split of every column, yielding candidate (x, y, w, h).
    candidates = []
    for xPos in xWords:
        wordRect = closed[0:h, xPos[0]:xPos[1] + 1]
        yPos = line_split(wordRect, show_window=None)
        if yPos is None:
            continue

        # Second vertical projection trims empty space left and right.
        rows = wordRect[yPos[0]:yPos[1] + 1, 0:wordRect.shape[1]]
        xPos2 = word_column_split(rows)
        if xPos2 is None:
            continue

        candidates.append((
            xPos[0] + xPos2[0] + x,
            yPos[0] + y,
            xPos2[1] - xPos2[0] + 1,
            yPos[1] - yPos[0] + 1,
        ))

    # Step 5: keep candidates at least half as tall as the mean height.
    # Skipped when there are no candidates, to avoid a NaN mean.
    if candidates:
        mean_height = np.average([rect[3] for rect in candidates])
        wordRects = [rect for rect in candidates
                     if rect[3] >= mean_height / 2]
    else:
        wordRects = []

    # Step 6: use character width to detect merged characters.
    wordRects = _validate_by_width(closed, wordRects, words=words)

    # Step 7: use spacing between characters to reject implausible ones.
    wordRects = _validate_by_interval(wordRects)

    # Step 8: re-threshold each rectangle and shrink it to the character.
    wordRects = [tight_word(gray, rect) for rect in wordRects]

    if show:
        # Outline the final characters on the caller's image.
        for (rx, ry, rw, rh) in wordRects:
            cv2.rectangle(img, (rx, ry), (rx + rw, ry + rh), (0, 0, 255), 2)
        cv2.imshow('words', img)

    return wordRects
resizedHeight = int(oriHeight / (oriWidth / float(800))) # 2、大小归一化,宽度固定为800 img = cv2.resize(img, (800, resizedHeight)) # 将图片宽度固定为800 # 3、字符分割 wordRects = wordSplit.img_to_words(img, show) # 字符分割 # 4、图像灰化后颜色反转 gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) # 把输入图像灰度化 utils.color_reverse(gray) gris = cv2.GaussianBlur(gray, (3, 3), 0) # 高斯滤波 chars = [] index = 1 for (x, y, w, h) in wordRects: if w == 0 or h == 0: continue roi = gray[y:y + h, x:x + w] roi = utils.custom_threshold(roi) cv2.imshow('roi{}'.format(index), roi) roi = cv2.resize(roi, (width, height)) index += 1 roi_small = roi.reshape((1, width * height)) roi_small = np.float32(roi_small) retval, results, neigh_resp, dists = model.findNearest(roi_small, k=1) responseNmber = int((results[0][0])) if responseNmber > 9: print u'识别到刻度' # continue cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2) cv2.putText(img, str(responseNmber), (x + 10, y + 25), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2) if show: