def _paddedCharRects(binaryImg, pad):
    """Return the padded bounding boxes of the outer contours, left to right.

    Each box is an ``(x, y, w, h)`` tuple grown by ``pad`` pixels on every
    side.  Boxes are NOT clamped to the image, so ``x`` or ``y`` can be
    negative for contours that touch the border.
    """
    # Use RETR_EXTERNAL to remove boxes that are completely contained by the word
    contours, _hierarchy = cv2.findContours(binaryImg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    rects = [cv2.boundingRect(cv2.approxPolyDP(contour, 3, True)) for contour in contours]
    rects.sort(key=lambda rect: rect[0])
    return [(x - pad, y - pad, w + pad * 2, h + pad * 2) for (x, y, w, h) in rects]


def _pickBestRunWidth(scores, values, start, end, threshold):
    """Pick a slice width from the longest run of acceptable candidate widths.

    A width ``w`` in ``start..end`` (inclusive) is acceptable when
    ``scores[w] >= threshold`` and ``values[w]`` does not start with one of
    ``.,:;``.  Among runs of consecutive acceptable widths the longest wins;
    ties go to the run containing the higher score.  The width returned is
    the best-scoring one inside the winning run, or 0 when no width is
    acceptable.
    """
    maxRunLength = 0
    maxRunBestScore = 0.0
    maxRunBestWidth = 0
    currentRunLength = 0
    currentRunBestScore = 0.0
    currentRunBestWidth = 0
    for w in range(start, end + 1):
        if scores[w] < threshold or values[w][0] in '.,:;':
            # Run interrupted: start counting again from the next width.
            currentRunLength = 0
            currentRunBestScore = 0.0
            continue
        currentRunLength += 1
        if scores[w] > currentRunBestScore:
            currentRunBestScore = scores[w]
            currentRunBestWidth = w
        if currentRunLength > maxRunLength or \
                (currentRunLength == maxRunLength and currentRunBestScore > maxRunBestScore):
            maxRunLength = currentRunLength
            maxRunBestScore = currentRunBestScore
            maxRunBestWidth = currentRunBestWidth
    return maxRunBestWidth


def parseWord(img, returnBounds=False):
    """Split a word image into characters and recognise them.

    Args:
        img: image array; it is converted with ``cv.CV_BGR2GRAY``, so it is
            treated as a BGR colour image.
        returnBounds: when true, use the training pre-processing parameters
            and return the character images instead of recognising them.

    Returns:
        If ``returnBounds`` is false: a list with one entry per recognised
        character, each being the value returned by
        ``itl_char.parseCharacter`` for the chosen slice.
        If ``returnBounds`` is true: a list of full-height column slices of
        ``img``, one per detected (padded) character box, ordered left to
        right.  Previously this path raised ``NameError`` because the list
        was never built.

    Side effects:
        When ``DEBUG`` is set, writes intermediate images next to
        ``IMAGE_NAME`` and prints the number of boxes found.  Also changes
        numpy's global print options (see note in the body).
    """
    img1 = cv2.cvtColor(img, cv.CV_BGR2GRAY)
    if returnBounds:
        # TRAINING parameters, with nicely spaced characters
        _retval, img1 = cv2.threshold(img1, 200, 255, cv.CV_THRESH_BINARY_INV)
        img1 = cv2.GaussianBlur(img1, (5, 5), 0)
        # NOTE(review): the kernel is a plain tuple and the third positional
        # argument of cv2.dilate is `dst`, not `iterations` -- confirm intent.
        img1 = cv2.dilate(img1, (2, 2), 1)
        element = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        img1 = cv2.morphologyEx(img1, cv.CV_MOP_CLOSE, element)
    else:
        # NOTE(review): this file had lost its indentation; the debug dumps
        # below are assumed to belong to the non-training branch.
        img1 = cv2.GaussianBlur(img1, (1, 1), 0)
        _retval, img1 = cv2.threshold(img1, 200, 255, cv.CV_THRESH_BINARY_INV)
        if DEBUG:
            cv2.imwrite(IMAGE_NAME + '-thresh.' + EXTENSION, img1)
        # Blur in the horizontal direction to get lines
        element = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
        img1 = cv2.morphologyEx(img1, cv.CV_MOP_CLOSE, element)
        if DEBUG:
            cv2.imwrite(IMAGE_NAME + '-morph.' + EXTENSION, img1)

    # Dimensions of the ORIGINAL input; `img` is replaced further down.
    height = img.shape[0]
    width = img.shape[1]

    # Pad the character bounding boxes
    adjustedRects = _paddedCharRects(img1, 1)

    if DEBUG:
        cv2.imwrite(IMAGE_NAME + '-bounds.' + EXTENSION, img)
        print('%d char bounding boxes initially found in %s' % (len(adjustedRects), IMAGE_FILE))

    if returnBounds:
        # Extract characters from word.  Padding can push a box past the
        # image border; clamp so a negative x does not wrap around and
        # produce an empty slice.
        chars = []
        for (x, _y, w, _h) in adjustedRects:
            left = max(x, 0)
            right = min(x + w, width)
            chars.append(img[0:height, left:right])
        return chars

    latex = []
    img = itl_char.getCroppedImage(itl_char.getBinaryImage(img))
    width = img.shape[1]
    # NOTE: process-wide numpy setting, kept from the original code (useful
    # when dumping slices while debugging).
    np.set_printoptions(threshold=np.inf, linewidth=np.inf)
    columnSums = np.sum(img, axis=0)
    currentX = 0
    MIN_WIDTH = 2
    MIN_CHAR_WIDTH = 4
    MAX_CHAR_WIDTH = 15
    THRESHOLD = .8
    while width - currentX >= MIN_WIDTH:
        # Advance until two consecutive non-empty columns are found.
        if columnSums[currentX] == 0 or columnSums[currentX + 1] == 0:
            currentX += 1
            continue

        start = MIN_CHAR_WIDTH
        end = MAX_CHAR_WIDTH
        # An empty column 2 or 3 pixels ahead means a narrow glyph: try the
        # small widths only.
        if (currentX + 2 < width and columnSums[currentX + 2] == 0) or \
                (currentX + 3 < width and columnSums[currentX + 3] == 0):
            start = MIN_WIDTH
            end = MIN_CHAR_WIDTH + 1

        # Indexed by candidate width; widths never tried keep score 0.
        scores = [0.0] * (MAX_CHAR_WIDTH + 1)
        values = [None] * (MAX_CHAR_WIDTH + 1)
        spaceWidth = 0
        for w in range(start, end):
            rightX = currentX + w
            # `height` is the original image height; numpy clamps the slice
            # to the cropped image's actual height.
            charSlice = img[0:height, currentX:rightX]
            values[w], scores[w] = itl_char.parseCharacter(charSlice, getScore=True, isBinary=True)
            if rightX >= width or columnSums[rightX] == 0:
                # Hit the end of the image or an empty column: the glyph
                # ends exactly here.
                spaceWidth = w
                break

        if spaceWidth:
            bestWidth = spaceWidth
        else:
            bestWidth = _pickBestRunWidth(scores, values, start, end, THRESHOLD)
            if bestWidth == 0:
                # No acceptable candidate: give up on the rest of the word.
                break

        currentX += bestWidth
        latex.append(values[bestWidth])

    return latex
def parseEquation(img): img_gray = cv2.cvtColor(img, cv.CV_BGR2GRAY) #img_inv = 255 - img_gray # img_lap = cv2.Laplacian(img_gray, cv2.CV_8U) # if DEBUG: # cv2.imwrite(IMAGE_NAME + '-lap.' + EXTENSION, img_lap) unused, img_threshold = cv2.threshold(img_gray, 220, 255, cv.CV_THRESH_BINARY_INV) if DEBUG: cv2.imwrite(IMAGE_NAME + '-thresh.' + EXTENSION, img_threshold) # Blur in the horizontal direction to get lines morph_size = (0, 0) # element = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size) # morphed = cv2.morphologyEx(img_threshold, cv.CV_MOP_CLOSE, element) # if DEBUG: # cv2.imwrite(IMAGE_NAME + '-morph.' + EXTENSION, morphed) # Use RETR_EXTERNAL to remove boxes that are completely contained by the word contours, hierarchy = cv2.findContours(img_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) boundRects = [] for i in xrange(len(contours)): contourPoly = cv2.approxPolyDP(contours[i], 0.25, True) boundRect = cv2.boundingRect(contourPoly) if boundRect[2] * boundRect[3] > 1: boundRects.append( (boundRect[0] - morph_size[0], boundRect[1] - morph_size[1], boundRect[2] + morph_size[0], boundRect[3])) # # Filter bounding rectangles that are not an entire line # # Take the maximum height among all bounding boxes # # Remove those boxes that have height less than 25% of the maximum # maxHeight = -1 # for rect in boundRects: # maxHeight = max(rect[3], maxHeight) # heightThresh = .25 * maxHeight # boundRects = [rect for rect in boundRects if rect[3] > heightThresh] # if DEBUG: # for rect in boundRects: # cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0)) # cv2.imwrite(IMAGE_NAME + '-bounds.' 
+ EXTENSION, img) # print '%d words found in %s' % (len(boundRects), IMAGE_FILE) sortedRects = sorted(boundRects, key=lambda x: (x[0], x[1])) # words = [] # for rect in sortedRects: # [x, y, w, h] = rect # word = img[y:(y+h), x:(x+w)] # words.append(word) # for word in words: # # TODO do something here # if DEBUG: # cv2.imshow('Word', word) # cv2.waitKey(0) # keys = ['n', '-', '-', '\\sum', '\\infty', '-', '\\infty', '|', '\\langle', # 'f', ',', '-', 'e', '\\sqrt', 'i', 'n', '2', '\\pi', 'x', '\\rangle', # '|', '2', '-', '-', '|', '|', 'f', '|', '|', '2' # ] keys = [] for j in xrange(len(sortedRects)): x, y, w, h = sortedRects[j] img_bb = img[y:y + h, x:x + w] key = itl_char.parseCharacter(img_bb) keys.append(translateKey(key)) L = [] for j in xrange(len(keys)): L.append(buildSymbol(keys[j], *(sortedRects[j]))) L = handleSpecialCases(L) tree, baseline = findSymbolTree(L) for i in xrange(len(L)): c = L[i].centroid() rounded = (int(round(c[0])), int(round(c[1]))) if i in baseline: cv2.circle(img, rounded, 2, (255, 0, 0), 1) else: cv2.circle(img, rounded, 2, (0, 0, 255), 1) for i in xrange(len(L)): for j in tree[i]: c1 = L[i].centroid() c2 = L[j].centroid() r1 = (int(round(c1[0])), int(round(c1[1]))) r2 = (int(round(c2[0])), int(round(c2[1]))) if i in baseline and j in baseline: cv2.line(img, r1, r2, (255, 0, 0)) else: cv2.line(img, r1, r2, (0, 0, 255)) cv2.imwrite(IMAGE_NAME + '-mst.' + EXTENSION, img) node = buildLaTeXTree(L) node_str = node.str() print 'latex string:', node_str return node_str
def parseEquation(img): img_gray = cv2.cvtColor(img, cv.CV_BGR2GRAY) #img_inv = 255 - img_gray # img_lap = cv2.Laplacian(img_gray, cv2.CV_8U) # if DEBUG: # cv2.imwrite(IMAGE_NAME + '-lap.' + EXTENSION, img_lap) unused, img_threshold = cv2.threshold(img_gray, 220, 255, cv.CV_THRESH_BINARY_INV) if DEBUG: cv2.imwrite(IMAGE_NAME + '-thresh.' + EXTENSION, img_threshold) # Blur in the horizontal direction to get lines morph_size = (0, 0) # element = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size) # morphed = cv2.morphologyEx(img_threshold, cv.CV_MOP_CLOSE, element) # if DEBUG: # cv2.imwrite(IMAGE_NAME + '-morph.' + EXTENSION, morphed) # Use RETR_EXTERNAL to remove boxes that are completely contained by the word contours, hierarchy = cv2.findContours(img_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) boundRects = [] for i in xrange(len(contours)): contourPoly = cv2.approxPolyDP(contours[i], 0.25, True) boundRect = cv2.boundingRect(contourPoly) if boundRect[2] * boundRect[3] > 1: boundRects.append((boundRect[0]-morph_size[0], boundRect[1]-morph_size[1], boundRect[2]+ morph_size[0], boundRect[3])) # # Filter bounding rectangles that are not an entire line # # Take the maximum height among all bounding boxes # # Remove those boxes that have height less than 25% of the maximum # maxHeight = -1 # for rect in boundRects: # maxHeight = max(rect[3], maxHeight) # heightThresh = .25 * maxHeight # boundRects = [rect for rect in boundRects if rect[3] > heightThresh] # if DEBUG: # for rect in boundRects: # cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0)) # cv2.imwrite(IMAGE_NAME + '-bounds.' 
+ EXTENSION, img) # print '%d words found in %s' % (len(boundRects), IMAGE_FILE) sortedRects = sorted(boundRects, key=lambda x:(x[0], x[1])) # words = [] # for rect in sortedRects: # [x, y, w, h] = rect # word = img[y:(y+h), x:(x+w)] # words.append(word) # for word in words: # # TODO do something here # if DEBUG: # cv2.imshow('Word', word) # cv2.waitKey(0) # keys = ['n', '-', '-', '\\sum', '\\infty', '-', '\\infty', '|', '\\langle', # 'f', ',', '-', 'e', '\\sqrt', 'i', 'n', '2', '\\pi', 'x', '\\rangle', # '|', '2', '-', '-', '|', '|', 'f', '|', '|', '2' # ] keys = [] for j in xrange(len(sortedRects)): x, y, w, h = sortedRects[j] img_bb = img[y:y+h, x:x+w] key = itl_char.parseCharacter(img_bb) keys.append(translateKey(key)) L = [] for j in xrange(len(keys)): L.append(buildSymbol(keys[j], *(sortedRects[j]))) L = handleSpecialCases(L) tree, baseline = findSymbolTree(L) for i in xrange(len(L)): c = L[i].centroid() rounded = (int(round(c[0])), int(round(c[1]))) if i in baseline: cv2.circle(img, rounded, 2, (255, 0, 0), 1) else: cv2.circle(img, rounded, 2, (0, 0, 255), 1) for i in xrange(len(L)): for j in tree[i]: c1 = L[i].centroid() c2 = L[j].centroid() r1 = (int(round(c1[0])), int(round(c1[1]))) r2 = (int(round(c2[0])), int(round(c2[1]))) if i in baseline and j in baseline: cv2.line(img, r1, r2, (255, 0, 0)) else: cv2.line(img, r1, r2, (0, 0, 255)) cv2.imwrite(IMAGE_NAME + '-mst.' + EXTENSION, img) node = buildLaTeXTree(L) node_str = node.str() print 'latex string:', node_str return node_str