Пример #1
0
def parseWord(img, returnBounds=False):
    """Split a word image into characters.

    Two modes are supported:

    * ``returnBounds=True`` (training): contours of the thresholded image
      are turned into padded bounding boxes, and the function returns one
      image slice per box (full image height, the box's padded columns),
      ordered left to right.
    * ``returnBounds=False`` (recognition): the image is binarised and
      cropped by ``itl_char``, then scanned left to right with a sliding
      window; each candidate width is scored by
      ``itl_char.parseCharacter`` and the recognised values are returned.

    Args:
        img: image passed to ``cv2.cvtColor(..., cv.CV_BGR2GRAY)``
            (presumably a BGR numpy array -- confirm against callers).
        returnBounds: select training mode (see above).

    Returns:
        A list of image slices in training mode, otherwise the list of
        values produced by ``itl_char.parseCharacter`` for each window
        that was accepted.

    Side effects:
        When ``DEBUG`` is set, intermediate images are written next to
        ``IMAGE_NAME``.  In recognition mode numpy's global print options
        are changed.
    """
    img1 = cv2.cvtColor(img, cv.CV_BGR2GRAY)

    if returnBounds:
        # TRAINING parameters, with nicely spaced characters
        unused, img1 = cv2.threshold(img1, 200, 255, cv.CV_THRESH_BINARY_INV)
        img1 = cv2.GaussianBlur(img1, (5, 5), 0)
        img1 = cv2.dilate(img1, (2, 2), 1)
        element = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        img1 = cv2.morphologyEx(img1, cv.CV_MOP_CLOSE, element)
    else:
        img1 = cv2.GaussianBlur(img1, (1, 1), 0)
        unused, img1 = cv2.threshold(img1, 200, 255, cv.CV_THRESH_BINARY_INV)
        if DEBUG:
            cv2.imwrite(IMAGE_NAME + '-thresh.' + EXTENSION, img1)
        # Morphological close with a 1x1 rectangular element.
        element = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
        img1 = cv2.morphologyEx(img1, cv.CV_MOP_CLOSE, element)

    if DEBUG:
        cv2.imwrite(IMAGE_NAME + '-morph.' + EXTENSION, img1)

    # Use RETR_EXTERNAL to remove boxes that are completely contained by the word
    contours, hierarchy = cv2.findContours(img1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boundRects = sorted(
        (cv2.boundingRect(cv2.approxPolyDP(contour, 3, True)) for contour in contours),
        key=lambda rect: rect[0])

    # Pad the character bounding boxes by one pixel on every side.
    pad = 1
    adjustedRects = [(x - pad, y - pad, w + pad * 2, h + pad * 2)
                     for (x, y, w, h) in boundRects]

    if DEBUG:
        cv2.imwrite(IMAGE_NAME + '-bounds.' + EXTENSION, img)
        print('%d char bounding boxes initially found in %s' % (len(adjustedRects), IMAGE_FILE))

    height = img.shape[0]

    if returnBounds:
        # Training mode only needs the segmented character images.  The
        # previous code returned an undefined name here (NameError); the
        # slices are rebuilt from the padded boxes instead.  Columns are
        # clamped because padding can push a box one pixel outside the
        # image, and a negative start index would wrap around.
        width = img.shape[1]
        chars = []
        for x, y, w, h in adjustedRects:
            left = max(x, 0)
            right = min(x + w, width)
            chars.append(img[0:height, left:right])
        return chars

    # Recognition mode: sliding-window scan over the cropped binary image.
    latex = []
    img = itl_char.getCroppedImage(itl_char.getBinaryImage(img))
    width = img.shape[1]
    # NOTE: global numpy setting, kept from the original (useful when
    # dumping character slices while debugging).
    np.set_printoptions(threshold=np.inf, linewidth=np.inf)
    columnSums = np.sum(img, axis=0)
    currentX = 0
    MIN_WIDTH = 2
    MIN_CHAR_WIDTH = 4
    MAX_CHAR_WIDTH = 15
    THRESHOLD = .8
    while width - currentX >= MIN_WIDTH:
        # Skip ahead until two consecutive columns both contain ink.
        if columnSums[currentX] == 0 or columnSums[currentX + 1] == 0:
            currentX += 1
            continue

        start = MIN_CHAR_WIDTH
        end = MAX_CHAR_WIDTH
        # An empty column 2 or 3 pixels ahead means a narrow glyph: try
        # the small widths only.
        if (currentX + 2 < width and columnSums[currentX + 2] == 0) or\
           (currentX + 3 < width and columnSums[currentX + 3] == 0):
            start = MIN_WIDTH
            end = MIN_CHAR_WIDTH + 1

        scores = [0.0] * (MAX_CHAR_WIDTH + 1)
        values = [None] * (MAX_CHAR_WIDTH + 1)
        brokeSpace = False
        for w in range(start, end):
            rightX = currentX + w
            charSlice = img[0:height, currentX:rightX]
            val, score = itl_char.parseCharacter(charSlice, getScore=True, isBinary=True)
            scores[w] = score
            values[w] = val
            # Stop widening once the window reaches the image edge or an
            # empty column.
            if rightX >= width or columnSums[rightX] == 0:
                brokeSpace = True
                break

        if brokeSpace:
            # The window ended on a gap: accept the width that reached it.
            bestWidth = w
        else:
            # Otherwise pick the best-scoring width inside the longest run
            # of consecutive widths that score at least THRESHOLD and are
            # not recognised as punctuation; ties in run length are broken
            # by the higher best score.
            bestWidth = 0
            bestRunLength = 0
            bestRunScore = 0.0
            runLength = 0
            runScore = 0.0
            runWidth = 0
            for w in range(start, end):
                if scores[w] < THRESHOLD or values[w][0] in '.,:;':
                    runLength = 0
                    runScore = 0.0
                    continue
                runLength += 1
                if scores[w] > runScore:
                    runScore = scores[w]
                    runWidth = w
                if runLength > bestRunLength or\
                   (runLength == bestRunLength and runScore > bestRunScore):
                    bestRunLength = runLength
                    bestRunScore = runScore
                    bestWidth = runWidth

        # No acceptable width at this position: stop scanning.
        if bestWidth == 0:
            break

        currentX += bestWidth
        latex.append(values[bestWidth])

    return latex
Пример #2
0
def parseEquation(img):
    img_gray = cv2.cvtColor(img, cv.CV_BGR2GRAY)
    #img_inv = 255 - img_gray
    # img_lap = cv2.Laplacian(img_gray, cv2.CV_8U)
    # if DEBUG:
    #     cv2.imwrite(IMAGE_NAME + '-lap.' + EXTENSION, img_lap)
    unused, img_threshold = cv2.threshold(img_gray, 220, 255,
                                          cv.CV_THRESH_BINARY_INV)
    if DEBUG:
        cv2.imwrite(IMAGE_NAME + '-thresh.' + EXTENSION, img_threshold)
    # Blur in the horizontal direction to get lines
    morph_size = (0, 0)
    # element = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    # morphed = cv2.morphologyEx(img_threshold, cv.CV_MOP_CLOSE, element)
    # if DEBUG:
    #     cv2.imwrite(IMAGE_NAME + '-morph.' + EXTENSION, morphed)
    # Use RETR_EXTERNAL to remove boxes that are completely contained by the word
    contours, hierarchy = cv2.findContours(img_threshold, cv2.RETR_EXTERNAL,
                                           cv2.CHAIN_APPROX_NONE)
    boundRects = []
    for i in xrange(len(contours)):
        contourPoly = cv2.approxPolyDP(contours[i], 0.25, True)
        boundRect = cv2.boundingRect(contourPoly)
        if boundRect[2] * boundRect[3] > 1:
            boundRects.append(
                (boundRect[0] - morph_size[0], boundRect[1] - morph_size[1],
                 boundRect[2] + morph_size[0], boundRect[3]))

    # # Filter bounding rectangles that are not an entire line
    # # Take the maximum height among all bounding boxes
    # # Remove those boxes that have height less than 25% of the maximum
    # maxHeight = -1
    # for rect in boundRects:
    #     maxHeight = max(rect[3], maxHeight)
    # heightThresh = .25 * maxHeight
    # boundRects = [rect for rect in boundRects if rect[3] > heightThresh]

    # if DEBUG:
    # for rect in boundRects:
    #     cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0))
    # cv2.imwrite(IMAGE_NAME + '-bounds.' + EXTENSION, img)
    # print '%d words found in %s' % (len(boundRects), IMAGE_FILE)

    sortedRects = sorted(boundRects, key=lambda x: (x[0], x[1]))
    # words = []
    # for rect in sortedRects:
    #     [x, y, w, h] = rect
    #     word = img[y:(y+h), x:(x+w)]
    #     words.append(word)

    # for word in words:
    #     # TODO do something here
    #     if DEBUG:
    #         cv2.imshow('Word', word)
    #         cv2.waitKey(0)
    # keys = ['n', '-', '-', '\\sum', '\\infty', '-', '\\infty', '|', '\\langle',
    #     'f', ',', '-', 'e', '\\sqrt', 'i', 'n', '2', '\\pi', 'x', '\\rangle',
    #     '|', '2', '-', '-', '|', '|', 'f', '|', '|', '2'
    # ]
    keys = []
    for j in xrange(len(sortedRects)):
        x, y, w, h = sortedRects[j]
        img_bb = img[y:y + h, x:x + w]
        key = itl_char.parseCharacter(img_bb)
        keys.append(translateKey(key))
    L = []
    for j in xrange(len(keys)):
        L.append(buildSymbol(keys[j], *(sortedRects[j])))
    L = handleSpecialCases(L)
    tree, baseline = findSymbolTree(L)

    for i in xrange(len(L)):
        c = L[i].centroid()
        rounded = (int(round(c[0])), int(round(c[1])))
        if i in baseline:
            cv2.circle(img, rounded, 2, (255, 0, 0), 1)
        else:
            cv2.circle(img, rounded, 2, (0, 0, 255), 1)
    for i in xrange(len(L)):
        for j in tree[i]:
            c1 = L[i].centroid()
            c2 = L[j].centroid()
            r1 = (int(round(c1[0])), int(round(c1[1])))
            r2 = (int(round(c2[0])), int(round(c2[1])))
            if i in baseline and j in baseline:
                cv2.line(img, r1, r2, (255, 0, 0))
            else:
                cv2.line(img, r1, r2, (0, 0, 255))
    cv2.imwrite(IMAGE_NAME + '-mst.' + EXTENSION, img)

    node = buildLaTeXTree(L)
    node_str = node.str()
    print 'latex string:', node_str
    return node_str
Пример #3
0
def parseEquation(img):
    img_gray = cv2.cvtColor(img, cv.CV_BGR2GRAY)
    #img_inv = 255 - img_gray
    # img_lap = cv2.Laplacian(img_gray, cv2.CV_8U)
    # if DEBUG:
    #     cv2.imwrite(IMAGE_NAME + '-lap.' + EXTENSION, img_lap)
    unused, img_threshold = cv2.threshold(img_gray, 220, 255, cv.CV_THRESH_BINARY_INV)
    if DEBUG:
        cv2.imwrite(IMAGE_NAME + '-thresh.' + EXTENSION, img_threshold)
    # Blur in the horizontal direction to get lines
    morph_size = (0, 0)
    # element = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    # morphed = cv2.morphologyEx(img_threshold, cv.CV_MOP_CLOSE, element)
    # if DEBUG:
    #     cv2.imwrite(IMAGE_NAME + '-morph.' + EXTENSION, morphed)
    # Use RETR_EXTERNAL to remove boxes that are completely contained by the word
    contours, hierarchy = cv2.findContours(img_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    boundRects = []
    for i in xrange(len(contours)):
        contourPoly = cv2.approxPolyDP(contours[i], 0.25, True)
        boundRect = cv2.boundingRect(contourPoly)
        if boundRect[2] * boundRect[3] > 1:
            boundRects.append((boundRect[0]-morph_size[0], boundRect[1]-morph_size[1], boundRect[2]+ morph_size[0], boundRect[3]))

    # # Filter bounding rectangles that are not an entire line
    # # Take the maximum height among all bounding boxes
    # # Remove those boxes that have height less than 25% of the maximum
    # maxHeight = -1
    # for rect in boundRects:
    #     maxHeight = max(rect[3], maxHeight)
    # heightThresh = .25 * maxHeight
    # boundRects = [rect for rect in boundRects if rect[3] > heightThresh]

    # if DEBUG:
        # for rect in boundRects:
        #     cv2.rectangle(img, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (0, 255, 0))
        # cv2.imwrite(IMAGE_NAME + '-bounds.' + EXTENSION, img)
        # print '%d words found in %s' % (len(boundRects), IMAGE_FILE)

    sortedRects = sorted(boundRects, key=lambda x:(x[0], x[1]))
    # words = []
    # for rect in sortedRects:
    #     [x, y, w, h] = rect
    #     word = img[y:(y+h), x:(x+w)]
    #     words.append(word)

    # for word in words:
    #     # TODO do something here
    #     if DEBUG:
    #         cv2.imshow('Word', word)
    #         cv2.waitKey(0)
    # keys = ['n', '-', '-', '\\sum', '\\infty', '-', '\\infty', '|', '\\langle',
    #     'f', ',', '-', 'e', '\\sqrt', 'i', 'n', '2', '\\pi', 'x', '\\rangle',
    #     '|', '2', '-', '-', '|', '|', 'f', '|', '|', '2'
    # ]
    keys = []
    for j in xrange(len(sortedRects)):
        x, y, w, h = sortedRects[j]
        img_bb = img[y:y+h, x:x+w]
        key = itl_char.parseCharacter(img_bb)
        keys.append(translateKey(key))
    L = []
    for j in xrange(len(keys)):
        L.append(buildSymbol(keys[j], *(sortedRects[j])))
    L = handleSpecialCases(L)
    tree, baseline = findSymbolTree(L)

    for i in xrange(len(L)):
        c = L[i].centroid()
        rounded = (int(round(c[0])), int(round(c[1])))
        if i in baseline:
            cv2.circle(img, rounded, 2, (255, 0, 0), 1)
        else:
            cv2.circle(img, rounded, 2, (0, 0, 255), 1)
    for i in xrange(len(L)):
        for j in tree[i]:
            c1 = L[i].centroid()
            c2 = L[j].centroid()
            r1 = (int(round(c1[0])), int(round(c1[1])))
            r2 = (int(round(c2[0])), int(round(c2[1])))
            if i in baseline and j in baseline:
                cv2.line(img, r1, r2, (255, 0, 0))
            else:
                cv2.line(img, r1, r2, (0, 0, 255))
    cv2.imwrite(IMAGE_NAME + '-mst.' + EXTENSION, img)

    node = buildLaTeXTree(L)
    node_str = node.str()
    print 'latex string:',  node_str
    return node_str