Пример #1
0
def getText(image):
    """
    Generate one scrape.getstring() result per text line found in `image`.

    Lines are located with scrape.guess_lines(), which yields
    (top, base, bottom) rows. For each line, a pixel counts as ink when
    its value is exactly 0; rows are read starting one pixel below `top`.

    The chat font is loaded from a fixed path on every call.
    """
    chatfont = FontData("w:/app/poker/ps-chat.fontd")

    # Scan the full width of the image that was passed in. The original
    # code referenced a name `w` that was never defined in this function.
    width = image.size[0]

    for top, base, bottom in scrape.guess_lines(image):

        def hasInk(a, b):
            # `b` is relative to the line; shift it into image coordinates.
            return image.getpixel((a, b + top + 1)) == 0

        # NOTE(review): a commented-out `train=True` argument used to follow
        # here -- presumably scrape.getstring can prompt for unknown glyphs;
        # confirm against scrape.getstring before enabling it.
        yield scrape.getstring(width, bottom - top, chatfont, hasInk)
Пример #2
0
def glyphs(img, font, cutoff=1, train=False):
    """
    Generate (x, y, width, height, char) tuples for each glyph in an image.

    This is slightly more "advanced" OCR that I used
    when trying to re-assemble joe's book.

    It attempts to deal with anti-aliased text.
    However, the method it uses is rather silly.

    Glyphs may contain several characters because this system cannot
    compensate for ligatures/kerning. This doesn't really
    work too well yet.

    Parameters:
        img    -- image to scan. A mode "1" conversion is used only to
                  find the text lines; ink tests and glyph crops read
                  the original image.
        font   -- mapping from glyph code (as returned by icon2num) to
                  the character(s) it represents. Updated in place when
                  `train` is true.
        cutoff -- a pixel counts as ink when its value is <= cutoff.
        train  -- when true, unknown glyphs are printed and the user is
                  asked to identify them; blank spans are not yielded.

    For an unknown glyph with `train` false, the `char` slot of the
    yielded tuple holds the raw glyph code instead of a character.
    """
    # The image width is the same for every line, so read it once.
    w = img.size[0]

    for top, baseLine, bottom in guess_lines(img.convert("1")):

        # Height of the line in pixels. In image coordinates `top` is the
        # smaller y value, so this must be bottom - top (the original
        # top - bottom produced a negative height and an inverted crop box).
        scanH = bottom - top

        def hasInk(a, b):
            # `b` is relative to the line; shift it into image coordinates.
            return img.getpixel((a, b + top)) <= cutoff

        x = 0
        for charW, bmp in scan_line(w, scanH, hasInk):

            if bmp > 0:
                # Non-empty span: reduce it to an icon and look it up.
                icon = iconify(img.copy().crop((x, top, x + charW, top + scanH)))
                code = icon2num(icon)

                if code in font:
                    char = font[code]
                elif train:
                    # Show the unknown glyph and let the user name it.
                    # Single-argument print(...) behaves the same on
                    # Python 2 and Python 3.
                    print(bmp)
                    print(convert.glint_to_strings(bmp, scanH))
                    char = raw_input("what is it?")
                    font[code] = char
                else:
                    char = code

                yield x, top, charW, scanH, char

            elif bmp == 0 and not train:
                # Empty span: report it as whitespace (skipped in training).
                yield x, top, charW, scanH, " "

            x += charW