def ocr(img, idioma):
    """Recognise the text in an image and return its meaningful lines.

    The image data is converted with ``Image.fromarray`` and handed to a
    ``Tesseract`` engine created for the requested language.  The recognised
    text is split into lines, and only lines containing at least one ASCII
    letter or digit are kept.

    Args:
        img: Image data accepted by ``Image.fromarray`` (presumably a NumPy
            array -- confirm against callers).
        idioma: Language identifier passed to ``Tesseract`` as ``lang``.

    Returns:
        A list of the recognised lines, in their original order, that
        contain at least one character matching ``[a-zA-Z0-9]``.
    """
    pattern = re.compile('[a-zA-Z0-9]')
    ocr_img = Image.fromarray(img)

    # Named ``engine`` rather than ``ocr`` so the local does not shadow this
    # function's own name.
    engine = Tesseract(lang=idioma)
    try:
        engine.set_image(ocr_img)
        raw_text = engine.get_utf8_text()
    finally:
        # Always clear the engine, even when recognition raises.
        engine.clear()

    # An empty line can never match the pattern, so a separate "drop empty
    # lines" pass is unnecessary.
    return [line for line in raw_text.splitlines() if pattern.search(line)]
def ocrImage(tagDest, tessdataPrefix, lang, charWhitelist, pageMode):
    """Run OCR on a previously saved PNG and return the recognised text.

    The image path is built as ``/tmp/<genymotion_vm_name>-<tagDest>.png``,
    where ``genymotion_vm_name`` is a module-level name read by this
    function.  The path is printed to standard output before recognition.

    Args:
        tagDest: Tag appended to the VM name to form the image file name.
        tessdataPrefix: First positional argument given to ``Tesseract``
            (presumably the tessdata directory -- confirm against the
            binding in use).
        lang: Language identifier given to ``Tesseract``.
        charWhitelist: Value assigned to the ``tessedit_char_whitelist``
            engine variable.
        pageMode: Value passed to ``set_page_seg_mode``.

    Returns:
        The text returned by the engine's ``get_utf8_text()``.
    """
    destOcrImg = "/tmp/" + genymotion_vm_name + "-" + tagDest + ".png"
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print("OCR : " + str(destOcrImg))

    # OCR engine definition
    tr = Tesseract(tessdataPrefix, lang)
    try:
        tr.set_variable("tessedit_char_whitelist", charWhitelist)
        tr.set_page_seg_mode(pageMode)

        # OCR
        image = Image.open(destOcrImg)
        tr.set_image(image)
        return tr.get_utf8_text()
    finally:
        # Clear the engine once the text has been extracted (or on error),
        # matching what ocr() does with its engine.
        tr.clear()