def extract_image_data(data, languages=None):
    """Extract text from a binary string of data."""
    if TESSDATA_PREFIX is None:
        raise ValueError("Env TESSDATA_PREFIX is not set, OCR will not work.")
    key, text = get_cache(data)
    if text is not None:
        return text
    try:
        img = Image.open(StringIO(data))
    except Exception as ex:
        log.debug("Failed to parse image internally: %r", ex)
        return ""
    # TODO: play with contrast and sharpening the images.
    try:
        languages = _get_languages(languages)
        extractor = Tesseract(TESSDATA_PREFIX, lang=languages)
        extractor.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
        text = extractor.ocr_image(img)
        log.debug("OCR done: %s, %s characters extracted", languages, len(text))
        set_cache(key, text)
        return text
    except Exception as ex:
        log.exception(ex)
        return ""
def parse_img():
    im = Image.open("./temp.jpg")
    # the second one
    im = im.filter(ImageFilter.MedianFilter())
    enhancer = ImageEnhance.Contrast(im)
    im = enhancer.enhance(2)
    im = im.convert('1')  # binarize to 1-bit before OCR
    im.save('./temp2.jpg')
    tr = Tesseract(os.environ["TESSDATA_PREFIX"], "eng")
    text = tr.ocr_image(Image.open('./temp2.jpg'))
    # note: the OCR result is never returned; the view just redirects
    return redirect('http://mailsnail.tech/api/notify')
def tesseract():
    global semaphore
    while True:
        if not os.path.exists("./output.png"):
            break
        semaphore.acquire()
        img = Image.open("output.png")
        tr = Tesseract("/usr/local/share")
        text = tr.ocr_image(img)
        print text
        # previously shelled out to the CLI instead:
        # subprocess.call(["tesseract", "output.png", "out"])
        semaphore.release()
def extract_image_data(data, languages=None):
    """Extract text from a binary string of data."""
    tessdata_prefix = get_config('TESSDATA_PREFIX')
    if tessdata_prefix is None:
        raise IngestorException("TESSDATA_PREFIX is not set, OCR won't work.")
    languages = get_languages_iso3(languages)
    text = Cache.get_ocr(data, languages)
    if text is not None:
        return text
    img = Image.open(StringIO(data))
    # TODO: play with contrast and sharpening the images.
    extractor = Tesseract(tessdata_prefix, lang=languages)
    extractor.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
    text = extractor.ocr_image(img)
    log.debug('OCR done: %s, %s characters extracted', languages, len(text))
    Cache.set_ocr(data, languages, text)
    return text
def extract_image_data(data, languages=None):
    """Extract text from a binary string of data."""
    tessdata_prefix = get_config('TESSDATA_PREFIX')
    if tessdata_prefix is None:
        raise IngestorException("TESSDATA_PREFIX is not set, OCR won't work.")
    languages = get_languages_iso3(languages)
    text = Cache.get_ocr(data, languages)
    if text is not None:
        return text
    try:
        img = Image.open(StringIO(data))
    except DecompressionBombWarning as dce:
        log.debug("Image too large: %r", dce)
        return None
    except IOError as ioe:
        log.info("Unknown image format: %r", ioe)
        return None
    # TODO: play with contrast and sharpening the images.
    extractor = Tesseract(tessdata_prefix, lang=languages)
    extractor.set_image(img)
    extractor.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
    text = extractor.get_text() or ''
    text = text.decode(encoding="UTF-8")
    # extractor.clear()
    log.debug('OCR done: %s, %s characters extracted', languages, len(text))
    Cache.set_ocr(data, languages, text)
    return text
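Note that PIL's DecompressionBombWarning is a warning, not an exception, so the except clause above only fires if warnings have been escalated to errors somewhere in the process. A minimal sketch of that setup (the pixel limit shown is an illustrative value, not from the source):

import warnings
from PIL import Image

# Escalate the bomb warning so oversized images raise instead of just warning.
warnings.simplefilter('error', Image.DecompressionBombWarning)
# Optionally lower the threshold that triggers it (example value only).
Image.MAX_IMAGE_PIXELS = 64000000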
def ocrImage(tagDest, tessdataPrefix, lang, charWhitelist, pageMode):
    destOcrImg = "/tmp/" + genymotion_vm_name + "-" + tagDest + ".png"
    print "OCR : " + str(destOcrImg)
    # OCR setup
    tr = Tesseract(tessdataPrefix, lang)
    tr.set_variable("tessedit_char_whitelist", charWhitelist)
    tr.set_page_seg_mode(pageMode)
    # OCR
    image = Image.open(destOcrImg)
    tr.set_image(image)
    return tr.get_utf8_text()
def pages(self):
    for page in range(self.file.numPages):
        # Wand's "file[N]" filename suffix renders a single PDF page.
        img = WandImage(filename=self.path + ('[%s]' % page),
                        resolution=self.config['wand_resolution'])
        img.compression_quality = self.config['wand_compression_quality']
        temp = NamedTemporaryFile(suffix='.jpg')
        # Passing temp as the file kwarg does not work for some reason,
        # so we just pass the filename.
        img.save(filename=temp.name)
        # Reopen the image file as a PIL object.
        img = Image.open(temp.name)
        # Run tesseract.
        tr = Tesseract()
        result = tr.ocr_image(img)
        temp.close()
        yield result
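A hypothetical caller for this generator; the PdfExtractor wrapper class, config values, and file name here are illustrative assumptions, not from the source:

extractor = PdfExtractor('scan.pdf', config={'wand_resolution': 300,
                                             'wand_compression_quality': 75})
for page_text in extractor.pages():
    print(page_text)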
def extract_image_data(data, languages=None):
    """Extract text from a binary string of data."""
    tessdata_prefix = get_config('TESSDATA_PREFIX')
    if tessdata_prefix is None:
        raise IngestorException("TESSDATA_PREFIX is not set, OCR won't work.")
    languages = get_languages_iso3(languages)
    text = Cache.get_ocr(data, languages)
    if text is not None:
        return text
    try:
        img = Image.open(StringIO(data))
    except DecompressionBombWarning as dce:
        log.debug("Image too large: %r", dce)
        return None
    except IOError as ioe:
        log.info("Unknown image format: %r", ioe)
        return None
    # TODO: play with contrast and sharpening the images.
    extractor = Tesseract(tessdata_prefix, lang=languages)
    extractor.set_page_seg_mode(PageSegMode.PSM_AUTO_OSD)
    text = extractor.ocr_image(img)
    extractor.clear()
    log.debug('OCR done: %s, %s characters extracted', languages, len(text))
    Cache.set_ocr(data, languages, text)
    return text
def ocr_text(img):
    '''Perform OCR on the image.'''
    tr = Tesseract(lang='eng')
    tr.clear()
    pil_image = pil.Image.fromarray(img)
    tr.set_image(pil_image)
    utf8_text = tr.get_text()
    return utf8_text
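A minimal, hypothetical usage with an OpenCV frame; the file name and grayscale preprocessing are illustrative:

import cv2

frame = cv2.imread('sample.png')
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)  # a 2-D uint8 array maps to a PIL 'L' image
print(ocr_text(gray))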
def index(request):
    # from tesserwrap import Tesseract
    # from PIL import Image
    img = Image.open("/home/df/projects/django/nuspyp/tesseracttest/test.png")
    tr = Tesseract()
    tr.ocr_image(img)
    img2 = dog(filename='/home/df/projects/django/nuspyp/tesseracttest/source.pdf')
    single_image = img2.sequence[0]
    tr.ocr_image(single_image)
    return HttpResponse(tr.get_text())
def ocr(img, idioma):
    ocr_img = Image.fromarray(img)
    ocr = Tesseract(lang=idioma)
    ocr.set_image(ocr_img)
    pattern = re.compile('[a-zA-Z0-9]')
    text = ocr.get_utf8_text()
    text = text.splitlines()
    text = [x for x in text if x != '']            # drop empty lines
    text = [x for x in text if pattern.search(x)]  # keep lines with alphanumerics
    ocr.clear()
    return text
def ocr(info):
    cv2.imwrite('../fig/info.jpg', info)
    img = Image.open('../fig/info.jpg')
    tr = Tesseract(datadir='../data', lang='eng')
    text = tr.ocr_image(img)
    print(text)
from tesserwrap import Tesseract
from PIL import Image

tr = Tesseract("/usr/local/share")  # this is slow
im = Image.open("test2.png")
text = tr.ocr_image(im)
print text

words = text.split()
for thing in words:
    if thing == "Arlington":
        print "found ittt"
def ocr_text(img):
    tr = Tesseract(lang='eng')
    tr.clear()
    pil_image = pil.Image.fromarray(img)
    # Turn off the OCR word dictionaries.
    tr.set_variable('load_system_dawg', "F")
    tr.set_variable('load_freq_dawg', "F")
    tr.set_variable('-psm', "7")  # treat image as single line
    tr.set_variable('tessedit_char_whitelist',
                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")
    tr.set_image(pil_image)
    utf8_text = tr.get_text()
    return unicode(utf8_text)
def handleFrameForTaskB(self, frame, regionCoordinates):
    # Warp out the plate region, segment glyphs via connected components,
    # OCR each glyph, then hyphenate and validate the assembled plate.
    try:
        coordinates = list()
        for point in regionCoordinates:
            coordinates.append(
                [point[0] * frame.shape[1], point[1] * frame.shape[0]])
        coordinates = np.int0(coordinates)
        frame = cv2.drawContours(frame, [coordinates], 0, (0, 255, 0), 2)
        warped = four_point_transform(frame, coordinates)
        shrunk = cv2.cvtColor(warped[:, int(warped.shape[1] / 10):],
                              cv2.COLOR_BGR2GRAY)
        scale = 6
        shrunk = cv2.resize(
            shrunk, (shrunk.shape[1] * scale, shrunk.shape[0] * scale),
            interpolation=cv2.INTER_CUBIC)
        _, shrunk = cv2.threshold(shrunk, 100, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        shrunk = 255 - cv2.dilate(255 - shrunk, np.ones((1, 1)), iterations=1)
        num, features = cv2.connectedComponents(255 - shrunk)
        plate = str()
        corners = list()
        for i in range(0, num):
            area = np.sum((features == i))
            if area > scale ** 2 * 2 * 25 and area < scale * 4 * 500:
                rows = np.any(features == i, axis=1)
                cols = np.any(features == i, axis=0)
                rmin, rmax = np.where(rows)[0][[0, -1]]
                cmin, cmax = np.where(cols)[0][[0, -1]]
                corners.append([rmin, cmin, rmax, cmax])
        corners = np.array(corners)
        idx = np.argsort(corners[:, 1])
        sorted_corners = corners[idx]
        for corner in sorted_corners:
            minx = corner[0] - 2
            miny = corner[1] - 2
            maxx = corner[2] + 2
            maxy = corner[3] + 2
            if minx < 0:
                minx = 0
            if miny < 0:
                miny = 0
            snip = features[minx:maxx, miny:maxy]
            if snip.shape[1] > snip.shape[0]:
                continue
            snip = cv2.erode(snip.astype(np.uint8), np.ones((5, 5)),
                             iterations=1)
            im = Image.fromarray(np.uint8(snip))
            tr = Tesseract(datadir="/usr/share/tessdata")
            letter = tr.ocr_image(im).rstrip()
            for l in letter:
                if l.isalnum():
                    letter = l
            plate += letter.capitalize()
        alphs = "".join(itertools.takewhile(str.isalpha, plate))
        nums = plate[len(alphs):]
        if len(alphs) == 2:
            plate = alphs[0] + "-" + alphs[1] + "-" + nums
        elif len(alphs) == 5:
            plate = alphs[:3] + "-" + alphs[3:] + "-" + nums
        else:
            diffs = list()
            alphscorners = sorted_corners[:len(alphs)]
            for i in range(len(alphscorners)):
                if sorted_corners[i][1] == alphscorners[-1][1]:
                    break
                diffs.append(sorted_corners[i + 1][1] - sorted_corners[i][3])
            cuts = np.array(diffs) > np.mean(diffs)
            rev_cuts = cuts[::-1]
            for i in range(len(cuts[::-1])):
                if rev_cuts[i] == 1:
                    alphs = alphs[:len(cuts) - i] + "-" + alphs[len(cuts) - i:]
            plate = alphs + "-" + nums
        if len(plate) < 5:
            return None
        elif len(plate) > 11:
            return None
        elif plate.count("-") > 2:
            return None
        elif plate.count("-") < 2:
            return None
        else:
            return plate
    except Exception as exception:
        return None
class PaperDetection:

    def __init__(self):
        cwd = os.path.dirname(os.path.realpath(__file__))
        os.environ['TESSDATA_PREFIX'] = cwd
        self.tr = Tesseract(lang='deu')
        self.gs = goslate.Goslate()
        self.trained_paper = False
        self.paper_row_nw = None
        self.paper_row_se = None
        self.paper_col_nw = None
        self.paper_col_se = None
        self.paper_hist = None
        self.paper = None
        self.words = None
        self.translations = []
        self.pointed_locations = deque(maxlen=20)

    def draw_paper_rect(self, frame):
        rows, cols, _ = frame.shape
        self.paper_row_nw = rows / 5
        self.paper_row_se = 4 * rows / 5
        self.paper_col_nw = 2 * cols / 5
        self.paper_col_se = 3 * cols / 5
        cv2.rectangle(frame, (self.paper_col_nw, self.paper_row_nw),
                      (self.paper_col_se, self.paper_row_se), (0, 255, 0), 1)
        black = np.zeros(frame.shape, dtype=frame.dtype)
        frame_final = np.vstack([frame, black])
        return frame_final

    def train_paper(self, frame):
        self.set_paper_hist(frame)
        self.trained_paper = True

    def get_paper(self, frame):
        paper_masked = image_analysis.apply_hist_mask(frame, self.paper_hist)
        contours = image_analysis.contours(paper_masked)
        max_contour = image_analysis.max_contour(contours)
        paper = image_analysis.contour_interior(frame, max_contour)
        return paper

    def set_paper(self, frame):
        self.paper = self.get_paper(frame)

    def paper_copy(self):
        paper = self.paper.copy()
        return paper

    def set_ocr_text(self, frame):
        paper = self.get_paper(frame)
        thresh = image_analysis.gray_threshold(paper, 100)
        paper_img = Image.fromarray(thresh)
        self.tr.set_image(paper_img)
        self.tr.get_text()
        self.words = self.tr.get_words()
        for w in self.words:
            translation = self.translate(w.value)
            self.translations.append(translation)

    def set_paper_hist(self, frame):
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        roi = hsv[self.paper_row_nw:self.paper_row_se,
                  self.paper_col_nw:self.paper_col_se]
        self.paper_hist = cv2.calcHist([roi], [0, 1], None, [180, 256],
                                       [0, 180, 0, 256])
        cv2.normalize(self.paper_hist, self.paper_hist, 0, 255, cv2.NORM_MINMAX)

    def get_word_at_point(self, point):
        for i, w in enumerate(self.words):
            x_nw, y_nw, x_se, y_se = w.box
            x, y = point
            if x_nw < x < x_se and y_nw < y < y_se:
                return self.translations[i]

    def get_word_index(self, point):
        for i, w in enumerate(self.words):
            x_nw, y_nw, x_se, y_se = w.box
            x, y = point
            if x_nw < x < x_se and y_nw < y < y_se:
                return i

    def translate(self, word):
        translated_word = self.gs.translate(word, 'en', source_language='de')
        return translated_word

    def update_pointed_locations(self, point):
        index = self.get_word_index(point)
        if index is not None:
            self.pointed_locations.append(index)

    def get_most_common_word(self):
        index = self.most_common_location()
        if index is not None:
            word = self.translations[index].encode('ascii',
                                                   errors='backslashreplace')
            return word

    def most_common_location(self):
        values = set(self.pointed_locations)
        index = None
        maxi = 0
        for i in values:
            num = self.pointed_locations.count(i)
            if num > maxi:
                index = i
                maxi = num  # track the running maximum; the original never updated it
        frequency = float(self.pointed_locations.count(index)) / float(self.pointed_locations.maxlen)
        if frequency > 0.25:
            return index
        else:
            return None
import cv2
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import math
from tesserwrap import Tesseract
from PIL import Image

tr = Tesseract("/usr/local/share")


def auto_canny(image, sigma=0.33):
    # Derive Canny thresholds from the median pixel intensity.
    v = np.median(image)
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    edged = cv2.Canny(image, lower, upper)
    return edged


img = cv2.imread("image.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (3, 3), 0)
threshold = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                  cv2.THRESH_BINARY, 11, 2)
wide = cv2.Canny(threshold, 10, 200)
tight = cv2.Canny(threshold, 225, 250)
auto = auto_canny(threshold)
# cv2.imshow('my_image', img)
# cv2.imshow("Edges", np.hstack([wide, tight, auto]))
# cv2.imshow("Wide", wide)
# cv2.imshow("Tight", tight)
# cv2.imshow("Auto", auto)
bin, contours, hierarchy = cv2.findContours(threshold, cv2.RETR_TREE,
                                            cv2.CHAIN_APPROX_SIMPLE)
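A hypothetical continuation of this pipeline, cropping each detected contour and feeding it to the tr instance above; the size thresholds are illustrative, not from the source:

# hypothetical continuation: OCR each sufficiently large contour region
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    if w > 20 and h > 10:  # skip specks; thresholds are illustrative
        roi = Image.fromarray(threshold[y:y + h, x:x + w])
        print(tr.ocr_image(roi))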
# much stuff got commented out in the end, staying here for a while
# for educational reasons only
# need to add connection to video from robot
import numpy as np
import vision_definitions
from time import sleep
import cv2
# from pytesser import *
from tesserwrap import Tesseract
from PIL import Image
from naoqi import ALProxy

ocr = Tesseract()
# since we use upper-cased text only
ocr.set_variable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# ocr.set_variable("classify_enable_learning", "0")
# ocr.set_variable("classify_enable_adaptive_matcher", "0")

# cap = cv2.VideoCapture(0)

# connecting to the robot
IP = "192.168.0.238"

# speech module
tts = ALProxy("ALTextToSpeech", IP, 9559)

cameraid = 0
camProxy = ALProxy("ALVideoDevice", IP, 9559)
resolution = vision_definitions.kVGA
colorSpace = vision_definitions.kBGRColorSpace
videoClient = camProxy.subscribe("python_client", resolution, colorSpace, 5)
camProxy.setParam(vision_definitions.kCameraSelectID, cameraid)
# 9137743885
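A minimal sketch of the missing capture loop, assuming the standard NAOqi remote-image API; the frame handling, channel swap, and cleanup are illustrative, not from the source:

# hypothetical continuation: grab one frame, OCR it, speak the result
result = camProxy.getImageRemote(videoClient)
width, height, raw = result[0], result[1], result[6]
# kBGRColorSpace delivers BGR bytes; swap channels for a true-colour PIL image
frame = Image.frombytes("RGB", (width, height), raw)
b, g, r = frame.split()
frame = Image.merge("RGB", (r, g, b))
ocr.set_image(frame)
text = ocr.get_utf8_text()
if text.strip():
    tts.say(text)
camProxy.unsubscribe(videoClient)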