Example #1
def ocr():
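    # Initialise the legacy python-tesseract bindings with English data from
    # the current directory, OCR "eurotext.jpg", and compare the word count
    # reported by tesseract with the helpers countWords()/countWords2(),
    # header() etc. defined elsewhere in the source file.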
    api = tesseract.TessBaseAPI()
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    mImgFile = "eurotext.jpg"
    pixImage = tesseract.pixRead(mImgFile)
    api.SetImage(pixImage)
    text = api.GetUTF8Text()
    conf = api.MeanTextConf()
    print(text, len(text))
    print("Cofidence Level: %d %%" % conf)
    print("Confidences of All words")
    header("Method 1", "*" * 10)
    confOfText = api.AllWordConfidences()

    print(confOfText)
    print("Number of Words:")
    print("counted by tesseract: %d" % len(confOfText))
    print("counted by me: %d[%d]" % (countWords(text), countWords2(text)))
    if len(confOfText) != countWords(text):
        print("Why the words counted by tesseract are different from mine!!!!")
    header("Method 2", "*" * 10)
    confs = tesseract.AllWordConfidences(api)
    print(confs, len(confs))
def getConfidence(mask, angle, me):
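    # Rotate the mask by (90 degrees - angle), save it, convert it to TIFF via
    # PIL, and OCR it with the module-level `api` object; returns the mean word
    # confidence clamped to a minimum of 10. `rotate`, `io`, `Image` and
    # `showProcess` are assumed to be defined in the surrounding module.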
    aDeg = math.degrees(angle)
    rotated = rotate(mask, 90.0 - aDeg, False, (me[1], me[0]))

    #rotated = rotate(mask, 90.0-adeg, False)
    io.imsave("temp3_0.png", rotated)

    #using the OCR
    gifTempFile = "temp3_1.gif"
    tifTempFile = "temp3_2.tif"
    im = Image.open('temp3_0.png')
    im.save(gifTempFile, "GIF")
    original = Image.open(gifTempFile)
    bg = original.resize(im.size, Image.NEAREST)
    bg.save(tifTempFile)

    pixImage = tesseract.pixRead(tifTempFile)
    api.SetImage(pixImage)
    outText = api.GetUTF8Text()
    conf = api.MeanTextConf()

    if showProcess:
        wait = input("PRESS ENTER TO CONTINUE.")

    if conf <= 10:
        conf = 10
    return conf
Example #4
def gettext(imagepath):
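    # One-shot helper: create a TessBaseAPI, OCR the image at `imagepath`,
    # print the result and return the recognised text.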
    api = tesseract.TessBaseAPI()
    api.SetOutputName("outputName")
    api.Init(".", "eng", tesseract.OEM_DEFAULT)
    api.SetPageSegMode(tesseract.PSM_AUTO)

    pixImage = tesseract.pixRead(imagepath)
    api.SetImage(pixImage)
    outText = api.GetUTF8Text()
    print("OCR output:\n%s" % outText)
    api.End()
    return outText
Example #5
    def imageToText(self, imageLocation, imageTextLoc):
        if not self.api:
            self.api = tesseract.TessBaseAPI()
            #self.api.SetOutputName("outputName")
            self.api.Init("includes", "eng", tesseract.OEM_DEFAULT)
            self.api.SetPageSegMode(tesseract.PSM_AUTO)

        pixImage = tesseract.pixRead(imageLocation)
        self.api.SetImage(pixImage)
        outText = self.api.GetUTF8Text()
        GObject.idle_add(self.addRowData, self.listStoreCounter, 2,
                         "Saving Text to File")

        textFile = open(imageTextLoc, 'w')
        textFile.write(str(outText))
        textFile.close()

        GObject.idle_add(self.addRowData, self.listStoreCounter, 2,
                         "Text Saved to File")
        #self.api.End()
        (outText, pixImage) = (None, None)
Example #7
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#from __future__ import print_function
import tesseract
import gc
import pprint

api = tesseract.TessBaseAPI()
api.SetOutputName("outputName")
#api.Init(".","eng")
api.Init(".", "eng", tesseract.OEM_DEFAULT)
api.SetPageSegMode(tesseract.PSM_AUTO)
mImgFile = "eurotext.jpg"

print("Method 1: Leptonica->pixRead")
pixImage = tesseract.pixRead(mImgFile)
print("Type of pixImage =", type(pixImage))
print("repr(pixImage) =", repr(pixImage))
api.SetImage(pixImage)
outText = api.GetUTF8Text()
print("OCR output:\n%s" % outText)
api.End()
outText = None
tesseract.pixDestroy(pixImage)
Example #8
    rotated = rotate(mask, 90.0 - adeg, False, (me[1], me[0]))

    #rotated = rotate(mask, 90.0-adeg, False)
    io.imsave("rotated3/" + str(i) + ".png", rotated)

    #using the OCR
    gifTempFile = "o1.gif"
    tifTempFile = "o2.tif"
    im = Image.open('rotated3/' + str(i) + '.png')
    im.save(gifTempFile, "GIF")
    original = Image.open(gifTempFile)
    bg = original.resize(im.size, Image.NEAREST)
    bg.save(tifTempFile)

    pixImage = tesseract.pixRead(tifTempFile)
    api.SetImage(pixImage)
    outText = api.GetUTF8Text()
    outText = outText.replace("\n", "")
    outText = outText.replace("\t", "")
    conf = api.MeanTextConf()

    f.write(str(i) + "\t" + outText + "\t" + str(conf) + "\n")

f.close()
api.End()
Example #10
        api = tesseract.TessBaseAPI()
        api.Init(tesslangpath, "eng", tesseract.OEM_DEFAULT)
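        # PSM_SINGLE_CHAR treats each image as a single glyph; the two
        # variables below disable Tesseract's adaptive classifier so earlier
        # results do not influence later ones.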
        api.SetPageSegMode(tesseract.PSM_SINGLE_CHAR)
        api.SetVariable("classify_enable_learning", "0")
        api.SetVariable("classify_enable_adaptive_matcher", "0")

    image_dict = OrderedDict()
    segment_text_list = []
    cluster_pattern_list = []

    # classify
    for fname in test_images:
        test_image = ImageFile(fname)
        test_classes, test_segments = ocr.ocr(test_image, show_steps=verbose)
        if use_tesseract:
            tesseract_image = tesseract.pixRead(fname)
            tesseract_classes = []
            cluster_list = []
            for segment in test_segments:
                cluster_segments = prim.get_cluster(segment, test_segments)
                if len(cluster_segments) == 10:
                    add = True
                    for list_cluster in cluster_list:
                        add = add and not tesseract_utils.is_cluster_match(
                            cluster_segments, list_cluster)
                        if not add:
                            break
                    if add:
                        cluster_list.append(cluster_segments)
                pattern = tesseract_utils.get_pattern(cluster_segments)
                cluster_pattern_list.append([cluster_segments, pattern])
                whitelist = tesseract_utils.get_whitelist(
                    segment, cluster_segments, pattern)
Example #11
def process(image):
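    # Resize the frame, find rectangular candidate regions via Canny edges,
    # contours and Hough lines, warp the best candidate to a flat view and,
    # if OCR is enabled, read it with a digit/'+'/'>' whitelist using the
    # module-level `tess_api` handle.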
    global tess_api
    sz = 500
    ratio = (sz + 0.0) / image.shape[1]
    dim = (sz, int(image.shape[0] * ratio))
    orig = image
    image = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
    if Param.Value("bilateral"):
        bila = cv2.bilateralFilter(image, 11, 17, 17)
    else:
        bila = image
    gray = cv2.cvtColor(bila, cv2.COLOR_BGR2GRAY)

    canny = cv2.Canny(gray, 25, 50)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
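    # Optional morphological clean-up of the edge map before contour search;
    # the iteration counts come from the "dilate"/"erode" parameters.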
    if Param.Value("dilate"):
        canny = cv2.dilate(canny, kernel, iterations=Param.Value("dilate"))
    if Param.Value("erode"):
        canny = cv2.erode(canny, kernel, iterations=Param.Value("erode"))

    goodCnts = []
    badCnts = []
    if Param.Value('contours'):
        (cnts, hier) = cv2.findContours(canny.copy(), cv2.RETR_TREE,
                                        cv2.CHAIN_APPROX_SIMPLE)
        # loop over our contours
        for c in cnts:
            if cv2.contourArea(c) < 1000:
                continue
            # approximate the contour
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.01 * peri, True)
            #    print len(approx)

            good = False
            # if our approximated contour has four points, then
            # we can assume that we have found our screen
            if len(approx) == 4:
                d01 = np.sum((approx[0] - approx[1])**2)
                d12 = np.sum((approx[1] - approx[2])**2)
                wh_ratio = np.sqrt(np.divide(float(d01), d12))
                if wh_ratio > 1.5 and wh_ratio < 3:
                    good = True
                elif 1 / wh_ratio > 1.5 and 1 / wh_ratio < 3:
                    approx = np.roll(approx, 1, axis=0)
                    good = True

            if good:
                goodCnts.append(approx)
            else:
                badCnts.append(approx)

        cv2.drawContours(image, goodCnts, -1, (0, 255, 0), 2)
        if Param.Value('all_contours'):
            cv2.drawContours(image, badCnts, -1, (128, 128, 0), 1)

    warped = None

    if Param.Value('lines_p') and len(goodCnts) > 0:
        lines = cv2.HoughLinesP(canny,
                                1,
                                np.pi / 180,
                                Param.Value('lines_threshold'),
                                minLineLength=Param.Value('lines_minlength'),
                                maxLineGap=Param.Value('lines_maxgap'))
        if lines is None:
            lines = []
        else:
            lines = lines[0]
        goodLines = []
        goodBoxes = []
        badLines = []
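        # Keep only Hough segments that are long enough relative to a quad
        # side and run within 10 px of both corners of one side of a
        # candidate quadrilateral.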
        for line in lines:
            line = np.array([[line[0], line[1]], [line[2], line[3]]])
            length2 = np.sum((line[0] - line[1])**2)
            ok = False
            for cnt in goodCnts:
                minLength2 = np.sum((cnt[0] - cnt[3])**2) * 2
                if length2 < minLength2:
                    continue
                d = [geoPointLineDist(point[0], line) for point in cnt]
                if d[0] < 10 and d[3] < 10:
                    goodLines.append(line)
                    goodBoxes.append(cnt)
                elif d[1] < 10 and d[2] < 10:
                    goodLines.append(line)
                    goodBoxes.append(np.roll(cnt, 2, axis=0))
                elif Param.Value('show_bad_lines'):
                    badLines.append(line)

        if Param.Value('show_bad_lines'):
            for line in goodLines:
                cv2.line(image, tuple(line[0]), tuple(line[1]), (0, 0, 255), 1)
            for line in badLines:
                cv2.line(image, tuple(line[0]), tuple(line[1]), (0, 0, 255), 1)

        if len(goodLines) > 0:
            g = np.array(goodLines)
            longestIndex = np.argmax(
                np.sum((g[:, 1:2, :] - g[:, 0:1, :])**2, axis=2))
            longestLine = goodLines[longestIndex]
            cv2.line(image, tuple(longestLine[0]), tuple(longestLine[1]),
                     (200, 200, 0), 2)
            box = goodBoxes[longestIndex]
            tl = np.array(geoClosestPoint(box[0][0], longestLine, False))
            bl = np.array(geoClosestPoint(box[3][0], longestLine, False))
            tr = box[1][0]
            br = box[2][0]

            h = bl - tl
            tl = tl + h * 3.1
            bl = tl + h
            h = br - tr
            tr = tr + h * 3.1
            br = tr + h

            w = tr - tl
            tl = tl - w * 1.1
            tr = tr + w * 0.6
            w = br - bl
            bl = bl - w * 1.1
            br = br + w * 0.6

            tl = tl / ratio
            tr = tr / ratio
            bl = bl / ratio
            br = br / ratio

            width = int(np.sqrt(np.sum((tl - tr)**2)))
            height = int(np.sqrt(np.sum((tl - bl)**2)))

            dst = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                            [0, height - 1]],
                           dtype="float32")

            rect = np.array([tl, tr, br, bl], dtype="float32")
            M = cv2.getPerspectiveTransform(rect, dst)
            warped = cv2.warpPerspective(orig, M, (width, height))

    if Param.Value('lines'):
        lines = cv2.HoughLines(canny, 1, np.pi / 180,
                               Param.Value('lines_threshold'))
        for rho, theta in (lines[0] if lines is not None else []):
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            # int() truncates the decimal part (3.8 -> 3); use
            # int(np.around(x)) instead if rounding to the nearest integer
            # is wanted.
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))
            cv2.line(image, (x1, y1), (x2, y2), (0, 128, 128), 2)

    Param.DisplayAll(image)
    cv2.imshow("Image", display([image, bila, gray, canny]))
    if warped is not None:
        if Param.Value('ocr'):
            warped = cv2.bilateralFilter(warped, 11, 17, 17)
            if tess_api is None:
                tess_api = tesseract.TessBaseAPI()
                tess_api.Init(".", "eng", tesseract.OEM_DEFAULT)
                tess_api.SetVariable("tessedit_char_whitelist", "0123456789+>")
                tess_api.SetPageSegMode(tesseract.PSM_AUTO)
            cv2.imwrite("ocr.png", warped)  # SetCvImage segfauls. F**k it.
            pixImage = tesseract.pixRead("ocr.png")
            tess_api.SetImage(pixImage)
            outText = tess_api.GetUTF8Text()
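            # Lines longer than 40 characters are treated as candidate code
            # lines; if fixEsr() rejects one, retry after rewriting
            # "digit space digit" sequences as "+ digit".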
            for line in outText.split("\n"):
                if len(line) > 40:
                    print(line)
                    esr = fixEsr(line)
                    if esr is None:
                        esr = fixEsr(re.sub(r'\d (\d)', '+ \\1', line))
                    print(esr)
                    if esr is not None:
                        cv2.putText(warped, esr, (10, 20),
                                    cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 128),
                                    2)
                        cv2.imshow("Image2", warped)