def get(self):
    """Fetch the CURP captcha, strip its noise lines, OCR it and write the text.

    The captcha image is downloaded, the line segments found by a
    probabilistic Hough transform are painted over in white, and the result
    is blurred, thresholded and morphologically opened.  The cleaned image
    is passed to Tesseract restricted to digits and lowercase letters, and
    the recognised text (surrounding whitespace stripped) is emitted with
    ``self.write``.

    If the Hough transform finds no segments the painting step is skipped
    instead of failing.
    """
    # Download the captcha, making sure the HTTP response is closed.
    url = "http://consultas.curp.gob.mx/CurpSP/imagenCatcha"
    response = urllib.urlopen(url)
    try:
        payload = response.read()
    finally:
        response.close()
    original = Image.open(StringIO(payload))

    # Convert the PIL image into a writable array for OpenCV.
    # NOTE(review): the ``::`` slice keeps the channel order unchanged; if a
    # channel swap (``::-1``) was intended, confirm before changing it
    # because it affects the grayscale values below.
    cv_img = np.asarray(original)[:, :, ::].copy()

    # Convert the image to grayscale.
    gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

    # Run Canny edge detection so the line strokes can be located.
    edges = cv2.Canny(gray, 60, 200, apertureSize=3)

    # Detect the line segments.
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 1, None, 0, 0)

    # Paint every detected segment in white.  HoughLinesP yields None when
    # nothing is found, and the array layout is (1, N, 4) or (N, 1, 4)
    # depending on the OpenCV version, so flatten it to N rows of 4 values.
    if lines is not None:
        for x1, y1, x2, y2 in lines.reshape(-1, 4):
            cv2.line(
                cv_img,
                (int(x1), int(y1)),
                (int(x2), int(y2)),
                (255, 255, 255),
                2,
            )

    # Grayscale copy of the image with the lines removed.
    processed = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

    # Apply a Gaussian blur.
    blur = cv2.GaussianBlur(processed, (3, 3), 0)

    # Apply a binary threshold.
    threshold = cv2.threshold(blur, 128, 255, cv2.THRESH_BINARY)[1]

    # Apply a morphological opening.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6, 6))
    morph = cv2.morphologyEx(threshold, cv2.MORPH_OPEN, kernel)

    # Convert the final processed image back to PIL.
    pil_img = Image.fromarray(morph)

    # Start Tesseract and read the image.
    ocr = tesserwrap.tesseract()
    ocr.set_variable("tessedit_char_whitelist",
                     "0123456789abcdefghijklmnopqrstuvwxyz")
    # NOTE(review): mode 8 is presumably Tesseract's "single word" page
    # segmentation mode -- confirm against the Tesseract documentation.
    ocr.set_page_seg_mode(8)
    text = ocr.ocr_image(pil_img)
    self.write(text.strip())
#!/usr/bin/env python
#-*- coding:utf-8 -*-

import sys
import StringIO

import requests
import PIL.Image
import tesserwrap


#: https://github.com/gregjurman/tesserwrap
tesseract = tesserwrap.tesseract()


def distinguish_captcha(image_url, show_origin_image=True):
    """Download a captcha image and return the text Tesseract reads from it.

    The image at ``image_url`` is fetched with ``requests``.  Each value is
    multiplied by 1.5, then mapped to 255 when above 200 and to 0 otherwise,
    and the result is converted to PIL mode ``"1"`` before being handed to
    the module-level ``tesseract`` object.  When ``show_origin_image`` is
    true, the unprocessed image is displayed with ``show()`` after OCR.

    Returns the recognised text with surrounding whitespace stripped.
    """
    # Fetch and decode the captcha.
    response = requests.get(image_url)
    origin_image = PIL.Image.open(StringIO.StringIO(response.content))

    # Brighten first, then binarise, then reduce to a bilevel image.
    brightened = origin_image.point(lambda value: value * 1.5)
    binarised = brightened.point(lambda value: 255 if value > 200 else 0)
    bilevel = binarised.convert("1")

    # Run OCR on the cleaned-up image.
    recognised = tesseract.ocr_image(bilevel)

    # Optionally display the untouched download.
    if show_origin_image:
        origin_image.show()

    return recognised.strip()


def main():
    """Prompt on standard input for the captcha URL."""
    url = raw_input("Please input the url of captcha:\n > ").strip()
def test_deprecator(self):
    """Create an object via ``tesserwrap.tesseract()`` with no arguments."""
    instance = tesserwrap.tesseract()
def __init__(self):
    """Build the ``tesseract`` object and keep it on the instance as ``tw``."""
    # NOTE(review): the two arguments are presumably the data directory and
    # the language code -- confirm against the tesserwrap constructor.
    engine = tesseract('./', 'dod')
    self.tw = engine