Exemplo n.º 1
0
    def img2str(self, captcha_body):
        """Recognise the text of a captcha image given as raw bytes.

        The image is converted to grayscale, its one-pixel outer border is
        painted white, the result is binarised at a fixed threshold and then
        passed to ``get_text_from_image`` with ``psm=7`` (Tesseract's
        single-text-line mode) restricted to ``self.captcha_char_whitelist``.

        :param captcha_body: encoded image data (bytes) that ``Image.open``
            can decode once wrapped in a ``BytesIO``.
        :return: the recognised text with all spaces removed.
        """
        # Lookup table for binarisation: gray levels below the threshold map
        # to 0, everything else to 1.
        threshold = 140
        table = [0 if level < threshold else 1 for level in range(256)]

        with BytesIO(captcha_body) as captcha_filelike, Image.open(
                captcha_filelike) as img:
            gray = img.convert('L')  # 'L' is 8-bit grayscale
            try:
                pix = gray.load()  # pixel-access object for in-place edits
                width, height = gray.size

                # Whiten the top and bottom rows to erase the black frame.
                for x in range(width):
                    pix[x, 0] = pix[x, height - 1] = 255

                # Whiten the left and right columns.
                for y in range(height):
                    pix[0, y] = pix[width - 1, y] = 255

                binary = gray.point(table, '1')
            finally:
                # point() returns a new image, so the grayscale copy can be
                # released whether or not the steps above succeeded.
                gray.close()

            try:
                text = get_text_from_image(
                    binary,
                    psm=7,
                    tessedit_char_whitelist=self.captcha_char_whitelist)
            finally:
                # Release the binarised image even if recognition raises.
                binary.close()

            return text.replace(' ', '')
Exemplo n.º 2
0
def parse_capatcha(captcha_body):
    """Recognise the text of a colour captcha given as raw image bytes.

    Pipeline: decode -> non-local-means denoising -> grayscale -> fixed
    binary threshold -> OCR restricted to ``captcha_char_whitelist``.

    :param captcha_body: encoded image data (bytes) that ``Image.open`` can
        decode once wrapped in a ``BytesIO``.
    :return: the recognised text with all spaces removed.
    """
    with BytesIO(captcha_body) as captcha_filelike, Image.open(captcha_filelike) as img:
        # numpy.array copies the pixel data, so nothing below depends on the
        # image or the byte buffer staying open.
        pixels = numpy.array(img)

    # Suppress speckle noise before thresholding.
    denoised = cv2.fastNlMeansDenoisingColored(pixels)

    # Collapse to a single gray channel.
    # NOTE(review): Pillow normally yields RGB channel order while
    # COLOR_BGR2GRAY assumes BGR, so the red/blue weights are swapped here.
    # Left unchanged because the threshold below was presumably tuned
    # against this conversion -- confirm before switching to COLOR_RGB2GRAY.
    gray = cv2.cvtColor(denoised, cv2.COLOR_BGR2GRAY)

    # Binarise: values above 125 become 255, everything else 0.
    _, binary = cv2.threshold(gray, 125, 255, cv2.THRESH_BINARY)

    dest_img = Image.fromarray(binary)
    try:
        code = get_text_from_image(
            dest_img, tessedit_char_whitelist=captcha_char_whitelist)
    finally:
        # Release the image even if recognition raises.
        dest_img.close()

    return code.replace(' ', '')
Exemplo n.º 3
0
def img2str(captcha_body):
    """Recognise a digits-only captcha image.

    The image is converted to grayscale, binarised at a fixed threshold and
    passed to ``get_text_from_image`` with ``psm=7`` (Tesseract's
    single-text-line mode) and the character whitelist set to ``digits``.

    :param captcha_body: whatever ``Image.open`` accepts. It is handed to
        ``Image.open`` directly, so this is a filename or a file-like object;
        raw image bytes would have to be wrapped in a ``BytesIO`` first.
    :return: the recognised text with all spaces removed.
    """
    captcha_char_whitelist = digits

    # Lookup table for binarisation: gray levels below the threshold map to
    # 0, everything else to 1.
    threshold = 180  # 201 is noted alongside, presumably also tried -- confirm
    table = [0 if level < threshold else 1 for level in range(256)]

    with Image.open(captcha_body) as img:
        gray = img.convert('L')  # 'L' is 8-bit grayscale
        try:
            binary = gray.point(table, '1')
        finally:
            # point() returns a new image, so the grayscale copy is no
            # longer needed.
            gray.close()

        try:
            text = get_text_from_image(
                binary, psm=7,
                tessedit_char_whitelist=captcha_char_whitelist)
        finally:
            # Release the binarised image even if recognition raises.
            binary.close()

        return text.replace(' ', '')
 def test_options(self):
     """Single-character mode (``psm=10``) must yield exactly one character."""
     image = get_test_image('quickfox.png')
     # psm=10 asks the engine to treat the image as a single character.
     recognized = get_text_from_image(image, psm=10)
     assert isinstance(recognized, six.text_type)
     assert len(recognized) == 1
 def test_options(self):
     """Pass ``psm=10`` (single character) and expect a one-character text result."""
     single_char_mode = 10
     fox_image = get_test_image('quickfox.png')
     result = get_text_from_image(fox_image, psm=single_char_mode)
     assert isinstance(result, six.text_type)
     assert len(result) == 1
 def test_configs(self):
     """Check that whitelist settings restrict the recognised characters.

     Runs OCR on ``alphanumeric.png`` three times: with default settings
     (output must contain characters outside ``0123456789-``), with the
     ``digits`` config file (output must stay inside that set) and with an
     explicit ``tessedit_char_whitelist`` of ``123``. Spaces are ignored in
     every check.
     """
     image = get_test_image('alphanumeric.png')
     digit_chars = set("0123456789-")

     # Default configuration: at least one non-whitelisted character.
     default_text = get_text_from_image(image)
     assert isinstance(default_text, six.text_type)
     assert any(ch not in digit_chars for ch in default_text if ch != ' ')

     # Named config file restricting output to digits and '-'.
     digits_text = get_text_from_image(image, config_name='digits')
     assert isinstance(digits_text, six.text_type)
     assert all(ch in digit_chars for ch in digits_text if ch != ' ')

     # Whitelist supplied directly as a keyword option.
     manual_chars = "123"
     manual_set = set(manual_chars)
     manual_text = get_text_from_image(
         image, tessedit_char_whitelist=manual_chars)
     assert isinstance(manual_text, six.text_type)
     assert all(ch in manual_set for ch in manual_text if ch != ' ')
 def test_configs(self):
     """Exercise the default, config-file and keyword whitelist code paths.

     All three runs use ``alphanumeric.png``; spaces in the output are never
     counted against a whitelist.
     """
     def stray_chars(text, allowed):
         # Non-space characters of ``text`` that are not in ``allowed``.
         return set(text) - set(allowed) - set(' ')

     picture = get_test_image('alphanumeric.png')
     numeric = "0123456789-"

     # No restriction: something outside the numeric set must show up.
     unrestricted = get_text_from_image(picture)
     assert isinstance(unrestricted, six.text_type)
     assert stray_chars(unrestricted, numeric)

     # 'digits' config file: nothing outside the numeric set may show up.
     via_config = get_text_from_image(picture, config_name='digits')
     assert isinstance(via_config, six.text_type)
     assert not stray_chars(via_config, numeric)

     # Explicit whitelist option: only '1', '2' and '3' are acceptable.
     narrow = "123"
     via_option = get_text_from_image(picture, tessedit_char_whitelist=narrow)
     assert isinstance(via_option, six.text_type)
     assert not stray_chars(via_option, narrow)
Exemplo n.º 8
0
def recognize_captcha_by_tesseract(img,
                                   digits_only=False,
                                   letters_only=False,
                                   del_noise=False):
    """Run OCR on a captcha image and return the text without blanks.

    :param img: image passed to ``convert_img_2_baw`` (presumably a
        black-and-white conversion -- confirm against that helper).
    :param digits_only: restrict recognition to ``digits``; takes precedence
        over ``letters_only``.
    :param letters_only: restrict recognition to ``ascii_letters``.
    :param del_noise: when true, additionally run ``del_img_noise`` on the
        converted image before recognition.
    :return: recognised text with every ``blank_pattern`` match removed.
    """
    # Pick the whitelist; with neither flag set, fall back to all_chars.
    if digits_only:
        allowed = digits
    else:
        allowed = ascii_letters if letters_only else all_chars

    prepared = convert_img_2_baw(img)
    if del_noise:
        prepared = del_img_noise(prepared)

    raw_text = get_text_from_image(prepared, tessedit_char_whitelist=allowed)
    return blank_pattern.sub("", raw_text)
 def test_simple_sentence(self):
     """OCR of ``quickfox.bmp`` must be close to the known pangram."""
     expected = "The quick brown fox jumps over the lazy dog"
     recognized = get_text_from_image(get_test_image('quickfox.bmp'))
     assert isinstance(recognized, six.text_type)
     # Similarity is judged by the check_similarity_ratio helper.
     assert check_similarity_ratio(recognized, expected)
 def test_blank_image(self):
     """A plain white 100x100 RGB image must produce empty text."""
     white = (255, 255, 255)
     canvas = Image.new("RGB", (100, 100), color=white)
     recognized = get_text_from_image(canvas)
     assert isinstance(recognized, six.text_type)
     assert len(recognized) == 0
 def test_simple_sentence(self):
     """Recognise the pangram in ``quickfox.bmp`` and compare it to the truth."""
     image = get_test_image('quickfox.bmp')
     ocr_output = get_text_from_image(image)
     assert isinstance(ocr_output, six.text_type)
     reference = "The quick brown fox jumps over the lazy dog"
     assert check_similarity_ratio(ocr_output, reference)
 def test_blank_image(self):
     """OCR on an all-white image returns a text value of length zero."""
     side = 100
     empty_page = Image.new("RGB", (side, side), color=(255, 255, 255))
     result = get_text_from_image(empty_page)
     assert isinstance(result, six.text_type)
     assert len(result) == 0
Exemplo n.º 13
0
def extract_text_by_google(imagePath):
    """Open the image at ``imagePath`` and return its recognised text.

    Recognition is delegated to ``get_text_from_image`` with default
    options; the image is closed before returning.
    """
    with Image.open(imagePath) as image:
        recognized = get_text_from_image(image)
    return recognized