def __init__(captcha, file_, process=True):
    """Load a captcha image and, unless told otherwise, process it.

    file_ is passed to Image.open and the result is converted to RGB and
    stored as captcha.image. A cleared 1-bit mask sized by
    captcha.dimensions is stored as captcha.mask. captcha.characters starts
    as an empty list and captcha.value as None. When process is true,
    captcha.process() is called before returning.
    """
    opened = Image.open(file_)
    captcha.image = Image.prep(opened.convert("RGB"))

    # NOTE(review): captcha.dimensions presumably derives from captcha.image,
    # so the mask must be created after the image is assigned -- confirm.
    empty_mask = Image.new("1", captcha.dimensions, False)
    captcha.mask = Image.prep(empty_mask)

    captcha.characters = []
    captcha.value = None

    if not process:
        return
    captcha.process()
def chunk_image_mask(captcha, chunk, ignore_color=False):
    """Return a 1-bit image of the pixels in a chunk, cropped to fit.

    chunk is a collection of (x, y) indices. The returned image is exactly
    as large as the chunk's bounding box; pixels that belong to the chunk
    are set and every other pixel inside the box is left clear.

    ignore_color is accepted for interface compatibility; this
    implementation does not use it.

    Raises ValueError if chunk is empty, because no bounding box exists.
    """
    points = [(x, y) for x, y in chunk]
    if not points:
        raise ValueError("cannot build a mask image for an empty chunk")

    min_x = min(x for x, _ in points)
    max_x = max(x for x, _ in points)
    min_y = min(y for _, y in points)
    max_y = max(y for _, y in points)

    size = (max_x - min_x + 1, max_y - min_y + 1)
    image = Image.prep(Image.new("1", size, False))

    # Start from an all-clear image and set only the chunk's own pixels.
    # This costs one write per chunk pixel instead of one membership test
    # per bounding-box pixel, which is slow when chunk is a list.
    for x, y in points:
        image.data[x - min_x, y - min_y] = True

    return image
def masked(captcha):
    """Return an RGBA copy of the original image with masked areas transparent.

    A pixel is treated as masked when captcha[index] is None. Masked pixels
    keep their red, green and blue values; only their alpha is zeroed.
    """
    rgba = Image.prep(captcha.image.convert("RGBA"))

    for index in captcha:
        if captcha[index] is not None:
            continue
        red, green, blue, _alpha = rgba.data[index]
        rgba.data[index] = (red, green, blue, False)

    return rgba
def interpret_characters(captcha):
    """Attempt to return the string represented by the character images.

    The characters are pasted left to right onto a single greyscale canvas,
    separated (and bordered) by CHARACTER_PADDING and each resting
    CHARACTER_PADDING above the bottom edge. The canvas is passed through a
    3x3 MaxFilter and then a 3x3 ModeFilter before being handed to ocr().
    """
    glyphs = captcha.characters
    tallest = max(glyph.height for glyph in glyphs)  # TODO: Should this be a constant?
    padding = captcha.CHARACTER_PADDING

    canvas_width = sum(glyph.width for glyph in glyphs) + padding * (len(glyphs) + 1)
    canvas_height = tallest + padding * 2
    canvas = Image.prep(Image.new("L", (canvas_width, canvas_height), 0))

    left = padding
    for glyph in glyphs:
        # Align every glyph to a common baseline `padding` above the bottom.
        top = canvas_height - padding - glyph.height
        box = (left, top, left + glyph.width, top + glyph.height)
        canvas.paste(glyph, box)
        left += glyph.width + padding

    after_max = canvas.filter(ImageFilter.MaxFilter(3))
    after_mode = after_max.filter(ImageFilter.ModeFilter(3))
    return ocr(Image.prep(after_mode))
def scale_characters(captcha):
    """Resize every character image to a common scale, keeping aspect ratio.

    Each character is scaled so its height matches the tallest character,
    unless that would make it wider than the widest character; in that case
    it is scaled to that maximum width instead. Each character is converted
    to greyscale for the bicubic resize and back to 1-bit afterwards. The
    results replace captcha.characters.
    """
    tallest = max(glyph.height for glyph in captcha.characters)  # TODO: Should this be a constant?
    widest = max(glyph.width for glyph in captcha.characters)  # caps the scaled width

    def _rescale(glyph):
        stretched_width = glyph.width * (tallest / glyph.height)
        if stretched_width > widest:
            # Too wide at full height: fit to the maximum width instead.
            size = (widest, int(glyph.height * (widest / glyph.width)))
        else:
            size = (int(stretched_width), tallest)
        grey = glyph.convert("L")
        return Image.prep(grey.resize(size, Image.BICUBIC).convert("1"))

    captcha.characters = [_rescale(glyph) for glyph in captcha.characters]
def align_characters(captcha):
    """Rotate each character image to its best alignment and crop it.

    Every whole-degree angle from MIN_ROTATION to MAX_ROTATION (inclusive)
    is tried. For each rotation the bounds of the inked region are measured
    and scored as width ** 1.2 * height. The first angle tried is always the
    initial candidate; a later angle replaces it only when it scores lower
    and its bounds are narrower than they are tall. The winning rotation is
    cropped to its bounds and the results replace captcha.characters.
    """

    def _scan(positions, has_ink, fallback):
        # Walk positions until one contains ink; return the last ink-free
        # position passed, or fallback if the very first one has ink.
        edge = fallback
        for position in positions:
            if has_ink(position):
                break
            edge = position
        return edge

    def _ink_bounds(image):
        columns = range(image.width)
        rows = range(image.height)

        def column_has_ink(x):
            return any(image.data[x, y] for y in rows)

        def row_has_ink(y):
            return any(image.data[x, y] for x in columns)

        left = _scan(columns, column_has_ink, 0)
        right = _scan(reversed(columns), column_has_ink, image.width - 1)
        top = _scan(rows, row_has_ink, 0)
        bottom = _scan(reversed(rows), row_has_ink, image.height - 1)
        return left, top, right, bottom

    aligned = []
    for character in captcha.characters:
        best_area = None
        for angle in range(captcha.MIN_ROTATION, captcha.MAX_ROTATION + 1):
            rotated = Image.prep(character.rotate(angle, Image.NEAREST, expand=True))
            box = _ink_bounds(rotated)
            left, top, right, bottom = box

            width = right - left + 1
            height = bottom - top + 1
            # Width is weighted slightly more than height in the score.
            area = (width ** 1.2) * height

            # The first angle is accepted unconditionally, so best_image and
            # best_box are bound whenever the rotation range is non-empty.
            if best_area is None or (area < best_area and width < height):
                best_area = area
                best_image = rotated
                best_box = box

        # NOTE(review): width/height above treat the bounds as inclusive, but
        # Pillow's crop() box is documented as exclusive on the right/lower
        # edge -- this may drop the last column/row when ink touches the
        # edge. Behaviour is kept as-is; confirm intent.
        aligned.append(Image.prep(best_image.crop(best_box)))

    captcha.characters = aligned
def interpret_characters(captcha):
    """Attempt to return the string represented by the character images.

    The characters are pasted left to right onto a single greyscale canvas,
    separated (and bordered) by CHARACTER_PADDING and each resting
    CHARACTER_PADDING above the bottom edge. The canvas is passed through a
    3x3 MaxFilter and then a 3x3 ModeFilter before being handed to ocr().
    """
    glyphs = captcha.characters
    tallest = max(glyph.height for glyph in glyphs)  # TODO: Should this be a constant?
    padding = captcha.CHARACTER_PADDING

    canvas_width = sum(glyph.width for glyph in glyphs) + padding * (len(glyphs) + 1)
    canvas_height = tallest + padding * 2
    canvas = Image.prep(Image.new("L", (canvas_width, canvas_height), 0))

    left = padding
    for glyph in glyphs:
        # Align every glyph to a common baseline `padding` above the bottom.
        top = canvas_height - padding - glyph.height
        box = (left, top, left + glyph.width, top + glyph.height)
        canvas.paste(glyph, box)
        left += glyph.width + padding

    after_max = canvas.filter(ImageFilter.MaxFilter(3))
    after_mode = after_max.filter(ImageFilter.ModeFilter(3))
    return ocr(Image.prep(after_mode))
def chunk_image_mask(captcha, chunk, ignore_color=False):
    """Return a 1-bit image of the pixels in a chunk, cropped to fit.

    chunk is a collection of (x, y) indices. The returned image is exactly
    as large as the chunk's bounding box; pixels that belong to the chunk
    are set and every other pixel inside the box is left clear.

    ignore_color is accepted for interface compatibility; this
    implementation does not use it.

    Raises ValueError if chunk is empty, because no bounding box exists.
    """
    points = [(x, y) for x, y in chunk]
    if not points:
        raise ValueError("cannot build a mask image for an empty chunk")

    min_x = min(x for x, _ in points)
    max_x = max(x for x, _ in points)
    min_y = min(y for _, y in points)
    max_y = max(y for _, y in points)

    size = (max_x - min_x + 1, max_y - min_y + 1)
    image = Image.prep(Image.new("1", size, False))

    # Start from an all-clear image and set only the chunk's own pixels.
    # This costs one write per chunk pixel instead of one membership test
    # per bounding-box pixel, which is slow when chunk is a list.
    for x, y in points:
        image.data[x - min_x, y - min_y] = True

    return image
def masked(captcha):
    """Return an RGBA copy of the original image with masked areas transparent.

    A pixel is treated as masked when captcha[index] is None. Masked pixels
    keep their red, green and blue values; only their alpha is zeroed.
    """
    rgba = Image.prep(captcha.image.convert("RGBA"))

    for index in captcha:
        if captcha[index] is not None:
            continue
        red, green, blue, _alpha = rgba.data[index]
        rgba.data[index] = (red, green, blue, False)

    return rgba
def scale_characters(captcha):
    """Resize every character image to a common scale, keeping aspect ratio.

    Each character is scaled so its height matches the tallest character,
    unless that would make it wider than the widest character; in that case
    it is scaled to that maximum width instead. Each character is converted
    to greyscale for the bicubic resize and back to 1-bit afterwards. The
    results replace captcha.characters.
    """
    tallest = max(glyph.height for glyph in captcha.characters)  # TODO: Should this be a constant?
    widest = max(glyph.width for glyph in captcha.characters)  # caps the scaled width

    def _rescale(glyph):
        stretched_width = glyph.width * (tallest / glyph.height)
        if stretched_width > widest:
            # Too wide at full height: fit to the maximum width instead.
            size = (widest, int(glyph.height * (widest / glyph.width)))
        else:
            size = (int(stretched_width), tallest)
        grey = glyph.convert("L")
        return Image.prep(grey.resize(size, Image.BICUBIC).convert("1"))

    captcha.characters = [_rescale(glyph) for glyph in captcha.characters]