Exemplo n.º 1
0
def main():
    """Run the early preprocessing stages on one training image.

    A training image is taken from ``dataset_manager``, pushed through the
    recognizer's HSV filter, the neighbour filter and ``morph.skeletonize``,
    and every intermediate result is written to a path obtained from
    ``c.temp_path``. Each stage is executed through ``time_func`` with a
    label naming the stage (presumably so the stage gets timed).
    """
    c.clear_temp()

    source_image = dataset_manager.get_training_image()
    recognizer = captcha_recognizer.CaptchaRecognizer()
    mpimg.imsave(c.temp_path('00.origin.png'), source_image)

    # Stage 1: HSV-based noise removal on the raw image.
    hsv_filtered = time_func('remove_noise_with_hsv', lambda: recognizer.remove_noise_with_hsv(source_image))
    mpimg.imsave(c.temp_path('01.hsv.png'), hsv_filtered, cmap=cm_greys)

    # Stage 2: neighbour-based noise removal, wrapped in repeat(..., 2).
    neighbor_filtered = time_func(
        'remove_noise_with_neighbors',
        lambda: repeat(recognizer.remove_noise_with_neighbors, 2)(hsv_filtered),
    )
    mpimg.imsave(c.temp_path('02.neighbor.png'), neighbor_filtered, cmap=cm_greys)

    # Stage 3a: skeletonize the neighbour-filtered image.
    skeleton = time_func('skeletonize', lambda: morph.skeletonize(neighbor_filtered))
    mpimg.imsave(c.temp_path('03a.skeleton.png'), skeleton, cmap=cm_greys)
    def recognize(self, img, save_intermediate=False, verbose=False, reconstruct=False, force_partition=True):
        """Partition a captcha image and classify every character in it.

        :param img: captcha image, handed unchanged to ``self.partition``.
        :param save_intermediate: forwarded to ``self.partition``.
        :param verbose: forwarded to ``self.partition``; also decides whether
            the prediction step is passed to ``time_func`` under the label
            "predict" or under ``None``.
        :param reconstruct: when truthy, ``captcha_learn.reconstruct_model()``
            is called after partitioning and before any prediction.
        :param force_partition: forwarded to ``self.partition``.
        :return: ``(success, seq, weak_confidence)``. On success ``seq`` is the
            per-character predictions joined into one string; on failure it
            is an empty list.
        """
        char_images, weak_confidence = self.partition(img, save_intermediate, verbose, force_partition)
        if reconstruct:
            captcha_learn.reconstruct_model()

        # Guard clause: without exactly ``character_num`` pieces there is
        # nothing to predict.
        if char_images is None or len(char_images) != self.character_num:
            return False, [], weak_confidence

        predictions = []

        def predict():
            # Runs inside time_func; fills ``predictions`` in character order.
            for char_image in char_images:
                predictions.append(captcha_learn.predict(char_image))

        time_func("predict" if verbose else None, predict)
        return True, "".join(predictions), weak_confidence
Exemplo n.º 3
0
    def recognize(self, img, save_intermediate=False, verbose=False,
                  reconstruct=False, force_partition=True):
        """Recognize the character sequence of a captcha image.

        The image is split by ``self.partition``; if that yields exactly
        ``self.character_num`` character images, each one is classified with
        ``captcha_learn.predict`` and the results are concatenated.

        :param img: captcha image passed on to ``self.partition``.
        :param save_intermediate: passed on to ``self.partition``.
        :param verbose: passed on to ``self.partition``; when truthy the
            prediction step is given to ``time_func`` with the label
            'predict', otherwise with ``None``.
        :param reconstruct: when truthy, ``captcha_learn.reconstruct_model()``
            runs right after partitioning.
        :param force_partition: passed on to ``self.partition``.
        :return: tuple ``(success, seq, weak_confidence)``; ``seq`` is a string
            when ``success`` is True and an empty list otherwise.
        """
        seq = []
        success = False
        char_images, weak_confidence = self.partition(
            img, save_intermediate, verbose, force_partition)
        if reconstruct:
            captcha_learn.reconstruct_model()

        if char_images is not None and len(char_images) == self.character_num:

            def predict():
                # Mutates the enclosing list in place, so no rebinding
                # (and no ``nonlocal``) is required.
                seq.extend(map(captcha_learn.predict, char_images))

            time_func('predict' if verbose else None, predict)
            seq = ''.join(seq)
            success = True

        return success, seq, weak_confidence
    def partition(self, img, save_intermediate=False, verbose=False, force_partition=True):
        """Clean a captcha image and cut it into per-character images.

        Stages: HSV noise removal, neighbour noise removal (wrapped in
        ``repeat(..., 2)``), labelling via ``self.segment_with_label``,
        ordering of the labelled segments by their axis-1 start, an optional
        forced split, and finally a count and size check.

        :param img: input captcha image.
        :param save_intermediate: when truthy, the image of each stage is
            written with ``mpimg.imsave`` to a path from ``c.temp_path``.
        :param verbose: when truthy, stage names are passed to ``time_func``
            and diagnostics are printed; otherwise ``None`` is passed as the
            label and nothing is printed.
        :param force_partition: when truthy and exactly
            ``self.character_num - 1`` segments were found,
            ``self.force_partition`` is applied to the segment list and the
            result is flagged as weak.
        :return: ``(char_images, weak_confidence)``. ``char_images`` is
            ``None`` when the segment count or the size limits are not met;
            ``weak_confidence`` is 1 if the forced split was used, else 0.
        """
        weak_confidence = 0
        if save_intermediate:
            mpimg.imsave(c.temp_path("00.origin.png"), img)

        # step 1
        # remove noise with hsv
        img_01 = time_func("remove_noise_with_hsv" if verbose else None, lambda: self.remove_noise_with_hsv(img))
        if save_intermediate:
            mpimg.imsave(c.temp_path("01.hsv.png"), img_01, cmap=cm_greys)

        # step 2
        # remove noise with neighbors
        img_02 = time_func(
            "remove_noise_with_neighbors" if verbose else None,
            lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01),
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path("02.neighbor.png"), img_02, cmap=cm_greys)

        # step 3
        # partition stage 1
        labels, object_slices = time_func(
            "segment_with_label" if verbose else None, lambda: self.segment_with_label(img_02)
        )
        if verbose:
            print("{} connected components found".format(len(object_slices)))
        if save_intermediate:
            mpimg.imsave(c.temp_path("03.00000.png"), labels)

        # step 4
        # Arrange the segments from left to right and probably partition stage 2
        xmin_arr = np.array([s[1].start for s in object_slices])
        char_images = []
        for i in xmin_arr.argsort():
            box = object_slices[i]
            # Crop to the segment's bounding box first and mask only that
            # crop; copying the whole image per segment produced the same
            # pixels at a much higher cost.
            char_image = img_02[box].copy()
            # Segment i carries label i + 1; blank pixels of other segments
            # that intrude into this bounding box.
            char_image[labels[box] != i + 1] = 0
            char_images.append(char_image)
        if force_partition and len(char_images) == self.character_num - 1:
            weak_confidence = 1
            char_images = self.force_partition(char_images)

        # step 5
        # Check if segmentation was successful and get characters
        sizes_ok = False
        if len(char_images) == self.character_num:
            shapes = np.array([np.shape(char_image) for char_image in char_images])
            heights, widths = shapes[:, 0], shapes[:, 1]
            if verbose:
                print("Heights {}".format(heights))
                print("Widths {}".format(widths))
            sizes_ok = (
                np.all(heights >= self.char_height_min)
                and np.all(heights <= self.char_height_max)
                and np.all(widths >= self.char_width_min)
                and np.all(widths <= self.char_width_max)
            )

        if not sizes_ok:
            if verbose:
                print("Warning: partition failed!")
            return None, weak_confidence

        if save_intermediate:
            # File names are numbered from 1.
            for index, char_image in enumerate(char_images, start=1):
                mpimg.imsave(c.temp_path("03.char.{}.png".format(index)), char_image, cmap=cm_greys)
        return char_images, weak_confidence
Exemplo n.º 5
0
    def partition(self, img, save_intermediate=False, verbose=False,
                  force_partition=True):
        """Denoise a captcha image and split it into character images.

        The image goes through ``self.remove_noise_with_hsv``, then
        ``self.remove_noise_with_neighbors`` (wrapped in ``repeat(..., 2)``),
        then ``self.segment_with_label``. The labelled segments are ordered by
        the start of their axis-1 slice, optionally re-split by
        ``self.force_partition``, and validated against the configured
        character count and size limits.

        :param img: input captcha image.
        :param save_intermediate: when truthy, every stage's image is saved
            with ``mpimg.imsave`` under a ``c.temp_path`` file name.
        :param verbose: when truthy, stage labels are handed to ``time_func``
            and progress information is printed; otherwise the label is
            ``None`` and nothing is printed.
        :param force_partition: when truthy and the segment count is exactly
            one short of ``self.character_num``, ``self.force_partition`` is
            applied and ``weak_confidence`` becomes 1.
        :return: ``(char_images, weak_confidence)``; ``char_images`` is
            ``None`` if validation fails.
        """
        weak_confidence = 0
        if save_intermediate:
            mpimg.imsave(c.temp_path('00.origin.png'), img)

        # step 1
        # remove noise with hsv
        img_01 = time_func(
            'remove_noise_with_hsv' if verbose else None,
            lambda: self.remove_noise_with_hsv(img)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

        # step 2
        # remove noise with neighbors
        img_02 = time_func(
            'remove_noise_with_neighbors' if verbose else None,
            lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

        # step 3
        # partition stage 1
        labels, object_slices = time_func(
            'segment_with_label' if verbose else None,
            lambda: self.segment_with_label(img_02)
        )
        if verbose:
            print('{} connected components found'.format(len(object_slices)))
        if save_intermediate:
            mpimg.imsave(c.temp_path('03.00000.png'), labels)

        # step 4
        # Arrange the segments from left to right and probably partition stage 2
        xmin_arr = np.array([s[1].start for s in object_slices])
        char_images = []
        # argsort() is iterated directly; wrapping it in list() added nothing.
        for i in xmin_arr.argsort():
            region = object_slices[i]
            # Work on the bounding box only instead of copying the full image
            # for every segment; the resulting pixels are identical.
            char_image = img_02[region].copy()
            # Segment i is labelled i + 1: zero everything in the box that
            # belongs to another segment or to the background.
            char_image[labels[region] != i + 1] = 0
            char_images.append(char_image)
        if force_partition and len(char_images) == self.character_num - 1:
            weak_confidence = 1
            char_images = self.force_partition(char_images)

        # step 5
        # Check if segmentation was successful and get characters
        if len(char_images) == self.character_num:
            shapes = np.array([np.shape(piece) for piece in char_images])
            heights, widths = shapes[:, 0], shapes[:, 1]
            if verbose:
                print('Heights {}'.format(heights))
                print('Widths {}'.format(widths))
            if (np.all(heights >= self.char_height_min) and
                    np.all(heights <= self.char_height_max) and
                    np.all(widths >= self.char_width_min) and
                    np.all(widths <= self.char_width_max)):

                if save_intermediate:
                    # Output files are numbered starting at 1.
                    for number, piece in enumerate(char_images, start=1):
                        mpimg.imsave(
                            c.temp_path('03.char.{}.png'.format(number)),
                            piece, cmap=cm_greys)
                return char_images, weak_confidence

        if verbose:
            print('Warning: partition failed!')
        return None, weak_confidence
Exemplo n.º 6
0
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    """Fetch captchas from the provider, recognize them and print statistics.

    For each of ``num`` rounds a captcha is fetched, recognized and, if
    recognition succeeded, submitted to ``provider.verify``. Results are
    tallied by outcome (truthy/falsy verify result) and by whether the
    recognizer reported weak confidence, then printed together with the
    elapsed wall-clock time.

    :param show_img: when truthy and ``num == 1``, the fetched image is shown
        via ``show_image``.
    :param num: number of captchas to process. With ``num == 1`` the run is
        verbose: steps are labelled for ``time_func`` and intermediates are
        saved. With ``num <= 0`` nothing is fetched and all rates print as 0.
    :param reconstruct: forwarded to ``recognize`` on the first round only.
    :param force_partition: forwarded to ``recognize`` on every round.
    """
    time_start = time.time()

    def ratio(hits, total):
        # Every rate reports 0 for an empty bucket instead of dividing by
        # zero; this now includes the overall rate when num is 0.
        return hits / total if total > 0 else 0

    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    single = num == 1
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func("fetch" if single else None, lambda: provider.fetch())
        if show_img and single:
            show_image(image)
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=single,
            verbose=single,
            # Only the first round is allowed to request a model rebuild.
            reconstruct=reconstruct if i == 0 else False,
            force_partition=force_partition,
        )
        if not success:
            fail += 1
            continue
        print(seq)
        result = time_func("verify" if single else None, lambda: provider.verify(seq))
        if single:
            print("Recognized seq is {}".format(result))
        if result:
            if weak_confidence:
                right_weak += 1
            else:
                right_strong += 1
        else:
            if weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    print("Total success rate: ", ratio(right_total, num))
    print("Success rate when confident: ", ratio(right_total, num - fail))
    print("Success rate when strongly confident: ", ratio(right_strong, right_strong + wrong_strong))
    print("Success rate when weakly confident: ", ratio(right_weak, right_weak + wrong_weak))
    time_end = time.time()
    print("Time used to test recognize http is: ", time_end - time_start)
Exemplo n.º 7
0
def test_recognize_http(show_img=False,
                        num=1,
                        reconstruct=False,
                        force_partition=True):
    """Run the recognizer against live captchas and print a score sheet.

    Each round fetches a captcha from ``BilibiliCaptchaProvider``, runs
    ``CaptchaRecognizer.recognize`` on it and, when recognition succeeds,
    checks the sequence with ``provider.verify``. Outcomes are counted as
    right/wrong (truthiness of the verify result) crossed with strong/weak
    (the recognizer's ``weak_confidence``), plus outright failures.

    :param show_img: show the fetched image via ``show_image``; only
        honoured when ``num == 1``.
    :param num: number of rounds. ``num == 1`` switches on labelled timing,
        verbose recognition and saving of intermediates. For ``num <= 0`` no
        round runs and every rate is printed as 0.
    :param reconstruct: passed to ``recognize`` in the first round; later
        rounds always pass ``False``.
    :param force_partition: passed to ``recognize`` in every round.
    """
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    detailed = num == 1
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func('fetch' if detailed else None,
                          lambda: provider.fetch())
        if show_img and detailed:
            show_image(image)
        # The three original call sites differed only in these flags.
        first_round = i == 0
        success, seq, weak_confidence = recognizer.recognize(
            image,
            save_intermediate=detailed,
            verbose=detailed,
            reconstruct=reconstruct if first_round else False,
            force_partition=force_partition)
        if success:
            print(seq)
            result = time_func('verify' if detailed else None,
                               lambda: provider.verify(seq))
            if detailed:
                print('Recognized seq is {}'.format(result))
            if result and weak_confidence:
                right_weak += 1
            elif result:
                right_strong += 1
            elif weak_confidence:
                wrong_weak += 1
            else:
                wrong_strong += 1
        else:
            fail += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    attempted = num - fail
    strong_total = right_strong + wrong_strong
    weak_total = right_weak + wrong_weak
    print('Fail: ', fail)
    print('Right weak: ', right_weak)
    print('Right strong: ', right_strong)
    print('Right total: ', right_total)
    print('Wrong weak: ', wrong_weak)
    print('Wrong strong: ', wrong_strong)
    print('Wrong total: ', wrong_total)
    # Guarded like the other rates so that num == 0 no longer raises
    # ZeroDivisionError.
    print('Total success rate: ', right_total / num if num > 0 else 0)
    print('Success rate when confident: ',
          right_total / attempted if attempted > 0 else 0)
    print('Success rate when strongly confident: ',
          right_strong / strong_total if strong_total > 0 else 0)
    print('Success rate when weakly confident: ',
          right_weak / weak_total if weak_total > 0 else 0)
    time_end = time.time()
    print('Time used to test recognize http is: ', time_end - time_start)
    def partition(self, img, save_intermediate=False, verbose=False):
        """Denoise a captcha image and split it into character images.

        The image is passed through ``self.remove_noise_with_hsv``, then
        ``self.remove_noise_with_neighbors`` (wrapped in ``repeat(..., 2)``),
        then ``self.segment_with_label``. The labelled segments are ordered
        by the start of their axis-1 slice and accepted only if there are
        exactly ``self.character_num`` of them and all fit the configured
        height and width limits.

        :param img: input captcha image.
        :param save_intermediate: when truthy, every stage's image is saved
            with ``mpimg.imsave`` under a ``c.temp_path`` file name.
        :param verbose: when truthy, stage labels are handed to ``time_func``
            and progress information is printed; otherwise the label is
            ``None`` and nothing is printed.
        :return: list of character images ordered by axis-1 start, or
            ``None`` when the count or size check fails.
        """
        if save_intermediate:
            mpimg.imsave(c.temp_path('00.origin.png'), img)

        # 1
        img_01 = time_func(
            'remove_noise_with_hsv' if verbose else None,
            lambda: self.remove_noise_with_hsv(img)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('01.hsv.png'), img_01, cmap=cm_greys)

        # 2
        img_02 = time_func(
            'remove_noise_with_neighbors' if verbose else None,
            lambda: repeat(self.remove_noise_with_neighbors, 2)(img_01)
        )
        if save_intermediate:
            mpimg.imsave(c.temp_path('02.neighbor.png'), img_02, cmap=cm_greys)

        # 3
        labels, object_slices = time_func(
            'segment_with_label' if verbose else None,
            lambda: self.segment_with_label(img_02)
        )
        if verbose:
            print('{} connected components found'.format(len(object_slices)))
        if save_intermediate:
            mpimg.imsave(c.temp_path('03.00000.png'), labels)

        # Arrange the segments from left to right
        xmin_arr = np.array([s[1].start for s in object_slices])
        char_images = []
        for i in xmin_arr.argsort():
            window = object_slices[i]
            # Copy just the bounding box rather than the whole image; the
            # extracted pixels are the same.
            char_image = img_02[window].copy()
            # Segment i has label i + 1; clear pixels of any other label
            # that fall inside this box.
            char_image[labels[window] != i + 1] = 0
            char_images.append(char_image)

        # Check if segmentation was successful
        accepted = False
        if len(char_images) == self.character_num:
            shapes = np.array([np.shape(piece) for piece in char_images])
            heights, widths = shapes[:, 0], shapes[:, 1]
            if verbose:
                print('Heights {}'.format(heights))
                print('Widths {}'.format(widths))
            accepted = (np.all(heights >= self.char_height_min) and
                        np.all(heights <= self.char_height_max) and
                        np.all(widths >= self.char_width_min) and
                        np.all(widths <= self.char_width_max))

        if not accepted:
            if verbose:
                print('Warning: partition failed!')
            return None

        if save_intermediate:
            # Output files are numbered starting at 1.
            for number, piece in enumerate(char_images, start=1):
                mpimg.imsave(
                    c.temp_path('03.char.{}.png'.format(number)),
                    piece, cmap=cm_greys)
        return char_images