Exemplo n.º 1
0
 def test_generate_text_with_unknown_orientation(self):
     """generate() must raise ValueError for this argument set.

     The test name attributes the error to the orientation value
     (presumably the ``100`` below -- TODO confirm against the signature).
     """
     # assertRaises turns a missing ValueError into a regular test failure
     # instead of an unrelated generic Exception (reported as an error).
     with self.assertRaises(
             ValueError, msg="Unknown orientation did not throw"):
         FakeTextDataGenerator.generate(
             12,
             "TEST TEST TEST",
             "tests/font.ttf",
             "tests/out/",
             32,
             "jpg",
             0,
             False,
             0,
             False,
             1,
             0,
             0,
             False,
             0,
             -1,
             0,
             "#010101",
             100,
             2,
             0,
             (5, 5, 5, 5),
             0,
             0,
         )
    def test_generate_data_with_word_split(self):
        """Generate sample 14 as PNG and compare it with the stored reference.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.01.
        """
        FakeTextDataGenerator.generate(
            14,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "png",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
            True,
        )

        # assertLess shows the measured value on failure, which
        # assertTrue(value < limit) cannot do.
        self.assertLess(
            diff(
                "tests/out/TEST TEST TEST_14.png",
                "tests/expected_results/TEST TEST TEST_14.png",
                delete_diff_file=True,
            ),
            0.01,
        )
Exemplo n.º 3
0
    def test_generate_data_with_fit(self):
        """Generate sample 13 as JPEG and require it to match the reference.

        ``md5`` of the produced file must equal ``md5`` of the expected file;
        the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            13,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "jpg",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            1,
            0,
            (0, 0, 0, 0),
            1,
            0,
        )

        # assertEqual prints both values on mismatch; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(
            md5("tests/out/TEST TEST TEST_13.jpg"),
            md5("tests/expected_results/TEST TEST TEST_13.jpg"),
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_13.jpg")
Exemplo n.º 4
0
 def test_raise_if_handwritten_and_vertical(self):
     """generate() must raise ValueError for this argument set.

     Per the test name, the rejected combination is handwritten text with a
     vertical orientation (presumably the ``True`` and the ``1`` following
     the colour string -- TODO confirm against the signature).
     """
     # assertRaises turns a missing ValueError into a regular test failure
     # instead of an unrelated generic Exception (reported as an error).
     with self.assertRaises(
             ValueError, msg="Vertical handwritten did not throw"):
         FakeTextDataGenerator.generate(
             9,
             "TEST TEST TEST",
             "tests/font.ttf",
             "tests/out/",
             64,
             "jpg",
             0,
             False,
             0,
             False,
             1,
             0,
             0,
             True,
             0,
             1000,
             2,
             "#010101",
             1,
             1,
             0,
             (5, 5, 5, 5),
             0,
             0,
             False,
             os.path.join(
                 os.path.split(os.path.realpath(__file__))[0],
                 "trdg/images"),
         )
Exemplo n.º 5
0
    def test_generate_data_with_cosine_distorsion(self):
        """Generate sample 5 as JPEG and require it to match the reference.

        ``md5`` of the produced file must equal ``md5`` of the expected file;
        the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            5,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "jpg",
            0,
            False,
            3,
            False,
            1,
            2,
            2,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
        )

        # assertEqual prints both values on mismatch; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(
            md5("tests/out/TEST TEST TEST_5.jpg"),
            md5("tests/expected_results/TEST TEST TEST_5.jpg"),
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_5.jpg")
Exemplo n.º 6
0
    def test_generate_data_with_extension(self):
        """Generate sample 1 as PNG and require it to match the reference.

        ``md5`` of the produced file must equal ``md5`` of the expected file;
        the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            1,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            32,
            "png",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
        )

        # assertEqual prints both values on mismatch; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(
            md5("tests/out/TEST TEST TEST_1.png"),
            md5("tests/expected_results/TEST TEST TEST_1.png"),
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_1.png")
Exemplo n.º 7
0
    def test_generate_data_with_skew_angle(self):
        """Generate sample 2 as JPEG and require it to match the reference.

        ``md5`` of the produced file must equal ``md5`` of the expected file;
        the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            2,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "jpg",
            15,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
        )

        # assertEqual prints both values on mismatch; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(
            md5("tests/out/TEST TEST TEST_2.jpg"),
            md5("tests/expected_results/TEST TEST TEST_2.jpg"),
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_2.jpg")
Exemplo n.º 8
0
 def test_raise_if_invalid_orientation(self):
     """generate() must raise ValueError for this argument set.

     The test name attributes the error to the orientation value
     (presumably the ``3`` following the colour string -- TODO confirm
     against the signature).
     """
     # assertRaises turns a missing ValueError into a regular test failure
     # instead of an unrelated generic Exception (reported as an error).
     with self.assertRaises(
             ValueError, msg="Invalid orientation did not throw"):
         FakeTextDataGenerator.generate(
             20,
             "TEST TEST TEST",
             "tests/font.ttf",
             "tests/out/",
             64,
             "jpg",
             0,
             False,
             0,
             False,
             1,
             0,
             0,
             False,
             0,
             1000,
             2,
             "#010101",
             3,
             1,
             0,
             (5, 5, 5, 5),
             0,
             0,
             False,
             os.path.join(
                 os.path.split(os.path.realpath(__file__))[0],
                 "trdg/images"),
         )
Exemplo n.º 9
0
 def test_generate_text_with_unknown_orientation(self):
     """generate() must raise ValueError for this argument set.

     The test name attributes the error to the orientation value
     (presumably the ``100`` below -- TODO confirm against the signature).
     """
     # assertRaises turns a missing ValueError into a regular test failure
     # instead of an unrelated generic Exception (reported as an error).
     with self.assertRaises(
             ValueError, msg="Unknown orientation did not throw"):
         FakeTextDataGenerator.generate(
             12,
             "TEST TEST TEST",
             "tests/font.ttf",
             "tests/out/",
             32,
             "jpg",
             0,
             False,
             0,
             False,
             1,
             0,
             0,
             False,
             0,
             -1,
             0,
             "#010101",
             100,
             2,
             0,
             (5, 5, 5, 5),
             0,
             0,
             False,
             os.path.join(
                 os.path.split(os.path.realpath(__file__))[0],
                 "trdg/images"),
         )
Exemplo n.º 10
0
    def test_generate_data_with_quasicrystal_background_from_generate(self):
        """Generate sample 19 as PNG and delete the produced file.

        No image comparison is made; the test only requires that generation
        completes and that the output file exists so it can be removed.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")

        FakeTextDataGenerator.generate(
            19, "TEST TEST TEST", "tests/font.ttf", "tests/out/", 64, "png",
            0, False, 0, False, 2, 0, 0, False, 0, -1, 0, "#010101",
            0, 1, 0, (5, 5, 5, 5), 0, 0, True, image_dir,
        )

        os.remove("tests/out/TEST TEST TEST_19.png")
Exemplo n.º 11
0
 def test_raise_if_handwritten_and_vertical(self):
     """generate() must raise ValueError for this argument set.

     Per the test name, the rejected combination is handwritten text with a
     vertical orientation (presumably the ``True`` and the ``1`` following
     the colour string -- TODO confirm against the signature).
     """
     # assertRaises turns a missing ValueError into a regular test failure
     # instead of an unrelated generic Exception (reported as an error).
     with self.assertRaises(
             ValueError, msg="Vertical handwritten did not throw"):
         FakeTextDataGenerator.generate(
             9,
             "TEST TEST TEST",
             "tests/font.ttf",
             "tests/out/",
             64,
             "jpg",
             0,
             False,
             0,
             False,
             1,
             0,
             0,
             True,
             0,
             1000,
             2,
             "#010101",
             1,
             1,
             0,
             (5, 5, 5, 5),
             0,
             0,
         )
Exemplo n.º 12
0
    def test_generate_data_with_right_alignment(self):
        """Generate sample 8 as JPEG and require it to match the reference.

        ``md5`` of the produced file must equal ``md5`` of the expected file;
        the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            8,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "jpg",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            1000,
            2,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
        )

        # assertEqual prints both values on mismatch; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(
            md5("tests/out/TEST TEST TEST_8.jpg"),
            md5("tests/expected_results/TEST TEST TEST_8.jpg"),
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_8.jpg")
Exemplo n.º 13
0
    def test_generate_vertical_text_with_variable_space(self):
        """Generate sample 12 as JPEG and require it to match the reference.

        ``md5`` of the produced file must equal ``md5`` of the expected file;
        the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            12,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            32,
            "jpg",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            -1,
            0,
            "#010101",
            1,
            2,
            0,
            (5, 5, 5, 5),
            0,
            0,
        )

        # assertEqual prints both values on mismatch; assertTrue(a == b)
        # would only report "False is not true".
        self.assertEqual(
            md5("tests/out/TEST TEST TEST_12.jpg"),
            md5("tests/expected_results/TEST TEST TEST_12.jpg"),
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_12.jpg")
Exemplo n.º 14
0
def get_image(text, font, height, width, background_type, color, blur):
    """Return the result of FakeTextDataGenerator.generate for ``text``.

    Caller-supplied values are forwarded as ``text``, ``font``, ``size``
    (from ``height``), ``width``, ``background_type``, ``text_color`` (from
    ``color``) and ``blur``. Every other option is fixed below, and
    ``image_dir`` is taken from the module-level ``images_dir``.
    """
    generate = FakeTextDataGenerator.generate

    # Values that come from the caller.
    options = {
        "text": text,
        "font": font,
        "size": height,
        "width": width,
        "background_type": background_type,
        "text_color": color,
        "blur": blur,
    }
    # Values that are the same for every call.
    options.update({
        "index": -1,
        "out_dir": None,
        "extension": None,
        "skewing_angle": 0,
        "random_skew": False,
        "random_blur": True,
        "distorsion_type": 0,
        "distorsion_orientation": 0,
        "is_handwritten": False,
        "name_format": 0,
        "alignment": 1,
        "orientation": 0,
        "space_width": 1.0,
        "character_spacing": 0,
        "margins": (5, 5, 5, 5),
        "fit": False,
        "output_mask": False,
        "word_split": False,
        "image_dir": images_dir,
    })
    return generate(**options)
Exemplo n.º 15
0
    def test_generate_data_with_tesseract_output_bounding_box(self):
        """Generate sample 22 with ``output_bboxes=2`` and compare to the reference.

        The value returned by ``diff`` for the produced and expected JPEGs must
        be below 0.11; afterwards the image and its ``.box`` companion file are
        removed from ``tests/out/``.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")
        produced = "tests/out/TEST TEST TEST_22.jpg"
        expected = "tests/expected_results/TEST TEST TEST_22.jpg"

        FakeTextDataGenerator.generate(
            22, "TEST TEST TEST", "tests/font.ttf", "tests/out/", 64, "jpg",
            0, False, 0, False, 1, 0, 0, False, 0, -1, 0, "#010101",
            0, 1, 0, (5, 5, 5, 5), 0, 0, False, image_dir,
            output_bboxes=2,
        )

        score = diff(produced, expected, delete_diff_file=True)
        self.assertLess(score, 0.11)

        os.remove(produced)
        os.remove("tests/out/TEST TEST TEST_22.box")
Exemplo n.º 16
0
    def test_generate_data_with_arabic_text(self):
        """Generate the Arabic sample (index 21) and compare it to the reference.

        The value returned by ``diff`` for the produced and expected PNGs must
        be below 0.05; the produced file is removed afterwards.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")
        produced = "tests/out/21_اختبار اختبار اختبار.png"
        expected = "tests/expected_results/21_اختبار اختبار اختبار.png"

        FakeTextDataGenerator.generate(
            21,
            "اختبار اختبار اختبار",
            "tests/font_ar.ttf", "tests/out/", 64, "png",
            0, False, 0, False, 1, 0, 0, False, 1, -1, 0, "#010101",
            0, 1, 0, (5, 5, 5, 5), 0, 0, True, image_dir,
        )

        score = diff(produced, expected, delete_diff_file=True)
        self.assertLess(score, 0.05)

        os.remove(produced)
Exemplo n.º 17
0
    def test_generate_data_with_wrong_name_format(self):
        """Generate sample 18 and compare it to the reference image.

        Per the test name an invalid name format is supplied (presumably the
        ``3`` below -- TODO confirm); the output is still expected at
        ``tests/out/TEST TEST TEST_18.png``. The value returned by ``diff``
        must be below 0.05; the produced file is removed afterwards.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")
        produced = "tests/out/TEST TEST TEST_18.png"
        expected = "tests/expected_results/TEST TEST TEST_18.png"

        FakeTextDataGenerator.generate(
            18, "TEST TEST TEST", "tests/font.ttf", "tests/out/", 64, "png",
            0, False, 0, False, 1, 0, 0, False, 3, -1, 0, "#010101",
            0, 1, 0, (5, 5, 5, 5), 0, 0, True, image_dir,
        )

        score = diff(produced, expected, delete_diff_file=True)
        self.assertLess(score, 0.05)

        os.remove(produced)
Exemplo n.º 18
0
    def test_generate_vertical_text_with_variable_space(self):
        """Generate sample 12 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.05; the produced file is removed afterwards.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")
        produced = "tests/out/TEST TEST TEST_12.jpg"
        expected = "tests/expected_results/TEST TEST TEST_12.jpg"

        FakeTextDataGenerator.generate(
            12, "TEST TEST TEST", "tests/font.ttf", "tests/out/", 32, "jpg",
            0, False, 0, False, 1, 0, 0, False, 0, -1, 0, "#010101",
            1, 2, 0, (5, 5, 5, 5), 0, 0, False, image_dir,
        )

        score = diff(produced, expected, delete_diff_file=True)
        self.assertLess(score, 0.05)

        os.remove(produced)
Exemplo n.º 19
0
    def test_generate_data_with_right_alignment(self):
        """Generate sample 8 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.05; the produced file is removed afterwards.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")
        produced = "tests/out/TEST TEST TEST_8.jpg"
        expected = "tests/expected_results/TEST TEST TEST_8.jpg"

        FakeTextDataGenerator.generate(
            8, "TEST TEST TEST", "tests/font.ttf", "tests/out/", 64, "jpg",
            0, False, 0, False, 1, 0, 0, False, 0, 1000, 2, "#010101",
            0, 1, 0, (5, 5, 5, 5), 0, 0, False, image_dir,
        )

        score = diff(produced, expected, delete_diff_file=True)
        self.assertLess(score, 0.05)

        os.remove(produced)
Exemplo n.º 20
0
    def test_generate_data_with_hindi_text(self):
        """Generate the Hindi sample (index 22) and compare it to the reference.

        The value returned by ``diff`` for the produced and expected PNGs must
        be below 0.17; the produced file is removed afterwards.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")
        produced = "tests/out/22_परीक्षा परीक्षा परीक्षा.png"
        expected = "tests/expected_results/22_परीक्षा परीक्षा परीक्षा.png"

        FakeTextDataGenerator.generate(
            22,
            "परीक्षा परीक्षा परीक्षा",
            "tests/font_hi.ttf", "tests/out/", 64, "png",
            0, False, 0, False, 1, 0, 0, False, 1, -1, 0, "#010101",
            0, 1, 0, (5, 5, 5, 5), 0, 0, True, image_dir,
        )

        score = diff(produced, expected, delete_diff_file=True)
        self.assertLess(score, 0.17)

        os.remove(produced)
Exemplo n.º 21
0
    def test_generate_data_with_cosine_distorsion(self):
        """Generate sample 5 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.01; the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            5,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "jpg",
            0,
            False,
            3,
            False,
            1,
            2,
            2,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
            False,
            os.path.join(
                os.path.split(os.path.realpath(__file__))[0], "trdg/images"),
        )

        # assertLess shows the measured value on failure, which
        # assertTrue(value < limit) cannot do.
        self.assertLess(
            diff(
                "tests/out/TEST TEST TEST_5.jpg",
                "tests/expected_results/TEST TEST TEST_5.jpg",
                delete_diff_file=True,
            ),
            0.01,
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_5.jpg")
Exemplo n.º 22
0
    def test_generate_data_with_fit(self):
        """Generate sample 13 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.19; the produced file is removed afterwards.
        """
        module_dir = os.path.split(os.path.realpath(__file__))[0]
        image_dir = os.path.join(module_dir, "trdg/images")
        produced = "tests/out/TEST TEST TEST_13.jpg"
        expected = "tests/expected_results/TEST TEST TEST_13.jpg"

        FakeTextDataGenerator.generate(
            13, "TEST TEST TEST", "tests/font.ttf", "tests/out/", 64, "jpg",
            0, False, 0, False, 1, 0, 0, False, 0, -1, 0, "#010101",
            0, 1, 0, (0, 0, 0, 0), 1, 0, False, image_dir,
        )

        score = diff(produced, expected, delete_diff_file=True)
        self.assertLess(score, 0.19)

        os.remove(produced)
    def test_generate_data_with_sine_distorsion(self):
        """Generate sample 4 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.01; the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            4,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "jpg",
            0,
            False,
            3,
            False,
            1,
            1,
            2,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
            False,
        )

        # assertLess shows the measured value on failure, which
        # assertTrue(value < limit) cannot do.
        self.assertLess(
            diff(
                "tests/out/TEST TEST TEST_4.jpg",
                "tests/expected_results/TEST TEST TEST_4.jpg",
                delete_diff_file=True,
            ),
            0.01,
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_4.jpg")
    def test_generate_vertical_text(self):
        """Generate sample 10 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.01; the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            10,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            32,
            "jpg",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            -1,
            0,
            "#010101",
            1,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
            False,
        )

        # assertLess shows the measured value on failure, which
        # assertTrue(value < limit) cannot do.
        self.assertLess(
            diff(
                "tests/out/TEST TEST TEST_10.jpg",
                "tests/expected_results/TEST TEST TEST_10.jpg",
                delete_diff_file=True,
            ),
            0.01,
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_10.jpg")
    def test_generate_horizontal_text_with_variable_space(self):
        """Generate sample 11 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.01; the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            11,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            32,
            "jpg",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            -1,
            0,
            "#010101",
            0,
            4,
            0,
            (5, 5, 5, 5),
            0,
            0,
            False,
        )

        # assertLess shows the measured value on failure, which
        # assertTrue(value < limit) cannot do.
        self.assertLess(
            diff(
                "tests/out/TEST TEST TEST_11.jpg",
                "tests/expected_results/TEST TEST TEST_11.jpg",
                delete_diff_file=True,
            ),
            0.01,
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_11.jpg")
    def test_generate_data_with_center_alignment(self):
        """Generate sample 7 as JPEG and compare it to the reference image.

        The value returned by ``diff`` for the produced and expected files must
        be below 0.01; the produced file is removed afterwards.
        """
        FakeTextDataGenerator.generate(
            7,
            "TEST TEST TEST",
            "tests/font.ttf",
            "tests/out/",
            64,
            "jpg",
            0,
            False,
            0,
            False,
            1,
            0,
            0,
            False,
            0,
            800,
            1,
            "#010101",
            0,
            1,
            0,
            (5, 5, 5, 5),
            0,
            0,
            False,
        )

        # assertLess shows the measured value on failure, which
        # assertTrue(value < limit) cannot do.
        self.assertLess(
            diff(
                "tests/out/TEST TEST TEST_7.jpg",
                "tests/expected_results/TEST TEST TEST_7.jpg",
                delete_diff_file=True,
            ),
            0.01,
        )

        # Only reached when the comparison passed, so a failing run keeps
        # its output file on disk.
        os.remove("tests/out/TEST TEST TEST_7.jpg")
Exemplo n.º 27
0
def _generate_and_recognize(idx, text_lines, proc_params, image_dir, idx0):

    idx_global = idx + idx0

    original_text = text_lines[idx].strip()

    import string
    has_printable_chars = len(
        [c for c in original_text if c in string.printable]) > 0

    if has_printable_chars:

        from trdg.data_generator import FakeTextDataGenerator

        font = proc_params.fonts[idx_global % len(proc_params.fonts)]

        if isinstance(proc_params.distorsion_type, list):
            import random
            distorsion_type = random.choice(proc_params.distorsion_type)
        else:
            distorsion_type = proc_params.distorsion_type

        img = FakeTextDataGenerator.generate(
            idx,
            original_text,
            font=font,
            out_dir=None,
            extension=None,
            name_format=0,
            size=proc_params.size,
            skewing_angle=proc_params.skewing_angle,
            random_skew=proc_params.random_skew,
            blur=proc_params.blur,
            random_blur=proc_params.random_blur,
            background_type=proc_params.background_type,
            distorsion_type=distorsion_type,
            distorsion_orientation=proc_params.distorsion_orientation,
            is_handwritten=proc_params.is_handwritten,
            width=proc_params.width,
            alignment=proc_params.alignment,
            text_color=proc_params.text_color,
            orientation=proc_params.orientation,
            space_width=proc_params.space_width,
            character_spacing=proc_params.character_spacing,
            margins=proc_params.margins,
            fit=proc_params.fit,
            output_mask=proc_params.output_mask,
            image_dir=image_dir,
            word_split=False)

        if proc_params.save_img:
            # img.save(os.path.join(image_dir, f"{idx_global:05d}.jpg")) # keep all images
            img.save(os.path.join(image_dir, f"{idx:05d}.jpg")
                     )  # keep last N images (ring buffer like saving)

        from tesserocr import PyTessBaseAPI, PSM, OEM

        oem = OEM.LSTM_ONLY if _is_tesseract4() else OEM.DEFAULT
        psm = PSM.RAW_LINE if _is_tesseract4() else PSM.SINGLE_LINE

        with PyTessBaseAPI(psm=psm, oem=oem) as ocr:

            ocr.SetImage(img)

            try:
                recognized_text = ocr.GetUTF8Text()
                recognized_text = recognized_text.strip().replace("\n",
                                                                  "").replace(
                                                                      "\t", "")
            except RuntimeError:
                recognized_text = ""  # set empty recognized text - it will be ignored in the latter stage
    else:
        recognized_text = ""  # set empty recognized text - it will be ignored in the latter stage

    return (img, original_text, recognized_text)