def batch_generator(self, queue):
        """Render text images forever and enqueue them as training batches.

        Every batch contains ``self.batch_size`` samples pulled from a
        ``GeneratorFromDict`` built for ``self.language``. Each item put on
        ``queue`` is the tuple ``(batch_y, batch_dt, batch_x)``:

        * ``batch_y``  -- flat array of the raw labels,
        * ``batch_dt`` -- result of ``sparse_tuple_from`` applied to the
          flattened encoded labels,
        * ``batch_x``  -- image data reshaped to
          ``(batch, self.max_image_width, 32, 1)``.

        The producer pauses while 20 or more batches are pending, so no
        rendered batch is ever thrown away.

        Args:
            queue: Queue-like object exposing ``qsize()`` and ``put()``.

        This method loops forever and never returns.
        """
        import time

        max_pending = 20
        generator = GeneratorFromDict(language=self.language)
        while True:
            # Wait for room *before* doing the expensive rendering work;
            # building a batch only to discard it burns CPU for nothing.
            while queue.qsize() >= max_pending:
                time.sleep(0.01)

            batch = []
            while len(batch) < self.batch_size:
                img, lbl = generator.next()
                batch.append((
                    # Grayscale pixels, resized; only the first element of
                    # resize_image's result is kept.
                    resize_image(np.array(img.convert("L")),
                                 self.max_image_width)[0],
                    lbl,
                    label_to_array(lbl, self.char_vector),
                ))

            # Transpose the list of samples into three parallel tuples.
            raw_batch_x, raw_batch_y, raw_batch_la = zip(*batch)

            batch_y = np.reshape(np.array(raw_batch_y), (-1))

            batch_dt = sparse_tuple_from(
                np.reshape(np.array(raw_batch_la), (-1)))

            # Swap axes 1 and 2 so the reshape below can put the width axis
            # first -- assumes resize_image yields (32, max_image_width)
            # arrays; TODO confirm.
            raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)

            batch_x = np.reshape(
                np.array(raw_batch_x),
                (len(raw_batch_x), self.max_image_width, 32, 1))

            queue.put((batch_y, batch_dt, batch_x))
    def __getitem__(self, index):
        """Return the next ``(image, label)`` pair from the wrapped generator.

        ``index`` is accepted but not used: samples are drawn sequentially.
        When the generator raises ``StopIteration`` a new one is built from
        ``self.args`` and the first sample of that new generator is returned.
        """
        try:
            sample = self.generator.next()
        except StopIteration:
            # Exhausted: start over with an identically configured generator.
            self.generator = GeneratorFromDict(**self.args)
            sample = self.generator.next()

        image, text = sample
        return image, text
示例#3
0
 def test_generator_from_dict(self):
     """Check that 100 consecutive default samples all have ``size[1] == 32``."""
     samples = GeneratorFromDict()
     for _ in range(100):
         picture, _label = next(samples)
         self.assertTrue(picture.size[1] == 32, "Shape is not right")
示例#4
0
def gen(index):
    """Generate this worker's share of images and append one line per sample.

    The total ``numImageGen`` is split evenly over ``numProcess`` workers;
    worker 0 additionally takes the remainder. The generator is stored in
    the module-level ``generator`` container under ``index``, and each
    sample is appended to ``filenames.txt`` as ``<img[1]> <label>``.
    """
    print("PID {0}: {1}".format(index, os.getpid()))
    share, leftover = divmod(numImageGen, numProcess)
    count = share + leftover if index == 0 else share
    generator[index] = GeneratorFromDict(count=count)
    with open("filenames.txt", 'a') as listing:
        for sample, text in generator[index]:
            entry = sample[1] + " " + text + "\n"
            listing.write(entry)
 def __init__(self, count, textlength, dictpath, fonts_dir=None,
              img_dir=None):
     """Configure and create the underlying ``GeneratorFromDict``.

     The keyword arguments handed to the generator are kept in
     ``self.args``.

     Args:
         count: Number of samples; stored as ``self.count`` and passed to
             the generator as ``count``.
         textlength: Passed to the generator as ``length``.
         dictpath: Passed to the generator as ``language``.
         fonts_dir: Directory whose entries are all used as fonts.
             Defaults to the path that was previously hard-coded.
         img_dir: Passed to the generator as ``image_dir``. Defaults to
             the path that was previously hard-coded.

     Raises:
         OSError: If ``fonts_dir`` cannot be listed.
     """
     if fonts_dir is None:
         fonts_dir = "/home/ldl/桌面/python-notebook/My_trdg/trdg/fonts/cn"
     if img_dir is None:
         img_dir = "/home/ldl/桌面/论文/文本识别/TextRecognitionDataGenerator/trdg/images"

     self.count = count
     # Every entry of the directory is treated as a font file.
     fonts = [os.path.join(fonts_dir, i) for i in os.listdir(fonts_dir)]
     self.args = dict(count=self.count,
                      length=textlength,
                      allow_variable=True,
                      fonts=fonts,
                      language=dictpath,
                      size=64,
                      blur=2,
                      random_blur=True,
                      image_dir=img_dir,
                      background_type=[0, 1, 2, 3],
                      distorsion_type=[0, 1, 2],
                      text_color="#000000,#FF8F8F",
                      image_mode="L",
                      char_cat="",
                      space_width=[1, 2, 3, 4],
                      character_spacing=[0, 1, 2, 3, 4, 5])
     self.generator = GeneratorFromDict(**self.args)
 def __init__(self, batch_size=1, alphabet=string.printable):
     """Set up the batch parameters and the synthetic text-image generator.

     Args:
         batch_size: Stored as ``self.batch_size``.
         alphabet: Characters the instance works with; its length is
             stored as ``self.alphabet_size``.
     """
     # The one-argument form ``super(FakeTextImageGenerator)`` creates an
     # unbound super object, so the parent initializer was never invoked.
     super(FakeTextImageGenerator, self).__init__()
     self.batch_size = batch_size
     self.alphabet = alphabet
     self.alphabet_size = len(alphabet)
     self.height = 32
     self.generator = GeneratorFromDict(
         length=5,
         allow_variable=True,
         language="en",
         size=32,
         background_type=1,
         fit=True,
         text_color="#000000,#888888",
     )
示例#7
0
    def __init__(self, dir, transform=None, artificial=False):
        """Load annotated images from ``dir`` and optionally add synthetic ones.

        Args:
            dir: Directory that is listed for ``.jpg`` images and ``.txt``
                annotation files.
            transform: Stored as ``self.transform``; not applied here.
            artificial: When true, ``GeneratorFromDict`` samples are
                appended -- as many as images were loaded from ``dir``.
        """
        super().__init__()

        self.names = os.listdir(dir)
        self.images_names = get_files_with_extension(dir, self.names, ".jpg")
        self.annotation_names = get_files_with_extension(
            dir, self.names, ".txt")
        self.transform = transform

        self.images, self.texts = [], []
        self.texts_length, self.texts_encoded = [], []

        print("Loading train data ...")
        # Image and annotation names are paired purely by position.
        file_pairs = zip(self.images_names, self.annotation_names)
        for image_name, annotation_name in tqdm(file_pairs):
            loaded = self.load_train_data(image_name, annotation_name)
            images, texts, texts_length, texts_encoded = loaded
            self.images.extend(images)
            self.texts_length.extend(texts_length)
            self.texts.extend(texts)
            self.texts_encoded.extend(texts_encoded)

        print(f"Train data: {len(self.texts)}")
        if artificial:
            # Add as many generated samples as real images were loaded.
            synthetic = GeneratorFromDict(count=len(self.images), width=256)

            for image, text in synthetic:
                # The unfiltered text is stored; length and encoding use
                # only the characters present in char_list.
                self.texts.append(text)
                kept = ''.join(c for c in text if c in char_list)
                self.texts_length.append(len(kept))
                self.texts_encoded.append(encode_to_labels(kept))

                self.images.append(image.convert("L"))

            print(f"Train data with artificial data: {len(self.texts)}")

        longest = max(len(encoded) for encoded in self.texts_encoded)
        print(f"max_length: {longest}")
        # Right-pad every encoding to the common length with 1 (not 0).
        padded = []
        for encoded in self.texts_encoded:
            padded.append(encoded + [1] * (longest - len(encoded)))
        self.texts_encoded = np.array(padded)
示例#8
0
 def test_generator_from_dict_stops(self):
     """A generator built with ``count=1`` raises StopIteration on the second draw."""
     single_shot = GeneratorFromDict(count=1)
     next(single_shot)
     with self.assertRaises(StopIteration):
         single_shot.next()
    def __init__(self, args, training=True, semi_amount=0.0):
        """Store the run settings and build the synthetic text generators.

        Args:
            args: Settings object; ``args.batch_size`` is read here and the
                object itself is kept as ``self.args``.
            training: Stored as ``self.is_training``.
            semi_amount: Accepted but not used in this constructor.
        """
        gray_text = '#000000,#888888'

        def random_gen(**options):
            # Every random-string generator here shares count=-1
            # (presumably "no limit" -- confirm against trdg) and "fr".
            return GeneratorFromRandom(count=-1, language="fr", **options)

        def dict_gen(**options):
            # Every dictionary generator here shares allow_variable=True
            # and the "fr" language.
            return GeneratorFromDict(
                allow_variable=True, language="fr", **options)

        self.args = args
        self.is_training = training
        self.batch_size = args.batch_size
        self.current_iter = 0

        self.fake_generators = [
            random_gen(
                length=1,
                size=64,
                background_type=1,
                skewing_angle=2,
                margins=(2, 1, 1, 1),
                use_letters=False,
                use_symbols=False,
                use_numbers=True,
                random_skew=True,
                text_color=gray_text,
            ),
            random_gen(
                length=1,
                size=48,
                background_type=1,
                skewing_angle=2,
                margins=(2, 1, 1, 1),
                use_letters=True,
                use_symbols=True,  # original note here reads "false"
                use_numbers=False,
                random_skew=True,
                text_color=gray_text,
            ),
            random_gen(
                length=3,
                size=24,
                background_type=3,
                skewing_angle=2,
                fit=True,
                random_skew=True,
                text_color=gray_text,
            ),
            random_gen(
                length=3,
                size=32,
                background_type=3,
                skewing_angle=2,
                space_width=2,
                use_symbols=False,
                margins=(8, 8, 8, 8),
                random_skew=False,
            ),
            random_gen(
                length=2,
                size=55,
                background_type=1,
                skewing_angle=3,
                use_symbols=True,  # original note here reads "false"
                fit=True,
                random_skew=True,
                text_color='#0171ff',
            ),
            random_gen(
                length=3,
                size=43,
                background_type=1,
                skewing_angle=2,
                margins=(4, 2, 10, 6),
                random_skew=False,
                text_color=gray_text,
            ),
            random_gen(
                length=5,
                size=37,
                space_width=3,
                background_type=1,
                use_symbols=False,
                fit=True,
                text_color=gray_text,
            ),
            random_gen(
                length=5,
                size=28,
                background_type=1,
                use_symbols=False,
                fit=True,
                text_color=gray_text,
            ),
        ]
        self.dict_generators = [
            dict_gen(
                length=5,
                size=32,
                background_type=1,
                fit=True,
                text_color=gray_text,
            ),
            dict_gen(
                length=5,
                size=32,
                background_type=1,
                margins=(7, 4, 6, 4),
                text_color=gray_text,
            ),
            dict_gen(
                length=5,
                size=32,
                background_type=1,
                fit=True,
                text_color=gray_text,
            ),
        ]
        # One dictionary and one random generator, both with
        # background_type=0 and no explicit text colour.
        self.classic_gen = [
            dict_gen(
                length=3,
                size=32,
                background_type=0,
                fit=True,
            ),
            random_gen(
                length=5,
                size=28,
                background_type=0,
                use_symbols=False,
                fit=True,
            ),
        ]
        self.height = 32
        self.width = None

        self.alphabet = ALPHABET
        self.alphabet_size = len(self.alphabet)
示例#10
0
def gen(index):
    """Run a 1000-sample generator to exhaustion, discarding every sample.

    The generator is stored in the module-level ``generator`` container at
    position ``index - 1``. ``filenames.txt`` is opened in write mode for
    the duration of the loop (which truncates it) but nothing is written.
    """
    slot = index - 1
    print("PID {0}: {1}".format(index, os.getpid()))
    generator[slot] = GeneratorFromDict(count=1000)
    with open("filenames.txt", 'w'):
        for _image, _label in generator[slot]:
            continue
from trdg.generators import (
    GeneratorFromDict,
    GeneratorFromRandom,
    GeneratorFromStrings,
    GeneratorFromWikipedia,
)


# Render two blurred samples and show each image together with its box.
generator = GeneratorFromDict(
    # This argument was left empty (a syntax error); "en" is believed to be
    # trdg's default dictionary -- confirm against the installed version.
    language="en",
    count=2,
    blur=2,
    random_blur=True,
)

for ret, lbl in generator:
    # NOTE(review): assumes the first element of each yielded pair is
    # itself indexable as (image, box) -- confirm against the trdg build
    # in use.
    img = ret[0]
    box = ret[1]
    img.show()
    print(box)