def batch_generator(self, queue):
    """Build training batches forever and enqueue them on ``queue``.

    Each batch holds ``self.batch_size`` samples drawn from a
    ``GeneratorFromDict`` created for ``self.language``. Every sample image
    is converted to mode "L", passed through ``resize_image`` with
    ``self.max_image_width``, and paired with its label and the result of
    ``label_to_array(label, self.char_vector)``.

    The tuple put on the queue is ``(batch_y, batch_dt, batch_x)``:

    * ``batch_y``  - the labels, flattened to one dimension.
    * ``batch_dt`` - ``sparse_tuple_from`` applied to the flattened
      encoded labels.
    * ``batch_x``  - the images with axes 1 and 2 swapped, reshaped to
      ``(batch, self.max_image_width, 32, 1)``.

    Args:
        queue: Object exposing ``qsize()`` and ``put()``.

    This function never returns; it is meant to run in a worker.
    """
    # Local import: the snippet's module-level imports are not visible here.
    import time

    # Back-pressure threshold: nothing is enqueued once this many batches
    # are already waiting.
    max_pending_batches = 20

    generator = GeneratorFromDict(language=self.language)
    while True:
        # Check capacity *before* doing the expensive work. Previously a
        # batch was fully built and then silently thrown away whenever the
        # queue was full, burning CPU on samples nobody consumed.
        if queue.qsize() >= max_pending_batches:
            time.sleep(0.01)
            continue

        batch = []
        while len(batch) < self.batch_size:
            img, lbl = generator.next()
            batch.append((
                resize_image(np.array(img.convert("L")), self.max_image_width)[0],
                lbl,
                label_to_array(lbl, self.char_vector),
            ))

        raw_batch_x, raw_batch_y, raw_batch_la = zip(*batch)

        batch_y = np.reshape(np.array(raw_batch_y), (-1))
        batch_dt = sparse_tuple_from(
            np.reshape(np.array(raw_batch_la), (-1)))

        # Swap axes 1 and 2, then add a trailing channel axis.
        raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)
        batch_x = np.reshape(
            np.array(raw_batch_x),
            (len(raw_batch_x), self.max_image_width, 32, 1))

        queue.put((batch_y, batch_dt, batch_x))
def __getitem__(self, index):
    """Return the next ``(image, label)`` pair from the wrapped generator.

    ``index`` is accepted but not used: samples are produced sequentially.
    When the current generator raises ``StopIteration`` a new
    ``GeneratorFromDict`` is built from ``self.args`` and the first sample
    of that new generator is returned instead.
    """
    try:
        sample = self.generator.next()
    except StopIteration:
        # Current generator is exhausted: start over with a fresh one.
        self.generator = GeneratorFromDict(**self.args)
        sample = self.generator.next()
    image, text = sample
    return image, text
def test_generator_from_dict(self):
    """Draw 100 samples from a default ``GeneratorFromDict`` and check that
    ``size[1]`` of every produced image equals 32."""
    generator = GeneratorFromDict()
    for _ in range(100):
        image, _label = next(generator)
        self.assertTrue(image.size[1] == 32, "Shape is not right")
def gen(index):
    """Worker body: generate this worker's share of samples and append one
    line per sample to ``filenames.txt``.

    The total ``numImageGen`` is split evenly across ``numProcess``
    workers; worker 0 additionally takes the remainder. The created
    generator is stored in the module-level ``generator`` container at
    position ``index``.
    """
    print("PID {0}: {1}".format(index, os.getpid()))

    share, leftover = divmod(numImageGen, numProcess)
    count = share + leftover if index == 0 else share

    generator[index] = GeneratorFromDict(count=count)
    with open("filenames.txt", 'a') as out:
        for sample, text in generator[index]:
            # One line per sample: second element of the sample, a space,
            # then the label.
            out.write(sample[1] + " " + text + "\n")
def __init__(self, count, textlength, dictpath, fonts_dir=None, img_dir=None):
    """Configure and create the underlying ``GeneratorFromDict``.

    Args:
        count: Forwarded to the generator as ``count``; also stored on
            ``self.count``.
        textlength: Forwarded to the generator as ``length``.
        dictpath: Forwarded to the generator as ``language``.
        fonts_dir: Directory whose entries are all passed to the generator
            as ``fonts``. Defaults to the path that was previously
            hard-coded here.
        img_dir: Forwarded to the generator as ``image_dir``. Defaults to
            the path that was previously hard-coded here.

    The full keyword set is kept on ``self.args`` so an identical generator
    can be rebuilt later; the generator itself is stored on
    ``self.generator``.
    """
    # The two directories used to be fixed to one developer's home
    # directory. They are now overridable, with the old values as defaults
    # so existing callers behave exactly as before.
    if fonts_dir is None:
        fonts_dir = "/home/ldl/桌面/python-notebook/My_trdg/trdg/fonts/cn"
    if img_dir is None:
        img_dir = "/home/ldl/桌面/论文/文本识别/TextRecognitionDataGenerator/trdg/images"

    self.count = count
    fonts = [os.path.join(fonts_dir, name) for name in os.listdir(fonts_dir)]

    self.args = dict(
        count=self.count,
        length=textlength,
        allow_variable=True,
        fonts=fonts,
        language=dictpath,
        size=64,
        blur=2,
        random_blur=True,
        image_dir=img_dir,
        background_type=[0, 1, 2, 3],
        distorsion_type=[0, 1, 2],  # spelling matches the generator's keyword
        text_color="#000000,#FF8F8F",
        image_mode="L",
        char_cat="",
        space_width=[1, 2, 3, 4],
        character_spacing=[0, 1, 2, 3, 4, 5],
    )
    self.generator = GeneratorFromDict(**self.args)
def __init__(self, batch_size=1, alphabet=string.printable):
    """Set up a fake text-image source backed by ``GeneratorFromDict``.

    Args:
        batch_size: Stored on ``self.batch_size``.
        alphabet: Sequence of allowed characters; stored on
            ``self.alphabet`` and its length on ``self.alphabet_size``.

    Also sets ``self.height`` to 32 and creates ``self.generator``.
    """
    # Fix: the original called ``super(FakeTextImageGenerator).__init__()``.
    # One-argument ``super`` yields an *unbound* super object, so that call
    # initialised the super object itself and never ran the parent class
    # initialiser. The zero-argument form binds to ``self`` correctly.
    super().__init__()

    self.batch_size = batch_size
    self.alphabet = alphabet
    self.alphabet_size = len(alphabet)
    self.height = 32
    self.generator = GeneratorFromDict(
        length=5,
        allow_variable=True,
        language="en",
        size=32,
        background_type=1,
        fit=True,
        text_color="#000000,#888888",
    )
def __init__(self, dir, transform=None, artificial=False):
    """Load annotated training images from ``dir`` and optionally extend
    them with synthetic samples.

    Args:
        dir: Directory listed with ``os.listdir``; ``.jpg`` and ``.txt``
            entries are selected via ``get_files_with_extension``.
        transform: Stored on ``self.transform``; not applied here.
        artificial: When true, ``GeneratorFromDict`` is asked for as many
            extra samples as real images were loaded.

    After construction ``self.images``, ``self.texts`` and
    ``self.texts_length`` are lists, and ``self.texts_encoded`` is a NumPy
    array in which every encoded label is right-padded with ``1`` to the
    length of the longest one.
    """
    super().__init__()
    self.names = os.listdir(dir)
    self.images_names = get_files_with_extension(dir, self.names, ".jpg")
    self.annotation_names = get_files_with_extension(dir, self.names, ".txt")
    self.transform = transform

    self.images = []
    self.texts = []
    self.texts_length = []
    self.texts_encoded = []

    print("Loading train data ...")
    # Image and annotation names are paired positionally.
    for image_name, annotation_name in tqdm(
            zip(self.images_names, self.annotation_names)):
        images, texts, texts_length, texts_encoded = self.load_train_data(
            image_name, annotation_name)
        self.images += images
        self.texts_length += texts_length
        self.texts += texts
        self.texts_encoded += texts_encoded
    print(f"Train data: {len(self.texts)}")

    if artificial:
        # Generate additional, synthetic training data.
        generator = GeneratorFromDict(count=len(self.images), width=256)
        for image, text in generator:
            # The unfiltered text is what gets stored in ``self.texts``;
            # length and encoding use only characters found in char_list.
            self.texts.append(text)
            text = ''.join(c for c in text if c in char_list)
            self.texts_length.append(len(text))
            self.texts_encoded.append(encode_to_labels(text))
            image = image.convert("L")
            self.images.append(image)
        print(f"Train data with artificial data: {len(self.texts)}")

    # Fix: ``max`` over an empty sequence raises ValueError, which made an
    # empty directory crash here with an unhelpful message. ``default=0``
    # yields an empty array instead and changes nothing for non-empty data.
    max_text_length = max(map(len, self.texts_encoded), default=0)
    print(f"max_length: {max_text_length}")

    # Pad with 1, deliberately not with 0.
    # NOTE(review): presumably 0 is reserved by the label encoding - confirm.
    self.texts_encoded = np.array(
        [t + [1] * (max_text_length - len(t)) for t in self.texts_encoded])
def test_generator_from_dict_stops(self):
    """A generator created with ``count=1`` must raise ``StopIteration``
    on the call that follows its single sample."""
    generator = GeneratorFromDict(count=1)
    next(generator)  # consume the only sample
    with self.assertRaises(StopIteration):
        generator.next()
def __init__(self, args, training=True, semi_amount=0.0):
    """Store the run settings and build all synthetic-text generators.

    Args:
        args: Settings object; kept on ``self.args`` and read for
            ``batch_size``.
        training: Stored on ``self.is_training``.
        semi_amount: Accepted but not used by this constructor.

    Three generator lists are created: ``self.fake_generators`` (eight
    ``GeneratorFromRandom``), ``self.dict_generators`` (three
    ``GeneratorFromDict``) and ``self.classic_gen`` (one of each).
    """
    self.args = args
    self.is_training = training
    self.batch_size = args.batch_size
    self.current_iter = 0

    # Text colour string shared by most generators below.
    two_tone = '#000000,#888888'

    # Per-generator settings for the random-string generators. Every entry
    # is combined with count=-1 and language="fr" when instantiated.
    random_specs = [
        dict(length=1, size=64, background_type=1, skewing_angle=2,
             margins=(2, 1, 1, 1), use_letters=False, use_symbols=False,
             use_numbers=True, random_skew=True, text_color=two_tone),
        # NOTE(review): the original carried an inline "false" remark next
        # to use_symbols here - confirm True is the intended value.
        dict(length=1, size=48, background_type=1, skewing_angle=2,
             margins=(2, 1, 1, 1), use_letters=True, use_symbols=True,
             use_numbers=False, random_skew=True, text_color=two_tone),
        dict(length=3, size=24, background_type=3, skewing_angle=2,
             fit=True, random_skew=True, text_color=two_tone),
        dict(length=3, size=32, background_type=3, skewing_angle=2,
             space_width=2, use_symbols=False, margins=(8, 8, 8, 8),
             random_skew=False),
        # NOTE(review): same inline "false" remark on use_symbols as above.
        dict(length=2, size=55, background_type=1, skewing_angle=3,
             use_symbols=True, fit=True, random_skew=True,
             text_color='#0171ff'),
        dict(length=3, size=43, background_type=1, skewing_angle=2,
             margins=(4, 2, 10, 6), random_skew=False, text_color=two_tone),
        dict(length=5, size=37, space_width=3, background_type=1,
             use_symbols=False, fit=True, text_color=two_tone),
        dict(length=5, size=28, background_type=1, use_symbols=False,
             fit=True, text_color=two_tone),
    ]
    self.fake_generators = [
        GeneratorFromRandom(count=-1, language="fr", **spec)
        for spec in random_specs
    ]

    # The dictionary-based generators differ only in fit vs. margins.
    dict_variations = [
        dict(fit=True),
        dict(margins=(7, 4, 6, 4)),
        dict(fit=True),
    ]
    self.dict_generators = [
        GeneratorFromDict(length=5, allow_variable=True, language="fr",
                          size=32, background_type=1, text_color=two_tone,
                          **variation)
        for variation in dict_variations
    ]

    # Both of these use background_type=0 and no explicit text colour.
    plain_dict = GeneratorFromDict(
        length=3, allow_variable=True, language="fr", size=32,
        background_type=0, fit=True)
    plain_random = GeneratorFromRandom(
        count=-1, length=5, language="fr", size=28, background_type=0,
        use_symbols=False, fit=True)
    self.classic_gen = [plain_dict, plain_random]

    self.height = 32
    self.width = None
    # ALPHABET is a module-level name (string.printable was an earlier
    # alternative, per the original comment).
    self.alphabet = ALPHABET
    self.alphabet_size = len(self.alphabet)
def gen(index):
    """Worker body: create a 1000-sample generator, store it in the
    module-level ``generator`` container at position ``index - 1`` and
    iterate it to exhaustion.

    ``filenames.txt`` is opened in write mode for the duration of the
    loop, but nothing is written to it.
    """
    print("PID {0}: {1}".format(index, os.getpid()))
    slot = index - 1
    generator[slot] = GeneratorFromDict(count=1000)
    with open("filenames.txt", 'w') as _handle:
        for _image, _label in generator[slot]:
            # Samples are produced and dropped on purpose.
            continue
from trdg.generators import (
    GeneratorFromDict,
    GeneratorFromRandom,
    GeneratorFromStrings,
    GeneratorFromWikipedia,
)

# Demo: render two blurred samples and show each image with its box.
#
# Fix: the original read ``language=,`` - a keyword with no value, which is
# a syntax error and kept the whole script from being parsed.
# NOTE(review): the intended value is not visible in this file; "en" is
# used as a placeholder - replace it with the language/dictionary you need.
generator = GeneratorFromDict(
    language="en",
    count=2,
    blur=2,
    random_blur=True,
)

for ret, lbl in generator:
    # Each yielded item pairs a (image, box) sequence with its label.
    img = ret[0]
    box = ret[1]
    img.show()
    print(box)