def get_next_batch_for_srgan(batch_size=128):
    """Build one batch of (degraded, clean) text-image pairs on square canvases.

    For every sample a random font and random text are rendered, resized to
    ``image_height`` and converted to grayscale. The inverted, 0..1-scaled
    copy of that rendering is the target. The input is the same rendering
    shrunk to a random height, resized back to ``image_height``, passed
    through ``utils_font.add_noise``, dimmed by a random factor in
    ``[0.3, 1]`` and randomly inverted. Both images are placed on a zero
    canvas with ``utils.square_img``.

    Args:
        batch_size: Number of image pairs to generate.

    Returns:
        Tuple ``(inputs, targets)``; each is an array of shape
        ``[batch_size, image_size, image_size]``.
    """
    inputs_images = []
    targets_images = []
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(3, 70)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        # An older note here said "2 was removed"; the list still contains 2.
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        image = utils_font.get_font_image_from_url(
            text, font_name, font_size, font_mode, font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)

        # Clean target: inverted and scaled to 0..1.
        targets_image = np.asarray(image.copy())
        targets_images.append((255. - targets_image) / 255.)

        # Floor division by 1.5 / 2.5 yields a float; randint() needs ints
        # (a float bound raises TypeError on Python 3.12+).
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, int(image_height // divisor))

        # Degrade the input: shrink, scale back up, add noise, dim.
        image = utils_pil.resize_by_height(image, _h)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = utils_font.add_noise(image)
        image = np.asarray(image)
        image = image * random.uniform(0.3, 1)

        # Random polarity, scaled by 1/255.
        if random.random() > 0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        inputs_images.append(image)

    inputs = np.zeros([batch_size, image_size, image_size])
    for i, img in enumerate(inputs_images):
        inputs[i, :] = utils.square_img(img,
                                        np.zeros([image_size, image_size]))

    targets = np.zeros([batch_size, image_size, image_size])
    for i, img in enumerate(targets_images):
        targets[i, :] = utils.square_img(img,
                                         np.zeros([image_size, image_size]))
    return inputs, targets
def get_next_batch_for_srgan(batch_size=128):
    """Build one batch of (degraded, clean) text-image pairs padded to a common width.

    Each sample is a short random text (4 or 5 units as passed to
    ``utils_font.get_random_text``) rendered in a random font, resized to
    ``image_height`` and converted to grayscale. The inverted, 0..1-scaled
    copy is the target. The input is the same rendering shrunk to a random
    height, resized back, passed through ``utils_font.add_noise``, dimmed by
    a random factor and randomly inverted. All images are laid out with
    ``utils.img2vec`` at the widest width seen in the batch and transposed.

    Args:
        batch_size: Number of image pairs to generate.

    Returns:
        Tuple ``(inputs, targets)``; each is an array of shape
        ``[batch_size, max_width_image, image_height]``.
    """
    inputs_images = []
    targets_images = []
    max_width_image = 0
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(4, 5)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        # An older note here said "2 was removed"; the list still contains 2.
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        image = utils_font.get_font_image_from_url(
            text, font_name, font_size, font_mode, font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)
        targets_image = image.copy()

        # Floor division by 1.5 / 2.5 yields a float; randint() needs ints
        # (a float bound raises TypeError on Python 3.12+).
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, int(image_height // divisor))
        image = utils_pil.resize_by_height(image, _h)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)

        # Clean target: inverted and scaled to 0..1.
        targets_image = np.asarray(targets_image)
        targets_images.append((255. - targets_image) / 255.)

        image = utils_font.add_noise(image)
        image = np.asarray(image)
        image = image * random.uniform(0.3, 1)
        # Random polarity, scaled by 1/255.
        if random.random() > 0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        inputs_images.append(image)

        # Track the widest image (input or target) in this batch.
        max_width_image = max(max_width_image, image.shape[1],
                              targets_image.shape[1])

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i, img in enumerate(inputs_images):
        image_vec = utils.img2vec(img, height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i, img in enumerate(targets_images):
        image_vec = utils.img2vec(img, height=image_height,
                                  width=max_width_image, flatten=False)
        targets[i, :] = np.transpose(image_vec)
    return inputs, targets
def get_next_batch_for_res(batch_size=128):
    """Generate a batch of synthetic text images with sparse labels.

    The text of each sample is 12 characters sampled from ``CHARS``, each
    used twice, plus two spaces, shuffled and stripped. It is rendered in a
    random font/size/mode/hint, resized to ``image_height``, passed through
    ``utils_font.add_noise``, converted to grayscale, randomly inverted and
    scaled by 1/255.

    Args:
        batch_size: Number of samples to generate.

    Returns:
        Tuple ``(inputs, sparse_labels, seq_len, info)`` where ``inputs`` has
        shape ``[batch_size, max_width_image, image_height]``,
        ``sparse_labels`` is the result of ``utils.sparse_tuple_from``,
        ``seq_len`` is a length-``batch_size`` array holding one repeated
        value, and ``info`` is a string with one font URL per sample, each
        followed by ``"\\n\\r"``.
    """
    rendered = []
    label_codes = []
    url_lines = []
    widest = 0
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        # The value is unused, but the draw advances the shared random state.
        random.randint(25, 30)
        size_cap = 49 if random.random() > 0.5 else 15
        font_size = random.randint(8, size_cap)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        # 12 sampled characters, each twice, plus two spaces, then shuffled.
        chars = random.sample(CHARS, 12)
        chars = chars + chars + [" ", " "]
        random.shuffle(chars)
        text = "".join(chars).strip()
        label_codes.append([CHARS.index(ch) for ch in text])

        picture = utils_font.get_font_image_from_url(
            text, font_name, font_size, font_mode, font_hint)
        picture = utils_pil.resize_by_height(picture, image_height,
                                             random.random() > 0.5)
        picture = utils_font.add_noise(picture)
        picture = utils_pil.convert_to_gray(picture)
        pixels = utils.resize(np.asarray(picture), height=image_height)

        # Random polarity, scaled by 1/255.
        invert = random.random() > 0.5
        pixels = ((255. - pixels) if invert else pixels) / 255.
        rendered.append(pixels)
        widest = max(widest, pixels.shape[1])

        url_lines.append("%s\n\r" % utils_font.get_font_url(
            text, font_name, font_size, font_mode, font_hint))

    # Always adds between 1 and POOL_SIZE columns, so the width ends up a
    # multiple of POOL_SIZE.
    max_width_image = widest + (POOL_SIZE - widest % POOL_SIZE)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for idx, pixels in enumerate(rendered):
        vec = utils.img2vec(pixels, height=image_height,
                            width=max_width_image, flatten=False)
        inputs[idx, :] = np.transpose(vec)

    labels = [np.asarray(code) for code in label_codes]
    sparse_labels = utils.sparse_tuple_from(labels)

    area = max_width_image * image_height
    seq_len = np.ones(batch_size) * area // (POOL_SIZE * POOL_SIZE)
    info = "".join(url_lines)
    return inputs, sparse_labels, seq_len, info
def get_next_batch_for_res(batch_size=128):
    """Read a batch of pre-rendered text images from the serialized dataset.

    Each record is pulled from the module-level ``dataset`` iterator and
    parsed into ``dataset_example``. The image is converted to grayscale,
    shrunk to fit within ``image_height``, placed with
    ``utils_pil.random_space2``, passed through ``utils_font.add_noise``,
    resized with ``utils.resize`` and randomly inverted. The batch tensor is
    at least ``MAX_IMAGE_WIDTH`` wide.

    Args:
        batch_size: Number of records to consume.

    Returns:
        Tuple ``(inputs, sparse_labels, seq_len, info)`` where ``inputs`` has
        shape ``[batch_size, image_height, max_width_image, 1]``,
        ``sparse_labels`` is the result of ``utils.sparse_tuple_from``,
        ``seq_len`` is a length-``batch_size`` array filled with
        ``SEQ_LENGTH``, and ``info`` is a list of
        ``[font_name, font_size, font_mode, font_hint, text_length]`` string
        lists.

    Raises:
        Exception: When the dataset iterator is exhausted.
    """
    inputs_images = []
    codes = []
    max_width_image = 0
    info = []
    for _ in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished train one data file, stop")
        dataset_example.ParseFromString(serialized_example)
        features = dataset_example.features.feature

        font_name = str(features['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = features['font_size'].int64_list.value[0]
        font_mode = features['font_mode'].int64_list.value[0]
        # Read the hint from its own feature; this used to read 'font_mode'.
        font_hint = features['font_hint'].int64_list.value[0]
        text = str(features['label'].bytes_list.value[0], encoding="utf-8")
        size = features['size'].int64_list.value
        image = features['image'].bytes_list.value[0]

        image = utils_pil.frombytes(tuple(size), image)
        image = utils_pil.convert_to_gray(image)
        _w, h = size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)
        # Shrink by 1-5 px before the random placement step.
        image = utils_pil.resize_by_height(
            image, image_height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, image_height)
        image = utils_font.add_noise(image)
        image = np.asarray(image)
        image = utils.resize(image, image_height, MAX_IMAGE_WIDTH)

        # Random polarity, scaled by 1/255.
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        max_width_image = max(max_width_image, image.shape[1])
        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([font_name, str(font_size), str(font_mode),
                     str(font_hint), str(len(text))])

    # Pad narrower batches out to MAX_IMAGE_WIDTH.
    if max_width_image < MAX_IMAGE_WIDTH:
        max_width_image = MAX_IMAGE_WIDTH

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i, img in enumerate(inputs_images):
        image_vec = utils.img2vec(img, height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.reshape(image_vec,
                                  (image_height, max_width_image, 1))

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * SEQ_LENGTH
    return inputs, sparse_labels, seq_len, info
def get_next_batch_for_res(batch_size=128, has_sparse=True, has_onehot=True,
                           max_width=4096, height=32,
                           need_pad_width_to_max_width=False):
    """Read a batch of pre-rendered text images from the serialized dataset.

    Each record is pulled from the module-level ``dataset`` iterator and
    parsed into ``dataset_example``. The image is converted to grayscale,
    shrunk to fit within ``height``, placed with
    ``utils_pil.random_space2``, passed through ``utils_font.add_noise``,
    resized with ``utils.resize(image, height, max_width)`` and randomly
    inverted.

    Args:
        batch_size: Number of records to consume.
        has_sparse: When true, also return labels converted with
            ``utils.sparse_tuple_from``.
        has_onehot: When true, also return one-hot labels with
            ``CLASSES_NUMBER`` classes.
        max_width: Largest allowed batch width in pixels.
        height: Image height used for resizing and for the output tensor.
        need_pad_width_to_max_width: When true, the output tensor is always
            ``max_width`` wide instead of the batch's own (rounded) width.

    Returns:
        Tuple ``(inputs, labels, sparse_labels, onehot_labels, info)``.
        ``inputs`` has shape ``[batch_size, height, width, 1]``;
        ``sparse_labels`` / ``onehot_labels`` are ``None`` when the matching
        flag is false; ``info`` is a list of
        ``[font_name, font_size, font_mode, font_hint, text_length]`` string
        lists.

    Raises:
        Exception: When the dataset iterator is exhausted, or when the
            rounded batch width exceeds ``max_width``.
    """
    inputs_images = []
    codes = []
    # Widest image seen in this batch.
    max_width_image = 0
    info = []
    for _ in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished train one data file, stop")
        dataset_example.ParseFromString(serialized_example)
        features = dataset_example.features.feature

        font_name = str(features['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = features['font_size'].int64_list.value[0]
        font_mode = features['font_mode'].int64_list.value[0]
        font_hint = features['font_hint'].int64_list.value[0]
        text = str(features['label'].bytes_list.value[0], encoding="utf-8")
        size = features['size'].int64_list.value
        image = features['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        # Convert to grayscale.
        image = utils_pil.convert_to_gray(image)
        _w, h = size
        if h > height:
            image = utils_pil.resize_by_height(image, height)

        # Shrink by 1-5 px, then shift the image to a random position.
        image = utils_pil.resize_by_height(image,
                                           height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, height)

        # Add noise.
        image = utils_font.add_noise(image)

        # Convert to a NumPy array.
        image = np.asarray(image)

        # Original note: scales by height by default, and by width when the
        # width would exceed the maximum.
        image = utils.resize(image, height, max_width)

        # Random polarity, scaled by 1/255.
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        max_width_image = max(max_width_image, image.shape[1])
        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([
            font_name,
            str(font_size),
            str(font_mode),
            str(font_hint),
            str(len(text)),
        ])

    # Round the width up to a multiple of 4.
    if max_width_image % 4 > 0:
        max_width_image = max_width_image + 4 - max_width_image % 4

    # Too-wide batches are rejected rather than rescaled.
    if max_width_image > max_width:
        raise Exception("img width must %s <= %s " % (max_width_image,
                                                      max_width))

    if need_pad_width_to_max_width:
        max_width_image = max_width

    # Use the `height` argument here as well: the images above were resized
    # to `height`, so the tensor must not be sized from the module-level
    # image_height.
    inputs = np.zeros([batch_size, height, max_width_image, 1])
    for i, img in enumerate(inputs_images):
        image_vec = utils.img2vec(img, height=height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.reshape(image_vec, (height, max_width_image, 1))

    labels = [np.asarray(code) for code in codes]
    sparse_labels = None
    onehot_labels = None
    # NOTE(review): if texts differ in length, the np.array() calls below
    # build ragged arrays, which recent NumPy rejects without dtype=object.
    # Confirm labels are fixed-length for callers of this function.
    if has_sparse:
        sparse_labels = np.array(utils.sparse_tuple_from(labels))
    if has_onehot:
        identity = np.eye(CLASSES_NUMBER)
        onehot_labels = np.array([identity[label] for label in labels])

    return inputs, np.array(labels), sparse_labels, onehot_labels, info
def get_next_batch(batch_size=128):
    """Generate a batch of noisy inputs, clean targets and sparse labels.

    Each sample is random text rendered in a random font at
    ``image_height``. The input copy goes through ``utils_font.add_noise``,
    grayscale conversion, a random shrink and a resize back to
    ``image_height``, then is inverted and scaled to 0..1. The target copy
    is converted to grayscale, passed through ``utils.img2bwinv`` and
    divided by 255.

    Args:
        batch_size: Number of samples to generate.

    Returns:
        Tuple ``(inputs, targets, sparse_labels, seq_len)``. ``inputs`` and
        ``targets`` have shape ``[batch_size, max_width_image, image_height]``;
        ``sparse_labels`` is the result of ``utils.sparse_tuple_from``;
        ``seq_len`` is a length-``batch_size`` array holding one repeated
        value.
    """
    images = []
    to_images = []
    codes = []
    max_width_image = 0
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        codes.append([CHARS.index(char) for char in text])

        image = utils_font.get_font_image_from_url(
            text, font_name, font_size,
            fontmode=font_mode, fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        to_image = image.copy()

        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        # Floor division by 1.5 / 2.5 yields a float; randint() needs ints
        # (a float bound raises TypeError on Python 3.12+).
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, int(image_height // divisor))
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        # Clean target built from the copy taken before any noise was added.
        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        max_width_image = max(max_width_image, image.shape[1],
                              to_image.shape[1])

    # Always adds between 1 and POOL_SIZE columns, so the width ends up a
    # multiple of POOL_SIZE.
    max_width_image = max_width_image + (POOL_SIZE -
                                         max_width_image % POOL_SIZE)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i, img in enumerate(images):
        image_vec = utils.img2vec(img, height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i, img in enumerate(to_images):
        image_vec = utils.img2vec(img, height=image_height,
                                  width=max_width_image, flatten=False)
        targets[i, :] = np.transpose(image_vec)

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image * image_height) // (
        POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
def get_next_batch_for_res(batch_size=128, add_noise=True, _font_name=None,
                           _font_size=None, _font_mode=None,
                           _font_hint=None):
    """Generate a batch of synthetic text images on square canvases.

    Text of random length is rendered repeatedly until the rendering, once
    resized to ``image_height``, covers fewer than ``image_size ** 2``
    pixels. The image is converted to grayscale, limited to
    ``image_height``, optionally rescaled, passed to
    ``utils_pil.random_space2``, optionally noised and dimmed, scaled by
    1/255 and placed on an ``image_size x image_size`` zero canvas with
    ``utils.square_img``.

    Args:
        batch_size: Number of samples to generate.
        add_noise: When true, enables the random rescale,
            ``utils_font.add_noise``, random dimming and random polarity.
            When false the image is always ``(255 - image) / 255``.
        _font_name: Font used for every sample; ``None`` picks one at random
            per sample.
        _font_size: Font size used for every sample; ``None`` picks one at
            random per sample.
        _font_mode: Font mode used for every sample; ``None`` picks one at
            random per sample.
        _font_hint: Font hint used for every sample; ``None`` picks one at
            random per sample.

    Returns:
        Tuple ``(inputs, sparse_labels, seq_len, info)`` where ``inputs`` has
        shape ``[batch_size, image_size, image_size]``, ``sparse_labels`` is
        the result of ``utils.sparse_tuple_from``, ``seq_len`` is a
        length-``batch_size`` array holding one repeated value, and ``info``
        is a list of ``[font_name, font_size, font_mode, font_hint]`` string
        lists.
    """
    inputs_images = []
    codes = []
    info = []
    for _ in range(batch_size):
        font_name = _font_name
        font_size = _font_size
        font_mode = _font_mode
        font_hint = _font_hint
        if font_name is None:
            font_name = random.choice(AllFontNames)
        if font_size is None:
            if random.random() > 0.5:
                font_size = random.randint(9, 49)
            else:
                font_size = random.randint(9, 15)
        if font_mode is None:
            font_mode = random.choice([0, 1, 2, 4])
        if font_hint is None:
            font_hint = random.choice([0, 1, 2, 3, 4, 5])

        # Re-roll the text until the rendering fits the square canvas area.
        while True:
            font_length = random.randint(5, 400)
            text = utils_font.get_random_text(CHARS, eng_world_list,
                                              font_length)
            image = utils_font.get_font_image_from_url(
                text, font_name, font_size, font_mode, font_hint)
            temp_image = utils_pil.resize_by_height(image, image_height)
            w, h = temp_image.size
            if w * h < image_size * image_size:
                break

        image = utils_pil.convert_to_gray(image)
        w, h = image.size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)

        if add_noise and random.random() > 0.5:
            _h = random.randint(9, image_height + 1)
            image = utils_pil.resize_by_height(image, _h)

        # NOTE(review): other callers in this file pass three arguments to
        # random_space2 and unpack a 2-tuple from it; this call passes two
        # and uses the result directly. Confirm against the helper.
        image = utils_pil.random_space2(image, image_height)

        if add_noise:
            image = utils_font.add_noise(image)
        image = np.asarray(image)
        if add_noise:
            image = image * random.uniform(0.3, 1)

        # With add_noise the polarity is random; without it, always inverted.
        if add_noise and random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append(
            [font_name, str(font_size), str(font_mode), str(font_hint)])

    inputs = np.zeros([batch_size, image_size, image_size])
    for i, img in enumerate(inputs_images):
        inputs[i, :] = utils.square_img(img,
                                        np.zeros([image_size, image_size]))

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (image_size * image_size) // (
        POOL_SIZE * POOL_SIZE)
    return inputs, sparse_labels, seq_len, info
def get_next_batch_for_gan(batch_size=128):
    """Generate a batch of (input, trim-mask) image pairs for GAN training.

    For each sample the same text is rendered twice with ``trim=False``:
    once with the randomly chosen hint (the input) and once with hint 0 or 4
    (the "clear" reference). Both are cropped to the reference's trim box,
    converted to grayscale, limited to ``image_height``, optionally
    enlarged, and positioned together by ``utils_pil.random_space2``. The
    reference becomes the trim target ``(255 - x) / 255``. The input
    occasionally gets noise and a bright-pixel dimming, then is randomly
    inverted and scaled by 1/255. Both are placed on zero canvases with
    ``utils.square_img``.

    Args:
        batch_size: Number of image pairs to generate.

    Returns:
        Tuple ``(inputs, trims)``; each is an array of shape
        ``[batch_size, image_size, image_size]``. Entries of ``trims`` that
        are exactly 0 are replaced by -1.
    """
    input_images = []
    trim_images = []
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        if random.random() > 0.5:
            font_size = random.randint(9, 49)
        else:
            font_size = random.randint(9, 15)
        font_mode = random.choice([0, 1, 2, 4])
        # An older note here said "2 was removed"; the list still contains 2.
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        # Re-roll the text until both renderings agree in size and the
        # height-normalised image fits the square canvas area.
        while True:
            font_length = random.randint(3, 400)
            text = utils_font.get_words_text(CHARS, eng_world_list,
                                             font_length)
            image = utils_font.get_font_image_from_url(
                text, font_name, font_size, font_mode, font_hint, trim=False)
            # Reference rendering uses hint 0 for hints 0/1/3/5, else hint 4.
            clear_hint = 0 if font_hint in (0, 1, 3, 5) else 4
            clear_trim_image = utils_font.get_font_image_from_url(
                text, font_name, font_size, font_mode, clear_hint,
                trim=False)

            bbox = utils_pil.get_trim_box(clear_trim_image)
            image = image.crop(bbox)
            clear_trim_image = clear_trim_image.crop(bbox)
            if clear_trim_image.size != image.size:
                print("get size not same,", image.size, clear_trim_image.size,
                      font_name, font_size, font_mode, font_hint)
                continue

            temp_image = utils_pil.resize_by_height(image, image_height)
            w, h = temp_image.size
            if w * h <= image_size * image_size:
                break

        image = utils_pil.convert_to_gray(image)
        clear_trim_image = utils_pil.convert_to_gray(clear_trim_image)
        w, h = image.size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)
            clear_trim_image = utils_pil.resize_by_size(clear_trim_image,
                                                        image.size)

        # Randomly enlarge images that are shorter than image_height.
        w, h = image.size
        if random.random() > 0.5 and h < image_height:
            _h = random.randint(h + 1, image_height + 1)
            image = utils_pil.resize_by_height(image, _h,
                                               random.random() > 0.5)
            clear_trim_image = utils_pil.resize_by_size(clear_trim_image,
                                                        image.size)
        if clear_trim_image.size != image.size:
            print("random resize get size not same,", image.size,
                  clear_trim_image.size, font_name, font_size, font_mode,
                  font_hint)

        # Randomly shift both images together; the reference marks where
        # the glyphs actually are.
        image, clear_trim_image = utils_pil.random_space2(
            image, clear_trim_image, image_height)
        if clear_trim_image.size != image.size:
            print("random move space get size not same,", image.size,
                  clear_trim_image.size, font_name, font_size, font_mode,
                  font_hint)

        # Trim target: inverted and scaled to 0..1.
        trims_image = np.asarray(clear_trim_image)
        trims_image = (255. - trims_image) / 255.
        trim_images.append(trims_image)

        if random.random() > 0.9:
            image = utils_font.add_noise(image)
        # np.array() returns a writable copy; toggling flags.writeable on an
        # np.asarray() view fails when the array does not own its data.
        image = np.array(image)
        if random.random() > 0.9:
            # Dim every pixel brighter than 200 to one random level.
            image[image > 200] = 255 * random.uniform(0.5, 1)

        # Random polarity, scaled by 1/255.
        if random.random() > 0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        input_images.append(image)

    inputs = np.zeros([batch_size, image_size, image_size])
    for i, img in enumerate(input_images):
        inputs[i, :] = utils.square_img(
            img, np.zeros([image_size, image_size]), image_height)

    trims = np.zeros([batch_size, image_size, image_size])
    for i, img in enumerate(trim_images):
        trims[i, :] = utils.square_img(
            img, np.zeros([image_size, image_size]), image_height)
    # This was written as `== -1`, a comparison whose result was discarded;
    # the assignment is what maps zero entries to -1.
    trims[trims == 0] = -1
    return inputs, trims