def __init__(self, chars_file, corpus_dir=None, length=None, random=False):
    """Store constructor arguments and trigger corpus loading.

    :param chars_file: path to the charset file, parsed by load_chars()
    :param corpus_dir: optional directory holding corpus text files
    :param length: optional target text length
    :param random: flag stored as-is (sampling mode, used elsewhere)
    """
    self.chars_file = chars_file
    self.corpus_dir = corpus_dir
    self.length = length
    self.random = random
    # Accumulator for corpus strings; presumably filled by load() — confirm.
    self.corpus = []
    self.charsets = load_chars(chars_file)
    self.load()
def restore_exist_labels(label_path):
    """Return the image index to start from, appending after an existing labels.txt.

    :param label_path: path to labels.txt in the output directory
    :return: 0 when no labels file exists, else the current label count
    """
    # If labels.txt is absent we start a fresh generation run from index 0.
    if not os.path.exists(label_path):
        print('生成图片 %s' % flags.save_dir)
        return 0
    # Otherwise append new images after the last existing one.
    start_index = len(utils.load_chars(label_path))
    print('增量生成图片 %s. 从 %d 开始' % (flags.save_dir, start_index))
    return start_index
def restore_exist_labels(label_path):
    """Compute the first image index for this run, resuming after an existing labels.txt.

    :param label_path: path to labels.txt in the output directory
    :return: 0 for a fresh run, else the number of labels already written
    """
    # Absent labels file means a fresh run starting at index 0.
    if not os.path.exists(label_path):
        print('Generate text images in %s' % flags.save_dir)
        return 0
    # Resume: new images are numbered after the existing ones.
    start_index = len(utils.load_chars(label_path))
    print('Generate more text images in %s. Start index %d' % (flags.save_dir, start_index))
    return start_index
def __init__(self, chars_file, corpus_dir=None, length=None):
    """Store constructor arguments and trigger corpus loading.

    :param chars_file: path to the charset file, parsed by load_chars()
    :param corpus_dir: optional directory holding corpus text files
    :param length: optional target text length
    """
    self.chars_file = chars_file
    self.corpus_dir = corpus_dir
    self.length = length
    # Corpus: list of strings; presumably filled by load() — confirm.
    self.corpus = []
    self.charsets = load_chars(chars_file)
    self.load()
def __init__(self, chars_file, corpus_dir=None, length=None):
    """Store constructor arguments and trigger corpus loading.

    :param chars_file: path to the charset file, parsed by load_chars()
    :param corpus_dir: directory of corpus text files; may stay None for
        RandomCorpus, which generates text instead of reading files
    :param length: optional target text length
    """
    # Kept for parity with the sibling corpus constructors, which also
    # record chars_file and pre-create the corpus list.
    self.chars_file = chars_file
    self.corpus_dir = corpus_dir
    self.length = length
    self.corpus = []  # missing in the original; load() presumably appends here
    self.charsets = load_chars(chars_file)
    # RandomCorpus synthesizes text from the charset and needs no corpus_dir.
    if not isinstance(self, RandomCorpus):
        # %s formatting avoids a TypeError when corpus_dir is None
        # (the original concatenated "..." + self.corpus_dir).
        print("Loading corpus from: %s" % self.corpus_dir)
    self.load()
def __init__(self, chars_file):
    """Build char<->index lookup tables for CTC-style training.

    :param chars_file: charset file loaded via load_chars()
    """
    self.chars = ''.join(load_chars(chars_file))
    # char_set_length + 1 extra class for the CTC blank label
    self.num_classes = len(self.chars) + 1
    self.encode_maps = {}
    self.decode_maps = {}
    self.create_encode_decode_maps(self.chars)
    # Fixed typo in log message: 'CTC Black' -> 'CTC Blank'.
    print('Load chars file: %s num_classes: %d + 1(CTC Blank)' %
          (chars_file, self.num_classes - 1))
def __init__(self, chars_file, corpus_dir=None, length=None, max_length=None, distribute_file=None):
    """Store constructor arguments and trigger corpus loading.

    :param chars_file: path to the charset file, parsed by load_chars()
    :param corpus_dir: optional directory holding corpus text files
    :param length: optional target text length
    :param max_length: optional upper bound on text length
    :param distribute_file: optional path to a length-distribution file
    """
    # Plain attribute storage; the real work happens in load().
    self.chars_file = chars_file
    self.corpus_dir = corpus_dir
    self.length = length
    self.max_length = max_length
    self.distribute_file = distribute_file
    self.corpus = []
    self.charsets = load_chars(chars_file)
    self.load()
def __init__(self, chars_file, corpus_dir=None, length=None):
    """Store constructor arguments, discover corpus files, then load them.

    :param chars_file: path to the charset file, parsed by load_chars()
    :param corpus_dir: directory searched recursively for *.txt corpus files;
        may stay None for RandomCorpus
    :param length: optional target text length
    """
    self.chars_file = chars_file
    self.corpus_dir = corpus_dir
    self.length = length
    self.corpus = []
    self.charsets = load_chars(chars_file)
    # RandomCorpus generates text from the charset, so it skips file discovery.
    if not isinstance(self, RandomCorpus):
        print("Loading corpus from: " + self.corpus_dir)
        pattern = self.corpus_dir + '/**/*.txt'
        self.corpus_path = glob.glob(pattern, recursive=True)
        # Abort the process when no corpus file is found at all.
        if not self.corpus_path:
            print("Corpus not found.")
            exit(-1)
    self.load()
def get_unsupported_chars(fonts, chars_file):
    """Get fonts unsupported chars by loads/saves font supported chars from cache file

    :param fonts: list of font paths
    :param chars_file: path to the charset file
    :return: dict
        key -> font_path
        value -> list of charset chars the font cannot render
    """
    charset = ''.join(load_chars(chars_file))
    fonts_chars = get_fonts_chars(fonts, chars_file)
    # For each font, keep the charset chars missing from its supported set.
    return {
        font_path: [ch for ch in charset if ch not in supported]
        for font_path, supported in fonts_chars.items()
    }
def get_fonts_chars(fonts, chars_file):
    """loads/saves font supported chars from cache file

    :param fonts: list of font path. e.g ['./data/fonts/msyh.ttc']
    :param chars_file: arg from parse_args
    :return: dict
        key -> font_path
        value -> font supported chars
    """
    result = {}

    # Per-font results are cached on disk under ../.caches relative to this file.
    cache_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../', '.caches'))
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    chars = ''.join(load_chars(chars_file))

    for font_path in fonts:
        # Cache key depends on both the font path and the charset content,
        # so a changed charset invalidates stale entries.
        cache_key = md5(''.join([font_path, chars]))
        cache_file_path = os.path.join(cache_dir, cache_key)

        if os.path.exists(cache_file_path):
            # Cache hit: unpickle the previously computed supported set.
            with open(cache_file_path, 'rb') as f:
                supported_chars = pickle.load(f)
            print('Load font(%s) supported chars(%d) from cache' % (font_path, len(supported_chars)))
        else:
            # Cache miss: probe the font, then persist the result.
            ttf = load_font(font_path)
            _, supported_chars = check_font_chars(ttf, chars)
            print('Save font(%s) supported chars(%d) to cache' % (font_path, len(supported_chars)))
            with open(cache_file_path, 'wb') as f:
                pickle.dump(supported_chars, f, pickle.HIGHEST_PROTOCOL)

        result[font_path] = supported_chars

    return result
type=str, default='./data/chars/char_std_5990.txt') parser.add_argument( '--font_dir', type=str, default='/Users/guoxiaolu/work/code/text_renderer/data/fonts/chn') parser.add_argument( '--delete', action="store_true", default=False, help= 'whether or not to delete font which not full support the chars_file') args, _ = parser.parse_known_args() charset = load_chars(args.chars_file) font_paths = glob.glob(args.font_dir + '/*.*') fonts = {} for p in font_paths: ttf = load_font(p) fonts[p] = ttf useful_fonts = [] for k, v in fonts.items(): try: unsupported_chars, _ = check_font_chars(v, charset) print("font: %s ,chars unsupported: %d" % (k, len(unsupported_chars))) if len(unsupported_chars) < 500: print(unsupported_chars)