def __init__(self, root, voc_type, height, width, num_samples, is_aug, mix_data=False):
    super(TOTALTEXT, self).__init__()
    self.root = root
    self.voc_type = voc_type
    self.height = height
    self.width = width
    self.num_samples = num_samples
    self.sampling_num_per_side = global_args.sampling_num_per_side
    self.fetcher = lambda path: cv2.imread(path)
    self.nid_labels = []
    self.max_len = 0  # for recognition

    assert voc_type in [
        'LOWERCASE', 'ALLCASES', 'ALLCASES_SYMBOLS', 'LOWERCASE_SYMBOLS'
    ]
    self.EOS = 'EOS'
    self.PADDING = 'PADDING'
    self.UNKNOWN = 'UNKNOWN'
    self.UNKNOWN_COUNT = 0
    self.voc = get_vocabulary(voc_type,
                              EOS=self.EOS,
                              PADDING=self.PADDING,
                              UNKNOWN=self.UNKNOWN)
    self.char2id = dict(zip(self.voc, range(len(self.voc))))
    self.id2char = dict(zip(range(len(self.voc)), self.voc))
    self.rec_num_classes = len(self.voc)
    self.lowercase = (voc_type.find('LOWERCASE') >= 0)
    self.ratio = 4

    # Collect (image path, transcription, polygon) tuples from Label.json.
    label_json_path = os.path.join(self.root, 'Label.json')
    with open(label_json_path, 'r') as f:
        labels = json.load(f)
    for label in labels:
        nid = os.path.join(self.root, label["img"])  # i.e. image path
        word = label["word"]
        poly_x = label["poly_x"]
        poly_y = label["poly_y"]
        self.nid_labels.append((nid, word, poly_x, poly_y))
        if len(word) > self.max_len:
            self.max_len = len(word)
    # The last <eos> should be included.
    self.max_len += 1

    self.nSamples = len(self.nid_labels)
    print('\n ===== ===== ===== =====\n'
          'read {} images from {}.\n'
          ' ===== ===== ===== =====\n'.format(self.nSamples, self.root))
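# Usage sketch (hypothetical paths and sizes; assumes global_args has been
# parsed and the Total-Text root contains a Label.json in the format read
# above):
#
#   dataset = TOTALTEXT(root='./data/totaltext', voc_type='LOWERCASE',
#                       height=64, width=256, num_samples=-1, is_aug=False)
#   print(dataset.nSamples, dataset.max_len, dataset.rec_num_classes)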
def __init__(self, root, voc_type, height, width, num_samples, is_aug, lexicon_type='0', mix_data=False):
    super(SVT, self).__init__()
    self.root = root
    self.voc_type = voc_type
    self.height = height
    self.width = width
    self.num_samples = num_samples
    self.lexicon_type = lexicon_type
    self.sampling_num_per_side = global_args.sampling_num_per_side
    lexicon_root_dir = './Source/datasets/lexicon'
    assert self.lexicon_type in ['0', '50']
    self.fetcher = lambda path: cv2.imread(path)
    self.nid_labels = []
    self.max_len = 0  # for recognition

    assert voc_type in [
        'LOWERCASE', 'ALLCASES', 'ALLCASES_SYMBOLS', 'LOWERCASE_SYMBOLS'
    ]
    self.EOS = 'EOS'
    self.PADDING = 'PADDING'
    self.UNKNOWN = 'UNKNOWN'
    self.UNKNOWN_COUNT = 0
    self.voc = get_vocabulary(voc_type,
                              EOS=self.EOS,
                              PADDING=self.PADDING,
                              UNKNOWN=self.UNKNOWN)
    self.char2id = dict(zip(self.voc, range(len(self.voc))))
    self.id2char = dict(zip(range(len(self.voc)), self.voc))
    self.rec_num_classes = len(self.voc)
    self.lowercase = (voc_type.find('LOWERCASE') >= 0)
    self.ratio = 4

    # Load the 50-word lexicon: each line of svt.list maps an image name to
    # a comma-separated list of candidate words.
    self.lexicons50 = dict()
    with open(os.path.join(lexicon_root_dir, 'svt.list'), 'r') as f:
        lines = f.readlines()
    for line in lines:
        line = line.strip()
        image_path = line.split(' ')[0].split('/')[1]
        lexicon50 = line.split(' ')[3].split(',')
        lexicon50 = [w.lower() for w in lexicon50]
        self.lexicons50[image_path] = lexicon50
    # SVT provides no 1k or full lexicons; keep empty lists so the interface
    # matches the other benchmark datasets.
    self.lexicons1k = dict()
    for k, v in self.lexicons50.items():
        self.lexicons1k[k] = []
    self.lexiconsfull = dict()
    for k, v in self.lexicons50.items():
        self.lexiconsfull[k] = []

    # Collect (image path, transcription) pairs from Label.json, skipping
    # words shorter than 3 characters or containing non-alphanumeric
    # characters, per the standard evaluation protocol.
    label_json_path = os.path.join(self.root, 'Label.json')
    with open(label_json_path, 'r') as f:
        labels = json.load(f)
    for label in labels:
        nid = os.path.join(self.root, label["img"])  # i.e. image path
        word = label["word"]
        if len(word) < 3 or (not word.isalnum()):
            continue
        self.nid_labels.append((nid, word))
        if len(word) > self.max_len:
            self.max_len = len(word)
    # The last <eos> should be included.
    self.max_len += 1

    self.nSamples = len(self.nid_labels)
    print('\n ===== ===== ===== =====\n'
          'read {} images from {}.\n'
          ' ===== ===== ===== =====\n'.format(self.nSamples, self.root))
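# Usage sketch (hypothetical paths and sizes; assumes an SVT root containing
# Label.json and the lexicon file at ./Source/datasets/lexicon/svt.list):
#
#   dataset = SVT(root='./data/svt', voc_type='LOWERCASE', height=64,
#                 width=256, num_samples=-1, is_aug=False, lexicon_type='50')
#   # Lexicons are keyed by image file name, lowercased candidates:
#   some_key = next(iter(dataset.lexicons50))
#   print(some_key, dataset.lexicons50[some_key][:5])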