    # Assumed context for this snippet: `os`, `json`, and `cv2` are imported
    # at module level, and `get_vocabulary` / `global_args` come from the
    # surrounding project.
    def __init__(self,
                 root,
                 voc_type,
                 height,
                 width,
                 num_samples,
                 is_aug,
                 mix_data=False):
        super(TOTALTEXT, self).__init__()
        self.root = root
        self.voc_type = voc_type
        self.height = height
        self.width = width
        self.num_samples = num_samples
        self.sampling_num_per_side = global_args.sampling_num_per_side

        # Image loader: cv2.imread returns a BGR ndarray, or None when the
        # path cannot be read.
        self.fetcher = lambda path: cv2.imread(path)
        self.nid_labels = []
        self.max_len = 0

        # for recognition
        assert voc_type in [
            'LOWERCASE', 'ALLCASES', 'ALLCASES_SYMBOLS', 'LOWERCASE_SYMBOLS'
        ]
        self.EOS = 'EOS'
        self.PADDING = 'PADDING'
        self.UNKNOWN = 'UNKNOWN'
        self.UNKNOWN_COUNT = 0
        self.voc = get_vocabulary(voc_type,
                                  EOS=self.EOS,
                                  PADDING=self.PADDING,
                                  UNKNOWN=self.UNKNOWN)
        self.char2id = dict(zip(self.voc, range(len(self.voc))))
        self.id2char = dict(zip(range(len(self.voc)), self.voc))

        self.rec_num_classes = len(self.voc)
        self.lowercase = ('LOWERCASE' in voc_type)
        self.ratio = 4

        label_json_path = os.path.join(self.root, 'Label.json')
        with open(label_json_path, 'r') as f:
            labels = json.load(f)
        for label in labels:
            nid = os.path.join(self.root, label["img"])  # i.e. image path
            word = label["word"]
            poly_x = label["poly_x"]
            poly_y = label["poly_y"]
            self.nid_labels.append((nid, word, poly_x, poly_y))
            if len(word) > self.max_len:
                self.max_len = len(word)

        # Reserve one extra position for the trailing <EOS> token.
        self.max_len += 1

        self.nSamples = len(self.nid_labels)
        print(
            '\n ===== ===== ===== =====\nread {} images from {}.\n ===== ===== ===== =====\n'
            .format(self.nSamples, self.root))
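
A minimal sketch of how a word label could be turned into id targets with the mappings built above; encode_word is a hypothetical helper, not part of the original class.

# Hypothetical helper (not in the original code): encode a word using the
# char2id / EOS / UNKNOWN conventions set up in __init__ above.
def encode_word(word, char2id, eos='EOS', unknown='UNKNOWN', lowercase=True):
    if lowercase:
        word = word.lower()
    # Out-of-vocabulary characters fall back to the UNKNOWN id.
    ids = [char2id.get(ch, char2id[unknown]) for ch in word]
    # Terminate with EOS; this is why max_len is incremented by one.
    ids.append(char2id[eos])
    return ids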
Example #2
    def __init__(self,
                 root,
                 voc_type,
                 height,
                 width,
                 num_samples,
                 is_aug,
                 lexicon_type='0',
                 mix_data=False):
        super(SVT, self).__init__()
        self.root = root
        self.voc_type = voc_type
        self.height = height
        self.width = width
        self.num_samples = num_samples
        self.lexicon_type = lexicon_type
        self.sampling_num_per_side = global_args.sampling_num_per_side

        lexicon_root_dir = './Source/datasets/lexicon'

        assert self.lexicon_type in ['0', '50']

        self.fetcher = lambda path: cv2.imread(path)
        self.nid_labels = []
        self.max_len = 0

        # for recognition
        assert voc_type in [
            'LOWERCASE', 'ALLCASES', 'ALLCASES_SYMBOLS', 'LOWERCASE_SYMBOLS'
        ]
        self.EOS = 'EOS'
        self.PADDING = 'PADDING'
        self.UNKNOWN = 'UNKNOWN'
        self.UNKNOWN_COUNT = 0
        self.voc = get_vocabulary(voc_type,
                                  EOS=self.EOS,
                                  PADDING=self.PADDING,
                                  UNKNOWN=self.UNKNOWN)
        self.char2id = dict(zip(self.voc, range(len(self.voc))))
        self.id2char = dict(zip(range(len(self.voc)), self.voc))

        self.rec_num_classes = len(self.voc)
        self.lowercase = ('LOWERCASE' in voc_type)
        self.ratio = 4

        # Load the 50-word lexicon: each line of svt.list maps an image file
        # name to a comma-separated list of candidate words.
        self.lexicons50 = dict()
        with open(os.path.join(lexicon_root_dir, 'svt.list'), 'r') as f:
            for line in f:
                parts = line.strip().split(' ')
                image_path = parts[0].split('/')[1]
                lexicon50 = [w.lower() for w in parts[3].split(',')]
                self.lexicons50[image_path] = lexicon50

        # SVT only ships a 50-word lexicon; keep empty placeholders for the
        # 1k and full lexicons so the dataset exposes the same attributes
        # as the other loaders.
        self.lexicons1k = {k: [] for k in self.lexicons50}
        self.lexiconsfull = {k: [] for k in self.lexicons50}

        label_json_path = os.path.join(self.root, 'Label.json')
        with open(label_json_path, 'r') as f:
            labels = json.load(f)
        for label in labels:
            nid = os.path.join(self.root, label["img"])  # i.e. image path
            word = label["word"]
            # Standard evaluation filter: skip words shorter than three
            # characters or containing non-alphanumeric characters.
            if len(word) < 3 or (not word.isalnum()):
                continue
            self.nid_labels.append((nid, word))
            if len(word) > self.max_len:
                self.max_len = len(word)

        # Reserve one extra position for the trailing <EOS> token.
        self.max_len += 1

        self.nSamples = len(self.nid_labels)
        print(
            '\n ===== ===== ===== =====\nread {} images from {}.\n ===== ===== ===== =====\n'
            .format(self.nSamples, self.root))
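
A hedged sketch of how the 50-word lexicons loaded above are commonly used at evaluation time: the raw prediction is snapped to the nearest lexicon word by edit distance. Both helpers below are illustrative and not part of the SVT class.

# Illustrative helpers (not in the original code): lexicon-constrained
# decoding, as commonly done for SVT's 50-word lexicon protocol.
def edit_distance(a, b):
    # Classic one-row dynamic-programming Levenshtein distance.
    dp = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        prev, dp[0] = dp[0], i
        for j, cb in enumerate(b, 1):
            prev, dp[j] = dp[j], min(dp[j] + 1,          # deletion
                                     dp[j - 1] + 1,      # insertion
                                     prev + (ca != cb))  # substitution
    return dp[-1]

def constrain_to_lexicon(prediction, lexicon):
    # Replace the raw prediction with its closest lexicon entry, e.g.
    # constrain_to_lexicon(pred, dataset.lexicons50[image_name]).
    return min(lexicon, key=lambda w: edit_distance(prediction.lower(), w))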