Exemplo n.º 1
0
    def __init__(self, data_root='.', split='train', **kargs):
        """
        Index the ICDAR2015 Challenge 4 images and their ground-truth files.

        :param data_root: directory that contains the ``ICDAR2015`` folder
        :param split: ``'train'`` selects the training folders; any other
            value selects the test folders
        :param kargs: forwarded unchanged to ``ArrayDataset.__init__``
        """
        ArrayDataset.__init__(self, **kargs)
        self.split = split

        base = data_root + '/ICDAR2015/Challenge4/'
        if split == 'train':
            data_dir = base + 'ch4_training_images/'
            gt_dir = base + 'ch4_training_localization_transcription_gt/'
        else:
            data_dir = base + 'ch4_test_images/'
            gt_dir = base + 'ch4_test_localization_transcription_gt/'

        # Collect .jpg then .png names and order them so that img_paths and
        # gt_paths line up index by index.
        names = util.io.ls(data_dir, '.jpg')
        names += util.io.ls(data_dir, '.png')
        names.sort()

        self.img_paths = [data_dir + n for n in names]
        # Ground-truth file: 'gt_' + <text before the first dot> + '.txt'.
        self.gt_paths = [gt_dir + 'gt_' + n.split('.')[0] + '.txt' for n in names]
Exemplo n.º 2
0
    def __init__(self, ctw_root='.', split='train', **kargs):
        """
        Index the CTW1500 images and their ground-truth text files.

        :param ctw_root: directory that contains ``data/ctw1500``
        :param split: ``'train'`` selects the train folders; any other value
            selects the test folders
        :param kargs: forwarded unchanged to ``ArrayDataset.__init__``
        """
        ArrayDataset.__init__(self, **kargs)

        base = ctw_root + '/data/ctw1500/'
        use_train = split == 'train'
        data_dir = base + ('train/text_image/' if use_train else 'test/text_image/')
        gt_dir = base + ('train/ctw1500_e2e_train/' if use_train
                         else 'test/ctw1500_e2e_test/')

        # .jpg names first, then .png, sorted together.
        names = util.io.ls(data_dir, '.jpg')
        names += util.io.ls(data_dir, '.png')
        names.sort()

        img_paths = []
        gt_paths = []
        for name in names:
            img_paths.append(data_dir + name)
            # Ground-truth file: <text before the first dot> + '.txt'.
            stem = name.split('.')[0]
            gt_paths.append(gt_dir + stem + '.txt')

        self.img_paths = img_paths
        self.gt_paths = gt_paths
Exemplo n.º 3
0
    def __init__(self, data_root='.', split='train', **kargs):
        """
        Index the ReCTS2019 images and, for the train split, their JSON labels.

        :param data_root: directory that contains the ``ReCTS2019`` folder
        :param split: ``'train'`` selects ``train/img`` and ``train/gt``; any
            other value selects the test images, for which ``gt_paths`` stays
            empty
        :param kargs: forwarded unchanged to ``ArrayDataset.__init__``
        """
        ArrayDataset.__init__(self, **kargs)
        self.split = split

        base = data_root + '/ReCTS2019/'
        if split == 'train':
            data_dir = base + 'train/img/'
            gt_dir = base + 'train/gt/'
        else:
            # No ground truth is configured for the test set.
            data_dir = base + 'ReCTS_test_part1/Task3_and_Task4/img/'
            gt_dir = ''

        self.random_scale = np.array([0.5, 0.7, 1.0, 1.2])

        # Scan images: .jpg names first, then .png, sorted together.
        names = util.io.ls(data_dir, '.jpg')
        names += util.io.ls(data_dir, '.png')
        names.sort()

        self.img_paths = [data_dir + n for n in names]
        if gt_dir:
            # Label file: <text before the first dot> + '.json'.
            self.gt_paths = [gt_dir + n.split('.')[0] + '.json' for n in names]
        else:
            self.gt_paths = []
Exemplo n.º 4
0
    def __init__(self, data_root='.', split='train', **kargs):
        """
        Index the ArT2019 images and load the train labels when available.

        :param data_root: directory that contains the ``ArT2019`` folder
        :param split: ``'train'`` selects ``train_images`` and loads
            ``train_labels.json`` into ``self.gt``; any other value selects
            ``test_images`` and leaves ``self.gt`` as None
        :param kargs: forwarded unchanged to ``ArrayDataset.__init__``
        """
        ArrayDataset.__init__(self, **kargs)
        self.split = split

        base = data_root + '/ArT2019/'
        if split == 'train':
            data_dir = base + 'train_images/'
            gt_path = base + 'train_labels.json'
        else:
            # No ground truth is configured for the test set.
            data_dir = base + 'test_images/'
            gt_path = ''

        # Scan images: .jpg names first, then .png, sorted together.
        names = util.io.ls(data_dir, '.jpg')
        names += util.io.ls(data_dir, '.png')
        names.sort()
        self.img_paths = [data_dir + n for n in names]

        self.gt = None
        if gt_path:
            with open(gt_path, 'r') as f:
                self.gt = json.load(f)
Exemplo n.º 5
0
    def __init__(self, datadir, name='--', exts=None):
        """
        List the files of a directory that pass ``allowed_file``.

        :param datadir: directory to scan (non-recursive ``os.listdir``)
        :param name: dataset name passed to ``ArrayDataset.__init__``
        :param exts: passed as the second argument to ``allowed_file`` for
            every entry
        """
        ArrayDataset.__init__(self, name)
        self.datadir = datadir
        # Keep accepted entries in sorted order.
        self.list = [
            entry for entry in sorted(os.listdir(datadir))
            if allowed_file(entry, exts)
        ]
Exemplo n.º 6
0
    def __init__(self, dataset, filter=None, name='--'):
        """
        Select a subset of another dataset by index.

        The selected indices are stored in ``self.ibuf``.

        :param dataset: source dataset; only its ``size()`` method is used here
        :param filter: sampling rule ``'<pos>-<num>'`` (e.g. ``'low-0.5'``,
            ``'high-100'``), or None to keep every index. ``pos`` equal to
            ``'low'`` selects from the low end; any other value selects from
            the high end. When ``num < 1`` it is a fraction: indices are
            bucketed modulo 100 (or modulo 10 when the percentage is a
            multiple of 10) and the lowest / highest buckets are kept. When
            ``num >= 1`` it is a count: the first / last ``int(num)`` indices
            are kept.
        :param name: dataset name passed to ``ArrayDataset.__init__``
        """
        ArrayDataset.__init__(self, name=name)
        self.dataset = dataset
        self.filter = filter
        total = dataset.size()
        if filter is None:
            self.ibuf = list(range(total))
            return

        segs = filter.split('-')
        pos = segs[0]
        num = float(segs[1])
        if num < 1:
            base = 100
            # 0.29 * 100 evaluates to 28.999999999999996, so a bare int()
            # would select 28% instead of 29%. Round away the representation
            # noise first, then truncate as before.
            num = int(round(num * base, 6))
            if num % 10 == 0:
                base = 10
                num //= 10  # stay an int instead of becoming a float

            if pos == 'low':
                self.ibuf = [i for i in range(total) if i % base < num]
            else:
                self.ibuf = [i for i in range(total) if i % base >= (base - num)]
        else:
            num = int(num)
            if pos == 'low':
                self.ibuf = list(range(min(num, total)))
            else:
                self.ibuf = list(range(max(total - num, 0), total))
Exemplo n.º 7
0
    def __init__(self, data_root='.', split='train', **kargs):
        """
        Index the MLT2019 images and their ground-truth text files.

        :param data_root: directory that contains the ``MLT2019`` folder
        :param split: stored on ``self.split``; both branches below currently
            resolve to the same directories
        :param kargs: forwarded unchanged to ``ArrayDataset.__init__``
        """
        ArrayDataset.__init__(self, **kargs)
        self.split = split

        base = data_root + '/MLT2019/'
        # NOTE(review): the non-train branch points at the train folders as
        # well (the original comment says there is no gt for the test set).
        # Confirm whether this is intentional.
        if split == 'train':
            data_dir, gt_dir = base + 'train_images/', base + 'train_gt_t13/'
        else:
            data_dir, gt_dir = base + 'train_images/', base + 'train_gt_t13/'

        # .jpg names first, then .png, sorted together.
        names = util.io.ls(data_dir, '.jpg')
        names += util.io.ls(data_dir, '.png')
        names.sort()

        self.img_paths = [data_dir + n for n in names]
        # Ground-truth file: <text before the first dot> + '.txt'.
        self.gt_paths = [gt_dir + n.split('.')[0] + '.txt' for n in names]
Exemplo n.º 8
0
 def __init__(self, rootdir, imgdir='img_align_celeba', name='CelebA'):
     """
     Index a CelebA directory into a list of per-image dicts (``self.list``).

     Each item has the keys ``'path'``, ``'attr'``, ``'landmark'`` and
     ``'bbox'``. If ``<rootdir>/landmark`` exists and holds more files than
     80% of the item count, only items with a matching ``<path>.txt`` file
     there are kept, and each gets ``'kp68'`` from ``load_landmarks``.

     :param rootdir: directory holding the three ``list_*_celeba.txt`` files
     :param imgdir: image sub-directory of ``rootdir``
     :param name: dataset name passed to ``ArrayDataset.__init__``
     """
     ArrayDataset.__init__(self, name)
     self.rootdir = rootdir
     self.attr_path = os.path.join(rootdir, 'list_attr_celeba.txt')
     self.landmarks_path = os.path.join(rootdir, 'list_landmarks_align_celeba.txt')
     self.bbox_path = os.path.join(rootdir, 'list_bbox_celeba.txt')
     self.img_dir = os.path.join(rootdir, imgdir)
     # image list
     self.img_list = os.listdir(self.img_dir)

     def data_rows(path):
         # Yield the whitespace-split fields of every non-blank line after
         # the two header lines (count line and column-name line).
         with open(path, 'r') as f:
             f.readline()
             f.readline()
             for line in f:
                 fields = line.strip().split()
                 if fields:
                     yield fields

     # parse attr: read the attribute file. The previous code opened the
     # landmarks file here, so 'attr' and attr_names held landmark data.
     with open(self.attr_path, 'r') as f:
         total = int(f.readline())
         item_list = [{} for _ in range(total)]
         self.attr_names = f.readline().strip().split()
         idx = 0
         for line in f:
             fields = line.strip().split()
             if not fields:
                 continue  # tolerate blank / trailing lines
             item_list[idx] = {'path': fields[0],
                               'attr': [int(x) for x in fields[1:]]}
             idx += 1

     # parse landmarks (rows are matched to items by position)
     for idx, fields in enumerate(data_rows(self.landmarks_path)):
         item_list[idx]['landmark'] = [int(x) for x in fields[1:]]

     # parse bboxs (rows are matched to items by position)
     for idx, fields in enumerate(data_rows(self.bbox_path)):
         item_list[idx]['bbox'] = [int(x) for x in fields[1:]]

     # check if got landmark directory
     ldir = os.path.join(rootdir, 'landmark')
     if os.path.exists(ldir):
         files = os.listdir(ldir)
         if len(files) > len(item_list) * 0.8:
             # Keep only the items that have a landmark file.
             kept = []
             for item in item_list:
                 lpath = os.path.join(ldir, item['path']) + '.txt'
                 if not os.path.exists(lpath):
                     continue
                 item['kp68'] = load_landmarks(lpath)
                 kept.append(item)
             item_list = kept
     self.list = item_list