Example #1
    def __init__(self, root, list_file_name_path, size=(512, 512), transform=None):
        self.root = root
        self.list_file_name_path = list_file_name_path
        self.size = size
        self.transform = transform

        # input folders
        self.lbl_fol = osp.join(root, 'labels')
        self.img_fol = osp.join(root, 'images')

        # pre-generated inputs and ground truth
        self.tensor_fol = osp.join(root, 'tensor_input')
        self.semantic_fol = osp.join(root, 'semantic_gt')
        self.obj_fol = osp.join(root, 'obj_gt')
        self.corpus_path = osp.join(root, 'corpus.json')
        self.target_path = osp.join(root, 'target.json')
        self.target2idx_path = osp.join(root, 'target2idx.json')

        self.file_names = self.get_file_names(self.root, self.list_file_name_path)
        self.img_lst = self.get_category_file_paths(self.root, self.img_fol, self.file_names, '.png')
        self.tensor_lst = self.get_category_file_paths(self.root, self.tensor_fol, self.file_names, '.pt')
        self.semantic_lst = self.get_category_file_paths(self.root, self.semantic_fol, self.file_names, '.png')
        self.obj_lst = self.get_category_file_paths(self.root, self.obj_fol, self.file_names, '.json')

        # map dataset index -> sample name (file name without extension)
        self.idx2name = {}
        for idx, path in enumerate(self.tensor_lst):
            name = osp.basename(path).split('.')[0]
            self.idx2name[idx] = name

        self.corpus = read_json(self.corpus_path)
        self.target = read_json(self.target_path)
        self.target2idx = read_json(self.target2idx_path)
        self.enc = OneHotEncoder(self.corpus)     # one-hot encoder over the character corpus
        self.datagenerator = MaskGenerator()
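
The read_json and write_json helpers used throughout these examples are not shown. A minimal sketch of what they are assumed to look like (thin wrappers around the json module; the argument order matches the calls in the later examples, path first):

import json

def read_json(path):
    # assumed helper: load a JSON file into a Python object
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

def write_json(path, data):
    # assumed helper: dump a Python object to a JSON file (path first, then data)
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
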
Example #2
    def __init__(self, corpus_path, target_path, model_path, **kwargs):
        # character and class vocabularies
        self.char2idx = read_json(kwargs['char2idx_path'])
        self.corpus = read_json(corpus_path)
        self.target = read_json(target_path)

        # build the model and load the trained weights onto the requested device
        self.model = Chargrid2D(len(self.corpus) + 1, len(self.target))
        if kwargs['device'] == 'cpu':
            self.model.load_state_dict(
                torch.load(model_path, map_location='cpu'))
        else:
            self.model.load_state_dict(torch.load(model_path))
        self.device = kwargs['device']
        self.model.to(self.device)

        # resize/pad every input chargrid to a fixed 512x512 map
        # (interpolation=0 is cv2.INTER_NEAREST, which preserves the integer character indices)
        self.size = 512
        self.aug = alb.Compose([
            alb.LongestMaxSize(self.size + 24, interpolation=0),
            alb.PadIfNeeded(self.size + 24,
                            self.size + 24,
                            border_mode=cv2.BORDER_CONSTANT),
            alb.RandomCrop(self.size, self.size, p=0.3),
            alb.Resize(self.size, self.size, interpolation=0)
        ])
        self.enc = OneHotEncoder(self.corpus)

        # base colour palette, one RGB tuple per class
        self.all_color = [
            (0, 0, 0),
            (0, 255, 0),
            (0, 0, 255),
            (0, 255, 255),
            (255, 0, 255),
            (255, 255, 0),
            (127, 255, 212),
            (69, 139, 116),
            (131, 139, 139),
            (227, 207, 87),
            (139, 125, 107),
            (138, 43, 226),
            (156, 102, 31),
            (165, 42, 42),
            (255, 64, 64),
            (255, 97, 3),
            (127, 255, 0),
            (238, 18, 137),
            (128, 128, 128),
            (34, 139, 34),
            (139, 105, 20),
            (255, 105, 180),
            (60, 179, 113),
            (139, 0, 0),
            (0, 139, 0),
            (0, 0, 139),
        ]
        # repeat the palette so it covers every target class
        self.all_color = self.all_color * (
            len(self.target) // len(self.all_color) + 1)
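
A minimal usage sketch for this constructor. The class name ChargridPredictor and all file paths are hypothetical; only the positional arguments and the required kwargs (char2idx_path, device) come from the code above:

# Hypothetical class name and paths; only the argument names are taken from the constructor above.
predictor = ChargridPredictor(
    './data/corpus.json',
    './data/target.json',
    './weights/chargrid2d.pth',
    char2idx_path='./data/char2idx.json',
    device='cpu',          # or 'cuda' if a GPU is available
)
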
Example #3
    def convert_lbl(self, lbl_fol, std_lbl_fol):
        all_files = glob.glob(osp.join(lbl_fol, '*.json'))

        for idx, file_path in enumerate(all_files):
            name = osp.basename(file_path)
            print(name)
            lbl_path = file_path
            # the matching image may be stored as .png or .jpg
            img_path = osp.join(self.img_fol, name.replace('.json', '.png'))
            if not osp.exists(img_path):
                img_path = osp.join(self.img_fol, name.replace('.json', '.jpg'))

            print(
                f'Converting data......File {idx}/Total {len(all_files)}....Progress: {int(idx / len(all_files) * 100)}%'
            )
            print(lbl_path)
            print(img_path)
            if not osp.exists(lbl_path) or not osp.exists(img_path):
                print('Image or label does not exist')
                continue  # skip samples with a missing image or label

            self.path_lbls.append(lbl_path)
            self.path_imgs.append(img_path)

            # convert to the standardised label format and save it to std_lbl_fol
            lbl_data = self.__convert_data(read_json(lbl_path))
            write_json(osp.join(std_lbl_fol, name), lbl_data)
            self.path_std_lbls.append(osp.join(std_lbl_fol, name))
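
The converted label files produced here are consumed by the later examples. Judging from the fields accessed there (value, location, formal_key, key_type), one record is assumed to look roughly like this; the concrete values are illustrative only:

# Assumed shape of one item in a standardised label file (illustrative values).
example_item = {
    'value': 'Invoice No. 12345',       # text of the textline
    'location': [120, 48, 300, 24],     # x, y, width, height in pixels
    'formal_key': 'invoice_number',     # hypothetical key name
    'key_type': 'value',                # combined with formal_key into the class name, e.g. 'value_invoice_number'
}
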
Example #4
    def process(self, img_path, textline_path):
        img = cv2.imread(img_path)
        textlines = read_json(textline_path)
        doc_h, doc_w = img.shape[0], img.shape[1]
        mask = np.zeros((doc_h, doc_w), dtype='int16')   # chargrid: one character index per pixel

        # rasterise each textline: every character gets an equal-width slot
        # filled with its index in the corpus
        for item in textlines:
            w, h = item['location'][2], item['location'][3]
            char_w, char_h = int(w / (len(item['value']) + 1)), int(h)
            cur_x, cur_y = int(item['location'][0]), int(item['location'][1])

            for char in item['value']:
                mask[cur_y:cur_y + char_h,
                     cur_x:cur_x + char_w] = self.get_char2idx(char)
                cur_x += char_w
        tensor = torch.from_numpy(mask)

        # resize/pad to the model input size, one-hot encode and add a batch dimension
        img = transforms.functional.to_pil_image(tensor)
        img = np.asarray(img)
        augmented = self.aug(image=img)
        img = augmented['image'].astype('int16')
        img = torch.from_numpy(img).type(torch.LongTensor)
        img = img.unsqueeze(0)
        img = self.enc.process(img)
        img = img.unsqueeze(0)
        img = img.to(self.device)

        # argmax over the class dimension of the first model output gives the
        # per-pixel class-index map
        output = self.model(img)
        pred = output[0].data.max(1)[1].cpu().numpy().reshape(self.size, self.size)

        return pred
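
A sketch of how the returned class-index map could be mapped back to class names and coloured for inspection, reusing the hypothetical predictor object from the sketch after Example #2; the file names are made up, only target and all_color come from the constructor:

import cv2
import numpy as np

# Hypothetical post-processing of the class-index map returned by process().
pred = predictor.process('sample.png', 'sample_textlines.json')

# map predicted indices back to class names (target is sorted, index i corresponds to target[i])
idx2class = {i: name for i, name in enumerate(predictor.target)}
print(sorted(idx2class[i] for i in np.unique(pred)))

# paint every pixel with the colour assigned to its predicted class
vis = np.zeros((pred.shape[0], pred.shape[1], 3), dtype='uint8')
for i in np.unique(pred):
    vis[pred == i] = predictor.all_color[i]
cv2.imwrite('prediction_vis.png', vis)
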
Example #5
    def __getitem__(self, idx):
        name = self.idx2name[idx]
        # print(name)
        # load the pre-generated chargrid tensor, semantic mask and object metadata
        tensor_path = osp.join(self.tensor_fol, name + '.pt')
        semantic_path = osp.join(self.semantic_fol, name + '.png')
        obj_path = osp.join(self.obj_fol, name + '.json')

        tensor = torch.load(tensor_path)
        semantic = Image.open(semantic_path)
        obj = read_json(obj_path)

        # convert to numpy so albumentations can transform image, mask and boxes together
        img = transforms.functional.to_pil_image(tensor)
        img = np.asarray(img)
        mask = np.asarray(semantic)
        ori_boxes, label_boxes = self.__getobjcoor__(obj)

        if self.transform:
            augmented = self.transform(image=img, mask=mask, bboxes=ori_boxes, lbl_id=label_boxes)
            img = augmented['image'].astype('int16')
            mask = augmented['mask'].astype('int16')
            boxes = augmented['bboxes']
            lbl_boxes = augmented['lbl_id']

            img = torch.from_numpy(img).type(torch.LongTensor)
            mask = torch.from_numpy(mask)
            # boxes are x_min, y_min, width, height
            boxes = torch.from_numpy(np.array(boxes)).type(torch.LongTensor)
            lbl_boxes = torch.from_numpy(np.array(lbl_boxes))

            img = img.unsqueeze(0)
            img = self.enc.process(img)   # one-hot encode the character indices

        # boxes and lbl_boxes are currently returned as empty placeholders
        return img, mask, torch.tensor([]), torch.tensor([])  # boxes, lbl_boxes
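
__getitem__ hands image, mask, bboxes and lbl_id to self.transform, so the transform is expected to be an albumentations pipeline with bbox support. A minimal sketch, assuming the boxes are in x, y, width, height (COCO) format as stored in the obj_gt files; this is not the project's actual training pipeline:

import albumentations as alb
import cv2

# Minimal transform compatible with __getitem__ above (a sketch, not the real pipeline).
# Nearest-neighbour interpolation (0) keeps the integer character indices intact,
# and label_fields wires up the extra 'lbl_id' argument.
size = 512
transform = alb.Compose(
    [
        alb.LongestMaxSize(size, interpolation=0),
        alb.PadIfNeeded(size, size, border_mode=cv2.BORDER_CONSTANT),
    ],
    bbox_params=alb.BboxParams(format='coco', label_fields=['lbl_id']),
)
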
Example #6
    def __generate_object(self, img_path, lbl_path):
        name = osp.basename(img_path)
        name = name.split('.')[0]

        img = cv2.imread(img_path, 0)                     # read as grayscale
        doc_h, doc_w = img.shape
        mask = np.zeros((doc_h, doc_w), dtype='int16')    # chargrid (character indices)
        gt = np.zeros((doc_h, doc_w), dtype='int16')      # semantic ground truth (class indices)
        obj = []

        lbl_data = read_json(lbl_path)
        # rasterise every textline: characters go into the chargrid (mask) and the
        # textline's class index goes into the semantic ground truth (gt)
        for item in lbl_data:
            w, h = item['location'][2], item['location'][3]
            char_w, char_h = int(w / (len(item['value']) + 1)), int(h) // 2
            cur_x, cur_y = int(item['location'][0]), int(item['location'][1])
            fm_key = item['formal_key']
            k_type = item['key_type']
            if fm_key == 'other':
                cl = 'other'
            else:
                cl = k_type + '_' + fm_key
            for char in item['value']:
                mask[cur_y:cur_y + char_h,
                     cur_x:cur_x + char_w] = self.get_char2idx(char)
                gt[cur_y:cur_y + char_h,
                   cur_x:cur_x + char_w] = self.target2idx[cl]
                cur_x += char_w

            std_item = {
                'text': item['value'],
                'box': [
                    int(item['location'][0]),
                    int(item['location'][1]),
                    int(item['location'][2]),
                    int(item['location'][3])
                ],
                'class': cl
            }
            obj.append(std_item)

        tensor = torch.from_numpy(mask)

        # side-by-side debug view: original image | chargrid mask | inverted class map
        debug_img = np.zeros((doc_h, doc_w * 3))
        debug_img[:, :doc_w] = img
        debug_img[:, doc_w:doc_w * 2] = mask
        debug_img[:, doc_w * 2:doc_w * 3] = 255 - gt

        return debug_img, tensor, gt, obj
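
A sketch of a driver method that would persist the four returned objects under the folder layout the dataset in Example #1 reads from. Only the folder names and file extensions are taken from Example #1; the method name, the root argument, the debug folder and the path lists are assumptions:

    def generate_all(self, root):
        # Hypothetical driver inside the same generator class.
        for img_path, lbl_path in zip(self.path_imgs, self.path_std_lbls):
            name = osp.basename(img_path).split('.')[0]
            debug_img, tensor, gt, obj = self.__generate_object(img_path, lbl_path)

            cv2.imwrite(osp.join(root, 'debug', name + '.png'),
                        debug_img.astype('uint8'))                              # debug visualisation
            torch.save(tensor, osp.join(root, 'tensor_input', name + '.pt'))    # chargrid input
            cv2.imwrite(osp.join(root, 'semantic_gt', name + '.png'),
                        gt.astype('uint8'))                                     # per-pixel class indices
            write_json(osp.join(root, 'obj_gt', name + '.json'), obj)           # textline boxes and classes
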
Example #7
    def generate_target(self):
        # collect every class name that occurs in the standardised labels
        for lbl_path in self.path_std_lbls:
            lbl_data = read_json(lbl_path)
            for item in lbl_data:
                fm_key = item['formal_key']
                k_type = item['key_type']
                if fm_key == 'other':
                    cl = 'other'
                else:
                    cl = k_type + '_' + fm_key

                if cl not in self.target:
                    self.target.append(cl)
        # fix a deterministic ordering and build the class -> index mapping
        self.target = sorted(self.target)
        for idx, target in enumerate(self.target):
            self.target2idx[target] = idx

        write_json('./data/target.json', self.target)
        write_json('./data/target2idx.json', self.target2idx)
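
For illustration, with the naming scheme above (key_type + '_' + formal_key, plus the special other class), the two files written here end up holding something like the following; the concrete class names are made up:

# Illustrative content only; the real keys depend on the dataset's formal_key/key_type values.
target = ['key_company', 'other', 'value_company', 'value_total']                    # sorted class names
target2idx = {'key_company': 0, 'other': 1, 'value_company': 2, 'value_total': 3}    # class -> index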