示例#1
0
    def apply_left(self, img: PILImage,
                   text_bbox: BBox) -> Tuple[PILImage, BBox]:
        """Draw a vertical decoration line on the left side of the text.

        The canvas is widened on the left by in_offset + thickness +
        out_offset, the original image is pasted to the right of that
        margin, the line is drawn, and the bbox's left edge is extended
        to cover the added decoration.
        """
        in_offset, thickness, out_offset = self._get_lr_param()

        pad = in_offset + thickness + out_offset
        canvas = transparent_img((img.width + pad, img.height))
        canvas.paste(img, (pad, 0))

        draw = ImageDraw.Draw(canvas)

        # Re-anchor the bbox so its top-right corner sits at the canvas's
        # top-right corner, then step the left edge out past the gap.
        text_bbox.offset_(text_bbox.right_top, (canvas.width, 0))
        text_bbox.left -= in_offset

        line_xy = list(text_bbox.left_top) + list(text_bbox.left_bottom)
        draw.line(
            line_xy,
            fill=self._get_line_color(img, text_bbox),
            width=thickness,
        )

        # Extend the bbox over the line itself and the outer margin.
        text_bbox.left -= thickness + out_offset

        return canvas, text_bbox
示例#2
0
    def gen_single_corpus(self) -> Tuple[PILImage, str]:
        """Render one corpus sample onto a random background.

        Returns:
            Tuple of (rendered image, the text that was drawn).
        """
        font_text = self.corpus.sample()

        bg = self.bg_manager.get_bg()
        text_color = self.corpus.cfg.text_color_cfg.get_color(bg)
        text_mask = draw_text_on_bg(font_text,
                                    text_color,
                                    char_spacing=self.corpus.cfg.char_spacing)

        if self.cfg.corpus_effects is not None:
            text_mask, _ = self.cfg.corpus_effects.apply_effects(
                text_mask, BBox.from_size(text_mask.size))

        if self.cfg.perspective_transform is not None:
            transformer = PerspectiveTransform(self.cfg.perspective_transform)
            # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
            _ = transformer.get_transformed_size(text_mask.size)

            try:
                (
                    transformed_text_mask,
                    transformed_text_pnts,
                ) = transformer.do_warp_perspective(text_mask)
            except Exception as e:
                # Log which font/text failed so the bad sample can be
                # reproduced; sibling gen_single_corpus variants do the same.
                logger.exception(e)
                logger.error("perspective transform failed, font: %s, text: %s",
                             font_text.font_path, font_text.text)
                raise
        else:
            transformed_text_mask = text_mask

        img = self.paste_text_mask_on_bg(bg, transformed_text_mask)

        return img, font_text.text
示例#3
0
    def apply(self, img: PILImage, text_bbox: BBox) -> Tuple[PILImage, BBox]:
        """Pad the image by a random ratio and place it at a random offset.

        The text bbox origin is moved by the same offset so it still
        covers the pasted text.
        """
        extra_w = np.random.uniform(*self.w_ratio)
        extra_h = np.random.uniform(*self.h_ratio)
        padded_size = (
            int(img.width + img.width * extra_w),
            int(img.height + img.height * extra_h),
        )

        padded = transparent_img(padded_size)
        offset = random_xy_offset(img.size, padded_size)
        padded.paste(img, offset)

        return padded, text_bbox.move_origin(offset)
示例#4
0
    def apply(self, img: PILImage, text_bbox: BBox) -> Tuple[PILImage, BBox]:
        """Warp the image vertically (curve effect) via cv2.remap.

        Every pixel is shifted in y by self._remap_y(x, max_val); the
        returned bbox is grown to cover the displaced top/bottom rows of
        the original text bbox, then translated to the origin.
        """
        # Random curve amplitude drawn from the configured range.
        max_val = np.random.uniform(*self.amplitude)

        word_img = np.array(img)
        h, w = word_img.shape[:2]

        # Per-pixel source coordinates for cv2.remap: dst(y, x) = src(img_y, img_x).
        img_x = np.zeros((h, w), np.float32)
        img_y = np.zeros((h, w), np.float32)

        xmin = text_bbox.left
        xmax = text_bbox.right
        ymin = text_bbox.top
        ymax = text_bbox.bottom

        # Track how far the bbox's top and bottom rows move after remapping.
        remap_y_min = ymin
        remap_y_max = ymax

        for y in range(h):
            for x in range(w):
                # NOTE(review): called once per pixel even though the value
                # appears to depend only on (x, max_val) — if _remap_y is
                # deterministic this is hoistable to one call per column;
                # confirm in its definition before changing.
                remaped_y = y + self._remap_y(x, max_val)

                if y == ymin:
                    if remaped_y < remap_y_min:
                        remap_y_min = remaped_y

                if y == ymax:
                    if remaped_y > remap_y_max:
                        remap_y_max = remaped_y

                img_y[y, x] = remaped_y
                img_x[y, x] = x

        dst = cv2.remap(word_img, img_x, img_y, cv2.INTER_CUBIC)
        bbox = BBox(left=xmin, top=remap_y_min, right=xmax, bottom=remap_y_max)
        # Translate the bbox so its top-left corner lands at the origin.
        bbox = bbox.offset((bbox.left, bbox.top), (0, 0))
        return Image.fromarray(dst), bbox
示例#5
0
    def apply(self, text_bboxes: List[BBox], img_bboxes: List[BBox],) -> List[BBox]:
        """Lay out image bboxes horizontally with random spacing.

        Every bbox except the last is widened on the right by a random
        fraction of the average bbox height, then all bboxes are chained
        left-to-right center-aligned.
        """
        avg_height = sum(it.height for it in img_bboxes) / len(img_bboxes)

        # Widen each bbox but the last to create horizontal spacing.
        for box in img_bboxes[:-1]:
            scale = np.random.uniform(*self.h_spacing)
            box.right += int(avg_height * scale)

        merged = BBox.from_bboxes(img_bboxes)

        # Anchor the first bbox at the merged region's left-center, then
        # snap each following bbox to its predecessor's right-center.
        img_bboxes[0].offset_(img_bboxes[0].left_cnt, merged.left_cnt)
        for prev, cur in zip(img_bboxes, img_bboxes[1:]):
            cur.offset_(cur.left_cnt, prev.right_cnt)

        return img_bboxes
    def __call__(self, *args, **kwargs) -> Tuple[np.ndarray, str]:
        """Generate one sample: a normalized BGR ndarray and its text label."""
        try:
            use_layout = self._should_apply_layout()
            if use_layout:
                img, text = self.gen_multi_corpus()
            else:
                img, text = self.gen_single_corpus()

            if self.cfg.render_effects is not None:
                img, _ = self.cfg.render_effects.apply_effects(
                    img, BBox.from_size(img.size))

            # PIL RGB -> OpenCV BGR, then apply the configured normalization.
            np_img = cv2.cvtColor(np.array(img.convert("RGB")),
                                  cv2.COLOR_RGB2BGR)
            return self.norm(np_img), text
        except Exception as e:
            # Log full traceback before propagating to the caller.
            logger.exception(e)
            raise e
示例#7
0
    def __call__(self, *args, **kwargs) -> Tuple[np.ndarray, str]:
        """Generate one sample as a normalized BGR ndarray plus its label.

        When cfg.return_bg_and_mask is set, the returned image is three
        panels side by side: rendered image | cropped background | text mask.
        """
        try:
            if self._should_apply_layout():
                img, text, cropped_bg, transformed_text_mask = self.gen_multi_corpus(
                )
            else:
                img, text, cropped_bg, transformed_text_mask = self.gen_single_corpus(
                )

            if self.cfg.render_effects is not None:
                img, _ = self.cfg.render_effects.apply_effects(
                    img, BBox.from_size(img.size))

            if self.cfg.return_bg_and_mask:
                # Binarize the mask with Otsu, then invert it.
                gray_text_mask = np.array(transformed_text_mask.convert("L"))
                _, gray_text_mask = cv2.threshold(
                    gray_text_mask, 0, 255,
                    cv2.THRESH_BINARY | cv2.THRESH_OTSU)
                transformed_text_mask = Image.fromarray(255 - gray_text_mask)

                # Compose three panels horizontally: image | bg | mask.
                # The mask is pasted with itself as alpha mask.
                merge_target = Image.new("RGBA", (img.width * 3, img.height))
                merge_target.paste(img, (0, 0))
                merge_target.paste(cropped_bg, (img.width, 0))
                merge_target.paste(
                    transformed_text_mask,
                    (img.width * 2, 0),
                    mask=transformed_text_mask,
                )

                np_img = np.array(merge_target)
                np_img = cv2.cvtColor(np_img, cv2.COLOR_RGBA2BGR)
                np_img = self.norm(np_img)
            else:
                # Plain path: PIL RGB -> OpenCV BGR -> normalization.
                img = img.convert("RGB")
                np_img = np.array(img)
                np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
                np_img = self.norm(np_img)
            return np_img, text
        except Exception as e:
            # Log full traceback before propagating to the caller.
            logger.exception(e)
            raise e
示例#8
0
    def gen_single_corpus(self) -> Tuple[PILImage, str, PILImage, PILImage]:
        """Render one corpus sample and also return the bg crop and mask.

        Returns:
            Tuple of (rendered image, text, cropped background,
            transformed text mask).
        """
        font_text = self.corpus.sample()

        bg = self.bg_manager.get_bg()
        # NOTE(review): if both text_color_cfg values are None, text_color is
        # unbound below — presumably the config guarantees one is set; verify.
        if self.cfg.text_color_cfg is not None:
            text_color = self.cfg.text_color_cfg.get_color(bg)

        # corpus text_color has higher priority than RenderCfg.text_color_cfg
        if self.corpus.cfg.text_color_cfg is not None:
            text_color = self.corpus.cfg.text_color_cfg.get_color(bg)

        text_mask = draw_text_on_bg(font_text,
                                    text_color,
                                    char_spacing=self.corpus.cfg.char_spacing)

        if self.cfg.corpus_effects is not None:
            text_mask, _ = self.cfg.corpus_effects.apply_effects(
                text_mask, BBox.from_size(text_mask.size))

        if self.cfg.perspective_transform is not None:
            transformer = PerspectiveTransform(self.cfg.perspective_transform)
            # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
            _ = transformer.get_transformed_size(text_mask.size)

            try:
                (
                    transformed_text_mask,
                    transformed_text_pnts,
                ) = transformer.do_warp_perspective(text_mask)
            except Exception as e:
                logger.exception(e)
                # Fix: the original passed font_path as the logging format
                # string with stray positional args, which breaks rendering
                # of the record. Use lazy %-formatting instead.
                logger.error("perspective transform failed, font: %s, text: %s",
                             font_text.font_path, font_text.text)
                raise
        else:
            transformed_text_mask = text_mask

        img, cropped_bg = self.paste_text_mask_on_bg(bg, transformed_text_mask)

        return img, font_text.text, cropped_bg, transformed_text_mask
    def gen_single_corpus(self) -> Tuple[PILImage, str]:
        """Render one corpus sample and overlay per-character bboxes.

        Returns:
            Tuple of (rendered image with bbox overlay, the drawn text).
        """
        font_text = self.corpus.sample()

        bg = self.bg_manager.get_bg()
        text_color = self.corpus.cfg.text_color_cfg.get_color(bg)
        text_mask = draw_text_on_bg(font_text,
                                    text_color,
                                    char_spacing=self.corpus.cfg.char_spacing)

        if self.cfg.corpus_effects is not None:
            text_mask, _ = self.cfg.corpus_effects.apply_effects(
                text_mask, BBox.from_size(text_mask.size))

        if self.cfg.perspective_transform is not None:
            transformer = PerspectiveTransform(self.cfg.perspective_transform)
            # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
            _ = transformer.get_transformed_size(text_mask.size)

            try:
                (
                    transformed_text_mask,
                    transformed_text_pnts,
                ) = transformer.do_warp_perspective(text_mask)
            except Exception as e:
                logger.exception(e)
                # Fix: the original passed font_path as the logging format
                # string with stray positional args, which breaks rendering
                # of the record. Use lazy %-formatting instead.
                logger.error("perspective transform failed, font: %s, text: %s",
                             font_text.font_path, font_text.text)
                raise
        else:
            transformed_text_mask = text_mask

        img = self.paste_text_mask_on_bg(bg, transformed_text_mask)

        # After pasting the text mask on the background we draw bbox for each character on the transformed image.
        img = self.lay_bbox_over_image(image=img,
                                       font_text=font_text,
                                       text_color=text_color)

        return img, font_text.text
def test_offset():
    """offset_ must move the bbox so its left-center lands on the target."""
    big = BBox(0, 0, 100, 32)
    small = BBox(0, 0, 50, 16)

    # Snap small's left-center (0, 8) onto big's right-center (100, 16):
    # the whole bbox translates by (100, 8).
    small.offset_(small.left_cnt, big.right_cnt)
    assert small == BBox(100, 8, 150, 24)
    def gen_multi_corpus(self) -> Tuple[PILImage, str]:
        """Render several corpora onto one background using the layout.

        Each corpus is drawn into its own text mask, the layout positions
        the masks, they are merged, optionally perspective-warped and
        post-processed, and finally pasted onto the background.

        Returns:
            Tuple of (rendered image, merged text returned by the layout).

        Raises:
            PanicError: if the layout returns a different number of bboxes
                than it was given.
        """
        font_texts: List[FontText] = [it.sample() for it in self.corpus]

        bg = self.bg_manager.get_bg()

        # A render-level text color, when configured, overrides the
        # per-corpus colors below.
        text_color = None
        if self.cfg.text_color_cfg is not None:
            text_color = self.cfg.text_color_cfg.get_color(bg)

        text_masks, text_bboxes = [], []
        # enumerate instead of range(len(...)): i is still needed to index
        # the parallel self.corpus / self.cfg.corpus_effects sequences.
        for i, font_text in enumerate(font_texts):
            if text_color is None:
                _text_color = self.corpus[i].cfg.text_color_cfg.get_color(bg)
            else:
                _text_color = text_color
            text_mask = draw_text_on_bg(
                font_text,
                _text_color,
                char_spacing=self.corpus[i].cfg.char_spacing)

            text_bbox = BBox.from_size(text_mask.size)
            if self.cfg.corpus_effects is not None:
                effects = self.cfg.corpus_effects[i]
                if effects is not None:
                    text_mask, text_bbox = effects.apply_effects(
                        text_mask, text_bbox)
            text_masks.append(text_mask)
            text_bboxes.append(text_bbox)

        # The layout receives copies so the originals stay untouched if it
        # mutates its inputs.
        text_mask_bboxes, merged_text = self.layout(
            font_texts,
            [it.copy() for it in text_bboxes],
            [BBox.from_size(it.size) for it in text_masks],
        )
        if len(text_mask_bboxes) != len(text_bboxes):
            raise PanicError(
                "points and text_bboxes should have same length after layout output"
            )

        # Paste every mask into one transparent canvas at its laid-out spot.
        merged_bbox = BBox.from_bboxes(text_mask_bboxes)
        merged_text_mask = transparent_img(merged_bbox.size)
        for text_mask, bbox in zip(text_masks, text_mask_bboxes):
            merged_text_mask.paste(text_mask, bbox.left_top)

        if self.cfg.perspective_transform is not None:
            transformer = PerspectiveTransform(self.cfg.perspective_transform)
            # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
            _ = transformer.get_transformed_size(merged_text_mask.size)

            (
                transformed_text_mask,
                transformed_text_pnts,
            ) = transformer.do_warp_perspective(merged_text_mask)
        else:
            transformed_text_mask = merged_text_mask

        if self.cfg.layout_effects is not None:
            transformed_text_mask, _ = self.cfg.layout_effects.apply_effects(
                transformed_text_mask,
                BBox.from_size(transformed_text_mask.size))

        img = self.paste_text_mask_on_bg(bg, transformed_text_mask)

        return img, merged_text