Example #1
def output_ranklist(img_results, img_infos, out_file):
    """Output the worst results for debugging.

    Args:
        img_results (list[dict]): Image result list.
        img_infos (list[dict]): Image information list.
        out_file (str): The output file path.

    Returns:
        sorted_results (list[dict]): Image results sorted by hmean.
    """
    assert utils.is_type_list(img_results, dict)
    assert utils.is_type_list(img_infos, dict)
    assert isinstance(out_file, str)
    assert out_file.endswith('json')

    sorted_results = []
    for idx, result in enumerate(img_results):
        name = img_infos[idx]['file_name']
        img_result = result
        img_result['file_name'] = name
        sorted_results.append(img_result)
    sorted_results = sorted(sorted_results,
                            key=itemgetter('hmean'),
                            reverse=False)

    mmcv.dump(sorted_results, file=out_file)

    return sorted_results
Example #2
def sort_vertex(points_x, points_y):
    """Sort box vertices in clockwise order from left-top first.

    Args:
        points_x (list[float]): x of four vertices.
        points_y (list[float]): y of four vertices.
    Returns:
        sorted_points_x (list[float]): x of sorted four vertices.
        sorted_points_y (list[float]): y of sorted four vertices.
    """
    assert utils.is_type_list(points_x, float) or utils.is_type_list(
        points_x, int)
    assert utils.is_type_list(points_y, float) or utils.is_type_list(
        points_y, int)
    assert len(points_x) == 4
    assert len(points_y) == 4

    x = np.array(points_x)
    y = np.array(points_y)
    center_x = np.sum(x) * 0.25
    center_y = np.sum(y) * 0.25

    x_arr = np.array(x - center_x)
    y_arr = np.array(y - center_y)

    angle = np.arctan2(y_arr, x_arr) * 180.0 / np.pi
    sort_idx = np.argsort(angle)

    sorted_points_x, sorted_points_y = [], []
    for i in range(4):
        sorted_points_x.append(points_x[sort_idx[i]])
        sorted_points_y.append(points_y[sort_idx[i]])

    return convert_canonical(sorted_points_x, sorted_points_y)
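A minimal standalone sketch of the angle-based ordering used above, assuming only numpy. For this axis-aligned square the angles around the box center already yield a clockwise order (in image coordinates, with y pointing down) starting from the left-top vertex; convert_canonical only rotates the list when the left-top vertex does not come first.

import numpy as np

points_x = [10.0, 0.0, 0.0, 10.0]
points_y = [0.0, 0.0, 10.0, 10.0]
center_x, center_y = np.mean(points_x), np.mean(points_y)
# Angle of each vertex around the box center, as in sort_vertex() above.
angle = np.arctan2(np.array(points_y) - center_y,
                   np.array(points_x) - center_x) * 180.0 / np.pi
sort_idx = np.argsort(angle)
print([(points_x[i], points_y[i]) for i in sort_idx])
# [(0.0, 0.0), (10.0, 0.0), (10.0, 10.0), (0.0, 10.0)]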
Example #3
def show_feature(features, names, to_uint8, out_file=None):
    """Visualize a list of feature maps.

    Args:
        features (list(ndarray)): The feature map list.
        names (list(str)): The visualized title list.
        to_uint8 (list(1|0)): The list indicating whether to convert
            feature maps to uint8.
        out_file (str): The output file name. If set to None,
            the output image will be shown without saving.
    """
    assert utils.is_ndarray_list(features)
    assert utils.is_type_list(names, str)
    assert utils.is_type_list(to_uint8, int)
    assert utils.is_none_or_type(out_file, str)
    assert utils.equal_len(features, names, to_uint8)

    num = len(features)
    row = col = math.ceil(math.sqrt(num))

    for i, (f, n) in enumerate(zip(features, names)):
        plt.subplot(row, col, i + 1)
        plt.title(n)
        if to_uint8[i]:
            f = f.astype(np.uint8)
        plt.imshow(f)
    if out_file is None:
        plt.show()
    else:
        plt.savefig(out_file)
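A hypothetical call to show_feature above, assuming the mmocr utility asserts it relies on (is_ndarray_list, is_type_list, ...) are importable. Two random feature maps are placed on a 2x2 grid (only two cells are used) and written to disk instead of being displayed.

import numpy as np

feature_raw = np.random.rand(32, 32)            # float map, shown as-is
feature_scaled = np.random.rand(32, 32) * 255   # converted to uint8 before plotting
show_feature([feature_raw, feature_scaled],
             names=['raw', 'scaled'],
             to_uint8=[0, 1],
             out_file='features.png')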
Example #4
    def __init__(self, indexes=[1], scores=[0.9]):
        assert utils.is_type_list(indexes, int)
        assert utils.is_type_list(scores, float)
        assert utils.equal_len(indexes, scores)

        self.indexes = indexes
        self.scores = scores
Example #5
    def __init__(self,
                 in_channels,
                 stem_channels,
                 block_cfgs,
                 arch_layers,
                 arch_channels,
                 strides,
                 out_indices=None,
                 plugins=None,
                 init_cfg=[
                     dict(type='Xavier', layer='Conv2d'),
                     dict(type='Constant', val=1, layer='BatchNorm2d'),
                 ]):
        super().__init__(init_cfg=init_cfg)
        assert isinstance(in_channels, int)
        assert isinstance(stem_channels, int) or utils.is_type_list(
            stem_channels, int)
        assert utils.is_type_list(arch_layers, int)
        assert utils.is_type_list(arch_channels, int)
        assert utils.is_type_list(strides, tuple) or utils.is_type_list(
            strides, int)
        assert len(arch_layers) == len(arch_channels) == len(strides)
        assert out_indices is None or isinstance(out_indices, (list, tuple))

        self.out_indices = out_indices
        self._make_stem_layer(in_channels, stem_channels)
        self.num_stages = len(arch_layers)
        self.use_plugins = False
        self.arch_channels = arch_channels
        self.res_layers = []
        if plugins is not None:
            self.plugin_ahead_names = []
            self.plugin_after_names = []
            self.use_plugins = True
        for i, num_blocks in enumerate(arch_layers):
            stride = strides[i]
            channel = arch_channels[i]

            if self.use_plugins:
                self._make_stage_plugins(plugins, stage_idx=i)

            res_layer = self._make_layer(
                block_cfgs=block_cfgs,
                inplanes=self.inplanes,
                planes=channel,
                blocks=num_blocks,
                stride=stride,
            )
            self.inplanes = channel
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)
Example #6
    def forward_train(self, feat, out_enc, targets_dict, img_metas):
        if img_metas is not None:
            assert utils.is_type_list(img_metas, dict)
            assert len(img_metas) == feat.size(0)

        valid_ratios = None
        if img_metas is not None:
            valid_ratios = [
                img_meta.get('valid_ratio', 1.0) for img_meta in img_metas
            ] if self.mask else None

        targets = targets_dict['padded_targets'].to(feat.device)
        tgt_embedding = self.embedding(targets)
        # bsz * seq_len * emb_dim
        out_enc = out_enc.unsqueeze(1)
        # bsz * 1 * emb_dim
        in_dec = torch.cat((out_enc, tgt_embedding), dim=1)
        # bsz * (seq_len + 1) * C
        out_dec = self._2d_attention(in_dec,
                                     feat,
                                     out_enc,
                                     valid_ratios=valid_ratios)
        # bsz * (seq_len + 1) * num_classes

        return out_dec[:, 1:, :]  # bsz * seq_len * num_classes
Example #7
def get_gt_masks(ann_infos):
    """Get ground truth masks and ignored masks.

    Args:
        ann_infos (list[dict]): Each dict contains annotation
            infos of one image, containing the following keys:
            masks, masks_ignore.
    Returns:
        gt_masks (list[list[list[int]]]): Ground truth masks.
        gt_masks_ignore (list[list[list[int]]]): Ignored masks.
    """
    assert utils.is_type_list(ann_infos, dict)

    gt_masks = []
    gt_masks_ignore = []
    for ann_info in ann_infos:
        masks = ann_info['masks']
        mask_gt = []
        for mask in masks:
            assert len(mask[0]) >= 8 and len(mask[0]) % 2 == 0
            mask_gt.append(mask[0])
        gt_masks.append(mask_gt)

        masks_ignore = ann_info['masks_ignore']
        mask_gt_ignore = []
        for mask_ignore in masks_ignore:
            assert len(mask_ignore[0]) >= 8 and len(mask_ignore[0]) % 2 == 0
            mask_gt_ignore.append(mask_ignore[0])
        gt_masks_ignore.append(mask_gt_ignore)

    return gt_masks, gt_masks_ignore
Example #8
    def str2tensor(self, strings):
        """Convert text-string to ctc-loss input tensor.

        Args:
            strings (list[str]): ['hello', 'world'].
        Returns:
            dict (str: tensor | list[tensor]):
                tensors (list[tensor]): [torch.Tensor([1,2,3,3,4]),
                    torch.Tensor([5,4,6,3,7])].
                flatten_targets (tensor): torch.Tensor([1,2,3,3,4,5,4,6,3,7]).
                target_lengths (tensor): torch.IntTensor([5,5]).
        """
        assert utils.is_type_list(strings, str)

        tensors = []
        indexes = self.str2idx(strings)
        for index in indexes:
            tensor = torch.IntTensor(index)
            tensors.append(tensor)
        target_lengths = torch.IntTensor([len(t) for t in tensors])
        flatten_target = torch.cat(tensors)

        return {
            'targets': tensors,
            'flatten_targets': flatten_target,
            'target_lengths': target_lengths
        }
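A standalone sketch of the same CTC target packing, replacing self.str2idx with a toy character-to-index mapping so the snippet runs on its own; the mapping is an assumption, not the converter's real dictionary.

import torch

char2idx = {c: i for i, c in enumerate('helowrd', start=1)}  # toy dictionary
strings = ['hello', 'world']
tensors = [torch.IntTensor([char2idx[c] for c in s]) for s in strings]
target_lengths = torch.IntTensor([len(t) for t in tensors])
flatten_targets = torch.cat(tensors)
print(flatten_targets.tolist())  # [1, 2, 3, 3, 4, 5, 4, 6, 3, 7]
print(target_lengths.tolist())   # [5, 5]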
Example #9
    def tensor2idx(self, output, img_metas, topk=1, return_topk=False):
        """Convert model output tensor to index-list.
        Args:
            output (tensor): The model outputs with size: N * T * C.
            img_metas (list[dict]): Each dict contains one image info.
            topk (int): The highest k classes to be returned.
            return_topk (bool): Whether to return topk or just top1.
        Returns:
            indexes (list[list[int]]): [[1,2,3,3,4], [5,4,6,3,7]].
            scores (list[list[float]]): [[0.9,0.8,0.95,0.97,0.94],
                [0.9,0.9,0.98,0.97,0.96]]
                (
                    indexes_topk (list[list[list[int]->len=topk]]):
                    scores_topk (list[list[list[float]->len=topk]])
                ).
        """
        assert utils.is_type_list(img_metas, dict)
        assert len(img_metas) == output.size(0)
        assert isinstance(topk, int)
        assert topk >= 1

        valid_ratios = [
            img_meta.get('valid_ratio', 1.0) for img_meta in img_metas
        ]

        batch_size = output.size(0)
        output = F.softmax(output, dim=2)
        output = output.cpu().detach()
        batch_topk_value, batch_topk_idx = output.topk(topk, dim=2)
        batch_max_idx = batch_topk_idx[:, :, 0]
        scores_topk, indexes_topk = [], []
        scores, indexes = [], []
        feat_len = output.size(1)
        for b in range(batch_size):
            valid_ratio = valid_ratios[b]
            decode_len = min(feat_len, math.ceil(feat_len * valid_ratio))
            pred = batch_max_idx[b, :]
            select_idx = []
            prev_idx = self.blank_idx
            for t in range(decode_len):
                tmp_value = pred[t].item()
                if tmp_value not in (prev_idx, self.blank_idx):
                    select_idx.append(t)
                prev_idx = tmp_value
            select_idx = torch.LongTensor(select_idx)
            topk_value = torch.index_select(batch_topk_value[b, :, :], 0,
                                            select_idx)  # valid_seqlen * topk
            topk_idx = torch.index_select(batch_topk_idx[b, :, :], 0,
                                          select_idx)
            topk_idx_list, topk_value_list = topk_idx.numpy().tolist(
            ), topk_value.numpy().tolist()
            indexes_topk.append(topk_idx_list)
            scores_topk.append(topk_value_list)
            indexes.append([x[0] for x in topk_idx_list])
            scores.append([x[0] for x in topk_value_list])

        if return_topk:
            return indexes_topk, scores_topk

        return indexes, scores
Example #10
    def __init__(self,
                 datasets,
                 separate_eval=True,
                 pipeline=None,
                 force_apply=False,
                 **kwargs):
        new_datasets = []
        if pipeline is not None:
            assert isinstance(
                pipeline,
                list), 'pipeline must be list[dict] or list[list[dict]].'
            if is_type_list(pipeline, dict):
                self._apply_pipeline(datasets, pipeline, force_apply)
                new_datasets = datasets
            elif is_2dlist(pipeline):
                assert is_2dlist(datasets)
                assert len(datasets) == len(pipeline)
                for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                    self._apply_pipeline(sub_datasets, tmp_pipeline,
                                         force_apply)
                    new_datasets.extend(sub_datasets)
        else:
            if is_2dlist(datasets):
                for sub_datasets in datasets:
                    new_datasets.extend(sub_datasets)
            else:
                new_datasets = datasets
        datasets = [build_dataset(c, kwargs) for c in new_datasets]
        super().__init__(datasets, separate_eval)
Example #11
    def tesseract_recog_inference(self, imgs, **kwargs):
        """Inference image(s) with the tesseract recognizer.

        Args:
            imgs (ndarray or list[ndarray]): Image(s) to run inference on.

        Returns:
            result (dict): Predicted results.
        """
        is_batch = True
        if isinstance(imgs, np.ndarray):
            is_batch = False
            imgs = [imgs]
        assert is_type_list(imgs, np.ndarray)
        api = self.get_tesserocr_api()

        results = []
        for img in imgs:
            image = Image.fromarray(img)
            api.SetImage(image)
            api.SetRectangle(0, 0, img.shape[1], img.shape[0])
            # Remove beginning and trailing spaces from Tesseract
            text = api.GetUTF8Text().strip()
            conf = api.MeanTextConf() / 100
            results.append({'text': text, 'score': conf})

        # close tesserocr api
        api.End()

        if not is_batch:
            return results[0]
        else:
            return results
Example #12
    def str2tensor(self, strings):
        """
        Convert text-string into tensor.
        Args:
            strings (list[str]): ['hello', 'world']
        Returns:
            dict (str: Tensor | list[tensor]):
                tensors (list[Tensor]): [torch.Tensor([1,2,3,3,4]),
                    torch.Tensor([5,4,6,3,7])]
                padded_targets (Tensor(bsz * max_seq_len))
        """
        assert utils.is_type_list(strings, str)

        tensors, padded_targets = [], []
        indexes = self.str2idx(strings)
        for index in indexes:
            tensor = torch.LongTensor(index)
            tensors.append(tensor)
            # target tensor for loss
            src_target = torch.LongTensor(tensor.size(0) + 2).fill_(0)
            src_target[-1] = self.end_idx
            src_target[0] = self.start_idx
            src_target[1:-1] = tensor
            padded_target = (torch.ones(self.max_seq_len) *
                             self.padding_idx).long()
            char_num = src_target.size(0)
            if char_num > self.max_seq_len:
                padded_target = src_target[:self.max_seq_len]
            else:
                padded_target[:char_num] = src_target
            padded_targets.append(padded_target)
        padded_targets = torch.stack(padded_targets, 0).long()

        return {'targets': tensors, 'padded_targets': padded_targets}
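A standalone sketch of the start/end/pad packing performed above, assuming toy special indices (start_idx=0, end_idx=1, padding_idx=2) and max_seq_len=8 instead of the converter's real configuration.

import torch

start_idx, end_idx, padding_idx, max_seq_len = 0, 1, 2, 8
tensor = torch.LongTensor([5, 6, 7])                 # encoded characters
src_target = torch.LongTensor(tensor.size(0) + 2).fill_(0)
src_target[0], src_target[-1] = start_idx, end_idx
src_target[1:-1] = tensor
padded_target = (torch.ones(max_seq_len) * padding_idx).long()
padded_target[:src_target.size(0)] = src_target
print(padded_target.tolist())  # [0, 5, 6, 7, 1, 2, 2, 2]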
Example #13
    def forward(self, feat, img_metas=None):
        if img_metas is not None:
            assert utils.is_type_list(img_metas, dict)
            assert len(img_metas) == feat.size(0)

        valid_ratios = None
        if img_metas is not None:
            valid_ratios = [
                img_meta.get('valid_ratio', 1.0) for img_meta in img_metas
            ] if self.mask else None

        h_feat = feat.size(2)
        feat_v = F.max_pool2d(
            feat, kernel_size=(h_feat, 1), stride=1, padding=0)
        feat_v = feat_v.squeeze(2)  # bsz * C * W
        feat_v = feat_v.permute(0, 2, 1).contiguous()  # bsz * W * C

        holistic_feat = self.rnn_encoder(feat_v)[0]  # bsz * T * C

        if valid_ratios is not None:
            valid_hf = []
            T = holistic_feat.size(1)
            for i, valid_ratio in enumerate(valid_ratios):
                valid_step = min(T, math.ceil(T * valid_ratio)) - 1
                valid_hf.append(holistic_feat[i, valid_step, :])
            valid_hf = torch.stack(valid_hf, dim=0)
        else:
            valid_hf = holistic_feat[:, -1, :]  # bsz * C

        holistic_feat = self.linear(valid_hf)  # bsz * C

        return holistic_feat
Example #14
def show_img_boundary(img, boundary):
    """Show image and instance boundaires.

    Args:
        img (ndarray): The input image.
        boundary (list[float or int]): The input boundary.
    """
    assert isinstance(img, np.ndarray)
    assert utils.is_type_list(boundary, int) or utils.is_type_list(
        boundary, float)

    cv2.polylines(img, [np.array(boundary).astype(np.int32).reshape(-1, 1, 2)],
                  True,
                  color=(0, 255, 0),
                  thickness=1)
    plt.imshow(img)
    plt.show()
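A hypothetical call to show_img_boundary above (it relies on the snippet's cv2, matplotlib and mmocr utils imports): a square boundary drawn in green on a blank image and displayed.

import numpy as np

img = np.full((100, 100, 3), 255, dtype=np.uint8)  # blank white image
boundary = [10, 10, 90, 10, 90, 90, 10, 90]        # x1, y1, ..., x4, y4
show_img_boundary(img, boundary)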
Example #15
    def _parse_anno_info(self, annotations):
        """Parse char boxes annotations.
        Args:
            annotations (list[dict]): Annotations of one image, where
                each dict is for one character.

        Returns:
            dict: A dict containing the following keys:

                - chars (list[str]): List of character strings.
                - char_rects (list[list[float]]): List of char box, with each
                    in style of rectangle: [x_min, y_min, x_max, y_max].
                - char_quads (list[list[float]]): List of char box, with each
                    in style of quadrangle: [x1, y1, x2, y2, x3, y3, x4, y4].
        """

        assert utils.is_type_list(annotations, dict)
        assert 'char_box' in annotations[0]
        assert 'char_text' in annotations[0]
        assert len(annotations[0]['char_box']) in [4, 8]

        chars, char_rects, char_quads = [], [], []
        for ann in annotations:
            char_box = ann['char_box']
            if len(char_box) == 4:
                char_box_type = ann.get('char_box_type', 'xyxy')
                if char_box_type == 'xyxy':
                    char_rects.append(char_box)
                    char_quads.append([
                        char_box[0], char_box[1], char_box[2], char_box[1],
                        char_box[2], char_box[3], char_box[0], char_box[3]
                    ])
                elif char_box_type == 'xywh':
                    x1, y1, w, h = char_box
                    x2 = x1 + w
                    y2 = y1 + h
                    char_rects.append([x1, y1, x2, y2])
                    char_quads.append([x1, y1, x2, y1, x2, y2, x1, y2])
                else:
                    raise ValueError(f'invalid char_box_type {char_box_type}')
            elif len(char_box) == 8:
                x_list, y_list = [], []
                for i in range(4):
                    x_list.append(char_box[2 * i])
                    y_list.append(char_box[2 * i + 1])
                x_max, x_min = max(x_list), min(x_list)
                y_max, y_min = max(y_list), min(y_list)
                char_rects.append([x_min, y_min, x_max, y_max])
                char_quads.append(char_box)
            else:
                raise Exception(
                    f'invalid num in char box: {len(char_box)} not in (4, 8)')
            chars.append(ann['char_text'])

        ann = dict(chars=chars, char_rects=char_rects, char_quads=char_quads)

        return ann
Example #16
def warp_img(src_img,
             box,
             jitter_flag=False,
             jitter_ratio_x=0.5,
             jitter_ratio_y=0.1):
    """Crop box area from image using opencv warpPerspective w/o box jitter.

    Args:
        src_img (np.array): Image before cropping.
        box (list[float | int]): Coordinates of quadrangle.
    """
    assert utils.is_type_list(box, float) or utils.is_type_list(box, int)
    assert len(box) == 8

    h, w = src_img.shape[:2]
    points_x = [min(max(x, 0), w) for x in box[0:8:2]]
    points_y = [min(max(y, 0), h) for y in box[1:9:2]]

    points_x, points_y = sort_vertex(points_x, points_y)

    if jitter_flag:
        box_jitter(
            points_x,
            points_y,
            jitter_ratio_x=jitter_ratio_x,
            jitter_ratio_y=jitter_ratio_y)

    points = [Point(points_x[i], points_y[i]) for i in range(4)]
    edges = [
        LineString([points[i], points[i + 1 if i < 3 else 0]])
        for i in range(4)
    ]

    pts1 = np.float32([[points[i].x, points[i].y] for i in range(4)])
    box_width = max(edges[0].length, edges[2].length)
    box_height = max(edges[1].length, edges[3].length)

    pts2 = np.float32([[0, 0], [box_width, 0], [box_width, box_height],
                       [0, box_height]])
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst_img = cv2.warpPerspective(src_img, M,
                                  (int(box_width), int(box_height)))

    return dst_img
Example #17
    def forward_test(self, feat, out_enc, img_metas):
        """
        Args:
            feat (Tensor): Tensor of shape :math:`(N, D_i, H, W)`.
            out_enc (Tensor): Encoder output of shape
                :math:`(N, D_m, H, W)`.
            img_metas (dict): A dict that contains meta information of input
                images. Preferably with the key ``valid_ratio``.

        Returns:
            Tensor: A raw logit tensor of shape :math:`(N, T, C-1)`.
        """
        if img_metas is not None:
            assert utils.is_type_list(img_metas, dict)
            assert len(img_metas) == feat.size(0)

        valid_ratios = None
        if img_metas is not None:
            valid_ratios = [
                img_meta.get('valid_ratio', 1.0) for img_meta in img_metas
            ] if self.mask else None

        seq_len = self.max_seq_len

        bsz = feat.size(0)
        start_token = torch.full((bsz, ),
                                 self.start_idx,
                                 device=feat.device,
                                 dtype=torch.long)
        # bsz
        start_token = self.embedding(start_token)
        # bsz * emb_dim
        start_token = start_token.unsqueeze(1).expand(-1, seq_len, -1)
        # bsz * seq_len * emb_dim
        out_enc = out_enc.unsqueeze(1)
        # bsz * 1 * emb_dim
        decoder_input = torch.cat((out_enc, start_token), dim=1)
        # bsz * (seq_len + 1) * emb_dim

        outputs = []
        for i in range(1, seq_len + 1):
            decoder_output = self._2d_attention(decoder_input,
                                                feat,
                                                out_enc,
                                                valid_ratios=valid_ratios)
            char_output = decoder_output[:, i, :]  # bsz * num_classes
            char_output = F.softmax(char_output, -1)
            outputs.append(char_output)
            _, max_idx = torch.max(char_output, dim=1, keepdim=False)
            char_embedding = self.embedding(max_idx)  # bsz * emb_dim
            if i < seq_len:
                decoder_input[:, i + 1, :] = char_embedding

        outputs = torch.stack(outputs, 1)  # bsz * seq_len * num_classes

        return outputs
Example #18
    def __init__(self, max_ratio=None, box_type=None):
        if max_ratio is None:
            max_ratio = [0.1, 0.2, 0.1, 0.2]
        else:
            assert utils.is_type_list(max_ratio, float)
            assert len(max_ratio) == 4
        assert box_type is None or box_type in ('char_rects', 'char_quads')

        self.max_ratio = max_ratio
        self.box_type = box_type
Example #19
def sort_vertex(points_x, points_y):
    """Sort box vertices in clockwise order from left-top first.

    Args:
        points_x (list[float]): x of four vertices.
        points_y (list[float]): y of four vertices.
    Returns:
        sorted_points_x (list[float]): x of sorted four vertices.
        sorted_points_y (list[float]): y of sorted four vertices.
    """
    assert utils.is_type_list(points_x, (float, int))
    assert utils.is_type_list(points_y, (float, int))
    assert len(points_x) == 4
    assert len(points_y) == 4
    vertices = np.stack((points_x, points_y), axis=-1).astype(np.float32)
    vertices = _sort_vertex(vertices)
    sorted_points_x = list(vertices[:, 0])
    sorted_points_y = list(vertices[:, 1])
    return sorted_points_x, sorted_points_y
Example #20
def crop_img(src_img,
             box,
             long_edge_pad_ratio=0.4,
             short_edge_pad_ratio=0.2,
             debug=False):
    """Crop text region with their bounding box.

    Args:
        src_img (np.array): The original image.
        box (list[float | int]): Points of quadrangle.
        long_edge_pad_ratio (float): Box pad ratio for long edge
            corresponding to font size.
        short_edge_pad_ratio (float): Box pad ratio for short edge
            corresponding to font size.
    """
    assert utils.is_type_list(box, float) or utils.is_type_list(box, int)
    assert len(box) == 8
    assert 0. <= long_edge_pad_ratio < 1.0
    assert 0. <= short_edge_pad_ratio < 1.0

    h, w = src_img.shape[:2]
    points_x = np.clip(np.array(box[0::2]), 0, w)
    points_y = np.clip(np.array(box[1::2]), 0, h)

    box_width = np.max(points_x) - np.min(points_x)
    box_height = np.max(points_y) - np.min(points_y)
    font_size = min(box_height, box_width)

    if box_height < box_width:
        horizontal_pad = long_edge_pad_ratio * font_size
        vertical_pad = short_edge_pad_ratio * font_size
    else:
        horizontal_pad = short_edge_pad_ratio * font_size
        vertical_pad = long_edge_pad_ratio * font_size

    left = np.clip(int(np.min(points_x) - horizontal_pad), 0, w)
    top = np.clip(int(np.min(points_y) - vertical_pad), 0, h)
    right = np.clip(int(np.max(points_x) + horizontal_pad), 0, w)
    bottom = np.clip(int(np.max(points_y) + vertical_pad), 0, h)

    dst_img = src_img[top:bottom, left:right]

    return dst_img
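A hypothetical call to the padded crop_img above on a dummy image; the box and pad ratios are made-up values chosen to show how the crop grows beyond the tight bounding rectangle.

import numpy as np

src_img = np.zeros((100, 200, 3), dtype=np.uint8)
box = [20, 30, 120, 30, 120, 60, 20, 60]  # axis-aligned quadrangle, 100 x 30
patch = crop_img(src_img, box, long_edge_pad_ratio=0.4, short_edge_pad_ratio=0.2)
print(patch.shape)  # (42, 124, 3): the 30-px font size adds 12 px horizontal
                    # and 6 px vertical padding on each side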
Example #21
    def forward_train(self, feat, out_enc, targets_dict, img_metas=None):
        if img_metas is not None:
            assert utils.is_type_list(img_metas, dict)
            assert len(img_metas) == feat.size(0)

        valid_ratios = None
        if img_metas is not None:
            valid_ratios = [
                img_meta.get('valid_ratio', 1.0) for img_meta in img_metas
            ] if self.mask else None

        if self.train_mode:
            targets = targets_dict['padded_targets'].to(feat.device)
            tgt_embedding = self.embedding(targets)

        outputs = []
        start_token = torch.full((feat.size(0), ),
                                 self.start_idx,
                                 device=feat.device,
                                 dtype=torch.long)
        start_token = self.embedding(start_token)
        for i in range(-1, self.max_seq_len):
            if i == -1:
                if self.dec_gru:
                    hx1 = cx1 = self.rnn_decoder_layer1(out_enc)
                    hx2 = cx2 = self.rnn_decoder_layer2(hx1)
                else:
                    hx1, cx1 = self.rnn_decoder_layer1(out_enc)
                    hx2, cx2 = self.rnn_decoder_layer2(hx1)
                if not self.train_mode:
                    y_prev = start_token
            else:
                if self.train_mode:
                    y_prev = tgt_embedding[:, i, :]
                y, hx1, cx1, hx2, cx2 = self._2d_attention(
                    y_prev,
                    feat,
                    out_enc,
                    hx1,
                    cx1,
                    hx2,
                    cx2,
                    valid_ratios=valid_ratios)
                if self.train_mode:
                    y = self.pred_dropout(y)
                else:
                    y = F.softmax(y, -1)
                    _, max_idx = torch.max(y, dim=1, keepdim=False)
                    char_embedding = self.embedding(max_idx)
                    y_prev = char_embedding
                outputs.append(y)

        outputs = torch.stack(outputs, 1)

        return outputs
Example #22
    def __init__(self,
                 in_channels=3,
                 stem_channels=32,
                 base_channels=32,
                 arch_settings=[3, 4, 6, 6, 3],
                 strides=[2, 1, 2, 1, 1],
                 out_indices=None,
                 last_stage_pool=False,
                 init_cfg=[
                     dict(type='Xavier', layer='Conv2d'),
                     dict(type='Constant', val=1, layer='BatchNorm2d')
                 ]):
        super().__init__(init_cfg=init_cfg)
        assert isinstance(in_channels, int)
        assert isinstance(stem_channels, int)
        assert utils.is_type_list(arch_settings, int)
        assert utils.is_type_list(strides, int)
        assert len(arch_settings) == len(strides)
        assert out_indices is None or isinstance(out_indices, (list, tuple))
        assert isinstance(last_stage_pool, bool)

        self.out_indices = out_indices
        self.last_stage_pool = last_stage_pool
        self.block = BasicBlock
        self.inplanes = stem_channels

        self._make_stem_layer(in_channels, stem_channels)

        self.res_layers = []
        planes = base_channels
        for i, num_blocks in enumerate(arch_settings):
            stride = strides[i]
            res_layer = self._make_layer(block=self.block,
                                         inplanes=self.inplanes,
                                         planes=planes,
                                         blocks=num_blocks,
                                         stride=stride)
            self.inplanes = planes * self.block.expansion
            planes *= 2
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)
Example #23
def convert_canonical(points_x, points_y):
    """Make left-top be first.

    Args:
        points_x (list[float]): x of four vertices.
        points_y (list[float]): y of four vertices.
    Returns:
        sorted_points_x (list[float]): x of sorted four vertices.
        sorted_points_y (list[float]): y of sorted four vertices.
    """
    assert utils.is_type_list(points_x, float) or utils.is_type_list(
        points_x, int)
    assert utils.is_type_list(points_y, float) or utils.is_type_list(
        points_y, int)
    assert len(points_x) == 4
    assert len(points_y) == 4

    points = [Point(points_x[i], points_y[i]) for i in range(4)]

    polygon = Polygon([(p.x, p.y) for p in points])
    min_x, min_y, _, _ = polygon.bounds
    points_to_lefttop = [
        LineString([points[i], Point(min_x, min_y)]) for i in range(4)
    ]
    distances = np.array([line.length for line in points_to_lefttop])
    sort_dist_idx = np.argsort(distances)
    lefttop_idx = sort_dist_idx[0]

    if lefttop_idx == 0:
        point_orders = [0, 1, 2, 3]
    elif lefttop_idx == 1:
        point_orders = [1, 2, 3, 0]
    elif lefttop_idx == 2:
        point_orders = [2, 3, 0, 1]
    else:
        point_orders = [3, 0, 1, 2]

    sorted_points_x = [points_x[i] for i in point_orders]
    sorted_points_y = [points_y[j] for j in point_orders]

    return sorted_points_x, sorted_points_y
Example #24
def crop_img(src_img, box):
    """Crop box area to rectangle.

    Args:
        src_img (np.array): Image before cropping.
        box (list[float | int]): Points of quadrangle.
    """
    assert utils.is_type_list(box, float) or utils.is_type_list(box, int)
    assert len(box) == 8

    h, w = src_img.shape[:2]
    points_x = [min(max(x, 0), w) for x in box[0:8:2]]
    points_y = [min(max(y, 0), h) for y in box[1:9:2]]

    left = int(min(points_x))
    top = int(min(points_y))
    right = int(max(points_x))
    bottom = int(max(points_y))

    dst_img = src_img[top:bottom, left:right]

    return dst_img
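A hypothetical call to this simpler crop_img, which keeps only the axis-aligned bounding rectangle of the quadrangle; the dummy image and box are assumptions for illustration.

import numpy as np

src_img = np.zeros((100, 200, 3), dtype=np.uint8)
box = [20, 30, 120, 35, 118, 60, 22, 58]  # slightly skewed quadrangle
patch = crop_img(src_img, box)
print(patch.shape)  # (30, 100, 3): rows 30..60, columns 20..120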
Example #25
    def evaluate(self,
                 results,
                 metric='hmean-iou',
                 logger=None,
                 score_thr=None,
                 min_score_thr=0.3,
                 max_score_thr=0.9,
                 step=0.1,
                 rank_list=None,
                 **kwargs):
        """Evaluate the hmean metric.

        Args:
            results (list[dict]): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            score_thr (float): Deprecated. Please use min_score_thr instead.
            min_score_thr (float): Minimum score threshold of prediction map.
            max_score_thr (float): Maximum score threshold of prediction map.
            step (float): The spacing between score thresholds.
            rank_list (str): JSON file used to save the evaluation result
                of each image after ranking.
        Returns:
            dict[dict[str: float]]: The evaluation results.
        """
        assert utils.is_type_list(results, dict)

        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['hmean-iou', 'hmean-ic13']
        metrics = set(metrics) & set(allowed_metrics)

        img_infos = []
        ann_infos = []
        for i in range(len(self)):
            img_info = {'filename': self.data_infos[i]['file_name']}
            img_infos.append(img_info)
            ann_infos.append(self.get_ann_info(i))

        eval_results = eval_hmean(results,
                                  img_infos,
                                  ann_infos,
                                  metrics=metrics,
                                  score_thr=score_thr,
                                  min_score_thr=min_score_thr,
                                  max_score_thr=max_score_thr,
                                  step=step,
                                  logger=logger,
                                  rank_list=rank_list)

        return eval_results
Example #26
    def __init__(self,
                 box_keys=['x1', 'y1', 'x2', 'y2', 'x3', 'y3', 'x4', 'y4'],
                 jitter_prob=0.5,
                 max_jitter_ratio_x=0.05,
                 max_jitter_ratio_y=0.02):
        assert utils.is_type_list(box_keys, str)
        assert 0 <= jitter_prob <= 1
        assert 0 <= max_jitter_ratio_x <= 1
        assert 0 <= max_jitter_ratio_y <= 1

        self.box_keys = box_keys
        self.jitter_prob = jitter_prob
        self.max_jitter_ratio_x = max_jitter_ratio_x
        self.max_jitter_ratio_y = max_jitter_ratio_y
Example #27
    def _parse_anno_info(self, annotations):
        """Parse annotations of boxes, texts and labels for one image.
        Args:
            annotations (list[dict]): Annotations of one image, where
                each dict is for one character.

        Returns:
            dict: A dict containing the following keys:

                - bboxes (np.ndarray): Bbox in one image with shape:
                    box_num * 4.
                - relations (np.ndarray): Relations between bbox with shape:
                    box_num * box_num * D.
                - texts (np.ndarray): Text index with shape:
                    box_num * text_max_len.
                - labels (np.ndarray): Box Labels with shape:
                    box_num * (box_num + 1).
        """

        assert utils.is_type_list(annotations, dict)
        assert 'box' in annotations[0]
        assert 'text' in annotations[0]
        assert 'label' in annotations[0]

        boxes, texts, text_inds, labels, edges = [], [], [], [], []
        for ann in annotations:
            box = ann['box']
            x_list, y_list = box[0:8:2], box[1:9:2]
            sorted_x_list, sorted_y_list = sort_vertex(x_list, y_list)
            sorted_box = []
            for x, y in zip(sorted_x_list, sorted_y_list):
                sorted_box.append(x)
                sorted_box.append(y)
            boxes.append(sorted_box)
            text = ann['text']
            texts.append(ann['text'])
            text_ind = [self.dict[c] for c in text if c in self.dict]
            text_inds.append(text_ind)
            labels.append(ann['label'])
            edges.append(ann.get('edge', 0))

        ann_infos = dict(
            boxes=boxes,
            texts=texts,
            text_inds=text_inds,
            edges=edges,
            labels=labels)

        return self.list_to_numpy(ann_infos)
Example #28
    def forward_test(self, feat, out_enc, img_metas):
        """
        Args:
            feat (Tensor): Tensor of shape :math:`(N, D_i, H, W)`.
            out_enc (Tensor): Encoder output of shape
                :math:`(N, D_m, H, W)`.
            img_metas (dict): A dict that contains meta information of input
                images. Preferably with the key ``valid_ratio``.

        Returns:
            Tensor: A raw logit tensor of shape :math:`(N, T, C-1)`.
        """
        if img_metas is not None:
            assert utils.is_type_list(img_metas, dict)
            assert len(img_metas) == feat.size(0)

        return self.forward_train(feat, out_enc, None, img_metas)
Example #29
    def __init__(self,
                 datasets,
                 separate_eval=True,
                 show_mean_scores='auto',
                 pipeline=None,
                 force_apply=False,
                 **kwargs):
        new_datasets = []
        if pipeline is not None:
            assert isinstance(
                pipeline,
                list), 'pipeline must be list[dict] or list[list[dict]].'
            if is_type_list(pipeline, dict):
                self._apply_pipeline(datasets, pipeline, force_apply)
                new_datasets = datasets
            elif is_2dlist(pipeline):
                assert is_2dlist(datasets)
                assert len(datasets) == len(pipeline)
                for sub_datasets, tmp_pipeline in zip(datasets, pipeline):
                    self._apply_pipeline(sub_datasets, tmp_pipeline,
                                         force_apply)
                    new_datasets.extend(sub_datasets)
        else:
            if is_2dlist(datasets):
                for sub_datasets in datasets:
                    new_datasets.extend(sub_datasets)
            else:
                new_datasets = datasets
        datasets = [build_dataset(c, kwargs) for c in new_datasets]
        super().__init__(datasets, separate_eval)

        if not separate_eval:
            raise NotImplementedError(
                'Evaluating datasets as a whole is not'
                ' supported yet. Please use "separate_eval=True"')

        assert isinstance(show_mean_scores, bool) or show_mean_scores == 'auto'
        if show_mean_scores == 'auto':
            show_mean_scores = len(self.datasets) > 1
        self.show_mean_scores = show_mean_scores
        if show_mean_scores is True or show_mean_scores == 'auto' and len(
                self.datasets) > 1:
            if len(set([type(ds) for ds in self.datasets])) != 1:
                raise NotImplementedError(
                    'To compute mean evaluation scores, all datasets '
                    'must have the same type')
Example #30
    def forward_test(self, feat, out_enc, img_metas):
        if img_metas is not None:
            assert utils.is_type_list(img_metas, dict)
            assert len(img_metas) == feat.size(0)

        valid_ratios = None
        if img_metas is not None:
            valid_ratios = [
                img_meta.get('valid_ratio', 1.0) for img_meta in img_metas
            ] if self.mask else None

        seq_len = self.max_seq_len

        bsz = feat.size(0)
        start_token = torch.full((bsz, ),
                                 self.start_idx,
                                 device=feat.device,
                                 dtype=torch.long)
        # bsz
        start_token = self.embedding(start_token)
        # bsz * emb_dim
        start_token = start_token.unsqueeze(1).expand(-1, seq_len, -1)
        # bsz * seq_len * emb_dim
        out_enc = out_enc.unsqueeze(1)
        # bsz * 1 * emb_dim
        decoder_input = torch.cat((out_enc, start_token), dim=1)
        # bsz * (seq_len + 1) * emb_dim

        outputs = []
        for i in range(1, seq_len + 1):
            decoder_output = self._2d_attention(decoder_input,
                                                feat,
                                                out_enc,
                                                valid_ratios=valid_ratios)
            char_output = decoder_output[:, i, :]  # bsz * num_classes
            char_output = F.softmax(char_output, -1)
            outputs.append(char_output)
            _, max_idx = torch.max(char_output, dim=1, keepdim=False)
            char_embedding = self.embedding(max_idx)  # bsz * emb_dim
            if i < seq_len:
                decoder_input[:, i + 1, :] = char_embedding

        outputs = torch.stack(outputs, 1)  # bsz * seq_len * num_classes

        return outputs