Example #1
def get_annot_from_image_data(self, image_data: ImageData) -> Dict:
    image_data = self.filter_image_data(image_data)
    image = image_data.open_image()
    height, width, _ = image.shape
    annot = {
        "description": "",
        "tags": [],
        "size": {
            "height": height,
            "width": width
        },
        "objects": [{
            "description": "",
            "geometryType": "rectangle",
            "tags": [{
                "name": str(bbox_data.label),
                "value": None,
            }],
            "classTitle": "bbox",
            "points": {
                "exterior": [[int(bbox_data.xmin), int(bbox_data.ymin)],
                             [int(bbox_data.xmax), int(bbox_data.ymax)]],
                "interior": []
            }
        } for bbox_data in image_data.bboxes_data]
    }
    return annot
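The dictionary built above appears to follow the Supervisely annotation layout (image size plus objects with classTitle, geometryType and exterior/interior points). A minimal usage sketch, assuming an existing ImageData instance and that the method is exposed on some converter object (both converter and image_data are illustrative names, not taken from this section):

import json

# Hypothetical usage: dump the annotation dict to a JSON file.
annot = converter.get_annot_from_image_data(image_data)
with open('annotation.json', 'w') as f:
    json.dump(annot, f, indent=2)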
Example #2
def apply_perspective_transform_to_image_data(
        image_data: ImageData, perspective_matrix: np.ndarray,
        result_width: int, result_height: int,
        allow_negative_and_large_coords: bool,
        remove_bad_coords: bool) -> ImageData:
    image = image_data.open_image()
    image = cv2.warpPerspective(image, perspective_matrix,
                                (result_width, result_height))

    image_data = copy.deepcopy(image_data)
    image_data.keypoints = apply_perspective_transform_to_points(
        image_data.keypoints, perspective_matrix, result_width, result_height,
        allow_negative_and_large_coords, remove_bad_coords)
    image_data.bboxes_data = [
        _apply_perspective_transform_to_bbox_data(
            bbox_data, perspective_matrix, result_width, result_height,
            allow_negative_and_large_coords, remove_bad_coords)
        for bbox_data in image_data.bboxes_data
    ]
    image_data.bboxes_data = [
        bbox_data for bbox_data in image_data.bboxes_data
        if bbox_data is not None
    ]
    image_data.image_path = None
    image_data.image = image

    return image_data
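A usage sketch for the function above, assuming an existing image_data; the 3x3 matrix is built with cv2.getPerspectiveTransform from four point correspondences, and the corner coordinates below are made up for illustration:

import cv2
import numpy as np

# Map four source corners onto a 400x300 output rectangle.
src_points = np.float32([[12, 40], [610, 25], [630, 470], [5, 455]])
dst_points = np.float32([[0, 0], [400, 0], [400, 300], [0, 300]])
perspective_matrix = cv2.getPerspectiveTransform(src_points, dst_points)

warped_image_data = apply_perspective_transform_to_image_data(
    image_data, perspective_matrix,
    result_width=400, result_height=300,
    allow_negative_and_large_coords=False,
    remove_bad_coords=True)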
Example #3
def thumbnail_image_data(image_data: ImageData,
                         size: Tuple[int, int],
                         resample: Optional[int] = None) -> ImageData:
    image = image_data.open_image()
    new_width, new_height = get_thumbnail_resize(Image.fromarray(image), size)
    return resize_image_data(image_data, (new_width, new_height),
                             resample=resample)
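A usage sketch, assuming an existing image_data; size is the box the thumbnail should fit into (the concrete values and resample filter are illustrative, and get_thumbnail_resize is assumed to preserve the aspect ratio like PIL's Image.thumbnail):

from PIL import Image

# Shrink the image (and, via resize_image_data, all bbox/keypoint coordinates)
# so it fits inside a 256x256 box.
small_image_data = thumbnail_image_data(image_data, size=(256, 256),
                                        resample=Image.BILINEAR)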
Example #4
def convert_image_data_to_polygon_label(
    image_data: ImageData,
    from_name: str,
    polygonlabels: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    polygon_labels = []
    for bbox_data in image_data.bboxes_data:
        polygon_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                "points": [[x * 100 / im_width, y * 100 / im_height]
                           for x, y in bbox_data.keypoints],
                "polygonlabels": [polygonlabels]
            },
            "from_name": from_name,
            "to_name": "image",
            "type": "polygonlabels"
        })
    return polygon_labels
Example #5
def convert_image_data_to_rectangle_labels(
    image_data: ImageData,
    from_name: str,
    to_name: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    rectangle_labels = []
    for bbox_data in image_data.bboxes_data:
        rectangle_labels.append({
            "original_width": im_width,
            "original_height": im_height,
            "image_rotation": 0,
            "value": {
                "x": bbox_data.xmin / im_width * 100,
                "y": bbox_data.ymin / im_height * 100,
                "width": (bbox_data.xmax - bbox_data.xmin) / im_width * 100,
                "height": (bbox_data.ymax - bbox_data.ymin) / im_height * 100,
                "rotation": 0,
                "rectanglelabels": [bbox_data.label]
            },
            "from_name": from_name,
            "to_name": to_name,
            "type": "rectanglelabels"
        })
    return rectangle_labels
Example #6
def convert_image_data_to_keypoint_label(
    image_data: ImageData,
    from_name: str,
    keypointlabels: str,
) -> List[Dict]:
    if image_data.image_path is not None:
        im_width, im_height = imagesize.get(image_data.image_path)
    else:
        im_height, im_width, _ = image_data.open_image().shape
    keypoint_labels = []
    for bbox_data in image_data.bboxes_data:
        for keypoint in bbox_data.keypoints:
            x, y = keypoint[0], keypoint[1]
            keypoint_labels.append({
                "original_width": im_width,
                "original_height": im_height,
                "image_rotation": 0,
                "value": {
                    "x": x * 100 / im_width,
                    "y": y * 100 / im_height,
                    "width": 0.55,
                    "keypointlabels": [keypointlabels]
                },
                "from_name": from_name,
                "to_name": "image",
                "type": "keypointlabels"
            })
    return keypoint_labels
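Examples #4, #5 and #6 all emit Label Studio result items (polygonlabels, rectanglelabels, keypointlabels). A hedged sketch of wrapping such a result list into a pre-annotation task, assuming the common Label Studio "predictions" import layout; the image URL and the from_name/to_name values are placeholders:

import json

# Hypothetical pre-annotation task built from the rectangle converter above.
result = convert_image_data_to_rectangle_labels(
    image_data, from_name='label', to_name='image')
task = {
    'data': {'image': 'http://example.com/images/0001.jpg'},
    'predictions': [{'result': result}],
}
with open('tasks.json', 'w') as f:
    json.dump([task], f, indent=2)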
Example #7
def rotate_image_data(image_data: ImageData,
                      angle: float,
                      border_mode: Optional[int] = None,
                      border_value: Optional[Tuple[int, int, int]] = None) -> ImageData:
    if abs(angle) <= 1e-6:
        return image_data

    image = image_data.open_image()
    height, width, _ = image.shape
    image_center = width // 2, height // 2

    angle_to_factor = {0: 0, 90: 1, 180: 2, 270: 3}
    angle = angle % 360
    rotated_image_data = copy.deepcopy(image_data)

    if angle in angle_to_factor:
        factor = angle_to_factor[angle]
        rotated_image = np.rot90(image, factor)
        rotated_image_data.keypoints = rotate_keypoints90(
            image_data.keypoints, factor, width, height)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data90(bbox_data, factor, width, height)
            for bbox_data in rotated_image_data.bboxes_data
        ]
    else:
        # grab the rotation matrix
        rotation_mat = cv2.getRotationMatrix2D(image_center, angle, 1.)
        # compute the new bounding dimensions of the image
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])
        bound_w = int(height * abs_sin + width * abs_cos)
        bound_h = int(height * abs_cos + width * abs_sin)
        # adjust the rotation matrix to take into account translation
        rotation_mat[0, 2] += bound_w / 2 - image_center[0]
        rotation_mat[1, 2] += bound_h / 2 - image_center[1]

        rotated_image = cv2.warpAffine(image,
                                       rotation_mat, (bound_w, bound_h),
                                       borderMode=border_mode,
                                       borderValue=border_value)
        new_height, new_width, _ = rotated_image.shape
        rotated_image_data = copy.deepcopy(image_data)
        rotated_image_data.keypoints = rotate_keypoints(
            image_data.keypoints, rotation_mat, new_height, new_width)
        keypoints = []
        for (x, y) in rotated_image_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        rotated_image_data.keypoints = np.array(keypoints).reshape(-1, 2)
        rotated_image_data.bboxes_data = [
            _rotate_bbox_data(bbox_data, rotation_mat, new_height, new_width)
            for bbox_data in rotated_image_data.bboxes_data
        ]

    rotated_image_data.image_path = None  # It applies to all bboxes_data inside
    rotated_image_data.image = rotated_image

    return rotated_image_data
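A usage sketch, assuming an existing image_data; right-angle rotations go through np.rot90, any other angle through cv2.warpAffine, where border_mode and border_value control how the exposed corners are filled (the values below are illustrative):

import cv2

# Exact 90-degree rotation: handled without interpolation.
rotated_90 = rotate_image_data(image_data, angle=90)

# Arbitrary rotation: fill the exposed corners with white.
rotated = rotate_image_data(image_data, angle=17.5,
                            border_mode=cv2.BORDER_CONSTANT,
                            border_value=(255, 255, 255))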
Example #8
def resize_image_data(image_data: ImageData,
                      size: Tuple[int, int],
                      resample: Optional[int] = None) -> ImageData:
    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    old_height, old_width, _ = image.shape
    image = Image.fromarray(image)
    image = image.resize(size, resample=resample)
    image = np.array(image)
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        bbox_data.xmin = max(
            0, min(int(bbox_data.xmin * (new_width / old_width)),
                   new_width - 1))
        bbox_data.ymin = max(
            0,
            min(int(bbox_data.ymin * (new_height / old_height)),
                new_height - 1))
        bbox_data.xmax = max(
            0, min(int(bbox_data.xmax * (new_width / old_width)),
                   new_width - 1))
        bbox_data.ymax = max(
            0,
            min(int(bbox_data.ymax * (new_height / old_height)),
                new_height - 1))
        bbox_data.keypoints[:, 0] = (bbox_data.keypoints[:, 0] *
                                     (new_width / old_width)).astype(int)
        bbox_data.keypoints[:, 1] = (bbox_data.keypoints[:, 1] *
                                     (new_height / old_height)).astype(int)
        bbox_data.keypoints = bbox_data.keypoints.astype(int)
        bbox_data.cropped_image = None
        keypoints = []
        for (x, y) in bbox_data.keypoints:
            x = max(0, min(x, new_width - 1))
            y = max(0, min(y, new_height - 1))
            keypoints.append([x, y])
        bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)
    image_data.keypoints[:, 0] = (image_data.keypoints[:, 0] *
                                  (new_width / old_width)).astype(int)
    image_data.keypoints[:, 1] = (image_data.keypoints[:, 1] *
                                  (new_height / old_height)).astype(int)
    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x, new_width - 1))
        y = max(0, min(y, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)
    image_data.image_path = None
    image_data.image = image

    return image_data
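A usage sketch, assuming an existing image_data; size follows PIL's (width, height) convention since the array is resized through Image.resize, and the resample filter is an illustrative choice:

from PIL import Image

# Resize to 640x480; bboxes and keypoints are rescaled and clamped accordingly.
resized_image_data = resize_image_data(image_data, size=(640, 480),
                                       resample=Image.BILINEAR)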
Example #9
def tf_record_from_image_data(image_data: ImageData,
                              label_map: Dict[str, int],
                              use_thumbnail: Optional[Tuple[int, int]] = None):
    filename = image_data.image_path
    encoded_filename = str(filename).encode('utf8')

    true_bboxes = np.array(
        [[bbox_data.xmin, bbox_data.ymin, bbox_data.xmax, bbox_data.ymax]
         for bbox_data in image_data.bboxes_data],
        dtype=float)
    image = image_data.open_image()
    height, width, _ = image.shape
    if len(true_bboxes) > 0:
        normalized_true_bboxes = true_bboxes.copy()
        normalized_true_bboxes[:, [0, 2]] /= width
        normalized_true_bboxes[:, [1, 3]] /= height
        xmins = normalized_true_bboxes[:, 0]
        ymins = normalized_true_bboxes[:, 1]
        xmaxs = normalized_true_bboxes[:, 2]
        ymaxs = normalized_true_bboxes[:, 3]
    else:
        ymins, xmins, ymaxs, xmaxs = [], [], [], []

    encoded_jpg = BytesIO()
    image = Image.fromarray(image)
    if use_thumbnail:
        image.thumbnail(use_thumbnail)
    image.save(encoded_jpg, format='JPEG')
    encoded_jpg = encoded_jpg.getvalue()
    image_format = b'jpg'

    class_names = [bbox_data.label for bbox_data in image_data.bboxes_data]
    encoded_class_names = [
        class_name.encode('utf-8') for class_name in class_names
    ]
    classes = [label_map[class_name] for class_name in class_names]

    tf_record = create_tf_record(height=height,
                                 width=width,
                                 encoded_filename=encoded_filename,
                                 encoded_jpg=encoded_jpg,
                                 image_format=image_format,
                                 xmins=xmins,
                                 ymins=ymins,
                                 xmaxs=xmaxs,
                                 ymaxs=ymaxs,
                                 encoded_class_names=encoded_class_names,
                                 classes=classes)
    return tf_record
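Assuming create_tf_record (not shown in this section) returns a tf.train.Example, the records produced above could be written to a TFRecord file roughly as follows; the label map, output path and images_data iterable are illustrative:

import tensorflow as tf

label_map = {'cat': 1, 'dog': 2}  # assumed class-name -> id mapping
with tf.io.TFRecordWriter('train.tfrecord') as writer:
    for image_data in images_data:  # assumed iterable of ImageData
        tf_record = tf_record_from_image_data(image_data, label_map,
                                              use_thumbnail=(1024, 1024))
        writer.write(tf_record.SerializeToString())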
Example #10
def concat_images_data(image_data_a: ImageData,
                       image_data_b: ImageData,
                       background_color_a: Optional[Tuple[int, int, int, int]] = None,
                       background_color_b: Optional[Tuple[int, int, int, int]] = None,
                       thumbnail_size_a: Optional[Tuple[int, int]] = None,
                       thumbnail_size_b: Optional[Tuple[int, int]] = None,
                       how: Literal['horizontally',
                                    'vertically'] = 'horizontally',
                       mode: Literal['L', 'RGB', 'RGBA'] = 'RGBA',
                       background_edge_width: int = 3,
                       between_edge_width: int = 0) -> ImageData:

    image_data_a = copy.deepcopy(image_data_a)
    image_data_b = copy.deepcopy(image_data_b)

    if image_data_a is None and image_data_b is not None:
        return image_data_b
    if image_data_a is not None and image_data_b is None:
        return image_data_a

    image_a = image_data_a.open_image()
    image_b = image_data_b.open_image()

    ha, wa = image_a.shape[:2]
    hb, wb = image_b.shape[:2]

    image = concat_images(image_a=image_a,
                          image_b=image_b,
                          background_color_a=background_color_a,
                          background_color_b=background_color_b,
                          thumbnail_size_a=thumbnail_size_a,
                          thumbnail_size_b=thumbnail_size_b,
                          how=how,
                          mode=mode,
                          background_edge_width=background_edge_width,
                          between_edge_width=between_edge_width)
    image_data_a_new_xmin, image_data_a_new_ymin = None, None
    image_data_b_new_xmin, image_data_b_new_ymin = None, None

    if how == 'horizontally':
        max_height = np.max([ha, hb])
        min_ha = max_height // 2 - ha // 2
        max_ha = max_height // 2 + ha // 2
        min_hb = max_height // 2 - hb // 2
        max_hb = max_height // 2 + hb // 2
        image_data_a_new_xmin = 0
        image_data_a_new_ymin = min_ha
        image_data_a_new_xmax = wa
        image_data_a_new_ymax = max_ha
        image_data_b_new_xmin = wa + between_edge_width
        image_data_b_new_ymin = min_hb
        image_data_b_new_xmax = wa + between_edge_width + wb
        image_data_b_new_ymax = max_hb

    elif how == 'vertically':
        max_width = np.max([wa, wb])
        min_wa = max_width // 2 - wa // 2
        max_wa = max_width // 2 + wa // 2
        min_wb = max_width // 2 - wb // 2
        max_wb = max_width // 2 + wb // 2
        image_data_a_new_xmin = min_wa
        image_data_a_new_ymin = 0
        image_data_a_new_xmax = max_wa
        image_data_a_new_ymax = ha
        image_data_b_new_xmin = min_wb
        image_data_b_new_ymin = ha + between_edge_width
        image_data_b_new_xmax = max_wb
        image_data_b_new_ymax = ha + between_edge_width + hb

    keypoints_a = image_data_a.keypoints
    keypoints_b = image_data_b.keypoints
    keypoints_a[:, 0] += image_data_a_new_xmin
    keypoints_a[:, 1] += image_data_a_new_ymin
    keypoints_b[:, 0] += image_data_b_new_xmin
    keypoints_b[:, 1] += image_data_b_new_ymin

    def _get_new_coords_for_bbox_data(bbox_data: BboxData, xmin: int,
                                      ymin: int):
        bbox_data.keypoints[:, 0] += xmin
        bbox_data.keypoints[:, 1] += ymin
        bbox_data.xmin += xmin
        bbox_data.ymin += ymin
        bbox_data.xmax += xmin
        bbox_data.ymax += ymin
        bbox_data.image = None
        bbox_data.image_path = None
        bbox_data.cropped_image = None
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            _get_new_coords_for_bbox_data(additional_bbox_data, xmin, ymin)

    for bbox_data in image_data_a.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_a_new_xmin,
                                      image_data_a_new_ymin)

    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_a.bboxes_data
    ]:
        bbox_data_a_into = [
            BboxData(xmin=image_data_a_new_xmin,
                     ymin=image_data_a_new_ymin,
                     xmax=image_data_a_new_xmax,
                     ymax=image_data_a_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_a.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_a_into = []
    image_data_a.bboxes_data = [
        bbox_data for bbox_data in image_data_a.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_a_into

    for bbox_data in image_data_b.bboxes_data:
        _get_new_coords_for_bbox_data(bbox_data, image_data_b_new_xmin,
                                      image_data_b_new_ymin)
    if 'concat_images_data__image_data' not in [
            bbox_data.label for bbox_data in image_data_b.bboxes_data
    ]:
        bbox_data_b_into = [
            BboxData(xmin=image_data_b_new_xmin,
                     ymin=image_data_b_new_ymin,
                     xmax=image_data_b_new_xmax,
                     ymax=image_data_b_new_ymax,
                     label='concat_images_data__image_data',
                     additional_bboxes_data=[
                         bbox_data for bbox_data in image_data_b.bboxes_data
                         if 'concat_images_data__image_data' != bbox_data.label
                     ])
        ]
    else:
        bbox_data_b_into = []
    image_data_b.bboxes_data = [
        bbox_data for bbox_data in image_data_b.bboxes_data
        if 'concat_images_data__image_data' == bbox_data.label
    ] + bbox_data_b_into

    image_data = ImageData(image_path=None,
                           image=image,
                           bboxes_data=image_data_a.bboxes_data +
                           image_data_b.bboxes_data,
                           label=None,
                           keypoints=np.concatenate([keypoints_a, keypoints_b],
                                                    axis=0),
                           additional_info={
                               **image_data_a.additional_info,
                               **image_data_b.additional_info
                           })

    return image_data
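A usage sketch, assuming two existing ImageData instances; the result holds both images side by side, with every original bbox re-anchored and wrapped under a synthetic 'concat_images_data__image_data' box (the gap width and mode are illustrative):

# Place image_data_b to the right of image_data_a with a 10 px gap.
combined = concat_images_data(image_data_a, image_data_b,
                              how='horizontally',
                              mode='RGB',
                              between_edge_width=10)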
Example #11
def crop_image_data(
    image_data: ImageData,
    xmin: int,
    ymin: int,
    xmax: int,
    ymax: int,
    allow_negative_and_large_coords: bool,
    remove_bad_coords: bool,
) -> ImageData:

    assert 0 <= xmin and 0 <= ymin
    assert xmin <= xmax and ymin <= ymax

    image_data = copy.deepcopy(image_data)
    image = image_data.open_image()
    height, width, _ = image.shape

    assert xmax <= width and ymax <= height

    image = image[ymin:ymax, xmin:xmax]
    new_height, new_width, _ = image.shape

    def resize_coords(bbox_data: BboxData):
        bbox_data.xmin = bbox_data.xmin - xmin
        bbox_data.ymin = bbox_data.ymin - ymin
        bbox_data.xmax = bbox_data.xmax - xmin
        bbox_data.ymax = bbox_data.ymax - ymin
        bbox_data.keypoints[:, 0] -= xmin
        bbox_data.keypoints[:, 1] -= ymin
        bbox_data.cropped_image = None
        if not allow_negative_and_large_coords:
            bbox_data.xmin = max(0, min(bbox_data.xmin, new_width - 1))
            bbox_data.ymin = max(0, min(bbox_data.ymin, new_height - 1))
            bbox_data.xmax = max(0, min(bbox_data.xmax, new_width - 1))
            bbox_data.ymax = max(0, min(bbox_data.ymax, new_height - 1))
            keypoints = []
            for (x, y) in bbox_data.keypoints:
                x = max(0, min(x, new_width - 1))
                y = max(0, min(y, new_height - 1))
                keypoints.append([x, y])
            bbox_data.keypoints = np.array(keypoints).reshape(-1, 2)
        for additional_bbox_data in bbox_data.additional_bboxes_data:
            resize_coords(additional_bbox_data)

    for bbox_data in image_data.bboxes_data:
        resize_coords(bbox_data)

    keypoints = []
    for (x, y) in image_data.keypoints:
        x = max(0, min(x - xmin, new_width - 1))
        y = max(0, min(y - ymin, new_height - 1))
        keypoints.append([x, y])
    image_data.keypoints = np.array(keypoints).reshape(-1, 2)

    def if_bbox_data_inside_crop(bbox_data: BboxData):
        bbox_data.keypoints = bbox_data.keypoints[(
            (bbox_data.keypoints[:, 0] >= 0) & (bbox_data.keypoints[:, 1] >= 0)
            & (bbox_data.keypoints[:, 0] < new_width) &
            (bbox_data.keypoints[:, 1] < new_height))]
        bbox_data.additional_bboxes_data = [
            additional_bbox_data
            for additional_bbox_data in bbox_data.additional_bboxes_data
            if if_bbox_data_inside_crop(additional_bbox_data)
        ]
        return (bbox_data.xmin >= 0 and bbox_data.ymin >= 0
                and bbox_data.xmax < new_width and bbox_data.ymax < new_height
                and bbox_data.xmin < bbox_data.xmax
                and bbox_data.ymin < bbox_data.ymax)

    if remove_bad_coords:
        image_data.bboxes_data = [
            bbox_data for bbox_data in image_data.bboxes_data
            if if_bbox_data_inside_crop(bbox_data)
        ]
        image_data.keypoints = image_data.keypoints[(
            (image_data.keypoints[:, 0] >= 0) &
            (image_data.keypoints[:, 1] >= 0) &
            (image_data.keypoints[:, 0] < new_width) &
            (image_data.keypoints[:, 1] < new_height))]

    image_data.image_path = None
    image_data.image = image

    return image_data
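A usage sketch, assuming an existing image_data; with remove_bad_coords=True, boxes and keypoints falling outside the crop window are dropped (the coordinates below are illustrative):

# Keep only the 300x200 window whose top-left corner is at (50, 40).
cropped = crop_image_data(image_data,
                          xmin=50, ymin=40, xmax=350, ymax=240,
                          allow_negative_and_large_coords=False,
                          remove_bad_coords=True)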
Example #12
def visualize_image_data(
    image_data: ImageData,
    use_labels: bool = False,
    score_type: Optional[Literal['detection', 'classification']] = None,
    filter_by_labels: Optional[List[str]] = None,
    known_labels: Optional[List[str]] = None,
    draw_base_labels_with_given_label_to_base_label_image:
        Optional[Callable[[str], np.ndarray]] = None,
    keypoints_radius: int = 5,
    include_additional_bboxes_data: bool = False,
    additional_bboxes_data_depth: Optional[int] = None,
    fontsize: int = 24,
    thickness: int = 4,
    return_as_pil_image: bool = False
) -> Union[np.ndarray, Image.Image]:
    image_data = get_image_data_filtered_by_labels(
        image_data=image_data,
        filter_by_labels=filter_by_labels
    )
    image = image_data.open_image()
    if include_additional_bboxes_data:
        bboxes_data = []

        def recursive_get_bboxes_data(bbox_data, depth):
            if additional_bboxes_data_depth is not None and depth > additional_bboxes_data_depth:
                return
            bboxes_data.append(bbox_data)
            for additional_bbox_data in bbox_data.additional_bboxes_data:
                recursive_get_bboxes_data(additional_bbox_data, depth=depth + 1)
        for bbox_data in image_data.bboxes_data:
            recursive_get_bboxes_data(bbox_data, depth=0)
    else:
        bboxes_data = image_data.bboxes_data
    labels = [bbox_data.label for bbox_data in bboxes_data]
    if known_labels is None:
        known_labels = list(set(labels))
    k_keypoints = [bbox_data.keypoints for bbox_data in bboxes_data]
    bboxes = np.array([
        (bbox_data.ymin, bbox_data.xmin, bbox_data.ymax, bbox_data.xmax)
        for bbox_data in bboxes_data
    ])
    angles = np.array([0. for _ in bboxes_data])
    if score_type == 'detection':
        scores = np.array([bbox_data.detection_score for bbox_data in bboxes_data])
        skip_scores = False
    elif score_type == 'classification':
        scores = np.array([bbox_data.classification_score for bbox_data in bboxes_data])
        skip_scores = False
    else:
        scores = None
        skip_scores = True

    image = visualize_boxes_and_labels_on_image_array(
        image=image,
        bboxes=bboxes,
        angles=angles,
        scores=scores,
        k_keypoints=k_keypoints,
        labels=labels,
        use_normalized_coordinates=False,
        skip_scores=skip_scores,
        skip_labels=not use_labels,
        groundtruth_box_visualization_color='lime',
        known_labels=known_labels,
        keypoints_radius=keypoints_radius,
        fontsize=fontsize,
        thickness=thickness
    )
    if len(image_data.keypoints) > 0:
        image_pil = Image.fromarray(image)
        draw = ImageDraw.Draw(image_pil)
        for idx, (x, y) in enumerate(image_data.keypoints):
            draw.pieslice(
                [(x-keypoints_radius, y-keypoints_radius), (x+keypoints_radius, y+keypoints_radius)], start=0, end=360,
                fill=STANDARD_COLORS_RGB[idx % len(STANDARD_COLORS_RGB)]
            )
        image = np.array(image_pil)
    if draw_base_labels_with_given_label_to_base_label_image is not None:
        for bbox_data in image_data.bboxes_data:
            base_label_image = draw_base_labels_with_given_label_to_base_label_image(bbox_data.label)
            draw_label_image(
                image=image,
                base_label_image=base_label_image,
                bbox_data=bbox_data,
                inplace=True
            )

    if return_as_pil_image:
        return Image.fromarray(image)

    return image
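A usage sketch, assuming an existing image_data whose bboxes carry detection_score values; scores are drawn only when score_type is given, and return_as_pil_image makes saving straightforward (the output path is illustrative):

# Draw boxes, labels and detection scores, then save the result.
pil_image = visualize_image_data(image_data,
                                 use_labels=True,
                                 score_type='detection',
                                 return_as_pil_image=True)
pil_image.save('visualization.png')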