Example #1
def _classification_to_kwiver_detections(classification, w, h):
    """
    Convert kwarray classifications to kwiver detected object sets

    Args:
        classification (bioharn.clf_predict.Classification)
        w (int): width of image
        h (int): height of image

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    detected_objects = DetectedObjectSet()

    if classification.data.get('prob', None) is not None:
        # If we have a probability for each class, use that
        class_names = list(classification.classes)
        class_prob = classification.prob
        detected_object_type = DetectedObjectType(class_names, class_prob)
    else:
        # Otherwise we only have the score for the predicted class
        class_name = classification.classes[classification.cidx]
        class_score = classification.conf
        detected_object_type = DetectedObjectType(class_name, class_score)

    bounding_box = BoundingBoxD(0, 0, w, h)
    detected_object = DetectedObject(bounding_box, classification.conf,
                                     detected_object_type)
    detected_objects.add(detected_object)
    return detected_objects
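
A minimal usage sketch for the converter above; the stand-in object and its values are hypothetical and only mirror the attributes the function reads (data, classes, cidx, conf) from the real bioharn Classification.

# Hypothetical stand-in mirroring the attributes read by the converter.
from types import SimpleNamespace

fake_clf = SimpleNamespace(
    data={'prob': None},          # no per-class probability vector available
    classes=['scallop', 'rock'],  # assumed class list
    cidx=0,                       # index of the predicted class
    conf=0.87,                    # confidence of the predicted class
)
# Yields a DetectedObjectSet with a single full-frame detection.
detected_objects = _classification_to_kwiver_detections(fake_clf, w=640, h=480)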
Example #2
    def detect(self, in_img_c):

        import tensorflow as tf
        import humanfriendly

        image_height = in_img_c.height()
        image_width = in_img_c.width()

        if (self.norm_image_type and self.norm_image_type != "none"):
            print("Normalizing input image")

            in_img = in_img_c.image().asarray().astype("uint16")

            bottom, top = self.get_scaling_values(self.norm_image_type, in_img,
                                                  image_height)
            in_img = self.lin_normalize_image(in_img, bottom, top)

            in_img = np.tile(in_img, (1, 1, 3))
        else:
            in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

        start_time = time.time()
        boxes, scores, classes = self.generate_detection(
            self.detection_graph, in_img)
        elapsed = time.time() - start_time
        print("Done running detector in {}".format(
            humanfriendly.format_timespan(elapsed)))

        good_boxes = []
        detections = DetectedObjectSet()

        for i in range(0, len(scores)):
            if (scores[i] >= self.confidence_thresh):
                bbox = boxes[i]
                good_boxes.append(bbox)

                top_rel = bbox[0]
                left_rel = bbox[1]
                bottom_rel = bbox[2]
                right_rel = bbox[3]

                xmin = left_rel * image_width
                ymin = top_rel * image_height
                xmax = right_rel * image_width
                ymax = bottom_rel * image_height

                dot = DetectedObjectType(self.category_name, scores[i])
                obj = DetectedObject(BoundingBoxD(xmin, ymin, xmax, ymax),
                                     scores[i], dot)
                detections.add(obj)

        print("Detected {}".format(len(good_boxes)))
        return detections
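
A small worked sketch of the box conversion above: the detector emits boxes in normalized [top, left, bottom, right] order, which are scaled to pixel coordinates before building the BoundingBoxD. The numbers are hypothetical.

rel_box = [0.25, 0.10, 0.75, 0.60]     # [top, left, bottom, right], normalized
image_width, image_height = 640, 480   # assumed frame size
xmin = rel_box[1] * image_width        # 64.0
ymin = rel_box[0] * image_height       # 120.0
xmax = rel_box[3] * image_width        # 384.0
ymax = rel_box[2] * image_height       # 360.0
bbox = BoundingBoxD(xmin, ymin, xmax, ymax)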
Example #3
def _create_detected_object_set():
    from kwiver.vital.types import (
        DetectedObject,
        DetectedObjectSet,
        BoundingBoxD,
    )

    dos = DetectedObjectSet()
    bbox = BoundingBoxD(0, 10, 100, 50)
    dos.add(DetectedObject(bbox, 0.2))
    dos.add(DetectedObject(bbox, 0.5))
    dos.add(DetectedObject(bbox, 0.4))

    return dos
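
A quick way to inspect the toy set above, reusing the accessor patterns that appear elsewhere in these examples (len(), iteration, and the bounding_box property).

dos = _create_detected_object_set()
assert len(dos) == 3
for det in dos:
    bbox = det.bounding_box
    print(bbox.min_x(), bbox.min_y(), bbox.max_x(), bbox.max_y())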
Example #4
    def _dowork(self, img_container):
        """
        Helper to decouple the algorithm and pipeline logic

        CommandLine:
            xdoctest viame.processes.camtrawl.processes CamtrawlDetectFishProcess._dowork

        Example:
            >>> from viame.processes.camtrawl.processes import *
            >>> from kwiver.vital.types import ImageContainer
            >>> import kwiver.sprokit.pipeline.config
            >>> # construct dummy process instance
            >>> conf = kwiver.sprokit.pipeline.config.empty_config()
            >>> self = CamtrawlDetectFishProcess(conf)
            >>> self._configure()
            >>> # construct test data
            >>> from vital.util import VitalPIL
            >>> from PIL import Image as PILImage
            >>> pil_img = PILImage.open(ub.grabdata('https://i.imgur.com/Jno2da3.png'))
            >>> pil_img = PILImage.fromarray(np.zeros((512, 512, 3), dtype=np.uint8))
            >>> img_container = ImageContainer(VitalPIL.from_pil(pil_img))
            >>> # Initialize the background detector by sending 10 black frames
            >>> for i in range(10):
            >>>     empty_set = self._dowork(img_container)
            >>> # now add a white box that should be detected
            >>> np_img = np.zeros((512, 512, 3), dtype=np.uint8)
            >>> np_img[300:340, 220:380] = 255
            >>> img_container = ImageContainer.fromarray(np_img)
            >>> detection_set = self._dowork(img_container)
            >>> assert len(detection_set) == 1
            >>> obj = detection_set[0]
        """
        # This should be read as np.uint8
        np_img = img_container.asarray()

        detection_set = DetectedObjectSet()
        ct_detections = self.detector.detect(np_img)

        for detection in ct_detections:
            bbox = BoundingBoxD(*detection.bbox.coords)
            mask = detection.mask.astype(np.uint8)
            vital_mask = ImageContainer.fromarray(mask)
            dot = DetectedObjectType("Motion", 1.0)
            obj = DetectedObject(bbox, 1.0, dot, mask=vital_mask)
            detection_set.add(obj)
        return detection_set
Example #5
    def merge(self, det_sets):

        # Get detection HL info in a list
        pred_sets = []
        for det_set in det_sets:
            pred_set = []
            for det in det_set:
                # Extract box info for this det
                bbox = det.bounding_box

                bbox_min_x = int(bbox.min_x())
                bbox_max_x = int(bbox.max_x())
                bbox_min_y = int(bbox.min_y())
                bbox_max_y = int(bbox.max_y())

                # Extract type info for this det
                if det.type is None:
                    continue

                #class_names = list( det.type.class_names() )
                #class_scores = [ det.type.score( n ) for n in class_names ]
                class_name = det.type.get_most_likely_class()
                class_score = det.type.score(class_name)

                pred_set.append([
                    bbox_min_x, bbox_min_y, bbox_max_x, bbox_max_y, class_name,
                    class_score
                ])
            pred_sets.append(pred_set)

        # Run merging algorithm
        #ensemble_preds = ensemble_box( pred_sets, self._fusion_weights,
        #  self._iou_thr, self._skip_box_thr, self._sigma, self._fusion_type )
        ensemble_preds = []

        # Compile output detections
        output = DetectedObjectSet()

        for pred in ensemble_preds:
            score = pred[5]
            bbox = BoundingBoxD(pred[0], pred[1], pred[2], pred[3])
            dot = DetectedObjectType(pred[4], score)
            det = DetectedObject(bbox, score, dot)
            output.add(det)

        return output
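
The intermediate layout the loop above builds, with purely illustrative values; ensemble_box (commented out above) would fuse these lists, but since its signature is not shown here no call is made.

# One list per input detection set; each entry is
# [xmin, ymin, xmax, ymax, class_name, class_score].
pred_sets = [
    [[12, 30, 88, 120, 'fish', 0.91], [200, 40, 260, 95, 'rock', 0.55]],
    [[10, 28, 90, 118, 'fish', 0.84]],
]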
Example #6
def _kwimage_to_kwiver_detections(detections):
    """
    Convert kwimage detections to kwiver detected object sets

    Args:
        detections (kwimage.Detections)

    Returns:
        kwiver.vital.types.DetectedObjectSet
    """
    from kwiver.vital.types.types import ImageContainer, Image

    segmentations = None
    # convert segmentation masks
    if 'segmentations' in detections.data:
        segmentations = detections.data['segmentations']

    boxes = detections.boxes.to_tlbr()
    scores = detections.scores
    class_idxs = detections.class_idxs

    if not segmentations:
        # Placeholders
        segmentations = (None, ) * len(boxes)

    # convert to kwiver format, apply threshold
    detected_objects = DetectedObjectSet()

    for tlbr, score, cidx, seg in zip(boxes.data, scores, class_idxs,
                                      segmentations):
        class_name = detections.classes[cidx]

        bbox_int = np.round(tlbr).astype(np.int32)
        bounding_box = BoundingBoxD(bbox_int[0], bbox_int[1], bbox_int[2],
                                    bbox_int[3])

        detected_object_type = DetectedObjectType(class_name, score)
        detected_object = DetectedObject(bounding_box, score,
                                         detected_object_type)
        if seg:
            mask = seg.to_relative_mask().numpy().data
            detected_object.mask = ImageContainer(Image(mask))

        detected_objects.add(detected_object)
    return detected_objects
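
A hedged usage sketch for the converter above. The kwimage.Detections constructor call is an assumption based on the attributes the function reads (boxes, scores, class_idxs, classes); the exact kwimage API may differ between versions.

import numpy as np
import kwimage

dets = kwimage.Detections(
    boxes=kwimage.Boxes(np.array([[10, 20, 50, 60]]), 'tlbr'),  # assumed format key
    scores=np.array([0.9]),
    class_idxs=np.array([0]),
    classes=['fish', 'rock'],
)
detected_objects = _kwimage_to_kwiver_detections(dets)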
Example #7
    def detect(self, image_data):
        input_image = image_data.asarray().astype('uint8')
        if self._rgb_to_bgr:
            input_image = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)

        from mmdet.apis import inference_detector
        detections = inference_detector(self._model, input_image)

        if isinstance(detections, tuple):
            bbox_result, segm_result = detections
        else:
            bbox_result, segm_result = detections, None

        if np.size(bbox_result) > 0:
            bboxes = np.vstack(bbox_result)
        else:
            bboxes = []

        # convert segmentation masks
        masks = []
        if segm_result is not None:
            segms = mmcv.concat_list(segm_result)
            inds = np.where(bboxes[:, -1] > self._thresh)[0]
            for i in inds:
                masks.append(maskUtils.decode(segms[i]).astype(bool))

        # collect labels
        labels = [
            np.full(bbox.shape[0], i, dtype=np.int32)
            for i, bbox in enumerate(bbox_result)
        ]

        if np.size(labels) > 0:
            labels = np.concatenate(labels)
        else:
            labels = []

        # convert to kwiver format, apply threshold
        output = DetectedObjectSet()

        for bbox, label in zip(bboxes, labels):
            class_confidence = float(bbox[-1])
            if class_confidence < self._thresh:
                continue

            bbox_int = bbox.astype(np.int32)
            bounding_box = BoundingBoxD(bbox_int[0], bbox_int[1], bbox_int[2],
                                        bbox_int[3])

            class_name = self._labels[label]
            detected_object_type = DetectedObjectType(class_name,
                                                      class_confidence)

            detected_object = DetectedObject(bounding_box,
                                             class_confidence,
                                             detected_object_type)
            output.add(detected_object)

        if np.size(labels) > 0 and self._display_detections:
            mmcv.imshow_det_bboxes(input_image,
                                   bboxes,
                                   labels,
                                   class_names=self._labels,
                                   score_thr=self._thresh,
                                   show=True)

        return output
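
A sketch of the data layout assumed above (older mmdet API): bbox_result holds one (N, 5) array per class, each row being [x1, y1, x2, y2, score]. The values here are hypothetical.

import numpy as np

bbox_result = [np.array([[10., 20., 50., 60., 0.9]]),   # class 0
               np.array([[ 5.,  5., 15., 25., 0.4]])]   # class 1
bboxes = np.vstack(bbox_result)                          # shape (2, 5)
labels = np.concatenate([np.full(b.shape[0], i, dtype=np.int32)
                         for i, b in enumerate(bbox_result)])  # [0, 1]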
Example #8
    def _step(self):

        # Get all inputs even ones we don't use
        in_img_c = self.grab_input_using_trait('image')
        timestamp = self.grab_input_using_trait('timestamp')

        if not timestamp.has_valid_frame():
            raise RuntimeError("Frame timestamps must contain frame IDs")

        frame_id = timestamp.get_frame()

        if self.has_input_port_edge_using_trait('detected_object_set'):
            detections = self.grab_input_using_trait('detected_object_set')
        else:
            detections = DetectedObjectSet()
        if self.has_input_port_edge_using_trait('initializations'):
            initializations = self.grab_input_using_trait('initializations')
        else:
            initializations = ObjectTrackSet()
        if self.has_input_port_edge_using_trait('recommendations'):
            recommendations = self.grab_input_using_trait('recommendations')
        else:
            recommendations = ObjectTrackSet()
        if self.has_input_port_edge_using_trait('evaluation_requests'):
            requests = self.grab_input_using_trait('evaluation_requests')
        else:
            requests = DetectedObjectSet()

        print('mdnet tracker timestamp = {!r}'.format(timestamp))

        # Handle new track external initialization
        init_track_pool = initializations.tracks()
        recc_track_pool = recommendations.tracks()
        init_track_ids = []
        img_used = False

        if len(init_track_pool) != 0 or len(self._trackers) != 0:
            img_npy = self.format_image(in_img_c)
            img_used = True

        for trk in init_track_pool:
            # Special case, initialize a track on a previous frame
            if trk[trk.last_frame].frame_id == self._last_frame_id and \
              ( trk.id not in self._track_init_frames or \
              self._track_init_frames[ trk.id ] < self._last_frame_id ):
                tid = trk.id
                cbox = trk[trk.last_frame].detection().bounding_box()
                bbox = [
                    cbox.min_x(),
                    cbox.min_y(),
                    cbox.width(),
                    cbox.height()
                ]
                self._last_frame = self.format_image(self._last_frame)
                self._trackers[tid] = mdnet.MDNetTracker(
                    self._last_frame, bbox)
                self._tracks[tid] = [ObjectTrackState(timestamp, cbox, 1.0)]
                self._track_init_frames[tid] = self._last_frame_id
            # This track has an initialization signal for the current frame
            elif trk[trk.last_frame].frame_id == frame_id:
                tid = trk.id
                cbox = trk[trk.last_frame].detection().bounding_box()
                bbox = [
                    cbox.min_x(),
                    cbox.min_y(),
                    cbox.width(),
                    cbox.height()
                ]
                self._trackers[tid] = mdnet.MDNetTracker(img_npy, bbox)
                self._tracks[tid] = [ObjectTrackState(timestamp, cbox, 1.0)]
                init_track_ids.append(tid)
                self._track_init_frames[tid] = frame_id

        # Update existing tracks
        for tid in self._trackers.keys():
            if tid in init_track_ids:
                continue  # Already processed (initialized) on frame
            # Check if there's a recommendation for the update
            recc_bbox = []
            for trk in recc_track_pool:
                if trk.id == tid and trk[trk.last_frame].frame_id == frame_id:
                    cbox = trk[trk.last_frame].detection().bounding_box()
                    recc_bbox = [
                        cbox.min_x(),
                        cbox.min_y(),
                        cbox.width(),
                        cbox.height()
                    ]
                    break
            bbox, score = self._trackers[tid].update(img_npy,
                                                     likely_bbox=recc_bbox)
            if score > mdnet.opts['success_thr']:
                cbox = BoundingBoxD(bbox[0], bbox[1], bbox[0] + bbox[2],
                                    bbox[1] + bbox[3])
                new_state = ObjectTrackState(timestamp, cbox, score)
                self._tracks[tid].append(new_state)

        # Handle track termination
        # TODO: Remove old or dead tracks

        # Classify requested evaluations
        # TODO: Evaluate input detections
        output_evaluations = DetectedObjectSet()

        # Output results
        output_tracks = ObjectTrackSet(
            [Track(tid, trk) for tid, trk in self._tracks.items()])

        self.push_to_port_using_trait('timestamp', timestamp)
        self.push_to_port_using_trait('object_track_set', output_tracks)
        self.push_to_port_using_trait('evaluations', output_evaluations)

        self._last_frame_id = timestamp.get_frame()
        if img_used:
            self._last_frame = img_npy
        else:
            self._last_frame = in_img_c
        self._base_step()
Example #9
    def extract_chips_for_dets(self, image_files, truth_sets):
        import cv2
        output_files = []
        output_dets = []

        for i in range(len(image_files)):
            filename = image_files[i]
            groundtruth = truth_sets[i]
            detections = []
            scale = 1.0

            if self._target_type_scales:
                scale = self.compute_scale_factor(groundtruth)

            if len(groundtruth) > 0:
                img = cv2.imread(filename)

                if len(np.shape(img)) < 2:
                    continue

                img_max_x = np.shape(img)[1]
                img_max_y = np.shape(img)[0]

                # Optionally scale image
                if scale != 1.0:
                    img_max_x = int(scale * img_max_x)
                    img_max_y = int(scale * img_max_y)
                    img = cv2.resize(img, (img_max_x, img_max_y))

                # Run optional background detector on data
                if self._detector_model:
                    kw_image = Image(img)
                    kw_image_container = ImageContainer(kw_image)
                    detections = self._detector.detect(kw_image_container)

            if len(groundtruth) == 0 and len(detections) == 0:
                continue

            overlaps = np.zeros((len(detections), len(groundtruth)))
            det_boxes = []

            for det in detections:
                bbox = det.bounding_box
                det_boxes.append((int(bbox.min_x()), int(bbox.min_y()),
                                  int(bbox.width()), int(bbox.height())))

            for i, gt in enumerate(groundtruth):
                # Extract chip for this detection
                bbox = gt.bounding_box

                bbox_min_x = int(bbox.min_x() * scale)
                bbox_max_x = int(bbox.max_x() * scale)
                bbox_min_y = int(bbox.min_y() * scale)
                bbox_max_y = int(bbox.max_y() * scale)

                bbox_width = bbox_max_x - bbox_min_x
                bbox_height = bbox_max_y - bbox_min_y

                max_overlap = 0.0

                for j, det in enumerate(det_boxes):

                    # Compute overlap between detection and truth
                    (det_min_x, det_min_y, det_width, det_height) = det

                    # Get the overlap rectangle
                    overlap_x0 = max(bbox_min_x, det_min_x)
                    overlap_y0 = max(bbox_min_y, det_min_y)
                    overlap_x1 = min(bbox_max_x, det_min_x + det_width)
                    overlap_y1 = min(bbox_max_y, det_min_y + det_height)

                    # Check if there is an overlap
                    if overlap_x1 - overlap_x0 <= 0 or overlap_y1 - overlap_y0 <= 0:
                        continue

                    # If yes, calculate the ratio of the overlap
                    det_area = float(det_width * det_height)
                    gt_area = float(bbox_width * bbox_height)
                    int_area = float(
                        (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0))
                    overlap = min(int_area / det_area, int_area / gt_area)
                    overlaps[j, i] = overlap

                    if overlap >= self._min_overlap_for_association and overlap > max_overlap:
                        max_overlap = overlap

                        bbox_min_x = det_min_x
                        bbox_min_y = det_min_y
                        bbox_max_x = det_min_x + det_width
                        bbox_max_y = det_min_y + det_height

                        bbox_width = det_width
                        bbox_height = det_height

                if self._chip_method == "fixed_width":
                    chip_width = int(self._chip_width)
                    half_width = int(chip_width / 2)

                    bbox_min_x = int(
                        (bbox_min_x + bbox_max_x) / 2) - half_width
                    bbox_min_y = int(
                        (bbox_min_y + bbox_max_y) / 2) - half_width
                    bbox_max_x = bbox_min_x + chip_width
                    bbox_max_y = bbox_min_y + chip_width

                    bbox_width = chip_width
                    bbox_height = chip_width

                bbox_area = bbox_width * bbox_height

                if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                    continue
                if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                    continue

                if self._reduce_category and gt.type() and \
                  gt.type().get_most_likely_class() == self._reduce_category and \
                  random.uniform( 0, 1 ) < 0.90:
                    continue

                if self._border_exclude > 0:
                    if bbox_min_x <= self._border_exclude:
                        continue
                    if bbox_min_y <= self._border_exclude:
                        continue
                    if bbox_max_x >= img_max_x - self._border_exclude:
                        continue
                    if bbox_max_y >= img_max_y - self._border_exclude:
                        continue

                crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
                self._sample_count = self._sample_count + 1
                crop_str = ('%09d' % self._sample_count) + ".png"
                new_file = os.path.join(self._chip_directory, crop_str)
                cv2.imwrite(new_file, crop)

                # Set new box size for this detection
                gt.bounding_box = BoundingBoxD(0, 0,
                                               np.shape(crop)[1],
                                               np.shape(crop)[0])
                new_set = DetectedObjectSet()
                new_set.add(gt)

                output_files.append(new_file)
                output_dets.append(new_set)

            neg_count = 0

            for j, det in enumerate(detections):

                if max(overlaps[j]) >= self._max_overlap_for_negative:
                    continue

                bbox = det.bounding_box

                bbox_min_x = int(bbox.min_x())
                bbox_max_x = int(bbox.max_x())
                bbox_min_y = int(bbox.min_y())
                bbox_max_y = int(bbox.max_y())

                bbox_width = bbox_max_x - bbox_min_x
                bbox_height = bbox_max_y - bbox_min_y

                bbox_area = bbox_width * bbox_height

                if self._chip_method == "fixed_width":
                    chip_width = int(self._chip_width)
                    half_width = int(chip_width / 2)

                    bbox_min_x = int(
                        (bbox_min_x + bbox_max_x) / 2) - half_width
                    bbox_min_y = int(
                        (bbox_min_y + bbox_max_y) / 2) - half_width
                    bbox_max_x = bbox_min_x + chip_width
                    bbox_max_y = bbox_min_y + chip_width

                    bbox_width = chip_width
                    bbox_height = chip_width

                if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                    continue
                if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                    continue

                if self._border_exclude > 0:
                    if bbox_min_x <= self._border_exclude:
                        continue
                    if bbox_min_y <= self._border_exclude:
                        continue
                    if bbox_max_x >= img_max_x - self._border_exclude:
                        continue
                    if bbox_max_y >= img_max_y - self._border_exclude:
                        continue

                # Handle random factor
                if self._max_neg_per_frame < 1.0 and random.uniform(
                        0, 1) > self._max_neg_per_frame:
                    break

                crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
                self._sample_count = self._sample_count + 1
                crop_str = ('%09d' % self._sample_count) + ".png"
                new_file = os.path.join(self._chip_directory, crop_str)
                cv2.imwrite(new_file, crop)

                # Set new box size for this detection
                det.bounding_box = BoundingBoxD(0, 0,
                                                np.shape(crop)[1],
                                                np.shape(crop)[0])
                det.type = DetectedObjectType(self._negative_category, 1.0)
                new_set = DetectedObjectSet()
                new_set.add(det)

                output_files.append(new_file)
                output_dets.append(new_set)

                # Check maximum negative count
                neg_count = neg_count + 1
                if neg_count > self._max_neg_per_frame:
                    break

        return [output_files, output_dets]
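
A hypothetical invocation from inside the owning class: image paths and per-image truth sets are parallel lists, and the method returns chip file paths together with one single-detection set per chip.

image_files = ['frame_000001.png', 'frame_000002.png']   # placeholder paths
truth_sets = [_create_detected_object_set(), DetectedObjectSet()]
chip_files, chip_dets = self.extract_chips_for_dets(image_files, truth_sets)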