def filter_truth(self, init_truth, categories):
    filtered_truth = DetectedObjectSet()
    use_frame = True

    # Largest box dimension allowed relative to the training chip size
    max_length = int(self._max_scale_wrt_chip * float(self._chip_width))

    for item in init_truth:
        if item.type() is None:
            continue
        class_lbl = item.type().get_most_likely_class()
        if categories is not None and not categories.has_class_id(class_lbl):
            continue
        if categories is not None:
            class_lbl = categories.get_class_name(class_lbl)
        elif class_lbl not in self._categories:
            self._categories.append(class_lbl)

        truth_type = DetectedObjectType(class_lbl, 1.0)
        item.set_type(truth_type)

        # In detector mode, reject the whole frame if any truth box is
        # too large to fit in a chip
        if self._mode == "detector" and \
           (item.bounding_box().width() > max_length or
                item.bounding_box().height() > max_length):
            use_frame = False
            break

        filtered_truth.add(item)

    if self._gt_frames_only and len(init_truth) == 0:
        use_frame = False

    return filtered_truth, use_frame
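# A minimal sketch of exercising filter_truth() stand-alone, treating the
# method as a plain function and stubbing only the attributes it reads;
# the stub values and the "fish" label are made up for illustration.
from types import SimpleNamespace

stub = SimpleNamespace(_max_scale_wrt_chip=2.0, _chip_width=640,
                       _mode="detector", _categories=[],
                       _gt_frames_only=False)

truth = DetectedObjectSet()
truth.add(DetectedObject(BoundingBox(10, 10, 60, 60), 1.0,
                         DetectedObjectType("fish", 1.0)))

# With categories=None the "fish" label is appended to stub._categories
filtered, use_frame = filter_truth(stub, truth, None)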
def _step(self):
    print("[DEBUG] ----- start step")

    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')

    # Grab image container from input port using traits
    frame_c = self.grab_input_using_trait('image')

    # Get image from container and convert it via PIL to a numpy array
    frame_in = frame_c.image()
    pil_image = get_pil_image(frame_in)
    frame = np.array(pil_image)

    detected_set = DetectedObjectSet()

    # PIL yields RGB data, so convert from RGB (not BGR) to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    gray_frame = cv2.equalizeHist(gray_frame)

    faces = face_cascade.detectMultiScale(gray_frame, 1.3, 5)
    for (x, y, w, h) in faces:
        bbox = BoundingBox(x, y, x + w, y + h)
        dot = DetectedObjectType("face", 1.0)
        detected_set.add(DetectedObject(bbox, 1.0, dot))

    # Push detections to the output port
    self.push_to_port_using_trait('detected_object_set', detected_set)

    self._base_step()
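# The detection core of _step() can be exercised without the sprokit
# port/trait plumbing; a minimal sketch assuming OpenCV's bundled cascade
# data (cv2.data.haarcascades) and a hypothetical input file "frame.png".
import cv2
import numpy as np
from PIL import Image

cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_alt.xml')
frame = np.array(Image.open('frame.png').convert('RGB'))
gray = cv2.equalizeHist(cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY))
for (x, y, w, h) in cascade.detectMultiScale(gray, 1.3, 5):
    print('face at', (x, y, x + w, y + h))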
def _create_detected_object(self):
    """
    Helper function to generate a detected object for the track state

    :return: Detected object with bounding box coordinates of
             (10, 10, 20, 20), confidence of 0.4 and "test" label
    """
    bbox = BoundingBox(10, 10, 20, 20)
    dot = DetectedObjectType("test", 0.4)
    return DetectedObject(bbox, 0.4, dot)
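# The same object can be built and checked directly with the accessors
# used elsewhere in this file (hypothetical test sketch):
do = DetectedObject(BoundingBox(10, 10, 20, 20), 0.4,
                    DetectedObjectType("test", 0.4))
assert do.confidence() == 0.4
assert do.type().get_most_likely_class() == "test"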
def detect(self, in_img_c):
    image_height = in_img_c.height()
    image_width = in_img_c.width()

    if self.norm_image_type and self.norm_image_type != "none":
        print("Normalizing input image")

        in_img = in_img_c.image().asarray().astype("uint16")

        bottom, top = self.get_scaling_values(self.norm_image_type,
                                              in_img, image_height)
        in_img = self.lin_normalize_image(in_img, bottom, top)
        in_img = np.tile(in_img, (1, 1, 3))
    else:
        in_img = np.array(get_pil_image(in_img_c.image()).convert("RGB"))

    start_time = time.time()
    boxes, scores, classes = self.generate_detection(self.detection_graph,
                                                     in_img)
    elapsed = time.time() - start_time
    print("Done running detector in {}".format(
        humanfriendly.format_timespan(elapsed)))

    good_boxes = []
    detections = DetectedObjectSet()

    for i in range(len(scores)):
        if scores[i] >= self.confidence_thresh:
            bbox = boxes[i]
            good_boxes.append(bbox)

            # Boxes are returned as relative [top, left, bottom, right];
            # scale them to absolute pixel coordinates
            top_rel, left_rel, bottom_rel, right_rel = bbox

            xmin = left_rel * image_width
            ymin = top_rel * image_height
            xmax = right_rel * image_width
            ymax = bottom_rel * image_height

            dot = DetectedObjectType(self.category_name, scores[i])
            obj = DetectedObject(BoundingBox(xmin, ymin, xmax, ymax),
                                 scores[i], dot)
            detections.add(obj)

    print("Detected {}".format(len(good_boxes)))
    return detections
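# lin_normalize_image and get_scaling_values are referenced above but not
# defined in this file; a plausible sketch of the former, assuming it maps
# the [bottom, top] intensity window of the uint16 input onto 8-bit range:
def lin_normalize_image(image, bottom, top):
    scaled = (image.astype("float64") - bottom) / float(top - bottom)
    return (np.clip(scaled, 0.0, 1.0) * 255).astype("uint8")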
def detect(self, image_c):
    cascade_classifier = cv2.CascadeClassifier(self.classifier_file)
    image = image_c.image().asarray().astype(np.uint8)
    detected_object_set = DetectedObjectSet()

    # NOTE: asarray() returns an RGB representation of the image
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    gray_image = cv2.equalizeHist(gray_image)

    faces = cascade_classifier.detectMultiScale(gray_image,
                                                self.scale_factor,
                                                self.min_neighbor)
    for (x, y, w, h) in faces:
        bbox = BoundingBox(x, y, x + w, y + h)
        dot = DetectedObjectType(self.classifier_name, 1.0)
        detected_object_set.add(DetectedObject(bbox, 1.0, dot))

    return detected_object_set
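# Hypothetical invocation sketch, assuming KWIVER's ImageContainer and the
# VitalPIL helpers are importable as below, that `detector` is a configured
# instance of this class, and that "frame.png" stands in for a real image:
from PIL import Image as PILImage
from kwiver.vital.types import ImageContainer
from kwiver.vital.util.VitalPIL import from_pil

pil_img = PILImage.open("frame.png").convert("RGB")
image_c = ImageContainer(from_pil(pil_img))
for det in detector.detect(image_c):
    bb = det.bounding_box()
    print(bb.min_x(), bb.min_y(), bb.max_x(), bb.max_y())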
def detect(self, image_data):
    input_image = image_data.asarray().astype('uint8')

    from mmdet.apis import inference_detector
    detections = inference_detector(self._model, input_image)

    if isinstance(detections, tuple):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    # bbox_result holds one (N_i, 5) array per class; stack into one array
    if len(bbox_result) > 0:
        bboxes = np.vstack(bbox_result)
    else:
        bboxes = []

    # Convert segmentation masks
    masks = []
    if segm_result is not None:
        segms = mmcv.concat_list(segm_result)
        inds = np.where(bboxes[:, -1] > self._thresh)[0]
        for i in inds:
            masks.append(maskUtils.decode(segms[i]).astype(bool))

    # Collect a class label for each detection row
    labels = [
        np.full(bbox.shape[0], i, dtype=np.int32)
        for i, bbox in enumerate(bbox_result)
    ]
    if len(labels) > 0:
        labels = np.concatenate(labels)
    else:
        labels = []

    # Convert to KWIVER format, applying the confidence threshold
    output = DetectedObjectSet()
    for bbox, label in zip(bboxes, labels):
        class_confidence = float(bbox[-1])
        if class_confidence < self._thresh:
            continue

        bbox_int = bbox.astype(np.int32)
        bounding_box = BoundingBox(bbox_int[0], bbox_int[1],
                                   bbox_int[2], bbox_int[3])
        class_name = self._labels[label]
        detected_object_type = DetectedObjectType(class_name,
                                                  class_confidence)
        output.add(DetectedObject(bounding_box, class_confidence,
                                  detected_object_type))

    if len(labels) > 0 and self._display_detections:
        mmcv.imshow_det_bboxes(input_image, bboxes, labels,
                               class_names=self._labels,
                               score_thr=self._thresh, show=True)

    return output
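# Worked example of the per-class result format consumed above: mmdet
# returns one (N_i, 5) array of [x1, y1, x2, y2, score] rows per class
# (the values below are made up for illustration).
import numpy as np

bbox_result = [
    np.array([[10., 10., 50., 60., 0.9]]),       # class 0: one box
    np.array([[5., 5., 15., 25., 0.4],
              [30., 40., 80., 90., 0.7]]),       # class 1: two boxes
]
bboxes = np.vstack(bbox_result)                  # shape (3, 5)
labels = np.concatenate([np.full(b.shape[0], i, dtype=np.int32)
                         for i, b in enumerate(bbox_result)])
print(labels)                                    # [0 1 1]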
def match_point2box(self, detected_object_set1, detected_object_set2):
    # Centers of the boxes in the first set act as query points
    points = []
    for d in detected_object_set1:
        bb = d.bounding_box()
        points.append(bb.center())

    # Map each box in the second set to the points it contains
    box2pt_dict = {}
    for idx, d in enumerate(detected_object_set2):
        bb = d.bounding_box()
        box2pt_dict[idx] = []
        for pt_idx, pt in enumerate(points):
            if bb_contains_pt(bb, pt):
                box2pt_dict[idx].append(pt_idx)

    # Invert the mapping: point index -> containing box indices. Start
    # every point with an empty list so unmatched points are representable.
    pt2box_dict = {pt_idx: [] for pt_idx in range(len(points))}
    for box_idx, pt_idxs in box2pt_dict.items():
        for pt_idx in pt_idxs:
            pt2box_dict[pt_idx].append(box_idx)

    print(box2pt_dict)
    print(pt2box_dict)

    # Unmatched detections (new)
    unmatched_detections = []
    for box_idx, pt_matches in box2pt_dict.items():
        if len(pt_matches) == 0:
            unmatched_detections.append(box_idx)

    # Unmatched points (not found)
    unmatched_points = []
    for pt_idx, box_matches in pt2box_dict.items():
        if len(box_matches) == 0:
            unmatched_points.append(pt_idx)

    # Matched detections (found, success); multiple matches are resolved
    # by keeping the highest-confidence detection
    point_detection_matches = {}  # pt_idx -> box_idx
    for pt_idx, box_matches in pt2box_dict.items():
        if len(box_matches) == 0:
            continue
        if len(box_matches) == 1:
            point_detection_matches[pt_idx] = box_matches[0]
        else:
            best_score = -1
            best_det = None
            for det_idx in box_matches:
                det = detected_object_set2[det_idx]
                if det.confidence() > best_score:
                    best_score = det.confidence()
                    best_det = det_idx
            point_detection_matches[pt_idx] = best_det

    matches = DetectedObjectSet()
    for pt_idx, det_idx in point_detection_matches.items():
        det = detected_object_set2[det_idx]
        dettype = det.type()
        new_type = DetectedObjectType(
            "matched_%s" % dettype.get_most_likely_class(), 1.)
        det.set_type(new_type)
        matches.add(det)

    new_detections = DetectedObjectSet()
    for det_idx in unmatched_detections:
        det = detected_object_set2[det_idx]
        dettype = det.type()
        new_type = DetectedObjectType(
            "new_%s" % dettype.get_most_likely_class(), det.confidence())
        det.set_type(new_type)
        new_detections.add(det)

    return matches, new_detections
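# bb_contains_pt() is used above but not defined in this file; a plausible
# implementation, assuming pt supports indexing as an (x, y) pair, as with
# the values returned by bounding_box().center():
def bb_contains_pt(bb, pt):
    return (bb.min_x() <= pt[0] <= bb.max_x() and
            bb.min_y() <= pt[1] <= bb.max_y())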
def refine(self, image_data, detections):
    if len(detections) == 0:
        return detections

    img = image_data.asarray().astype('uint8')
    predictor = self.predictor

    img_max_x = np.shape(img)[1]
    img_max_y = np.shape(img)[0]

    # Extract patches for ROIs
    image_chips = []
    detection_ids = []

    for i, det in enumerate(detections):
        # Extract chip for this detection
        bbox = det.bounding_box()

        bbox_min_x = int(bbox.min_x())
        bbox_max_x = int(bbox.max_x())
        bbox_min_y = int(bbox.min_y())
        bbox_max_y = int(bbox.max_y())

        bbox_width = bbox_max_x - bbox_min_x
        bbox_height = bbox_max_y - bbox_min_y
        bbox_area = bbox_width * bbox_height

        if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
            continue
        if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
            continue

        if self._border_exclude > 0:
            if bbox_min_x <= self._border_exclude:
                continue
            if bbox_min_y <= self._border_exclude:
                continue
            if bbox_max_x >= img_max_x - self._border_exclude:
                continue
            if bbox_max_y >= img_max_y - self._border_exclude:
                continue

        crop = img[bbox_min_y:bbox_max_y, bbox_min_x:bbox_max_x]
        image_chips.append(crop)
        detection_ids.append(i)

    # Run classifier on ROIs
    classifications = list(predictor.predict(image_chips))

    # Put classifications back into detections
    output = DetectedObjectSet()

    for i, det in enumerate(detections):
        if len(detection_ids) == 0 or i != detection_ids[0]:
            # This detection was filtered out above; pass it through unchanged
            output.add(det)
            continue

        new_class = classifications[0]

        if new_class.data.get('prob', None) is not None:
            # If we have a probability for each class, use that
            class_names = list(new_class.classes)
            class_scores = new_class.prob
        else:
            # Otherwise we only have the score for the predicted class
            class_names = [new_class.classes[new_class.cidx]]
            class_scores = [new_class.conf]

        if self._average_prior and det.type() is not None:
            priors = det.type()
            prior_names = priors.class_names()
            for name in prior_names:
                if name in class_names:
                    class_scores[class_names.index(name)] += priors.score(name)
                else:
                    class_names.append(name)
                    class_scores.append(priors.score(name))
            # Halve the summed scores to average new and prior values
            for j in range(len(class_scores)):
                class_scores[j] = class_scores[j] * 0.5

        detected_object_type = DetectedObjectType(class_names, class_scores)
        det.set_type(detected_object_type)
        output.add(det)

        detection_ids.pop(0)
        classifications.pop(0)

    return output
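# Hypothetical stand-in for the classifier behind self.predictor; the real
# predictor is assumed to yield objects exposing .classes, .cidx, .conf,
# .prob and a .data dict, as read by refine() above. Useful for testing
# the bookkeeping without a trained model.
import numpy as np

class StubClassification:
    def __init__(self, classes, prob):
        self.classes = classes
        self.prob = list(prob)
        self.cidx = int(np.argmax(self.prob))
        self.conf = float(self.prob[self.cidx])
        self.data = {"prob": self.prob}

class StubPredictor:
    def predict(self, chips):
        # One fixed two-class score vector per chip
        return [StubClassification(["fish", "not_fish"], [0.8, 0.2])
                for _ in chips]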