def _step(self):
    """Detect frontal faces in the next input image and push them downstream.

    Reads one image container from the ``image`` port, runs the Haar cascade
    stored in ``haarcascade_frontalface_alt.xml`` on a histogram-equalized
    grayscale copy, and pushes a ``DetectedObjectSet`` holding one "face"
    detection (confidence 1.0) per hit to the ``detected_object_set`` port.
    """
    print("[DEBUG] ----- start step")

    # NOTE(review): the cascade file is re-read on every step; consider
    # loading it once during configuration if per-frame cost matters.
    face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_alt.xml')

    # grab image container from port using traits
    frame_c = self.grab_input_using_trait('image')

    # Get image from container
    frame_in = frame_c.image()

    # convert generic image to PIL, then to a numpy matrix
    pil_image = get_pil_image(frame_in)
    frame = np.array(pil_image)

    detected_set = DetectedObjectSet()

    # An array built from a PIL image keeps PIL's channel order, so the
    # conversion must be RGB->gray; BGR2GRAY would swap the red and blue
    # luminance weights.
    # NOTE(review): assumes get_pil_image() yields a 3-channel RGB image --
    # confirm against its implementation.
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    gray_frame = cv2.equalizeHist(gray_frame)

    # Positional arguments are scaleFactor=1.3 and minNeighbors=5.
    faces = face_cascade.detectMultiScale(gray_frame, 1.3, 5)
    for (x, y, w, h) in faces:
        # The cascade reports (x, y, width, height); BoundingBox is built
        # from the two opposite corners.
        bbox = BoundingBox(x, y, x + w, y + h)
        dot = DetectedObjectType("face", 1.0)
        detected_set.add(DetectedObject(bbox, 1.0, dot))

    # push object to output port
    self.push_to_port_using_trait('detected_object_set', detected_set)
    self._base_step()
def detect( self, image_data ):
    """Template detector: convert boxes/labels into a DetectedObjectSet.

    The detection step itself is still a TODO, so ``bboxes`` and ``labels``
    are empty and the returned set is empty.

    :param image_data: image object exposing ``asarray()``
    :return: ``DetectedObjectSet`` with one entry per (bbox, label) pair
    """
    # Convert image to 8-bit numpy
    input_image = image_data.asarray().astype( 'uint8' )

    # TODO: do something with numpy image producing detections
    bboxes = []
    labels = []

    # The template has no per-detection score yet. The original referenced an
    # undefined ``class_confidence`` here, which would raise NameError as
    # soon as the TODO above produced a box; use one explicit placeholder for
    # both the object and its type until real scores are available.
    confidence = 1.0

    # Convert detections to kwiver format
    output = DetectedObjectSet()
    for bbox, label in zip( bboxes, labels ):
        bbox_int = bbox.astype( np.int32 )
        bounding_box = BoundingBox( bbox_int[0], bbox_int[1],
                                    bbox_int[2], bbox_int[3] )
        detected_object_type = DetectedObjectType( label, confidence )
        detected_object = DetectedObject( bounding_box,
                                          confidence,
                                          detected_object_type )
        output.add( detected_object )
    return output
def create_track_dict(self, object_track_set):
    """Populate ``self.track_dict`` (frame id -> DetectedObjectSet) once.

    Does nothing if ``self.track_dict_initialized`` is already set.
    Otherwise every state of every track contributes its detection to the
    set stored under the state's ``frame_id``. Each detection's bounding
    box is grown by one unit on every side *in place* before it is added.

    :param object_track_set: object exposing ``tracks()``; each track is
        iterable and yields states with ``detection()`` and ``frame_id``
    :return: None
    """
    if self.track_dict_initialized:
        return

    logger.debug("Initializing track dict")

    for track in object_track_set.tracks():
        for state in track:
            detection = state.detection()
            box = detection.bounding_box()
            frame_id = state.frame_id

            already_known = frame_id in self.track_dict
            logger.debug('Frame Id %d exists %s' % (frame_id, str(already_known)))
            if not already_known:
                self.track_dict[frame_id] = DetectedObjectSet()

            # Pad the box by one unit on each side; this mutates the
            # detection object that came from the track.
            detection.set_bounding_box(
                BoundingBox(box.min_x() - 1, box.min_y() - 1,
                            box.max_x() + 1, box.max_y() + 1))
            self.track_dict[frame_id].add(detection)

    self.track_dict_initialized = True

    # Summarise what was collected, in ascending frame order.
    for frame_id in sorted(self.track_dict):
        logger.debug('Frame %d : %d input detections' % (frame_id, len(self.track_dict[frame_id])))
def _step(self):
    """Run the detector on the next input image and publish the results.

    Reads an image container from the ``image`` port, optionally normalizes
    it, pushes the prepared image to ``image_norm``, runs
    ``self.generate_detection`` and pushes every box whose score is at least
    ``self.confidenceThresh`` to ``detected_object_set``.
    """
    print("[DEBUG] ----- start step")

    # grab image container from port using traits
    container = self.grab_input_using_trait("image")
    img_h = container.height()
    img_w = container.width()

    if not self.normImageType:
        pixels = np.array(get_pil_image(container.image()).convert("RGB"))
    else:
        print("Normalize image")
        pixels = container.image().asarray().astype("uint16")
        low, high = self.get_scaling_values(self.normImageType, img_h)
        pixels = self.lin_normalize_image(pixels, low, high)
        # Repeat the data three times along the last axis
        # (presumably single-channel -> 3-channel; confirm input shape).
        pixels = np.tile(pixels, (1, 1, 3))

    self.push_to_port_using_trait("image_norm", ImageContainer(Image(pixels)))

    began = time.time()
    boxes, scores, classes = self.generate_detection(self.detection_graph,
                                                     pixels)
    elapsed = time.time() - began
    print("Done running detector in {}".format(
        humanfriendly.format_timespan(elapsed)))

    kept = []
    detections = DetectedObjectSet()
    for idx, score in enumerate(scores):
        if not (score >= self.confidenceThresh):
            continue

        box = boxes[idx]
        kept.append(box)

        # Box entries are treated as [top, left, bottom, right] fractions of
        # the image size and scaled to pixel coordinates.
        y_min = box[0] * img_h
        x_min = box[1] * img_w
        y_max = box[2] * img_h
        x_max = box[3] * img_w

        detections.add(
            DetectedObject(BoundingBox(x_min, y_min, x_max, y_max), score))

    print("Detected {}".format(len(kept)))
    self.push_to_port_using_trait("detected_object_set", detections)
    self._base_step()
def detect( self, image_data ):
    """Debug detector: run mmdet inference and display the raw boxes.

    The conversion to kwiver detections is not implemented in this variant,
    so the returned ``DetectedObjectSet`` is always empty; the method exists
    to visualise what the model produces.

    :param image_data: image object exposing ``asarray()``
    :return: empty ``DetectedObjectSet``
    """
    from mmdet.apis import inference_detector

    input_image = image_data.asarray().astype( 'uint8' )

    gpu_string = 'cuda:' + str( self._gpu_index )
    detections = inference_detector( self._model, input_image,
                                     self._cfg, device=gpu_string )

    # Every class is displayed as 'fish'; the list is oversized so that any
    # label index resolves to a name.
    class_names = [ 'fish' ] * 10000

    # One very low threshold for both mask selection and display so that all
    # raw boxes are shown. The original used an undefined ``score_thr`` for
    # the masks (NameError) and -100.0 for the display call.
    score_thr = -100.0

    if isinstance( detections, tuple ):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    # Per-class arrays usually have different row counts; np.size() on such
    # a ragged list raises on NumPy >= 1.24, so count rows directly.
    if any( len( class_boxes ) for class_boxes in bbox_result ):
        bboxes = np.vstack( bbox_result )
    else:
        bboxes = []

    # Report the number of boxes (len( bbox_result ) is the number of
    # classes, not the number of detected objects).
    sys.stdout.write( "Detected " + str( len( bboxes ) ) + " objects" )
    sys.stdout.flush()

    # convert segmentation masks
    # NOTE(review): the decoded masks are collected but not used further.
    masks = []
    if segm_result is not None and len( bboxes ) > 0:
        segms = mmcv.concat_list( segm_result )
        inds = np.where( bboxes[:, -1] > score_thr )[0]
        for i in inds:
            # np.bool was removed in NumPy 1.24; the builtin is equivalent.
            masks.append( maskUtils.decode( segms[i] ).astype( bool ) )

    # collect labels: one class index per box row
    if len( bboxes ) > 0:
        labels = np.concatenate( [
            np.full( bbox.shape[0], i, dtype=np.int32 )
            for i, bbox in enumerate( bbox_result ) ] )
    else:
        labels = []

    # TODO: convert to kwiver format and apply a threshold; nothing is
    # converted yet, so the output stays empty.
    output = []

    if len( labels ) > 0:
        mmcv.imshow_det_bboxes(
            input_image,
            bboxes,
            labels,
            class_names=class_names,
            score_thr=score_thr,
            show=True )

    return DetectedObjectSet( output )
def _create_detected_object(self):
    """
    Build the fixed detected object used for track-state tests.

    :return: ``DetectedObject`` with bounding box (10, 10, 20, 20),
             confidence 0.4 and a "test" type scored 0.4
    """
    return DetectedObject(
        BoundingBox(10, 10, 20, 20),
        0.4,
        DetectedObjectType("test", 0.4),
    )
def bounding_box(self):
    """Return a ``BoundingBox`` wrapping a copy of this object's box.

    Calls ``vital_detected_object_bounding_box`` to obtain the box pointer,
    then ``vital_bounding_box_copy`` so the returned wrapper is built from
    the copy rather than from the pointer obtained in the first call.

    :return: ``BoundingBox`` constructed from the copied C pointer
    """
    lib = self.VITAL_LIB
    bbox_ptr_t = BoundingBox.C_TYPE_PTR

    # Fetch the pointer to the box held by the detected object.
    fetch_box = lib.vital_detected_object_bounding_box
    fetch_box.argtypes = [self.C_TYPE_PTR]
    fetch_box.restype = bbox_ptr_t
    internal_ptr = fetch_box(self)

    # Duplicate it and wrap the duplicate.
    copy_box = lib.vital_bounding_box_copy
    copy_box.argtypes = [bbox_ptr_t]
    copy_box.restype = bbox_ptr_t
    copied_ptr = copy_box(internal_ptr)

    return BoundingBox(from_cptr=copied_ptr)
def detect(self, image_data):
    """Return one synthetic detection that moves a fixed step per call.

    The box is ``m_width`` x ``m_height`` and centred on
    ``(m_center_x, m_center_y)`` shifted by ``frame_ct`` times
    ``(m_dx, m_dy)``. ``frame_ct`` is incremented after the box is built.
    ``image_data`` is not inspected.

    :return: ``DetectedObjectSet`` containing the single detection
    """
    centre_x = self.m_center_x + self.frame_ct * self.m_dx
    centre_y = self.m_center_y + self.frame_ct * self.m_dy
    half_w = self.m_width / 2.0
    half_h = self.m_height / 2.0

    box = BoundingBox(centre_x - half_w,
                      centre_y - half_h,
                      centre_x + half_w,
                      centre_y + half_h)
    result = DetectedObjectSet([DetectedObject(box)])

    self.frame_ct += 1
    return result
def detect(self, in_img_c):
    """Run the detector on one image container and return its detections.

    The image is normalized when ``self.norm_image_type`` is set to
    something other than ``"none"``; otherwise it is converted to RGB via
    PIL. Boxes scoring at least ``self.confidence_thresh`` are returned,
    each typed as ``self.category_name`` with its score.

    :param in_img_c: image container exposing ``height()``, ``width()``
        and ``image()``
    :return: ``DetectedObjectSet`` of the accepted boxes
    """
    img_h = in_img_c.height()
    img_w = in_img_c.width()

    skip_normalization = (not self.norm_image_type
                          or self.norm_image_type == "none")
    if skip_normalization:
        pixels = np.array(get_pil_image(in_img_c.image()).convert("RGB"))
    else:
        print("Normalizing input image")
        pixels = in_img_c.image().asarray().astype("uint16")
        low, high = self.get_scaling_values(self.norm_image_type,
                                            pixels, img_h)
        pixels = self.lin_normalize_image(pixels, low, high)
        # Repeat the data three times along the last axis
        # (presumably single-channel -> 3-channel; confirm input shape).
        pixels = np.tile(pixels, (1, 1, 3))

    began = time.time()
    boxes, scores, classes = self.generate_detection(self.detection_graph,
                                                     pixels)
    elapsed = time.time() - began
    print("Done running detector in {}".format(
        humanfriendly.format_timespan(elapsed)))

    accepted = []
    detections = DetectedObjectSet()
    for idx, score in enumerate(scores):
        if not (score >= self.confidence_thresh):
            continue

        box = boxes[idx]
        accepted.append(box)

        # Box entries are treated as [top, left, bottom, right] fractions of
        # the image size and scaled to pixel coordinates.
        y_min = box[0] * img_h
        x_min = box[1] * img_w
        y_max = box[2] * img_h
        x_max = box[3] * img_w

        obj_type = DetectedObjectType(self.category_name, score)
        detections.add(
            DetectedObject(BoundingBox(x_min, y_min, x_max, y_max),
                           score, obj_type))

    print("Detected {}".format(len(accepted)))
    return detections
def detect(self, image_c):
    """Detect objects with the configured OpenCV cascade classifier.

    Loads the cascade from ``self.classifier_file``, runs it on a
    histogram-equalized grayscale copy of the image, and returns one
    detection per hit, typed ``self.classifier_name`` with confidence 1.0.

    :param image_c: image container exposing ``image().asarray()``
    :return: ``DetectedObjectSet`` of the cascade hits
    """
    classifier = cv2.CascadeClassifier(self.classifier_file)
    pixels = image_c.image().asarray().astype(np.uint8)
    results = DetectedObjectSet()

    # NOTE: asarray() returns an RGB representation of the image
    gray = cv2.equalizeHist(cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY))

    hits = classifier.detectMultiScale(gray,
                                       self.scale_factor,
                                       self.min_neighbor)
    for x, y, w, h in hits:
        # Hits are (x, y, width, height); BoundingBox takes two corners.
        results.add(
            DetectedObject(BoundingBox(x, y, x + w, y + h),
                           1.0,
                           DetectedObjectType(self.classifier_name, 1.0)))
    return results
def detect(self, image_data):
    """Run the mmdet model on an image and return thresholded detections.

    Boxes whose score (last column) is below ``self._thresh`` are dropped.
    Remaining boxes are truncated to int32 coordinates and labelled via
    ``self._labels``. When ``self._display_detections`` is set and there is
    at least one box, the result is also shown with mmcv.

    :param image_data: image object exposing ``asarray()``
    :return: ``DetectedObjectSet`` of the accepted detections
    """
    from mmdet.apis import inference_detector

    input_image = image_data.asarray().astype('uint8')
    detections = inference_detector(self._model, input_image)

    if isinstance(detections, tuple):
        bbox_result, segm_result = detections
    else:
        bbox_result, segm_result = detections, None

    # Per-class arrays usually have different row counts; np.size() on such
    # a ragged list raises on NumPy >= 1.24, so count rows directly.
    if any(len(class_boxes) for class_boxes in bbox_result):
        bboxes = np.vstack(bbox_result)
    else:
        bboxes = []

    # convert segmentation masks
    # NOTE(review): the decoded masks are collected but not yet attached to
    # the output detections.
    masks = []
    if segm_result is not None and len(bboxes) > 0:
        segms = mmcv.concat_list(segm_result)
        # The original referenced an undefined ``score_thr`` here; the
        # detector's own threshold is the one applied to the boxes below.
        inds = np.where(bboxes[:, -1] > self._thresh)[0]
        for i in inds:
            # np.bool was removed in NumPy 1.24; the builtin is equivalent.
            masks.append(maskUtils.decode(segms[i]).astype(bool))

    # collect labels: one class index per box row
    if len(bboxes) > 0:
        labels = np.concatenate([
            np.full(bbox.shape[0], i, dtype=np.int32)
            for i, bbox in enumerate(bbox_result)
        ])
    else:
        labels = []

    # convert to kwiver format, apply threshold
    output = DetectedObjectSet()
    for bbox, label in zip(bboxes, labels):
        class_confidence = float(bbox[-1])
        if class_confidence < self._thresh:
            continue

        bbox_int = bbox.astype(np.int32)
        bounding_box = BoundingBox(bbox_int[0], bbox_int[1],
                                   bbox_int[2], bbox_int[3])

        class_name = self._labels[label]
        detected_object_type = DetectedObjectType(class_name,
                                                  class_confidence)
        detected_object = DetectedObject(bounding_box,
                                         class_confidence,
                                         detected_object_type)
        output.add(detected_object)

    if len(labels) > 0 and self._display_detections:
        mmcv.imshow_det_bboxes(
            input_image,
            bboxes,
            labels,
            class_names=self._labels,
            score_thr=self._thresh,
            show=True)

    return output
def extract_chips_for_dets( self, image_files, detections ):
    """Crop one image chip per accepted detection and write it to disk.

    For every image with at least one detection, each detection that passes
    the area and border filters is cropped out, written as a sequentially
    numbered PNG under ``self._chip_directory``, and returned in its own
    single-element ``DetectedObjectSet``. The detection's bounding box is
    reset *in place* to cover the whole chip, i.e. ``(0, 0, width, height)``.

    Images that cannot be read and boxes that do not overlap the image are
    skipped.

    :param image_files: sequence of image file paths
    :param detections: per-image collections of detections, parallel to
        ``image_files``
    :return: ``[ chip_file_paths, chip_detection_sets ]``, two parallel lists
    """
    import cv2

    output_files = []
    output_dets = []

    # TODO: optionally run self._detector_model on the image to harvest
    # negatives, using self._overlap_for_association (0.90) and
    # self._max_negs_per_frame (10).

    for filename, groundtruth in zip( image_files, detections ):
        if len( groundtruth ) == 0:
            continue

        img = cv2.imread( filename )
        if img is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None rather than raising; nothing can be cropped from it.
            continue

        img_max_y, img_max_x = img.shape[0], img.shape[1]

        for det in groundtruth:
            # Extract chip for this detection
            bbox = det.bounding_box()

            bbox_min_x = int( bbox.min_x() )
            bbox_max_x = int( bbox.max_x() )
            bbox_min_y = int( bbox.min_y() )
            bbox_max_y = int( bbox.max_y() )

            bbox_width = bbox_max_x - bbox_min_x
            bbox_height = bbox_max_y - bbox_min_y
            bbox_area = bbox_width * bbox_height

            # A bound of zero (or less) disables the respective filter.
            if self._area_lower_bound > 0 and bbox_area < self._area_lower_bound:
                continue
            if self._area_upper_bound > 0 and bbox_area > self._area_upper_bound:
                continue

            if self._border_exclude > 0:
                if ( bbox_min_x <= self._border_exclude or
                     bbox_min_y <= self._border_exclude or
                     bbox_max_x >= img_max_x - self._border_exclude or
                     bbox_max_y >= img_max_y - self._border_exclude ):
                    continue

            # Clamp to the image: a negative slice index would otherwise
            # wrap around to the opposite edge and give a wrong or empty
            # crop.
            crop_min_x = min( max( bbox_min_x, 0 ), img_max_x )
            crop_max_x = min( max( bbox_max_x, 0 ), img_max_x )
            crop_min_y = min( max( bbox_min_y, 0 ), img_max_y )
            crop_max_y = min( max( bbox_max_y, 0 ), img_max_y )

            crop = img[ crop_min_y:crop_max_y, crop_min_x:crop_max_x ]
            if crop.size == 0:
                # Degenerate box or no overlap with the image; an empty
                # chip cannot be written.
                continue

            self._sample_count = self._sample_count + 1
            crop_str = ( '%09d' % self._sample_count ) + ".png"
            new_file = os.path.join( self._chip_directory, crop_str )
            cv2.imwrite( new_file, crop )

            # Set new box size for this detection
            det.set_bounding_box(
                BoundingBox( 0, 0, crop.shape[1], crop.shape[0] ) )

            new_set = DetectedObjectSet()
            new_set.add( det )

            output_files.append( new_file )
            output_dets.append( new_set )

    return [ output_files, output_dets ]