def make_detection_node(frame_shape,
                        node_description: NodeDescription) -> DetectionNode:
    """Creates a fake detection node that adheres to the given node
    description.

    :param frame_shape: The shape of the frame in (height, width, channels)
    :param node_description: The description that the returned node must
        adhere to
    :return: A fake detection node that adheres to this description
    """
    height, width, _ = frame_shape
    attributes = {category: random.choice(possible_values)
                  for category, possible_values
                  in node_description.attributes.items()}
    extra_data = {data_key: 0.5129319283
                  for data_key in node_description.extra_data}
    detection_names = node_description.detections

    # Create random coordinates for this detection
    x1 = random.randint(0, width - 3)
    y1 = random.randint(0, height - 3)
    x2 = x1 + random.randint(0, width - x1 + 1) + 2
    y2 = y1 + random.randint(0, height - y1 + 1) + 2

    return DetectionNode(
        name=random.choice(detection_names) if len(detection_names) else "N/A",
        coords=[[x1, y1], [x2, y1], [x2, y2], [x1, y2]],
        attributes=attributes,
        encoding=np.zeros((128,)) if node_description.encoded else None,
        track_id=uuid4() if node_description.tracked else None,
        extra_data=extra_data)
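A minimal usage sketch, assuming the vcap package layout these snippets use; the NodeDescription below is hypothetical:

import random
from uuid import uuid4

import numpy as np
from vcap import DetectionNode, NodeDescription

# Hypothetical description: encoded, tracked "person" detections
description = NodeDescription(
    size=NodeDescription.Size.ALL,
    detections=["person"],
    attributes={"Gender": ["boy", "girl"]},
    encoded=True,
    tracked=True)

fake = make_detection_node((480, 640, 3), description)
assert description.describes(fake)  # The fake node should match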
def test_crop():
    frame = np.zeros((100, 100, 3), dtype=np.uint8)

    cropped = Crop(30, 30, 40, 40).apply(frame)
    assert cropped.shape == (10, 10, 3)

    cropped = Crop(90, 90, 110, 110).apply(frame)
    assert cropped.shape == (9, 9, 3)

    cropped = Crop(-10, -10, 10, 10).apply(frame)
    assert cropped.shape == (10, 10, 3)

    cropped = Crop(0, 0, 100, 100).apply(frame)
    assert cropped.shape == (100, 100, 3)

    cropped = Crop(10, 10, 90, 90).apply(frame)
    assert cropped.shape == (80, 80, 3)

    cropped = Crop(10, 10, 20, 20).pad_percent(10, 10, 10, 10).apply(frame)
    assert cropped.shape == (12, 12, 3)

    cropped = Crop(10, 10, 20, 20).pad_px(10, 10, 10, 10).apply(frame)
    assert cropped.shape == (30, 30, 3)

    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))
    cropped = Crop.from_detection(node).apply(frame)
    assert cropped.shape == (10, 10, 3)
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState):
    if options["scale_frame"]:
        max_frame_side_length = options["scale_frame_max_side_length"]
        clamp = Clamp(frame=frame,
                      max_width=max_frame_side_length,
                      max_height=max_frame_side_length)
        frame = clamp.apply()

    predictions = self.send_to_batch(frame).result()

    results = []

    # Convert all "person" predictions with the required confidence
    # to DetectionNodes
    for pred in predictions:
        if pred.confidence >= options["detection_threshold"] \
                and pred.name == "person":
            results.append(
                DetectionNode(name=pred.name,
                              coords=rect_to_coords(pred.rect)))

    # If we scaled the frame down before processing, we need to scale the
    # detections back up to match the original frame size
    if options["scale_frame"]:
        clamp.scale_detection_nodes(results)

    return results
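This process_frame assumes the capsule declares matching options. A hedged sketch of declarations that would satisfy it, assuming vcap's BoolOption/IntOption/FloatOption classes (the defaults here are illustrative, not from the source):

from vcap import BoolOption, FloatOption, IntOption

# Illustrative defaults; the real capsule's values are not shown here
options = {
    "scale_frame": BoolOption(default=False),
    "scale_frame_max_side_length": IntOption(default=1024,
                                             min_val=1, max_val=None),
    "detection_threshold": FloatOption(default=0.5,
                                       min_val=0.0, max_val=1.0),
}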
def score(flow_id: str, frame: object) -> Tuple[str, PyDetectionBox]:
    img = frame_data_2_np_array(frame)

    if capsule.input_type.size is NodeDescription.Size.NONE:
        input_node = None
    else:
        input_node = DetectionNode(
            name='',
            coords=[[0, 0], [frame.width, 0],
                    [frame.width, frame.height], [0, frame.height]])
        if capsule.input_type.size is NodeDescription.Size.ALL:
            input_node = [input_node]

    result = capsule.process_frame(frame=img,
                                   detection_node=input_node,
                                   options=capsule.default_options,
                                   state=capsule.stream_state())

    detection_box = PyDetectionBox(frame_id=frame.frame_id,
                                   engine_id='vision_capsules')
    if isinstance(result, list):
        for node in result:
            add_detection_node_2_detection_box(node, detection_box)
    elif isinstance(result, DetectionNode):
        add_detection_node_2_detection_box(result, detection_box)

    return flow_id, detection_box
def process_frame(self, frame: np.ndarray,
                  detection_nodes: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    if len(detection_nodes) == 0:
        return detection_nodes

    confidence_threshold = options[config.confidence_threshold]
    iou_threshold = options[config.iou_threshold]

    prediction = self.send_to_batch(frame).get()

    behavior_detections = []
    for pred in prediction:
        if pred.confidence < confidence_threshold:
            continue
        if pred.name in config.ignore:
            continue
        det = DetectionNode(
            name=pred.name,
            coords=rect_to_coords(pred.rect),
            extra_data={config.pose_confidence: pred.confidence})
        behavior_detections.append(det)

    # Fill all detections with 'unknown' data
    for det in detection_nodes:
        det.attributes[config.pose] = "unknown"
        det.extra_data[config.pose_confidence] = 0
        det.extra_data[config.pose_iou] = 0

    # Exit early if there are no behavior detections (empty lists also
    # cause later lines to fail)
    if len(behavior_detections) == 0:
        return detection_nodes

    # Calculate the 'cost matrix' of IOUs between every
    # detection/behavior pair
    iou_cost = iou_cost_matrix(detection_nodes, behavior_detections)
    iou_cost[iou_cost > (1 - iou_threshold)] = 1
    indices = linear_assignment(iou_cost)

    for det_index, beh_index in indices:
        det = detection_nodes[det_index]
        best_match = behavior_detections[beh_index]
        pose_confidence = best_match.extra_data[config.pose_confidence]
        if det.extra_data[config.pose_confidence] < pose_confidence:
            pose_iou = detection_iou(det, [best_match])
            cost_iou = iou_cost[det_index][beh_index]
            if cost_iou >= 1:
                continue
            det.attributes[config.pose] = best_match.class_name
            det.extra_data[config.pose_confidence] = pose_confidence
            det.extra_data[config.pose_iou] = pose_iou

    # If you want to see the behavior detections as well, uncomment this
    # for b in behavior_detections:
    #     b.attributes[opts.pose] = b.class_name
    #     b.extra_data[opts.pose_iou] = 0
    # self.detection_nodes += behavior_detections

    return detection_nodes
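The matching step above relies on an iou_cost_matrix helper whose implementation is not shown. A minimal sketch of what such a helper might compute, assuming cost = 1 - IOU over each node's bbox (x1, y1, x2, y2), so that linear_assignment minimizes total cost:

import numpy as np

def iou_cost_matrix_sketch(detections, behaviors) -> np.ndarray:
    """Hypothetical helper: cost[i, j] = 1 - IOU(detections[i], behaviors[j])"""
    def iou(a, b):
        # Intersection width/height, clamped at zero for disjoint boxes
        ix = max(0, min(a.bbox.x2, b.bbox.x2) - max(a.bbox.x1, b.bbox.x1))
        iy = max(0, min(a.bbox.y2, b.bbox.y2) - max(a.bbox.y1, b.bbox.y1))
        inter = ix * iy
        union = ((a.bbox.x2 - a.bbox.x1) * (a.bbox.y2 - a.bbox.y1)
                 + (b.bbox.x2 - b.bbox.x1) * (b.bbox.y2 - b.bbox.y1)
                 - inter)
        return inter / union if union > 0 else 0.0

    return np.array([[1.0 - iou(det, beh) for beh in behaviors]
                     for det in detections])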
def update(self, det: DetectionNode):
    # Assign a track ID to the detection (this communicates the track to
    # BrainFrame)
    det.track_id = self.track_id
    self.detections.append(det)
    self._misses = 0
    if (self.state is TrackState.tentative
            and len(self.detections) > self.n_hits_to_init):
        self.state = TrackState.confirmed
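A hedged sketch of the surrounding Track state this update method assumes; the field names are inferred from the method body, and TrackState is assumed to be an Enum:

from enum import Enum, auto
from uuid import uuid4

class TrackState(Enum):
    tentative = auto()
    confirmed = auto()

class Track:
    def __init__(self, n_hits_to_init: int = 3):
        # Inferred fields: a stable ID, the detection history, the current
        # state, the confirmation threshold, and a miss counter
        self.track_id = uuid4()
        self.detections = []
        self.state = TrackState.tentative
        self.n_hits_to_init = n_hits_to_init
        self._misses = 0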
def parse_detection_results(
        self, results: np.ndarray,
        resize: Resize,
        label_map: Dict[int, str],
        min_confidence: float = 0.0,
        boxes_output_name: str = None,
        frame_input_name: str = None) -> List[DetectionNode]:
    """A helper method to parse results from a detection-type network.

    :param results: The inference results from the network
    :param resize: A Resize object that was used to resize the image to
        fit into the network originally
    :param label_map: A dictionary mapping integers to class names
    :param min_confidence: Filter out detections that have a confidence
        less than this number
    :param boxes_output_name: The name of the output that carries the
        bounding box information to be parsed.
        Default=self.output_blob_names[0]
    :param frame_input_name: The name of the input that took the frame in
    :returns: A list of DetectionNodes, in this case representing bounding
        boxes
    """
    output_blob_name = boxes_output_name or self.output_blob_names[0]
    inference_results = results[output_blob_name]

    input_name = frame_input_name or self.input_blob_names[0]
    _, _, h, w = self.net.input_info[input_name].input_data.shape

    nodes: List[DetectionNode] = []
    for result in inference_results[0][0]:
        # If the first index is not 0, we've reached the end of the real
        # predictions. The network always outputs an array of length 200,
        # even if it does not have that many predictions
        if result[0] != 0:
            break

        confidence = float(result[2])
        if confidence <= min_confidence:
            continue

        x_min, y_min, x_max, y_max = result[3:7]
        # x and y in the results are in terms of percent of image
        # width/height
        x_min, x_max = x_min * w, x_max * w
        y_min, y_max = y_min * h, y_max * h
        coords = [[x_min, y_min], [x_max, y_min],
                  [x_max, y_max], [x_min, y_max]]

        class_id = round(result[1])
        res = DetectionNode(
            name=label_map[class_id],
            coords=coords,
            extra_data={"detection_confidence": confidence})
        nodes.append(res)

    # Convert the coordinate space of the detections from the resized
    # frame back to the original frame
    resize.scale_and_offset_detection_nodes(nodes)
    return nodes
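For context, a sketch of how this helper might be called from a backend's process_frame, assuming the prepare_inputs/send_to_batch flow seen in the other snippets (the label map and option name here are hypothetical):

def process_frame(self, frame, detection_node, options, state):
    # Resize the frame to fit the network, run inference, then parse
    input_dict, resize = self.prepare_inputs(frame)
    results = self.send_to_batch(input_dict).get()
    return self.parse_detection_results(
        results, resize,
        label_map={1: "person"},              # hypothetical mapping
        min_confidence=options["threshold"])  # hypothetical option name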
def test_describes_error():
    # Test that a ValueError gets raised when a DetectionNode has an
    # attribute with values that are not described by the NodeDescription
    node_desc = NodeDescription(
        size=NodeDescription.Size.SINGLE,
        attributes={"Gender": ["boy", "girl"]})
    det_node = DetectionNode(
        name="irrelevant",
        coords=[[0, 0]] * 4,
        attributes={"Gender": "NOT EXISTENT VALUE"})
    with pytest.raises(ValueError):
        node_desc.describes(det_node)
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    prediction = self.send_to_batch(frame).get()

    return [
        DetectionNode(name=det.name,
                      coords=rect_to_coords(det.rect),
                      extra_data={detection_confidence: det.confidence})
        for det in prediction
        if det.name == "face" and det.confidence >= options["threshold"]
    ]
def test_size_filter():
    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))

    assert len(SizeFilter([node]).apply()) == 1
    assert len(SizeFilter([node])
               .min_size(12, 12).max_size(100, 100).apply()) == 0
    assert len(SizeFilter([node])
               .min_size(5, 5).max_size(8, 8).apply()) == 0
    assert len(SizeFilter([node])
               .min_size(5, 5).max_size(15, 15).apply()) == 1
    assert len(SizeFilter([node])
               .min_area(10 * 10).max_area(11 * 11).apply()) == 1
    assert len(SizeFilter([node])
               .min_area(11 * 11).max_area(11 * 11).apply()) == 0
    assert len(SizeFilter([node])
               .min_area(9 * 9).max_area(9 * 9).apply()) == 0
def test_clamp():
    frame = np.zeros((800, 800, 3), dtype=np.uint8)
    clamp = Clamp(frame, 100, 100)
    assert clamp.apply().shape == (100, 100, 3)
    detection_node = DetectionNode(
        name="person", coords=rect_to_coords([10, 10, 100, 100]))
    clamp.scale_detection_nodes([detection_node])
    assert detection_node.bbox == BoundingBox(80, 80, 800, 800)

    frame = np.zeros((800, 600, 3), dtype=np.uint8)
    clamp = Clamp(frame, 100, 100)
    assert clamp.apply().shape == (100, 75, 3)
    detection_node = DetectionNode(
        name="person", coords=rect_to_coords([10, 10, 100, 100]))
    clamp.scale_detection_nodes([detection_node])
    assert detection_node.bbox == BoundingBox(80, 80, 800, 800)

    frame = np.zeros((600, 800, 3), dtype=np.uint8)
    clamp = Clamp(frame, 100, 100)
    assert clamp.apply().shape == (75, 100, 3)
    detection_node = DetectionNode(
        name="person", coords=rect_to_coords([10, 10, 100, 100]))
    clamp.scale_detection_nodes([detection_node])
    assert detection_node.bbox == BoundingBox(80, 80, 800, 800)
def process_frame(self, frame: np.ndarray,
                  detection_node: None,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    """
    :param frame: A numpy array of shape (height, width, 3)
    :param detection_node: None
    :param options: Example: {"threshold": 0.5}. Defined in the Capsule
        class above.
    :param state: (Unused in this capsule)
    :return: A list of detections
    """
    # Send the frame to the BrainFrame backend. This function will return
    # a queue. BrainFrame will batch_process() received frames and
    # populate the queue with the results.
    prediction_output_queue = self.send_to_batch(frame)

    # Wait for predictions
    predictions = prediction_output_queue.get()

    # Iterate through all the predictions received in this frame
    detection_nodes = []
    for prediction in predictions:
        # Filter out detections that are not faces
        if prediction.name != "face":
            continue

        # Filter out detections with low confidence
        if prediction.confidence < options["threshold"]:
            continue

        # Create a DetectionNode for the prediction. It will be reused by
        # any other capsules that require a face DetectionNode in their
        # input type. An age classifier capsule would be an example of
        # such a capsule.
        new_detection = DetectionNode(
            name=prediction.name,
            # Convert [x1, y1, x2, y2] to [[x1, y1], [x1, y2], ...]
            coords=rect_to_coords(prediction.rect),
            extra_data={"detection_confidence": prediction.confidence})
        detection_nodes.append(new_detection)

    return detection_nodes
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    n, c, h, w = self.detector.net.inputs['im_data'].shape
    hidden_shape = self.recognizer_decoder.net.inputs['prev_hidden'].shape

    input_dict, resize = self.detector.prepare_inputs(
        frame, frame_input_name="im_data")
    input_dict["im_data"] = (input_dict["im_data"]
                             .reshape((n, c, h, w))
                             .astype(np.float32))

    input_image_size = self.detector.net.inputs['im_data'].shape[-2:]
    input_image_info = np.asarray(
        [[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)
    input_dict["im_info"] = input_image_info

    prediction = self.detector.send_to_batch(input_dict).get()

    scores = prediction["scores"]
    detections_filter = scores > options["threshold"]
    scores = scores[detections_filter]
    rects = prediction["boxes"][detections_filter]
    text_features = prediction["text_features"][detections_filter]

    feature_queues = []
    for text_feature in text_features:
        feature_queues.append(
            self.recognizer_encoder.send_to_batch({'input': text_feature}))

    detections = []
    for score, rect, feature_queue in zip(scores, rects, feature_queues):
        feature = feature_queue.get()['output']
        feature = np.reshape(feature,
                             (feature.shape[0], feature.shape[1], -1))
        feature = np.transpose(feature, (0, 2, 1))

        hidden = np.zeros(hidden_shape)
        prev_symbol_index = np.ones((1,)) * SOS_INDEX

        text = ''
        for _ in range(MAX_SEQ_LEN):
            decoder_output = self.recognizer_decoder.send_to_batch({
                'prev_symbol': prev_symbol_index,
                'prev_hidden': hidden,
                'encoder_outputs': feature
            }).get()
            symbols_distr = decoder_output['output']
            prev_symbol_index = int(np.argmax(symbols_distr, axis=1))
            if prev_symbol_index == EOS_INDEX:
                break
            text += ALPHABET[prev_symbol_index]
            hidden = decoder_output['hidden']

        detections.append(
            DetectionNode(
                name="text",
                coords=rect_to_coords(rect.tolist()),
                extra_data={
                    "detection_confidence": float(score),
                    "text": text,
                },
            ))

    return resize.scale_and_offset_detection_nodes(detections)
def process_frame(self, frame, detection_node: None, options, state):
    return [
        DetectionNode(name="fake_box",
                      coords=[[10, 10], [100, 10], [100, 100], [10, 100]])
    ]
def test_resize_scale():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape(
        (input_height, input_width))

    # Single integer resize
    resize = Resize(frame).resize(10, 20, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(5, 5, 10, 10)

    # Double integer resize (note that coords are rounded in node.bbox
    # output)
    resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.EXACT) \
        .resize(20, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(4, 4, 5, 5)

    # Single crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 25, 25, 30)

    # Two affecting crops, plus one that should not change the offset
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .crop(10, 15, Resize.CropPadType.RIGHT_BOTTOM) \
        .crop(8, 5, Resize.CropPadType.ALL)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(21, 30, 26, 35)

    # Crop, then resize
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .resize(30, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(13, 18, 15, 20)

    # Resize, then crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .resize(30, 40, Resize.ResizeType.EXACT) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 26, 23, 30)
     NodeDescription(size=NodeDescription.Size.SINGLE, tracked=True),
     NodeDescription(size=NodeDescription.Size.SINGLE, tracked=True),
     NodeDescription(size=NodeDescription.Size.SINGLE))
]


@pytest.mark.parametrize(('desc1', 'desc2', 'diff_1_2', 'diff_2_1'),
                         DIFFERENCE_CASES)
def test_node_description_difference(desc1, desc2, diff_1_2, diff_2_1):
    """Test comparing two node descriptions"""
    assert desc1.difference(desc2) == diff_1_2
    assert desc2.difference(desc1) == diff_2_1


DESCRIPTION_CASES = [
    (DetectionNode(name="person", coords=[[0, 0]] * 4),
     True, False, False, False, False, False, False),
    (DetectionNode(name="person", coords=[[0, 0]] * 4,
                   encoding=np.array([1])),
     True, False, False, True, False, False, False),
    (DetectionNode(name="hair", coords=[[0, 0]] * 4,
                   attributes={"Gender": "boy"}),
     False, False, True, False, False, False, False),
    (DetectionNode(name="cat", coords=[[0, 0]] * 4,
                   attributes={"Uniform": "Police", "Gender": "girl"},
                   encoding=np.ndarray([1, 2, 3, 4, 5])),
     False, False, True, False, True, True, False),