def parse_detection_results(
        self,
        results: np.ndarray,
        resize: Resize,
        label_map: Dict[int, str],
        min_confidence: float = 0.0,
        boxes_output_name: str = None,
        frame_input_name: str = None) -> List[DetectionNode]:
    """A helper method to take results from a detection-type network.

    :param results: The inference results from the network
    :param resize: A Resize object that was used to resize the image to
        fit into the network originally.
    :param label_map: A dictionary mapping integers to class_names.
    :param min_confidence: Filter out detections that have a confidence
        less than this number.
    :param boxes_output_name: The name of the output that carries the
        bounding box information to be parsed.
        Default=self.output_blob_names[0]
    :param frame_input_name: The name of the input that took the frame in.
    :returns: A list of DetectionNodes, in this case representing bounding
        boxes.
    """
    output_blob_name = boxes_output_name or self.output_blob_names[0]
    inference_results = results[output_blob_name]

    input_name = frame_input_name or self.input_blob_names[0]
    _, _, h, w = self.net.input_info[input_name].input_data.shape

    nodes: List[DetectionNode] = []
    for result in inference_results[0][0]:
        # If the first index == -1, that's the end of real predictions.
        # The network always outputs an array of length 200 even if it
        # does not have that many predictions.
        if result[0] == -1:
            break

        confidence = float(result[2])
        if confidence < min_confidence:
            continue

        x_min, y_min, x_max, y_max = result[3:7]
        # x and y in the results are in terms of percent of image
        # width/height
        x_min, x_max = x_min * w, x_max * w
        y_min, y_max = y_min * h, y_max * h
        coords = [[x_min, y_min], [x_max, y_min],
                  [x_max, y_max], [x_min, y_max]]

        class_id = round(result[1])
        res = DetectionNode(
            name=label_map[class_id],
            coords=coords,
            extra_data={"detection_confidence": confidence})
        nodes.append(res)

    # Convert the coordinate space of the detections from the resized
    # frame to the original frame
    resize.scale_and_offset_detection_nodes(nodes)
    return nodes
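For context, here is a minimal sketch of how a detector backend might tie these helpers together in its own process_frame. The label map and the "threshold" option name are hypothetical; send_to_batch is the batching helper used by the classifier backends below.

def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    input_dict, resize = self.prepare_inputs(frame)
    prediction = self.send_to_batch(input_dict).get()
    return self.parse_detection_results(
        prediction, resize,
        label_map={1: "person"},              # hypothetical label map
        min_confidence=options["threshold"])  # hypothetical option name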
def prepare_inputs(self, frame: np.ndarray, frame_input_name: str = None) \
        -> Tuple[OV_INPUT_TYPE, Resize]:
    """A helper method to create an OpenVINO input like {input_name: array}

    This method takes a frame, resizes it to fit the network's input, then
    returns two things: the input, and the Resize information. The Resize
    information records all of the operations that were done on the frame,
    allowing users to map detections made on the resized frame back to the
    coordinate space of the original frame.

    :param frame: The image. BGR ordered.
    :param frame_input_name: Set this value to force a certain node to be
        used as the frame input. Useful if you want to reuse this default
        implementation from a subclass whose network has multiple inputs.
    :returns: ({input_name: resized_frame}, Resize)
    """
    if not frame_input_name and len(self.net.inputs) > 1:
        raise ValueError("More than one input was expected for model, but "
                         "the default prepare_inputs implementation was "
                         "used.")

    input_blob_name = frame_input_name or self.input_blob_names[0]
    input_blob = self.net.inputs[input_blob_name]

    _, _, h, w = input_blob.shape
    resize = Resize(frame).resize(w, h, Resize.ResizeType.EXACT)

    # Change data layout from HWC to CHW
    in_frame = np.transpose(resize.frame.copy(), (2, 0, 1))
    return {input_blob_name: in_frame}, resize
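As a standalone illustration (not part of the library), the HWC-to-CHW transpose above moves the channel axis to the front, which is the layout OpenVINO expects:

import numpy as np

hwc = np.zeros((480, 640, 3), dtype=np.uint8)  # height, width, channels
chw = np.transpose(hwc, (2, 0, 1))
assert chw.shape == (3, 480, 640)              # channels, height, width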
def test_resize_pad():
    input_width, input_height = 10, 5
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Pad bottom/right, then top/left
    frame_resize = Resize(frame) \
        .pad(13, 9, 255, Resize.CropPadType.RIGHT_BOTTOM) \
        .pad(17, 11, 254, Resize.CropPadType.LEFT_TOP) \
        .frame
    frame_expected = np.pad(
        np.arange(50, dtype=np.uint8).reshape((input_height, input_width)),
        ((0, 4), (0, 3)), 'constant', constant_values=255)
    frame_expected = np.pad(frame_expected, ((2, 0), (4, 0)), 'constant',
                            constant_values=254)
    assert frame_resize.shape[1] == 17
    assert frame_resize.shape[0] == 11
    assert (frame_resize == frame_expected).all()

    # Pad all around
    frame_resize = Resize(frame) \
        .pad(13, 9, 255, Resize.CropPadType.ALL) \
        .frame
    frame_expected = np.pad(
        np.arange(50, dtype=np.uint8).reshape((input_height, input_width)),
        ((2, 2), (1, 2)), 'constant', constant_values=255)
    assert frame_resize.shape[1] == 13
    assert frame_resize.shape[0] == 9
    # noinspection PyUnresolvedReferences
    assert (frame_resize == frame_expected).all()

    # Padding to a size smaller than the frame should return the frame
    frame_resize = Resize(frame).pad(9, 4, -1, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()

    # CropPadType.NONE should be a no-op
    frame_resize = Resize(frame).pad(-1, -1, -1, Resize.CropPadType.NONE).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()
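The "pad all around" case implies that when the total padding is odd, the smaller share goes on the top/left and the larger share on the bottom/right. A small numpy-only sketch of that assumed split:

import numpy as np

frame = np.zeros((5, 10), dtype=np.uint8)
# Pad 10x5 up to 13x9: width grows by 3 (1 left, 2 right) and height by
# 4 (2 top, 2 bottom), matching the expected frame in the test above.
padded = np.pad(frame, ((2, 2), (1, 2)), 'constant', constant_values=255)
assert padded.shape == (9, 13)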
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame
    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    max_color = config.colors[prediction["color"].argmax()]
    max_type = config.vehicle_types[prediction["type"].argmax()]

    detection_node.attributes["color"] = max_color
    detection_node.attributes["type"] = max_type
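Several of the classifier backends here share the same argmax-and-lookup pattern: take the highest-scoring class index, then index into a class list. A toy version with a hypothetical class list:

import numpy as np

colors = ["white", "gray", "yellow", "red"]  # hypothetical class list
scores = np.array([0.05, 0.10, 0.05, 0.80])  # one score per class
assert colors[int(scores.argmax())] == "red"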
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame
    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    emotion_id = int(prediction["prob_emotion"].argmax())
    emotion = EMOTION_TYPES[emotion_id]
    emotion_score = float(prediction["prob_emotion"].flatten()[emotion_id])

    detection_node.attributes["emotion"] = emotion
    detection_node.extra_data["emotion_confidence"] = emotion_score
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame
    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    # Convert the prediction to a label
    probability = prediction["fc5"].flatten()[0]
    threshold = options["threshold"]
    label = self.LABELS[int(probability > threshold)]

    detection_node.attributes["mask"] = label
    detection_node.extra_data["mask_confidence"] = float(probability)
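A self-contained sketch of the thresholding step above: a single network output is folded into one of two labels by indexing with the boolean comparison. The label ordering here is hypothetical; index 1 must correspond to "over threshold".

LABELS = ("no_mask", "mask")  # hypothetical ordering
probability, threshold = 0.83, 0.5
label = LABELS[int(probability > threshold)]
assert label == "mask"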
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame
    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()

    # The network outputs age as a fraction of 100 years
    age = int(prediction['age_conv3'] * 100)
    gender_id = prediction['prob'].argmax()
    gender = config.genders[gender_id]
    gender_confidence = float(prediction['prob'].flatten()[gender_id])

    detection_node.extra_data['age'] = age
    detection_node.attributes['gender'] = gender
    detection_node.extra_data['gender_confidence'] = gender_confidence
    detection_node.attributes['age'] = _get_age_bin(age)
def process_frame(self, frame: np.ndarray,
                  detection_node: DETECTION_NODE_TYPE,
                  options: Dict[str, OPTION_TYPE],
                  state: BaseStreamState) -> DETECTION_NODE_TYPE:
    crop = Resize(frame).crop_bbox(detection_node.bbox).frame
    input_dict, _ = self.prepare_inputs(crop)
    prediction = self.send_to_batch(input_dict).get()
    prediction = prediction['453'].flatten()

    # Iterate over predictions and add attributes accordingly
    for attribute_key, confidence in zip(ATTRIBUTES.keys(), prediction):
        attribute = ATTRIBUTES[attribute_key][
            0 if confidence >= 0.5 else 1]
        option_key = f"{attribute}_confidence"

        # The confidence value is remapped to create 2 confidence
        # thresholds for the attribute; one for how confident it is in the
        # upper range, the other for the confidence in the lower range.
        remapped_confidence = abs(confidence - 0.5) * 2
        float_option = options[option_key]
        detection_node.attributes[attribute_key] = (
            attribute if remapped_confidence > float_option
            else ATTRIBUTES[attribute_key][2])
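The remapping folds a [0, 1] probability into a [0, 1] "distance from undecided" score, so a single option threshold applies symmetrically to both the upper and lower ranges:

for confidence, expected in [(0.5, 0.0), (0.9, 0.8), (0.1, 0.8), (1.0, 1.0)]:
    remapped = abs(confidence - 0.5) * 2
    assert round(remapped, 6) == expected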
def test_resize_crop():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Simple right/bottom crop
    frame_resize = Resize(frame) \
        .crop(3, 4, Resize.CropPadType.RIGHT_BOTTOM) \
        .frame
    # noinspection PyUnresolvedReferences
    assert (frame[:4, :3] == frame_resize).all()

    # Simple top/left crop
    frame_resize = Resize(frame).crop(4, 3, Resize.CropPadType.LEFT_TOP).frame
    # noinspection PyUnresolvedReferences
    assert (frame[-3:, -4:] == frame_resize).all()

    # Crop starting at a point
    frame_resize = Resize(frame) \
        .crop(2, 7, Resize.CropPadType.CROP_START_POINT, top_left=(1, 3)) \
        .frame
    # noinspection PyUnresolvedReferences
    assert (frame[3:10, 1:3] == frame_resize).all()

    # Crop all sides (keep center)
    frame_resize = Resize(frame).crop(2, 3, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame[3:6, 1:3] == frame_resize).all()

    # Crop larger than frame should return frame
    frame_resize = Resize(frame).crop(6, 11, Resize.CropPadType.ALL).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()

    # CropPadType.NONE should be a no-op
    frame_resize = Resize(frame).crop(-1, -1, Resize.CropPadType.NONE).frame
    # noinspection PyUnresolvedReferences
    assert (frame == frame_resize).all()
def test_resize():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Basic resize up
    frame_resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    frame_resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    # Resize up where the target aspect ratio is wider than the source
    frame_resize = Resize(frame) \
        .resize(30, 30, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    frame_resize = Resize(frame) \
        .resize(30, 30, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 30
    assert frame_resize.shape[0] == 60

    # Resize up where the target aspect ratio is taller than the source
    frame_resize = Resize(frame) \
        .resize(10, 30, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 10
    assert frame_resize.shape[0] == 20

    frame_resize = Resize(frame) \
        .resize(10, 30, Resize.ResizeType.FIT_ONE).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    # Resize to width
    frame_resize = Resize(frame).resize(30, -1, Resize.ResizeType.WIDTH).frame
    assert frame_resize.shape[1] == 30
    assert frame_resize.shape[0] == 60

    # Resize to height
    frame_resize = Resize(frame).resize(-1, 30, Resize.ResizeType.HEIGHT).frame
    assert frame_resize.shape[1] == 15
    assert frame_resize.shape[0] == 30

    # Resize exactly
    frame_resize = Resize(frame).resize(8, 7, Resize.ResizeType.EXACT).frame
    assert frame_resize.shape[1] == 8
    assert frame_resize.shape[0] == 7

    # Resize where the scaling is not an integer
    # Round up
    frame_resize = Resize(frame) \
        .resize(10, 15, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 8
    assert frame_resize.shape[0] == 15

    # Round down
    input_width, input_height = 15, 4
    frame = np.arange(60, dtype=np.uint8).reshape((input_height, input_width))
    frame_resize = Resize(frame) \
        .resize(20, 10, Resize.ResizeType.FIT_BOTH).frame
    assert frame_resize.shape[1] == 20
    assert frame_resize.shape[0] == 5
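Read together, the assertions pin down the resize semantics: FIT_BOTH scales by the smaller of the width and height ratios (the result fits inside the target), while FIT_ONE scales by the larger (the result covers the target in at least one dimension). A hand-computed check of the 30x30 case:

src_w, src_h, dst_w, dst_h = 5, 10, 30, 30
fit_both = min(dst_w / src_w, dst_h / src_h)  # 3.0 -> 15 x 30
fit_one = max(dst_w / src_w, dst_h / src_h)   # 6.0 -> 30 x 60
assert (src_w * fit_both, src_h * fit_both) == (15, 30)
assert (src_w * fit_one, src_h * fit_one) == (30, 60)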
def test_resize_scale():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Single integer resize
    resize = Resize(frame).resize(10, 20, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(5, 5, 10, 10)

    # Double integer resize (note that coords are rounded in node.bbox
    # output)
    resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.EXACT) \
        .resize(20, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(4, 4, 5, 5)

    # Single crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 25, 25, 30)

    # Two affecting crops plus one that should not change the offset
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .crop(10, 15, Resize.CropPadType.RIGHT_BOTTOM) \
        .crop(8, 5, Resize.CropPadType.ALL)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(21, 30, 26, 35)

    # Crop then resize
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .resize(30, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(13, 18, 15, 20)

    # Resize then crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape(
        (input_height, input_width))
    resize = Resize(frame) \
        .resize(30, 40, Resize.ResizeType.EXACT) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 26, 23, 30)
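Working the "crop then resize" case by hand shows the mapping back to the original frame: coordinates are first un-scaled by the resize, then shifted by the crop offset, and node.bbox rounds the resulting floats (here to (13, 18, 15, 20)):

x_min, y_min, x_max, y_max = 15, 15, 20, 20  # detection in network space
scale_x, scale_y = 30 / 15, 40 / 20          # resize 15x20 -> 30x40
offset_x, offset_y = 5, 10                   # LEFT_TOP crop 20x30 -> 15x20
original = (x_min / scale_x + offset_x, y_min / scale_y + offset_y,
            x_max / scale_x + offset_x, y_max / scale_y + offset_y)
assert original == (12.5, 17.5, 15.0, 20.0)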