import random
from uuid import uuid4

import numpy as np

# Assumed import path for these helpers; adjust to the local package layout
from vcap import DetectionNode, NodeDescription


def make_detection_node(frame_shape,
                        node_description: NodeDescription) -> DetectionNode:
    """Creates a fake detection node that adheres to the given node
    description.
    :param frame_shape: The shape of the frame in (height, width, channels)
    :param node_description: The description that the returned node must
        adhere to
    :return: A fake detection node that adheres to this description
    """
    height, width, _ = frame_shape
    attributes = {category: random.choice(possible_values)
                  for category, possible_values in
                  node_description.attributes.items()}
    extra_data = {data_key: 0.5129319283
                  for data_key in node_description.extra_data}
    detection_names = node_description.detections

    # Create random coordinates for this detection
    x1 = random.randint(0, width - 3)
    y1 = random.randint(0, height - 3)
    x2 = x1 + random.randint(0, width - x1 + 1) + 2
    y2 = y1 + random.randint(0, height - y1 + 1) + 2

    return DetectionNode(
        name=random.choice(detection_names) if len(detection_names) else "N/A",
        coords=[[x1, y1], [x2, y1], [x2, y2], [x1, y2]],
        attributes=attributes,
        encoding=np.zeros((128,)) if node_description.encoded else None,
        track_id=uuid4() if node_description.tracked else None,
        extra_data=extra_data)
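
A hedged usage sketch of the helper above. The NodeDescription keyword arguments (size, detections, attributes) are inferred from the attribute accesses in the function body and from the fixtures later in this listing, so treat the exact signature as an assumption:

description = NodeDescription(
    size=NodeDescription.Size.SINGLE,
    detections=["person"],
    attributes={"Gender": ["boy", "girl"]})
fake_node = make_detection_node(frame_shape=(480, 640, 3),
                                node_description=description)
# The fake node carries one of the described attribute values
assert fake_node.attributes["Gender"] in ("boy", "girl")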
Example #2
def test_crop():
    frame = np.zeros((100, 100, 3), dtype=np.uint8)

    cropped = Crop(30, 30, 40, 40).apply(frame)
    assert cropped.shape == (10, 10, 3)

    cropped = Crop(90, 90, 110, 110).apply(frame)
    assert cropped.shape == (9, 9, 3)

    cropped = Crop(-10, -10, 10, 10).apply(frame)
    assert cropped.shape == (10, 10, 3)

    cropped = Crop(0, 0, 100, 100).apply(frame)
    assert cropped.shape == (100, 100, 3)

    cropped = Crop(10, 10, 90, 90).apply(frame)
    assert cropped.shape == (80, 80, 3)

    cropped = Crop(10, 10, 20, 20).pad_percent(10, 10, 10, 10).apply(frame)
    assert cropped.shape == (12, 12, 3)

    cropped = Crop(10, 10, 20, 20).pad_px(10, 10, 10, 10).apply(frame)
    assert cropped.shape == (30, 30, 3)

    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))
    cropped = Crop.from_detection(node).apply(frame)
    assert cropped.shape == (10, 10, 3)
Example #3
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE], state: BaseStreamState):
        if options["scale_frame"]:
            max_frame_side_length = options["scale_frame_max_side_length"]
            clamp = Clamp(frame=frame,
                          max_width=max_frame_side_length,
                          max_height=max_frame_side_length)
            frame = clamp.apply()

        predictions = self.send_to_batch(frame).result()

        results = []

        # Convert all person predictions that meet the confidence
        # threshold into DetectionNodes
        for pred in predictions:
            if pred.confidence >= options["detection_threshold"] \
                    and pred.name == "person":
                results.append(
                    DetectionNode(name=pred.name,
                                  coords=rect_to_coords(pred.rect)))

        # If we scaled the frame down before processing, scale the
        # detections back up to match the original frame size
        if options["scale_frame"]:
            clamp.scale_detection_nodes(results)

        return results
Example #4
    def score(flow_id: str, frame: object) -> Tuple[str, PyDetectionBox]:
        img = frame_data_2_np_array(frame)
        if capsule.input_type.size is NodeDescription.Size.NONE:
            input_node = None
        else:
            input_node = DetectionNode(name='',
                                       coords=[[0, 0], [frame.width, 0],
                                               [frame.width, frame.height],
                                               [0, frame.height]])

        if capsule.input_type.size is NodeDescription.Size.ALL:
            input_node = [input_node]

        result = capsule.process_frame(frame=img,
                                       detection_node=input_node,
                                       options=capsule.default_options,
                                       state=capsule.stream_state())
        detection_box = PyDetectionBox(frame_id=frame.frame_id,
                                       engine_id='vision_capsules')
        if isinstance(result, list):
            for node in result:
                add_detection_node_2_detection_box(node, detection_box)
        elif isinstance(result, DetectionNode):
            add_detection_node_2_detection_box(result, detection_box)

        return flow_id, detection_box
Example #5
    def process_frame(self, frame: np.ndarray,
                      detection_nodes: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        if len(detection_nodes) == 0:
            return detection_nodes

        confidence_threshold = options[config.confidence_threshold]
        iou_threshold = options[config.iou_threshold]

        prediction = self.send_to_batch(frame).get()

        behavior_detections = []
        for pred in prediction:
            if pred.confidence < confidence_threshold:
                continue
            if pred.name in config.ignore:
                continue
            det = DetectionNode(
                name=pred.name,
                coords=rect_to_coords(pred.rect),
                extra_data={config.pose_confidence: pred.confidence})
            behavior_detections.append(det)

        # Fill all detections with 'unknown' data
        for det in detection_nodes:
            det.attributes[config.pose] = "unknown"
            det.extra_data[config.pose_confidence] = 0
            det.extra_data[config.pose_iou] = 0

        # Exit early if there are no behavior detections (empty lists would
        # also cause the lines below to fail)
        if len(behavior_detections) == 0:
            return detection_nodes

        # Calculate the 'cost matrix' of IOU between every pairing of
        # existing detections and behavior detections
        iou_cost = iou_cost_matrix(detection_nodes, behavior_detections)
        iou_cost[iou_cost > (1 - iou_threshold)] = 1
        indices = linear_assignment(iou_cost)

        for det_index, beh_index in indices:
            det = detection_nodes[det_index]
            best_match = behavior_detections[beh_index]
            pose_confidence = best_match.extra_data[config.pose_confidence]
            if det.extra_data[config.pose_confidence] < pose_confidence:
                pose_iou = detection_iou(det, [best_match])
                cost_iou = iou_cost[det_index][beh_index]
                if cost_iou >= 1:
                    continue
                det.attributes[config.pose] = best_match.class_name
                det.extra_data[config.pose_confidence] = pose_confidence
                det.extra_data[config.pose_iou] = pose_iou

        # If you want to see the behavior detections as well, uncomment this:
        # for b in behavior_detections:
        #     b.attributes[config.pose] = b.class_name
        #     b.extra_data[config.pose_iou] = 0
        # self.detection_nodes += behavior_detections
        return detection_nodes
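
For reference, a minimal self-contained sketch of the matching step used above, assuming `iou_cost_matrix` computes 1 - IOU for every detection pair and `linear_assignment` behaves like scipy's `linear_sum_assignment`; the capsule's actual helpers may differ:

import numpy as np
from scipy.optimize import linear_sum_assignment


def iou(box_a, box_b):
    # Boxes are (x1, y1, x2, y2); returns intersection-over-union
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter)


def match_by_iou(boxes_a, boxes_b, iou_threshold=0.3):
    # Cost is 1 - IOU; pairs below the IOU threshold are given the maximum
    # cost of 1 so they can be skipped after assignment (mirroring the
    # `cost_iou >= 1: continue` check above)
    cost = np.array([[1 - iou(a, b) for b in boxes_b] for a in boxes_a])
    cost[cost > (1 - iou_threshold)] = 1
    rows, cols = linear_sum_assignment(cost)
    return [(r, c) for r, c in zip(rows, cols) if cost[r, c] < 1]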
Example #6
    def update(self, det: DetectionNode):
        # Assign a Track ID to the detection (this communicates the track
        # to BrainFrame)
        det.track_id = self.track_id
        self.detections.append(det)
        self._misses = 0
        if (self.state is TrackState.tentative
                and len(self.detections) > self.n_hits_to_init):
            self.state = TrackState.confirmed
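
For context, a hedged sketch of the track bookkeeping that `update` appears to rely on; the `TrackState` enum and constructor fields are assumptions inferred from the method body, not the tracker's actual definitions:

import enum
from uuid import uuid4


class TrackState(enum.Enum):
    # Assumed states: a track starts tentative and is confirmed after
    # enough hits (see n_hits_to_init in update() above)
    tentative = enum.auto()
    confirmed = enum.auto()


class Track:
    def __init__(self, n_hits_to_init: int = 3):
        self.track_id = uuid4()   # Shared by every detection in this track
        self.detections = []      # DetectionNodes assigned so far
        self.state = TrackState.tentative
        self.n_hits_to_init = n_hits_to_init
        self._misses = 0          # Frames since the last matched detection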
Example #7
    def parse_detection_results(
            self,
            results: np.ndarray,
            resize: Resize,
            label_map: Dict[int, str],
            min_confidence: float = 0.0,
            boxes_output_name: str = None,
            frame_input_name: str = None) -> List[DetectionNode]:
        """A helper method to take results from a detection-type network.
        :param results: The inference results from the network
        :param resize: A Resize object that was used to resize the image to
        fit into the network originally.
        :param label_map: A dictionary mapping integers to class_names.
        :param min_confidence: Filter out detections that have a confidence
        less than this number.
        :param boxes_output_name: The name of the output that carries the
        bounding box information to be parsed.
        Default=self.output_blob_names[0]
        :param frame_input_name: The name of the input that took the frame in.
        Default=self.input_blob_names[0]

        :returns: A list of DetectionNodes, in this case representing bounding
        boxes.
        """
        output_blob_name = boxes_output_name or self.output_blob_names[0]
        inference_results = results[output_blob_name]

        input_name = frame_input_name or self.input_blob_names[0]
        _, _, h, w = self.net.input_info[input_name].input_data.shape

        nodes: List[DetectionNode] = []
        for result in inference_results[0][0]:
            # result[0] is the image ID; once it is no longer 0, we have
            # reached the end of the real predictions. The network always
            # outputs an array of length 200, even if it does not have that
            # many predictions.
            if result[0] != 0:
                break

            confidence = float(result[2])
            if confidence <= min_confidence:
                continue

            x_min, y_min, x_max, y_max = result[3:7]
            # The coordinates are fractions of the network input
            # width/height, so scale them to pixels
            x_min, x_max = x_min * w, x_max * w
            y_min, y_max = y_min * h, y_max * h
            coords = [[x_min, y_min], [x_max, y_min], [x_max, y_max],
                      [x_min, y_max]]

            class_id = round(result[1])
            res = DetectionNode(
                name=label_map[class_id],
                coords=coords,
                extra_data={"detection_confidence": confidence})
            nodes.append(res)

        # Convert the coordinate space of the detections from the resized
        # frame back to the original frame
        resize.scale_and_offset_detection_nodes(nodes)
        return nodes
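
A hedged sketch of a call site for this helper, based only on the signature above and on the `prepare_inputs`/`send_to_batch` usage seen elsewhere in this listing; `detect_people`, `backend`, the label map, and the threshold are illustrative names, not from the source:

import numpy as np


def detect_people(backend, frame: np.ndarray, threshold: float = 0.5):
    # `backend` is assumed to be an instance of the class defining
    # parse_detection_results above
    input_dict, resize = backend.prepare_inputs(frame)
    results = backend.send_to_batch(input_dict).get()
    return backend.parse_detection_results(
        results=results,
        resize=resize,
        label_map={1: "person"},   # Illustrative label map
        min_confidence=threshold)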
Example #8
def test_describes_error():
    # Test that a ValueError gets raised when a DetectionNode has an attribute
    # with values that are not described by the NodeDescription
    node_desc = NodeDescription(size=NodeDescription.Size.SINGLE,
                                attributes={"Gender": ["boy", "girl"]})
    det_node = DetectionNode(name="irrelevant",
                             coords=[[0, 0]] * 4,
                             attributes={"Gender": "NOT EXISTENT VALUE"})
    with pytest.raises(ValueError):
        node_desc.describes(det_node)
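
For contrast, a hedged companion test showing a node the description accepts; this assumes `describes` returns True for valid nodes, which the error test above does not itself confirm:

def test_describes_valid_node():
    # Assumption: describes() returns True when every attribute value is
    # listed in the NodeDescription (the test above only covers the error)
    node_desc = NodeDescription(size=NodeDescription.Size.SINGLE,
                                attributes={"Gender": ["boy", "girl"]})
    det_node = DetectionNode(name="irrelevant",
                             coords=[[0, 0]] * 4,
                             attributes={"Gender": "boy"})
    assert node_desc.describes(det_node)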
Example #9
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        prediction = self.send_to_batch(frame).get()
        return [
            DetectionNode(name=det.name,
                          coords=rect_to_coords(det.rect),
                          extra_data={"detection_confidence": det.confidence})
            for det in prediction
            if det.name == "face" and det.confidence >= options["threshold"]
        ]
Example #10
def test_size_filter():
    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))

    assert len(SizeFilter([node]).apply()) == 1
    assert len(SizeFilter([node])
               .min_size(12, 12).max_size(100, 100).apply()) == 0
    assert len(SizeFilter([node]).min_size(5, 5).max_size(8, 8).apply()) == 0
    assert len(SizeFilter([node]).min_size(5, 5).max_size(15, 15).apply()) == 1
    assert len(SizeFilter([node])
               .min_area(10 * 10).max_area(11 * 11).apply()) == 1
    assert len(SizeFilter([node])
               .min_area(11 * 11).max_area(11 * 11).apply()) == 0

    assert len(SizeFilter([node]).min_area(9 * 9).max_area(9 * 9).apply()) == 0
Example #11
def test_clamp():
    frame = np.zeros((800, 800, 3), dtype=np.uint8)
    clamp = Clamp(frame, 100, 100)
    assert clamp.apply().shape == (100, 100, 3)
    detection_node = DetectionNode(name="person",
                                   coords=rect_to_coords([10, 10, 100, 100]))
    clamp.scale_detection_nodes([detection_node])
    assert detection_node.bbox == BoundingBox(80, 80, 800, 800)

    frame = np.zeros((800, 600, 3), dtype=np.uint8)
    clamp = Clamp(frame, 100, 100)
    assert clamp.apply().shape == (100, 75, 3)
    detection_node = DetectionNode(name="person",
                                   coords=rect_to_coords([10, 10, 100, 100]))
    clamp.scale_detection_nodes([detection_node])
    assert detection_node.bbox == BoundingBox(80, 80, 800, 800)

    frame = np.zeros((600, 800, 3), dtype=np.uint8)
    clamp = Clamp(frame, 100, 100)
    assert clamp.apply().shape == (75, 100, 3)
    detection_node = DetectionNode(name="person",
                                   coords=rect_to_coords([10, 10, 100, 100]))
    clamp.scale_detection_nodes([detection_node])
    assert detection_node.bbox == BoundingBox(80, 80, 800, 800)
Example #12
    def process_frame(self, frame: np.ndarray, detection_node: None,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        """
        :param frame: A numpy array of shape (height, width, 3)
        :param detection_node: None
        :param options: Example: {"threshold": 0.5}. Defined in Capsule class above.
        :param state: (Unused in this capsule)
        :return: A list of detections
        """

        # Send the frame to the BrainFrame backend. This function returns a
        # queue. BrainFrame will batch-process the received frames and
        # populate the queue with the results.
        prediction_output_queue = self.send_to_batch(frame)

        # Wait for predictions
        predictions = prediction_output_queue.get()

        # Iterate through all the predictions received in this frame
        detection_nodes = []
        for prediction in predictions:
            # Filter out detections that are not faces.
            if prediction.name != "face":
                continue
            # Filter out detections with low confidence.
            if prediction.confidence < options["threshold"]:
                continue

            # Create a DetectionNode for the prediction. It will be reused by
            # any other capsules that require a face DetectionNode in their
            # input type. An age classifier capsule would be an example of such
            # a capsule.
            new_detection = DetectionNode(
                name=prediction.name,
                # Convert [x1, y1, x2, y2] to [[x1, y1], [x2, y1], ...]
                coords=rect_to_coords(prediction.rect),
                extra_data={"detection_confidence": prediction.confidence})
            detection_nodes.append(new_detection)

        return detection_nodes
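
The comment above notes that these face DetectionNodes can be reused by downstream capsules. As a hedged illustration, a hypothetical age classifier could declare descriptors like the following; the NodeDescription keyword arguments mirror those used elsewhere in this listing, and the "age" values are invented:

# Hypothetical descriptors for a downstream age classifier (illustrative;
# the real capsule would define its own). Its input_type is satisfied by
# the face nodes produced in the example above.
input_type = NodeDescription(
    size=NodeDescription.Size.SINGLE,
    detections=["face"])
output_type = NodeDescription(
    size=NodeDescription.Size.SINGLE,
    detections=["face"],
    attributes={"age": ["child", "adult", "senior"]})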
Example #13
    def process_frame(self, frame: np.ndarray,
                      detection_node: DETECTION_NODE_TYPE,
                      options: Dict[str, OPTION_TYPE],
                      state: BaseStreamState) -> DETECTION_NODE_TYPE:
        n, c, h, w = self.detector.net.inputs['im_data'].shape
        hidden_shape = self.recognizer_decoder.net.inputs['prev_hidden'].shape

        input_dict, resize = self.detector.prepare_inputs(
            frame, frame_input_name="im_data")
        input_dict["im_data"] = (input_dict["im_data"].reshape(
            (n, c, h, w)).astype(np.float32))

        input_image_size = self.detector.net.inputs['im_data'].shape[-2:]
        input_image_info = np.asarray(
            [[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)
        input_dict["im_info"] = input_image_info
        prediction = self.detector.send_to_batch(input_dict).get()

        scores = prediction["scores"]
        detections_filter = scores > options["threshold"]
        scores = scores[detections_filter]
        rects = prediction["boxes"][detections_filter]
        text_features = prediction["text_features"][detections_filter]

        feature_queues = []
        for text_feature in text_features:
            feature_queues.append(
                self.recognizer_encoder.send_to_batch({'input': text_feature}))

        detections = []
        for score, rect, feature_queue in zip(scores, rects, feature_queues):
            feature = feature_queue.get()['output']
            feature = np.reshape(feature,
                                 (feature.shape[0], feature.shape[1], -1))
            feature = np.transpose(feature, (0, 2, 1))

            hidden = np.zeros(hidden_shape)
            prev_symbol_index = np.ones((1, )) * SOS_INDEX

            text = ''
            for _ in range(MAX_SEQ_LEN):
                decoder_output = self.recognizer_decoder.send_to_batch({
                    'prev_symbol': prev_symbol_index,
                    'prev_hidden': hidden,
                    'encoder_outputs': feature,
                }).get()
                symbols_distr = decoder_output['output']
                prev_symbol_index = int(np.argmax(symbols_distr, axis=1))
                if prev_symbol_index == EOS_INDEX:
                    break
                text += ALPHABET[prev_symbol_index]
                hidden = decoder_output['hidden']

            detections.append(
                DetectionNode(
                    name="text",
                    coords=rect_to_coords(rect.tolist()),
                    extra_data={
                        "detection_confidence": float(score),
                        "text": text
                    },
                ))
        return resize.scale_and_offset_detection_nodes(detections)
Example #14
    def process_frame(self, frame, detection_node: None, options, state):
        return [
            DetectionNode(name="fake_box",
                          coords=[[10, 10], [100, 10], [100, 100], [10, 100]])
        ]
Example #15
def test_resize_scale():
    input_width, input_height = 5, 10
    frame = np.arange(50, dtype=np.uint8).reshape((input_height, input_width))

    # Single integer resize
    resize = Resize(frame).resize(10, 20, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([10, 10, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(5, 5, 10, 10)

    # Double integer resize (note that coords are rounded in node.bbox output)
    resize = Resize(frame) \
        .resize(10, 20, Resize.ResizeType.EXACT) \
        .resize(20, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(4, 4, 5, 5)

    # Single crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 25, 25, 30)

    # Two affecting crops plus one that should not change the offset
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .crop(10, 15, Resize.CropPadType.RIGHT_BOTTOM) \
        .crop(8, 5, Resize.CropPadType.ALL)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(21, 30, 26, 35)

    # Crop then resize
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP) \
        .resize(30, 40, Resize.ResizeType.EXACT)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(13, 18, 15, 20)

    # Resize then crop
    input_width, input_height = 20, 30
    frame = np.arange(600, dtype=np.uint8).reshape((input_height, input_width))
    resize = Resize(frame) \
        .resize(30, 40, Resize.ResizeType.EXACT) \
        .crop(15, 20, Resize.CropPadType.LEFT_TOP)
    node = DetectionNode(name="person",
                         coords=rect_to_coords([15, 15, 20, 20]))
    resize.scale_and_offset_detection_nodes([node])
    assert node.bbox == BoundingBox(20, 26, 23, 30)
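
To make the "Crop then resize" case above concrete, here is the arithmetic implied by its assert (a sketch inferred from the test values, not from the library's documentation):

# Detections are mapped back from the processed frame to the original frame
# by undoing each step in reverse order (values inferred from the asserts):
#
#   processed-frame bbox:                         (15, 15, 20, 20)
#   undo resize 15x20 -> 30x40 (divide by 2):     (7.5, 7.5, 10, 10)
#   undo LEFT_TOP crop 20x30 -> 15x20 (+5, +10):  (12.5, 17.5, 15, 20)
#   round to whole pixels:                        (13, 18, 15, 20)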
     NodeDescription(size=NodeDescription.Size.SINGLE, tracked=True),
     NodeDescription(size=NodeDescription.Size.SINGLE, tracked=True),
     NodeDescription(size=NodeDescription.Size.SINGLE))
]


@pytest.mark.parametrize(('desc1', 'desc2', 'diff_1_2', 'diff_2_1'),
                         DIFFERENCE_CASES)
def test_node_description_difference(desc1, desc2, diff_1_2, diff_2_1):
    """Test comparing two node descriptions"""
    assert desc1.difference(desc2) == diff_1_2
    assert desc2.difference(desc1) == diff_2_1


DESCRIPTION_CASES = [
    (DetectionNode(name="person", coords=[[0, 0]] * 4), True, False, False,
     False, False, False, False),
    (DetectionNode(name="person", coords=[[0, 0]] * 4, encoding=np.array([1])),
     True, False, False, True, False, False, False),
    (DetectionNode(name="hair",
                   coords=[[0, 0]] * 4,
                   attributes={"Gender": "boy"}), False, False, True, False,
     False, False, False),
    (DetectionNode(name="cat",
                   coords=[[0, 0]] * 4,
                   attributes={
                       "Uniform": "Police",
                       "Gender": "girl"
                   },
                   encoding=np.array([1, 2, 3, 4, 5])), False, False, True,
     False, True, True, False),