Example #1
def run_mask_detection(video_path,
                       detection_graph,
                       label_map,
                       categories,
                       category_index,
                       show_window,
                       visualize,
                       write_output,
                       ros_enabled,
                       usage_check,
                       graph_trace_enabled=False,
                       score_node=None,
                       expand_node=None):

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    from tf_object_detection.utils import ops as utils_ops
    from PIL import Image
    from tf_object_detection.utils import visualization_utils as vis_util

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []
    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.debug("Frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)

            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, r, c)
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            # Using the split model hack
            if score_node is not None and expand_node is not None:
                score_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1_1:0')

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read video frame by frame and perform inference
            while (vid.is_running()):
                try:
                    # the array based representation of the image will be used later in order to prepare the
                    # result image with boxes and labels on it.
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        output_dict = sess.run(
                            tensor_dict,
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                    else:
                        raise Exception("Split model not supported for mask")

                    end = time.time()

                    boxes = output_dict['detection_boxes']
                    scores = output_dict['detection_scores']
                    classes = output_dict['detection_classes']

                    # all outputs are float32 numpy arrays, so convert types as appropriate
                    output_dict['num_detections'] = int(
                        output_dict['num_detections'][0])
                    output_dict['detection_classes'] = output_dict[
                        'detection_classes'][0].astype(np.uint8)
                    output_dict['detection_boxes'] = output_dict[
                        'detection_boxes'][0]
                    output_dict['detection_scores'] = output_dict[
                        'detection_scores'][0]
                    output_dict['detection_masks'] = output_dict[
                        'detection_masks'][0]

                    logger.info(output_dict['detection_masks'].shape)

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end -
                                                                      start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump,
                            timer)

                    if graph_trace_enabled:
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format(
                        )
                        with open('graph_timeline.json', 'w') as f:
                            f.write(chrome_trace)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))
                    # get boxes that pass the min requirements and their pixel coordinates
                    filtered_boxes = parse_tf_output(curr_frame.shape, boxes,
                                                     scores, classes)

                    if ros_enabled:
                        # TODO: Send the detected info to other systems every frame
                        logger.info("Publishing bboxes")
                        logger.info("".join([str(i) for i in filtered_boxes]))
                        pub.send_boxes(filtered_boxes)

                    if write_output:
                        record.write(str(count) + "\n")
                        for i in range(len(filtered_boxes)):
                            record.write("{}\n".format(str(filtered_boxes[i])))

                    # Visualization of the results of a detection.
                    if visualize:
                        # drawn_img = overlay(curr_frame, category_index, filtered_boxes)
                        vis_util.visualize_boxes_and_labels_on_image_array(
                            curr_frame,
                            output_dict['detection_boxes'],
                            output_dict['detection_classes'],
                            output_dict['detection_scores'],
                            category_index,
                            instance_masks=output_dict.get('detection_masks'),
                            use_normalized_coordinates=True,
                            line_thickness=8)
                        if show_window:
                            window_name = "stream"
                            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                            cv2.imshow(window_name, curr_frame)

                        if write_output:
                            trackedVideo.write(curr_frame)
                    else:
                        logger.info("".join([str(i) for i in filtered_boxes]))

                    count += 1

                    # Quick benchmarking
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
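
A minimal usage sketch for run_mask_detection above, assuming a TF1-style frozen Mask R-CNN graph and the label-map helpers from the TF Object Detection API; the file paths and the label_map_util module location are assumptions, not part of the original code.

import tensorflow as tf
from tf_object_detection.utils import label_map_util  # assumed helper location

PATH_TO_FROZEN_GRAPH = "model/frozen_inference_graph.pb"   # hypothetical path
PATH_TO_LABELS = "data/mscoco_label_map.pbtxt"             # hypothetical path

# Load the frozen inference graph into its own tf.Graph
detection_graph = tf.Graph()
with detection_graph.as_default():
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, "rb") as fid:
        graph_def.ParseFromString(fid.read())
    tf.import_graph_def(graph_def, name="")

# Build the label map / category index used for visualization
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=90, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

run_mask_detection("input.mp4", detection_graph, label_map, categories,
                   category_index, show_window=True, visualize=True,
                   write_output=False, ros_enabled=False, usage_check=False)
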
Example #2
def run_segmentation(video_path,
                     detection_graph,
                     label_map,
                     categories,
                     category_index,
                     show_window,
                     visualize,
                     write_output,
                     ros_enabled,
                     usage_check,
                     graph_trace_enabled=False,
                     score_node=None,
                     expand_node=None):

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    from tf_object_detection.utils import ops as utils_ops
    from PIL import Image
    from tf_object_detection.utils import visualization_utils as vis_util

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []
    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.debug("Frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }

            seg_tensor = "SemanticPredictions:0"
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'ImageTensor:0')

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read video frame by frame and perform inference
            while (vid.is_running()):
                try:
                    # the array based representation of the image will be used later in order to prepare the
                    # result image with boxes and labels on it.
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # curr_frame = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2RGB)
                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        output_dict = sess.run(
                            seg_tensor,
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                    else:
                        raise Exception(
                            "Split model not supported for segmentation")

                    end = time.time()

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end -
                                                                      start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump,
                            timer)

                    if graph_trace_enabled:
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format(
                        )
                        with open('graph_timeline.json', 'w') as f:
                            f.write(chrome_trace)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))

                    if ros_enabled:
                        # TODO: Send the detected info to other systems every frame
                        # NOTE: filtered_boxes is carried over from the detection
                        # examples and is never computed in this segmentation
                        # function, so this block and the write_output block
                        # below need adapting before use.
                        logger.info("Publishing bboxes")
                        logger.info("".join([str(i) for i in filtered_boxes]))
                        pub.send_boxes(filtered_boxes)

                    if write_output:
                        record.write(str(count) + "\n")
                        for i in range(len(filtered_boxes)):
                            record.write("{}\n".format(str(filtered_boxes[i])))

                    # Visualization of the results of a detection.
                    if visualize:
                        logger.warning("visualize not implmented!")

                    else:
                        logger.info(output_dict.shape)

                    count += 1

                    # Quick benchmarking
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
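
The visualize branch above only logs a warning; the following is a hedged sketch of one way it could be filled in, coloring the SemanticPredictions class-id map with an arbitrary palette. The palette, the resize step, and the blending are assumptions.

import numpy as np
import cv2

def colorize_segmentation(seg_map, num_classes=21):
    # seg_map: (H, W) integer class ids from the "SemanticPredictions:0" output
    palette = np.random.RandomState(0).randint(
        0, 255, size=(num_classes, 3), dtype=np.uint8)  # arbitrary fixed palette
    return palette[seg_map]  # (H, W, 3) uint8 color image

# Inside the frame loop, the visualize branch could then do something like:
#     seg_map = output_dict[0]                      # drop the batch dimension
#     color = colorize_segmentation(seg_map)
#     if color.shape[:2] != curr_frame.shape[:2]:   # some graphs resize internally
#         color = cv2.resize(color, (c, r), interpolation=cv2.INTER_NEAREST)
#     blended = cv2.addWeighted(curr_frame, 0.5, color, 0.5, 0)
#     cv2.imshow("stream", blended)
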
Example #3
def run_detection(video_path,
                  detection_graph,
                  label_map,
                  categories,
                  category_index,
                  show_window,
                  visualize,
                  write_output,
                  ros_enabled,
                  usage_check,
                  graph_trace_enabled=False,
                  score_node=None,
                  expand_node=None):

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []
    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.debug("Frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None
            # Definite input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # Score is shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')

            # Using the split model hack
            if score_node is not None and expand_node is not None:
                score_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1_1:0')

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read video frame by frame and perform inference
            while (vid.is_running()):
                try:
                    # the array based representation of the image will be used later in order to prepare the
                    # result image with boxes and labels on it.
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)
                    curr_frame_expanded = np.uint8(curr_frame_expanded)  # image_tensor expects uint8

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        (boxes, scores, classes) = sess.run(
                            [
                                detection_boxes, detection_scores,
                                detection_classes
                            ],
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_nosplit.json")

                    else:
                        # Split Detection in two sessions.
                        (score, expand) = sess.run(
                            [score_out, expand_out],
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_conv.json")

                        (boxes, scores,
                         classes) = sess.run([
                             detection_boxes, detection_scores,
                             detection_classes
                         ],
                                             feed_dict={
                                                 score_in: score,
                                                 expand_in: expand
                                             },
                                             options=options,
                                             run_metadata=run_metadata)
                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_nms.json")

                    end = time.time()

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end -
                                                                      start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump,
                            timer)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))
                    # get boxes that pass the min requirements and their pixel coordinates
                    filtered_boxes = parse_tf_output(curr_frame.shape, boxes,
                                                     scores, classes)

                    if ros_enabled:
                        # TODO: Send the detected info to other systems every frame
                        logger.info("Publishing bboxes")
                        logger.info("".join([str(i) for i in filtered_boxes]))
                        pub.send_boxes(filtered_boxes)

                    if write_output:
                        record.write(str(count) + "\n")
                        for i in range(len(filtered_boxes)):
                            record.write("{}\n".format(str(filtered_boxes[i])))

                    # Visualization of the results of a detection.
                    if visualize:
                        drawn_img = overlay(curr_frame, category_index,
                                            filtered_boxes)
                        if show_window:
                            window_name = "stream"
                            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                            cv2.imshow(window_name, drawn_img)

                        if write_output:
                            trackedVideo.write(drawn_img)
                    else:
                        logger.info("".join([str(i) for i in filtered_boxes]))

                    count += 1

                    # Quick benchmarking
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
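
parse_tf_output is called in the examples above but not shown in this listing. Below is a sketch of what such a helper typically does, keeping detections above a score threshold and converting normalized [ymin, xmin, ymax, xmax] boxes to pixel coordinates; the threshold and the returned structure are assumptions.

import numpy as np

def parse_tf_output_sketch(frame_shape, boxes, scores, classes, min_score=0.5):
    height, width = frame_shape[:2]
    boxes = np.squeeze(boxes)      # (N, 4) normalized [ymin, xmin, ymax, xmax]
    scores = np.squeeze(scores)    # (N,)
    classes = np.squeeze(classes)  # (N,)
    filtered = []
    for box, score, cls in zip(boxes, scores, classes):
        if score < min_score:
            continue
        ymin, xmin, ymax, xmax = box
        filtered.append({
            "class": int(cls),
            "score": float(score),
            "box_px": (int(xmin * width), int(ymin * height),
                       int(xmax * width), int(ymax * height)),
        })
    return filtered
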
Example #4
    #     # draw the bounding box of the face along with the associated
    #     # probability
    #     text = "{:.2f}%".format(confidence * 100)
    #     y = startY - 10 if startY - 10 > 10 else startY + 10
    #     cv2.rectangle(frame, (startX, startY), (endX, endY),
    #                   (0, 0, 255), 2)
    #     cv2.putText(frame, text, (startX, y),
    #                 cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

    # show the output frame
    cv2.imshow("Didux.io", frame)
    cv2.setWindowProperty('Didux.io', cv2.WND_PROP_ASPECT_RATIO,
                          cv2.WINDOW_FREERATIO)
    cv2.setWindowProperty('Didux.io', cv2.WND_PROP_FULLSCREEN,
                          cv2.WINDOW_FULLSCREEN)
    fps.update()
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break
    if fps._numFrames < args["num_frames"]:
        fps.update()
    if fps._numFrames == args["num_frames"]:
        # stop the timer and display FPS information
        fps.stop()
        print("[INFO] elasped time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
        fps.start()

cv2.destroyAllWindows()
cv2.waitKey(1)
vs.stop()
def run_detection(video_path,
                  model_path,
                  model_name,
                  weights_path,
                  classes,
                  show_window=True,
                  visualize=True,
                  write_output=False,
                  is_cpu=False,
                  ros_enabled=False,
                  usage_check=False):

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Calling the class of the model
    spec = importlib.util.spec_from_file_location(model_name, model_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    Net = getattr(mod, model_name)

    model = Net(classes)
    model = torch.nn.DataParallel(model)
    model = load_my_state_dict(model, torch.load(weights_path))
    model.eval()

    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if not is_cpu:
        model = model.cuda()
    else:
        raise Exception("[ERROR: CPU mode not implemented]")

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.info("Video frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    if usage_check:
        fps = FPS().start()

    # Read video frame by frame and perform inference
    while (vid.is_running()):
        try:
            # the array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            logger.debug("Frame {}".format(count))
            retval, curr_frame = vid.read()

            if not retval:
                logger.info("Video ending at frame {}".format(count))
                break

            if show_window:
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            start = time.time()

            # Convert numpy img to PyTorch Tensor, then expand dimension for model
            convert = Compose([ToTensor()])
            img_tensor = convert(curr_frame)
            img_tensor = img_tensor.unsqueeze(0)

            con = time.time()
            logger.debug("img conversion time: {:.4f}".format(con - start))

            if (not is_cpu):
                image = img_tensor.cuda()

            inputs = Variable(image, volatile=True)

            outputs = model(inputs)

            out = time.time()
            logger.debug("output time: {:.4f}".format(out - con))

            # Visualization only works on cpu tensor
            if visualize:
                label = outputs[0].max(
                    0)[1].byte().cpu().data  # Mask to be published
            else:
                label = outputs[0].max(
                    0)[1].byte().data  # Mask to be published

            l = time.time()
            logger.debug("labeling time: {:.4f}".format(l - out))

            end = time.time()

            if usage_check:
                fps.update()
                logger.info("Session run time: {:.4f}".format(end - start))
                logger.info("Frame {}".format(count))
                cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                    cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

            # TODO: Publish Segmentation
            if ros_enabled:
                logger.info("Publishing segmengatation via ROS")
            else:
                logger.info("Publishing segmentation via custom module")

            # Visualization of the results of a detection.
            if visualize:
                # Visualizes based off of cityscape classes; this step takes a ton of time!
                label_color = Colorize()(label.unsqueeze(0))
                label_color = np.moveaxis(label_color.numpy(), 0, -1)
                label_color = label_color[..., ::-1]
                vis = time.time()
                logger.debug("visualization time: {:.4f}".format(vis - end))

                if show_window:
                    window_name = "stream"
                    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                    cv2.imshow(window_name, label_color)

                if write_output:
                    trackedVideo.write(label_color)

            count += 1

            # # Quick benchmarking
            # if timer.get_elapsed_time() >= 60:
            #     break

        except KeyboardInterrupt:
            logger.info("Ctrl + C Pressed. Attempting graceful exit")
            break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()
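
load_my_state_dict is used above but not defined in this listing. A minimal sketch of the usual pattern behind such a helper, copying only the checkpoint parameters whose names exist in the target model; only the name comes from the call above, the body is an assumption.

def load_my_state_dict(model, state_dict):
    own_state = model.state_dict()
    for name, param in state_dict.items():
        if name not in own_state:
            continue  # skip keys the model does not have (e.g. mismatched prefixes)
        own_state[name].copy_(param)
    return model
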
Example #6
    def detect(self):
        #TODO: make a generalized detection workflow
        labels_per_frame = []
        boxes_per_frame = []

        if self.benchmark:
            from benchmark.usage import Timer, UsageTracker
            self.logger.info("Initial startup")
            timer = Timer()
            usage = UsageTracker(timer)
            usage.get_usage()

        self.logger.debug("Frame width: {} height: {}".format(
            self.width, self.height))

        if self.write_output:
            self.trackedVideo = cv2.VideoWriter(
                'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
                (self.width, self.height))
            self.record = open("record.txt", "w")

        count = 0

        if self.benchmark:
            fps = FPS().start()

        while self.feed.is_running:
            try:
                # the array based representation of the image will be used later in order to prepare the
                # result image with boxes and labels on it.
                self.logger.debug("Frame {}".format(count))
                retval, curr_frame = self.feed.read()

                if not retval:
                    self.logger.info("Video ending at frame {}".format(count))
                    break

                if self.show_stream:
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                # Actual detection.
                start = time.time()
                output = self.model.inference(curr_frame)
                end = time.time()

                if self.task == "segmentation" and self.library == "pytorch":
                    mask = output.data

                if self.benchmark:
                    fps.update()
                    self.logger.info("Session run time: {:.4f}".format(end -
                                                                       start))
                    self.logger.info("Frame {}".format(count))
                    usage.get_usage()

                # TODO: Publish Output
                if self.ros_enabled:
                    self.logger.info("Publishing via ROS")
                else:
                    self.logger.info("Publishing via custom module")

                if self.show_stream:
                    #TODO: set which type of visualization to use based on task
                    if self.task == "segmentation" and self.library == "pytorch":
                        vis_output = output.cpu().data
                    self._visualize(self.task, vis_output)

                count += 1

                # Quick benchmarking
                if self.benchmark and timer.get_elapsed_time() >= 60:
                    break

            except KeyboardInterrupt:
                self.logger.info("Ctrl + C Pressed. Attempting graceful exit")
                break

        if self.benchmark:
            fps.stop()
            self.logger.info("[USAGE] elasped time: {:.2f}".format(
                fps.elapsed()))
            self.logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
            self.logger.info("[USAGE] inferenced frames: {}".format(
                fps.get_frames()))
            self.logger.info("[USAGE] raw frames: {}".format(
                self.feed.get_raw_frames()))
            self.logger.info(
                "[USAGE] Total Time elapsed: {:.2f} seconds".format(
                    timer.get_elapsed_time()))
            usage.dump_usage()

        self.feed.stop()

        self.logger.debug("Result: {} frames".format(count))

        if self.show_stream:
            cv2.destroyAllWindows()

        if self.write_output:
            self.record.close()
            self.trackedVideo.release()
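
A hedged sketch of the backend wrapper that detect() above assumes: a model object exposing a single inference(frame) call whose return value supports .data and .cpu() for the PyTorch segmentation path. The class name and implementation details below are assumptions.

import torch
from torchvision.transforms import Compose, ToTensor

class PyTorchSegmentationBackend:
    def __init__(self, net, use_gpu=True):
        self.net = net.eval()
        self.use_gpu = use_gpu

    def inference(self, frame):
        # Convert a BGR numpy frame to a (1, C, H, W) tensor and run the network;
        # detect() calls .data / .cpu() on the returned per-pixel class-id mask.
        tensor = Compose([ToTensor()])(frame).unsqueeze(0)
        if self.use_gpu:
            tensor = tensor.cuda()
        with torch.no_grad():
            output = self.net(tensor)
        return output[0].max(0)[1].byte()
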