class CameraPublisher:
    def __init__(self, src=0):
        # initialize the video camera stream and read the first frame
        # from the stream
        self.stream = cv2.VideoCapture(src)
        if not self.stream.isOpened():
            raise Exception("Video/Camera device not found at: {}".format(src))

        rospy.init_node("img_raw", anonymous=True)
        # publish sensor_msgs/Image via cv_bridge (a raw numpy frame cannot be
        # sent as std_msgs/String); Image and CvBridge are assumed to be imported
        self.bridge = CvBridge()
        self.pub = rospy.Publisher("camera", Image, queue_size=10)

        (self.grabbed, self.frame) = self.stream.read()

        # initialize the variable used to indicate if the thread should
        # be stopped
        self.stopped = False

        self.f = FPS()
        self.f.start()

    def start(self):
        # start the thread to read frames from the video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely until the thread is stopped
        while True:
            # if the thread indicator variable is set, stop the thread
            if self.stopped:
                return

            # otherwise, read the next frame from the stream
            (self.grabbed, self.frame) = self.stream.read()
            if self.grabbed:
                # convert the OpenCV frame to a sensor_msgs/Image before publishing
                self.pub.publish(self.bridge.cv2_to_imgmsg(self.frame, encoding="bgr8"))
            self.f.update()

    def read(self):
        # return the frame most recently read
        return self.grabbed, self.frame

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        self.f.stop()

    def get_dimensions(self):
        # returns (rows, cols), i.e. (height, width)
        c = int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
        r = int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return r, c

    def get_raw_frames(self):
        return self.f.get_frames()
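
# Usage sketch (illustration only, assuming a ROS master is running and the
# imports above are available): start the background reader/publisher thread
# and spin until shutdown.
if __name__ == "__main__":
    cam = CameraPublisher(src=0).start()
    try:
        rospy.spin()
    finally:
        cam.stop()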
Example #2
def run_mask_detection(video_path,
                       detection_graph,
                       label_map,
                       categories,
                       category_index,
                       show_window,
                       visualize,
                       write_output,
                       ros_enabled,
                       usage_check,
                       graph_trace_enabled=False,
                       score_node=None,
                       expand_node=None):

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    from tf_object_detection.utils import ops as utils_ops
    from PIL import Image
    from tf_object_detection.utils import visualization_utils as vis_util

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []
    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.debug("Frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)

            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, r, c)
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
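            # detection_masks now holds binary masks at full image resolution,
            # batched as [1, num_detections, image_height, image_width], so once
            # the batch index is stripped they can be overlaid directly by the
            # visualization utilities below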
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            # Using the split model hack
            if score_node is not None and expand_node is not None:
                score_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1_1:0')

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read video frame by frame and perform inference
            while (vid.is_running()):
                try:
                    # the array based representation of the image will be used later in order to prepare the
                    # result image with boxes and labels on it.
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        output_dict = sess.run(
                            tensor_dict,
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                    else:
                        raise Exception("Split model not supported for mask")

                    end = time.time()

                    boxes = output_dict['detection_boxes']
                    scores = output_dict['detection_scores']
                    classes = output_dict['detection_classes']

                    # all outputs are float32 numpy arrays, so convert types as appropriate
                    output_dict['num_detections'] = int(
                        output_dict['num_detections'][0])
                    output_dict['detection_classes'] = output_dict[
                        'detection_classes'][0].astype(np.uint8)
                    output_dict['detection_boxes'] = output_dict[
                        'detection_boxes'][0]
                    output_dict['detection_scores'] = output_dict[
                        'detection_scores'][0]
                    output_dict['detection_masks'] = output_dict[
                        'detection_masks'][0]

                    logger.info(output_dict['detection_masks'].shape)

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end -
                                                                      start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump,
                            timer)

                    if graph_trace_enabled:
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format(
                        )
                        with open('graph_timeline.json', 'w') as f:
                            f.write(chrome_trace)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))
                    # get boxes that pass the min requirements and their pixel coordinates
                    filtered_boxes = parse_tf_output(curr_frame.shape, boxes,
                                                     scores, classes)

                    if ros_enabled:
                        # TODO: Send the detected info to other systems every frame
                        logger.info("Publishing bboxes")
                        logger.info("".join([str(i) for i in filtered_boxes]))
                        pub.send_boxes(filtered_boxes)

                    if write_output:
                        record.write(str(count) + "\n")
                        for i in range(len(filtered_boxes)):
                            record.write("{}\n".format(str(filtered_boxes[i])))

                    # Visualization of the results of a detection.
                    if visualize:
                        # drawn_img = overlay(curr_frame, category_index, filtered_boxes)
                        vis_util.visualize_boxes_and_labels_on_image_array(
                            curr_frame,
                            output_dict['detection_boxes'],
                            output_dict['detection_classes'],
                            output_dict['detection_scores'],
                            category_index,
                            instance_masks=output_dict.get('detection_masks'),
                            use_normalized_coordinates=True,
                            line_thickness=8)
                        if show_window:
                            window_name = "stream"
                            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                            cv2.imshow(window_name, curr_frame)

                        if write_output:
                            trackedVideo.write(curr_frame)
                    else:
                        logger.info("".join([str(i) for i in filtered_boxes]))

                    count += 1

                    # Quick benchmarking (timer only exists when usage_check is set)
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
Example #3
def run_segmentation(video_path,
                     detection_graph,
                     label_map,
                     categories,
                     category_index,
                     show_window,
                     visualize,
                     write_output,
                     ros_enabled,
                     usage_check,
                     graph_trace_enabled=False,
                     score_node=None,
                     expand_node=None):

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    from tf_object_detection.utils import ops as utils_ops
    from PIL import Image
    from tf_object_detection.utils import visualization_utils as vis_util

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []
    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.debug("Frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }

            seg_tensor = "SemanticPredictions:0"
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'ImageTensor:0')
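            # 'ImageTensor:0' and 'SemanticPredictions:0' are the input/output
            # names used by exported DeepLab frozen graphs, which this snippet
            # appears to target; SemanticPredictions yields a per-pixel class-id map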

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read video frame by frame and perform inference
            while (vid.is_running()):
                try:
                    # the array based representation of the image will be used later in order to prepare the
                    # result image with boxes and labels on it.
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # curr_frame = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2RGB)
                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        output_dict = sess.run(
                            seg_tensor,
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                    else:
                        raise Exception(
                            "Split model not supported for segmentation")

                    end = time.time()

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end -
                                                                      start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump,
                            timer)

                    if graph_trace_enabled:
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format(
                        )
                        with open('graph_timeline.json', 'w') as f:
                            f.write(chrome_trace)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))

                    if ros_enabled:
                        # TODO: send the segmentation result to other systems every
                        # frame; unlike the detection pipelines there are no
                        # filtered boxes here, so only log the mask for now
                        logger.info("Publishing segmentation mask of shape {}".format(
                            output_dict.shape))

                    if write_output:
                        # per-box records do not apply to semantic segmentation;
                        # just record the frame index
                        record.write(str(count) + "\n")

                    # Visualization of the results of a detection.
                    if visualize:
                        logger.warning("visualize not implemented!")

                    else:
                        logger.info(output_dict.shape)

                    count += 1

                    # Quick benchmarking (timer only exists when usage_check is set)
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
Example #4
def run_detection(video_path,
                  detection_graph,
                  label_map,
                  categories,
                  category_index,
                  show_window,
                  visualize,
                  write_output,
                  ros_enabled,
                  usage_check,
                  graph_trace_enabled=False,
                  score_node=None,
                  expand_node=None):

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(allow_growth=True))

    labels_per_frame = []
    boxes_per_frame = []
    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if ros_enabled:
        from utils.ros_op import DetectionPublisher, CameraSubscriber
        pub = DetectionPublisher()
        sub = CameraSubscriber()

    if graph_trace_enabled:
        from tensorflow.python.client import timeline

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.debug("Frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    # Detection
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            options = None
            run_metadata = None
            # Define input and output tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # The score is shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')

            # Using the split model hack
            if score_node is not None and expand_node is not None:
                score_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores:0')
                expand_out = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1:0')
                score_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/convert_scores_1:0')
                expand_in = detection_graph.get_tensor_by_name(
                    'Postprocessor/ExpandDims_1_1:0')
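                # Split-model approach: the graph is cut at the Postprocessor, so a
                # first sess.run fetches the raw score / box-encoding tensors
                # (score_out, expand_out) from the convolutional half, and a second
                # sess.run feeds them back in through score_in / expand_in to run
                # the remaining postprocessing (NMS) as a separately-timed step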

            if usage_check:
                fps = FPS().start()

            if graph_trace_enabled:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

            # Read video frame by frame and perform inference
            while (vid.is_running()):
                try:
                    # the array based representation of the image will be used later in order to prepare the
                    # result image with boxes and labels on it.
                    logger.debug("Frame {}".format(count))
                    retval, curr_frame = vid.read()

                    if not retval:
                        logger.info("Video ending at frame {}".format(count))
                        break

                    if show_window:
                        if cv2.waitKey(1) & 0xFF == ord('q'):
                            break

                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    curr_frame_expanded = np.expand_dims(curr_frame, axis=0)
                    # the detection graph's image_tensor expects uint8 pixel values
                    curr_frame_expanded = np.uint8(curr_frame_expanded)

                    # Actual detection.
                    start = time.time()
                    if score_node is None and expand_node is None:
                        (boxes, scores, classes) = sess.run(
                            [
                                detection_boxes, detection_scores,
                                detection_classes
                            ],
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_nosplit.json")

                    else:
                        # Split Detection in two sessions.
                        (score, expand) = sess.run(
                            [score_out, expand_out],
                            feed_dict={image_tensor: curr_frame_expanded},
                            options=options,
                            run_metadata=run_metadata)
                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_conv.json")

                        (boxes, scores,
                         classes) = sess.run([
                             detection_boxes, detection_scores,
                             detection_classes
                         ],
                                             feed_dict={
                                                 score_in: score,
                                                 expand_in: expand
                                             },
                                             options=options,
                                             run_metadata=run_metadata)
                        if graph_trace_enabled:
                            write_trace(run_metadata, timeline,
                                        "graph_timeline_nms.json")

                    end = time.time()

                    if usage_check:
                        fps.update()
                        logger.info("Session run time: {:.4f}".format(end -
                                                                      start))
                        logger.info("Frame {}".format(count))
                        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                            cpu_usage_dump, mem_usage_dump, time_usage_dump,
                            timer)

                    (r, c, _) = curr_frame.shape
                    logger.debug("image height:{}, width:{}".format(r, c))
                    # get boxes that pass the min requirements and their pixel coordinates
                    filtered_boxes = parse_tf_output(curr_frame.shape, boxes,
                                                     scores, classes)

                    if ros_enabled:
                        # TODO: Send the detected info to other systems every frame
                        logger.info("Publishing bboxes")
                        logger.info("".join([str(i) for i in filtered_boxes]))
                        pub.send_boxes(filtered_boxes)

                    if write_output:
                        record.write(str(count) + "\n")
                        for i in range(len(filtered_boxes)):
                            record.write("{}\n".format(str(filtered_boxes[i])))

                    # Visualization of the results of a detection.
                    if visualize:
                        drawn_img = overlay(curr_frame, category_index,
                                            filtered_boxes)
                        if show_window:
                            window_name = "stream"
                            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                            cv2.imshow(window_name, drawn_img)

                        if write_output:
                            trackedVideo.write(drawn_img)
                    else:
                        logger.info("".join([str(i) for i in filtered_boxes]))

                    count += 1

                    # Quick benchmarking (timer only exists when usage_check is set)
                    if usage_check and timer.get_elapsed_time() >= 60:
                        break

                except KeyboardInterrupt:
                    logger.info("Ctrl + C Pressed. Attempting graceful exit")
                    break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()

    return labels_per_frame, boxes_per_frame
Example #5
    fps = FPS().start()

    while True:
        try:
            if not stream.is_connected():
                stop_application()

            image = stream.read()

            if image is False:
                t.sleep(0.02)
                continue

            result = od.detect_objects(image)

            fps.update()

            if result_server.is_client_connected():
                result_q.put(result)

            if args.visualize or args.create_result_video:
                image = v.draw_boxes_and_labels(image,
                                                result["recognizedObjects"])

            if args.create_result_video:
                if (not result_video_writer.is_running()
                        and t.time() - fps_dt > args.warmup):
                    current_fps = math.ceil(fps.fps(True))
                    result_video_writer.init("result", current_fps).start()

                if result_video_writer.is_thread_alive():

Example #6
def run_detection(video_path,
                  model_path,
                  model_name,
                  weights_path,
                  classes,
                  show_window=True,
                  visualize=True,
                  write_output=False,
                  is_cpu=False,
                  ros_enabled=False,
                  usage_check=False):

    # logger is used throughout this function; set it up as in the examples above
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Calling the class of the model
    spec = importlib.util.spec_from_file_location(model_name, model_path)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    Net = getattr(mod, model_name)

    model = Net(classes)
    model = torch.nn.DataParallel(model)
    model = load_my_state_dict(model, torch.load(weights_path))
    model.eval()

    cpu_usage_dump = ""
    mem_usage_dump = ""
    time_usage_dump = ""

    if not is_cpu:
        model = model.cuda()
    else:
        raise Exception("[ERROR: CPU mode not implemented]")

    if usage_check:
        timer = Timer()
        logger.info("Initial startup")
        cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
            cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

    if ros_enabled:
        # `sub` is otherwise undefined here; the TensorFlow examples above create
        # the camera subscriber the same way
        from utils.ros_op import CameraSubscriber
        sub = CameraSubscriber()
        if not sub.is_running():
            raise Exception("[ERROR: Camera Node not running]")
    else:
        vid = WebcamVideoStream(src=video_path).start()

    r, c = vid.get_dimensions()

    logger.info("Video frame width: {} height: {}".format(r, c))

    if write_output:
        trackedVideo = cv2.VideoWriter(
            'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
            (c, r))
        record = open("record.txt", "w")

    count = 0

    if usage_check:
        fps = FPS().start()

    # Read video frame by frame and perform inference
    while (vid.is_running()):
        try:
            # the array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            logger.debug("Frame {}".format(count))
            retval, curr_frame = vid.read()

            if not retval:
                logger.info("Video ending at frame {}".format(count))
                break

            if show_window:
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            start = time.time()

            # Convert numpy img to PyTorch Tensor, then expand dimension for model
            convert = Compose([ToTensor()])
            img_tensor = convert(curr_frame)
            img_tensor = img_tensor.unsqueeze(0)
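            # ToTensor() maps the HxWxC uint8 frame in [0, 255] to a CxHxW float
            # tensor in [0.0, 1.0]; unsqueeze(0) adds the batch dimension the model expects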

            con = time.time()
            logger.debug("img conversion time: {:.4f}".format(con - start))

            if (not is_cpu):
                image = img_tensor.cuda()

            inputs = Variable(image, volatile=True)
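            # NOTE: Variable(..., volatile=True) is pre-0.4 PyTorch syntax; on newer
            # versions the forward pass would instead run under `with torch.no_grad():`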

            outputs = model(inputs)

            out = time.time()
            logger.debug("output time: {:.4f}".format(out - con))

            # Visualization only works on cpu tensor
            if visualize:
                label = outputs[0].max(
                    0)[1].byte().cpu().data  # Mask to be published
            else:
                label = outputs[0].max(
                    0)[1].byte().data  # Mask to be published

            l = time.time()
            logger.debug("labeling time: {:.4f}".format(l - out))

            end = time.time()

            if usage_check:
                fps.update()
                logger.info("Session run time: {:.4f}".format(end - start))
                logger.info("Frame {}".format(count))
                cpu_usage_dump, mem_usage_dump, time_usage_dump = show_usage(
                    cpu_usage_dump, mem_usage_dump, time_usage_dump, timer)

            # TODO: Publish Segmentation
            if ros_enabled:
                logger.info("Publishing segmengatation via ROS")
            else:
                logger.info("Publishing segmentation via custom module")

            # Visualization of the results of a detection.
            if visualize:
                # Visualizes based off of cityscape classes; this step takes a ton of time!
                label_color = Colorize()(label.unsqueeze(0))
                label_color = np.moveaxis(label_color.numpy(), 0, -1)
                label_color = label_color[..., ::-1]
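                # the channel reversal above presumably converts the colourized mask
                # from RGB to BGR so cv2.imshow / VideoWriter render it correctly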
                vis = time.time()
                logger.debug("visualization time: {:.4f}".format(vis - end))

                if show_window:
                    window_name = "stream"
                    cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
                    cv2.imshow(window_name, label_color)

                if write_output:
                    trackedVideo.write(label_color)

            count += 1

            # # Quick benchmarking
            # if timer.get_elapsed_time() >= 60:
            #     break

        except KeyboardInterrupt:
            logger.info("Ctrl + C Pressed. Attempting graceful exit")
            break

    if usage_check:
        fps.stop()
        logger.info("[USAGE] elasped time: {:.2f}".format(fps.elapsed()))
        logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
        logger.info("[USAGE] inferenced frames: {}".format(fps.get_frames()))
        logger.info("[USAGE] raw frames: {}".format(vid.get_raw_frames()))
        logger.info("[USAGE] Total Time elapsed: {:.2f} seconds".format(
            timer.get_elapsed_time()))
        with open("cpu_usage.txt", "w") as c:
            c.write(cpu_usage_dump)
        with open("mem_usage.txt", "w") as m:
            m.write(mem_usage_dump)
        with open("time_usage.txt", "w") as t:
            t.write(time_usage_dump)

    vid.stop()

    logger.debug("Result: {} frames".format(count))

    if visualize:
        cv2.destroyAllWindows()

    if write_output:
        record.close()
        trackedVideo.release()
Example #7
    def detect(self):
        #TODO: make a generalized detection workflow
        labels_per_frame = []
        boxes_per_frame = []

        if self.benchmark:
            from benchmark.usage import Timer, UsageTracker
            self.logger.info("Initial startup")
            timer = Timer()
            usage = UsageTracker(timer)
            usage.get_usage()

        self.logger.debug("Frame width: {} height: {}".format(
            self.width, self.height))

        if self.write_output:
            self.trackedVideo = cv2.VideoWriter(
                'output.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20.0,
                (self.width, self.height))
            self.record = open("record.txt", "w")

        count = 0

        if self.benchmark:
            fps = FPS().start()

        while self.feed.is_running():
            try:
                # the array based representation of the image will be used later in order to prepare the
                # result image with boxes and labels on it.
                self.logger.debug("Frame {}".format(count))
                retval, curr_frame = self.feed.read()

                if not retval:
                    self.logger.info("Video ending at frame {}".format(count))
                    break

                if self.show_stream:
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break

                # Actual detection.
                start = time.time()
                output = self.model.inference(curr_frame)
                end = time.time()

                if self.task == "segmentation" and self.library == "pytorch":
                    mask = output.data

                if self.benchmark:
                    fps.update()
                    self.logger.info("Session run time: {:.4f}".format(end -
                                                                       start))
                    self.logger.info("Frame {}".format(count))
                    usage.get_usage()

                # TODO: Publish Output
                if self.ros_enabled:
                    self.logger.info("Publishing via ROS")
                else:
                    self.logger.info("Publishing via custom module")

                if self.show_stream:
                    #TODO: set which type of visualization to use based on task
                    if self.task == "segmentation" and self.library == "pytorch":
                        vis_output = output.cpu().data
                    self._visualize(self.task, vis_output)

                count += 1

                # Quick benchmarking (timer only exists when self.benchmark is set)
                if self.benchmark and timer.get_elapsed_time() >= 60:
                    break

            except KeyboardInterrupt:
                self.logger.info("Ctrl + C Pressed. Attempting graceful exit")
                break

        if self.benchmark:
            fps.stop()
            self.logger.info("[USAGE] elasped time: {:.2f}".format(
                fps.elapsed()))
            self.logger.info("[USAGE] approx. FPS: {:.2f}".format(fps.fps()))
            self.logger.info("[USAGE] inferenced frames: {}".format(
                fps.get_frames()))
            self.logger.info("[USAGE] raw frames: {}".format(
                self.feed.get_raw_frames()))
            self.logger.info(
                "[USAGE] Total Time elapsed: {:.2f} seconds".format(
                    timer.get_elapsed_time()))
            usage.dump_usage()

        self.feed.stop()

        self.logger.debug("Result: {} frames".format(count))

        if self.show_stream:
            cv2.destroyAllWindows()

        if self.write_output:
            self.record.close()
            self.trackedVideo.release()
class WebcamVideoStream:
    def __init__(self, src=0, res=None):
        # initialize the video camera stream and read the first frame
        # from the stream
        self.stream = cv2.VideoCapture(src)
        if not self.stream.isOpened():
            raise Exception("Video/Camera device not found at: {}".format(src))

        (self.grabbed, self.frame) = self.stream.read()

        # res is a tuple of (width, height); when provided, frames returned by
        # read() are resized to this resolution
        self.resize = res

        # initialize the variable used to indicate if the thread should
        # be stopped
        self.stopped = False

        self.f = FPS()
        self.f.start()

    def start(self):
        # start the thread to read frames from the video stream
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
        return self

    def update(self):
        # keep looping infinitely until the thread is stopped
        while True:
            # if the thread indicator variable is set, stop the thread
            if self.stopped:
                return

            # otherwise, read the next frame from the stream
            (self.grabbed, self.frame) = self.stream.read()
            self.f.update()

    def read(self):
        # return the frame most recently read, resized if a target resolution was
        # set (work on a local copy so the reader thread's frame is not mutated)
        frame = self.frame
        if self.resize is not None and frame is not None:
            frame = cv2.resize(frame, self.resize)
        return self.grabbed, frame

    def stop(self):
        # indicate that the thread should be stopped
        self.stopped = True
        self.f.stop()
        # TODO: Weird error "VIDIOC_DQBUF: Invalid argument"
        self.stream.release()

    def get_dimensions(self):
        # returns (rows, cols), i.e. (height, width), to match how callers unpack it
        if self.resize is not None:
            return self.resize[1], self.resize[0]

        c = int(self.stream.get(cv2.CAP_PROP_FRAME_WIDTH))
        r = int(self.stream.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return r, c

    def get_raw_frames(self):
        return self.f.get_frames()

    def is_running(self):
        return not self.stopped
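
# Usage sketch (illustration only): read resized frames from the default camera
# in the main thread and preview them until 'q' is pressed.
if __name__ == "__main__":
    stream = WebcamVideoStream(src=0, res=(480, 480)).start()
    while stream.is_running():
        grabbed, frame = stream.read()
        if not grabbed:
            break
        cv2.imshow("preview", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    stream.stop()
    cv2.destroyAllWindows()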