def intruder_detector():
    """
    Process the input source frame by frame and detect intruders, if any.

    :return: a (status, message) tuple; status is 0 on success and negative on failure
    """
    global CONF_CANDIDATE_CONFIDENCE
    global LOG_WIN_HEIGHT
    global LOG_WIN_WIDTH
    global CONFIG_FILE
    global video_caps
    global conf_labels_file_path
    global is_async_mode
    global UI
    global LOOP_VIDEO

    parse_args()
    ret = check_args()
    if ret != 0:
        return ret, ""

    if not os.path.isfile(CONFIG_FILE):
        return -12, ""

    if not os.path.isfile(conf_labels_file_path):
        return -13, ""

    # Create a subdirectory to save the output snapshots
    pathlib.Path(os.getcwd() + '/output/').mkdir(parents=True, exist_ok=True)

    # Read the configuration file
    ret, req_labels = get_input()
    if ret != 0:
        return ret, req_labels[0]

    if not video_caps:
        return -14, ''

    # Get the labels that are used in the application
    ret, label_names, used_labels = get_used_labels(req_labels)
    if ret != 0:
        return ret, ''
    if True not in used_labels:
        return -15, ''

    # Init a rolling log to store events
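    # Each log line is drawn ~20 px apart, so keep only as many entries as fit the log window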
    rolling_log_size = int((LOG_WIN_HEIGHT - 15) / 20)
    log_list = collections.deque(maxlen=rolling_log_size)

    # Open a file for intruder logs
    log_file = open(LOG_FILE_PATH, 'w')
    if not log_file:
        return -16, ''

    # Initializing VideoWriter for each source
    if UI and not LOOP_VIDEO:
        for video_cap in video_caps:
            ret, ret_value = video_cap.init_vw(int(video_cap.input_height),
                                               int(video_cap.input_width))
            if ret != 0:
                return ret, ret_value

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 2,
                                          CPU_EXTENSION)[1]
    # Arrange windows so that they are not overlapping
    arrange_windows()

    min_fps = min([i.vc.get(cv2.CAP_PROP_FPS) for i in video_caps])
    signal.signal(
        signal.SIGINT,
        signal_handler,
    )
    no_more_data = [False] * len(video_caps)
    start_time = time.time()
    inf_time = 0
    next_request_id = 1
    cur_request_id = 0
    # Main loop starts here. Loop over all the video captures

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    while True:
        for idx, video_cap in enumerate(video_caps):
            # Get a new frame
            vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
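            # Read enough frames from this source so that every stream advances
            # in step with the slowest (lowest-FPS) capture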
            for i in range(0, int(round(vfps / min_fps))):
                if is_async_mode:
                    ret, video_cap.next_frame = video_cap.vc.read()
                else:
                    ret, video_cap.frame = video_cap.vc.read()
                video_cap.loop_frames += 1
                # If no new frame or error in reading a frame, exit the loop
                if not ret:
                    no_more_data[idx] = True
                    break
            if no_more_data[idx]:
                stream_end_frame = numpy.zeros((int(
                    video_cap.input_height), int(video_cap.input_width), 1),
                                               dtype='uint8')
                stream_end_message = "Stream from {} has ended.".format(
                    video_cap.cam_name)
                cv2.putText(stream_end_frame, stream_end_message,
                            (int(video_cap.input_width / 2) - 30,
                             int(video_cap.input_height / 2) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                cv2.imshow(video_cap.cam_name, stream_end_frame)
                continue
            for i in range(video_cap.no_of_labels):
                video_cap.current_count[i] = 0
                video_cap.changed_count[i] = False

            # Resize to expected size (in model .xml file)
            # Input frame is resized to infer resolution
            if is_async_mode:
                in_frame = cv2.resize(video_cap.next_frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))

                # Start asynchronous inference for specified request.
                infer_network.exec_net(next_request_id, in_frame)
                video_cap.frame = video_cap.next_frame
                # Async enabled and only one video capture
                if len(video_caps) == 1:
                    videoCapResult = video_cap
                # Async enabled and more than one video capture
                else:
                    # Get previous index
                    videoCapResult = video_caps[idx - 1 if idx -
                                                1 >= 0 else len(video_caps) -
                                                1]

            else:
                in_frame = cv2.resize(video_cap.frame, (w, h))
                in_frame = in_frame.transpose((2, 0, 1))
                in_frame = in_frame.reshape((n, c, h, w))

                # Start synchronous inference for specified request.
                infer_network.exec_net(cur_request_id, in_frame)
                videoCapResult = video_cap

            inf_start = time.time()
            # Wait for the result
            if infer_network.wait(cur_request_id) == 0:
                inf_time = time.time() - inf_start
                # Results of the output layer of the network
                res = infer_network.get_output(cur_request_id)
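                # Each detection: [image_id, label, confidence, xmin, ymin, xmax, ymax],
                # with box coordinates normalized to [0, 1]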
                for obj in res[0][0]:
                    label = int(obj[1]) - 1
                    # Draw the bounding box around the object when the probability is more than specified threshold
                    if obj[2] > CONF_THRESHOLD_VALUE and used_labels[label]:
                        videoCapResult.current_count[label] += 1
                        xmin = int(obj[3] * videoCapResult.input_width)
                        ymin = int(obj[4] * videoCapResult.input_height)
                        xmax = int(obj[5] * videoCapResult.input_width)
                        ymax = int(obj[6] * videoCapResult.input_height)
                        # Draw bounding box around the intruder detected
                        cv2.rectangle(videoCapResult.frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                for i in range(videoCapResult.no_of_labels):
                    if videoCapResult.candidate_count[
                            i] == videoCapResult.current_count[i]:
                        videoCapResult.candidate_confidence[i] += 1
                    else:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.candidate_count[
                            i] = videoCapResult.current_count[i]

                    if videoCapResult.candidate_confidence[
                            i] == CONF_CANDIDATE_CONFIDENCE:
                        videoCapResult.candidate_confidence[i] = 0
                        videoCapResult.changed_count[i] = True
                    else:
                        continue

                    if videoCapResult.current_count[
                            i] > videoCapResult.last_correct_count[i]:
                        videoCapResult.total_count[
                            i] += videoCapResult.current_count[
                                i] - videoCapResult.last_correct_count[i]
                        det_objs = videoCapResult.current_count[
                            i] - videoCapResult.last_correct_count[i]
                        total_count = sum(videoCapResult.total_count)
                        for det_obj in range(det_objs):
                            current_time = time.strftime("%H:%M:%S")
                            log = "{} - Intruder {} detected on {}".format(
                                current_time, label_names[i],
                                videoCapResult.cam_name)
                            log_list.append(log)
                            log_file.write(log + "\n")
                            event = Event(event_time=current_time,
                                          intruder=label_names[i],
                                          count=total_count,
                                          frame=videoCapResult.frame_count)
                            videoCapResult.events.append(event)

                        snapshot_name = "output/intruder_{}.png".format(
                            total_count)
                        cv2.imwrite(snapshot_name, videoCapResult.frame)
                    videoCapResult.last_correct_count[
                        i] = videoCapResult.current_count[i]

                # Create intruder log window, add logs to the frame and display it
                log_window = numpy.zeros((LOG_WIN_HEIGHT, LOG_WIN_WIDTH, 1),
                                         dtype='uint8')
                for i, log in enumerate(log_list):
                    cv2.putText(log_window, log, (10, 20 * i + 15),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255),
                                1)
                cv2.imshow("Intruder Log", log_window)
                videoCapResult.frame_count += 1

                # Video output
                if UI and not LOOP_VIDEO:
                    videoCapResult.vw.write(videoCapResult.frame)

                log_message = "Async mode is on." if is_async_mode else \
                    "Async mode is off."
                cv2.putText(videoCapResult.frame, log_message,
                            (10, int(videoCapResult.input_height) - 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (200, 10, 10), 1)
                inf_time_message = "Inference time: N\A for async mode" if is_async_mode else \
                    "Inference time: {:.3f} ms".format(inf_time * 1000)
                cv2.putText(videoCapResult.frame, inf_time_message,
                            (10, int(videoCapResult.input_height) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                fps_time = time.time() - start_time
                fps_message = "FPS: {:.3f} fps".format(1 / fps_time)
                cv2.putText(videoCapResult.frame, fps_message,
                            (10, int(videoCapResult.input_height) - 10),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

                # Display the video output
                cv2.imshow(videoCapResult.cam_name, videoCapResult.frame)

            start_time = time.time()

            # Loop video to mimic continuous input if LOOP_VIDEO flag is True
            if LOOP_VIDEO and not videoCapResult.is_cam:
                vfps = int(round(videoCapResult.vc.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended restart it
                if videoCapResult.loop_frames > videoCapResult.vc.get(
                        cv2.CAP_PROP_FRAME_COUNT) - int(round(vfps / min_fps)):
                    videoCapResult.loop_frames = 0
                    videoCapResult.vc.set(cv2.CAP_PROP_POS_FRAMES, 0)

            if is_async_mode:
                # Swap infer request IDs
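                # Double buffering: the request issued this frame is read on the next frame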
                cur_request_id, next_request_id = next_request_id, cur_request_id

        if cv2.waitKey(1) == 27:
            break

        if cv2.waitKey(1) == 9:
            is_async_mode = not is_async_mode
            print("Switched to {} mode".format(
                "async" if is_async_mode else "sync"))

        if False not in no_more_data:
            break

    ret = save_json()
    if ret != 0:
        return ret, ''

    infer_network.clean()
    log_file.close()
    return 0, ''
Example #2
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    # prob_threshold = args.prob_threshold
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    time_on_video = 0
    time_not_on_video = 0
    image_mode = False
    positive_count = 0
    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]
    ### TODO: Handle the input stream ###
    # Checks for image input
    if args.input.endswith('.jpg') or args.input.endswith('.png') or \
            args.input.endswith('.bmp'):
        image_mode = True
        media_stream = args.input

    # Checks for webcam input
    elif args.input == 'CAM':
        media_stream = 0

    # Check for video input
    else:
        media_stream = args.input
        assert os.path.isfile(args.input)

    ### TODO: Loop until stream is over ###
    capture = cv2.VideoCapture(media_stream)

    if media_stream:
        capture.open(args.input)

    if not capture.isOpened():
        log.error("Not able to open the video file!")

    ### TODO: Read from the video capture ###
    # global width, height, prob_threshold
    prob_threshold = args.prob_threshold
    width = capture.get(3)
    height = capture.get(4)

    while capture.isOpened():
        check, frame = capture.read()
        if not check:
            break

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (w, h))
        image = image.transpose(2, 0, 1)
        image = image.reshape(n, c, h, w)

        ### TODO: Start asynchronous inference for specified request ###
        inference_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            inference_time = time.time() - inference_start

            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(cur_request_id)

            # if perf_counts:
            # perf_count = infer_network.exec_net(cur_request_id)
            # performance_counts(perf_count)

            ### TODO: Extract any desired stats from the results ###
            current_count = 0
            track_frames = {}
            track_person = {positive_count: 0}
            frame_count = 0

            for character in result[0][0]:
                if character[2] > prob_threshold:
                    frame_count += 1
                    track_frames[frame_count] = character[2]
                    start_time_not_on_video = time.time()
                    positive_count += 1
                    track_person[positive_count] = time_on_video
                    xmin = int(character[3] * width)
                    ymin = int(character[4] * height)
                    xmax = int(character[5] * width)
                    ymax = int(character[6] * height)
                    frame = cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                          (0, 55, 255), 1)

                    time_on_video = start_time_not_on_video - start_time
                    if time_on_video > 3:
                        if current_count > 1:
                            current_count = last_count
                        else:
                            current_count += 1
                    else:
                        current_count = last_count

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if current_count > last_count:
                start_time = time.time()
                time_not_on_video = time.time() - start_time_not_on_video
                if current_count == 1 and last_count == 0:
                    if time_on_video > 2:
                        total_count = total_count + current_count - last_count

            client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:
                if current_count == 0:
                    start_time_not_on_video = time.time()
                time_on_video = int(time.time() - start_time)
                if last_count == 0 and time_not_on_video < 0.005:
                    time_on_video = track_person[positive_count] + time_on_video
                client.publish("person/duration",
                               json.dumps({"duration": time_on_video}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            cv2.putText(
                frame, "Inference time =  {:.2f} ms".format(
                    (inference_time * 1000)), (15, 15),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            cv2.putText(frame,
                        "Persons in video frame = {:}".format(last_count),
                        (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10),
                        1)
            cv2.putText(frame, "Total count = {:}".format(total_count),
                        (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10),
                        1)
            cv2.putText(frame,
                        "Time on video = {:.2f} s".format(time_on_video),
                        (15, 60), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10),
                        1)
            cv2.putText(
                frame, "Time not on video = {:.3f} s".format(
                    time_not_on_video), (15, 75),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            key = cv2.waitKey(15)
            if key == ord('q'):
                break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if image_mode:
            cv2.imwrite('output.jpg', frame)

        # cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def main():
    """
    Load the network and parse the SSD output.

    :return: None
   """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    model = os.environ['MODEL']
    device = os.environ['DEVICE'] if 'DEVICE' in os.environ.keys() else 'CPU'
    cpu_extension = os.environ[
        'CPU_EXTENSION'] if 'CPU_EXTENSION' in os.environ.keys() else None

    # Checks for live feed
    if os.environ['INPUT'] == 'CAM':
        input_stream = 0

    # Checks for input image
    elif os.environ['INPUT'].endswith('.jpg') or os.environ['INPUT'].endswith(
            '.bmp'):
        single_image_mode = True
        input_stream = os.environ['INPUT']

    # Checks for video file
    else:
        input_stream = os.environ['INPUT']
        assert os.path.isfile(
            os.environ['INPUT']), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(os.environ['INPUT'])

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model, device, 1, 1, cur_request_id,
                                          cpu_extension)[1]
    global initial_w, initial_h
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    fps = cap.get(cv2.CAP_PROP_FPS)
    cmdstring = (
        'ffmpeg',
        '-y',  # overwrite output
        '-r',
        '%d' % (fps),  # frame rate of the input
        '-s',
        '%dx%d' % (initial_w, initial_h),  # frame size
        '-pixel_format',
        'bgr24',  # OpenCV frames are raw BGR
        '-f',
        'rawvideo',
        '-i',
        '-',  # tell ffmpeg to expect raw video from the pipe
        'http://localhost:8090/fac.ffm')  # output feed URL
    p = subprocess.Popen(cmdstring, stdin=subprocess.PIPE)
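    # Raw BGR frames written to p.stdin are re-encoded by ffmpeg and served at the feed URL above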
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        # Start async inference
        inf_start = time.time()
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for specified request.
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if int(os.environ.get('PERF_COUNTS', 0)) > 0:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)
            frame, current_count = ssd_parser(frame, result)
            inf_time_message = "Inference time: {:.3f}ms" \
                .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break

        p.stdin.write(frame.tobytes())
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
            infer_network.clean()
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
Example #4
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    single_image_mode = False

    # Initialise the class
    infer_network = Network()
    model = args.model
    video_file = args.input
    extnsn = args.cpu_extension
    device = args.device

    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0

    n, c, h, w = infer_network.load_model(model, device, 1, 1, cur_request_id,
                                          extnsn)[1]

    ### TODO: Handle the input stream ###
    # Checks for live feed
    if video_file == 'CAM':
        input_stream = 0

    # Checks for input image
    elif video_file.endswith('.jpg') or video_file.endswith('.bmp'):
        single_image_mode = True
        input_stream = video_file

    else:
        input_stream = video_file
        assert os.path.isfile(video_file), "File doesn't exist"

    try:
        # Capture video
        capture = cv2.VideoCapture(input_stream)
    except FileNotFoundError:
        print("Cannot locate the file: " + video_file)
    except Exception as e:
        print("Something went wrong with the file: " + str(e))

    global initial_w, initial_h, prob_threshold
    total_count = 0
    duration = 0
    initial_w = capture.get(3)
    initial_h = capture.get(4)
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    temp = 0
    tk = 0
    #Loop until stream is over
    while capture.isOpened():
        flag, frame = capture.read()
        if not flag:
            break

        key_pressed = cv2.waitKey(60)

        #Pre-processing the input/frame
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        #Async inference
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        color = (255, 0, 0)

        #Waiting for result
        if infer_network.wait(cur_request_id) == 0:
            time_elapsed = time.time() - inf_start

            #Result from the inference
            result = infer_network.get_output(cur_request_id)

            #Bounding box
            frame, current_count, d, tk = draw_box(result, frame, initial_w,
                                                   initial_h, temp, tk)

            #inference time
            inf_timemsg = "Inference Time: {:,3f}ms".format(time_elapsed *
                                                            1000)
            cv2.putText(frame, inf_timemsg, (15, 15), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)

            #Calculating and sending info
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            text_2 = "Distance: %d" % d + " Lost frame: %d" % tk
            cv2.putText(frame, text_2, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        color, 1)

            text_2 = "Current count: %d" % current_count
            cv2.putText(frame, text_2, (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        color, 1)

            if current_count > 3:
                text_2 = "Maximum count reached!!!"
                (text_width,
                 text_height) = cv2.getTextSize(text_2,
                                                cv2.FONT_HERSHEY_COMPLEX,
                                                0.5,
                                                thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(frame, box_coords[0], box_coords[1], (0, 0, 0),
                              cv2.FILLED)
                cv2.putText(frame, text_2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            client.publish("person", json.dumps({"count": current_count}))

            last_count = current_count
            temp = d
            if key_pressed == 27:
                break

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        #Saving Image
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
Example #5
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # my) init parameters
    current_count = 0
    total_count = 0
    duration = 0
    last_count = 0
    start_time = 0
    isFirst = True
    single_image_mode = False

    # Initialise the class (ok)
    infer_network = Network()

    # Set Probability threshold for detections (ok)
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ### (ok)
    infer_network.load_model(args.model,
                             device="CPU",
                             cpu_extension=args.cpu_extension)
    n, c, h, w = infer_network.get_input_shape()

    ### TODO: Handle the input stream ### (ok)
    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
        #assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)

    ### TODO: Loop until stream is over ###(ok)
    while cap.isOpened():
        ### TODO: Read from the video capture ###(ok)
        ret, frame = cap.read()
        key_pressed = cv2.waitKey(60)
        if not ret:
            break

        ### TODO: Pre-process the image as needed ###(ok)
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        ### TODO: Start asynchronous inference for specified request ###(ok)
        infer_network.exec_net(image)

        ### TODO: Wait for the result ###(ok)
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###(ok)
            result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###(ok)
            boxes, score = post_detection(result, frame.shape, prob_threshold)

            for box in boxes:
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 238, 255),
                              1)

            ### TODO: Calculate and send relevant information on ###(ok)
            ### current_count, total_count and duration to the MQTT server ###
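            # Debounce the count: only accept a changed number of boxes after it persists for 0.5 s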
            if len(boxes) != current_count:
                if isFirst:
                    ts1 = time.time()
                    isFirst = False
                if time.time() - ts1 > 0.5:
                    current_count = len(boxes)
                    isFirst = True

            ### Topic "person": keys of "count" and "total" ###(ok)
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            ### Topic "person/duration": key of "duration" ###(ok)
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            if key_pressed == ord('q'):
                break

        ### TODO: Send the frame to the FFMPEG server ###(ok)
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###(ok)
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_net = Network()

    ### TODO: Load the model through `infer_network` ###
    exec_net = infer_net.load_model(args.model, args.device,
                                    args.cpu_extension)
    ### TODO: Handle the input stream ###
    # Flag for the input image
    single_image_mode = False

    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    global initial_w, initial_h, prob_threshold, FRAME_THRES, count_conf
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    FRAME_THRES = args.frame_threshold
    count_conf = 0

    # output video for testing
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    people_counter = cv2.VideoWriter("people_counter.mp4", 0x00000021, fps,
                                     (int(initial_w), int(initial_h)), True)
    #cv2.VideoWriter_fourcc(*"AVC1"),

    cur_request_id = 0
    last_count = 0
    prev_count, current_count = 0, 0
    total_count = 0
    start_time = 0

    ## assess perf
    det_time = []
    input_capture_time = []
    frame_count = 0
    total_start = time.time()
    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        input_capture_start = time.time()
        flag, frame = cap.read()
        frame_count += 1
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        ### TODO: Pre-process the image as needed ###
        image = infer_net.preprocess(frame)
        input_capture_time.append(time.time() - input_capture_start)

        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_net.execute(cur_request_id, image)
        ### TODO: Wait for the result ###
        if infer_net.wait(cur_request_id) == 0:
            det_time.append(time.time() - inf_start)

            ### TODO: Get the results of the inference request ###

            result = infer_net.get_output(cur_request_id)
            ### TODO: Extract any desired stats from the results ###
            frame, detected_count = ssd_out(frame, result)
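            # Smooth the raw detection count: adopt a new count only after it differs from the
            # previous stable count for FRAME_THRES consecutive frames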
            if detected_count == prev_count:
                current_count = prev_count
                count_conf = 0
            else:
                count_conf += 1

            if count_conf == FRAME_THRES:
                current_count = detected_count
                #update prev_count and current_count
                count_conf = 0
                prev_count = current_count
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            # When new person enters the video
            if current_count > last_count:
                start_frame = cap.get(cv2.CAP_PROP_POS_FRAMES)
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))
            cv2.putText(frame,
                        str(total_count) + ' ' + str(current_count) + ' ' +
                        str(cap.get(cv2.CAP_PROP_POS_FRAMES)), (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            ### Topic "person/duration": key of "duration" ###
            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(
                    (cap.get(cv2.CAP_PROP_POS_FRAMES) - start_frame) /
                    int(cap.get(cv2.CAP_PROP_FPS)))
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
        people_counter.write(frame)
        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    total_time = time.time() - total_start
    with open('stats.txt', 'w') as f:
        f.write(str(round(total_time, 1)) + '\n')
        f.write(str(frame_count) + '\n')
        #print('input capture time: avg', sum(input_capture_time)*1000/len(input_capture_time), 'ms| min ', min(input_capture_time)*1000,'|max ', max(input_capture_time)*1000)
    #print('detection time: avg', sum(det_time)*1000/len(det_time), 'ms | min ', min(det_time)*1000,'| max ' , max(det_time)*1000)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_net.clean()
Example #7
def main():
    """
    Load the network and parse the SSD output.

    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]
    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        # Start asynchronous inference for specified request.
        inf_start = time.time()

        img_preprocessed = preprocess(n, c, h, w, frame)

        infer_network.exec_net(cur_request_id, img_preprocessed)

        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            output = infer_network.get_output(cur_request_id,
                                              'DetectionOutput')
            detections = output[0, 0, :, :]
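            # DetectionOutput shape is [1, 1, N, 7]; each row is
            # [image_id, label, confidence, xmin, ymin, xmax, ymax] with normalized coordinates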
            current_count = 0
            for detection in detections:
                # Proceed only if the confidence is above 0.5
                confidence = detection[2]
                if confidence > .5:
                    current_count += 1
                    # detection class
                    idx = int(detection[1])
                    class_name = coco_classes[idx]
                    log.info(" " + str(idx) + " " + str(confidence) + " " +
                             class_name)
                    if int(idx) == 1:  #only person
                        # Get the box to be displayed
                        axis = detection[3:7] * (initial_w, initial_h,
                                                 initial_w, initial_h)
                        (start_X, start_Y, end_X,
                         end_Y) = axis.astype(int)[:4]
                        cv2.rectangle(frame, (start_X, start_Y),
                                      (end_X, end_Y), (0, 55, 255),
                                      thickness=2)
                        cv2.putText(frame, class_name, (start_X, start_Y),
                                    cv2.FONT_ITALIC, (.0005 * initial_w),
                                    (0, 0, 255))

            #boxes, labels, probs
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            #frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break

        # Send frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

        current_count = 0

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
Example #8
def main():
    """
    Load the network and parse the SSD output.

    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Checks for input image
    if args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        #assert os.path.isfile(args.input), "Specified input file doesn't exist"

    if os.path.isfile(args.input):
        ##works for local file:
        gstreamer_pipeline = (
            'filesrc location = %s ! qtdemux ! h264parse ! avdec_h264 ! videoconvert ! appsink sync=false'
            % (input_stream))
        wk = 33
    else:
        #RTSP stream:
        gstreamer_pipeline = (
            'rtspsrc location=%s ! queue ! rtph264depay ! h264parse config-interval=-1 ! avdec_h264 ! videoconvert ! appsink sync=false'
            % (input_stream))
        wk = 1

    cap = cv2.VideoCapture(gstreamer_pipeline, cv2.CAP_GSTREAMER)
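    # Note: cv2.CAP_GSTREAMER only works if OpenCV was built with GStreamer support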

    output_stream = args.output
    #gstreamer_out = ('appsrc ! h264parse config-interval=-1 ! flvmux streamable=true ! rtmpsink location=%s sync=false'  % (output_stream))
    #gstreamer_out = ("appsrc ! videoconvert ! x264enc tune=zerolatency threads=1 speed-preset=superfast ! flvmux streamable=true ! rtmpsink location='%s live=1'"  % (output_stream))
    gstreamer_out = (
        "appsrc ! videoconvert ! x264enc tune=zerolatency bitrate=5000 speed-preset=ultrafast ! flvmux streamable=true ! rtmpsink location='%s live=1'"
        % (output_stream))

    #gstreamer_out = ('appsrc ! videoconvert ! video/x-raw,framerate=25/1 ! videoconvert ! x264enc tune="zerolatency" threads=1  ! h264parse ! flvmux streamable=true ! rtmpsink location=%s async=false'  % (output_stream))
    #gstreamer_out = ('appsrc ! queue ! videoconvert ! video/x-raw ! x264enc ! h264parse ! rtmpsink location=%s async=false'  % (output_stream))
    #gstreamer_out = ("appsrc ! 'video/x-raw, width=1920, height=1080, framerate=25/1'  ! videoconvert ! x264enc bframes=0 b-adapt=false speed-preset=1 tune=0x00000004 ! h264parse ! flvmux ! rtmpsink location='%s live=1' async=false"  % (output_stream))

    #gstreamer_out = ("appsrc ! 'video/x-raw, width=1920, height=1080, framerate=25/1' ! videoconvert ! h264parse ! flvmux streamable=true ! rtmpsink location=%s" % (output_stream))

    #gstreamer_out = ("appsrc ! h264parse ! flvmux streamable=true ! rtmpsink location='%s'" % (output_stream))

    #fcc = cv2.VideoWriter_fourcc(*'X264')
    fps = int(args.fps)
    out = cv2.VideoWriter(gstreamer_out, -1, fps,
                          (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                           int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))), True)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(wk)
        # Start async inference
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)

            resol = str(int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) + 'x' + str(
                int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

            client.publish("video/fps", fps)
            client.publish("video/resolution", resol)

            client.publish("person", current_count)
            client.publish("person/inference", "{:.3f}ms"\
                                            .format(det_time * 1000))

            last_count = current_count

            if key_pressed == 27:
                break

        # Send frame to the ffmpeg server
        #sys.stdout.buffer.write(frame)
        out.write(frame)

        #print('Count:' + str(current_count))
        #print('FPS:' + str(int(cap.get(cv2.CAP_PROP_FPS))))
        #print('Resol:' + str(resol))
        #sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
Example #9
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    #Initial, global variables for counting
    current_request_id = 0
    start_time = 0
    last_count = 0
    total_count = 0
    
    
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ###  Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, current_request_id, args.cpu_extension)
    model_input_shape = infer_network.get_input_shape()

    ### Handle the input stream ###
    single_image_mode = False
    
    if args.input == 'CAM':
        input_stream = 0

    elif args.input.endswith('.jpg') or args.input.endswith('.png') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "The input file does not exist"
        
    cap = cv2.VideoCapture(input_stream)
    
    if input_stream:
        cap.open(input_stream)
        
    if not cap.isOpened():
        log.error('Error! The video file/source is not opening')
    
    #inital width and height taken from the input
    initial_width = int(cap.get(3))
    initial_height = int(cap.get(4))
    ### Loop until stream is over ###
    while cap.isOpened():
        ### Read from the video capture ###
        flag, frame = cap.read()
        
        if not flag:
            break
            
        pressed_key = cv2.waitKey(60)
        ### Pre-process the image as needed ###
        width = model_input_shape[3]
        height = model_input_shape[2]
        processed_input_image = cv2.resize(frame,(width, height))
        processed_input_image = processed_input_image.transpose((2, 0, 1))
        processed_input_image = processed_input_image.reshape(model_input_shape[0], model_input_shape[1], height, width)
        ###  Start asynchronous inference for specified request ###
        start_of_inference = time.time()
        infer_network.exec_net(current_request_id, processed_input_image)
        
        ###  Wait for the result ###
        if infer_network.wait(current_request_id) == 0:
            detection_time = (time.time() - start_of_inference) * 1000
            ###  Get the results of the inference request ###
            result = infer_network.get_output(current_request_id)
            ### Extract any desired stats from the results ###
            frame, present_count = draw_rectangular_box(frame, result, initial_width, initial_height, prob_threshold)
            ##Find out the inference time and write the result on the video as text.
            inf_time_msg = "Inference time: {:.5f}ms".format(detection_time)
            cv2.putText(frame, inf_time_msg, (20,10), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            #Person's count is calculated here
            if present_count > last_count:
                start_time = time.time()
                total_count += present_count - last_count
                client.publish('person', json.dumps({"total": total_count}))
            #Duration is calculated here
            if present_count < last_count:
                person_duration = int(time.time() - start_time)
                # Prevent double counting; the higher threshold keeps the app from being oversensitive
                if person_duration > 5:
                    total_count -= 1
                client.publish('person/duration', json.dumps({"duration": person_duration}))
            
                #if present_count >=4:
                #print('Alert! Number of people exceeds the limit! Please take necessary action.')
                
                
            client.publish('person', json.dumps({"count": present_count}))
            last_count = present_count
            # End if escape key is pressed
            if pressed_key == 27:
                break
        ### Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        ### Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
def main():
    """
    Load the network and parse the output.

    :return: None
    """
    global DELAY
    global CLIENT
    global SIG_CAUGHT
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    logger = log.getLogger()
    render_time = 0
    roi_x = args.pointx
    roi_y = args.pointy
    roi_w = args.width
    roi_h = args.height

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        sys.exit(1)

    if input_stream:
        # Adjust DELAY to match the number of FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0,
                                          args.cpu_extension)[1]

    ret, frame = cap.read()
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_count = 0
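    # PBS_JOBID is set by the PBS job scheduler (e.g. on Intel DevCloud); it is used to name per-job output files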
    job_id = os.environ['PBS_JOBID']
    result_file = open(os.path.join(args.output_dir,'output_'+str(job_id)+'.txt'), "w")
    progress_file_path = os.path.join(args.output_dir,'i_progress_'+str(job_id)+'.txt')
    infer_time_start = time.time()
    while ret:
        dims = ""
        ret, next_frame = cap.read()
        if not ret:
            break

        initial_wh = [cap.get(3), cap.get(4)]

        if next_frame is None:
            log.error("ERROR! blank FRAME grabbed")
            break

        # If default or negative values are given,
        # fall back to an ROI that starts at the top-left of the frame
        if roi_x <= 0 or roi_y <= 0:
            roi_x = 0
            roi_y = 0
        if roi_w <= 0:
            roi_w = next_frame.shape[1]
        if roi_h <= 0:
            roi_h = next_frame.shape[0]
        key_pressed = cv2.waitKey(int(DELAY))

        selected_region = [roi_x, roi_y, roi_w, roi_h]
        x_max1 = str(selected_region[0])
        x_min1 = str(selected_region[0] + selected_region[2])
        y_min1 = str(selected_region[1] + selected_region[3])
        y_max1 = str(selected_region[1])
        
        in_frame_fd = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame_fd)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start
        # Results of the output layer of the network
        res = infer_network.get_output(0)
        # Parse SSD output
        ssd_out(res, args, initial_wh, selected_region)

        est = str(render_time * 1000)
        time1 = round(det_time * 1000)
        Worker = INFO.safe
        out_list = [str(frame_count), x_min1, y_min1, x_max1, y_max1,
                    str(Worker), est, str(time1)]
        for i in range(len(out_list)):
            dims += out_list[i]+' '
        dims += '\n'
        result_file.write(dims)

        render_start = time.time()
        render_end = time.time()
        render_time = render_end - render_start
        
        
        frame_count += 1
        if frame_count % 10 == 0:
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start), frame_count,
                           video_len)
        frame = next_frame

        if key_pressed == 27:
            print("Attempting to stop background threads")
            break
    if args.output_dir is None:
        cv2.destroyAllWindows()
    else:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1))+'\n')
            f.write(str(frame_count)+'\n')


    infer_network.clean()
    cap.release()
    cv2.destroyAllWindows()
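
Several of the examples on this page (including the one above) report progress through a progressUpdate() helper that is not reproduced here. A minimal sketch of such a helper, assuming it only needs to record the percentage complete, the estimated time remaining, and a rough total-time estimate to a text file that an external monitor can poll:

def progressUpdate(file_path, time_diff, frame_count, video_len):
    """Write inference progress so an external process can poll it.

    Assumed implementation: the helper actually used by these examples
    may write a different format.
    """
    progress = round(100 * frame_count / video_len, 1)
    remaining_time = round((time_diff / frame_count) * (video_len - frame_count), 1)
    estimated_total = round(time_diff + remaining_time, 1)
    with open(file_path, "w") as progress_file:
        progress_file.write(str(progress) + '\n')
        progress_file.write(str(remaining_time) + '\n')
        progress_file.write(str(estimated_total) + '\n')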
Example #11
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Read the model, input source and target device from the arguments
    model = args.model
    input_mode = args.input
    device = args.device

    # Single image flag
    single_image_input_mode = False

    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0
    duration = 0
    color = (255, 0, 0)
    temp_dist = 0
    tk = 0

    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(model, device)[1]

    log.info("Input Dimensions of the loaded model {}{}{}{}".format(
        n, c, h, w))

    ### TODO: Handle the input stream ###
    # Live Camera feed
    if input_mode == 'CAMERA':
        input_stream = 0

    # Single Image
    elif input_mode.endswith('.jpg') or input_mode.endswith('.bmp'):
        single_image_input_mode = True
        input_stream = input_mode

    else:
        input_stream = input_mode
        assert os.path.isfile(input_mode), "Specified input file doesn't exist"

    try:
        cap = cv2.VideoCapture(input_stream)
    except FileNotFoundError:
        print("Cannot locate input stream file: " + str(input_stream))
    except Exception as e:
        print("Unknown error in input stream: ", e)

    global initial_w, initial_h, prob_threshold

    # Input frame width and height.
    width = cap.get(3)
    height = cap.get(4)
    prob_threshold = args.prob_threshold

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        print("coming here")
        ### TODO: Pre-process the image as needed ###
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        log.info("Input frame size:- {}".format(frame.shape))
        pro_image = cv2.resize(frame, (w, h))
        log.info("resize frame shape:- {}".format(pro_image.shape))
        pro_image = pro_image.transpose((2, 0, 1))
        log.info("transposing frame:- {}".format(pro_image.shape))
        pro_image = pro_image.reshape((n, c, h, w))
        log.info("final processed image {}".format(pro_image.shape))
        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        log.info("starting the inference engine")
        infer_network.exec_net(pro_image)
        ### TODO: Wait for the result ###

        if infer_network.wait() == 0:
            log.info("Coming to infer network result section")
            det_time = time.time() - inf_start
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output()
            ### TODO: Extract any desired stats from the results ###
            out_frame, current_count, dist, tk = draw_masks(
                result, frame, width, height, temp_dist, tk)
            # Printing Inference Time
            inf_time_message = "Inference time: {:.3f}ms".format(det_time *
                                                                 1000)
            cv2.putText(out_frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)

            # Calculate and send relevant information
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            # Adding overlays to the frame
            txt2 = "Distance: %d" % dist + " Lost frame: %d" % tk
            cv2.putText(out_frame, txt2, (15, 30), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)

            txt2 = "Current count: %d " % current_count
            cv2.putText(out_frame, txt2, (15, 45), cv2.FONT_HERSHEY_COMPLEX,
                        0.5, color, 1)

            if current_count > 3:
                txt2 = "Alert! Maximum count reached"
                (text_width,
                 text_height) = cv2.getTextSize(txt2,
                                                cv2.FONT_HERSHEY_COMPLEX,
                                                0.5,
                                                thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                # make the coords of the box with a small padding of two pixels
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(out_frame, box_coords[0], box_coords[1],
                              (0, 0, 0), cv2.FILLED)

                cv2.putText(out_frame, txt2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            client.publish("person",
                           json.dumps({"count":
                                       current_count}))  # People Count

            last_count = current_count
            temp_dist = dist
            # Display the resulting frame
            cv2.imshow('Output_Frame', out_frame)
            # Break if escape key is key_pressed
            if key_pressed == 27:
                break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(out_frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_input_mode:
            cv2.imwrite('output_image.jpg', out_frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
Example #12
def infer_on_stream(args, client):
    # Initialise the class
    infer_network = Network()

    single_img = False
    start_time = 0
    cur_request_id = 0
    last_count = 0
    total_count = 0

    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Handle the input stream
    if args.input == 'CAM':
        args.input = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_img = True
    else:
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap, width, height = get_stream_source(args.input)

    # initial setup
    duration = 0
    color = (255, 0, 0)

    # Loop until stream is over
    while cap.isOpened():
        # Read from the video capture
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        # Pre-process the image as needed
        # Start async inference
        image = preprocess_image(frame, n, c, h, w)

        # Start asynchronous inference for specified request
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start

            # Get the results of the inference request
            result = infer_network.get_output(cur_request_id)

            # Draw Bounding Box
            frame, current_count = draw_outputs(result, frame, width, height,
                                                args.prob_threshold)

            # Printing Inference Time
            inf_time_message = "Inference time: {:.3f}ms".format(det_time *
                                                                 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)

            # Calculate and send relevant information
            if current_count > last_count:  # New entry
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            txt2 = "Current count: {}".format(current_count)
            cv2.putText(frame, txt2, (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                        color, 1)

            if current_count > 5:
                txt2 = "Alert! Maximum count reached"
                (text_width,
                 text_height) = cv2.getTextSize(txt2,
                                                cv2.FONT_HERSHEY_COMPLEX,
                                                0.5,
                                                thickness=1)[0]
                text_offset_x = 10
                text_offset_y = frame.shape[0] - 10
                # make the coords of the box with a small padding of two pixels
                box_coords = ((text_offset_x, text_offset_y + 2),
                              (text_offset_x + text_width,
                               text_offset_y - text_height - 2))
                cv2.rectangle(frame, box_coords[0], box_coords[1], (0, 0, 0),
                              cv2.FILLED)
                cv2.putText(frame, txt2, (text_offset_x, text_offset_y),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)

            client.publish("person", json.dumps({"count": current_count}))

            last_count = current_count

            if key_pressed == 27:
                break

        # Send the frame to the FFMPEG server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        # Save the Image
        if single_img:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
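
Example #12 above relies on a preprocess_image() helper that is not reproduced on this page. Judging from the inline preprocessing used by the other examples (resize, HWC-to-CHW transpose, reshape to the network's NCHW input), it is presumably equivalent to something like the following sketch:

import cv2

def preprocess_image(frame, n, c, h, w):
    """Resize a BGR frame and rearrange it into the NCHW layout the model expects."""
    image = cv2.resize(frame, (w, h))
    # Change data layout from HWC to CHW
    image = image.transpose((2, 0, 1))
    return image.reshape((n, c, h, w))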
Example #13
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### Load the model through `infer_network` ###
    log.info("Loading the model through Inference Engine...")
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    ### Handle the input stream ###
    # Set flag for the input image
    single_image_mode = False

    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")

    width = int(cap.get(3))
    height = int(cap.get(4))

    ## Define and set global variables
    global had_found
    global total_count
    duration = 0

    ### Loop until stream is over ###
    while cap.isOpened():

        ### Read from the video capture ###
        # get return value and frame
        retval, frame = cap.read()

        if not retval:
            break
        key_pressed = cv2.waitKey(60)  #wait for 60 ms

        ### Pre-process the image as needed ###
        pr_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        pr_frame = pr_frame.transpose(
            (2, 0, 1))  #transpose layout from HWC to CHW
        pr_frame = pr_frame.reshape(1, *pr_frame.shape)

        ### Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_network.exec_net(pr_frame)

        ### Wait for the result ###
        if infer_network.wait() == 0:
            det_time = time.time() - inf_start

            ### Get the results of the inference request ###
            result = infer_network.get_output()

            ### Extract any desired stats from the results ###
            # get and draw the bounding box for person
            frame, p_counts = count_draw(frame, result, args, width, height)

            ### Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###

            # get unique class from the frame
            # because our scenario is one person entering frame and exiting at a time.
            unique_classes = get_uclasses(result, width, height)

            # check if the person class ID (15) is present in the frame
            # and use get_total() to update the running total count and duration
            if 15 in unique_classes:
                total_count, duration = get_total()

            # if the person (class ID 15) has left the frame, publish the
            # duration to the MQTT server and reset had_found so the same
            # person is not counted again
            elif 15 not in unique_classes and had_counted and had_found:
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
                had_found = False

            # otherwise, reset had_found
            else:
                if had_found:
                    log.info("Person counted already...")
                    had_found = False

            # Draw performance stats on the frame
            total_message = "The Total Count: {}".format(total_count)
            current_message = "The Current Count: {}".format(p_counts)
            duration_message = "Duration in Frame: {} sec".format(duration)
            inf_time_message = "Inference time: {:.3f}ms".format(det_time *
                                                                 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, current_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, total_message, (15, 45),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, duration_message, (15, 60),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            #Publish to MQTT Server
            client.publish("person", json.dumps({"count": p_counts}))

            ### Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()

        ### Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
            infer_network.clean()

    # Release the capture and destroy any OpenCV windows
    cap.release()
    cv2.destroyAllWindows()
    ### TODO: Disconnect from MQTT
    client.loop_stop()
    client.disconnect()
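
Example #13 above also depends on a get_uclasses() helper to list which object classes appear in the current frame (class ID 15 being the person class for this model, as its comments note). Assuming the same SSD output layout used elsewhere on this page (obj[1] = class ID, obj[2] = confidence), a sketch might look like this; the width and height parameters are accepted only to match the call site:

def get_uclasses(result, width, height, threshold=0.5):
    """Return the set of class IDs detected above the confidence threshold."""
    classes = set()
    for obj in result[0][0]:
        if obj[2] > threshold:
            classes.add(int(obj[1]))
    return classes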
Example #14
def main():
    """
    Load the network and parse the SSD output.
    :return: None
    """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    args = build_argparser().parse_args()

    total_count = 0
    last_count = 0
    start_time = 0
    request_id = 0

    # Initialize the Inference Engine
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    infer_network.load_model(args.model, args.device, num_requests=0)
    n, c, h, w = infer_network.get_input_shape()

    if args.input == "CAM":
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    try:
        cap = cv2.VideoCapture(args.input)
    except FileNotFoundError:
        print("Cannot locate video file: " + args.input)
    except Exception as e:
        print("Something else went wrong with the video file: ", e)

    if input_stream:
        cap.open(args.input)
    if not cap.isOpened():
        log.error("Can't to open video source")
    prob_threshold = args.prob_threshold
    cap_w = cap.get(3)
    cap_h = cap.get(4)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        img = cv2.resize(frame, (w, h))
        img = img.transpose((2, 0, 1))
        img = img.reshape((n, c, h, w))
        inf_start = time.time()
        infer_network.exec_net(img, request_id=0)

        if infer_network.wait(request_id) == 0:
            det_time = time.time() - inf_start
            result = infer_network.get_output(request_id)

            current_count = 0
            for obj in result[0][0]:
                # Draw bounding box for object when it's probability is more than
                #  the specified threshold
                if obj[2] > prob_threshold:
                    xmin = int(obj[3] * cap_w)
                    ymin = int(obj[4] * cap_h)
                    xmax = int(obj[5] * cap_w)
                    ymax = int(obj[6] * cap_h)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (225, 225, 225), 1)
                    current_count = current_count + 1
            inf_time_message = "Inference time: {:.3f}ms" \
                .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            if current_count < last_count:
                duration = int(time.time() - start_time)
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
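
Each example parses its options with a build_argparser() helper that is not shown on this page. The exact flags vary from example to example, but a representative sketch covering the attributes used above (model, input, device, cpu_extension, prob_threshold) would be:

from argparse import ArgumentParser

def build_argparser():
    """Build a parser for the command-line options these examples expect."""
    parser = ArgumentParser()
    parser.add_argument("-m", "--model", required=True,
                        help="Path to the model's IR .xml file")
    parser.add_argument("-i", "--input", required=True,
                        help="Path to a video file, an image, or 'CAM'")
    parser.add_argument("-d", "--device", default="CPU",
                        help="Target device, e.g. CPU, GPU, MYRIAD or FPGA")
    parser.add_argument("-l", "--cpu_extension", default=None,
                        help="Optional CPU extension library for custom layers")
    parser.add_argument("-pt", "--prob_threshold", type=float, default=0.5,
                        help="Confidence threshold for filtering detections")
    return parser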
Example #15
def main():
    """
    Load the network and parse the SSD output.

    :return: None
    """

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False
    total_count = 0
    cur_request_id = 0
    last_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id, args.cpu_extension)
    # Checks for live feed
    #if args.input == 'CAM':
    #input_stream = 0

    # Checks for input image
    if args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 0
    job_id = os.environ['PBS_JOBID']
    job_id = job_id.rstrip().split('.')[0]
    progress_file_path = os.path.join(args.output_dir, str(job_id),
                                      'i_progress.txt')
    infer_time_start = time.time()
    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    people_counter = cv2.VideoWriter(
        os.path.join(args.output_dir, str(job_id), "people_counter.mp4"),
        cv2.VideoWriter_fourcc(*"AVC1"), fps, (int(initial_w), int(initial_h)),
        True)
    while cap.isOpened():
        flag, frame = cap.read()
        frame_count += 1
        if not flag:
            break
        # Start async inference
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            current_count_message = "Current count: {}"\
                                     .format(current_count)
            cv2.putText(frame, current_count_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            last_count = current_count

            people_counter.write(frame)
        if frame_count % 10 == 0 or frame_count % video_len == 0:
            print("frame_count: {}, video_len: {}".format(
                frame_count, video_len))
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start), frame_count,
                           video_len)

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
    if args.output_dir:
        total_time = round(time.time() - infer_time_start, 2)
        stats = {}
        stats['time'] = str(total_time)
        stats['frames'] = str(frame_count)
        stats['fps'] = str(round(frame_count / total_time, 2))
        with open(os.path.join(args.output_dir, str(job_id), 'stats.json'),
                  'w') as f:
            json.dump(stats, f)
    cap.release()
    infer_network.clean()
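
Example #15 above (and example #20 below) delegate detection parsing to an ssd_out() helper that is not shown here. Judging from the inline SSD parsing in example #14, it presumably filters detections by the global prob_threshold, scales the boxes by the global frame size, and returns the annotated frame together with a count. A sketch under those assumptions:

import cv2

def ssd_out(frame, result):
    """Filter SSD detections, draw bounding boxes, and count people in the frame.

    Assumes initial_w, initial_h and prob_threshold are module-level globals,
    as declared in the examples that call it.
    """
    current_count = 0
    for obj in result[0][0]:
        if obj[2] > prob_threshold:
            xmin = int(obj[3] * initial_w)
            ymin = int(obj[4] * initial_h)
            xmax = int(obj[5] * initial_w)
            ymax = int(obj[6] * initial_h)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 55, 255), 1)
            current_count += 1
    return frame, current_count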
Example #16
def main():
    """
    Load the network and parse the output.

    :return: None
    """
    global DELAY
    global CLIENT
    global SIG_CAUGHT
    global KEEP_RUNNING
    CLIENT = mqtt.Client()
    CLIENT.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    CLIENT.subscribe(TOPIC)
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    logger = log.getLogger()
    render_time = 0
    roi_x = args.pointx
    roi_y = args.pointy
    roi_w = args.width
    roi_h = args.height

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        sys.exit(1)

    if input_stream:
        # Adjust DELAY to match the number of FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0, args.cpu_extension)

    message_thread = Thread(target=message_runner, args=())
    message_thread.daemon = True
    message_thread.start()

    ret, frame = cap.read()
    while ret:

        ret, next_frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break

        initial_wh = [cap.get(3), cap.get(4)]

        if next_frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        # If either default values or negative numbers are given,
        # then we will default to start of the FRAME
        if roi_x <= 0 or roi_y <= 0:
            roi_x = 0
            roi_y = 0
        if roi_w <= 0:
            roi_w = next_frame.shape[1]
        if roi_h <= 0:
            roi_h = next_frame.shape[0]
        key_pressed = cv2.waitKey(int(DELAY))

        # 'c' key pressed
        if key_pressed == 99:
            # Give operator chance to change the area
            # Select rectangle from left upper corner, dont display crosshair
            ROI = cv2.selectROI("Assembly Selection", frame, True, False)
            print("Assembly Area Selection: -x = {}, -y = {}, -w = {},"
                  " -h = {}".format(ROI[0], ROI[1], ROI[2], ROI[3]))
            roi_x = ROI[0]
            roi_y = ROI[1]
            roi_w = ROI[2]
            roi_h = ROI[3]
            cv2.destroyAllWindows()

        cv2.rectangle(frame, (roi_x, roi_y),
                      (roi_x + roi_w, roi_y + roi_h), (0, 0, 255), 2)
        selected_region = [roi_x, roi_y, roi_w, roi_h]

        in_frame_fd = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame_fd)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start
        # Results of the output layer of the network
        res = infer_network.get_output(0)
        # Parse SSD output
        ssd_out(res, args, initial_wh, selected_region)

        # Draw performance stats
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        render_time_message = "OpenCV rendering time: {:.3f} ms". \
            format(render_time * 1000)

        if not INFO.safe:
            warning = "HUMAN IN ASSEMBLY AREA: PAUSE THE MACHINE!"
            cv2.putText(frame, warning, (15, 80), cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 0, 255), 2)

        cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, render_time_message, (15, 35), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Worker Safe: {}".format(INFO.safe), (15, 55), cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)

        render_start = time.time()
        cv2.imshow("Restricted Zone Notifier", frame)
        render_end = time.time()
        render_time = render_end - render_start

        frame = next_frame

        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
    infer_network.clean()
    message_thread.join()
    cap.release()
    cv2.destroyAllWindows()
    CLIENT.disconnect()
Example #17
def main():
    args = build_argparser().parse_args()

    account_name = args.account_name
    account_key = args.account_key

    if account_name is "" or account_key is "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide account key using -ak option!")
        sys.exit(1)
    elif account_name is None and account_key is not None:
        print("Please provide account name using -an option!")
        sys.exit(1)
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \"" + account_name +
              "\"")
        upload_azure = 1
        create_cloud_container(account_name, account_key)

    #if args.input == 'cam':
    #input_stream = 0
    #else:
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', args.input)
        sys.exit(1)

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 0,
                                          args.cpu_extension)

    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    store_aisle = cv2.VideoWriter(
        os.path.join(args.output_dir, "store_aisle.mp4"),
        cv2.VideoWriter_fourcc(*'avc1'), fps, (initial_w, initial_h), True)
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_time_start = time.time()
    frame_count = 1
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start

        people_count = 0

        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects when probability more than specified threshold
            if obj[2] > args.prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw bounding box
                color = (min(class_id * 12.5,
                             255), min(class_id * 7,
                                       255), min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        store_aisle.write(frame)
        time_interval = MULTIPLICATION_FACTOR * fps
        if frame_count % time_interval == 0:
            apply_time_stamp_and_save(frame, people_count, upload_azure)
        if frame_count % 10 == 0:
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start), frame_count,
                           video_len)
        frame = next_frame

    if args.output_dir:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1)) + '\n')
            f.write(str(frame_count) + '\n')
    cap.release()
    infer_network.clean()
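
Examples #17 above and #18 below call an apply_time_stamp_and_save() helper that is not included on this page. A minimal sketch, assuming it stamps the frame with the current time and people count, writes it to disk, and leaves any Azure upload to the code defined alongside create_cloud_container():

import time
import cv2

def apply_time_stamp_and_save(image, people_count, upload_azure):
    """Overlay a timestamp and people count on the frame and save it as a snapshot."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    cv2.putText(image, timestamp + " People: " + str(people_count), (15, 100),
                cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
    file_name = "snapshot_" + time.strftime("%Y%m%d-%H%M%S") + ".jpg"
    cv2.imwrite(file_name, image)
    if upload_azure:
        # Uploading the snapshot to Azure storage is handled by helpers not shown here.
        pass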
Example #18
def main():
    model_xml = (os.environ["MODEL"])
    input_source = (os.environ["INPUT"])
    device = os.environ.get('DEVICE', 'CPU')
    cpu_extension = os.environ.get('CPU_EXTENSION')
    try:
        # Probability threshold for detections filtering
        prob_threshold = float(os.environ['PROB_THRESHOLD'])
    except KeyError:
        prob_threshold = 0.5
    # Specify the azure storage name to upload results to cloud.
    account_name = os.environ.get('ACCOUNT_NAME')
    # Specify the azure storage key to upload results to cloud.
    account_key = os.environ.get('ACCOUNT_KEY')

    if account_name is "" or account_key is "":
        print("Invalid account name or account key!")
        sys.exit(1)
    elif account_name is not None and account_key is None:
        print("Please provide the storage key via the ACCOUNT_KEY environment variable!")
        sys.exit(1)
    elif account_name is None and account_key is not None:
        print("Please provide the storage name via the ACCOUNT_NAME environment variable!")
        sys.exit(1)
    elif account_name is None and account_key is None:
        upload_azure = 0
    else:
        print("Uploading the results to Azure storage \"" + account_name +
              "\"")
        upload_azure = 1
        create_cloud_container(account_name, account_key)

    if input_source == 'cam':
        input_stream = 0
    else:
        input_stream = input_source
        assert os.path.isfile(
            input_source), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    if cap is None or not cap.isOpened():
        print('Warning: unable to open video source: ', input_source)
        sys.exit(1)

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, device, 1, 1, 0,
                                          cpu_extension)

    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    while cap.isOpened():
        ret, next_frame = cap.read()
        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(0, in_frame)
        # Wait for the result
        infer_network.wait(0)
        det_time = time.time() - inf_start
        people_count = 0

        # Converting to Grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Remove the background
        fgbgmask = mog.apply(gray)

        # Thresholding the image
        thresh = 2
        max_value = 2
        threshold_image = cv2.threshold(fgbgmask, thresh, max_value,
                                        cv2.THRESH_BINARY)[1]
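        # With max_value set to 2, each foreground pixel contributes only a small
        # increment per frame; the saturating cv2.add below therefore builds a
        # dwell-time heat map that gradually brightens wherever motion persists
        # (capped at 255 by uint8 saturation).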
        # Adding to the accumulated image
        accumulated_image = cv2.add(threshold_image, accumulated_image)
        colormap_image = cv2.applyColorMap(accumulated_image, cv2.COLORMAP_HOT)

        # Results of the output layer of the network
        res = infer_network.get_output(0)
        for obj in res[0][0]:
            # Draw only objects when probability more than specified threshold
            if obj[2] > prob_threshold:
                xmin = int(obj[3] * initial_w)
                ymin = int(obj[4] * initial_h)
                xmax = int(obj[5] * initial_w)
                ymax = int(obj[6] * initial_h)
                class_id = int(obj[1])
                # Draw bounding box
                color = (min(class_id * 12.5,
                             255), min(class_id * 7,
                                       255), min(class_id * 5, 255))
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                people_count = people_count + 1

        people_count_message = "People Count : " + str(people_count)
        inf_time_message = "Inference time: {:.3f} ms".format(det_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 25),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        cv2.putText(frame, people_count_message, (15, 65),
                    cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
        final_result_overlay = cv2.addWeighted(frame, P_COUNT_FRAME_WEIGHTAGE,
                                               colormap_image,
                                               COLORMAP_FRAME_WEIGHTAGE_1, 0)
        cv2.imshow("Detection Results", final_result_overlay)

        time_interval = MULTIPLICATION_FACTOR * fps
        if frame_count % time_interval == 0:
            apply_time_stamp_and_save(final_result_overlay, people_count,
                                      upload_azure)

        frame = next_frame

        key = cv2.waitKey(1)
        if key == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
Example #19
def main():
    """
    Load the network and parse the output.
    :return: None
    """
    get_args()
    prevReq = 0
    currReq = 1

    prevVideo = None
    vid_finished = [False] * len(videos)
    min_FPS = min([videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])
    wait_time = int(round(1000 / min_FPS / len(videos)))

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    batch_size, channels, model_height, model_width = \
        infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)

    while True:
        for index, currVideo in videos:
            # Read image from video/cam
            vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_FPS))):
                ret, current_img = currVideo.video.read()
                if not ret:
                    vid_finished[index] = True
                    break
            if vid_finished[index]:
                stream_end_frame = np.zeros((int(currVideo.height), int(currVideo.width), 1),
                                            dtype='uint8')
                cv2.putText(stream_end_frame,
                            "Input file {} has ended".format(
                                name_of_videos[index][1].split('/')[-1]),
                            (10, int(currVideo.height / 2)),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)
                cv2.imshow(currVideo.name, stream_end_frame)
                continue
            # Transform image to model input
            rsImg = cv2.resize(current_img, (model_width, model_height))
            rsImg = rsImg.transpose((2, 0, 1))
            rsImg = rsImg.reshape(
                (batch_size, channels, model_height, model_width))

            infer_start_time = datetime.datetime.now()
            # Infer current image
            infer_network.exec_net(currReq, rsImg)

            # Wait for previous request to end
            if infer_network.wait(prevReq) == 0:
                infer_end_time = (datetime.datetime.now() - infer_start_time) * 1000

                in_frame_workers = []

                people = 0
                result = infer_network.get_output(prevReq)
                # Filter output
                for obj in result[0][0]:
                    if obj[2] > conf_inferConfidenceThreshold:
                        xmin = int(obj[3] * prevVideo.width)
                        ymin = int(obj[4] * prevVideo.height)
                        xmax = int(obj[5] * prevVideo.width)
                        ymax = int(obj[6] * prevVideo.height)

                        ymin = ymin - int(padding * (ymax - ymin))
                        in_frame_workers.append((xmin, ymin, xmax, ymax))
                        people += 1

                violations = detect_workers(in_frame_workers, previous_img)
                # Check if detected violations equals previous frames
                if violations == prevVideo.currentViolationCount:
                    prevVideo.currentViolationCountConfidence += 1
                    # If frame threshold is reached, change validated count
                    if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:
                        # If another violation occurred, save image
                        if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
                            prevVideo.totalViolations += (
                                    prevVideo.currentViolationCount - prevVideo.prevViolationCount)
                        prevVideo.prevViolationCount = prevVideo.currentViolationCount
                else:
                    prevVideo.currentViolationCountConfidence = 0
                    prevVideo.currentViolationCount = violations

                # Check if detected people count equals previous frames
                if people == prevVideo.currentPeopleCount:
                    prevVideo.currentPeopleCountConfidence += 1
                    # If frame threshold is reached, change validated count
                    if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
                        prevVideo.currentTotalPeopleCount += (
                                prevVideo.currentPeopleCount - prevVideo.prevPeopleCount)
                        if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
                            prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
                        prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
                else:
                    prevVideo.currentPeopleCountConfidence = 0
                    prevVideo.currentPeopleCount = people

                frame_end_time = datetime.datetime.now()
                cv2.putText(previous_img, 'Total people count: ' + str(
                    prevVideo.totalPeopleCount), (10, prevVideo.height - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img, 'Current people count: ' + str(
                    prevVideo.currentTotalPeopleCount),
                            (10, prevVideo.height - 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img, 'Total violation count: ' + str(
                    prevVideo.totalViolations), (10, prevVideo.height - 70),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img, 'FPS: %0.2fs' % (1 / (
                        frame_end_time - prevVideo.frame_start_time).total_seconds()),
                            (10, prevVideo.height - 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.putText(previous_img, 'Inference time: {}ms'.format((infer_end_time).total_seconds()),
                            (10, prevVideo.height - 130),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.imshow(prevVideo.name, previous_img)
                prevVideo.frame_start_time = datetime.datetime.now()
            # Swap
            currReq, prevReq = prevReq, currReq
            previous_img = current_img
            prevVideo = currVideo
        # Exit if ESC key is pressed
        if cv2.waitKey(wait_time) == 27:
            print("Attempting to stop input files")
            break
        if False not in vid_finished:
            break
    infer_network.clean()
    cv2.destroyAllWindows()
Example #20
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.
    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id, args.cpu_extension)[1]
    ### TODO: Handle the input stream ###
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp') :
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    lagtime = 0
    
    path_out = './output.avi'
    #fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(path_out, fourcc, 24.0, (768,432))

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        ### TODO: Pre-process the image as needed ###
        image = pre_process_image(frame, n, c, h, w)
        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(cur_request_id)

            ### TODO: Extract any desired stats from the results ###
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            
            ### TODO: Calculate and send relevant information on ###
            client.publish(inf_time_message)

             # write new frame
            #print("frame size : ", frame.shape[1] ", " frame.shape[0])
            #out.write(frame)

            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if current_count < last_count:
                duration = int(time.time() - start_time)
                if duration > 0:
                    # Publish messages to the MQTT server
                    client.publish("person/duration",
                                    json.dumps({"duration": duration + lagtime}))
                    
                else:
                    lagtime += 1
                    log.warning(lagtime)
        
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            
            if key_pressed == 27:
                break

        # Send frame to the ffmpeg server
        sys.stdout.buffer.write(frame)  
        sys.stdout.flush()
        ### TODO: Send the frame to the FFMPEG server ###

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    #out.release()
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
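
Examples #15 and #20 optionally dump per-layer timing through a performance_counts() helper. Assuming performance_counter() hands back the Inference Engine's per-layer counter dictionary (layer name mapped to layer type, execution type, status and timings), the helper is presumably a simple table printer along these lines:

def performance_counts(perf_count):
    """Print per-layer performance counters reported by the Inference Engine."""
    print("{:<40} {:<15} {:<15} {:<15} {:<10}".format(
        'name', 'layer_type', 'exec_type', 'status', 'real_time, us'))
    for layer, stats in perf_count.items():
        print("{:<40} {:<15} {:<15} {:<15} {:<10}".format(
            layer, stats['layer_type'], stats['exec_type'],
            stats['status'], stats['real_time']))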
Example #21
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    plugin = Network()
    client = connect_mqtt()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    
    single_image_mode = False
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    current_count = 0
    

    #Load the model through `infer_network`
    plugin.load_model(args.model,args.cpu_extension,args.device,cur_request_id)
    
    
    net_input_shape = plugin.get_input_shape()
   
    #Handle the input stream
    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.bmp') or args.input.endswith('.jpg'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
    
    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)
    
    if not cap.isOpened():
        log.warning("Unable to open video source")
        
    # Grab the shape of the input 
    w = int(cap.get(3))
    h = int(cap.get(4))
    
    in_shape = net_input_shape['image_tensor']

    #Loop until stream is over
    while(cap.isOpened()):
        
        #Read from the video capture
        ret, frame = cap.read()
       
        if not ret:
            break
        key_pressed = cv2.waitKey(60)

        #Pre-process the image as needed
        p_frame = cv2.resize(frame, (in_shape[3], in_shape[2]))
        p_frame = p_frame.transpose((2,0,1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        
        #Start asynchronous inference for specified request
        net_input = {'image_tensor': p_frame,'image_info': p_frame.shape[1:]}
        plugin.exec_net(net_input,cur_request_id)
        
        #TODO: Wait for the result
        inf_start = time.time()
        if plugin.wait(cur_request_id) == 0:
            
            det_time = time.time() - inf_start
           
            #Get the results of the inference request
            fetch_start = time.time()
            result = plugin.get_output()
            fetch_end = time.time()
            log.warning("Elapsed Time: %s", fetch_end - fetch_start)

            #Extract any desired stats from the results
            current_count = 0
            for obj in result[0][0]:
            # Draw bounding box for object when it's probability is more than
            #  the specified threshold
                if obj[2] > prob_threshold:
                    xmin = int(obj[3] * w)
                    ymin = int(obj[4] * h)
                    xmax = int(obj[5] * w)
                    ymax = int(obj[6] * h)
                
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
                    current_count = current_count + 1
            
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            #Calculate and send relevant information on
            #current_count, total_count and duration to the MQTT server
            #Topic "person": keys of "count" and "total"
            # When new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            
            if key_pressed == 27:
                break

        #Send the frame to the FFMPEG server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        #Write an output image if `single_image_mode`
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
        
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    plugin.clean()
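
The example above swaps the client passed in by its caller for one returned by a connect_mqtt() helper that is not shown here. A minimal sketch of that helper, with the broker host, port and keep-alive interval assumed to be module-level constants:

import paho.mqtt.client as mqtt

MQTT_HOST = "localhost"          # assumed broker address
MQTT_PORT = 3001                 # assumed broker port
MQTT_KEEPALIVE_INTERVAL = 60     # seconds

def connect_mqtt():
    """Create an MQTT client and connect it to the broker."""
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    return client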
Example #22
def main():
    global CONFIG_FILE
    global is_async_mode
    global CONFIDENCE
    global POSE_CHECKED
    global INFO
    global COUNTER
    global ALARM_ON
    global yawns
    global yawn_status
    global EYE_AR_CONSEC_FRAMES
    global ear
    global leftEye
    global rightEye

    args = build_argparser().parse_args()

    try:
        CONFIDENCE = float(os.environ['CONFIDENCE'])
    except:
        CONFIDENCE = 0.5

    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())
    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
            cap = cv2.VideoCapture(input_stream)
            if not cap.isOpened():
                print("\nCamera not plugged in... Exiting...\n")
                sys.exit(0)
        else:
            input_stream = item['video']
            cap = cv2.VideoCapture(input_stream)
            if not cap.isOpened():
                print("\nUnable to open video file... Exiting...\n")
                sys.exit(0)
    fps = cap.get(cv2.CAP_PROP_FPS)
    if args.flag == "async":
        is_async_mode = True
        print('Application running in async mode')
    else:
        is_async_mode = False
        print('Application running in sync mode')

    # Initialise the class
    infer_network = Network()

    infer_network_pose = Network()
    # Load the network to IE plugin to get shape of input layer
    plugin, (n_fd, c_fd, h_fd,
             w_fd) = infer_network.load_model(args.modelface, args.device, 1,
                                              1, 2, args.cpu_extension)

    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(
        args.modelpose, args.device, 1, 3, 2, args.cpu_extension, plugin)[1]

    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file.
    out = cv2.VideoWriter(CWD + '/output_snapshots/outpy.mp4', 0x00000021, 10,
                          (initial_w, initial_h))

    frame_count = 1

    #ret, frame = cap.read()
    cur_request_id = 0
    next_request_id = 1

    while cap.isOpened():
        looking = 0
        ret, frame = cap.read()
        start_time = time.time()

        if not ret:
            break
        frame_count = frame_count + 1
        initial_wh = [cap.get(3), cap.get(4)]
        in_frame = cv2.resize(frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n_fd, c_fd, h_fd, w_fd))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        if is_async_mode:
            infer_network.exec_net(next_request_id, in_frame)
        else:
            infer_network.exec_net(cur_request_id, in_frame)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            people_count = 0

            # Converting to Grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            #Start region drowsiness detect
            # detect faces in the grayscale frame
            rects = detector(gray, 0)

            # loop over the face detections
            for rect in rects:
                shape = predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                leftEAR = eye_aspect_ratio(leftEye)
                rightEAR = eye_aspect_ratio(rightEye)

                ear = (leftEAR + rightEAR) / 2.0

                leftEyeHull = cv2.convexHull(leftEye)
                #print(leftEyeHull, leftEyeHull.dtype)
                rightEyeHull = cv2.convexHull(rightEye)
                cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
                cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)

                #to calculate yawn
                mouth = shape[mStart:mEnd]
                for (x, y) in mouth:
                    cv2.circle(frame, (x, y), 1, (0, 0, 255), -1)

                frame, lip_distance = mouth_open(frame)
                prev_yawn_status = yawn_status

                if ear < EYE_AR_THRESH:
                    COUNTER += 1
                    # if the eyes were closed for a sufficient number of times then sound the alarm
                    if COUNTER >= EYE_AR_CONSEC_FRAMES:
                        # if the alarm is not on, turn it on
                        if not ALARM_ON:
                            ALARM_ON = True
                            # check to see if an alarm file was supplied,
                            # and if so, start a thread to have the alarm
                            # sound played in the background
                            if alarm != "":
                                t = Thread(target=sound_alarm, args=(alarm, ))
                                t.daemon = True
                                t.start()

                else:
                    COUNTER = 0
                    ALARM_ON = False

                if lip_distance > MOUTH_OPEN_THRESH:
                    yawn_status = True

                else:
                    yawn_status = False

                if prev_yawn_status == True and yawn_status == False:
                    yawns += 1

            #end region drowsiness

            # Results of the output layer of the network
            res = infer_network.get_output(cur_request_id)

            # Parse face detection output
            faces = face_detection(res, initial_wh)

            if len(faces) != 0:
                # Look for poses
                for res_hp in faces:
                    xmin, ymin, xmax, ymax = res_hp
                    head_pose = frame[ymin:ymax, xmin:xmax]
                    in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                    in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                    in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))

                    inf_start_hp = time.time()
                    infer_network_pose.exec_net(0, in_frame_hp)
                    infer_network_pose.wait(0)
                    det_time_hp = time.time() - inf_start_hp

                    # Parse head pose detection results
                    angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")
                    angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")
                    angle_r_fc = infer_network_pose.get_output(0, "angle_r_fc")
                    if ((angle_y_fc > -22.5) & (angle_y_fc < 22.5) &
                        (angle_p_fc > -22.5) & (angle_p_fc < 22.5) &
                        (angle_r_fc > -22.5) & (angle_r_fc < 22.5)):
                        looking += 1
                        POSE_CHECKED = True
                        INFO = INFO._replace(looker=looking)
                        #print("Subject is looking")
                        INFO = INFO._replace(
                            msg="Looking straight, you are doing great! Keep it up!")
                    else:
                        INFO = INFO._replace(looker=looking)
                        #print("Subject is not looking")
                        INFO = INFO._replace(msg="WATCH THE ROAD!")

            else:
                INFO = INFO._replace(looker=0)

            time_interval = MULTIPLICATION_FACTOR * fps
            if frame_count % time_interval == 0:
                # Periodically save a time-stamped snapshot of the frame
                # (helper assumed, as in the heat-map example below)
                apply_time_stamp_and_save(frame, people_count)

        #frame = next_frame
        if is_async_mode:
            cur_request_id, next_request_id = next_request_id, cur_request_id
        #print("FPS : {}".format(1/(time.time() - start_time)))

        # Draw performance stats
        inf_time_message = "Face Inference time: N\A for async mode" if is_async_mode else \
            "Inference time: {:.3f} ms".format(det_time * 1000)


        head_inf_time_message = "Head pose Inference time: N\A for async mode" if is_async_mode else \
                "Inference time: {:.3f} ms".format(det_time_hp * 1000)
        cv2.putText(frame, head_inf_time_message, (0, 55),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        log_message = "Async mode is on." if is_async_mode else \
            "Async mode is off."
        cv2.putText(frame, log_message, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (0, 35), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Driver: {}".format(INFO.driver), (0, 90),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        cv2.putText(frame, INFO.msg, (75, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                    (0, 0, 255), 2)

        output_text = " Yawn frame Count: " + str(yawns)

        cv2.putText(frame, output_text, (0, 110), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (230, 0, 0), 2)

        if yawn_status == True:
            cv2.putText(frame, "Driver is Yawning!! BE AWAKE!!", (0, 150),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        if COUNTER >= EYE_AR_CONSEC_FRAMES:
            cv2.putText(frame, "Drowsiness Alert!!", (400, 35),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            cv2.putText(frame, " Eye Aspect Ratio(EAR): {:.2f}".format(ear),
                        (400, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255),
                        2)
        else:
            cv2.putText(frame, "Driver is Awake!! ", (400, 35),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.putText(frame, "Eye Aspect Ratio(EAR): {:.2f}".format(ear),
                        (400, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                        2)
        if ALARM_ON == True:
            cv2.putText(frame, "BE AWAKE!! Alarm ON", (0, 170),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        else:
            cv2.putText(frame, "Alarm OFF", (0, 170), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (0, 255, 0), 2)

        cv2.imshow("Detection Results", frame)
        # Write the frame into the file 'output.avi'
        out.write(frame)

        # Frames are read at an interval of 1 millisecond
        key = cv2.waitKey(1)
        if key == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
    infer_network_pose.clean()
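
# Hedged sketch: eye_aspect_ratio() is used in the drowsiness logic above but
# is not part of this snippet. The usual implementation follows the EAR
# formula (mean of the two vertical eye distances over the horizontal one);
# the original project may differ in detail.
from scipy.spatial import distance as dist


def eye_aspect_ratio(eye):
    """Return the eye aspect ratio for a 6-point eye landmark array."""
    a = dist.euclidean(eye[1], eye[5])  # first vertical distance
    b = dist.euclidean(eye[2], eye[4])  # second vertical distance
    c = dist.euclidean(eye[0], eye[3])  # horizontal distance
    return (a + b) / (2.0 * c)
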

# Example 23

def main():
    global CONFIG_FILE
    global is_async_mode
    args = build_argparser().parse_args()

    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())
    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
            cap = cv2.VideoCapture(input_stream)
            if not cap.isOpened():
                print("\nCamera not plugged in... Exiting...\n")
                sys.exit(0)
        else:
            input_stream = item['video']
            cap = cv2.VideoCapture(input_stream)
            if not cap.isOpened():
                print("\nUnable to open video file... Exiting...\n")
                sys.exit(0)
    fps = cap.get(cv2.CAP_PROP_FPS)
    if args.flag == "async":
        is_async_mode = True
        print('Application running in async mode')
    else:
        is_async_mode = False
        print('Application running in sync mode')

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1, 2,
                                          args.cpu_extension)[1]

    print("To stop the execution press Esc button")
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    frame_count = 1
    accumulated_image = np.zeros((initial_h, initial_w), np.uint8)
    mog = cv2.createBackgroundSubtractorMOG2()
    ret, frame = cap.read()
    cur_request_id = 0
    next_request_id = 1

    while cap.isOpened():
        ret, next_frame = cap.read()
        start_time = time.time()

        if not ret:
            break
        frame_count = frame_count + 1
        in_frame = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame = in_frame.transpose((2, 0, 1))
        in_frame = in_frame.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        if is_async_mode:
            infer_network.exec_net(next_request_id, in_frame)
        else:
            infer_network.exec_net(cur_request_id, in_frame)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            people_count = 0

            # Converting to Grayscale
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Remove the background
            fgbgmask = mog.apply(gray)
            # Thresholding the image
            thresh = 2
            max_value = 2
            threshold_image = cv2.threshold(fgbgmask, thresh, max_value,
                                            cv2.THRESH_BINARY)[1]
            # Adding to the accumulated image
            accumulated_image = cv2.add(threshold_image, accumulated_image)
            colormap_image = cv2.applyColorMap(accumulated_image,
                                               cv2.COLORMAP_HOT)

            # Results of the output layer of the network
            res = infer_network.get_output(cur_request_id)
            for obj in res[0][0]:
                # Draw only objects when probability more than specified threshold
                if obj[2] > args.prob_threshold:
                    xmin = int(obj[3] * initial_w)
                    ymin = int(obj[4] * initial_h)
                    xmax = int(obj[5] * initial_w)
                    ymax = int(obj[6] * initial_h)
                    class_id = int(obj[1])
                    # Draw bounding box
                    color = (min(class_id * 12.5,
                                 255), min(class_id * 7,
                                           255), min(class_id * 5, 255))
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                    people_count = people_count + 1

            people_count_message = "People Count : " + str(people_count)
            cv2.putText(frame, people_count_message, (15, 65),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

            cv2.imshow("Detection Results", frame)

            time_interval = MULTIPLICATION_FACTOR * fps
            if frame_count % time_interval == 0:
                apply_time_stamp_and_save(frame, people_count)

        frame = next_frame
        if is_async_mode:
            cur_request_id, next_request_id = next_request_id, cur_request_id
        print("FPS : {}".format(1 / (time.time() - start_time)))

        # Frames are read at an interval of 1 millisecond
        key = cv2.waitKey(1)
        if key == 27:
            break
    cap.release()
    cv2.destroyAllWindows()
    infer_network.clean()
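
# Hedged sketch: apply_time_stamp_and_save() is called above but not included
# in this snippet. A minimal version that overlays the capture time and writes
# a snapshot to disk; the directory and file naming are assumptions.
import os
import time

import cv2


def apply_time_stamp_and_save(image, people_count, output_dir='output_snapshots'):
    """Overlay a timestamp on the frame and save it as a numbered snapshot."""
    current_time = time.strftime("%Y-%m-%d_%H-%M-%S")
    cv2.putText(image, current_time, (15, 100), cv2.FONT_HERSHEY_SIMPLEX,
                1, (0, 0, 255), 2)
    os.makedirs(output_dir, exist_ok=True)
    file_name = os.path.join(output_dir,
                             "output_{}_{}.jpg".format(current_time, people_count))
    cv2.imwrite(file_name, image)
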
def main():
    """
    Load the network and parse the SSD output.

    :return: None
    """

    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    client.subscribe(TOPIC)

    args = build_argparser().parse_args()

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())

    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
        elif item['video'].endswith('.jpg') or item['video'].endswith('.bmp'):
            single_image_mode = True
            input_stream = item['video']

        else:
            input_stream = item['video']

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(input_stream)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(1)
        # Start async inference
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for specified request.
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break

        # Send frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
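
# Hedged sketch: ssd_out() is referenced above but not defined in this
# snippet. Based on the SSD parsing used elsewhere in these examples, it
# relies on the module globals initial_w, initial_h and prob_threshold set in
# main(); box colour and thickness are assumptions.
def ssd_out(frame, result):
    """Draw a box per confident detection and return (frame, person count)."""
    current_count = 0
    for obj in result[0][0]:
        # Count and draw only detections above the probability threshold
        if obj[2] > prob_threshold:
            xmin = int(obj[3] * initial_w)
            ymin = int(obj[4] * initial_h)
            xmax = int(obj[5] * initial_w)
            ymax = int(obj[6] * initial_h)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 55, 255), 1)
            current_count += 1
    return frame, current_count
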

# Example 25

def main():
    """
    Load the network and parse the output.
    :return: None
    """
    get_args()
    global is_async_mode
    nextReq = 1
    currReq = 0
    nextReq_s = 1
    currReq_s = 0
    prevVideo = None
    vid_finished = [False] * len(videos)
    min_FPS = min(
        [videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])

    # Initialise the class
    infer_network = Network()
    infer_network_safety = Network()
    # Load the network to IE plugin to get shape of input layer
    plugin, (batch_size, channels, model_height, model_width) = \
        infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)
    if use_safety_model:
        batch_size_sm, channels_sm, model_height_sm, model_width_sm = \
            infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1]

    while True:
        for index, currVideo in videos:
            # Read image from video/cam
            vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_FPS))):
                ret, current_img = currVideo.video.read()
                if not ret:
                    vid_finished[index] = True
                    break
            if vid_finished[index]:
                stream_end_frame = np.zeros(
                    (int(currVideo.height), int(currVideo.width), 1),
                    dtype='uint8')
                cv2.putText(
                    stream_end_frame, "Input file {} has ended".format(
                        name_of_videos[index][1].split('/')[-1]),
                    (10, int(currVideo.height / 2)), cv2.FONT_HERSHEY_COMPLEX,
                    1, (255, 255, 255), 2)
                cv2.imshow(currVideo.name, stream_end_frame)
                continue
            # Transform image to person detection model input
            rsImg = cv2.resize(current_img, (model_width, model_height))
            rsImg = rsImg.transpose((2, 0, 1))
            rsImg = rsImg.reshape(
                (batch_size, channels, model_height, model_width))

            infer_start_time = datetime.datetime.now()
            # Infer current image
            if is_async_mode:
                infer_network.exec_net(nextReq, rsImg)
            else:
                infer_network.exec_net(currReq, rsImg)
                prevVideo = currVideo
                previous_img = current_img

            # Wait for previous request to end
            if infer_network.wait(currReq) == 0:
                infer_end_time = (datetime.datetime.now() -
                                  infer_start_time) * 1000

                in_frame_workers = []

                people = 0
                violations = 0
                hard_hat_detection = False
                vest_detection = False
                result = infer_network.get_output(currReq)
                # Filter output
                for obj in result[0][0]:
                    if obj[2] > conf_inferConfidenceThreshold:
                        xmin = int(obj[3] * prevVideo.width)
                        ymin = int(obj[4] * prevVideo.height)
                        xmax = int(obj[5] * prevVideo.width)
                        ymax = int(obj[6] * prevVideo.height)
                        xmin = int(xmin -
                                   padding) if (xmin - padding) > 0 else 0
                        ymin = int(ymin -
                                   padding) if (ymin - padding) > 0 else 0
                        xmax = int(xmax + padding) if (
                            xmax +
                            padding) < prevVideo.width else prevVideo.width
                        ymax = int(ymax + padding) if (
                            ymax +
                            padding) < prevVideo.height else prevVideo.height
                        cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax),
                                      (0, 255, 0), 2)
                        people += 1
                        in_frame_workers.append((xmin, ymin, xmax, ymax))
                        new_frame = previous_img[ymin:ymax, xmin:xmax]
                        if use_safety_model:

                            # Transform image to safety model input
                            in_frame_sm = cv2.resize(
                                new_frame, (model_width_sm, model_height_sm))
                            in_frame_sm = in_frame_sm.transpose((2, 0, 1))
                            in_frame_sm = in_frame_sm.reshape(
                                (batch_size_sm, channels_sm, model_height_sm,
                                 model_width_sm))

                            infer_start_time_sm = datetime.datetime.now()
                            if is_async_mode:
                                infer_network_safety.exec_net(
                                    nextReq_s, in_frame_sm)
                            else:
                                infer_network_safety.exec_net(
                                    currReq_s, in_frame_sm)
                            # Wait for the result
                            infer_network_safety.wait(currReq_s)
                            infer_end_time_sm = (datetime.datetime.now() -
                                                 infer_start_time_sm) * 1000

                            result_sm = infer_network_safety.get_output(
                                currReq_s)
                            # Filter output
                            hard_hat_detection = False
                            vest_detection = False
                            detection_list = []
                            for obj_sm in result_sm[0][0]:

                                if (obj_sm[2] > 0.4):
                                    # Detect safety vest
                                    if (int(obj_sm[1])) == 2:
                                        xmin_sm = int(obj_sm[3] *
                                                      (xmax - xmin))
                                        ymin_sm = int(obj_sm[4] *
                                                      (ymax - ymin))
                                        xmax_sm = int(obj_sm[5] *
                                                      (xmax - xmin))
                                        ymax_sm = int(obj_sm[6] *
                                                      (ymax - ymin))
                                        if vest_detection == False:
                                            detection_list.append([
                                                xmin_sm + xmin, ymin_sm + ymin,
                                                xmax_sm + xmin, ymax_sm + ymin
                                            ])
                                            vest_detection = True

                                    # Detect hard-hat
                                    if int(obj_sm[1]) == 4:
                                        xmin_sm_v = int(obj_sm[3] *
                                                        (xmax - xmin))
                                        ymin_sm_v = int(obj_sm[4] *
                                                        (ymax - ymin))
                                        xmax_sm_v = int(obj_sm[5] *
                                                        (xmax - xmin))
                                        ymax_sm_v = int(obj_sm[6] *
                                                        (ymax - ymin))
                                        if hard_hat_detection == False:
                                            detection_list.append([
                                                xmin_sm_v + xmin,
                                                ymin_sm_v + ymin,
                                                xmax_sm_v + xmin,
                                                ymax_sm_v + ymin
                                            ])
                                            hard_hat_detection = True

                            if hard_hat_detection is False or vest_detection is False:
                                violations += 1
                            for _rect in detection_list:
                                cv2.rectangle(current_img,
                                              (_rect[0], _rect[1]),
                                              (_rect[2], _rect[3]),
                                              (0, 255, 0), 2)
                            if is_async_mode:
                                currReq_s, nextReq_s = nextReq_s, currReq_s

                        # Use OpenCV if a worker-safety model is not provided
                        else:
                            violations = detect_workers(
                                in_frame_workers, previous_img)

                # Check if detected violations equals previous frames
                if violations == prevVideo.currentViolationCount:
                    prevVideo.currentViolationCountConfidence += 1

                    # If frame threshold is reached, change validated count
                    if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:

                        # If another violation occurred, save image
                        if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
                            prevVideo.totalViolations += (
                                prevVideo.currentViolationCount -
                                prevVideo.prevViolationCount)
                        prevVideo.prevViolationCount = prevVideo.currentViolationCount
                else:
                    prevVideo.currentViolationCountConfidence = 0
                    prevVideo.currentViolationCount = violations

                # Check if detected people count equals previous frames
                if people == prevVideo.currentPeopleCount:
                    prevVideo.currentPeopleCountConfidence += 1

                    # If frame threshold is reached, change validated count
                    if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
                        prevVideo.currentTotalPeopleCount += (
                            prevVideo.currentPeopleCount -
                            prevVideo.prevPeopleCount)
                        if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
                            prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
                        prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
                else:
                    prevVideo.currentPeopleCountConfidence = 0
                    prevVideo.currentPeopleCount = people

                frame_end_time = datetime.datetime.now()
                cv2.putText(
                    previous_img,
                    'Total people count: ' + str(prevVideo.totalPeopleCount),
                    (10, prevVideo.height - 10), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(
                    previous_img, 'Current people count: ' +
                    str(prevVideo.currentTotalPeopleCount),
                    (10, prevVideo.height - 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(
                    previous_img,
                    'Total violation count: ' + str(prevVideo.totalViolations),
                    (10, prevVideo.height - 70), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(
                    previous_img, 'FPS: %0.2fs' %
                    (1 / (frame_end_time -
                          prevVideo.frame_start_time).total_seconds()),
                    (10, prevVideo.height - 100), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(previous_img, "Inference time: N\A for async mode" if is_async_mode else\
    "Inference time: {:.3f} ms".format((infer_end_time).total_seconds()),
                            (10, prevVideo.height - 130),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

                cv2.imshow(prevVideo.name, previous_img)
                prevVideo.frame_start_time = datetime.datetime.now()
            # Swap
            if is_async_mode:
                currReq, nextReq = nextReq, currReq
                previous_img = current_img
                prevVideo = currVideo
            if cv2.waitKey(1) == 27:
                print("Attempting to stop input files")
                infer_network.clean()
                infer_network_safety.clean()
                cv2.destroyAllWindows()
                return

        if False not in vid_finished:
            infer_network.clean()
            infer_network_safety.clean()
            cv2.destroyAllWindows()
            break
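
# Hedged sketch: detect_workers() (the OpenCV fallback used when no safety
# model is supplied) is not included in this snippet. One plausible approach
# thresholds each worker crop in HSV space for high-visibility vest and
# hard-hat colours; the colour ranges and area ratios below are illustrative
# assumptions, not the original values.
import cv2


def detect_workers(workers, image):
    """Return the number of workers whose crop shows neither vest nor hat."""
    violations = 0
    for xmin, ymin, xmax, ymax in workers:
        crop = image[ymin:ymax, xmin:xmax]
        if crop.size == 0:
            continue
        hsv = cv2.cvtColor(crop, cv2.COLOR_BGR2HSV)
        area = crop.shape[0] * crop.shape[1]
        # Approximate masks for a yellow vest and a light-coloured hard hat
        vest_mask = cv2.inRange(hsv, (20, 100, 100), (35, 255, 255))
        hat_mask = cv2.inRange(hsv, (0, 0, 200), (180, 40, 255))
        has_vest = cv2.countNonZero(vest_mask) > 0.05 * area
        has_hat = cv2.countNonZero(hat_mask) > 0.02 * area
        if not (has_vest and has_hat):
            violations += 1
    return violations
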

# Example 26

def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global INFO
    global DELAY
    global POSE_CHECKED

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO, stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()

    #if args.input == 'cam':
       # input_stream = 0
    #else:
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    shopper = cv2.VideoWriter(os.path.join(args.output_dir, "shopper.mp4"), cv2.VideoWriter_fourcc(*"AVC1"), fps, (initial_w, initial_h), True)
    frame_count = 0
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,'i_progress_'+str(job_id)+'.txt')
    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)
        # Adjust DELAY to match the number of FPS of the video file
        DELAY = 1000 / cap.get(cv2.CAP_PROP_FPS)

    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        return

    # Initialise the class
    infer_network = Network()
    infer_network_pose = Network()
    # Load the network to IE plugin to get shape of input layer
    
    
    plugin, (n_fd, c_fd, h_fd, w_fd) = infer_network.load_model(args.model,
                                                      args.device, 1, 1, 0,
                                                      args.cpu_extension)
    n_hp, c_hp, h_hp, w_hp = infer_network_pose.load_model(args.posemodel,
                                                           args.device, 1,
                                                           3, 0,
                                                           args.cpu_extension, plugin)[1]
    
    ret, frame = cap.read()
    
    while ret:
        looking = 0
        ret, next_frame = cap.read()
        frame_count += 1
        if not ret:
            print ("checkpoint *BREAKING")
            break

        if next_frame is None:
            log.error("checkpoint ERROR! blank FRAME grabbed")
            break

        initial_wh = [cap.get(3), cap.get(4)]
        in_frame_fd = cv2.resize(next_frame, (w_fd, h_fd))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n_fd, c_fd, h_fd, w_fd))

        
        # Start asynchronous inference for specified request
        inf_start_fd = time.time()
        infer_network.exec_net(0, in_frame_fd)
        # Wait for the result
        infer_network.wait(0)
        det_time_fd = time.time() - inf_start_fd
        
        # Results of the output layer of the network
        res = infer_network.get_output(0)

        # Parse face detection output
        faces = face_detection(res, args, initial_wh)

        if len(faces) != 0:
            # Look for poses
            for res_hp in faces:
                xmin, ymin, xmax, ymax = res_hp
                head_pose = frame[ymin:ymax, xmin:xmax]
                in_frame_hp = cv2.resize(head_pose, (w_hp, h_hp))
                in_frame_hp = in_frame_hp.transpose((2, 0, 1))
                in_frame_hp = in_frame_hp.reshape((n_hp, c_hp, h_hp, w_hp))

                inf_start_hp = time.time()
                infer_network_pose.exec_net(0, in_frame_hp)
                infer_network_pose.wait(0)
                det_time_hp = time.time() - inf_start_hp


                # Parse head pose detection results
                angle_p_fc = infer_network_pose.get_output(0, "angle_p_fc")
                angle_y_fc = infer_network_pose.get_output(0, "angle_y_fc")
                if ((angle_y_fc > -22.5) & (angle_y_fc < 22.5) & (angle_p_fc > -22.5) &
                        (angle_p_fc < 22.5)):
                    looking += 1
                    POSE_CHECKED = True
                    INFO = INFO._replace(looker=looking)
                else:
                    INFO = INFO._replace(looker=looking)
        else:
            INFO = INFO._replace(looker=0)

        # Draw performance stats
        inf_time_message = "Face Inference time: {:.3f} ms.".format(det_time_fd * 1000)

        if POSE_CHECKED:
            cv2.putText(frame, "Head pose Inference time: {:.3f} ms.".format(det_time_hp * 1000), (0, 35),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (0, 15), cv2.FONT_HERSHEY_COMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Shopper: {}".format(INFO.shopper), (0, 90), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Looker: {}".format(INFO.looker), (0, 110), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)

        shopper.write(frame)
        if frame_count%10 == 0: 
            progressUpdate(progress_file_path, int(time.time()-infer_time_start), frame_count, video_len)
        frame = next_frame
        if args.output_dir:
            total_time = time.time() - infer_time_start
            with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
                f.write(str(round(total_time, 1))+'\n')
                f.write(str(frame_count)+'\n')
    infer_network.clean()
    infer_network_pose.clean()
    cap.release()
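
# Hedged sketch: progressUpdate() is used by several of these examples but not
# shown here. It appears to write progress figures for an external job
# monitor; a minimal version under that assumption:
def progressUpdate(file_path, time_diff, frame_count, video_len):
    """Write % complete, estimated remaining and total time to a text file."""
    progress = round(100 * frame_count / video_len, 1)
    remaining_time = round((time_diff / frame_count) * (video_len - frame_count), 1)
    estimated_time = round(time_diff + remaining_time, 1)
    with open(file_path, "w") as progress_file:
        progress_file.write(str(progress) + '\n')
        progress_file.write(str(remaining_time) + '\n')
        progress_file.write(str(estimated_time) + '\n')
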
def intruder_detector():
    """
    Process the input source frame by frame and detects intruder, if any.

    :return status: 0 on success, negative value on failure
    """
    global CONF_CANDIDATE_CONFIDENCE
    global LOG_WIN_HEIGHT
    global LOG_WIN_WIDTH
    global CONF_FILE
    global video_caps
    global conf_labels_file_path

    parse_args()
    if not os.path.isfile(CONF_FILE):
        return -12, ""

    if not os.path.isfile(conf_labels_file_path):
        return -13, ""

    # Creates subdirectory to save output snapshots
    pathlib.Path(os.getcwd() + '/output/').mkdir(parents=True, exist_ok=True)

    # Read the configuration file
    ret, req_labels = get_input()
    if ret != 0:
        return ret, req_labels[0]

    if not video_caps:
        return -14, ''

    # Get the labels that are used in the application
    ret, label_names, used_labels = get_used_labels(req_labels)
    if ret != 0:
        return ret, ''
    if True not in used_labels:
        return -15, ''

    # Init a rolling log to store events
    rolling_log_size = int((LOG_WIN_HEIGHT - 15) / 20)
    log_list = collections.deque(maxlen=rolling_log_size)

    # Open a file for intruder logs
    log_file = open(LOG_FILE_PATH, 'w')
    if not log_file:
        return -16, ''

    # Initializing VideoWriter for each source
    for video_cap in video_caps:

        ret, ret_value = video_cap.init_vw(int(video_cap.input_height),
                                           int(video_cap.input_width))
        if ret != 0:
            return ret, ret_value
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 0,
                                          CPU_EXTENSION)[1]

    min_fps = min([i.vc.get(cv2.CAP_PROP_FPS) for i in video_caps])
    no_more_data = [False] * len(video_caps)
    start_time = time.time()
    inf_time = 0
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    statsVideo = cv2.VideoWriter(os.path.join(output_dir, 'Statistics.mp4'),
                                 fourcc, min_fps,
                                 (LOG_WIN_WIDTH, LOG_WIN_HEIGHT), True)
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_start_time = time.time()
    # Main loop starts here. Loop over all the video captures
    while True:
        for idx, video_cap in enumerate(video_caps):
            # Get a new frame
            vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_fps))):
                ret, video_cap.frame = video_cap.vc.read()
                video_cap.loop_frames += 1
                # If no new frame or error in reading a frame, exit the loop
                if not ret:
                    no_more_data[idx] = True
                    break
            if no_more_data[idx]:
                stream_end_frame = numpy.zeros((int(
                    video_cap.input_height), int(video_cap.input_width), 1),
                                               dtype='uint8')
                stream_end_message = "Stream from {} has ended.".format(
                    video_cap.cam_name)
                cv2.putText(stream_end_frame, stream_end_message,
                            (int(video_cap.input_width / 2) - 30,
                             int(video_cap.input_height / 2) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                continue
            for i in range(video_cap.no_of_labels):
                video_cap.current_count[i] = 0
                video_cap.changed_count[i] = False

            # Resize to expected size (in model .xml file)
            # Input frame is resized to infer resolution
            in_frame = cv2.resize(video_cap.frame, (w, h))

            # PRE-PROCESS STAGE:
            # Convert image to format expected by inference engine
            # IE expects planar, convert from packed
            # Change data layout from HWC to CHW
            in_frame = in_frame.transpose((2, 0, 1))
            in_frame = in_frame.reshape((n, c, h, w))
            # Start asynchronous inference for specified request.
            inf_start = time.time()
            infer_network.exec_net(0, in_frame)
            # Wait for the result
            if infer_network.wait(0) == 0:
                inf_time = time.time() - inf_start
                # Results of the output layer of the network
                res = infer_network.get_output(0)
                for obj in res[0][0]:
                    label = int(obj[1]) - 1
                    # Draw the bounding box around the object when the probability is more than specified threshold
                    if obj[2] > CONF_THRESHOLD_VALUE and used_labels[label]:
                        video_cap.current_count[label] += 1
                        xmin = int(obj[3] * video_cap.input_width)
                        ymin = int(obj[4] * video_cap.input_height)
                        xmax = int(obj[5] * video_cap.input_width)
                        ymax = int(obj[6] * video_cap.input_height)
                        # Draw bounding box around the intruder detected
                        cv2.rectangle(video_cap.frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                for i in range(video_cap.no_of_labels):
                    if video_cap.candidate_count[i] == video_cap.current_count[
                            i]:
                        video_cap.candidate_confidence[i] += 1
                    else:
                        video_cap.candidate_confidence[i] = 0
                        video_cap.candidate_count[i] = video_cap.current_count[
                            i]

                    if video_cap.candidate_confidence[
                            i] == CONF_CANDIDATE_CONFIDENCE:
                        video_cap.candidate_confidence[i] = 0
                        video_cap.changed_count[i] = True
                    else:
                        continue

                    if video_cap.current_count[
                            i] > video_cap.last_correct_count[i]:
                        video_cap.total_count[i] += video_cap.current_count[
                            i] - video_cap.last_correct_count[i]
                        det_objs = video_cap.current_count[
                            i] - video_cap.last_correct_count[i]
                        total_count = sum(video_cap.total_count)
                        for det_obj in range(det_objs):
                            current_time = time.strftime("%H:%M:%S")
                            log = "{} - Intruder {} detected on {}".format(
                                current_time, label_names[i],
                                video_cap.cam_name)
                            print(log)
                            log_list.append(log)
                            log_file.write(log + "\n")
                            event = Event(event_time=current_time,
                                          intruder=label_names[i],
                                          count=total_count,
                                          frame=video_cap.frame_count)
                            video_cap.events.append(event)

                        snapshot_name = "output/intruder_{}.png".format(
                            total_count)
                        cv2.imwrite(snapshot_name, video_cap.frame)
                    video_cap.last_correct_count[i] = video_cap.current_count[
                        i]
            # Create intruder log window, add logs to the frame and display it
            log_window = numpy.zeros((LOG_WIN_HEIGHT, LOG_WIN_WIDTH, 1),
                                     dtype='uint8')
            for i, log in enumerate(log_list):
                cv2.putText(log_window, log, (10, 20 * i + 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            log_window = cv2.cvtColor(log_window, cv2.COLOR_GRAY2BGR)
            statsVideo.write(log_window)
            video_cap.frame_count += 1

            # Video output
            inf_time_message = "Inference time: {:.3f} ms".format(inf_time *
                                                                  1000)
            cv2.putText(video_cap.frame, inf_time_message,
                        (10, int(video_cap.input_height) - 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            fps_time = time.time() - start_time
            fps_message = "FPS: {:.3f} fps".format(1 / fps_time)
            cv2.putText(video_cap.frame, fps_message,
                        (10, int(video_cap.input_height) - 10),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # Display the video output
            video_cap.vw.write(video_cap.frame)
            if video_cap.frame_count % 10 == 0:
                progressUpdate(progress_file_path,
                               time.time() - infer_start_time,
                               video_cap.frame_count,
                               int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)))
            start_time = time.time()

            # Loop video to mimic continuous input if LOOP_VIDEO flag is True
            if LOOP_VIDEO and not video_cap.is_cam:
                vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended restart it
                if video_cap.loop_frames > video_cap.vc.get(
                        cv2.CAP_PROP_FRAME_COUNT) - int(round(vfps / min_fps)):
                    video_cap.loop_frames = 0
                    video_cap.vc.set(cv2.CAP_PROP_POS_FRAMES, 0)

        if False not in no_more_data:
            progressUpdate(progress_file_path,
                           time.time() - infer_start_time,
                           int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)),
                           int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)))
            break

    no_more_data = False
    t2 = time.time() - infer_start_time
    for videos in video_caps:
        with open(os.path.join(output_dir, 'stats.txt'), 'w') as f:
            f.write('{} \n'.format(round(t2)))
            f.write('{} \n'.format(videos.frame_count))

    infer_network.clean()
    log_file.close()
    return 0, ''
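
# Hedged sketch: the Event record appended to video_cap.events above is not
# defined in this snippet; a namedtuple with the fields used there would be
# sufficient.
from collections import namedtuple

Event = namedtuple('Event', ['event_time', 'intruder', 'count', 'frame'])
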
def main():
    """
    Load the network and parse the output.

    :return: None
    """
    global DELAY
    global CLIENT
    global SIG_CAUGHT
    global KEEP_RUNNING
    global TARGET_DEVICE
    global is_async_mode
    CLIENT = mqtt.Client()
    CLIENT.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    CLIENT.subscribe(TOPIC)
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    logger = log.getLogger()
    render_time = 0
    roi_x = args.pointx
    roi_y = args.pointy
    roi_w = args.width
    roi_h = args.height
    check_args()

    assert os.path.isfile(CONFIG_FILE), "{} file doesn't exist".format(
        CONFIG_FILE)
    config = json.loads(open(CONFIG_FILE).read())

    for idx, item in enumerate(config['inputs']):
        if item['video'].isdigit():
            input_stream = int(item['video'])
        else:
            input_stream = item['video']

    cap = cv2.VideoCapture(input_stream)
    if not cap.isOpened():
        logger.error("ERROR! Unable to open video source")
        sys.exit(1)

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1

    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(args.model, TARGET_DEVICE, 1, 1, 2,
                                          args.cpu_extension)[1]

    message_thread = Thread(target=message_runner, args=())
    message_thread.daemon = True
    message_thread.start()

    if is_async_mode:
        print("Application running in async mode...")
    else:
        print("Application running in sync mode...")

    ret, frame = cap.read()
    while ret:

        ret, next_frame = cap.read()
        if not ret:
            KEEP_RUNNING = False
            break

        initial_wh = [cap.get(3), cap.get(4)]

        if next_frame is None:
            KEEP_RUNNING = False
            log.error("ERROR! blank FRAME grabbed")
            break

        # If either default values or negative numbers are given,
        # then we will default to start of the FRAME
        if roi_x <= 0 or roi_y <= 0:
            roi_x = 0
            roi_y = 0
        if roi_w <= 0:
            roi_w = next_frame.shape[1]
        if roi_h <= 0:
            roi_h = next_frame.shape[0]
        key_pressed = cv2.waitKey(1)

        # 'c' key pressed
        if key_pressed == 99:
            # Give operator chance to change the area
            # Select rectangle from left upper corner, dont display crosshair
            ROI = cv2.selectROI("Restricted Area Selection", frame, True,
                                False)
            print("Restricted Area Selection: -x = {}, -y = {}, -w = {},"
                  " -h = {}".format(ROI[0], ROI[1], ROI[2], ROI[3]))
            roi_x = ROI[0]
            roi_y = ROI[1]
            roi_w = ROI[2]
            roi_h = ROI[3]
            cv2.destroyAllWindows()

        cv2.rectangle(frame, (roi_x, roi_y), (roi_x + roi_w, roi_y + roi_h),
                      (0, 0, 255), 2)
        selected_region = [roi_x, roi_y, roi_w, roi_h]

        in_frame_fd = cv2.resize(next_frame, (w, h))
        # Change data layout from HWC to CHW
        in_frame_fd = in_frame_fd.transpose((2, 0, 1))
        in_frame_fd = in_frame_fd.reshape((n, c, h, w))

        # Start asynchronous inference for specified request.
        inf_start = time.time()
        if is_async_mode:
            # Async enabled and only one video capture
            infer_network.exec_net(next_request_id, in_frame_fd)
        else:
            # Async disabled
            infer_network.exec_net(cur_request_id, in_frame_fd)
        # Wait for the result
        infer_network.wait(cur_request_id)
        det_time = time.time() - inf_start
        # Results of the output layer of the network
        res = infer_network.get_output(cur_request_id)
        # Parse SSD output
        ssd_out(res, args, initial_wh, selected_region)

        # Draw performance stats
        inf_time_message = "Inference time: N\A for async mode" if is_async_mode else \
            "Inference time: {:.3f} ms".format(det_time * 1000)
        render_time_message = "OpenCV rendering time: {:.3f} ms". \
            format(render_time * 1000)

        if not INFO.safe:
            warning = "Baby in Dangerous Zone: Please Act Fast!"
            cv2.putText(frame, warning, (15, 100), cv2.FONT_HERSHEY_COMPLEX,
                        0.8, (0, 0, 255), 2)

        log_message = "Async mode is on." if is_async_mode else \
            "Async mode is off."
        cv2.putText(frame, log_message, (15, 15), cv2.FONT_HERSHEY_SIMPLEX,
                    0.5, (255, 255, 255), 1)
        cv2.putText(frame, inf_time_message, (15, 35),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, render_time_message, (15, 55),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        cv2.putText(frame, "Baby Safe: {}".format(INFO.safe), (15, 75),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)

        render_start = time.time()
        cv2.imshow("Restricted Zone Notifier", frame)
        render_end = time.time()
        render_time = render_end - render_start

        frame = next_frame

        if key_pressed == 27:
            print("Attempting to stop background threads")
            KEEP_RUNNING = False
            break
            # Tab key pressed
        if key_pressed == 9:
            is_async_mode = not is_async_mode
            print("Switched to {} mode".format(
                "async" if is_async_mode else "sync"))

        if is_async_mode:
            # Swap infer request IDs
            cur_request_id, next_request_id = next_request_id, cur_request_id

    infer_network.clean()
    message_thread.join()
    cap.release()
    cv2.destroyAllWindows()
    CLIENT.disconnect()
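
# Hedged sketch: message_runner() (the background publisher started above) is
# not part of this snippet. A minimal loop that pushes the current safety
# state to the broker while KEEP_RUNNING is set, assuming CLIENT, TOPIC, INFO
# and KEEP_RUNNING are the module-level globals used in main():
import json
import time


def message_runner():
    """Periodically publish the restricted-zone state until the app stops."""
    while KEEP_RUNNING:
        payload = json.dumps({"Safe": INFO.safe})
        CLIENT.publish(TOPIC, payload=payload)
        time.sleep(1)
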
def main():
    # Plugin initialization for specified device and load extensions library
    global rolling_log
    global TARGET_DEVICE
    global videoCapsJson

    env_parser()
    check_args()
    parse_conf_file()

    if TARGET_DEVICE not in acceptedDevices:
        print("Unsupporterd device " + TARGET_DEVICE + ". Defaulting to CPU")
        TARGET_DEVICE = 'CPU'

    # Initialize the class
    infer_network = Network()
    # Load the network to IE Plugin
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 2,
                                          CPU_EXTENSION)[1]
    minFPS = min([i.cap.get(cv2.CAP_PROP_FPS) for i in videoCaps])
    waitTime = int(
        round(1000 / minFPS /
              len(videoCaps)))  # wait time in ms between showing frames
    for vc in videoCaps:
        vc.init_vw(h, w, minFPS)

    statsWidth = w if w > 345 else 345
    statsHeight = h if h > (len(videoCaps) * 20 + 15) else (
        len(videoCaps) * 20 + 15)
    statsVideo = cv2.VideoWriter(os.path.join('resources',
                                              'Statistics.mp4'), 0x00000021,
                                 minFPS, (statsWidth, statsHeight), True)
    if not statsVideo.isOpened():
        print("Couldn't open stats video for writing")
        sys.exit(4)

    # Read the labels file
    if labels_file:
        with open(labels_file, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    # Init a rolling log to store events
    rolling_log_size = int((h - 15) / 20)
    rolling_log = collections.deque(maxlen=rolling_log_size)

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1
    # Start with async mode enabled
    is_async_mode = True

    if not UI_OUTPUT:
        # Arrange windows so they are not overlapping
        arrange_windows(w, h)
        print("To stop the execution press Esc button")

    for idx, vc in enumerate(videoCaps):
        vc.start_time = datetime.datetime.now()
        vc.pos = idx

    if UI_OUTPUT:
        videoCapsJson = videoCaps.copy()

    while True:

        # If all video captures are closed stop the loop
        no_more_data = [videoCap.closed for videoCap in videoCaps]
        # loop over all video captures
        for idx, videoCapInfer in enumerate(videoCaps):

            # read the next frame
            vfps = int(round(videoCapInfer.cap.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / minFPS))):
                ret, frame = videoCapInfer.cap.read()
                videoCapInfer.cur_frame_count += 1
                # If the read failed close the program
                if not ret:
                    no_more_data[idx] = True
                    break

            if no_more_data[idx]:
                if UI_OUTPUT:
                    videoCaps.pop(idx)
                    continue
                else:
                    stream_end_frame = np.zeros((h, w, 1), dtype='uint8')
                    cv2.putText(
                        stream_end_frame, "Input file {} has ended".format(
                            videoCapInfer.cap_name), (20, 150),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                    cv2.imshow(videoCapInfer.cap_name, stream_end_frame)
                    cv2.waitKey(waitTime)
                    videoCaps.pop(idx)
                    continue
            # Copy the current frame for later use
            videoCapInfer.cur_frame = frame.copy()
            videoCapInfer.initial_w = videoCapInfer.cap.get(3)
            videoCapInfer.initial_h = videoCapInfer.cap.get(4)
            # Resize and change the data layout so it is compatible
            in_frame = cv2.resize(videoCapInfer.cur_frame, (w, h))
            in_frame = in_frame.transpose(
                (2, 0, 1))  # Change data layout from HWC to CHW
            in_frame = in_frame.reshape((n, c, h, w))

            infer_start = datetime.datetime.now()
            if is_async_mode:
                # Async enabled and only one video capture
                infer_network.exec_net(next_request_id, in_frame)
                if (len(videoCaps) == 1):
                    videoCapResult = videoCapInfer
                # Async enabled and more than one video capture
                else:
                    # Get previous index
                    videoCapResult = videoCaps[idx - 1 if idx -
                                               1 >= 0 else len(videoCaps) - 1]
            else:
                # Async disabled
                infer_network.exec_net(cur_request_id, in_frame)
                videoCapResult = videoCapInfer

            if infer_network.wait(cur_request_id) == 0:
                infer_end = datetime.datetime.now()
                res = infer_network.get_output(cur_request_id)
                infer_duration = infer_end - infer_start
                current_count = 0
                # Parse detection results of the current request
                for obj in res[0][0]:
                    class_id = int(obj[1])
                    # Draw only objects whose confidence exceeds the specified threshold
                    if (obj[2] > PROB_THRESHOLD
                            and videoCapResult.req_label in labels_map
                            and labels_map.index(
                                videoCapResult.req_label) == class_id - 1):
                        current_count += 1
                        xmin = int(obj[3] * videoCapResult.initial_w)
                        ymin = int(obj[4] * videoCapResult.initial_h)
                        xmax = int(obj[5] * videoCapResult.initial_w)
                        ymax = int(obj[6] * videoCapResult.initial_h)
                        # Draw box
                        cv2.rectangle(videoCapResult.cur_frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                if videoCapResult.candidate_count == current_count:
                    videoCapResult.candidate_confidence += 1
                else:
                    videoCapResult.candidate_confidence = 0
                    videoCapResult.candidate_count = current_count

                if videoCapResult.candidate_confidence == FRAME_THRESHOLD:
                    videoCapResult.candidate_confidence = 0
                    if current_count > videoCapResult.last_correct_count:
                        videoCapResult.total_count += current_count - videoCapResult.last_correct_count

                    if current_count != videoCapResult.last_correct_count:
                        if UI_OUTPUT:
                            currtime = datetime.datetime.now().strftime(
                                "%H:%M:%S")
                            fr = FrameInfo(videoCapResult.frames,
                                           current_count, currtime)
                            videoCapResult.countAtFrame.append(fr)

                        new_objects = current_count - videoCapResult.last_correct_count
                        for _ in range(new_objects):
                            strng = "{} - {} detected on {}". \
                                format(time.strftime("%H:%M:%S"),
                                       videoCapResult.req_label,
                                       videoCapResult.cap_name)
                            rolling_log.append(strng)

                    videoCapResult.frames += 1
                    videoCapResult.last_correct_count = current_count
                else:
                    videoCapResult.frames += 1

                videoCapResult.cur_frame = cv2.resize(videoCapResult.cur_frame,
                                                      (w, h))

                if UI_OUTPUT:
                    imgName = videoCapResult.cap_name
                    imgName = imgName.split()[0] + "_" + chr(
                        ord(imgName.split()[1]) + 1)
                    imgName += "_" + str(videoCapResult.frames)
                    frameNames.append(imgName)
                    imgName = CONF_VIDEODIR + imgName + ".jpg"
                    cv2.imwrite(imgName, videoCapResult.cur_frame)
                    videoCapsJson[videoCapResult.pos].countAtFrame = \
                        videoCapResult.countAtFrame
                    a = saveJSON()
                    if a:
                        return a
                if not UI_OUTPUT:
                    # Add log text to each frame
                    log_message = "Async mode is on." if is_async_mode else \
                        "Async mode is off."
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (15, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    log_message = "Total {} count: {}" \
                        .format(videoCapResult.req_label,
                                videoCapResult.total_count)
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (10, h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    log_message = "Current {} count: {}" \
                        .format(videoCapResult.req_label,
                                videoCapResult.last_correct_count)
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (10, h - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    cv2.putText(
                        videoCapResult.cur_frame, 'Infer wait: %0.3fs' %
                        (infer_duration.total_seconds()), (10, h - 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                    # Display inferred frame and stats
                    stats = np.zeros((statsHeight, statsWidth, 1), dtype='uint8')
                    for i, log in enumerate(rolling_log):
                        cv2.putText(stats, log, (10, i * 20 + 15),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (255, 255, 255), 1)
                    cv2.imshow(STATS_WINDOW_NAME, stats)
                    if idx == 0:
                        stats = cv2.cvtColor(stats, cv2.COLOR_GRAY2BGR)
                        statsVideo.write(stats)
                    end_time = datetime.datetime.now()
                    cv2.putText(
                        videoCapResult.cur_frame, 'FPS: %0.2f' %
                        (1 / (end_time -
                              videoCapResult.start_time).total_seconds()),
                        (10, h - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 255), 1)
                    cv2.imshow(videoCapResult.cap_name,
                               videoCapResult.cur_frame)
                    videoCapResult.start_time = datetime.datetime.now()
                    videoCapResult.video.write(videoCapResult.cur_frame)

            # Wait if necessary for the required time
            key = cv2.waitKey(waitTime)

            # Esc key pressed
            if key == 27:
                cv2.destroyAllWindows()
                infer_network.clean()
                print("Finished")
                return
            # Tab key pressed
            if key == 9:
                is_async_mode = not is_async_mode
                print("Switched to {} mode".format(
                    "async" if is_async_mode else "sync"))

            if is_async_mode:
                # Swap infer request IDs
                cur_request_id, next_request_id = next_request_id, cur_request_id
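                # Ping-pong between the two requests: the frame submitted
                # this iteration is read back on the next one.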

            # Loop the video if LOOP_VIDEO is True and the input isn't a live USB camera
            if LOOP_VIDEO and not videoCapInfer.is_cam:
                vfps = int(round(videoCapInfer.cap.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended restart it
                if (videoCapInfer.cur_frame_count >
                        videoCapInfer.cap.get(cv2.CAP_PROP_FRAME_COUNT) -
                        int(round(vfps / minFPS))):
                    videoCapInfer.cur_frame_count = 0
                    videoCapInfer.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

        if False not in no_more_data:
            break

    infer_network.clean()
    cv2.destroyAllWindows()
Example #30
def main():

    args = build_argparser().parse_args()

    client = connect_mqtt()

    global initial_w, initial_h, prob_threshold

    # Initialise the class
    network = Network()
    # Set probability threshold for detections (default to 0.4)
    if args.prob_threshold is not None:
        prob_threshold = args.prob_threshold
    else:
        prob_threshold = 0.4

    image_mode = False

    cur_request_id = 0
    last = 0
    total = 0
    start = 0

    # Load the network to IE plugin to get shape of input layer

    n, c, h, w = network.load_model(args.model, args.device, 1, 1,
                                    cur_request_id, args.cpu_extension)[1]

    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    initial_w = cap.get(3)
    initial_h = cap.get(4)

    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        # Start async inference

        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
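        # The reshape adds the batch dimension (n is typically 1) expected
        # by the network's input layer.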

        # Start asynchronous inference for specified request.

        inf_start = time.time()
        network.exec_net(cur_request_id, image)
        # Wait for the result

        network.wait(cur_request_id)
        det_time = time.time() - inf_start

        # Results of the output layer of the network
        result = network.get_output(cur_request_id)
        frame, current_count = model_out(frame, result)

        inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)

        cv2.putText(frame, inf_time_message, (15, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

        # When a new person enters the video

        if current_count > last:
            start = time.time()
            total += current_count - last
            client.publish("person", json.dumps({"total": total}))

        # When a person leaves, the duration they spent in the video is calculated
        if current_count < last:
            duration = int(time.time() - start)
            # Publish messages to the MQTT server
            client.publish("person/duration",
                           json.dumps({"duration": duration}))

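        # Publish the current in-frame count on every processed frame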
        client.publish("person", json.dumps({"count": current_count}))
        last = current_count

        if key_pressed == 27:
            break

        # Send frame to the ffmpeg server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if image_mode:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    network.clean()