Example #1
def infer_on_stream():
    # Initialise the class
    infer_network = Network()
    infer_network.load_model(
        "./models/mobilenet_ssd_pedestrian_detection/MobileNetSSD_deploy10695.xml",
        "CPU", CPU_EXTENSION)
    net_input_shape = infer_network.get_input_shape()
    ### TODO: Handle the input stream ###
    img = cv2.imread('./frame1.jpg')

    img = cv2.resize(img, (net_input_shape[3], net_input_shape[2]))
    # MobileNet-SSD normalization: scale pixel values to roughly [-1, 1]
    imgProcessed = (img - 127.5) * 0.007843
    imgProcessed = imgProcessed.astype(np.float32)
    # Change data layout from HWC to CHW and add the batch dimension
    imgProcessed = imgProcessed.transpose((2, 0, 1))
    imgProcessed = imgProcessed.reshape(1, *imgProcessed.shape)

    infer_network.exec_net(imgProcessed)
    if infer_network.wait() == 0:
        ### TODO: Get the results of the inference request ###
        result = infer_network.get_output()
        h, w = img.shape[:2]
        box = result[0, 0, :, 3:7] * np.array([w, h, w, h])
        cls = result[0, 0, :, 1]
        conf = result[0, 0, :, 2]
        for i in range(len(box)):
            # approximate box area, used to filter out implausibly large detections
            aR = abs(box[i][2] - box[i][0]) * abs(box[i][3] - box[i][1])
            if conf[i] > 0.25 and aR < 30000:
                cv2.rectangle(img, (int(box[i][0]), int(box[i][1])),
                              (int(box[i][2]), int(box[i][3])), (0, 255, 0))
        cv2.imwrite("frameProcessed.jpg", img)
Example #2
def infer_on_stream(cap, out, width, height, args):
    infer_network = Network()
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    while cap.isOpened():

        flag, frame = cap.read()
        if not flag:
            break

        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        infer_network.exec_net(p_frame)

        if infer_network.wait() == 0:

            result = infer_network.get_output()

            frame, current_count = draw_boxes(frame, result, width, height,
                                              float(args.prob_threshold))
            send_update = update_count(current_count,
                                       cap.get(cv2.CAP_PROP_POS_MSEC) / 1000)
            if send_update:
                print(current_count, total_count, duration)

        out.write(frame)
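
Example #2 relies on project helpers such as `draw_boxes` and `update_count` that are defined elsewhere (`total_count` and `duration` in the `print` call above are presumably module-level state maintained by `update_count`). A plausible sketch of `draw_boxes` for an SSD-style `DetectionOutput` tensor of shape [1, 1, N, 7], with the signature and return values inferred from the call above:

import cv2


def draw_boxes(frame, result, width, height, prob_threshold):
    """Draw a rectangle for each detection above the threshold; return the frame and the count."""
    current_count = 0
    for obj in result[0][0]:
        # Each detection row: [image_id, label, confidence, xmin, ymin, xmax, ymax]
        if obj[2] > prob_threshold:
            xmin, ymin = int(obj[3] * width), int(obj[4] * height)
            xmax, ymax = int(obj[5] * width), int(obj[6] * height)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            current_count += 1
    return frame, current_count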
Example #3
class FacialLandmarkDetection:
    """
        Facial Landmark Detection Class
    """
    def __init__(self, model, device="CPU", extensions=None):
        """
            set instance variables
        """
        self.model_xml = model
        self.device = device
        self.extensions = extensions
        self.infer_network = Network()

    def load_model(self):
        """
            load the model specified by the user
        """
        self.infer_network.load_model(self.model_xml, self.device,
                                      self.extensions)

    def predict(self, image):
        """
            run predictions on the input image
        """
        self.infer_network.exec_net(image)
        return (self.infer_network.get_output()[self.infer_network.output_blob]
                if self.infer_network.wait() == 0 else None)

    def preprocess_input(self, image):
        """
            preprocess input image
        """
        input_shape = self.infer_network.get_input_shape()
        frame = np.copy(image)
        frame = cv2.resize(frame, (input_shape[3], input_shape[2])).transpose(
            (2, 0, 1))
        return frame.reshape(1, *frame.shape)

    def preprocess_output(self, outputs, box, img, overlay_inference):
        """
            preprocess output image
        """
        landmarks = outputs.reshape(1, 10)[0]
        h, w = (box[3] - box[1], box[2] - box[0])

        if overlay_inference:
            for e in range(2):
                # The landmarks are normalized to [0, 1]: scale by the box size
                # first, then cast to int (casting first truncated them to 0).
                x, y = (int(w * landmarks[e * 2]),
                        int(h * landmarks[e * 2 + 1]))
                cv2.circle(img, (box[0] + x, box[1] + y), 30,
                           (0, 255, e * 255), 2)

        return (
            img,
            [w * landmarks[0], h * landmarks[1]],
            [w * landmarks[2], h * landmarks[3]],
        )
Example #4
class GazeEstimator:
    def __init__(self, model_name, device='CPU', extensions=None):
        self.network = Network(model_name, device, extensions)

    def load_model(self):
        self.network.load_model()

    def predict(self, right_eye_image, head_pose_angles, left_eye_image):
        _, _, roll = head_pose_angles
        right_eye_image, head_pose_angles, left_eye_image, preprocess_input_time = self._preprocess_input(
            right_eye_image, head_pose_angles, left_eye_image)
        input_dict = {
            "left_eye_image": left_eye_image,
            "right_eye_image": right_eye_image,
            "head_pose_angles": head_pose_angles
        }
        self.network.exec_net(0, input_dict)
        status = self.network.wait(0)
        if status == 0:
            outputs = self.network.get_output(0)
            gaze_vector, preprocess_output_time = self._preprocess_output(
                outputs, roll)
            self.preprocess_time = preprocess_input_time + preprocess_output_time
            return gaze_vector

    def _preprocess_input(self, right_eye_image, head_pose_angles,
                          left_eye_image):
        start_preprocess_time = time.time()
        left_eye_image = self._preprocess_eye_image(left_eye_image)
        right_eye_image = self._preprocess_eye_image(right_eye_image)
        head_pose_angles = self._preprocess_angles(head_pose_angles)
        total_preprocess_time = time.time() - start_preprocess_time
        return right_eye_image, head_pose_angles, left_eye_image, total_preprocess_time

    def _preprocess_angles(self, head_pose_angles):
        input_shape = self.network.get_input_shape("head_pose_angles")
        head_pose_angles = np.reshape(head_pose_angles, input_shape)
        return head_pose_angles

    def _preprocess_eye_image(self, image):
        n, c, h, w = self.network.get_input_shape("left_eye_image")
        input_image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w))
        return input_image

    def _preprocess_output(self, outputs, roll):
        start_preprocess_time = time.time()
        gaze_vector = outputs[0]
        gaze_vector_n = gaze_vector / np.linalg.norm(gaze_vector)
        vcos = math.cos(math.radians(roll))
        vsin = math.sin(math.radians(roll))
        x = gaze_vector_n[0] * vcos + gaze_vector_n[1] * vsin
        y = -gaze_vector_n[0] * vsin + gaze_vector_n[1] * vcos
        total_preprocess_time = time.time() - start_preprocess_time
        return [x, y], total_preprocess_time
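
The roll compensation in `_preprocess_output` is simply a 2D rotation of the normalized gaze components by the head's roll angle, so the pointer direction stays aligned with the screen even when the head is tilted. A small numpy check of that equivalence (sample values, illustrative only):

import numpy as np

gaze_xy = np.array([0.3, -0.2])   # sample normalized (x, y) gaze components
roll = 15.0                       # sample roll angle in degrees
c, s = np.cos(np.radians(roll)), np.sin(np.radians(roll))
rotation = np.array([[c, s],
                     [-s, c]])    # same coefficients as the x/y expressions above
print(rotation @ gaze_xy)         # matches the [x, y] returned by _preprocess_output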
Example #5
class FaceDetector:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self,
                 model_name,
                 device='CPU',
                 extensions=None,
                 threshold=0.60):
        self.threshold = threshold
        self.network = Network(model_name, device, extensions)

    def load_model(self):
        self.network.load_model()

    def predict(self, image):
        input_image, preprocess_input_time = self._preprocess_input(image)
        self.network.exec_net(0, input_image)
        status = self.network.wait(0)
        if status == 0:
            outputs = self.network.get_output(0)
            face_boxes, preprocess_output_time = self._preprocess_output(
                outputs, image)
            self.preprocess_time = preprocess_input_time + preprocess_output_time
            return face_boxes

    def _preprocess_input(self, image):
        start_preprocess_time = time.time()
        n, c, h, w = self.network.get_input_shape()
        input_image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w))
        total_preprocess_time = time.time() - start_preprocess_time
        return input_image, total_preprocess_time

    def _preprocess_output(self, outputs, image):
        start_preprocess_time = time.time()
        face_boxes = []
        h, w, _ = image.shape
        color = (255, 0, 0)
        for obj in outputs[0][0]:
            if obj[2] > self.threshold:
                xmin = int(obj[3] * w)
                ymin = int(obj[4] * h)
                xmax = int(obj[5] * w)
                ymax = int(obj[6] * h)
                face_boxes.append([xmin, ymin, xmax, ymax])
                cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 1)
        total_preprocess_time = time.time() - start_preprocess_time
        return face_boxes, total_preprocess_time
Example #6
def post_convertion(frame, model, cpu_extension, device):
    network = Network()
    network.load_model(model, cpu_extension, device)
    processed_frame = pre_process(frame,
                                  net_input_shape=network.get_input_shape())
    inference_start_time = time.time()
    network.exec_net(processed_frame)
    if network.wait() == 0:
        inference_end_time = time.time()
        total_inference_time = inference_end_time - inference_start_time
        result = network.get_all_output()
        output = result['DetectionOutput']
        detection = output[0][0][0]
        image_id, label, conf, x_min, y_min, x_max, y_max = detection
        return str(round(total_inference_time * 1000, 3)) + "ms", conf
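
`pre_process` and `get_all_output` are also project helpers that are not shown. A minimal sketch of `pre_process`, assuming it performs the same resize/transpose/reshape steps the other examples apply inline (`get_all_output` presumably returns the dictionary of all output blobs, keyed by layer name such as 'DetectionOutput'):

import cv2


def pre_process(frame, net_input_shape):
    """Resize to the network's HxW, move channels first, and add a batch dimension."""
    p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
    p_frame = p_frame.transpose((2, 0, 1))       # HWC -> CHW
    return p_frame.reshape(1, *p_frame.shape)    # 1 x C x H x W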
Example #7
def infer_on_image(args):

    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    # Initialize the Inference Engine
    plugin = Network()
    time_stamp = []

    # Load the network model into the IE
    plugin.load_model(args.m, args.d, CPU_EXTENSION)
    net_input_shape = plugin.get_input_shape()

    # Get and open video capture
    img = cv2.imread(args.i)
    height, width, _ = img.shape

    #Preprocess the image
    p_frame = cv2.resize(img, (net_input_shape[3], net_input_shape[2]))
    p_frame = p_frame.transpose((2, 0, 1))
    p_frame = p_frame.reshape(1, *p_frame.shape)

    #Execute the network
    plugin.exec_net(p_frame)

    #Extract result
    result = plugin.get_output()

    #statistics on image
    ppl = 0
    times = []
    counter_frame = 10
    iflag = False
    iflag, ppl, times = count_ppl(result, counter_frame, iflag, ppl, times)

    #Draw bounding box
    out_img = draw_bb(result, width, height, img)
    cv2.imwrite('file.jpg', out_img)

    client.publish('person', json.dumps({'count': ppl}))
    #client.publish('Duration',json.dumps({'duration':times}))

    #Publish the image
    sys.stdout.buffer.write(out_img)
    sys.stdout.flush()

    client.disconnect()
    return (ppl)
Example #8
class FacialLandmarksDetector:

    def __init__(self, model_name, device='CPU', extensions=None):
        self.network = Network(model_name, device, extensions)

    def load_model(self):
        self.network.load_model()

    def predict(self, face_image):
        input_image, preprocess_input_time = self._preprocess_input(face_image)
        self.network.exec_net(0, input_image)
        status = self.network.wait(0)
        if status == 0:
            outputs = self.network.get_output(0)
            eye_boxes, eye_centers, preprocess_output_time = self._preprocess_output(outputs, face_image)
            self.preprocess_time = preprocess_input_time + preprocess_output_time
            return eye_boxes, eye_centers

    def _preprocess_input(self, image):
        start_preprocess_time = time.time()
        n, c, h, w = self.network.get_input_shape()
        input_image = cv2.resize(image, (w,h), interpolation = cv2.INTER_AREA)
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w))
        total_preprocess_time = time.time() - start_preprocess_time
        return input_image, total_preprocess_time


    def _preprocess_output(self, outputs, image):
        start_preprocess_time = time.time()
        normalized_landmarks = np.squeeze(outputs).reshape((5,2))
        h, w, _ = image.shape
        color = (255,255,255)
        length_offset = int(w * 0.15) 
        eye_boxes, eye_centers = [], []
        for i in range(2):
            normalized_x, normalized_y = normalized_landmarks[i]
            x = int(normalized_x*w)
            y = int(normalized_y*h)
            eye_centers.append([x, y])
            xmin, xmax = max(0, x - length_offset), min(w, x + length_offset)
            ymin, ymax = max(0, y - length_offset), min(h, y + length_offset)
            eye_boxes.append([xmin, ymin, xmax, ymax])
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 1)
        total_preprocess_time = time.time() - start_preprocess_time
        return eye_boxes, eye_centers, total_preprocess_time

        
Example #9
class HeadPoseEstimator:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        self.network = Network(model_name, device, extensions)

    def load_model(self):
        self.network.load_model()

    def predict(self, image):
        input_image, preprocess_input_time = self._preprocess_input(image)
        self.network.exec_net(0, input_image)
        status = self.network.wait(0)
        if status == 0:
            outputs = self.network.get_outputs(0)
            head_pose_angles, preprocess_output_time = self._preprocess_output(outputs, image)
            self.preprocess_time = preprocess_input_time + preprocess_output_time
            return head_pose_angles

    def _preprocess_input(self, image):
        start_preprocess_time = time.time()
        n, c, h, w = self.network.get_input_shape()
        input_image = cv2.resize(image, (w,h), interpolation = cv2.INTER_AREA)
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w))
        total_preprocess_time = time.time() - start_preprocess_time
        return input_image, total_preprocess_time


    def _preprocess_output(self, outputs, image):
        start_preprocess_time = time.time()
        yaw = outputs['angle_y_fc'][0][0]
        pitch = outputs['angle_p_fc'][0][0]
        roll = outputs['angle_r_fc'][0][0]
        total_preprocess_time = time.time() - start_preprocess_time
        return [yaw, pitch, roll], total_preprocess_time
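
Examples #5, #8, #9 and #4 are the four stages of a gaze-estimation pipeline: face detection, facial landmarks, head pose, and gaze. A rough sketch of how they could be chained for a single frame, assuming the constructors and `predict` signatures shown above; the model paths are placeholders, and which eye box comes first depends on the landmark ordering of the model used:

face_detector = FaceDetector("models/face-detection.xml")
landmarks_detector = FacialLandmarksDetector("models/landmarks-regression.xml")
head_pose_estimator = HeadPoseEstimator("models/head-pose-estimation.xml")
gaze_estimator = GazeEstimator("models/gaze-estimation.xml")
for model in (face_detector, landmarks_detector, head_pose_estimator, gaze_estimator):
    model.load_model()


def estimate_gaze(frame):
    # Detect the first face, crop it, locate the eyes and head pose,
    # then feed both eye crops plus the angles to the gaze model.
    face_boxes = face_detector.predict(frame)
    if not face_boxes:
        return None
    xmin, ymin, xmax, ymax = face_boxes[0]
    face = frame[ymin:ymax, xmin:xmax]
    eye_boxes, eye_centers = landmarks_detector.predict(face)
    head_pose_angles = head_pose_estimator.predict(face)
    (lx1, ly1, lx2, ly2), (rx1, ry1, rx2, ry2) = eye_boxes
    left_eye, right_eye = face[ly1:ly2, lx1:lx2], face[ry1:ry2, rx1:rx2]
    return gaze_estimator.predict(right_eye, head_pose_angles, left_eye)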
Example #10
def main():
    """
    Load the network and parse the SSD output.

    :return: None
   """
    # Connect to the MQTT server
    client = mqtt.Client()
    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    model = os.environ['MODEL']
    device = os.environ['DEVICE'] if 'DEVICE' in os.environ.keys() else 'CPU'
    cpu_extension = os.environ[
        'CPU_EXTENSION'] if 'CPU_EXTENSION' in os.environ.keys() else None

    # Checks for live feed
    if os.environ['INPUT'] == 'CAM':
        input_stream = 0

    # Checks for input image
    elif os.environ['INPUT'].endswith('.jpg') or os.environ['INPUT'].endswith(
            '.bmp'):
        single_image_mode = True
        input_stream = os.environ['INPUT']

    # Checks for video file
    else:
        input_stream = os.environ['INPUT']
        assert os.path.isfile(
            os.environ['INPUT']), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(os.environ['INPUT'])

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model, device, 1, 1, cur_request_id,
                                          cpu_extension)[1]
    global initial_w, initial_h
    initial_w = cap.get(3)
    initial_h = cap.get(4)
    fps = cap.get(cv2.CAP_PROP_FPS)
    cmdstring = (
        'ffmpeg',
        '-y',                                     # overwrite the output if it exists
        '-r', '%d' % (fps),                       # input frame rate
        '-s', '%dx%d' % (initial_w, initial_h),   # frame size
        '-pixel_format', 'bgr24',                 # OpenCV frames are packed BGR
        '-f', 'rawvideo',
        '-i', '-',                                # read raw video from the pipe (stdin)
        'http://localhost:8090/fac.ffm')          # ffserver feed to publish to
    p = subprocess.Popen(cmdstring, stdin=subprocess.PIPE)
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        # Start async inference
        inf_start = time.time()
        image = cv2.resize(frame, (w, h))
        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        # Start asynchronous inference for specified request.
        infer_network.exec_net(cur_request_id, image)
        # Wait for the result
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            result = infer_network.get_output(cur_request_id)
            if int(os.environ.get('PERF_COUNTS', 0)) > 0:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)
            frame, current_count = ssd_parser(frame, result)
            inf_time_message = "Inference time: {:.3f}ms" \
                .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break

        p.stdin.write(frame.tobytes())
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
            infer_network.clean()
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
Example #11
def intruder_detector():
    """
    Process the input source frame by frame and detects intruder, if any.

    :return status: 0 on success, negative value on failure
    """
    global CONF_CANDIDATE_CONFIDENCE
    global LOG_WIN_HEIGHT
    global LOG_WIN_WIDTH
    global CONF_FILE
    global video_caps
    global conf_labels_file_path

    parse_args()
    if not os.path.isfile(CONF_FILE):
        return -12, ""

    if not os.path.isfile(conf_labels_file_path):
        return -13, ""

    # Creates subdirectory to save output snapshots
    pathlib.Path(os.getcwd() + '/output/').mkdir(parents=True, exist_ok=True)

    # Read the configuration file
    ret, req_labels = get_input()
    if ret != 0:
        return ret, req_labels[0]

    if not video_caps:
        return -14, ''

    # Get the labels that are used in the application
    ret, label_names, used_labels = get_used_labels(req_labels)
    if ret != 0:
        return ret, ''
    if True not in used_labels:
        return -15, ''

    # Init a rolling log to store events
    rolling_log_size = int((LOG_WIN_HEIGHT - 15) / 20)
    log_list = collections.deque(maxlen=rolling_log_size)

    # Open a file for intruder logs
    log_file = open(LOG_FILE_PATH, 'w')
    if not log_file:
        return -16, ''

    # Initializing VideoWriter for each source
    for video_cap in video_caps:

        ret, ret_value = video_cap.init_vw(int(video_cap.input_height),
                                           int(video_cap.input_width))
        if ret != 0:
            return ret, ret_value
    # Initialise the class
    infer_network = Network()
    # Load the network to IE plugin to get shape of input layer
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 0,
                                          CPU_EXTENSION)

    min_fps = min([i.vc.get(cv2.CAP_PROP_FPS) for i in video_caps])
    no_more_data = [False] * len(video_caps)
    start_time = time.time()
    inf_time = 0
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    statsVideo = cv2.VideoWriter(os.path.join(output_dir, 'Statistics.mp4'),
                                 fourcc, min_fps,
                                 (LOG_WIN_WIDTH, LOG_WIN_HEIGHT), True)
    job_id = os.environ['PBS_JOBID']
    progress_file_path = os.path.join(output_dir,
                                      'i_progress_' + str(job_id) + '.txt')
    infer_start_time = time.time()
    # Main loop starts here. Loop over all the video captures
    while True:
        for idx, video_cap in enumerate(video_caps):
            # Get a new frame
            vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / min_fps))):
                ret, video_cap.frame = video_cap.vc.read()
                video_cap.loop_frames += 1
                # If no new frame or error in reading a frame, exit the loop
                if not ret:
                    no_more_data[idx] = True
                    break
            if no_more_data[idx]:
                stream_end_frame = numpy.zeros((int(
                    video_cap.input_height), int(video_cap.input_width), 1),
                                               dtype='uint8')
                stream_end_message = "Stream from {} has ended.".format(
                    video_cap.cam_name)
                cv2.putText(stream_end_frame, stream_end_message,
                            (int(video_cap.input_width / 2) - 30,
                             int(video_cap.input_height / 2) - 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                continue
            for i in range(video_cap.no_of_labels):
                video_cap.current_count[i] = 0
                video_cap.changed_count[i] = False

            # Resize to expected size (in model .xml file)
            # Input frame is resized to infer resolution
            in_frame = cv2.resize(video_cap.frame, (w, h))

            # PRE-PROCESS STAGE:
            # Convert image to format expected by inference engine
            # IE expects planar, convert from packed
            # Change data layout from HWC to CHW
            in_frame = in_frame.transpose((2, 0, 1))
            in_frame = in_frame.reshape((n, c, h, w))
            # Start asynchronous inference for specified request.
            inf_start = time.time()
            infer_network.exec_net(0, in_frame)
            # Wait for the result
            if infer_network.wait(0) == 0:
                inf_time = time.time() - inf_start
                # Results of the output layer of the network
                res = infer_network.get_output(0)
                for obj in res[0][0]:
                    label = int(obj[1]) - 1
                    # Draw the bounding box around the object when the probability is more than specified threshold
                    if obj[2] > CONF_THRESHOLD_VALUE and used_labels[label]:
                        video_cap.current_count[label] += 1
                        xmin = int(obj[3] * video_cap.input_width)
                        ymin = int(obj[4] * video_cap.input_height)
                        xmax = int(obj[5] * video_cap.input_width)
                        ymax = int(obj[6] * video_cap.input_height)
                        # Draw bounding box around the intruder detected
                        cv2.rectangle(video_cap.frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                for i in range(video_cap.no_of_labels):
                    if video_cap.candidate_count[i] == video_cap.current_count[
                            i]:
                        video_cap.candidate_confidence[i] += 1
                    else:
                        video_cap.candidate_confidence[i] = 0
                        video_cap.candidate_count[i] = video_cap.current_count[
                            i]

                    if video_cap.candidate_confidence[
                            i] == CONF_CANDIDATE_CONFIDENCE:
                        video_cap.candidate_confidence[i] = 0
                        video_cap.changed_count[i] = True
                    else:
                        continue

                    if video_cap.current_count[
                            i] > video_cap.last_correct_count[i]:
                        video_cap.total_count[i] += video_cap.current_count[
                            i] - video_cap.last_correct_count[i]
                        det_objs = video_cap.current_count[
                            i] - video_cap.last_correct_count[i]
                        total_count = sum(video_cap.total_count)
                        for det_obj in range(det_objs):
                            current_time = time.strftime("%H:%M:%S")
                            log = "{} - Intruder {} detected on {}".format(
                                current_time, label_names[i],
                                video_cap.cam_name)
                            print(log)
                            log_list.append(log)
                            log_file.write(log + "\n")
                            event = Event(event_time=current_time,
                                          intruder=label_names[i],
                                          count=total_count,
                                          frame=video_cap.frame_count)
                            video_cap.events.append(event)

                        snapshot_name = "output/intruder_{}.png".format(
                            total_count)
                        cv2.imwrite(snapshot_name, video_cap.frame)
                    video_cap.last_correct_count[i] = video_cap.current_count[
                        i]
            # Create intruder log window, add logs to the frame and display it
            log_window = numpy.zeros((LOG_WIN_HEIGHT, LOG_WIN_WIDTH, 1),
                                     dtype='uint8')
            for i, log in enumerate(log_list):
                cv2.putText(log_window, log, (10, 20 * i + 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            log_window = cv2.cvtColor(log_window, cv2.COLOR_GRAY2BGR)
            statsVideo.write(log_window)
            video_cap.frame_count += 1

            # Video output
            inf_time_message = "Inference time: {:.3f} ms".format(inf_time *
                                                                  1000)
            cv2.putText(video_cap.frame, inf_time_message,
                        (10, int(video_cap.input_height) - 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            fps_time = time.time() - start_time
            fps_message = "FPS: {:.3f} fps".format(1 / fps_time)
            cv2.putText(video_cap.frame, fps_message,
                        (10, int(video_cap.input_height) - 10),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # Display the video output
            video_cap.vw.write(video_cap.frame)
            if video_cap.frame_count % 10 == 0:
                progressUpdate(progress_file_path,
                               time.time() - infer_start_time,
                               video_cap.frame_count,
                               int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)))
            start_time = time.time()

            # Loop video to mimic continuous input if LOOP_VIDEO flag is True
            if LOOP_VIDEO and not video_cap.is_cam:
                vfps = int(round(video_cap.vc.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended restart it
                if video_cap.loop_frames > video_cap.vc.get(
                        cv2.CAP_PROP_FRAME_COUNT) - int(round(vfps / min_fps)):
                    video_cap.loop_frames = 0
                    video_cap.vc.set(cv2.CAP_PROP_POS_FRAMES, 0)

        if False not in no_more_data:
            progressUpdate(progress_file_path,
                           time.time() - infer_start_time,
                           int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)),
                           int(video_cap.vc.get(cv2.CAP_PROP_FRAME_COUNT)))
            break

    no_more_data = False
    t2 = time.time() - infer_start_time
    # Open the stats file once so the results for every capture are kept
    with open(os.path.join(output_dir, 'stats.txt'), 'w') as f:
        for videos in video_caps:
            f.write('{} \n'.format(round(t2)))
            f.write('{} \n'.format(videos.frame_count))

    infer_network.clean()
    log_file.close()
    return 0, ''
Example #12
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    single_image_mode = False  # flag for the input image
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    track_threshold = 0.1
    max_len = 30
    track = deque(maxlen=max_len)

    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###

    n, c, h, w = infer_network.load_model(args.model, args.device,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    ### TODO: Handle the input stream ###

    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    ### TODO: Loop until stream is over ###

    initial_w = cap.get(3)
    initial_h = cap.get(4)
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)
        # Start async inference

        ### TODO: Read from the video capture ###
        image = cv2.resize(frame, (w, h))

        ### TODO: Pre-process the image as needed ###
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        ### TODO: Start asynchronous inference for specified request ###
        inf_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            det_time = time.time() - inf_start
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(cur_request_id)

            ### TODO: Extract any desired stats from the results ###

            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time*1000)
            cv2.putText(frame, inf_time_message, (25, 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 150), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            current_count = 0
            for obj in result[0][0]:
                # Draw bounding box for object when it's probability is more than
                #  the specified threshold
                if obj[2] > prob_threshold:
                    xmin = int(obj[3] * initial_w)
                    ymin = int(obj[4] * initial_h)
                    xmax = int(obj[5] * initial_w)
                    ymax = int(obj[6] * initial_h)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (255, 0, 150), 1)
                    current_count = current_count + 1

            track.append(current_count)
            # proportion of frames with a positive detection
            num_tracked = 0
            if np.sum(track) / max_len > track_threshold:
                num_tracked = 1

            if num_tracked > last_count:
                start_time = time.time()
                total_count = total_count + num_tracked - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if num_tracked < last_count:
                fps = cap.get(cv2.CAP_PROP_FPS)
                #duration = int(total_count/fps)
                #duration = int(fps/last_count)
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = num_tracked

        key_pressed = cv2.waitKey(60)
        if key_pressed == 27:
            cap.release()
            cv2.destroyAllWindows()
            client.disconnect()
            break

        ### TODO: Send the frame to the FFMPEG server ###
        frame = cv2.resize(frame, (768, 432))
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
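
The people-counter examples publish JSON messages to the "person" and "person/duration" MQTT topics. Below is a small paho-mqtt subscriber that can be used to watch those messages while a pipeline runs; the broker host and port are assumptions standing in for the MQTT_HOST / MQTT_PORT constants used above:

import json

import paho.mqtt.client as mqtt


def on_message(client, userdata, message):
    # Payloads are JSON strings such as {"count": 1} or {"duration": 12}
    print(message.topic, json.loads(message.payload.decode("utf-8")))


subscriber = mqtt.Client()
subscriber.on_message = on_message
subscriber.connect("localhost", 1883, 60)   # assumed broker address and port
subscriber.subscribe([("person", 0), ("person/duration", 0)])
subscriber.loop_forever()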
Example #13
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    # Flag for the input image
    single_image_mode = False

    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    # Initialize the Inference Engine
    infer_network = Network()

    # Set Probability threshold for detections
    global initial_w, initial_h, prob_threshold
    prob_threshold = args.prob_threshold

    # Load the network model into the IE
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]

    # Checks for live feed
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
    initial_w = cap.get(3)
    initial_h = cap.get(4)

    while cap.isOpened():

        #Reading the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        image = cv2.resize(frame, (w, h))

        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        inf_start = time.time()

        # Perform inference on the frame
        infer_network.exec_net(cur_request_id, image)

        if infer_network.wait(cur_request_id) == 0:

            det_time = time.time() - inf_start

            result = infer_network.get_output(cur_request_id)
            #if args.perf_counts:
            perf_count = infer_network.performance_counter(cur_request_id)
            #performance_counts(perf_count)

            frame, current_count = ssd_out(frame, result)

            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)

            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When new person enters the video
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###

        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
Example #14
class GazeEstimationModel:
    '''
    Class for the Gaze Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_xml = model_name
        self.device = device
        self.extensions = extensions
        # Initialise the class
        self.infer_network = Network()
        #raise NotImplementedError

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.infer_network.load_model(self.model_xml, self.device,
                                      self.extensions)
        #raise NotImplementedError

    def predict(self, left_eye_image, right_eye_image, headpose_angles):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        self.infer_network.exec_net(headpose_angles, left_eye_image,
                                    right_eye_image)

        # Wait for the result
        if self.infer_network.wait() == 0:
            # end time of inference
            end_time = time.time()
            result = (self.infer_network.get_output()
                      )[self.infer_network.output_blob]
            return result

    def check_model(self):
        raise NotImplementedError

    def preprocess_input(self,
                         frame,
                         face,
                         left_eye_point,
                         right_eye_point,
                         print_flag=True):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.

        Expected input blobs:
        - left_eye_image:   [BxCxHxW] = [1x3x60x60]
        - right_eye_image:  [BxCxHxW] = [1x3x60x60]
        - head_pose_angles: [BxC] = [1x3]
          (B = batch size, C = channels, H = image height, W = image width)

        '''

        lefteye_input_shape = [1, 3, 60,
                               60]  #self.infer_network.get_input_shape()
        righteye_input_shape = [1, 3, 60, 60
                                ]  #self.infer_network.get_next_input_shape(2)

        # crop left eye
        x_center = left_eye_point[0]
        y_center = left_eye_point[1]
        width = lefteye_input_shape[3]
        height = lefteye_input_shape[2]
        # ymin:ymax, xmin:xmax
        facewidthedge = face.shape[1]
        faceheightedge = face.shape[0]

        # check for edges to not crop
        ymin = int(y_center -
                   height // 2) if int(y_center - height // 2) >= 0 else 0
        ymax = int(y_center +
                   height // 2) if int(y_center + height //
                                       2) <= faceheightedge else faceheightedge

        xmin = int(x_center -
                   width // 2) if int(x_center - width // 2) >= 0 else 0
        xmax = int(x_center +
                   width // 2) if int(x_center + width //
                                      2) <= facewidthedge else facewidthedge

        left_eye_image = face[ymin:ymax, xmin:xmax]
        # print out left eye to frame
        if (print_flag):
            frame[150:150 + left_eye_image.shape[0],
                  20:20 + left_eye_image.shape[1]] = left_eye_image
        # left eye [1x3x60x60]
        p_frame_left = cv2.resize(
            left_eye_image, (lefteye_input_shape[3], lefteye_input_shape[2]))
        p_frame_left = p_frame_left.transpose((2, 0, 1))
        p_frame_left = p_frame_left.reshape(1, *p_frame_left.shape)

        # crop right eye
        x_center = right_eye_point[0]
        y_center = right_eye_point[1]
        width = righteye_input_shape[3]
        height = righteye_input_shape[2]
        # ymin:ymax, xmin:xmax
        # check for edges to not crop
        ymin = int(y_center -
                   height // 2) if int(y_center - height // 2) >= 0 else 0
        ymax = int(y_center +
                   height // 2) if int(y_center + height //
                                       2) <= faceheightedge else faceheightedge

        xmin = int(x_center -
                   width // 2) if int(x_center - width // 2) >= 0 else 0
        xmax = int(x_center +
                   width // 2) if int(x_center + width //
                                      2) <= facewidthedge else facewidthedge

        right_eye_image = face[ymin:ymax, xmin:xmax]
        # print out right eye to frame
        if (print_flag):
            frame[150:150 + right_eye_image.shape[0],
                  100:100 + right_eye_image.shape[1]] = right_eye_image

        # right eye [1x3x60x60]
        p_frame_right = cv2.resize(
            right_eye_image,
            (righteye_input_shape[3], righteye_input_shape[2]))
        p_frame_right = p_frame_right.transpose((2, 0, 1))
        p_frame_right = p_frame_right.reshape(1, *p_frame_right.shape)

        #headpose_angles

        return frame, p_frame_left, p_frame_right
        #raise NotImplementedError

    def preprocess_output(self,
                          outputs,
                          image,
                          facebox,
                          left_eye_point,
                          right_eye_point,
                          print_flag=True,
                          threshold=0.5):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.

        The net outputs a blob with shape [1, 3] containing the Cartesian
        coordinates of the gaze direction vector. Note that the output vector
        is not normalized and has non-unit length.
        Output layer name in Inference Engine format:
        gaze_vector
        '''
        x = outputs[0][0]
        y = outputs[0][1]
        z = outputs[0][2]
        #Draw output
        if (print_flag):
            cv2.putText(
                image, "x:" + str('{:.1f}'.format(x * 100)) + ",y:" +
                str('{:.1f}'.format(y * 100)) + ",z:" +
                str('{:.1f}'.format(z)), (20, 100), 0, 0.6, (0, 0, 255), 1)

            #left eye
            xmin, ymin, _, _ = facebox
            x_center = left_eye_point[0]
            y_center = left_eye_point[1]
            left_eye_center_x = int(xmin + x_center)
            left_eye_center_y = int(ymin + y_center)
            #right eye
            x_center = right_eye_point[0]
            y_center = right_eye_point[1]
            right_eye_center_x = int(xmin + x_center)
            right_eye_center_y = int(ymin + y_center)

            cv2.arrowedLine(image, (left_eye_center_x, left_eye_center_y),
                            (left_eye_center_x + int(x * 100),
                             left_eye_center_y + int(-y * 100)),
                            (255, 100, 100), 5)
            cv2.arrowedLine(image, (right_eye_center_x, right_eye_center_y),
                            (right_eye_center_x + int(x * 100),
                             right_eye_center_y + int(-y * 100)),
                            (255, 100, 100), 5)

        return image, [x, y, z]
Example #15
def infer_on_stream(args, client):

    isImage = False

    # Handle the input stream
    if args.input != 'CAM':
        assert os.path.isfile(args.input)

    if args.input == 'CAM':
        args.input = 0
    elif args.input.endswith(('.jpg', '.bmp', '.png')):
        isImage = True

    last_count = 0
    total_count = 0
    durationList = []
    inferenceList = []
    f_n = 0  # false negatives, tracked for analysis purposes

    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    # Load the model through `infer_network`
    mdl_start = cv2.getTickCount()

    infer_network.load_model(args.model, args.device, args.cpu_extension)
    load_time = utils.timeLapse(mdl_start)

    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)

    if not cap.isOpened():
        log.error("ERROR! Unable to open video source")
        exit(1)

    w, h = utils.getSrcDim(cap)  #dimensions from the captured source

    if not isImage:
        out = cv2.VideoWriter('out.mp4', utils.getCODEC(),
                              cap.get(cv2.CAP_PROP_FPS), (w, h))
    else:
        out = None

    #Loop until stream is over
    while cap.isOpened():
        # Read from the video capture
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the image as needed
        p_frame = utils.preprocessed_input(infer_network, frame)

        # Start asynchronous inference for specified request
        inf_start = time.time()
        infer_network.exec_net(p_frame, request_id=0)

        # Wait for the result
        if infer_network.wait(request_id=0) == 0:
            det_time = time.time() - inf_start
            inferenceList.append(det_time * 1000)

            # Get the results of the inference request
            result = infer_network.get_output(request_id=0)

            # Extract any desired stats from the results
            frame, count, f_n = drawBBoxes(frame, result, prob_threshold, w, h,
                                           last_count, f_n)

            # Calculate and send relevant information on
            inf_time_message = "Inference time: {:.3f}ms".format(det_time *
                                                                 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            # When new person enters the video
            if count > last_count:

                # Re-assure that the previous person is marked as having left,
                # then record that a new person entered and start their timer.
                frameProcessor.to_PersonOut()
                frameProcessor.newPersonEntered()
                total_count = total_count + count - last_count
                client.publish("person", json.dumps({"total": total_count}))
                ''' 
                log.info('Entered Scene at MS {}'.format(cap.get(cv2.CAP_PROP_POS_MSEC)))
                log.info('Entered Scene at FRM {}'.format(cap.get(cv2.CAP_PROP_POS_FRAMES)))
                log.info('Entered Scene at AVI {}'.format(cap.get(cv2.CAP_PROP_POS_AVI_RATIO)))
                log.info('Frame Rate {}'.format(cap.get(cv2.CAP_PROP_FPS)))
                
                ## The logic of calculating the duration from the cv2 attributes above worked fine,
                ## but it may not work in CAM mode, so a more generic approach is needed.
                '''

            # current_count, total_count and duration to the MQTT server
            # Person duration in the video is calculated

            # Topic "person": keys of "count" and "total"
            # Topic "person/duration": key of "duration"
            if count < last_count:
                duration = float(time.time() -
                                 frameProcessor.getPersonEntrytime())
                frameProcessor.to_PersonOut()
                durationList.append(duration)

                # Publish average duration spent by people to the MQTT server
                client.publish(
                    "person/duration",
                    json.dumps({"duration": round(np.mean(durationList))}))

            client.publish("person", json.dumps({"count": count}))
            last_count = count

            if key_pressed == 27:
                break

        # Send the frame to the FFMPEG server
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        # Write an output image if `single_image_mode`
        if isImage:
            cv2.imwrite('output/output_image.jpg', frame)
        else:
            out.write(frame)

    log.info('######################################################')
    log.info(
        '# Average Inference Time                                             ::  {:.3f} ms'
        .format(np.mean(inferenceList)))
    log.info(
        '# (IR) Model Size   (XML)                                            ::  {}'
        .format(metrics.getSize(utils.getMOFiles(args.model)['model'])))
    log.info(
        '# (IR) Model Weight (BIN)                                            ::  {}'
        .format(metrics.getSize(utils.getMOFiles(args.model)['weights'])))
    log.info(
        '# Total Model Load Time                                              ::  {:.3f} ms'
        .format(load_time))
    log.info(
        '# Set Probability Threshold                                          ::  {}'
        .format(prob_threshold))
    log.info(
        '# No. of False Negatives @ 0.75 & 0.5 times of the set threshold     ::  {}'
        .format(f_n))
    log.info(
        '# Error_percent in detecting Total ppl                               ::  {}'
        .format(metrics.getErrorPercent(total_count, "people")))
    log.info(
        '# Error_percent in average duration                                  ::  {}'
        .format(
            metrics.getErrorPercent(round(np.mean(durationList)), "duration")))
    log.info('######################################################')

    release(out, cap, client)
Example #16
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(model=args.model,cpu_extension=args.cpu_extension)

    ### TODO: Handle the input stream ###

    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)  
    width = int(cap.get(3))
    height = int(cap.get(4))
    
    # out = cv2.VideoWriter('out2.mp4', 0x00000021, 30, (width, height))  # used to create an output video file
    counter=0
    start_flag=0
    time_start=0
    count_person=0
    total_count_person=0
    last_count=0
    
    elapsed=0
    elapsed_prom=0
    frame_out=0
    time_counter=0
    conf_prom=0
    single_image_mode=0
    count_frame_person_total=0

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        counter+=1
        time_counter+=1

        ### TODO: Read from the video capture ###
        frame_prev_out=frame_out
        flag, frame = cap.read()

        if not flag:
            if (counter==2):
                single_image_mode=1
            break
            
        ### TODO: Pre-process the image as needed ###
        shape_input=infer_network.get_input_shape()       
        frame_proc=cv2.resize(frame,(shape_input[3],shape_input[2]))
        frame_proc=np.transpose(frame_proc,(2,0,1))
        frame_proc=np.reshape(frame_proc,(1,3,shape_input[2],shape_input[3]))

        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(frame_proc)
        
        ### Used for measuring the inference time
        start = timer()
        ### TODO: Wait for the result ###
        if infer_network.wait()==0:
            end = timer()
            elapsed=(end - start)
            elapsed_prom=(elapsed_prom+elapsed)
            #print(elapsed)

            ### TODO: Get the results of the inference request ###
            output_boxes=infer_network.get_output()
                
            ### TODO: Extract any desired stats from the results ###
            #This part has been adapted from: https://knowledge.udacity.com/questions/139281
            frame_out,count_person,conf=draw_boxes(frame,output_boxes,args,width,height)
            if(count_person>0):
                conf_prom+=conf
                count_frame_person_total+=count_person
            
            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###

            client.publish("person", json.dumps({"count": count_person}))

            if count_person > last_count:
                time_start=counter/10
                total_count_person = total_count_person + count_person - last_count
                client.publish("person", json.dumps({"total": total_count_person}))
                
            # Person duration in the video is calculated
            if count_person < last_count:
                duration = int(counter/10 - time_start)
                counter = 0
                # Publish messages to the MQTT server
                client.publish("person/duration",json.dumps({"duration": duration}))
            
            last_count = count_person                       
            #out.write(frame) Used for create an Ouput video file

            ### TODO: Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()

    ### TODO: Write an output image if `single_image_mode` ###
    if(single_image_mode==1):
        cv2.imwrite("/home/workspace/resources/out.png",frame_prev_out)
    
    #print(elapsed_prom/(time_counter-1))
    #print(conf_prom/count_frame_person_total)
    #out.release()
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
Example #17
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    model = args.model
    
    DEVICE = args.device
    CPU_EXTENSION = args.cpu_extension
    
    infer_network.load_model(model, CPU_EXTENSION, DEVICE)
    network_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    # Checks for live feed
    if args.input == 'CAM':
        input_validated = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp') :
        single_image_mode = True
        input_validated = args.input

    # Checks for video file
    else:
        input_validated = args.input
        assert os.path.isfile(args.input), "file doesn't exist"

    ### TODO: Handle the input stream ###
    cap = cv2.VideoCapture(input_validated)
    cap.open(input_validated)
    
    w = int(cap.get(3))
    h = int(cap.get(4))

    in_shape = network_shape['image_tensor']

    #iniatilize variables
    report = 0
    counter = 0
    counter_prev = 0
    duration_prev = 0
    counter_total = 0
    dur = 0
    request_id=0

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        if not flag:
            break

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (in_shape[3], in_shape[2]))
        image_p = image.transpose((2, 0, 1))
        image_p = image_p.reshape(1, *image_p.shape)
  

        ### TODO: Start asynchronous inference for specified request ###
        net_input = {'image_tensor': image_p,'image_info': image_p.shape[1:]}
        duration_report = None
        infer_network.exec_net(net_input, request_id)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:

            ### TODO: Get the results of the inference request ###
            net_output = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###
            pointer = 0
            probs = net_output[0, 0, :, 2]
            for i, p in enumerate(probs):
                if p > prob_threshold:
                    pointer += 1
                    box = net_output[0, 0, i, 3:]
                    p1 = (int(box[0] * w), int(box[1] * h))
                    p2 = (int(box[2] * w), int(box[3] * h))
                    frame = cv2.rectangle(frame, p1, p2, (0, 255, 0), 3)
        
            if pointer != counter:
                counter_prev = counter
                counter = pointer
                if dur >= 3:
                    duration_prev = dur
                    dur = 0
                else:
                    dur = duration_prev + dur
                    duration_prev = 0  # unknown, not needed in this case
            else:
                dur += 1
                if dur >= 3:
                    report = counter
                    if dur == 3 and counter > counter_prev:
                        counter_total += counter - counter_prev
                    elif dur == 3 and counter < counter_prev:
                        duration_report = int((duration_prev / 10.0) * 1000)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            client.publish('person',
                           payload=json.dumps({
                               'count': report, 'total': counter_total}),
                           qos=0, retain=False)
            if duration_report is not None:
                client.publish('person/duration',
                               payload=json.dumps({'duration': duration_report}),
                               qos=0, retain=False)

        ### TODO: Send the frame to the FFMPEG server ###

        ### TODO: Write an output image if `single_image_mode` ###
        frame = cv2.resize(frame, (768, 432))
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

    cap.release()
    cv2.destroyAllWindows()
Exemplo n.º 18
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    modelPath = args.model
    deviceType = args.device
    cpuExt = args.cpu_extension
    probThresh = args.prob_threshold
    filePath = args.input
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = probThresh

    ### TODO: Load the model through `infer_network` ###

    if filePath.lower() == "cam":
        camera = cv2.VideoCapture(0)
    elif filePath.split(".")[-1].lower() in ['jpg', 'jpeg', 'png', 'bmp']:
        infer_network.load_model(modelPath, 1, deviceType, cpuExt)
        image_input_shape = infer_network.get_input_shape()
        #print(image_input_shape)
        img = cv2.imread(filePath, cv2.IMREAD_COLOR)
        resized_frame = cv2.resize(
            img, (image_input_shape[3], image_input_shape[2]))
        frame_preproc = np.transpose(
            np.expand_dims(resized_frame.copy(), axis=0), (0, 3, 1, 2))
        infer_network.exec_net(frame_preproc)
        if infer_network.wait() == 0:
            outputs = infer_network.get_output()
            box_frame, count, bbox = extract_box(img, outputs, prob_threshold)
            cv2.putText(box_frame, "Count:" + str(count), (20, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
            cv2.imwrite('output.jpg', box_frame)
        return
    else:
        if not os.path.isfile(filePath):
            #print(" Given input file is not present.")
            exit(1)
        camera = cv2.VideoCapture(filePath)
    ### TODO: Handle the input stream ###

    client.connect(MQTT_HOST, MQTT_PORT, MQTT_KEEPALIVE_INTERVAL)
    if not camera.isOpened():
        #print("Error opening video stream or file")
        exit(1)
    cur_req_id = 0
    next_req_id = 1
    num_requests = 2
    infer_network.load_model(modelPath, num_requests, deviceType, cpuExt)
    image_input_shape = infer_network.get_input_shape()
    #print(image_input_shape)
    ret, frame = camera.read()
    ### TODO: Loop until stream is over ###
    total_count = 0
    pres_count = 0
    prev_count = 0
    start_time = 0
    no_bbox = 0
    duration = 0
    prev_bbox_x = 0

    while camera.isOpened():

        ### TODO: Read from the video capture ###
        ret, next_frame = camera.read()
        if not ret:
            break
        key = cv2.waitKey(60)
        ### TODO: Pre-process the image as needed ###
        resized_frame = cv2.resize(
            next_frame.copy(), (image_input_shape[3], image_input_shape[2]))
        frame_preproc = np.transpose(
            np.expand_dims(resized_frame.copy(), axis=0), (0, 3, 1, 2))
        ### TODO: Start asynchronous inference for specified request ###
        infer_network.exec_net(frame_preproc.copy(), req_id=next_req_id)
        ### TODO: Wait for the result ###
        if infer_network.wait(cur_req_id) == 0:
            ### TODO: Get the results of the inference request ###
            outputs = infer_network.get_output(cur_req_id)
            ### TODO: Extract any desired stats from the results ###
            frame, pres_count, bbox = extract_box(frame.copy(), outputs[0],
                                                  prob_threshold)
            box_w = frame.shape[1]
            tl, br = bbox  #top_left, bottom_right

            if pres_count > prev_count:
                start_time = time.time()
                total_count += pres_count - prev_count
                no_bbox = 0
                client.publish("person", json.dumps({"total": total_count}))
            elif pres_count < prev_count:
                if no_bbox <= 20:
                    pres_count = prev_count
                    no_bbox += 1
                elif prev_bbox_x < box_w - 200:
                    pres_count = prev_count
                    no_bbox = 0
                else:
                    duration = int(time.time() - start_time)
                    client.publish("person/duration",
                                   json.dumps({"duration": duration}))
            if tl is not None and br is not None:
                prev_bbox_x = int((tl[0] + br[0]) / 2)
            prev_count = pres_count

            client.publish("person", json.dumps({"count": pres_count}))

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        cur_req_id, next_req_id = next_req_id, cur_req_id
        frame = next_frame
        if key == 27:
            break
    #output_video.release()
    camera.release()
    client.disconnect()
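
Exemplo n.º 18 above depends on an extract_box helper that is not included. A minimal sketch, assuming the raw output can be flattened to rows of [image_id, label, conf, x_min, y_min, x_max, y_max] and that the helper returns the annotated frame, the detection count, and the last box as a (top_left, bottom_right) pair, or (None, None) when nothing is detected:

import cv2
import numpy as np

def extract_box(frame, outputs, prob_threshold):
    # outputs is assumed to flatten to rows of
    # [image_id, label, conf, x_min, y_min, x_max, y_max]
    height, width = frame.shape[:2]
    count = 0
    top_left, bottom_right = None, None
    for det in np.reshape(outputs, (-1, 7)):
        if float(det[2]) > prob_threshold:
            top_left = (int(det[3] * width), int(det[4] * height))
            bottom_right = (int(det[5] * width), int(det[6] * height))
            cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)
            count += 1
    return frame, count, (top_left, bottom_right)
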
class HeadPoseEstimationModel:
    '''
    Class for the Head Pose Estimation Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_xml = model_name
        self.device =  device
        self.extensions = extensions
         # Initialise the class
        self.infer_network = Network()
        #raise NotImplementedError

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.infer_network.load_model(self.model_xml, self.device, self.extensions)
        #raise NotImplementedError

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        self.infer_network.exec_net(image)

        # Wait for the result
        if self.infer_network.wait() == 0:
            # end time of inference
            end_time = time.time()
            result = (self.infer_network.get_output())#[self.infer_network.output_blob]
            return result


    def check_model(self):
        raise NotImplementedError

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        # [1x3x60x60]
        net_input_shape = self.infer_network.get_input_shape()

        p_frame = np.copy(image)
        p_frame = cv2.resize(p_frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2,0,1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        return p_frame
        #raise NotImplementedError

    def preprocess_output(self, outputs, image, face, facebox, print_flag=True, threshold = 0.5):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.

        Output layer names in Inference Engine format:
        name: "angle_y_fc", shape: [1, 1] - Estimated yaw (in degrees).
        name: "angle_p_fc", shape: [1, 1] - Estimated pitch (in degrees).
        name: "angle_r_fc", shape: [1, 1] - Estimated roll (in degrees).

        Each output contains one float value (yaw, pitch, roll).
        '''

        yaw = outputs['angle_y_fc'][0][0]   # rotation about the vertical (y) axis
        pitch = outputs['angle_p_fc'][0][0] # rotation about the lateral (x) axis
        roll = outputs['angle_r_fc'][0][0]  # rotation about the longitudinal (z) axis
        
        #Draw output
        if(print_flag):
            cv2.putText(image,"y:{:.1f}".format(yaw), (20,20), 0, 0.6, (255,255,0))
            cv2.putText(image,"p:{:.1f}".format(pitch), (20,40), 0, 0.6, (255,255,0))
            cv2.putText(image,"r:{:.1f}".format(roll), (20,60), 0, 0.6, (255,255,0))
            
            xmin, ymin,_ , _ = facebox
            face_center = (xmin + face.shape[1] / 2, ymin + face.shape[0] / 2, 0)
            self.draw_axes(image, face_center, yaw, pitch, roll)
        
        return image, [yaw, pitch, roll]
    
    # code source: https://knowledge.udacity.com/questions/171017
    def draw_axes(self, frame, center_of_face, yaw, pitch, roll):
        focal_length = 950.0
        scale = 50

        yaw *= np.pi / 180.0
        pitch *= np.pi / 180.0
        roll *= np.pi / 180.0
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        Rx = np.array([[1, 0, 0],
                    [0, math.cos(pitch), -math.sin(pitch)],
                    [0, math.sin(pitch), math.cos(pitch)]])
        Ry = np.array([[math.cos(yaw), 0, -math.sin(yaw)],
                    [0, 1, 0],
                    [math.sin(yaw), 0, math.cos(yaw)]])
        Rz = np.array([[math.cos(roll), -math.sin(roll), 0],
                    [math.sin(roll), math.cos(roll), 0],
                    [0, 0, 1]])
        # R = np.dot(Rz, Ry, Rx)
        # ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
        # R = np.dot(Rz, np.dot(Ry, Rx))
        R = Rz @ Ry @ Rx
        # print(R)
        camera_matrix = self.build_camera_matrix(center_of_face, focal_length)
        xaxis = np.array(([1 * scale, 0, 0]), dtype='float32').reshape(3, 1)
        yaxis = np.array(([0, -1 * scale, 0]), dtype='float32').reshape(3, 1)
        zaxis = np.array(([0, 0, -1 * scale]), dtype='float32').reshape(3, 1)
        zaxis1 = np.array(([0, 0, 1 * scale]), dtype='float32').reshape(3, 1)
        o = np.array(([0, 0, 0]), dtype='float32').reshape(3, 1)
        o[2] = camera_matrix[0][0]
        xaxis = np.dot(R, xaxis) + o
        yaxis = np.dot(R, yaxis) + o
        zaxis = np.dot(R, zaxis) + o
        zaxis1 = np.dot(R, zaxis1) + o
        xp2 = (xaxis[0] / xaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (xaxis[1] / xaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, (cx, cy), p2, (0, 0, 255), 2)
        xp2 = (yaxis[0] / yaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (yaxis[1] / yaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, (cx, cy), p2, (0, 255, 0), 2)
        xp1 = (zaxis1[0] / zaxis1[2] * camera_matrix[0][0]) + cx
        yp1 = (zaxis1[1] / zaxis1[2] * camera_matrix[1][1]) + cy
        p1 = (int(xp1), int(yp1))
        xp2 = (zaxis[0] / zaxis[2] * camera_matrix[0][0]) + cx
        yp2 = (zaxis[1] / zaxis[2] * camera_matrix[1][1]) + cy
        p2 = (int(xp2), int(yp2))
        cv2.line(frame, p1, p2, (255, 0, 0), 2)
        cv2.circle(frame, p2, 3, (255, 0, 0), 2)
        return frame
    # code source: https://knowledge.udacity.com/questions/171017
    def build_camera_matrix(self, center_of_face, focal_length):
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        camera_matrix = np.zeros((3, 3), dtype='float32')
        camera_matrix[0][0] = focal_length
        camera_matrix[0][2] = cx
        camera_matrix[1][1] = focal_length
        camera_matrix[1][2] = cy
        camera_matrix[2][2] = 1
        return camera_matrix
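
A minimal usage sketch for the HeadPoseEstimationModel class above. The model path, the frame, the face crop and its bounding box are all placeholders here; in the real pipeline a face-detection stage supplies the crop and box, and the IR files come from converting head-pose-estimation-adas-0001:

import cv2
import numpy as np

# Hypothetical inputs standing in for the output of an upstream face detector.
frame = np.zeros((432, 768, 3), dtype=np.uint8)
facebox = (300, 100, 360, 160)   # xmin, ymin, xmax, ymax (hypothetical)
face = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]

# Model path is an assumption; point it at the converted IR files.
hp = HeadPoseEstimationModel("head-pose-estimation-adas-0001.xml", device="CPU")
hp.load_model()
outputs = hp.predict(hp.preprocess_input(face))
frame, (yaw, pitch, roll) = hp.preprocess_output(outputs, frame, face, facebox)
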
Exemplo n.º 20
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    global i_w, i_h, prob_threshold
    current_request_num = 0
    total_count = 0
    latest_count = 0
    previous_count = 0
    duration_sum = 0
    duration_in_frame = 0.0
    frame_count = 0
    infer_frame_count = 0
    single_image_mode = False
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    client.connect(HOSTNAME,
                   port=MQTT_PORT,
                   keepalive=60,
                   bind_address=IPADDRESS)
    ### Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          current_request_num,
                                          args.cpu_extension)

    ### Handle the input stream ###
    if args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input

    capture_frames = cv2.VideoCapture(input_stream)
    length_of_video = int(capture_frames.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_rate = int(capture_frames.get(cv2.CAP_PROP_FPS))
    ### Read from the video capture ###
    infer_time_start = time.time()
    if input_stream:
        capture_frames.open(args.input)
    if not capture_frames.isOpened():
        log.error("Unable to Open the Video File.")

    i_w = capture_frames.get(3)
    i_h = capture_frames.get(4)
    out = cv2.VideoWriter(os.path.join("people_counter.mp4"), 0x00000021,
                          frame_rate, (int(i_w), int(i_h)), True)
    while capture_frames.isOpened():

        isEnd, frame = capture_frames.read()
        frame_count += 1
        current_count = 0
        if not isEnd:
            break
        cv2.waitKey(10)
        ### Pre-process the image as needed ###
        inf_image = cv2.resize(frame, (w, h))
        inf_image = inf_image.transpose((2, 0, 1))
        inf_image = inf_image.reshape((n, c, h, w))

        # Starting the Asynchronous Inference:
        inf_start = time.time()
        infer_network.exec_net(current_request_num, inf_image)

        ### Waiting for the result ###
        if infer_network.wait(current_request_num) == 0:
            duration = (time.time() - inf_start)
            results = infer_network.get_output(current_request_num)
            out_frame, current_count = draw_frame_on_inference(frame, results)
            duration_message = "Inference Time Per Frame: {:.3f}ms".format(
                duration * 1000)

        if current_count > 0:
            infer_frame_count += 1
            duration_sum += float(infer_frame_count) / frame_rate

        if current_count > 0 and infer_frame_count > args.frames_ignore and previous_count > 0:
            '''
            If the count of people goes up and stays that way for more than
            args.frames_ignore frames, keep the higher count.
            '''
            previous_count = max(previous_count, current_count)

        if previous_count == 0 and infer_frame_count > args.frames_ignore:
            total_count += current_count
            #             infer_frame_count = 0
            previous_count = max(previous_count, current_count)
            client.publish("person", json.dumps({"count": current_count}))
            client.publish("person", json.dumps({"total": total_count}))

        if args.enable_alert_limit is not None and current_count >= args.enable_alert_limit:
            client.publish(
                "alert",
                json.dumps({
                    "alert_msg": "Stampede",
                    "count": current_count
                }))
            intruder_msg = "STAMPEDE ALERT, CURRENT COUNT {} IS SAME OR EXCEEDED SAFE LIMIT {}".format(
                current_count, args.enable_alert_limit)
            cv2.putText(out_frame, intruder_msg, (15, 45),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (10, 10, 210), 1)

        if previous_count != 0 and current_count == 0:
            duration_in_frame = infer_frame_count / frame_rate
            for i in range(previous_count):
                client.publish("person/duration",
                               json.dumps({"duration": duration_in_frame}))

        if current_count == 0:
            infer_frame_count = 0
            previous_count = current_count
            duration_sum = 0.0
            client.publish("person", json.dumps({"count": current_count}))

        cv2.putText(out_frame, duration_message, (15, 15),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (210, 10, 10), 1)
        people_count_msg = "People counted: in Current Frame: {} ; Total: {}".format(
            current_count, total_count)
        cv2.putText(out_frame, people_count_msg, (15, 30),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (210, 10, 10), 1)
        #         person_duration_msg = "Duration in Frame: {:.2f} seconds".format(duration_sum%60)
        #         cv2.putText(out_frame, person_duration_msg, (15, 45), cv2.FONT_HERSHEY_DUPLEX, 0.5, (210, 10, 10), 1)

        out.write(out_frame)

        client.publish("person", json.dumps({"count": current_count}))

        ### Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(out_frame)
        sys.stdout.flush()

        ### Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('infer_out.jpg', frame)

    capture_frames.release()
    client.disconnect()
Exemplo n.º 21
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    frame_count = 0
    frame_time = 0

    duration_prev = 0
    total_count = 0
    time_thresh = 0
    person_count_in_each_frame = 0
    last_count = 0
    previous_last_count = 0

    font_scale = 0.5
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Flag for the input image
    single_image_mode = False

    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold
    model = args.model
    device = args.device

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(model, device, CPU_EXTENSION)
    infer_network_input_shape = infer_network.get_input_shape()
    #print("\n.... network input shape...  ",infer_network_input_shape,"\n")

    # Check if the input is a webcam
    if args.input == 'CAM':
        input_Type = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_Type = args.input

    # Checks for video file
    else:
        input_Type = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    ### TODO: Handle the input stream ###
    input_stream = cv2.VideoCapture(input_Type)
    if input_Type:
        input_stream.open(args.input)
    if not input_stream.isOpened():
        log.error("ERROR! Unable to open video source")

    # Grab the shape of the input
    width = int(input_stream.get(3))
    height = int(input_stream.get(4))

    if not single_image_mode:
        # The second argument should be `cv2.VideoWriter_fourcc('M','J','P','G')`
        # on Mac, and `0x00000021` on Linux
        # The output keeps the original width x height of the input stream
        out = cv2.VideoWriter('output_video.mp4', 0x00000021, 30,
                              (width, height))
    else:
        out = None

    ### TODO: Loop until stream is over ###
    while input_stream.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = input_stream.read()
        if not flag:
            break
        frame_count += 1
        t = time.time()
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ### n c h w
        preProcessed_frame = cv2.resize(
            frame,
            (infer_network_input_shape[3], infer_network_input_shape[2]))
        preProcessed_frame = preProcessed_frame.transpose((2, 0, 1))
        preProcessed_frame = preProcessed_frame.reshape(
            1, *preProcessed_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        inferencing_start = time.time()
        total_time_spent = None
        infer_network.exec_net(preProcessed_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait() == 0:

            detection_time = time.time() - inferencing_start
            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output()
            frame, current_count = draw_bounding_boxes(frame, result,
                                                       prob_threshold, width,
                                                       height)
            inference_time_message = "Inference time: {:.3f}ms".format(
                detection_time * 1000)
            cv2.putText(frame, inference_time_message, (25, 25),
                        cv2.FONT_HERSHEY_COMPLEX, font_scale, (0, 10, 250), 1)

            ### TODO: Extract any desired stats from the results ###
            if current_count == last_count:
                time_thresh += 1
                if time_thresh >= 10:
                    person_count_in_each_frame = last_count
                    if time_thresh == 10 and last_count > previous_last_count:
                        total_count += last_count - previous_last_count
                    elif time_thresh == 10 and last_count < previous_last_count:
                        total_time_spent = int(
                            (duration_prev / 10.0) * 1000)  # in ms
            else:
                previous_last_count = last_count
                last_count = current_count
                if time_thresh >= 10:
                    duration_prev = time_thresh
                    time_thresh = 0
                else:
                    time_thresh = duration_prev + time_thresh

            current_count_label = "No of Persons : {:.2f}".format(
                current_count)
            cv2.putText(frame, current_count_label, (25, 50), font, font_scale,
                        (255, 0, 0), 1)

            total_count_label = "Total Detected Person : {:.2f}".format(
                total_count)
            cv2.putText(frame, total_count_label, (25, 75), font, font_scale,
                        (255, 0, 0), 1)

            alert_flag = False
            alert_msg = None
            if current_count > 5:
                alert_msg = "ALERT!!! " + str(
                    current_count) + " persons are at same place"
                alert_flag = True
            if total_time_spent is not None and total_time_spent > 3000000:  # 3,000,000 ms = 50 min
                alert_msg = "ALERT!!! " + str(
                    current_count) + " persons have been in the store for a long time."
                alert_flag = True
            if alert_flag:
                # set the rectangle background to red (BGR)
                rectangle_bgr = (0, 0, 255)
                # get the width and height of the text box
                (text_width,
                 text_height) = cv2.getTextSize(alert_msg,
                                                font,
                                                fontScale=font_scale,
                                                thickness=1)[0]
                # set the text start position
                text_offset_x = 0
                text_offset_y = frame.shape[0] - 15
                # make the coords of the box with a small padding of two pixels
                box_coords = ((text_offset_x, text_offset_y),
                              (text_offset_x + text_width + 5,
                               text_offset_y - text_height - 5))
                cv2.rectangle(frame, box_coords[0], box_coords[1],
                              rectangle_bgr, cv2.FILLED)
                cv2.putText(frame,
                            alert_msg, (text_offset_x, text_offset_y),
                            font,
                            0.45,
                            color=(255, 255, 255),
                            thickness=1)

            frame_time += time.time() - t
            fps = frame_count / float(frame_time)
            fps_label = "FPS : {:.2f}".format(fps)
            cv2.putText(frame, fps_label, (25, 100), font, font_scale,
                        (255, 0, 0), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            client.publish(
                "person",
                json.dumps({
                    "count": current_count,
                    "total": total_count
                }))
            if total_time_spent is not None:
                client.publish("person/duration",
                               json.dumps({"duration": total_time_spent}))

        ### TODO: Send the frame to the FFMPEG server ###
        frame = cv2.resize(frame, (768, 432))
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        # Break if escape key pressed
        if key_pressed == 27:
            break

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            frame = cv2.resize(frame, (1920, 1080))
            cv2.imwrite('output_image.jpg', frame)
        else:
            out.write(frame)

    # Release the capture and destroy any OpenCV windows
    if not single_image_mode:
        out.release()
    input_stream.release()
    cv2.destroyAllWindows()

    ### TODO: Disconnect from MQTT
    client.disconnect()
Exemplo n.º 22
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    global single_image_mode
    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    plugin = Network()
    plugin.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = plugin.get_input_shape()

    ### TODO: Handle the input stream ###
    if args.input == 'CAM':
        args.input = 0

    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True

    cap = cv2.VideoCapture(args.input)
    cap.open(args.input)

    width = int(cap.get(3))
    height = int(cap.get(4))
    fps = cap.get(cv2.CAP_PROP_FPS)  # storing the fps of the video

    ### TODO: Loop until stream is over ###
    frame_no = 0
    conf_arr = []
    while cap.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = cap.read()
        frame_no = frame_no + 1
        start_infr = time.time()
        if frame_no % (args.frame_skip_rate +
                       1) == 0 or single_image_mode == True:
            # frame will be skipped based on the -s argument (frame_skip_rate) to decrease the inference time.
            if not flag:
                break
            ### TODO: Pre-process the image as needed ###
            key_pressed = cv2.waitKey(60)
            new_frame = np.copy(frame)
            p_frame = cv2.resize(frame,
                                 (net_input_shape[3], net_input_shape[2]))
            p_frame = p_frame.transpose((2, 0, 1))
            p_frame = p_frame.reshape(1, *p_frame.shape)

            ### TODO: Start asynchronous inference for specified request ###
            plugin.exec_net(p_frame)

            ### TODO: Wait for the result ###
            if plugin.wait() == 0:
                ### TODO: Get the results of the inference request ###
                result = plugin.get_output()
                ### TODO: Extract any desired stats from the results ###
                out_frame, conf = draw_boxes(new_frame, result, prob_threshold,
                                             width, height)
                conf_arr.append(conf)

                ### TODO: Calculate and send relevant information on ###
                ### current_count, total_count and duration to the MQTT server ###
                ### Topic "person": keys of "count" and "total" ###
                ### Topic "person/duration": key of "duration" ###
                postprocess(conf, frame_no, conf_arr, client, fps)
                infr_arr.append((time.time() - start_infr))

            ### TODO: Send the frame to the FFMPEG server ###
            sys.stdout.buffer.write(out_frame)
            sys.stdout.flush()
            if key_pressed == 27:
                break
            ### TODO: Write an output image if `single_image_mode` ###
            if single_image_mode:
                cv2.imwrite('output_image.jpg', out_frame)
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
Exemplo n.º 23
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    # prob_threshold = args.prob_threshold
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0
    time_on_video = 0
    time_not_on_video = 0
    image_mode = False
    positive_count = 0
    ### TODO: Load the model through `infer_network` ###
    n, c, h, w = infer_network.load_model(args.model, args.device, 1, 1,
                                          cur_request_id,
                                          args.cpu_extension)[1]
    ### TODO: Handle the input stream ###
    # Checks for image input
    if args.input.endswith('.jpg') or args.input.endswith('.png') or \
            args.input.endswith('.bmp'):
        image_mode = True
        media_stream = args.input

    # Checks for webcam input
    elif args.input == 'CAM':
        media_stream = 0

    # Check for video input
    else:
        media_stream = args.input
        assert os.path.isfile(args.input)

    ### TODO: Loop until stream is over ###
    capture = cv2.VideoCapture(media_stream)

    if media_stream:
        capture.open(args.input)

    if not capture.isOpened():
        log.error("Not able to open the video file!")

        ### TODO: Read from the video capture ###
    # global width, height, prob_threshold
    prob_threshold = args.prob_threshold
    width = capture.get(3)
    height = capture.get(4)

    while capture.isOpened():
        check, frame = capture.read()
        if not check:
            break

        ### TODO: Pre-process the image as needed ###
        image = cv2.resize(frame, (w, h))
        image = image.transpose(2, 0, 1)
        image = image.reshape(n, c, h, w)

        ### TODO: Start asynchronous inference for specified request ###
        inference_start = time.time()
        infer_network.exec_net(cur_request_id, image)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            inference_time = time.time() - inference_start

            ### TODO: Get the results of the inference request ###
            result = infer_network.get_output(cur_request_id)

            # if perf_counts:
            # perf_count = infer_network.exec_net(cur_request_id)
            # performance_counts(perf_count)

            ### TODO: Extract any desired stats from the results ###
            current_count = 0
            track_frames = {}
            track_person = {positive_count: 0}
            frame_count = 0

            for character in result[0][0]:
                if character[2] > prob_threshold:
                    frame_count += 1
                    track_frames[frame_count] = character[2]
                    start_time_not_on_video = time.time()
                    positive_count += 1
                    track_person[positive_count] = time_on_video
                    xmin = int(character[3] * width)
                    ymin = int(character[4] * height)
                    xmax = int(character[5] * width)
                    ymax = int(character[6] * height)
                    frame = cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                          (0, 55, 255), 1)

                    time_on_video = start_time_not_on_video - start_time
                    if time_on_video > 3:
                        if current_count > 1:
                            current_count = last_count
                        else:
                            current_count += 1
                    else:
                        current_count = last_count

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if current_count > last_count:
                start_time = time.time()
                time_not_on_video = time.time() - start_time_not_on_video
                if current_count == 1 and last_count == 0:
                    if time_on_video > 2:
                        total_count = total_count + current_count - last_count

            client.publish("person", json.dumps({"total": total_count}))
            if current_count < last_count:
                if current_count == 0:
                    start_time_not_on_video = time.time()
                time_on_video = int(time.time() - start_time)
                if last_count == 0 and time_not_on_video < 0.005:
                    time_on_video = track_person[positive_count] + time_on_video
                client.publish("person/duration",
                               json.dumps({"duration": time_on_video}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            cv2.putText(
                frame, "Inference time =  {:.2f} ms".format(
                    (inference_time * 1000)), (15, 15),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            cv2.putText(frame,
                        "Persons in video frame = {:}".format(last_count),
                        (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10),
                        1)
            cv2.putText(frame, "Total count = {:}".format(total_count),
                        (15, 45), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10),
                        1)
            cv2.putText(frame,
                        "Time on video = {:.2f} s".format(time_on_video),
                        (15, 60), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10),
                        1)
            cv2.putText(
                frame, "Time not on video = {:.3f} s".format(
                    time_not_on_video * 1000), (15, 75),
                cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            key = cv2.waitKey(15)
            if key == ord('q'):
                break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if image_mode:
            cv2.imwrite('output.jpg', frame)

        # cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
Exemplo n.º 24
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # my) init parameters
    current_count = 0
    total_count = 0
    duration = 0
    last_count = 0
    start_time = 0
    isFirst = True
    single_image_mode = False

    # Initialise the class (ok)
    infer_network = Network()

    # Set Probability threshold for detections (ok)
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ### (ok)
    infer_network.load_model(args.model,
                             device="CPU",
                             cpu_extension=args.cpu_extension)
    n, c, h, w = infer_network.get_input_shape()

    ### TODO: Handle the input stream ### (ok)
    if args.input == 'CAM':
        input_stream = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input
    else:
        input_stream = args.input
        #assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    cap.open(input_stream)

    ### TODO: Loop until stream is over ###(ok)
    while cap.isOpened():
        ### TODO: Read from the video capture ###(ok)
        ret, frame = cap.read()
        key_pressed = cv2.waitKey(60)
        if not ret:
            break

        ### TODO: Pre-process the image as needed ###(ok)
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))

        ### TODO: Start asynchronous inference for specified request ###(ok)
        infer_network.exec_net(image)

        ### TODO: Wait for the result ###(ok)
        if infer_network.wait() == 0:
            ### TODO: Get the results of the inference request ###(ok)
            result = infer_network.get_output()

            ### TODO: Extract any desired stats from the results ###(ok)
            boxes, score = post_detection(result, frame.shape, prob_threshold)

            for box in boxes:
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 238, 255),
                              1)

            ### TODO: Calculate and send relevant information on ###(ok)
            ### current_count, total_count and duration to the MQTT server ###
            if len(boxes) != current_count:
                if isFirst:
                    ts1 = time.time()
                    isFirst = False
                if time.time() - ts1 > 0.5:
                    current_count = len(boxes)
                    isFirst = True

            ### Topic "person": keys of "count" and "total" ###(ok)
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            ### Topic "person/duration": key of "duration" ###(ok)
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))
            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count
            if key_pressed == ord('q'):
                break

        ### TODO: Send the frame to the FFMPEG server ###(ok)
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###(ok)
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)

    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
    infer_network.clean()
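
Exemplo n.º 24 above calls a post_detection helper that is not part of the listing. A minimal sketch, assuming the raw result can be flattened to rows of [image_id, label, conf, x_min, y_min, x_max, y_max] and that the helper returns pixel-space boxes above the threshold together with their scores:

import numpy as np

def post_detection(result, frame_shape, prob_threshold):
    # result is assumed to flatten to rows of
    # [image_id, label, conf, x_min, y_min, x_max, y_max]
    frame_h, frame_w = frame_shape[:2]
    boxes, scores = [], []
    for det in np.reshape(result, (-1, 7)):
        conf = float(det[2])
        if conf > prob_threshold:
            boxes.append((int(det[3] * frame_w), int(det[4] * frame_h),
                          int(det[5] * frame_w), int(det[6] * frame_h)))
            scores.append(conf)
    return boxes, scores
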
Exemplo n.º 25
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # declaring variables to count the people and duration
    total_number = 0
    last_number = 0
    missed_number = 0
    start = 0
    duration = 0
    frame_number = 0

    # Initialise the class
    infer_network = Network()

    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    # This applies only for faster rcnn since it outputs two things for
    # input shape: image: [1, 3] and image tensor: [1, 3, 600, 600]
    # We need image tensor
    # input_shape = net_input_shape['image_tensor']

    ### TODO: Handle the input stream ###
    single_image_mode = False

    if args.input == 'CAM':
        args.input = 0
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
    else:
        # Check the input value
        assert os.path.isfile(args.input), "Input file doesn't exist..."

    captured = cv2.VideoCapture(args.input)
    captured.open(args.input)

    # Grab the shape of the input
    width = int(captured.get(3))
    height = int(captured.get(4))

    # Processing the video
    # Create a video writer for the output video
    # if not single_image_mode:
    #     # out = cv2.VideoWriter('out.mp4', 0x00000021, 30, (width, height))    # for linux
    #     out = cv2.VideoWriter('out_frcnn.mp4', cv2.VideoWriter_fourcc(
    #         'M', 'J', 'P', 'G'), 30, (width, height))   # for Mac
    # else:
    #     out = None

    ### TODO: Loop until stream is over ###
    while captured.isOpened():
        ### TODO: Read from the video capture ###
        flag, frame = captured.read()
        frame_number += 1
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(
            frame, (net_input_shape[3], net_input_shape[2]))  # for SSD model
        # p_frame = cv2.resize(
        #     frame, (input_shape[3], input_shape[2]))  # for faster rcnn
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        # Input to the network (only required for faster rcnn)
        # network_input_data = {'image_tensor': p_frame,
        #                       'image_info': p_frame.shape[1:]}

        # request id for making inferences
        request_id = 0

        ### TODO: Start asynchronous inference for specified request ###
        # Start asynchronous inference for specified request.
        infer_start = time.time()
        infer_network.exec_net(request_id, p_frame)  # for SSD
        # infer_network.exec_net(
        #     request_id, network_input_data)    # for faster rcnn

        ### TODO: Wait for the result ###
        if infer_network.wait(request_id) == 0:
            ### TODO: Get the results of the inference request ###
            det_time = time.time() - infer_start

            result = infer_network.get_output()
            ### TODO: Extract any desired stats from the results ###

            # Draw bounding box
            frame, current_number = draw_bbox(prob_threshold, result, frame,
                                              width, height)
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###

            # When a person enters the frame
            if current_number > last_number:
                start = time.time()
                total_number += current_number - last_number
                client.publish("person", json.dumps({"total": total_number}))

            # when a person leaves the frame
            if current_number == 0 and last_number != 0:
                missed_number += 1

                # wait for few frames to make sure the person has actually left the frame
                # this number should be bigger for SSD because of high false negatives
                # missing frame threshold
                if missed_number >= 30:  # use 30 for SSD and 5 for faster rcnn
                    duration = int(time.time() - start)
                    client.publish("person/duration",
                                   json.dumps({"duration": duration}))
                    # resetting the dropped frames
                    missed_number = 0
                    # updating the last number
                    last_number = current_number
            else:
                # publishing the results
                client.publish("person", json.dumps({"count": current_number}))
                # updating the last number
                last_number = current_number

            # Write out the frame
            # out.write(frame)
            # Break if escape key pressed
            if key_pressed == 27:
                break
        ### TODO: Send the frame to the FFMPEG server ###
        # Resize the frame according to the video
        frame = cv2.resize(frame, (768, 432))
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()

        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite("output_image.jpg", p_frame)

    # Release the capture and destroy any OpenCV windows
    captured.release()
    cv2.destroyAllWindows()
    # TODO: Disconnect from MQTT
    client.disconnect()
Exemplo n.º 26
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """

    # Input arguments
    modelArgs = args.model
    deviceArgs = args.device
    cpuExtensionArgs = args.cpu_extension
    propThresholdArgs = args.prob_threshold
    filePathArgs = args.input

    # Initialise the class
    infer_network = Network()

    #Load the model through `infer_network`
    infer_network.load_model(modelArgs, deviceArgs, cpuExtensionArgs)
    net_input_shape = infer_network.get_input_shape()

    # Set Probability threshold for detections
    prob_threshold = propThresholdArgs

    # Handle image, video or webcam
    # Create a flag for single images
    # Flag for the input image
    single_image_mode = False
    # Check if the input is a webcam
    if filePathArgs == 'CAM':
        filePathArgs = 0
    elif filePathArgs.endswith('.jpg') or filePathArgs.endswith('.bmp'):
        single_image_mode = True

    # Handle the input stream
    # Get and open video capture
    capture = cv2.VideoCapture(filePathArgs)
    capture.open(filePathArgs)

    # Grab the shape of the input
    width = int(capture.get(3))
    height = int(capture.get(4))

    # initialise some variables
    report = 0
    counter = 0
    counter_prev = 0
    duration_prev = 0
    counter_total = 0
    dur = 0
    request_id = 0

    # Process frames until the video ends, or process is exited
    while capture.isOpened():
        # Read the next frame
        flag, frame = capture.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        # Pre-process the frame
        #Re-size the frame to inputshape_width x inputshape_height
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        #Start asynchronous inference for specified request
        #Perform inference on the frame
        duration_report = None
        inf_start = time.time()
        infer_network.exec_net(p_frame)
        # Get the output of inference
        if infer_network.wait() == 0:
            det_time = time.time() - inf_start
            # Results of the output layer of the network
            output_results = infer_network.get_output()
            #Extract any desired stats from the results
            #Update the frame to include detected bounding boxes
            frame_with_box, pointer = draw_boxes(frame, output_results,
                                                 prob_threshold, width, height)
            #Display inference time
            inf_time_message = "Manasse_Ngudia | Inference time: {:.3f}ms"\
                               .format(det_time * 1000)
            cv2.putText(frame_with_box, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.45, (200, 10, 10), 1)

            #Calculate and send relevant information on
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if pointer != counter:
                counter_prev = counter
                counter = pointer
                if dur >= 3:
                    duration_prev = dur
                    dur = 0
                else:
                    dur = duration_prev + dur
                    duration_prev = 0  # unknown, not needed in this case
            else:
                dur += 1
                if dur >= 3:
                    report = counter
                    if dur == 3 and counter > counter_prev:
                        counter_total += counter - counter_prev
                    elif dur == 3 and counter < counter_prev:
                        duration_report = int((duration_prev / 10.0) * 1000)

            client.publish('person',
                           payload=json.dumps({
                               'count': report,
                               'total': counter_total
                           }),
                           qos=0,
                           retain=False)
            if duration_report is not None:
                client.publish('person/duration',
                               payload=json.dumps(
                                   {'duration': duration_report}),
                               qos=0,
                               retain=False)

            #Send frame to the ffmpeg server
            #  Resize the frame
            #frame = cv2.resize(frame, (768, 432))
            sys.stdout.buffer.write(frame_with_box)
            sys.stdout.flush()

            if single_image_mode:
                cv2.imwrite('output_image.jpg', frame_with_box)

        # Break if escape key pressed
        if key_pressed == 27:
            break

    # Release the capture and destroy any OpenCV windows
    capture.release()
    cv2.destroyAllWindows()
    client.disconnect()
Exemplo n.º 27
0
def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to network,
    and output stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    # Initialise the class
    infer_network = Network()
    # Set Probability threshold for detections
    prob_threshold = args.prob_threshold

    ### TODO: Load the model through `infer_network` ###
    infer_network.load_model(args.model, args.device, args.cpu_extension)
    net_input_shape = infer_network.get_input_shape()

    ### TODO: Handle the input stream ###
    single_image_mode = False
    if args.input == 'CAM':
        input_stream = 0

    # Checks for input image
    elif args.input.endswith('.jpg') or args.input.endswith('.bmp'):
        single_image_mode = True
        input_stream = args.input

    # Checks for video file
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)

    if input_stream:
        cap.open(args.input)
    width = int(cap.get(3))
    height = int(cap.get(4))
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    # Initialise the request id, counters and timers once, before the loop
    # (they must not be reset on every frame)
    cur_request_id = 0
    last_count = 0
    total_count = 0
    start_time = 0

    ### TODO: Loop until stream is over ###
    while cap.isOpened():
        # Read the next frame
        flag, frame = cap.read()
        if not flag:
            break
        key_pressed = cv2.waitKey(60)

        ### TODO: Read from the video capture ###

        ### TODO: Pre-process the image as needed ###
        p_frame = cv2.resize(frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        ### TODO: Start asynchronous inference for specified request ###
        time_start = time.time()
        infer_network.exec_net(cur_request_id, p_frame)

        ### TODO: Wait for the result ###
        if infer_network.wait(cur_request_id) == 0:
            total_time = time.time() - time_start
            result = infer_network.get_output(cur_request_id)
            if args.perf_counts:
                perf_count = infer_network.performance_counter(cur_request_id)
                performance_counts(perf_count)

            ### TODO: Get the results of the inference request ###

            ### TODO: Extract any desired stats from the results ###
            frame, current_count = ssd_out(frame, result, width, height)
            inf_time_message = "Inference time: {:.3f}ms"\
                               .format(total_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)

            ### TODO: Calculate and send relevant information on ###
            ### current_count, total_count and duration to the MQTT server ###
            ### Topic "person": keys of "count" and "total" ###
            ### Topic "person/duration": key of "duration" ###
            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                client.publish("person", json.dumps({"total": total_count}))

            # Person duration in the video is calculated
            if current_count < last_count:
                duration = int(time.time() - start_time)
                # Publish messages to the MQTT server
                client.publish("person/duration",
                               json.dumps({"duration": duration}))

            client.publish("person", json.dumps({"count": current_count}))
            last_count = current_count

            if key_pressed == 27:
                break

        ### TODO: Send the frame to the FFMPEG server ###
        sys.stdout.buffer.write(frame)
        sys.stdout.flush()
        ### TODO: Write an output image if `single_image_mode` ###
        if single_image_mode:
            cv2.imwrite('output_image.jpg', frame)
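

# Note: `ssd_out` is called above but not included in this snippet. A minimal
# sketch of such a helper for an SSD-style output of shape [1, 1, N, 7] is
# given below as an assumption; the original project's signature and threshold
# handling may differ.
def ssd_out(frame, result, width, height, prob_threshold=0.5):
    """Draw boxes for detections above the threshold and return the count."""
    current_count = 0
    for obj in result[0][0]:
        # obj = [image_id, label, conf, x_min, y_min, x_max, y_max]
        if obj[2] > prob_threshold:
            xmin = int(obj[3] * width)
            ymin = int(obj[4] * height)
            xmax = int(obj[5] * width)
            ymax = int(obj[6] * height)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 1)
            current_count += 1
    return frame, current_count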
Exemplo n.º 28
0
def main():
    """
    Load the network and parse the output.
    :return: None
    """
    get_args()
    global is_async_mode
    nextReq = 1
    currReq = 0
    nextReq_s = 1
    currReq_s = 0
    prevVideo = None
    vid_finished = [False] * len(videos)
    min_FPS = min(
        [videos[i][1].video.get(cv2.CAP_PROP_FPS) for i in range(len(videos))])

    # Initialise the class
    infer_network = Network()
    infer_network_safety = Network()
    # Load the network to IE plugin to get shape of input layer
    plugin, (batch_size, channels, model_height, model_width) = \
        infer_network.load_model(conf_modelLayers, targetDevice, 1, 1, 2, cpu_extension)
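    # When a safety-gear model is provided, it is loaded onto the same plugin
    # instance returned by the first load_model call.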
    if use_safety_model:
        batch_size_sm, channels_sm, model_height_sm, model_width_sm = \
            infer_network_safety.load_model(conf_safety_modelLayers, targetDevice, 1, 1, 2, cpu_extension, plugin)[1]

    while True:
        for index, currVideo in videos:
            # Read image from video/cam
            vfps = int(round(currVideo.video.get(cv2.CAP_PROP_FPS)))
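            # Faster streams skip frames so that every input advances at the
            # rate of the slowest stream (min_FPS).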
            for i in range(0, int(round(vfps / min_FPS))):
                ret, current_img = currVideo.video.read()
                if not ret:
                    vid_finished[index] = True
                    break
            if vid_finished[index]:
                stream_end_frame = np.zeros(
                    (int(currVideo.height), int(currVideo.width), 1),
                    dtype='uint8')
                cv2.putText(
                    stream_end_frame, "Input file {} has ended".format(
                        name_of_videos[index][1].split('/')[-1]),
                    (10, int(currVideo.height / 2)), cv2.FONT_HERSHEY_COMPLEX,
                    1, (255, 255, 255), 2)
                cv2.imshow(currVideo.name, stream_end_frame)
                continue
            # Transform image to person detection model input
            rsImg = cv2.resize(current_img, (model_width, model_height))
            rsImg = rsImg.transpose((2, 0, 1))
            rsImg = rsImg.reshape(
                (batch_size, channels, model_height, model_width))

            infer_start_time = datetime.datetime.now()
            # Infer current image
            if is_async_mode:
                infer_network.exec_net(nextReq, rsImg)
            else:
                infer_network.exec_net(currReq, rsImg)
                prevVideo = currVideo
                previous_img = current_img

            # Wait for previous request to end
            if infer_network.wait(currReq) == 0:
                infer_end_time = (datetime.datetime.now() -
                                  infer_start_time) * 1000

                in_frame_workers = []

                people = 0
                violations = 0
                hard_hat_detection = False
                vest_detection = False
                result = infer_network.get_output(currReq)
                # Filter output
                for obj in result[0][0]:
                    if obj[2] > conf_inferConfidenceThreshold:
                        xmin = int(obj[3] * prevVideo.width)
                        ymin = int(obj[4] * prevVideo.height)
                        xmax = int(obj[5] * prevVideo.width)
                        ymax = int(obj[6] * prevVideo.height)
                        xmin = int(xmin -
                                   padding) if (xmin - padding) > 0 else 0
                        ymin = int(ymin -
                                   padding) if (ymin - padding) > 0 else 0
                        xmax = int(xmax + padding) if (
                            xmax +
                            padding) < prevVideo.width else prevVideo.width
                        ymax = int(ymax + padding) if (
                            ymax +
                            padding) < prevVideo.height else prevVideo.height
                        cv2.rectangle(previous_img, (xmin, ymin), (xmax, ymax),
                                      (0, 255, 0), 2)
                        people += 1
                        in_frame_workers.append((xmin, ymin, xmax, ymax))
                        new_frame = previous_img[ymin:ymax, xmin:xmax]
                        if use_safety_model:

                            # Transform image to safety model input
                            in_frame_sm = cv2.resize(
                                new_frame, (model_width_sm, model_height_sm))
                            in_frame_sm = in_frame_sm.transpose((2, 0, 1))
                            in_frame_sm = in_frame_sm.reshape(
                                (batch_size_sm, channels_sm, model_height_sm,
                                 model_width_sm))

                            infer_start_time_sm = datetime.datetime.now()
                            if is_async_mode:
                                infer_network_safety.exec_net(
                                    nextReq_s, in_frame_sm)
                            else:
                                infer_network_safety.exec_net(
                                    currReq_s, in_frame_sm)
                            # Wait for the result
                            infer_network_safety.wait(currReq_s)
                            infer_end_time_sm = (datetime.datetime.now() -
                                                 infer_start_time_sm) * 1000

                            result_sm = infer_network_safety.get_output(
                                currReq_s)
                            # Filter output
                            hard_hat_detection = False
                            vest_detection = False
                            detection_list = []
                            for obj_sm in result_sm[0][0]:

                                if (obj_sm[2] > 0.4):
                                    # Detect safety vest
                                    if (int(obj_sm[1])) == 2:
                                        xmin_sm = int(obj_sm[3] *
                                                      (xmax - xmin))
                                        ymin_sm = int(obj_sm[4] *
                                                      (ymax - ymin))
                                        xmax_sm = int(obj_sm[5] *
                                                      (xmax - xmin))
                                        ymax_sm = int(obj_sm[6] *
                                                      (ymax - ymin))
                                        if vest_detection == False:
                                            detection_list.append([
                                                xmin_sm + xmin, ymin_sm + ymin,
                                                xmax_sm + xmin, ymax_sm + ymin
                                            ])
                                            vest_detection = True

                                    # Detect hard-hat
                                    if int(obj_sm[1]) == 4:
                                        xmin_sm_v = int(obj_sm[3] *
                                                        (xmax - xmin))
                                        ymin_sm_v = int(obj_sm[4] *
                                                        (ymax - ymin))
                                        xmax_sm_v = int(obj_sm[5] *
                                                        (xmax - xmin))
                                        ymax_sm_v = int(obj_sm[6] *
                                                        (ymax - ymin))
                                        if hard_hat_detection == False:
                                            detection_list.append([
                                                xmin_sm_v + xmin,
                                                ymin_sm_v + ymin,
                                                xmax_sm_v + xmin,
                                                ymax_sm_v + ymin
                                            ])
                                            hard_hat_detection = True

                            if hard_hat_detection is False or vest_detection is False:
                                violations += 1
                            for _rect in detection_list:
                                cv2.rectangle(current_img,
                                              (_rect[0], _rect[1]),
                                              (_rect[2], _rect[3]),
                                              (0, 255, 0), 2)
                            if is_async_mode:
                                currReq_s, nextReq_s = nextReq_s, currReq_s

                        # Use OpenCV if worker-safety-model is not provided
                        else:
                            violations = detect_workers(
                                in_frame_workers, previous_img)

                # Check if detected violations equals previous frames
                if violations == prevVideo.currentViolationCount:
                    prevVideo.currentViolationCountConfidence += 1

                    # If frame threshold is reached, change validated count
                    if prevVideo.currentViolationCountConfidence == conf_inFrameViolationsThreshold:

                        # If another violation occurred, save image
                        if prevVideo.currentViolationCount > prevVideo.prevViolationCount:
                            prevVideo.totalViolations += (
                                prevVideo.currentViolationCount -
                                prevVideo.prevViolationCount)
                        prevVideo.prevViolationCount = prevVideo.currentViolationCount
                else:
                    prevVideo.currentViolationCountConfidence = 0
                    prevVideo.currentViolationCount = violations

                # Check if detected people count equals previous frames
                if people == prevVideo.currentPeopleCount:
                    prevVideo.currentPeopleCountConfidence += 1

                    # If frame threshold is reached, change validated count
                    if prevVideo.currentPeopleCountConfidence == conf_inFrameViolationsThreshold:
                        prevVideo.currentTotalPeopleCount += (
                            prevVideo.currentPeopleCount -
                            prevVideo.prevPeopleCount)
                        if prevVideo.currentTotalPeopleCount > prevVideo.prevPeopleCount:
                            prevVideo.totalPeopleCount += prevVideo.currentTotalPeopleCount - prevVideo.prevPeopleCount
                        prevVideo.prevPeopleCount = prevVideo.currentPeopleCount
                else:
                    prevVideo.currentPeopleCountConfidence = 0
                    prevVideo.currentPeopleCount = people

                frame_end_time = datetime.datetime.now()
                cv2.putText(
                    previous_img,
                    'Total people count: ' + str(prevVideo.totalPeopleCount),
                    (10, prevVideo.height - 10), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(
                    previous_img, 'Current people count: ' +
                    str(prevVideo.currentTotalPeopleCount),
                    (10, prevVideo.height - 40), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(
                    previous_img,
                    'Total violation count: ' + str(prevVideo.totalViolations),
                    (10, prevVideo.height - 70), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(
                    previous_img, 'FPS: %0.2f' %
                    (1 / (frame_end_time -
                          prevVideo.frame_start_time).total_seconds()),
                    (10, prevVideo.height - 100), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)
                cv2.putText(
                    previous_img,
                    "Inference time: N/A for async mode" if is_async_mode else
                    "Inference time: {:.3f} ms".format(
                        infer_end_time.total_seconds()),
                    (10, prevVideo.height - 130), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 255), 2)

                cv2.imshow(prevVideo.name, previous_img)
                prevVideo.frame_start_time = datetime.datetime.now()
            # Swap
            if is_async_mode:
                currReq, nextReq = nextReq, currReq
                previous_img = current_img
                prevVideo = currVideo
            if cv2.waitKey(1) == 27:
                print("Attempting to stop input files")
                infer_network.clean()
                infer_network_safety.clean()
                cv2.destroyAllWindows()
                return

        if False not in vid_finished:
            infer_network.clean()
            infer_network_safety.clean()
            cv2.destroyAllWindows()
            break
def main():
    # Plugin initialization for specified device and load extensions library
    global rolling_log
    global TARGET_DEVICE
    global videoCapsJson

    env_parser()
    check_args()
    parse_conf_file()

    if TARGET_DEVICE not in acceptedDevices:
        print("Unsupporterd device " + TARGET_DEVICE + ". Defaulting to CPU")
        TARGET_DEVICE = 'CPU'

    # Initialize the class
    infer_network = Network()
    # Load the network to IE Plugin
    n, c, h, w = infer_network.load_model(model_xml, TARGET_DEVICE, 1, 1, 2,
                                          CPU_EXTENSION)[1]
    minFPS = min([i.cap.get(cv2.CAP_PROP_FPS) for i in videoCaps])
    waitTime = int(
        round(1000 / minFPS /
              len(videoCaps)))  # wait time in ms between showing frames
    for vc in videoCaps:
        vc.init_vw(h, w, minFPS)

    statsWidth = w if w > 345 else 345
    statsHeight = h if h > (len(videoCaps) * 20 + 15) else (
        len(videoCaps) * 20 + 15)
    statsVideo = cv2.VideoWriter(os.path.join('resources',
                                              'Statistics.mp4'), 0x00000021,
                                 minFPS, (statsWidth, statsHeight), True)
    if not statsVideo.isOpened():
        print("Couldn't open stats video for writing")
        sys.exit(4)

    # Read the labels file
    if labels_file:
        with open(labels_file, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    # Init a rolling log to store events
    rolling_log_size = int((h - 15) / 20)
    rolling_log = collections.deque(maxlen=rolling_log_size)
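    # The deque length matches the number of 20-pixel text rows that fit in the
    # stats window, so old events scroll off as new ones are appended.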

    # Init inference request IDs
    cur_request_id = 0
    next_request_id = 1
    # Start with async mode enabled
    is_async_mode = True

    if not UI_OUTPUT:
        # Arrange windows so they are not overlapping
        arrange_windows(w, h)
        print("To stop the execution press Esc button")

    for idx, vc in enumerate(videoCaps):
        vc.start_time = datetime.datetime.now()
        vc.pos = idx

    if UI_OUTPUT:
        videoCapsJson = videoCaps.copy()

    while True:

        # If all video captures are closed stop the loop
        no_more_data = [videoCap.closed for videoCap in videoCaps]
        # loop over all video captures
        for idx, videoCapInfer in enumerate(videoCaps):

            # read the next frame
            vfps = int(round(videoCapInfer.cap.get(cv2.CAP_PROP_FPS)))
            for i in range(0, int(round(vfps / minFPS))):
                ret, frame = videoCapInfer.cap.read()
                videoCapInfer.cur_frame_count += 1
                # If the read failed close the program
                if not ret:
                    no_more_data[idx] = True
                    break

            if no_more_data[idx]:
                if UI_OUTPUT:
                    videoCaps.pop(idx)
                    continue
                else:
                    stream_end_frame = np.zeros((h, w, 1), dtype='uint8')
                    cv2.putText(
                        stream_end_frame, "Input file {} has ended".format(
                            videoCapInfer.cap_name), (20, 150),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
                    cv2.imshow(videoCapInfer.cap_name, stream_end_frame)
                    cv2.waitKey(waitTime)
                    videoCaps.pop(idx)
                    continue
            # Copy the current frame for later use
            videoCapInfer.cur_frame = frame.copy()
            videoCapInfer.initial_w = videoCapInfer.cap.get(3)
            videoCapInfer.initial_h = videoCapInfer.cap.get(4)
            # Resize and change the data layout so it is compatible
            in_frame = cv2.resize(videoCapInfer.cur_frame, (w, h))
            in_frame = in_frame.transpose(
                (2, 0, 1))  # Change data layout from HWC to CHW
            in_frame = in_frame.reshape((n, c, h, w))

            infer_start = datetime.datetime.now()
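            # In async mode the next frame is submitted on `next_request_id`
            # while the result of the previous frame (`cur_request_id`) is
            # consumed below; the two ids are swapped at the end of each pass.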
            if is_async_mode:
                # Async enabled and only one video capture
                infer_network.exec_net(next_request_id, in_frame)
                if (len(videoCaps) == 1):
                    videoCapResult = videoCapInfer
                # Async enabled and more than one video capture
                else:
                    # Get previous index
                    videoCapResult = videoCaps[idx - 1 if idx -
                                               1 >= 0 else len(videoCaps) - 1]
            else:
                # Async disabled
                infer_network.exec_net(cur_request_id, in_frame)
                videoCapResult = videoCapInfer

            if infer_network.wait(cur_request_id) == 0:
                infer_end = datetime.datetime.now()
                res = infer_network.get_output(cur_request_id)
                infer_duration = infer_end - infer_start
                current_count = 0
                # Parse detection results of the current request
                for obj in res[0][0]:
                    class_id = int(obj[1])
                    # Draw only objects when probability more than specified threshold
                    if (obj[2] > PROB_THRESHOLD
                            and videoCapResult.req_label in labels_map
                            and labels_map.index(
                                videoCapResult.req_label) == class_id - 1):
                        current_count += 1
                        xmin = int(obj[3] * videoCapResult.initial_w)
                        ymin = int(obj[4] * videoCapResult.initial_h)
                        xmax = int(obj[5] * videoCapResult.initial_w)
                        ymax = int(obj[6] * videoCapResult.initial_h)
                        # Draw box
                        cv2.rectangle(videoCapResult.cur_frame, (xmin, ymin),
                                      (xmax, ymax), (0, 255, 0), 4, 16)

                if videoCapResult.candidate_count == current_count:
                    videoCapResult.candidate_confidence += 1
                else:
                    videoCapResult.candidate_confidence = 0
                    videoCapResult.candidate_count = current_count

                if videoCapResult.candidate_confidence == FRAME_THRESHOLD:
                    videoCapResult.candidate_confidence = 0
                    if current_count > videoCapResult.last_correct_count:
                        videoCapResult.total_count += current_count - videoCapResult.last_correct_count

                    if current_count != videoCapResult.last_correct_count:
                        if UI_OUTPUT:
                            currtime = datetime.datetime.now().strftime(
                                "%H:%M:%S")
                            fr = FrameInfo(videoCapResult.frames,
                                           current_count, currtime)
                            videoCapResult.countAtFrame.append(fr)

                        new_objects = current_count - videoCapResult.last_correct_count
                        for _ in range(new_objects):
                            strng = "{} - {} detected on {}". \
                                format(time.strftime("%H:%M:%S"),
                                       videoCapResult.req_label,
                                       videoCapResult.cap_name)
                            rolling_log.append(strng)

                    videoCapResult.frames += 1
                    videoCapResult.last_correct_count = current_count
                else:
                    videoCapResult.frames += 1

                videoCapResult.cur_frame = cv2.resize(videoCapResult.cur_frame,
                                                      (w, h))

                if UI_OUTPUT:
                    imgName = videoCapResult.cap_name
                    imgName = imgName.split()[0] + "_" + chr(
                        ord(imgName.split()[1]) + 1)
                    imgName += "_" + str(videoCapResult.frames)
                    frameNames.append(imgName)
                    imgName = CONF_VIDEODIR + imgName + ".jpg"
                    cv2.imwrite(imgName, videoCapResult.cur_frame)
                    videoCapsJson[
                        videoCapResult.
                        pos].countAtFrame = videoCapResult.countAtFrame
                    a = saveJSON()
                    if a:
                        return a
                if not UI_OUTPUT:
                    # Add log text to each frame
                    log_message = "Async mode is on." if is_async_mode else \
                        "Async mode is off."
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (15, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    log_message = "Total {} count: {}" \
                        .format(videoCapResult.req_label,
                                videoCapResult.total_count)
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (10, h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    log_message = "Current {} count: {}" \
                        .format(videoCapResult.req_label,
                                videoCapResult.last_correct_count)
                    cv2.putText(videoCapResult.cur_frame, log_message,
                                (10, h - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 255), 1)
                    cv2.putText(
                        videoCapResult.cur_frame, 'Infer wait: %0.3fs' %
                        (infer_duration.total_seconds()), (10, h - 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                    # Display inferred frame and stats
                    stats = np.zeros((statsHeight, statsWidth, 1),
                                     dtype='uint8')
                    for i, log in enumerate(rolling_log):
                        cv2.putText(stats, log, (10, i * 20 + 15),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (255, 255, 255), 1)
                    cv2.imshow(STATS_WINDOW_NAME, stats)
                    if idx == 0:
                        stats = cv2.cvtColor(stats, cv2.COLOR_GRAY2BGR)
                        statsVideo.write(stats)
                    end_time = datetime.datetime.now()
                    cv2.putText(
                        videoCapResult.cur_frame, 'FPS: %0.2f' %
                        (1 / (end_time -
                              videoCapResult.start_time).total_seconds()),
                        (10, h - 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 255), 1)
                    cv2.imshow(videoCapResult.cap_name,
                               videoCapResult.cur_frame)
                    videoCapResult.start_time = datetime.datetime.now()
                    videoCapResult.video.write(videoCapResult.cur_frame)

            # Wait if necessary for the required time
            key = cv2.waitKey(waitTime)

            # Esc key pressed
            if key == 27:
                cv2.destroyAllWindows()
                infer_network.clean()
                print("Finished")
                return
            # Tab key pressed
            if key == 9:
                is_async_mode = not is_async_mode
                print("Switched to {} mode".format(
                    "async" if is_async_mode else "sync"))

            if is_async_mode:
                # Swap infer request IDs
                cur_request_id, next_request_id = next_request_id, cur_request_id

            # Loop video if LOOP_VIDEO = True and input isn't live from USB camera
            if LOOP_VIDEO and not videoCapInfer.is_cam:
                vfps = int(round(videoCapInfer.cap.get(cv2.CAP_PROP_FPS)))
                # If a video capture has ended restart it
                if (videoCapInfer.cur_frame_count >
                        videoCapInfer.cap.get(cv2.CAP_PROP_FRAME_COUNT) -
                        int(round(vfps / minFPS))):
                    videoCapInfer.cur_frame_count = 0
                    videoCapInfer.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

        if False not in no_more_data:
            break

    infer_network.clean()
    cv2.destroyAllWindows()
Exemplo n.º 30
0
class FaceDetectionModel:
    '''
    Class for the Face Detection Model.
    '''
    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        TODO: Use this to set your instance variables.
        '''
        self.model_xml = model_name
        self.device = device
        self.extensions = extensions
        # Initialise the class
        self.infer_network = Network()
        #raise NotImplementedError

    def load_model(self):
        '''
        TODO: You will need to complete this method.
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.
        '''
        self.infer_network.load_model(self.model_xml, self.device, self.extensions)
        #raise NotImplementedError

    def predict(self, image):
        '''
        TODO: You will need to complete this method.
        This method is meant for running predictions on the input image.
        '''
        self.infer_network.exec_net(image)

        # Wait for the result and return the raw detection output
        if self.infer_network.wait() == 0:
            result = self.infer_network.get_output()[self.infer_network.output_blob]
            return result


    def check_model(self):
        raise NotImplementedError

    def preprocess_input(self, image):
        '''
        Before feeding the data into the model for inference,
        you might have to preprocess it. This function is where you can do that.
        '''
        # [1x3x384x672]
        net_input_shape = self.infer_network.get_input_shape()

        p_frame = np.copy(image)
        p_frame = cv2.resize(p_frame, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose((2,0,1))
        p_frame = p_frame.reshape(1, *p_frame.shape)

        return p_frame
        #raise NotImplementedError

    def preprocess_output(self, outputs, image, print_flag=True, threshold = 0.5):
        '''
        Before feeding the output of this model to the next model,
        you might have to preprocess the output. This function is where you can do that.
        '''
        # [1, 1, N, 7]
        # [image_id, label, conf, x_min, y_min, x_max, y_max]
        height = image.shape[0]
        width = image.shape[1]
        faceboxes = []
        # Drawing the box/boxes 
        for i in range(len(outputs[0][0])):
            box = outputs[0][0][i] # i-th box
            confidence = box[2]
            if confidence>threshold:
                xmin = int(box[3] * width)
                ymin = int(box[4] * height)
                xmax = int(box[5] * width)
                ymax = int(box[6] * height)

                # Drawing the box in the image
                if(print_flag):
                    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (255,0,0), 1)
                faceboxes.append([xmin, ymin, xmax, ymax])
        return image, faceboxes
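

# A minimal usage sketch (the model path 'face_model.xml' and image 'input.jpg'
# are placeholder assumptions); the surrounding pipeline in the original project
# may wire these calls differently.
if __name__ == '__main__':
    face_detector = FaceDetectionModel('face_model.xml', device='CPU')
    face_detector.load_model()
    frame = cv2.imread('input.jpg')
    input_blob = face_detector.preprocess_input(frame)
    outputs = face_detector.predict(input_blob)
    if outputs is not None:
        frame, faceboxes = face_detector.preprocess_output(outputs, frame)
        cv2.imwrite('faces.jpg', frame)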