def dnn_detector(frame):
    """Detect faces with the OpenCV DNN network and annotate ``frame``.

    Every detection whose confidence exceeds ``conf_threshold`` is passed,
    together with the grayscale frame, to ``get_model_compatible_input``;
    the result is classified with ``model`` and drawn onto ``frame`` in
    place: a bounding box captioned with the predicted label, plus one line
    per class probability stacked down the left-hand side of the image.

    Args:
        frame: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``). Modified in place.

    Returns:
        The same ``frame`` object, annotated. Callers that only rely on the
        in-place drawing may ignore the return value.
    """
    frame_height, frame_width = frame.shape[:2]

    # Positional arguments: scale factor 1.0, 300x300 network input size,
    # per-channel mean, swapRB=False, crop=False.
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123],
                                 False, False)
    net.setInput(blob)
    detections = net.forward()

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if hist_eq:
        # Equalise once per frame. Doing this inside the detection loop
        # re-equalised the already equalised image for every extra face, so
        # later faces were classified on a different image than the first.
        gray_frame = cv2.equalizeHist(gray_frame)

    idx = 0  # 1-based counter of accepted detections ("Person N")
    offset = 15  # vertical step between lines of the probability list
    x_pos, y_pos = 10, 40  # text cursor for the probability list

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx += 1
            # Columns 3..6 hold the box corners; they are scaled by the
            # frame size to obtain pixel coordinates.
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)

            # (x, y, w, h) form expected by utils.bb_to_rect.
            face = [x1, y1, x2 - x1, y2 - y1]

            model_in = get_model_compatible_input(gray_frame,
                                                  utils.bb_to_rect(face))
            predicted_proba = model.predict(model_in)
            predicted_label = np.argmax(predicted_proba[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"Person {idx}: {label2text[predicted_label]}"
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x1 + 5,
                                           y1,
                                           font_scale=0.4)

            # Header of this person's probability list; the extra offset
            # leaves a gap between consecutive persons.
            text = f"Person {idx} :  "
            y_pos = y_pos + 2 * offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))
            for k, v in label2text.items():
                text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
                y_pos = y_pos + offset
                utils.draw_text_with_backgroud(frame,
                                               text,
                                               x_pos,
                                               y_pos,
                                               font_scale=0.3,
                                               box_coords_2=(2, -2))

    # Return the annotated image so callers can use the result directly
    # (e.g. ``out = dnn_detector(frame)``) instead of receiving None.
    return frame
Пример #2
0
def haar_detector(frame):
    """Detect faces with the Haar cascade and annotate ``frame``.

    Each face returned by ``cascade_detector.detectMultiScale`` is aligned
    with ``utils.align_face``, preprocessed, classified with ``model`` and
    drawn onto ``frame`` in place: a bounding box captioned with the
    predicted label, plus one line per class probability stacked down the
    left-hand side of the image.

    Args:
        frame: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``). Modified in place.

    Returns:
        A ``(frame, face_frame)`` tuple: the annotated frame and the aligned
        face of the last processed detection resized to 48x48. When no face
        is detected ``face_frame`` is an all-zero ``uint8`` image with the
        shape of the grayscale frame.
    """
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")

    offset = 15  # vertical step between lines of the probability list
    x_pos, y_pos = 10, 40  # text cursor for the probability list

    # Detection still runs on the raw grayscale image, as before.
    faces = cascade_detector.detectMultiScale(gray_frame, 1.32, 5)

    if hist_eq:
        # Equalise once per frame. Doing this inside the face loop
        # re-equalised the already equalised image for every extra face, so
        # later faces were classified on a different image than the first.
        gray_frame = cv2.equalizeHist(gray_frame)

    for idx, face in enumerate(faces, start=1):
        img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face),
                                   desiredLeftEye)
        # Keep a 48x48 copy of the aligned face for the caller.
        face_frame = cv2.resize(img_arr, (48, 48),
                                interpolation=cv2.INTER_CUBIC)
        img_arr = utils.preprocess_img(img_arr, resize=False)

        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        # Header of this person's probability list; the extra offset leaves
        # a gap between consecutive persons.
        text = f"Person {idx} :  "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame,
                                       text,
                                       x_pos,
                                       y_pos,
                                       font_scale=0.3,
                                       box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))
    return frame, face_frame
Пример #3
0
def dlib_detector(frame_orig):
    """Detect faces with the HOG detector and return an annotated copy.

    Each face returned by ``hog_detector`` is aligned with
    ``utils.align_face``, preprocessed, classified with ``model`` and drawn
    onto a copy of the input: a bounding box captioned with the predicted
    label, plus one line per class probability stacked down the left-hand
    side of the image.

    Args:
        frame_orig: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``). It is copied first and not drawn on.

    Returns:
        The annotated copy of ``frame_orig``.
    """
    frame = frame_orig.copy()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    offset = 15  # vertical step between lines of the probability list
    x_pos, y_pos = 10, 40  # text cursor for the probability list

    # Detection still runs on the raw grayscale image, as before.
    faces = hog_detector(gray_frame)

    if hist_eq:
        # Equalise once per frame. Doing this inside the face loop
        # re-equalised the already equalised image for every extra face, so
        # later faces were classified on a different image than the first.
        gray_frame = cv2.equalizeHist(gray_frame)

    for idx, face in enumerate(faces, start=1):
        img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
        img_arr = utils.preprocess_img(img_arr, resize=False)

        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = rect_to_bb(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        # Header of this person's probability list; the extra offset leaves
        # a gap between consecutive persons.
        text = f"Person {idx} :  "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame,
                                       text,
                                       x_pos,
                                       y_pos,
                                       font_scale=0.3,
                                       box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))

    return frame
def dlib_detector(frame):
    """Detect faces with the HOG detector and annotate ``frame`` in place.

    Each face returned by ``hog_detector`` is passed, together with the
    grayscale frame, to ``get_model_compatible_input``; the result is
    classified with ``model`` and a bounding box, the predicted label and
    the per-class probabilities are drawn directly onto ``frame``.

    NOTE(review): the second half of this body (from ``if iswebcam:``)
    uses names that are not defined in this function (``iswebcam``,
    ``args``, ``tt``, ``frame_count``, ``vidcap``) and calls
    ``dlib_detector`` itself. It looks like the tail of a separate
    video-loop snippet that was fused onto this function -- confirm
    against the original file before relying on it.
    """
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    offset = 15  # vertical step between lines of the probability list
    x_pos, y_pos = 10, 40  # text cursor for the probability list

    faces = hog_detector(gray_frame)
    for idx, face in enumerate(faces):
        # NOTE(review): this runs once per face, so from the second face on
        # the already equalised image is equalised again.
        if hist_eq:
            gray_frame = cv2.equalizeHist(gray_frame)

        model_in = get_model_compatible_input(gray_frame, face)
        predicted_proba = model.predict(model_in)
        predicted_label = np.argmax(predicted_proba[0])

        x, y, w, h = rect_to_bb(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx+1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)

        # Header of this person's probability list; the extra offset leaves
        # a gap between consecutive persons.
        text = f"Person {idx+1} :  "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame,
                                       text,
                                       x_pos,
                                       y_pos,
                                       font_scale=0.3,
                                       box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))
        # NOTE(review): everything below is indented into the per-face loop
        # above, so as written it runs once per detected face and the
        # ``break`` further down leaves that loop -- presumably not intended.
        if iswebcam:
            frame = cv2.flip(frame, 1, 0)

        try:
            tik = time.time()

            # Run the detector selected via args["detector"].
            if args["detector"] == "dlib":
                out = dlib_detector(frame)
            else:
                out = dnn_detector(frame)

            # Running FPS: frame count divided by cumulative detector time.
            tt += time.time() - tik
            fps = frame_count / tt
            label = f"Detector: {args['detector']} ; Model: {args['model']}; HistEq: {args['histogram_equalization']} ; FPS: {round(fps, 2)}"
            utils.draw_text_with_backgroud(frame,
                                           label,
                                           10,
                                           20,
                                           font_scale=0.35)

        except Exception as e:
            # Failures are only printed; the frame is still displayed below.
            print(e)
            pass

        cv2.imshow("Face Detection Comparison", frame)
        # Pressing 'q' exits the enclosing loop.
        if cv2.waitKey(10) == ord('q'):
            break

    cv2.destroyAllWindows()
    vidcap.release()
Пример #6
0
def dnn_detector(frame):
    """Detect faces with the OpenCV DNN network and annotate ``frame``.

    Every detection whose confidence exceeds ``conf_threshold`` is aligned
    with ``utils.align_face``, preprocessed, classified with ``model`` and
    drawn onto ``frame`` in place: a bounding box captioned with the
    predicted label, plus one line per class probability stacked down the
    left-hand side of the image.

    Args:
        frame: Image in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY``). Modified in place.

    Returns:
        A ``(frame, face_frame)`` tuple: the annotated frame and the aligned
        face of the last accepted detection resized to 48x48. When no
        detection is accepted ``face_frame`` is an all-zero ``uint8`` image
        with the shape of the grayscale frame.
    """
    frame_height, frame_width = frame.shape[:2]

    # Positional arguments: scale factor 1.0, 300x300 network input size,
    # per-channel mean, swapRB=False, crop=False.
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123],
                                 False, False)
    net.setInput(blob)
    detections = net.forward()

    idx = 0  # 1-based counter of accepted detections ("Person N")
    offset = 15  # vertical step between lines of the probability list
    x_pos, y_pos = 10, 40  # text cursor for the probability list

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")
    if hist_eq:
        # Equalise once per frame. Doing this inside the detection loop
        # re-equalised the already equalised image for every extra face, so
        # later faces were classified on a different image than the first.
        gray_frame = cv2.equalizeHist(gray_frame)

    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx += 1
            # Columns 3..6 hold the box corners; they are scaled by the
            # frame size to obtain pixel coordinates.
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)

            # (x, y, w, h) form expected by utils.bb_to_rect.
            face = [x1, y1, x2 - x1, y2 - y1]

            img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face),
                                       desiredLeftEye)
            # Keep a 48x48 copy of the aligned face for the caller.
            face_frame = cv2.resize(img_arr, (48, 48),
                                    interpolation=cv2.INTER_CUBIC)
            img_arr = utils.preprocess_img(img_arr, resize=False)

            predicted_proba = model.predict(img_arr)
            predicted_label = np.argmax(predicted_proba[0])

            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"Person {idx}: {label2text[predicted_label]}"
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x1 + 5,
                                           y1,
                                           font_scale=0.4)

            # Header of this person's probability list; the extra offset
            # leaves a gap between consecutive persons.
            text = f"Person {idx} :  "
            y_pos = y_pos + 2 * offset
            utils.draw_text_with_backgroud(frame,
                                           text,
                                           x_pos,
                                           y_pos,
                                           font_scale=0.3,
                                           box_coords_2=(2, -2))
            for k, v in label2text.items():
                text = f"{v}: {round(predicted_proba[0][k]*100, 3)}%"
                y_pos = y_pos + offset
                utils.draw_text_with_backgroud(frame,
                                               text,
                                               x_pos,
                                               y_pos,
                                               font_scale=0.3,
                                               box_coords_2=(2, -2))
    return frame, face_frame
Пример #7
0
            # Shape the thresholded image into the model input: 120x120,
            # then a trailing and a leading axis, then scaled by 1/255.
            # NOTE(review): a non-zero dsize is passed, so fx/fy presumably
            # have no effect here -- confirm against the cv2.resize docs.
            img = cv2.resize(thresh, (120, 120), fx=0.5, fy=0.5)
            img = np.expand_dims(img, axis=2)
            img = np.expand_dims(img, axis=0)
            img = img / 255.

            predicted_proba = model.predict(img)
            predicted_label = np.argmax(predicted_proba[0])
            bb_text = label2text[predicted_label]
        except Exception as e:
            # Any exception raised in the try block falls back to this label.
            bb_text = "no hand"

        if high_resolution:
            utils.draw_text_with_backgroud(frame,
                                           bb_text,
                                           x=start_coords[0],
                                           y=start_coords[1],
                                           font_scale=1.2)
            # Running FPS: frame count divided by cumulative elapsed time.
            tt += time.time() - tik
            fps = round(frame_count / tt, 2)
            main_text = "Running..." + f"   fps: {fps}"

            utils.draw_text_with_backgroud(frame,
                                           main_text,
                                           x=15,
                                           y=25,
                                           font_scale=1.,
                                           thickness=2)
            utils.draw_text_with_backgroud(frame,
                                           "Instructions for better results:",
                                           x=15,
Пример #8
0
            # In debug mode show the two images side by side.
            if args["mode"] == "debug":
                combined = np.hstack((bb, thresh))
                cv2.imshow("bb v/s thresh", combined)

            # Shape the thresholded image into the model input: 120x120,
            # then a trailing and a leading axis, then scaled by 1/255.
            img = cv2.resize(thresh, (120, 120))
            img = np.expand_dims(img, axis=2)
            img = np.expand_dims(img, axis=0)
            img = img / 255.

            predicted_proba = model.predict(img)
            predicted_label = np.argmax(predicted_proba[0])
            bb_text = label2text[predicted_label]
        except Exception as e:
            # Any exception raised in the try block falls back to this label.
            bb_text = "no hand"
        
        utils.draw_text_with_backgroud(frame, bb_text, x=start_coords[0], y=start_coords[1], font_scale=0.4)
        # Running FPS: frame count divided by cumulative elapsed time.
        tt += time.time() - tik
        fps = round(frame_count / tt, 2)
        main_text = "Go On..." + f"   fps: {fps}"
        utils.draw_text_with_backgroud(frame, main_text, x=15, y=25, font_scale=0.35, thickness=1)
        utils.draw_text_with_backgroud(frame, "Instructions for better results :-", x=15, y=55, font_scale=0.32, thickness=1)
        utils.draw_text_with_backgroud(frame, "- Place your hand completely inside the window", x=15, y=75, font_scale=0.32, thickness=1)
        utils.draw_text_with_backgroud(frame, "- Place your hand close to window", x=15, y=95, font_scale=0.32, thickness=1)
    else:
        # Alternative branch: only the background hint is drawn.
        main_text = "Within 5 seconds ensure that the background behind the window doesn't change"
        utils.draw_text_with_backgroud(frame, main_text, x=15, y=25, font_scale=0.35, thickness=1)

    cv2.imshow("out", frame)
    # Pressing 'q' exits the enclosing loop.
    if cv2.waitKey(10) == ord("q"):
        break
Пример #9
0
            break

        frame_count += 1

        if iswebcam:
            frame = cv2.flip(frame, 1, 0)

        try:
            # Time the dlib detector on its own; its running FPS is the
            # frame count divided by the cumulative dlib time.
            tik = time.time()
            out_dlib = dlib_detector(frame)
            tt_dlib += time.time() - tik
            fps_dlib = frame_count / tt_dlib
            label = f"Detector: dlib ; HistEq: {args['histogram_equalization']} ; FPS: {round(fps_dlib, 2)}"
            utils.draw_text_with_backgroud(out_dlib,
                                           label,
                                           10,
                                           20,
                                           font_scale=0.35)

            # Same measurement for the DNN detector, on the same `frame`.
            # NOTE(review): if dlib_detector draws on `frame` in place, the
            # DNN detector receives an already annotated image -- confirm
            # which dlib_detector variant this loop is paired with.
            tik = time.time()
            out_dnn = dnn_detector(frame)
            tt_dnn += time.time() - tik
            fps_dnn = frame_count / tt_dnn
            label = f"Detector: dnn_tf ; HistEq: {args['histogram_equalization']} ; FPS: {round(fps_dnn, 2)}"
            utils.draw_text_with_backgroud(out_dnn,
                                           label,
                                           10,
                                           20,
                                           font_scale=0.35)

            # Place both annotated outputs side by side for display.
            frame = np.hstack([out_dlib, out_dnn])