def dnn_detector(frame):
    # In-place variant: boxes and probabilities are drawn directly on `frame`.
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if hist_eq:
        # Equalize once per frame, not once per detected face
        gray_frame = cv2.equalizeHist(gray_frame)
    frame_height, frame_width = frame.shape[:2]
    # Mean values (104, 117, 123) match the SSD face detector's training preprocessing
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
    net.setInput(blob)
    detections = net.forward()
    bboxes = []
    idx = 0
    offset = 15
    x_pos, y_pos = 10, 40
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx += 1
            # Box coordinates are normalized to [0, 1]; scale back to pixels
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)
            bboxes.append([x1, y1, x2, y2])
            face = [x1, y1, x2 - x1, y2 - y1]  # (x, y, w, h)
            model_in = get_model_compatible_input(gray_frame, utils.bb_to_rect(face))
            predicted_proba = model.predict(model_in)
            predicted_label = np.argmax(predicted_proba[0])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"Person {idx}: {label2text[predicted_label]}"
            utils.draw_text_with_backgroud(frame, text, x1 + 5, y1, font_scale=0.4)
            # Per-class probabilities, stacked below the previous person's block
            text = f"Person {idx} : "
            y_pos = y_pos + 2 * offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
            for k, v in label2text.items():
                text = f"{v}: {round(predicted_proba[0][k] * 100, 3)}%"
                y_pos = y_pos + offset
                utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                               font_scale=0.3, box_coords_2=(2, -2))
    return frame  # annotations were drawn in place; returned for callers that keep the value
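# ----------------------------------------------------------------------------
# Hedged sketch (not part of the excerpt above): roughly how the globals that
# dnn_detector relies on could be initialized. The file paths, the emotion
# label order, and the 0.7 threshold are illustrative assumptions; the
# "dnn_tf" tag used later suggests OpenCV's TensorFlow SSD face detector.
# ----------------------------------------------------------------------------
import cv2
import numpy as np
from tensorflow.keras.models import load_model

net = cv2.dnn.readNetFromTensorflow("opencv_face_detector_uint8.pb",  # hypothetical paths
                                    "opencv_face_detector.pbtxt")
conf_threshold = 0.7                                # minimum detection confidence (assumed)
model = load_model("emotion_model.h5")              # hypothetical classifier checkpoint
label2text = {0: "Angry", 1: "Disgust", 2: "Fear",  # illustrative FER-2013 ordering
              3: "Happy", 4: "Sad", 5: "Surprise", 6: "Neutral"}
hist_eq = True                                      # mirrors the --histogram_equalization flag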
def haar_detector(frame):
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if hist_eq:
        # Equalize once per frame, not once per detected face
        gray_frame = cv2.equalizeHist(gray_frame)
    # Fallback returned when no face is found: a black placeholder
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")
    offset = 15
    x_pos, y_pos = 10, 40
    # scaleFactor=1.32, minNeighbors=5
    faces = cascade_detector.detectMultiScale(gray_frame, 1.32, 5)
    for idx, face in enumerate(faces):
        img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face), desiredLeftEye)
        face_frame = cv2.resize(img_arr, (48, 48), interpolation=cv2.INTER_CUBIC)
        img_arr = utils.preprocess_img(img_arr, resize=False)
        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])
        x, y, w, h = face
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx + 1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)
        text = f"Person {idx + 1} : "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                       font_scale=0.3, box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k] * 100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
    return frame, face_frame
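# Hedged sketch: haar_detector above assumes a cascade loaded along these
# lines. This uses the frontal-face cascade bundled with opencv-python; the
# repo may ship its own XML file instead.
cascade_detector = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml")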
def dlib_detector(frame_orig):
    # Variant used by the side-by-side comparison script: works on a copy and
    # returns the annotated frame instead of drawing on the input in place.
    frame = frame_orig.copy()
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if hist_eq:
        gray_frame = cv2.equalizeHist(gray_frame)
    offset = 15
    x_pos, y_pos = 10, 40
    faces = hog_detector(gray_frame)
    for idx, face in enumerate(faces):
        img_arr = utils.align_face(gray_frame, face, desiredLeftEye)
        img_arr = utils.preprocess_img(img_arr, resize=False)
        predicted_proba = model.predict(img_arr)
        predicted_label = np.argmax(predicted_proba[0])
        x, y, w, h = rect_to_bb(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx + 1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)
        text = f"Person {idx + 1} : "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                       font_scale=0.3, box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k] * 100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
    return frame
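# Hedged note: `desiredLeftEye` tells utils.align_face where to place the left
# eye in the aligned crop, in the style of imutils' FaceAligner. The value
# below is that library's default, used here only as a plausible stand-in (the
# real value isn't in the excerpt):
desiredLeftEye = (0.35, 0.35)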
def dlib_detector(frame):
    # In-place variant: annotations are drawn directly on the input frame.
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if hist_eq:
        gray_frame = cv2.equalizeHist(gray_frame)
    offset = 15
    x_pos, y_pos = 10, 40
    faces = hog_detector(gray_frame)
    for idx, face in enumerate(faces):
        model_in = get_model_compatible_input(gray_frame, face)
        predicted_proba = model.predict(model_in)
        predicted_label = np.argmax(predicted_proba[0])
        x, y, w, h = rect_to_bb(face)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        text = f"Person {idx + 1}: {label2text[predicted_label]}"
        utils.draw_text_with_backgroud(frame, text, x + 5, y, font_scale=0.4)
        text = f"Person {idx + 1} : "
        y_pos = y_pos + 2 * offset
        utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                       font_scale=0.3, box_coords_2=(2, -2))
        for k, v in label2text.items():
            text = f"{v}: {round(predicted_proba[0][k] * 100, 3)}%"
            y_pos = y_pos + offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
    return frame  # drawn in place; returned for callers that keep the value
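# Hedged sketch of the dlib-side setup and of get_model_compatible_input. The
# detector and rect_to_bb come from real libraries; the helper body is only
# inferred from the sibling variants that inline the same two steps, so the
# repo's actual implementation may differ.
import dlib
from imutils.face_utils import rect_to_bb

hog_detector = dlib.get_frontal_face_detector()

def get_model_compatible_input(gray_frame, rect):
    img_arr = utils.align_face(gray_frame, rect, desiredLeftEye)  # rotate so the eyes sit level
    return utils.preprocess_img(img_arr, resize=False)            # scale/reshape for model.predict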
# Main loop of the single-detector script; the prologue below is reconstructed,
# since the excerpt began mid-loop.
while True:
    status, frame = vidcap.read()
    if not status:
        break
    frame_count += 1
    if iswebcam:
        frame = cv2.flip(frame, 1, 0)  # mirror webcam input
    try:
        tik = time.time()
        if args["detector"] == "dlib":
            out = dlib_detector(frame)
        else:
            out = dnn_detector(frame)
        tt += time.time() - tik
        fps = frame_count / tt
        label = (f"Detector: {args['detector']} ; Model: {args['model']}; "
                 f"HistEq: {args['histogram_equalization']} ; FPS: {round(fps, 2)}")
        utils.draw_text_with_backgroud(frame, label, 10, 20, font_scale=0.35)
    except Exception as e:
        # Keep the stream alive on per-frame failures (e.g. no face found)
        print(e)
    cv2.imshow("Face Detection Comparison", frame)
    if cv2.waitKey(10) == ord('q'):
        break

cv2.destroyAllWindows()
vidcap.release()
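# For reference, a hedged sketch of the state the loop above assumes; the real
# argparse wiring isn't shown, so the option name below is inferred from the
# args[...] lookups:
iswebcam = args["video"] is None                    # hypothetical --video option
vidcap = cv2.VideoCapture(0 if iswebcam else args["video"])
frame_count, tt = 0, 0.0                            # frames seen / total detector seconds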
def dnn_detector(frame):
    # Variant that also returns the 48x48 aligned crop fed to the classifier.
    frame_height, frame_width = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123], False, False)
    net.setInput(blob)
    detections = net.forward()
    bboxes = []
    idx = 0
    offset = 15
    x_pos, y_pos = 10, 40
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if hist_eq:
        gray_frame = cv2.equalizeHist(gray_frame)
    face_frame = np.zeros(gray_frame.shape, dtype="uint8")
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx += 1
            x1 = int(detections[0, 0, i, 3] * frame_width)
            y1 = int(detections[0, 0, i, 4] * frame_height)
            x2 = int(detections[0, 0, i, 5] * frame_width)
            y2 = int(detections[0, 0, i, 6] * frame_height)
            bboxes.append([x1, y1, x2, y2])
            face = [x1, y1, x2 - x1, y2 - y1]
            img_arr = utils.align_face(gray_frame, utils.bb_to_rect(face), desiredLeftEye)
            face_frame = cv2.resize(img_arr, (48, 48), interpolation=cv2.INTER_CUBIC)
            img_arr = utils.preprocess_img(img_arr, resize=False)
            predicted_proba = model.predict(img_arr)
            predicted_label = np.argmax(predicted_proba[0])
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            text = f"Person {idx}: {label2text[predicted_label]}"
            utils.draw_text_with_backgroud(frame, text, x1 + 5, y1, font_scale=0.4)
            text = f"Person {idx} : "
            y_pos = y_pos + 2 * offset
            utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                           font_scale=0.3, box_coords_2=(2, -2))
            for k, v in label2text.items():
                text = f"{v}: {round(predicted_proba[0][k] * 100, 3)}%"
                y_pos = y_pos + offset
                utils.draw_text_with_backgroud(frame, text, x_pos, y_pos,
                                               font_scale=0.3, box_coords_2=(2, -2))
    return frame, face_frame
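# Hedged usage sketch for this variant: both return values can be previewed so
# the aligned crop the model actually sees stays visible (window names are
# illustrative, not from the repo):
annotated, face_crop = dnn_detector(frame)
cv2.imshow("Face Emotion Recognition", annotated)
cv2.imshow("Model input (48x48 aligned face)", face_crop)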
        try:
            # The steps that crop the capture window and threshold it into
            # `thresh` precede this excerpt.
            img = cv2.resize(thresh, (120, 120))  # dsize overrides fx/fy, so the scale factors were dead arguments
            img = np.expand_dims(img, axis=2)     # HxW -> HxWx1 single channel
            img = np.expand_dims(img, axis=0)     # add batch axis -> 1x120x120x1
            img = img / 255.                      # scale pixels to [0, 1]
            predicted_proba = model.predict(img)
            predicted_label = np.argmax(predicted_proba[0])
            bb_text = label2text[predicted_label]
        except Exception as e:
            bb_text = "no hand"
        if high_resolution:
            utils.draw_text_with_backgroud(frame, bb_text, x=start_coords[0],
                                           y=start_coords[1], font_scale=1.2)
        tt += time.time() - tik
        fps = round(frame_count / tt, 2)
        main_text = "Running..." + f" fps: {fps}"
        utils.draw_text_with_backgroud(frame, main_text, x=15, y=25, font_scale=1., thickness=2)
        utils.draw_text_with_backgroud(frame, "Instructions for better results:", x=15,  # call truncated in the source excerpt
if args["mode"] == "debug": combined = np.hstack((bb, thresh)) cv2.imshow("bb v/s thresh", combined) img = cv2.resize(thresh, (120, 120)) img = np.expand_dims(img, axis=2) img = np.expand_dims(img, axis=0) img = img / 255. predicted_proba = model.predict(img) predicted_label = np.argmax(predicted_proba[0]) bb_text = label2text[predicted_label] except Exception as e: bb_text = "no hand" utils.draw_text_with_backgroud(frame, bb_text, x=start_coords[0], y=start_coords[1], font_scale=0.4) tt += time.time() - tik fps = round(frame_count / tt, 2) main_text = "Go On..." + f" fps: {fps}" utils.draw_text_with_backgroud(frame, main_text, x=15, y=25, font_scale=0.35, thickness=1) utils.draw_text_with_backgroud(frame, "Instructions for better results :-", x=15, y=55, font_scale=0.32, thickness=1) utils.draw_text_with_backgroud(frame, "- Place your hand completely inside the window", x=15, y=75, font_scale=0.32, thickness=1) utils.draw_text_with_backgroud(frame, "- Place your hand close to window", x=15, y=95, font_scale=0.32, thickness=1) else: main_text = "Within 5 seconds ensure that the background behind the window doesn't change" utils.draw_text_with_backgroud(frame, main_text, x=15, y=25, font_scale=0.35, thickness=1) cv2.imshow("out", frame) if cv2.waitKey(10) == ord("q"): break
    if not status:  # `status` comes from the vidcap.read() call just above this excerpt
        break
    frame_count += 1
    if iswebcam:
        frame = cv2.flip(frame, 1, 0)
    try:
        # Time each detector separately so their FPS can be compared
        tik = time.time()
        out_dlib = dlib_detector(frame)
        tt_dlib += time.time() - tik
        fps_dlib = frame_count / tt_dlib
        label = f"Detector: dlib ; HistEq: {args['histogram_equalization']} ; FPS: {round(fps_dlib, 2)}"
        utils.draw_text_with_backgroud(out_dlib, label, 10, 20, font_scale=0.35)

        tik = time.time()
        out_dnn = dnn_detector(frame)
        tt_dnn += time.time() - tik
        fps_dnn = frame_count / tt_dnn
        label = f"Detector: dnn_tf ; HistEq: {args['histogram_equalization']} ; FPS: {round(fps_dnn, 2)}"
        utils.draw_text_with_backgroud(out_dnn, label, 10, 20, font_scale=0.35)

        # Show both annotated outputs side by side
        frame = np.hstack([out_dlib, out_dnn])
    except Exception as e:
        # Not in the original excerpt; closes the try block, mirroring the
        # single-detector loop above
        print(e)