#module-level imports assumed by the snippets below; the deepface-internal modules
#(DeepFace, functions, dst, Age, FaceDetector) come from the package itself
import os
import re
import time

import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm


def detect_face(img, detector_backend='opencv', grayscale=False, enforce_detection=True, align=True):

    img_region = [0, 0, img.shape[1], img.shape[0]]  #full-frame fallback region in [x, y, w, h] order

    #detector stored in a global variable in FaceDetector object.
    #this call should be completed very fast because it will return the one found in memory
    #it will not build the face detector model in each call (consider for loops)
    face_detector = FaceDetector.build_model(detector_backend)

    detected_face, img_region = FaceDetector.detect_face(face_detector, detector_backend, img, align)

    if isinstance(detected_face, np.ndarray):
        return detected_face, img_region
    else:
        if detected_face is None:
            if not enforce_detection:
                return img, img_region
            else:
                raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider setting the enforce_detection param to False.")
def detect_face(img, detector_backend='opencv', grayscale=False, enforce_detection=True, align=True):

    img_region = [0, 0, img.shape[1], img.shape[0]]  #full-frame fallback region in [x, y, w, h] order

    #if functions.preprocess_face is called directly, then the face_detector global variable might not have been initialized.
    if "face_detector" not in globals():
        initialize_detector(detector_backend=detector_backend)

    detected_face, img_region = FaceDetector.detect_face(face_detector, detector_backend, img, align)

    if isinstance(detected_face, np.ndarray):
        return detected_face, img_region
    else:
        if detected_face is None:
            if not enforce_detection:
                return img, img_region
            else:
                raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider setting the enforce_detection param to False.")
def detect_face(face_detector, img, align=True):

    resp = []

    detected_face = None
    img_region = [0, 0, img.shape[1], img.shape[0]]  #full-frame fallback region in [x, y, w, h] order

    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  #mtcnn expects RGB but OpenCV reads BGR
    detections = face_detector.detect_faces(img_rgb)

    if len(detections) > 0:

        for detection in detections:
            x, y, w, h = detection["box"]
            detected_face = img[int(y):int(y + h), int(x):int(x + w)]
            img_region = [x, y, w, h]

            if align:
                keypoints = detection["keypoints"]
                left_eye = keypoints["left_eye"]
                right_eye = keypoints["right_eye"]
                detected_face = FaceDetector.alignment_procedure(detected_face, left_eye, right_eye)

            resp.append((detected_face, img_region))

    return resp
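#a minimal, hypothetical driver for the mtcnn wrapper above; it assumes the `mtcnn`
#package is installed and that this detect_face is in scope ("photo.jpg" is a
#placeholder path)
import cv2
from mtcnn import MTCNN

face_detector = MTCNN()  #build once and reuse it across frames, as the comments above advise
img = cv2.imread("photo.jpg")  #OpenCV reads images in BGR order

#the wrapper returns a list of (detected_face, [x, y, w, h]) pairs
for detected_face, img_region in detect_face(face_detector, img, align=True):
    print("face at", img_region, "crop shape:", detected_face.shape)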
def align_face(eye_detector, img):

    detected_face_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  #eye detector expects a gray scale image

    #eyes = eye_detector.detectMultiScale(detected_face_gray, 1.3, 5)
    eyes = eye_detector.detectMultiScale(detected_face_gray, 1.1, 10)

    #----------------------------------------------------------------

    #opencv's eye detection module is not strong. it might find more than 2 eyes!
    #besides, it returns eyes in a different order in each call (issue 435)
    #this is an important issue because opencv is the default detector and ssd also uses this

    #keep the largest 2 eyes; each entry is (x, y, w, h), so w * h is the box area. Thanks to @thelostpeace
    eyes = sorted(eyes, key=lambda v: abs(v[2] * v[3]), reverse=True)

    #----------------------------------------------------------------

    if len(eyes) >= 2:
        #decide left and right eye

        eye_1 = eyes[0]
        eye_2 = eyes[1]

        if eye_1[0] < eye_2[0]:
            left_eye = eye_1
            right_eye = eye_2
        else:
            left_eye = eye_2
            right_eye = eye_1

        #-----------------------
        #find center of eyes

        left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
        right_eye = (int(right_eye[0] + (right_eye[2] / 2)), int(right_eye[1] + (right_eye[3] / 2)))

        img = FaceDetector.alignment_procedure(img, left_eye, right_eye)

    return img  #return img anyway
def detect_face(img, detector_backend='opencv', grayscale=False, enforce_detection=True, align=True):

    img_region = [0, 0, img.shape[1], img.shape[0]]  #full-frame fallback region in [x, y, w, h] order

    #----------------------------------------------
    #people would like to skip detection and alignment if they already have pre-processed images
    if detector_backend == 'skip':
        return img, img_region

    #----------------------------------------------

    #detector stored in a global variable in FaceDetector object.
    #this call should be completed very fast because it will return the one found in memory
    #it will not build the face detector model in each call (consider for loops)
    face_detector = FaceDetector.build_model(detector_backend)

    try:
        detected_face, img_region = FaceDetector.detect_face(face_detector, detector_backend, img, align)
    except Exception:
        #if detected face shape is (0, 0) and alignment cannot be performed, this block will be run
        detected_face = None

    if isinstance(detected_face, np.ndarray):
        return detected_face, img_region
    else:
        if detected_face is None:
            if not enforce_detection:
                return img, img_region
            else:
                raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider setting the enforce_detection param to False.")
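#a short usage sketch for the wrapper above ("selfie.jpg" is a placeholder path)
import cv2

img = cv2.imread("selfie.jpg")

#with enforce_detection=False the original frame comes back instead of an exception
#when no face can be found
face, region = detect_face(img, detector_backend='opencv', enforce_detection=False)

#pre-cropped inputs can bypass detection and alignment entirely via the 'skip' backend
face, region = detect_face(img, detector_backend='skip')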
def align_face(img):

    eye_detector = build_model(model_name='haarcascade_eye')

    detected_face_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  #eye detector expects a gray scale image

    eyes = eye_detector.detectMultiScale(detected_face_gray)

    if len(eyes) >= 2:

        #find the largest 2 eyes
        base_eyes = eyes[:, 2]

        items = []
        for i in range(0, len(base_eyes)):
            item = (base_eyes[i], i)
            items.append(item)

        df = pd.DataFrame(items, columns=["length", "idx"]).sort_values(by=['length'], ascending=False)

        eyes = eyes[df.idx.values[0:2]]  #eyes variable stores the largest 2 eyes

        #-----------------------
        #decide left and right eye

        eye_1 = eyes[0]
        eye_2 = eyes[1]

        if eye_1[0] < eye_2[0]:
            left_eye = eye_1
            right_eye = eye_2
        else:
            left_eye = eye_2
            right_eye = eye_1

        #-----------------------
        #find center of eyes

        left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
        right_eye = (int(right_eye[0] + (right_eye[2] / 2)), int(right_eye[1] + (right_eye[3] / 2)))

        img = FaceDetector.alignment_procedure(img, left_eye, right_eye)

    return img  #return img anyway
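#FaceDetector.alignment_procedure is called throughout but not listed here. below is
#a minimal sketch of eye-based alignment under the assumption that it rotates the
#crop until the line between the eye centers is horizontal; the real implementation
#may differ.
import math

import numpy as np
from PIL import Image

def alignment_procedure_sketch(img, left_eye, right_eye):
    #angle of the eye line in degrees (image coordinates, y grows downwards)
    dx = right_eye[0] - left_eye[0]
    dy = right_eye[1] - left_eye[1]
    angle = math.degrees(math.atan2(dy, dx))

    #PIL rotates counter-clockwise, which levels an eye line that slopes downwards
    rotated = Image.fromarray(img).rotate(angle)
    return np.array(rotated)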
def detect_face(face_detector, img, align=True):

    import mediapipe as mp  #this is not a must dependency. do not import it at the global level.

    resp = []

    img_width = img.shape[1]
    img_height = img.shape[0]

    results = face_detector.process(img)

    if results.detections:
        for detection in results.detections:

            confidence = detection.score

            bounding_box = detection.location_data.relative_bounding_box
            landmarks = detection.location_data.relative_keypoints

            x = int(bounding_box.xmin * img_width)
            w = int(bounding_box.width * img_width)
            y = int(bounding_box.ymin * img_height)
            h = int(bounding_box.height * img_height)

            right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
            left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
            #nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
            #mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
            #right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
            #left_ear = (int(landmarks[5].x * img_width), int(landmarks[5].y * img_height))

            if x > 0 and y > 0:
                detected_face = img[y:y + h, x:x + w]
                img_region = [x, y, w, h]

                if align:
                    detected_face = FaceDetector.alignment_procedure(detected_face, left_eye, right_eye)

                resp.append((detected_face, img_region))

    return resp
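#a minimal driver for the mediapipe wrapper above; the detector construction mirrors
#mediapipe's FaceDetection solution, and the 0.7 confidence value is an assumption
import cv2
import mediapipe as mp

face_detector = mp.solutions.face_detection.FaceDetection(min_detection_confidence=0.7)

img = cv2.imread("photo.jpg")  #note: mediapipe nominally expects RGB input
for detected_face, img_region in detect_face(face_detector, img, align=False):
    print("face at", img_region)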
def initialize_detector(detector_backend):

    global face_detector
    face_detector = FaceDetector.build_model(detector_backend)
def analysis(db_path, model_name='VGG-Face', detector_backend='opencv', distance_metric='cosine', enable_face_analysis=True, source=0, time_threshold=5, frame_threshold=5):

    #------------------------

    face_detector = FaceDetector.build_model(detector_backend)
    print("Detector backend is ", detector_backend)

    #------------------------

    input_shape = (224, 224)
    input_shape_x = input_shape[0]
    input_shape_y = input_shape[1]

    text_color = (255, 255, 255)

    employees = []
    #check that the passed db folder exists
    if os.path.isdir(db_path):
        for r, d, f in os.walk(db_path):  # r=root, d=directories, f=files
            for file in f:
                if '.jpg' in file:
                    #exact_path = os.path.join(r, file)
                    exact_path = r + "/" + file
                    #print(exact_path)
                    employees.append(exact_path)

    if len(employees) == 0:
        print("WARNING: There is no image in this path (", db_path, "). Face recognition will not be performed.")

    #------------------------

    if len(employees) > 0:

        model = DeepFace.build_model(model_name)
        print(model_name, " is built")

        #------------------------

        input_shape = functions.find_input_shape(model)
        input_shape_x = input_shape[0]
        input_shape_y = input_shape[1]

        #tuned thresholds for model and metric pair
        threshold = dst.findThreshold(model_name, distance_metric)

    #------------------------
    #facial attribute analysis models

    if enable_face_analysis:

        tic = time.time()

        emotion_model = DeepFace.build_model('Emotion')
        print("Emotion model loaded")

        age_model = DeepFace.build_model('Age')
        print("Age model loaded")

        gender_model = DeepFace.build_model('Gender')
        print("Gender model loaded")

        toc = time.time()

        print("Facial attribute analysis models loaded in ", toc - tic, " seconds")

    #------------------------

    #find embeddings for employee list

    tic = time.time()

    #-----------------------

    pbar = tqdm(range(0, len(employees)), desc='Finding embeddings')

    #TODO: why don't you store those embeddings in a pickle file similar to find function?

    embeddings = []
    #for employee in employees:
    for index in pbar:
        employee = employees[index]
        pbar.set_description("Finding embedding for %s" % (employee.split("/")[-1]))
        embedding = []

        #preprocess_face returns a single face. this is expected for source images in db.
        img = functions.preprocess_face(img=employee, target_size=(input_shape_y, input_shape_x), enforce_detection=False, detector_backend=detector_backend)
        img_representation = model.predict(img)[0, :]

        embedding.append(employee)
        embedding.append(img_representation)
        embeddings.append(embedding)

    df = pd.DataFrame(embeddings, columns=['employee', 'embedding'])
    df['distance_metric'] = distance_metric

    toc = time.time()

    print("Embeddings found for given data set in ", toc - tic, " seconds")

    #-----------------------

    pivot_img_size = 112  #face recognition result image

    #-----------------------

    freeze = False
    face_detected = False
    face_included_frames = 0  #freeze screen if face detected sequentially for 5 frames
    freezed_frame = 0
    tic = time.time()

    cap = cv2.VideoCapture(source)  #webcam

    while True:
        ret, img = cap.read()

        if img is None:
            break

        #cv2.namedWindow('img', cv2.WINDOW_FREERATIO)
        #cv2.setWindowProperty('img', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

        raw_img = img.copy()
        resolution = img.shape
        resolution_x = img.shape[1]
        resolution_y = img.shape[0]

        if not freeze:

            try:
                #faces stores a list of detected_face and region pairs
                faces = FaceDetector.detect_faces(face_detector, detector_backend, img, align=False)
            except Exception:  #to avoid exception if no face detected
                faces = []

            if len(faces) == 0:
                face_included_frames = 0
        else:
            faces = []

        detected_faces = []
        face_index = 0
        for face, (x, y, w, h) in faces:
            if w > 130:  #discard small detected faces

                face_detected = True
                if face_index == 0:
                    face_included_frames = face_included_frames + 1  #increase frame for a single face

                cv2.rectangle(img, (x, y), (x + w, y + h), (67, 67, 67), 1)  #draw rectangle to main image

                cv2.putText(img, str(frame_threshold - face_included_frames), (int(x + w / 4), int(y + h / 1.5)), cv2.FONT_HERSHEY_SIMPLEX, 4, (255, 255, 255), 2)

                detected_face = img[int(y):int(y + h), int(x):int(x + w)]  #crop detected face

                #-------------------------------------

                detected_faces.append((x, y, w, h))
                face_index = face_index + 1

                #-------------------------------------

        if face_detected and face_included_frames == frame_threshold and not freeze:
            freeze = True
            #base_img = img.copy()
            base_img = raw_img.copy()
            detected_faces_final = detected_faces.copy()
            tic = time.time()

        if freeze:

            toc = time.time()
            if (toc - tic) < time_threshold:

                if freezed_frame == 0:
                    freeze_img = base_img.copy()
                    #freeze_img = np.zeros(resolution, np.uint8)  #here, np.uint8 handles showing white area issue

                    for detected_face in detected_faces_final:
                        x = detected_face[0]
                        y = detected_face[1]
                        w = detected_face[2]
                        h = detected_face[3]

                        cv2.rectangle(freeze_img, (x, y), (x + w, y + h), (67, 67, 67), 1)  #draw rectangle to main image

                        #-------------------------------
                        #apply deep learning for custom_face

                        custom_face = base_img[y:y + h, x:x + w]

                        #-------------------------------
                        #facial attribute analysis

                        if enable_face_analysis:

                            gray_img = functions.preprocess_face(img=custom_face, target_size=(48, 48), grayscale=True, enforce_detection=False, detector_backend='opencv')
                            emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
                            emotion_predictions = emotion_model.predict(gray_img)[0, :]
                            sum_of_predictions = emotion_predictions.sum()

                            mood_items = []
                            for i in range(0, len(emotion_labels)):
                                mood_item = []
                                emotion_label = emotion_labels[i]
                                emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
                                mood_item.append(emotion_label)
                                mood_item.append(emotion_prediction)
                                mood_items.append(mood_item)

                            emotion_df = pd.DataFrame(mood_items, columns=["emotion", "score"])
                            emotion_df = emotion_df.sort_values(by=["score"], ascending=False).reset_index(drop=True)

                            #background of mood box

                            #transparency
                            overlay = freeze_img.copy()
                            opacity = 0.4

                            if x + w + pivot_img_size < resolution_x:
                                #right
                                cv2.rectangle(freeze_img
                                              #, (x+w,y+20)
                                              , (x + w, y)
                                              , (x + w + pivot_img_size, y + h)
                                              , (64, 64, 64), cv2.FILLED)

                                cv2.addWeighted(overlay, opacity, freeze_img, 1 - opacity, 0, freeze_img)

                            elif x - pivot_img_size > 0:
                                #left
                                cv2.rectangle(freeze_img
                                              #, (x-pivot_img_size,y+20)
                                              , (x - pivot_img_size, y)
                                              , (x, y + h)
                                              , (64, 64, 64), cv2.FILLED)

                                cv2.addWeighted(overlay, opacity, freeze_img, 1 - opacity, 0, freeze_img)

                            for index, instance in emotion_df.iterrows():
                                emotion_label = "%s " % (instance['emotion'])
                                emotion_score = instance['score'] / 100

                                bar_x = 35  #this is the bar size if an emotion is 100%
                                bar_x = int(bar_x * emotion_score)

                                if x + w + pivot_img_size < resolution_x:

                                    text_location_y = y + 20 + (index + 1) * 20
                                    text_location_x = x + w

                                    if text_location_y < y + h:
                                        cv2.putText(freeze_img, emotion_label, (text_location_x, text_location_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                                        cv2.rectangle(freeze_img
                                                      , (x + w + 70, y + 13 + (index + 1) * 20)
                                                      , (x + w + 70 + bar_x, y + 13 + (index + 1) * 20 + 5)
                                                      , (255, 255, 255), cv2.FILLED)

                                elif x - pivot_img_size > 0:

                                    text_location_y = y + 20 + (index + 1) * 20
                                    text_location_x = x - pivot_img_size

                                    if text_location_y <= y + h:
                                        cv2.putText(freeze_img, emotion_label, (text_location_x, text_location_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

                                        cv2.rectangle(freeze_img
                                                      , (x - pivot_img_size + 70, y + 13 + (index + 1) * 20)
                                                      , (x - pivot_img_size + 70 + bar_x, y + 13 + (index + 1) * 20 + 5)
                                                      , (255, 255, 255), cv2.FILLED)

                            #-------------------------------

                            face_224 = functions.preprocess_face(img=custom_face, target_size=(224, 224), grayscale=False, enforce_detection=False, detector_backend='opencv')

                            age_predictions = age_model.predict(face_224)[0, :]
                            apparent_age = Age.findApparentAge(age_predictions)

                            #-------------------------------

                            gender_prediction = gender_model.predict(face_224)[0, :]

                            if np.argmax(gender_prediction) == 0:
                                gender = "W"
                            elif np.argmax(gender_prediction) == 1:
                                gender = "M"

                            #print(str(int(apparent_age)), " years old ", dominant_emotion, " ", gender)

                            analysis_report = str(int(apparent_age)) + " " + gender

                            #-------------------------------

                            info_box_color = (46, 200, 255)

                            #top
                            if y - pivot_img_size + int(pivot_img_size / 5) > 0:

                                triangle_coordinates = np.array([
                                    (x + int(w / 2), y),
                                    (x + int(w / 2) - int(w / 10), y - int(pivot_img_size / 3)),
                                    (x + int(w / 2) + int(w / 10), y - int(pivot_img_size / 3))
                                ])

                                cv2.drawContours(freeze_img, [triangle_coordinates], 0, info_box_color, -1)

                                cv2.rectangle(freeze_img, (x + int(w / 5), y - pivot_img_size + int(pivot_img_size / 5)), (x + w - int(w / 5), y - int(pivot_img_size / 3)), info_box_color, cv2.FILLED)

                                cv2.putText(freeze_img, analysis_report, (x + int(w / 3.5), y - int(pivot_img_size / 2.1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 111, 255), 2)

                            #bottom
                            elif y + h + pivot_img_size - int(pivot_img_size / 5) < resolution_y:

                                triangle_coordinates = np.array([
                                    (x + int(w / 2), y + h),
                                    (x + int(w / 2) - int(w / 10), y + h + int(pivot_img_size / 3)),
                                    (x + int(w / 2) + int(w / 10), y + h + int(pivot_img_size / 3))
                                ])

                                cv2.drawContours(freeze_img, [triangle_coordinates], 0, info_box_color, -1)

                                cv2.rectangle(freeze_img, (x + int(w / 5), y + h + int(pivot_img_size / 3)), (x + w - int(w / 5), y + h + pivot_img_size - int(pivot_img_size / 5)), info_box_color, cv2.FILLED)

                                cv2.putText(freeze_img, analysis_report, (x + int(w / 3.5), y + h + int(pivot_img_size / 1.5)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 111, 255), 2)

                        #-------------------------------
                        #face recognition

                        custom_face = functions.preprocess_face(img=custom_face, target_size=(input_shape_y, input_shape_x), enforce_detection=False, detector_backend='opencv')

                        #check that the preprocess_face function handled it
                        if custom_face.shape[1:3] == input_shape:
                            if df.shape[0] > 0:  #if there are images to verify, apply face recognition
                                img1_representation = model.predict(custom_face)[0, :]

                                #print(freezed_frame, " - ", img1_representation[0:5])

                                def findDistance(row):
                                    distance_metric = row['distance_metric']
                                    img2_representation = row['embedding']

                                    distance = 1000  #initialize very large value
                                    if distance_metric == 'cosine':
                                        distance = dst.findCosineDistance(img1_representation, img2_representation)
                                    elif distance_metric == 'euclidean':
                                        distance = dst.findEuclideanDistance(img1_representation, img2_representation)
                                    elif distance_metric == 'euclidean_l2':
                                        distance = dst.findEuclideanDistance(dst.l2_normalize(img1_representation), dst.l2_normalize(img2_representation))

                                    return distance

                                df['distance'] = df.apply(findDistance, axis=1)
                                df = df.sort_values(by=["distance"])

                                candidate = df.iloc[0]
                                employee_name = candidate['employee']
                                best_distance = candidate['distance']

                                #print(candidate[['employee', 'distance']].values)

                                #if True:
                                if best_distance <= threshold:
                                    #print(employee_name)
                                    display_img = cv2.imread(employee_name)

                                    display_img = cv2.resize(display_img, (pivot_img_size, pivot_img_size))

                                    label = employee_name.split("/")[-1].replace(".jpg", "")
                                    label = re.sub('[0-9]', '', label)

                                    try:
                                        if y - pivot_img_size > 0 and x + w + pivot_img_size < resolution_x:
                                            #top right
                                            freeze_img[y - pivot_img_size:y, x + w:x + w + pivot_img_size] = display_img

                                            overlay = freeze_img.copy()
                                            opacity = 0.4
                                            cv2.rectangle(freeze_img, (x + w, y), (x + w + pivot_img_size, y + 20), (46, 200, 255), cv2.FILLED)
                                            cv2.addWeighted(overlay, opacity, freeze_img, 1 - opacity, 0, freeze_img)

                                            cv2.putText(freeze_img, label, (x + w, y + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1)

                                            #connect face and text
                                            cv2.line(freeze_img, (x + int(w / 2), y), (x + 3 * int(w / 4), y - int(pivot_img_size / 2)), (67, 67, 67), 1)
                                            cv2.line(freeze_img, (x + 3 * int(w / 4), y - int(pivot_img_size / 2)), (x + w, y - int(pivot_img_size / 2)), (67, 67, 67), 1)

                                        elif y + h + pivot_img_size < resolution_y and x - pivot_img_size > 0:
                                            #bottom left
                                            freeze_img[y + h:y + h + pivot_img_size, x - pivot_img_size:x] = display_img

                                            overlay = freeze_img.copy()
                                            opacity = 0.4
                                            cv2.rectangle(freeze_img, (x - pivot_img_size, y + h - 20), (x, y + h), (46, 200, 255), cv2.FILLED)
                                            cv2.addWeighted(overlay, opacity, freeze_img, 1 - opacity, 0, freeze_img)

                                            cv2.putText(freeze_img, label, (x - pivot_img_size, y + h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1)

                                            #connect face and text
                                            cv2.line(freeze_img, (x + int(w / 2), y + h), (x + int(w / 2) - int(w / 4), y + h + int(pivot_img_size / 2)), (67, 67, 67), 1)
                                            cv2.line(freeze_img, (x + int(w / 2) - int(w / 4), y + h + int(pivot_img_size / 2)), (x, y + h + int(pivot_img_size / 2)), (67, 67, 67), 1)

                                        elif y - pivot_img_size > 0 and x - pivot_img_size > 0:
                                            #top left
                                            freeze_img[y - pivot_img_size:y, x - pivot_img_size:x] = display_img

                                            overlay = freeze_img.copy()
                                            opacity = 0.4
                                            cv2.rectangle(freeze_img, (x - pivot_img_size, y), (x, y + 20), (46, 200, 255), cv2.FILLED)
                                            cv2.addWeighted(overlay, opacity, freeze_img, 1 - opacity, 0, freeze_img)

                                            cv2.putText(freeze_img, label, (x - pivot_img_size, y + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1)

                                            #connect face and text
                                            cv2.line(freeze_img, (x + int(w / 2), y), (x + int(w / 2) - int(w / 4), y - int(pivot_img_size / 2)), (67, 67, 67), 1)
                                            cv2.line(freeze_img, (x + int(w / 2) - int(w / 4), y - int(pivot_img_size / 2)), (x, y - int(pivot_img_size / 2)), (67, 67, 67), 1)

                                        elif x + w + pivot_img_size < resolution_x and y + h + pivot_img_size < resolution_y:
                                            #bottom right
                                            freeze_img[y + h:y + h + pivot_img_size, x + w:x + w + pivot_img_size] = display_img

                                            overlay = freeze_img.copy()
                                            opacity = 0.4
                                            cv2.rectangle(freeze_img, (x + w, y + h - 20), (x + w + pivot_img_size, y + h), (46, 200, 255), cv2.FILLED)
                                            cv2.addWeighted(overlay, opacity, freeze_img, 1 - opacity, 0, freeze_img)

                                            cv2.putText(freeze_img, label, (x + w, y + h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1)

                                            #connect face and text
                                            cv2.line(freeze_img, (x + int(w / 2), y + h), (x + int(w / 2) + int(w / 4), y + h + int(pivot_img_size / 2)), (67, 67, 67), 1)
                                            cv2.line(freeze_img, (x + int(w / 2) + int(w / 4), y + h + int(pivot_img_size / 2)), (x + w, y + h + int(pivot_img_size / 2)), (67, 67, 67), 1)
                                    except Exception as err:
                                        print(str(err))

                        tic = time.time()  #in this way, the freezed image can be shown for 5 seconds

                        #-------------------------------

                time_left = int(time_threshold - (toc - tic) + 1)

                cv2.rectangle(freeze_img, (10, 10), (90, 50), (67, 67, 67), -10)
                cv2.putText(freeze_img, str(time_left), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 1)

                cv2.imshow('img', freeze_img)

                freezed_frame = freezed_frame + 1
            else:
                face_detected = False
                face_included_frames = 0
                freeze = False
                freezed_frame = 0

        else:
            cv2.imshow('img', img)

        if cv2.waitKey(1) & 0xFF == ord('q'):  #press q to quit
            break

    #kill open cv things
    cap.release()
    cv2.destroyAllWindows()
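#a minimal invocation sketch for the real-time loop above; "my_db" is a placeholder
#folder of .jpg identity images and source=0 selects the default webcam
analysis(db_path="my_db",
         model_name='VGG-Face',
         detector_backend='opencv',
         distance_metric='cosine',
         enable_face_analysis=True,
         source=0,
         time_threshold=5,    #seconds to keep the frozen result frame on screen
         frame_threshold=5)   #consecutive frames with a face before freezing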