def dataset_creation():
    """Interactively build an aligned-face dataset from a webcam or video files.

    Prompts on stdin for the output location, webcam resolution, GPU memory
    fraction and aligned face size, loads the MTCNN detector, and then loops
    over users: each user gets a sub-folder, and while the 's' key is held
    on the OpenCV window every detected face is aligned and written there as
    ``<name>_NNNN.png``.  Pressing 'q' on the window ends the current user.
    When dataset creation is finished the user may chain into ``train()`` or
    ``test()``.

    Returns:
        None.  Exits the process via ``sys.exit()`` if a directory cannot be
        created.
    """

    def ensure_dir(directory, made_prefix):
        # os.makedirs() returns None and reports failure by raising OSError,
        # so the failure has to be caught rather than tested for truthiness.
        if os.path.isdir(directory):
            print("Directory already exists. Using it \n")
            return
        try:
            os.makedirs(directory)
        except OSError:
            print("error in making directory. \n")
            sys.exit()
        print(made_prefix + directory + "\n")

    path = input("\nEnter the output folder location or simply press ENTER create a dataset folder in this directory only: ").rstrip()

    if os.path.isdir(path):
        # User given path is present.
        path += '/output'
        ensure_dir(path, "Directory successfully made in: ")
    else:
        # Either user pressed ENTER or gave wrong location.
        if path == "":
            print("Making an output folder in this directory only. \n")
        else:
            print("No such directory exists. Making an output folder in this current code directory only. \n")
        path = 'output'
        ensure_dir(path, "Directory successfully made: ")

    # Ask for webcam resolution
    res = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
    if res == "":
        res = (640, 480)
    else:
        res = tuple(map(int, res.split('x')))

    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction u want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
    if gpu_fraction == "":
        gpu_fraction = 0.8
    else:
        gpu_fraction = round(float(gpu_fraction), 1)

    # Some more MTCNN parameters
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Start MTCNN face detection and pose estimation module.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Create an object of face aligner module
    face_size = input("\nEnter desired face width and height in WidthxHeight format OR press ENTER for default 160x160 pixel: ").rstrip().lower()
    if face_size == "":
        face_size = (160, 160)
    else:
        face_size = tuple(map(int, face_size.split('x')))
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33), desiredFaceWidth=face_size[0], desiredFaceHeight=face_size[1])

    # Counter behind the default folder names.  It must exist before the loop:
    # it is assigned inside this function, so Python treats it as a local and
    # reading it uninitialised raises UnboundLocalError.
    personNo = 1

    while True:
        ask = input("\nEnter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ").rstrip()
        # Replace all spaces with underscores
        ask = ask.replace(" ", "_")
        folder_name = ask if ask != "" else 'person' + str(personNo)

        # Per-user state
        personNo += 1
        users_folder = path + "/" + folder_name
        image_no = 1
        ensure_dir(users_folder, "Directory successfully made: ")

        # Start webcam or video file according to user.
        data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ").rstrip()

        # Webcam runs until 'q'; a video runs for its reported frame count.
        loop_type = False
        total_frames = 0
        if data_type == "":
            data_type = 0
            loop_type = True

        device = cv2.VideoCapture(data_type)
        if data_type == 0:
            device.set(cv2.CAP_PROP_FRAME_WIDTH, res[0])
            device.set(cv2.CAP_PROP_FRAME_HEIGHT, res[1])
        else:
            total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))

        try:
            while loop_type or (total_frames > 0):
                if not loop_type:
                    total_frames -= 1

                ret, image = device.read()
                if not ret:
                    # Camera unavailable or video ended early: nothing to process.
                    break

                # Run MTCNN and do face detection only while 's' is pressed
                if (cv2.waitKey(1) & 0xFF) == ord("s"):
                    # Bounding boxes plus the landmark points of every face
                    bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

                    if bb.shape[0] > 0:
                        # Align and save each detected face
                        for col in range(points.shape[1]):
                            aligned_image = affine.align(image, points[:, col])
                            image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(4) + ".png"
                            cv2.imwrite(image_name, aligned_image)
                            image_no += 1

                        # Draw rectangles on the faces ...
                        for face in range(bb.shape[0]):
                            cv2.rectangle(image, (int(bb[face][0]), int(bb[face][1])), (int(bb[face][2]), int(bb[face][3])), (0, 255, 0), 2)

                        # ... and a dot on each of the 5 landmarks: rows 0-4 of
                        # `points` are read as x and rows 5-9 as y.
                        for col in range(points.shape[1]):
                            for lm in range(5):
                                cv2.circle(image, (int(points[lm][col]), int(points[lm + 5][col])), 1, (0, 255, 0), -1)

                # Show the output video to user
                cv2.imshow("Output", image)

                # 'q' moves on to the next user.
                if (cv2.waitKey(20) & 0xFF) == ord("q"):
                    break
        finally:
            # Release the device however the loop ended, not only on 'q'.
            device.release()
            cv2.destroyAllWindows()

        # Ask for more users using webcam or video, else stop.
        ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
        ask = ask.strip().lower()
        if ask != "" and ask[0] == 'q':
            break

    # Dataset creation is complete. Ask the user to train/test now or exit.
    ask = input("Press ENTER to exit or \nPress T keyword to TRAIN and 'maybe' TEST later by creating a classifier on the facenet model OR \nPress W to test the dataset folder on a classifier model: ").strip().lower()
    if ask == 't':
        train()
    elif ask == 'w':
        test()
    elif ask == "":
        print("Cleaning and exiting. Thank You \n")
    else:
        print("\n wrong keyword pressed. Cleaning and exiting. \n Thank You \n")
def main(args):
    """Detect, align and save every face found in a video file.

    Args:
        args: Parsed command-line namespace.  Reads ``gpu_memory_fraction``,
            ``video`` (input path), ``output`` (directory that receives an
            ``output_faces`` sub-folder), ``show_video`` and ``save_video``.

    Each aligned face is written to
    ``<output>/output_faces/output_faces_<n>.png``.  When ``save_video`` is
    set, the annotated frames are written to ``Output_<video file name>`` in
    the current working directory.
    """
    print('Creating networks and loading parameters')

    g2 = tf.Graph()
    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Create an object of face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    # Flags are compared against True exactly as before, so a non-bool value
    # (e.g. a string coming from the argument parser) keeps its old meaning.
    show_video = args.show_video == True  # noqa: E712
    save_video = args.save_video == True  # noqa: E712

    print("[INFO] Taking the video input.")
    video_path = os.path.expanduser(args.video)
    vs = cv2.VideoCapture(video_path)

    # Finding the file format, size and the fps rate
    fps = vs.get(cv2.CAP_PROP_FPS)
    video_format = int(vs.get(cv2.CAP_PROP_FOURCC))
    frame_size = (int(vs.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    total_frames = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))

    # Only open a writer when it will be used, and name it after the file
    # name alone: prefixing a path that contains directories with "Output_"
    # points into a directory that normally does not exist.
    output_video = None
    if save_video:
        output_video = cv2.VideoWriter(
            "Output_" + os.path.basename(video_path),
            video_format, fps, frame_size)

    # Create the output_faces directory by user or default arguments
    path = os.path.expanduser(args.output) + "/output_faces"
    if not os.path.isdir(path):
        os.makedirs(path)

    image_numbers = 0
    print("Total number of frames \n" + str(total_frames) + "\n")

    try:
        for frame_idx in range(total_frames):
            # Print the present frame / total frames to show progress
            print("\n" + str(frame_idx) + " / " + str(total_frames) + "\n")

            ret, image = vs.read()
            if not ret:
                # The container's frame count is only a hint; stop at the
                # first frame that cannot be decoded.
                break

            # The MTCNN callables are used as returned by create_mtcnn();
            # no per-frame session is opened here.
            bb, points = align.detect_face.detect_face(
                image, minsize, pnet, rnet, onet, threshold, factor)

            if bb.shape[0] > 0:
                # ALIGNMENT - one aligned crop per detected face
                for col in range(points.shape[1]):
                    aligned_image = affine.align(image, points[:, col])
                    if show_video:
                        cv2.imshow("aligned", aligned_image)

                    place = path + "/" + "output_faces_" + str(
                        image_numbers) + ".png"
                    print("saved to: " + place + "\n")
                    cv2.imwrite(place, aligned_image)
                    image_numbers += 1

                # Draw the boxes and landmarks only if someone will see them
                if show_video or save_video:
                    for face in range(bb.shape[0]):
                        cv2.rectangle(image,
                                      (int(bb[face][0]), int(bb[face][1])),
                                      (int(bb[face][2]), int(bb[face][3])),
                                      (0, 255, 0), 2)
                    # Rows 0-4 of `points` are read as x, rows 5-9 as y
                    for col in range(points.shape[1]):
                        for lm in range(5):
                            cv2.circle(
                                image,
                                (int(points[lm][col]),
                                 int(points[lm + 5][col])),
                                1, (255, 0, 0), -1)

            if save_video:
                output_video.write(image)
            if show_video:
                cv2.imshow("Output", image)

            # if the `q` key was pressed, stop early
            if (cv2.waitKey(1) & 0xFF) == ord("q"):
                break
    finally:
        # Clean up whether the loop finished, was interrupted or raised.
        vs.release()
        if output_video is not None:
            output_video.release()
        cv2.destroyAllWindows()
def recognize():
    """Interactively run face recognition on an image, a video or the webcam.

    Prompts on stdin for the classifier pickle, the FaceNet model, the face
    size, the GPU memory fraction, the input type and the two confidence
    thresholds.  Faces are detected with MTCNN, aligned, embedded with
    FaceNet and classified with the pickled classifier; boxes, landmarks and
    (above the recognition threshold) name and probability are drawn on the
    frame.

    Returns:
        None.
    """
    # Taking the parameters for recognition from the user
    classifier_filename = input("\nEnter the path of the classifier .pkl file or press ENTER if a filename 'classifier.pkl' is present in this code directory itself: ")
    if classifier_filename == "":
        classifier_filename = 'classifier.pkl'
    classifier_filename = os.path.expanduser(classifier_filename)

    model = input("\nEnter the FOLDER PATH inside which 20180402-114759 FOLDER is present. Press ENTER stating that the FOLDER 20180402-114759 is present in this code directory itself: ").rstrip()
    if model == "":
        model = "20180402-114759/20180402-114759.pb"
    else:
        # The prompt asks for the parent folder, so look for the model file
        # under it; any other input is passed through unchanged as before.
        bundled = os.path.join(model, "20180402-114759", "20180402-114759.pb")
        if os.path.isfile(bundled):
            model = bundled

    # Face aligner output size
    image_size = (160, 160)
    ask = input("\nEnter desired face width and height in WidthxHeight format for face aligner to take OR press ENTER for default 160x160 pixel: ").rstrip().lower()
    if ask != "":
        image_size = tuple(map(int, ask.split('x')))

    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction u want to allocate out of 1 or press ENTER for default 0.8: ").rstrip()
    if gpu_fraction == "":
        gpu_fraction = 0.8
    else:
        gpu_fraction = round(float(gpu_fraction), 1)

    input_type = input("\nPress I for image input OR\nPress V for video input OR\nPress W for webcam input OR\nPress ENTER for default webcam: ").strip().lower()
    if input_type == "":
        input_type = 'w'

    # Load the face aligner model
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33), desiredFaceWidth=image_size[0], desiredFaceHeight=image_size[1])

    # Separate graphs for the two TensorFlow architectures
    g1 = tf.Graph()
    g2 = tf.Graph()

    # FaceNet: one session, created once with the requested GPU options and
    # reused for every frame (previously a configured session was created
    # and dropped, and a fresh default session was opened per frame).
    with g1.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        facenet_sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with facenet_sess.as_default():
            facenet.load_model(model)
        images_placeholder = g1.get_tensor_by_name("input:0")
        embeddings = g1.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = g1.get_tensor_by_name("phase_train:0")

    # MTCNN face detection
    with g2.as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Some MTCNN network parameters
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.8]  # three steps's threshold
    factor = 0.709  # scale factor

    ask = input("\nEnter the threshold FACE DETECTION CONFIDENCE SCORE to consider detection by MTCNN OR press ENTER for default 0.80: ")
    if ask != "" and float(ask) < 1:
        threshold[2] = round(float(ask), 2)

    classifier_threshold = 0.50
    ask = input("\nEnter the threshold FACE RECOGNITION CONFIDENCE SCORE to consider face is recognised OR press ENTER for default 0.50: ")
    if ask != "":
        classifier_threshold = float(ask)

    # Loading the classifier model.
    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
    # file; only load classifier files from a trusted source.
    with open(classifier_filename, 'rb') as infile:
        (modelSVM, class_names) = pickle.load(infile)
    print('\nLoaded classifier model from file "%s"' % classifier_filename)

    def annotate(image, draw):
        """Detect and classify the faces in `image`; draw on it when `draw`."""
        bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)
        if bb.shape[0] == 0:
            return

        # ALIGNMENT + prewhitening, one entry per detected face
        img_list = []
        for col in range(points.shape[1]):
            aligned_image = affine.align(image, points[:, col])
            mean = np.mean(aligned_image)
            std = np.std(aligned_image)
            std_adj = np.maximum(std, 1.0 / np.sqrt(aligned_image.size))
            img_list.append(np.multiply(np.subtract(aligned_image, mean), 1 / std_adj))
        images = np.stack(img_list)

        # EMBEDDINGS: forward pass on FaceNet
        feed_dict = {images_placeholder: images, phase_train_placeholder: False}
        embedding = facenet_sess.run(embeddings, feed_dict=feed_dict)

        # PREDICTION: most likely class (person) per face
        predictions = modelSVM.predict_proba(embedding)
        best_class_indices = np.argmax(predictions, axis=1)
        best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]

        if not draw:
            return

        # DRAW: bounding boxes, landmarks and predicted names
        for face in range(bb.shape[0]):
            cv2.rectangle(image, (int(bb[face][0]), int(bb[face][1])), (int(bb[face][2]), int(bb[face][3])), (255, 0, 0), 1)
            # Name and probability only if the recognition threshold is crossed
            if best_class_probabilities[face] > classifier_threshold:
                cv2.putText(image, class_names[best_class_indices[face]], (int(bb[face][0]), int(bb[face][1]) - 7), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 1, cv2.LINE_AA)
                cv2.putText(image, str(round(best_class_probabilities[face] * 100, 2)) + "%", (int(bb[face][0]), int(bb[face][3]) + 7), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 1, cv2.LINE_AA)

        # Rows 0-4 of `points` are read as x, rows 5-9 as y
        for col in range(points.shape[1]):
            for lm in range(5):
                cv2.circle(image, (int(points[lm][col]), int(points[lm + 5][col])), 1, (0, 255, 0), -1)

    # ---- Single image -------------------------------------------------
    if input_type == "i":
        # The original jumped here with `goto(581)`, which is not Python;
        # the image is now processed as one explicit pass.
        data_type = input("\nWrite the image path file to open: ").strip()
        image = cv2.imread(data_type)
        if image is None:
            print("\nCould not read the image: " + data_type)
            return
        annotate(image, True)
        cv2.imshow("Output", image)
        cv2.waitKey(0)  # keep the window open until a key is pressed
        cv2.destroyAllWindows()
        return

    # ---- Webcam or video ----------------------------------------------
    save_video = False
    display_output = True
    total_frames = 0
    frame_no = 1
    res = (640, 480)
    output_video = None

    if input_type == "w":
        data_type = 0
        loop_type = True  # webcam: loop until 'q'
        ask = input("\nEnter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").rstrip().lower()
        if ask != "":
            res = tuple(map(int, ask.split('x')))
    else:
        input_type = "v"
        loop_type = False  # video: loop over the reported frame count
        data_type = input("\nWrite the video path file to open: ").strip()
        ask = input("\nPress y to save the output video OR simply press ENTER to ignore it: ").strip().lower()
        if ask == "y":
            save_video = True
        ask = input("\nSimply press ENTER to see the output video frames OR press N to switch off the output display: ").strip().lower()
        if ask == "n":
            display_output = False

    # Initialize webcam or video
    device = cv2.VideoCapture(data_type)
    if input_type == "w":
        device.set(cv2.CAP_PROP_FRAME_WIDTH, res[0])
        device.set(cv2.CAP_PROP_FRAME_HEIGHT, res[1])
    else:
        total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))
        if save_video:
            # Reuse the input's codec, size and fps for the output file
            fps = device.get(cv2.CAP_PROP_FPS)
            video_format = int(device.get(cv2.CAP_PROP_FOURCC))
            frame_size = (int(device.get(cv2.CAP_PROP_FRAME_WIDTH)), int(device.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            # Name the output after the file name only, so an input path
            # with directories does not yield an unwritable location.
            output_video = cv2.VideoWriter("Output_" + os.path.basename(data_type), video_format, fps, frame_size)

    try:
        while loop_type or (frame_no <= total_frames):
            ret, image = device.read()
            if not ret:
                # Camera failure or the video ended before the reported count
                break

            if not loop_type:
                # Report before incrementing so progress runs up to 100%
                print("\nProgress: %.2f" % (100 * frame_no / total_frames) + "%")
                frame_no += 1

            annotate(image, save_video or display_output)

            if display_output:
                cv2.imshow("Output", image)
            if save_video:
                output_video.write(image)

            # if the `q` key was pressed, break from the loop
            if (cv2.waitKey(1) & 0xFF) == ord("q"):
                break
    finally:
        # Clean up however the loop ended
        device.release()
        if output_video is not None:
            output_video.release()
        cv2.destroyAllWindows()
def main(args):
    """Preview MTCNN detection and face alignment on the default webcam.

    Args:
        args: Parsed command-line namespace; only ``gpu_memory_fraction`` is
            read.

    Shows the annotated camera frame in an "Output" window and, whenever a
    face is detected, the aligned face in an "Alignment" window.  Press 'q'
    on a window to quit.
    """
    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Create an object of face aligner module
    affine = FaceAligner(desiredLeftEye=(0.39, 0.39),
                         desiredFaceWidth=256,
                         desiredFaceHeight=256)

    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    vs.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
    time.sleep(2.0)

    try:
        while True:
            ret, img = vs.read()
            if not ret:
                # No frame available (camera missing or disconnected)
                break

            # Bounding boxes plus the landmark points for the faces
            bb, points = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                       onet, threshold, factor)

            if bb.shape[0] > 0:
                # Rectangles on the faces
                for face in range(bb.shape[0]):
                    cv2.rectangle(img, (int(bb[face][0]), int(bb[face][1])),
                                  (int(bb[face][2]), int(bb[face][3])),
                                  (0, 255, 0), 2)

                # Dots on the landmarks: rows 0-4 are read as x, rows 5-9 as y
                for col in range(points.shape[1]):
                    for lm in range(5):
                        cv2.circle(img, (int(points[lm][col]),
                                         int(points[lm + 5][col])),
                                   1, (255, 0, 0), -1)

                # ALIGNMENT - the full `points` array is handed to the aligner
                aligned_image = affine.align(img, points)

                # Show the aligned image only when a face was found
                cv2.imshow("Alignment", aligned_image)

            cv2.imshow("Output", img)

            # if the `q` key was pressed, break from the loop
            if (cv2.waitKey(1) & 0xFF) == ord("q"):
                break
    finally:
        # Free the camera and close the preview windows on every exit path
        vs.release()
        cv2.destroyAllWindows()
print(img)

# Load the input image, resize it, and convert it to grayscale
image = cv2.imread(img)
image = imutils.resize(image, width=800)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
rects = detector(gray, 2)

# Loop over the face detections
for rect in rects:
    count += 1

    # Extract the ROI of the *original* face, then align the face
    # using facial landmarks
    (x, y, w, h) = rect_to_bb(rect)
    faceOrig = imutils.resize(image[y:y + h, x:x + w], width=256)
    faceAligned = fa.align(image, gray, rect)
    faceAligned = cv2.resize(faceAligned, (160, 160))

    # "base" images go to base_image/, everything else to verify/.
    # `{:02d}` yields the same names as the former "0{}" / "{}" branches and
    # also covers verify images numbered 10 and above, which were previously
    # not written at all.
    subdir = "base_image" if args["class"] == "base" else "verify"
    cv2.imwrite(
        "aligned_faces/{}/user_{}_{:02d}.jpg".format(subdir, uid, count),
        faceAligned)
def PatchExtraction(video_path, landmarks_path, output_dir, patch_size=32):
    """Extract aligned faces and facial-region patches from a video.

    Every 6th frame whose `` success`` column in the landmarks CSV equals 1
    is aligned, its landmarks are mapped through the alignment transform,
    and a patch around the centre of each region (left eye, right eye,
    mouth, nose) is extracted.  A frame is written out only when all four
    patches have the expected shape.

    Args:
        video_path: Path of the input video.
        landmarks_path: Path of the landmarks CSV; if it does not exist the
            function returns without doing anything.
        output_dir: Directory receiving the ``aligned_face``, ``left_eye``,
            ``right_eye``, ``mouth`` and ``nose`` sub-folders.
        patch_size: Side length passed to ``extract_patch``; patches are
            accepted only with shape ``(patch_size, patch_size, 3)``.

    Returns:
        None.
    """
    print("Input: ", video_path)
    print("Output:", output_dir)

    if not os.path.exists(landmarks_path):
        return

    df = pd.read_csv(landmarks_path)
    success = df[' success']

    # Collect the usable frames (channel order reversed) and their indices
    frames = []
    frame_number = []
    cap = cv2.VideoCapture(video_path)
    try:
        count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if count % 6 == 0 and len(success) > count:
                if success[count] == 1:
                    frames.append(frame[:, :, ::-1])
                    frame_number.append(count)
            count += 1
    finally:
        cap.release()

    for folder in ["aligned_face", "left_eye", "right_eye", "mouth", "nose"]:
        os.makedirs(os.path.join(output_dir, folder), exist_ok=True)

    regions = ["left_eye", "right_eye", "mouth", "nose"]
    # Follow the patch_size argument; the check used to be pinned to
    # (32, 32, 3), which rejected every patch for any other patch_size.
    expected_shape = (patch_size, patch_size, 3)

    for idx, frame in enumerate(frames):
        number = frame_number[idx]

        # Columns 299..366 are read as the 68 x values and 367..434 as the
        # 68 y values (presumably the landmark tool's CSV layout - confirm).
        x = np.array(df.iloc[number, 299:299 + 68]).reshape(68, -1)
        y = np.array(df.iloc[number, 299 + 68:299 + 68 * 2]).reshape(68, -1)
        z = np.ones(68).reshape(68, -1)
        landmarks = np.concatenate((x, y), axis=1)

        aligner = FaceAligner(desiredLeftEye=(0.35, 0.35),
                              desiredFaceWidth=128,
                              desiredFaceHeight=int(128 * 2))
        aligned_face, M = aligner.align(frame, landmarks)

        # Append a column of ones and multiply by M transposed to move the
        # landmarks into the aligned image's coordinates.
        landmarks_z = np.concatenate((landmarks, z), axis=1)
        affined_landmarks = np.matmul(landmarks_z, M.transpose())

        regions_image = []
        for region in regions:
            start, end = FACIAL_LANDMARKS_68_IDXS[region]
            center = affined_landmarks[start:end].mean(axis=0)
            try:
                img = extract_patch(aligned_face, center, patch_size)
            except Exception:
                # Patch could not be extracted: give up on this frame
                break
            if img.shape != expected_shape:
                break
            regions_image.append(img)

        # Write the frame only if every region produced a valid patch
        if len(regions_image) != len(regions):
            continue

        stem = str(number).zfill(4)
        for region, img in zip(regions, regions_image):
            save(img, os.path.join(output_dir, region, stem + '.bmp'))

        np.save(os.path.join(output_dir, 'aligned_face', stem + '.npy'),
                affined_landmarks)
        save(aligned_face,
             os.path.join(output_dir, 'aligned_face', stem + '.bmp'))
k = cv2.waitKey(1) if k % 256 == 27: # ESC pressed print("Quit") break elif k % 256 == 32: # SPACE pressed lm = "error" gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) face_rect = [] detections = get_face_recs(frame) for detection in detections: clahe_image = clahe.apply(gray) aligned = fa.align(clahe_image, detection) lm = get_landmarks(aligned) face_rect = detection break if lm is not "error": sample = np.array([lm]) sample.reshape(1, -1) emotion = SVM.predict(sample) print("Emotion detected: {}".format(emotion.capitalize())) cv2.putText(frame, emotion.capitalize(), (50, 50), cv2.FONT_HERSHEY_PLAIN, 3, (255, 0, 0), 2) cv2.imshow("Frame", frame) # When everything done, release the capture
def main(args):
    """Run MTCNN detection, alignment and FaceNet embedding on the webcam.

    Args:
        args: Parsed command-line namespace.  Reads ``gpu_memory_fraction``
            and ``model`` (passed to ``facenet.load_model``).

    For every frame with a detected face, the aligned face is shown in an
    "Alignment" window and its FaceNet embedding is printed.  The annotated
    frame is shown in an "Output" window.  Press 'q' on a window to quit.
    """
    print('Creating networks and loading parameters')

    # Separate graphs for the two networks
    g1 = tf.Graph()
    g2 = tf.Graph()

    # FaceNet: one configured session, reused for every frame (previously a
    # configured session was created and dropped, and a fresh default
    # session was opened per frame).
    with g1.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        facenet_sess = tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False))
        with facenet_sess.as_default():
            facenet.load_model(args.model)
        images_placeholder = g1.get_tensor_by_name("input:0")
        embeddings = g1.get_tensor_by_name("embeddings:0")
        phase_train_placeholder = g1.get_tensor_by_name("phase_train:0")

    # MTCNN face detector
    with g2.as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Create an object of face aligner module
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33),
                         desiredFaceWidth=160,
                         desiredFaceHeight=160)

    print("[INFO] camera sensor warming up...")
    vs = cv2.VideoCapture(0)
    vs.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    vs.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    time.sleep(2.0)

    try:
        while True:
            ret, img = vs.read()
            if not ret:
                # No frame available (camera missing or disconnected)
                break

            # Bounding boxes plus the landmark points for the faces
            bb, points = align.detect_face.detect_face(img, minsize, pnet, rnet,
                                                       onet, threshold, factor)

            if bb.shape[0] > 0:
                # Rectangles on the faces
                for face in range(bb.shape[0]):
                    cv2.rectangle(img, (int(bb[face][0]), int(bb[face][1])),
                                  (int(bb[face][2]), int(bb[face][3])),
                                  (0, 255, 0), 2)

                # Dots on the landmarks: rows 0-4 are read as x, rows 5-9 as y
                for col in range(points.shape[1]):
                    for lm in range(5):
                        cv2.circle(img, (int(points[lm][col]),
                                         int(points[lm + 5][col])),
                                   1, (255, 0, 0), -1)

                # ALIGNMENT - the full `points` array is handed to the aligner
                aligned_image = affine.align(img, points)
                cv2.imshow("Alignment", aligned_image)

                # Prewhiten the image for the FaceNet architecture
                mean = np.mean(aligned_image)
                std = np.std(aligned_image)
                std_adj = np.maximum(std, 1.0 / np.sqrt(aligned_image.size))
                facenet_image = np.multiply(np.subtract(aligned_image, mean),
                                            1 / std_adj)

                # NOTE(review): the same image is stacked twice, so the
                # printed embedding has two identical rows.  Kept as in the
                # original - confirm whether a batch of one is intended.
                images = np.stack([facenet_image, facenet_image])

                # Forward pass on FaceNet to get the embeddings
                feed_dict = {
                    images_placeholder: images,
                    phase_train_placeholder: False
                }
                embedding = facenet_sess.run(embeddings, feed_dict=feed_dict)

                print("Here is the embedding \n")
                print(embedding)
                print("\n")

            cv2.imshow("Output", img)

            # if the `q` key was pressed, break from the loop
            if (cv2.waitKey(1) & 0xFF) == ord("q"):
                break
    finally:
        # Free the camera and close the preview windows on every exit path
        vs.release()
        cv2.destroyAllWindows()
def main():
    """Entry point: create a face dataset and/or train the recogniser.

    Prints the usage guidelines and asks for a mode.  'T' calls ``train()``
    and returns.  'D' asks for the output folder, webcam resolution, GPU
    memory fraction and face size, loads MTCNN and then loops over users,
    saving aligned faces as ``<name>_NNN.png`` while 's' is held on the
    OpenCV window ('q' ends the current user).  Afterwards the user may
    start training.  Any other mode exits the process.

    Returns:
        None.
    """

    def ensure_dir(directory, made_prefix):
        # os.makedirs() returns None and reports failure by raising OSError,
        # so the failure has to be caught rather than tested for truthiness.
        if os.path.isdir(directory):
            print("Directory already exists. Using it \n")
            return
        try:
            os.makedirs(directory)
        except OSError:
            print("error in making directory. \n")
            sys.exit()
        print(made_prefix + directory + "\n")

    print("\n*********************************************************************************************** \n")
    print(" Welcome to the Face detection and recognition program. \n")
    print("\n*********************************************************************************************** \n")
    print("GUIDELINES TO USE THIS SOFTWARE: \n\nThis code gives the user to:\n\n1) CREATE DATASET using MTCNN face detection and alignment. or\n2) TRAIN FaceNet for face recognition. or \n3) Do both.\n\n The user will multiple times get option to choose webcam (default option) or video file to do face detection and will be asked for output folder, username on folder and image files etc also (default options exists for that too)\n\n ************** IMPORTANT *************\n1) Whenever webcam or video starts press 's' keyword to start face detection in video or webcam frames and save the faces in the folder for a single user. This dataset creation will stop the moment you release the 's' key. This can be done multiple times.\n\n2) Press 'q' to close it when you are done with one person, and want to detect face for another person. \n\n3) Make sure you press the keywords on the image window and not the terminal window.\n")

    # Accept the keyword in either case and with stray whitespace
    mode = input("Press T to train the facenet for recognition OR \nPress D to first create dataset and then 'maybe' train later: ").strip().upper()

    if mode == 'T':
        # Training only: return afterwards instead of falling through into
        # dataset creation, where the detector was never loaded.
        train()
        return
    if mode != 'D':
        print("No correct keyword entered. Exiting")
        sys.exit()

    # ---- Dataset creation ---------------------------------------------
    personNo = 1

    path = input("Enter the output folder location or simply press ENTER create a dataset folder in this directory only: ")
    if os.path.isdir(path):
        # User given path is present.
        path += '/output'
        ensure_dir(path, "Directory successfully made in: ")
    else:
        # Either user pressed ENTER or gave wrong location.
        if path == "":
            print("Making an output folder in this directory only. \n")
        else:
            print("No such directory exists. Making an output folder in this current code directory only. \n")
        path = 'output'
        ensure_dir(path, "Directory successfully made: ")

    # Ask for webcam resolution.  The empty answer must be checked before
    # parsing: int("") raises, so ENTER used to crash instead of defaulting.
    raw = input("Enter your webcam SUPPORTED resolution for face detection. For eg. 640x480 OR press ENTER for default 640x480: ").strip().lower()
    res = (640, 480) if raw == "" else tuple(map(int, raw.split("x")))

    # Take gpu fraction values
    gpu_fraction = input("\nEnter the gpu memory fraction u want to allocate out of 1 or press ENTER for default 0.8: ").strip()
    if gpu_fraction == "":
        gpu_fraction = 0.8
    else:
        gpu_fraction = round(float(gpu_fraction), 1)

    # Some more MTCNN parameters
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Start MTCNN face detection and pose estimation module.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    # Create an object of face aligner module (same empty-input fix as above)
    raw = input("Enter desired face width and height in widthxheight format OR press ENTER for default 160x160 pixel: ").strip().lower()
    face_size = (160, 160) if raw == "" else tuple(map(int, raw.split("x")))
    affine = FaceAligner(desiredLeftEye=(0.33, 0.33), desiredFaceWidth=face_size[0], desiredFaceHeight=face_size[1])

    while True:
        ask = input("\n Enter the user name for CREATING FOLDER with given username and image naming inside with username_xx.png numbered format or press ENTER to use default person_xx naming format: ")
        # Replace all spaces with underscores
        ask = ask.replace(" ", "_")
        folder_name = ask if ask != "" else 'person_' + str(personNo)

        # Per-user state
        personNo += 1
        users_folder = path + "/" + folder_name
        image_no = 0

        # Create the user's own folder (the original passed `path` here, so
        # the user folder was never created).
        ensure_dir(users_folder, "Directory successfully made: ")

        # Start webcam or video file according to user.
        data_type = input("Press ENTER for detecting " + folder_name + " with webcam or write video path to open and create dataset of " + folder_name + " : ")

        # Webcam runs until 'q'; a video runs for its reported frame count.
        loop_type = False
        total_frames = 0
        if data_type == "":
            data_type = 0
            loop_type = True

        device = cv2.VideoCapture(data_type)
        if data_type == 0:
            device.set(cv2.CAP_PROP_FRAME_WIDTH, res[0])
            device.set(cv2.CAP_PROP_FRAME_HEIGHT, res[1])
        else:
            total_frames = int(device.get(cv2.CAP_PROP_FRAME_COUNT))

        try:
            while loop_type or (total_frames > 0):
                if not loop_type:
                    total_frames -= 1

                ret, image = device.read()
                if not ret:
                    # Camera unavailable or video ended early
                    break

                # Run MTCNN and do face detection only while 's' is pressed
                # (bitwise `&` masks the key code; `&&` is not Python).
                if (cv2.waitKey(1) & 0xFF) == ord("s"):
                    bb, points = align.detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)

                    if bb.shape[0] > 0:
                        # Align and save each detected face
                        for col in range(points.shape[1]):
                            aligned_image = affine.align(image, points[:, col])
                            image_name = users_folder + "/" + folder_name + "_" + str(image_no).zfill(3) + ".png"
                            cv2.imwrite(image_name, aligned_image)
                            image_no += 1

                        # Rectangles on the faces
                        for face in range(bb.shape[0]):
                            cv2.rectangle(image, (int(bb[face][0]), int(bb[face][1])), (int(bb[face][2]), int(bb[face][3])), (0, 255, 0), 2)

                        # Dots on the landmarks: rows 0-4 are read as x,
                        # rows 5-9 as y
                        for col in range(points.shape[1]):
                            for lm in range(5):
                                cv2.circle(image, (int(points[lm][col]), int(points[lm + 5][col])), 1, (0, 255, 0), -1)

                # Show the output video to user
                cv2.imshow("Output", image)

                # 'q' moves on to the next user.
                if (cv2.waitKey(1) & 0xFF) == ord("q"):
                    break
        finally:
            # Release the device however the loop ended, not only on 'q'.
            device.release()
            cv2.destroyAllWindows()

        # Ask for more users using webcam or video, else stop.
        ask = input("Press ENTER if you want to add more users or press the keyword 'q' to stop dataset creation: ")
        if ask.strip().lower() == 'q':
            break

    # Dataset creation is complete. Ask the user to train now or exit.
    ask = input("Press ENTER to exit or press T keyword to train the data by Facenet model on dataset: ")
    if ask.strip().upper() == "T":
        train()
class OpenvinoFaceVectorizer:
    """Convert face images to feature vectors with OpenVINO and match them.

    Two networks are loaded on the CPU plugin: a landmarks network, whose
    output is handed to the face aligner, and a features network, whose
    output is the vector used for matching.

    ``vectors`` maps an identity key to the list of feature vectors stored
    for that identity.
    """

    def __init__(self,
                 cpu_lib="/opt/intel/openvino_2019.3.376/deployment_tools/inference_engine/lib/intel64/libcpu_extension_avx2.so",
                 landmarks_xml="openvino_detectors/landmarks-regression/FP32/model.xml",
                 features_xml="openvino_detectors/face-reidentification/FP32/model.xml"):
        """Load both networks on the CPU plugin and allocate input buffers.

        Args:
            cpu_lib: Path of the CPU extension library. A falsy value skips
                loading the extension.
            landmarks_xml: Path of the landmarks model ``.xml``; the weights
                are read from the same path with a ``.bin`` extension.
            features_xml: Path of the features model ``.xml``; the weights
                are read from the same path with a ``.bin`` extension.
        """
        # Plugin initialization for the CPU device; the extension library is
        # loaded only when one is specified.
        plugin = IEPlugin(device="CPU")
        if cpu_lib:
            plugin.add_cpu_extension(cpu_lib)

        landmarks_net = self._read_network("landmarks", landmarks_xml)
        features_net = self._read_network("features", features_xml)

        # Each network is addressed through its first input and first output.
        self.l_in = next(iter(landmarks_net.inputs))
        self.l_out = next(iter(landmarks_net.outputs))
        landmarks_net.batch_size = 1

        self.f_in = next(iter(features_net.inputs))
        self.f_out = next(iter(features_net.outputs))
        features_net.batch_size = 1

        # Reusable single-image input buffers sized from the network inputs.
        cur = landmarks_net.inputs[self.l_in]
        self.l_n = cur.layout
        self.l_c, self.l_h, self.l_w = cur.shape[1:]
        self.l_images = np.empty(shape=(1, self.l_c, self.l_h, self.l_w))

        cur = features_net.inputs[self.f_in]
        self.f_n = cur.layout
        self.f_c, self.f_h, self.f_w = cur.shape[1:]
        self.f_images = np.empty(shape=(1, self.f_c, self.f_h, self.f_w))

        # Loading models to the plugin
        log.info("Loading models to the plugin")
        self.l_exec_net = plugin.load(network=landmarks_net)
        self.f_exec_net = plugin.load(network=features_net)

        self.face_aligner = FaceAligner(face_width=self.f_w, face_height=self.f_h)
        self.vectors = {}

    @staticmethod
    def _read_network(label, xml_path):
        """Read the IR network at *xml_path* and its sibling ``.bin`` file."""
        bin_path = os.path.splitext(xml_path)[0] + ".bin"
        log.info("Loading {} network files:\n\t{}\n\t{}".format(label, xml_path, bin_path))
        return IENetwork.from_ir(model=xml_path, weights=bin_path)

    def face_to_vector(self, face):
        """Return the feature vector of *face* as a NumPy array.

        Args:
            face: Image array whose last axis is moved to the front before
                inference; its first two axes are read as height and width.

        Returns:
            A copy of the squeezed features-network output.
        """
        height, width = face.shape[:-1]

        landmark_face = cv2.resize(face, (self.l_w, self.l_h))
        self.l_images[0] = landmark_face.transpose((2, 0, 1))
        l_res = np.squeeze(
            self.l_exec_net.infer(inputs={self.l_in: self.l_images})[self.l_out]
        )

        # Scale the first ten outputs back to the size of `face`: even
        # positions by its width, odd positions by its height (presumably
        # normalized x, y landmark pairs -- confirm against the model).
        l_res[0:10:2] *= width
        l_res[1:10:2] *= height

        aligned_face = self.face_aligner.align(face, l_res)
        self.f_images[0] = aligned_face.transpose((2, 0, 1))
        f_res = np.squeeze(
            self.f_exec_net.infer(inputs={self.f_in: self.f_images})[self.f_out]
        )
        return np.array(f_res)

    def searcher(self, face_img, top=3):
        """Return the stored vectors most similar to *face_img*.

        Args:
            face_img: Face image passed to ``face_to_vector``.
            top: Maximum number of matches to return; values <= 0 give an
                empty list.

        Returns:
            A list of ``(similarity, id_people)`` tuples ordered by
            descending similarity. There is one entry per stored vector, so
            an identity can appear more than once.
        """
        face_vector = self.face_to_vector(face_img)
        if top <= 0:
            return []

        matches = []
        for id_people, faces in self.vectors.items():
            for stored in faces:
                matches.append((self.face_similarity(stored, face_vector), id_people))

        # Order on similarity alone so that ties never compare identity keys
        # (which may not be orderable); the stable sort keeps ties in
        # insertion order.
        matches.sort(key=lambda match: match[0], reverse=True)
        return matches[:top]

    def add_face(self, face, face_name, replace=True):
        """Store the feature vector of *face* under *face_name*.

        Args:
            face: Face image passed to ``face_to_vector``.
            face_name: Identity key used in ``vectors``.
            replace: When true (the default), discard any vectors already
                stored for *face_name*; when false, append to them.
        """
        vector = self.face_to_vector(face)
        if replace:
            self.vectors[face_name] = [vector]
        else:
            self.vectors.setdefault(face_name, []).append(vector)

    def face_similarity(self, v1, v2):
        """Return one minus the cosine distance between *v1* and *v2*."""
        return 1.0 - spatial.distance.cosine(v1, v2)