def get_all_faces(self, img):
    """Detect every face in ``img`` and return the aligned crops.

    Runs ``detect_face.detect_face`` with this object's detector settings
    (``det_minsize``, ``pnet``/``rnet``/``onet``, ``det_threshold``,
    ``det_factor``) and aligns each detection with
    ``face_preprocess.preprocess``.

    Args:
        img: Image array passed unchanged to the detector and the aligner
            (presumably H x W x C -- confirm against callers).

    Returns:
        A list with one aligned image per detected box, each transposed with
        axes ``(2, 0, 1)``. The list is empty when the detector returns no
        boxes.
    """
    str_image_size = "%d,%d" % (self.image_size[0], self.image_size[1])
    bounding_boxes, points = detect_face.detect_face(
        img, self.det_minsize, self.pnet, self.rnet, self.onet,
        self.det_threshold, self.det_factor)

    ret = []
    # ``range`` rather than ``xrange``: the latter does not exist on
    # Python 3, and ``range`` behaves the same here on Python 2.
    for i in range(bounding_boxes.shape[0]):
        bbox = bounding_boxes[i, 0:4]
        # Column i holds ten landmark values; reshape them to five rows of
        # two values, the layout handed to the aligner.
        landmark = points[:, i].reshape((2, 5)).T
        aligned = face_preprocess.preprocess(
            img, bbox=bbox, landmark=landmark, image_size=str_image_size)
        ret.append(np.transpose(aligned, (2, 0, 1)))
    return ret
def get_aligned_face(self, img, force=False):
    """Return the aligned crop of the most prominent face in ``img``.

    Detection uses this object's configured thresholds. When nothing is found
    and ``force`` is true, detection is retried once with the much looser
    thresholds ``[0.3, 0.3, 0.1]``. If several faces are found, the one with
    the largest ``area - 2 * squared_distance_to_image_centre`` is chosen.

    Args:
        img: Image array passed to the detector and the aligner.
        force: Retry with relaxed thresholds when the first pass finds
            nothing.

    Returns:
        The aligned face transposed with axes ``(2, 0, 1)``, or ``None`` when
        no face is detected.
    """
    def run_detector(thresholds):
        # Single place that knows the detector's argument list.
        return detect_face.detect_face(
            img, self.det_minsize, self.pnet, self.rnet, self.onet,
            thresholds, self.det_factor)

    boxes, landmarks = run_detector(self.det_threshold)
    if boxes.shape[0] == 0 and force:
        print('force det', img.shape)
        boxes, landmarks = run_detector([0.3, 0.3, 0.1])

    face_count = boxes.shape[0]
    if face_count == 0:
        return None

    corners = boxes[:, 0:4]
    best = 0
    if face_count > 1:
        # Prefer big faces, with some extra weight on being centred.
        centre = np.asarray(img.shape)[0:2] / 2
        areas = (corners[:, 2] - corners[:, 0]) * (corners[:, 3] - corners[:, 1])
        offsets = np.vstack([
            (corners[:, 0] + corners[:, 2]) / 2 - centre[1],
            (corners[:, 1] + corners[:, 3]) / 2 - centre[0],
        ])
        squared_distance = np.sum(np.power(offsets, 2.0), 0)
        best = np.argmax(areas - squared_distance * 2.0)

    chosen_box = np.squeeze(corners[best, :])
    chosen_points = landmarks[:, best]
    # Ten landmark values -> five rows of two values for the aligner.
    landmark = chosen_points.reshape((2, 5)).T
    flat_points = list(chosen_points.flatten())
    assert len(flat_points) == 10

    size_spec = "%d,%d" % (self.image_size[0], self.image_size[1])
    warped = face_preprocess.preprocess(
        img, bbox=chosen_box, landmark=landmark, image_size=size_spec)
    warped = np.transpose(warped, (2, 0, 1))
    print(warped.shape)
    return warped
def main():
    """Crop every detected face out of an image into its own PNG.

    Command line: ``IMAGE_PATH OUTPUT_PATH_PREFIX``. Each ``(x, y, width,
    height)`` entry returned by ``detect_face(image_path)`` is cropped with
    ImageMagick's ``convert`` and written to ``<OUTPUT_PATH_PREFIX>-<n>.png``,
    with ``n`` starting at 1.

    Exits with a message (non-zero status) when the argument count is wrong,
    when no face is found, or when any ``convert`` invocation fails. A failed
    crop does not stop the remaining faces from being processed.
    """
    if len(sys.argv) != 3:
        sys.exit('Usage: %s IMAGE_PATH OUTPUT_PATH_PREFIX' % sys.argv[0])
    image_path, output_path_prefix = sys.argv[1:3]

    faces = detect_face(image_path)
    # len() rather than truthiness: the return type of detect_face is not
    # visible here, and len() is well-defined for lists and arrays alike.
    if len(faces) == 0:
        sys.exit('No face found')

    failed_outputs = []
    for n, (x, y, width, height) in enumerate(faces, start=1):
        output_path = f'{output_path_prefix}-{n}.png'
        result = subprocess.run([
            'convert',
            image_path,
            '-gravity',
            'NorthWest',
            '-extent',
            f'{width}x{height}+{x}+{y}',
            output_path,
        ])
        # Previously the exit status was dropped, so the script reported
        # success even when convert wrote nothing.
        if result.returncode != 0:
            failed_outputs.append(output_path)

    if failed_outputs:
        sys.exit('convert failed for: %s' % ', '.join(failed_outputs))
def main():
    """Write a 128x128 thumbnail centred on a face, or a fallback crop.

    Command line: ``IMAGE_PATH OUTPUT_PATH``. When ``detect_face(image_path)``
    returns at least one ``(x, y, width, height)`` entry, one of them is
    picked at random and cropped; otherwise the image is cropped to a 1:1
    extent anchored at the top (``-gravity North``). Either way the result is
    resized to 128x128 by ImageMagick's ``convert``.

    Exits with a message (non-zero status) when the argument count is wrong
    or when ``convert`` fails.
    """
    if len(sys.argv) != 3:
        sys.exit('Usage: %s IMAGE_PATH OUTPUT_PATH' % sys.argv[0])
    image_path, output_path = sys.argv[1:3]

    faces = detect_face(image_path)
    # len() rather than truthiness: the return type of detect_face is not
    # visible here, and len() is well-defined for lists and arrays alike.
    if len(faces) == 0:
        crop_args = ['-gravity', 'North', '-extent', '1:1']
    else:
        x, y, width, height = random.choice(faces)
        crop_args = [
            '-gravity', 'NorthWest',
            '-extent', f'{width}x{height}+{x}+{y}',
        ]

    result = subprocess.run(
        ['convert', image_path, *crop_args, '-resize', '128x128', output_path])
    # Previously the exit status was dropped, so a failed conversion still
    # looked like success to the caller.
    if result.returncode != 0:
        sys.exit('convert failed with exit status %d' % result.returncode)
def Recognizing():
    """Run live face recognition on camera 0 until 'q' is pressed.

    For every frame: detect faces with ``detect_face.detect_face``, skip
    faces whose height is at most 10% of the frame height, embed the
    remaining crops with the FaceNet graph and classify the embedding with
    ``model``. Each processed face gets a green box and a label: the
    predicted name when the top class probability exceeds 0.85, otherwise
    ``"Unknown"``.

    Relies on names defined outside this function: ``sess``, ``pnet``,
    ``rnet``, ``onet``, ``MINSIZE``, ``THRESHOLD``, ``FACTOR``,
    ``INPUT_IMAGE_SIZE``, ``images_placeholder``,
    ``phase_train_placeholder``, ``embeddings`` and ``model``.

    Errors raised while processing end the loop and are reported on stdout
    instead of being swallowed silently. The camera and the preview window
    are always released.
    """
    with tf.Graph().as_default():
        with sess.as_default():
            cap = cv2.VideoCapture(0)
            try:
                while True:
                    grabbed, frame = cap.read()
                    if not grabbed:
                        # No frame delivered (camera missing or closed).
                        break

                    bounding_boxes, _ = detect_face.detect_face(
                        frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                    faces_found = bounding_boxes.shape[0]  # Number of faces found

                    if faces_found > 0:
                        # Truncate box corners to integer pixel indices.
                        bb = bounding_boxes[:, 0:4].astype(np.int32)
                        for left, top, right, bottom in bb:
                            # Remove the faces that are too small.
                            if (bottom - top) / frame.shape[0] <= 0.1:
                                continue

                            cropped = frame[top:bottom, left:right, :]
                            scaled = cv2.resize(
                                cropped, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = scaled.reshape(
                                -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                            feed_dict = {
                                images_placeholder: scaled_reshape,
                                phase_train_placeholder: False
                            }
                            emb_array = sess.run(embeddings, feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            print("predictions: ", predictions)
                            best_class_indices = np.argmax(predictions, axis=1)
                            print('best_class_indices: ', best_class_indices)
                            print('index of person: ', best_class_indices[0])
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            print('best_class_probabilities: ',
                                  best_class_probabilities)
                            dict_name = model.predict(emb_array)
                            name_r = dict_name[0]
                            print("dict_name", dict_name)
                            print("prob: {}".format(best_class_probabilities))

                            # Condition to confirm the identity of a face.
                            # One crop is embedded at a time, so index 0 is
                            # the only probability.
                            if best_class_probabilities[0] > 0.85:
                                label = name_r
                            else:
                                label = "Unknown"

                            cv2.rectangle(frame, (left, top), (right, bottom),
                                          (0, 255, 0), 2)
                            cv2.putText(frame,
                                        label, (left, bottom + 20),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (255, 255, 255),
                                        thickness=1,
                                        lineType=2)

                    cv2.imshow('Face Recognition', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            except Exception as exc:
                # Still best-effort (the function returns normally), but the
                # reason the loop stopped is no longer hidden. Ctrl-C now
                # propagates after cleanup instead of being swallowed.
                print('Recognizing stopped after an error: %r' % (exc,))
            finally:
                cap.release()
                cv2.destroyAllWindows()
saver.restore(sess, model_checkpoint_path) print('facenet embedding模型建立完毕') #restore pre-trained knn classifier model = joblib.load('./model_check_point/knn_classifier_gender.model') print('knn classifier loaded 建立完毕') image = cv2.imread(sys.argv[1]) find_results = [] gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if gray.ndim == 2: img = to_rgb(gray) bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] #number of faces for face_position in bounding_boxes: face_position = face_position.astype(int) cv2.rectangle(image, (face_position[0], face_position[1]), (face_position[2], face_position[3]), (0, 255, 0), 2) crop = img[face_position[1]:face_position[3], face_position[0]:face_position[2], ] crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC) data = crop.reshape(-1, 96, 96, 3)
def Recognizing():
    """Run live face recognition on camera 0 until 'q' is pressed.

    For every frame: detect faces with ``detect_face.detect_face``, skip
    faces whose height is at most 10% of the frame height, embed the
    remaining crops with the FaceNet graph and classify the embedding with
    ``model``. Each processed face gets a green box and a label: the
    predicted name when the top class probability exceeds 0.70, otherwise
    ``"Unknown"``.

    Relies on names defined outside this function: ``sess``, ``pnet``,
    ``rnet``, ``onet``, ``MINSIZE``, ``THRESHOLD``, ``FACTOR``,
    ``INPUT_IMAGE_SIZE``, ``images_placeholder``,
    ``phase_train_placeholder``, ``embeddings`` and ``model``.

    Errors raised while processing end the loop and are reported on stdout
    instead of being swallowed silently. The camera and the preview window
    are always released.
    """
    with tf.Graph().as_default():
        with sess.as_default():
            cap = cv2.VideoCapture(0)
            try:
                while True:
                    grabbed, frame = cap.read()
                    if not grabbed:
                        # No frame delivered (camera missing or closed).
                        break

                    bounding_boxes, _ = detect_face.detect_face(
                        frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                    faces_found = bounding_boxes.shape[0]  # Number of faces found

                    if faces_found > 0:
                        # Truncate box corners to integer pixel indices.
                        bb = bounding_boxes[:, 0:4].astype(np.int32)
                        for left, top, right, bottom in bb:
                            # Discard faces that are too small in the frame.
                            if (bottom - top) / frame.shape[0] <= 0.1:
                                continue

                            cropp = frame[top:bottom, left:right, :]
                            # Resize the crop to the model's input size.
                            scaled = cv2.resize(
                                cropp, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = scaled.reshape(
                                -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                            feed_dict = {
                                images_placeholder: scaled_reshape,
                                phase_train_placeholder: False
                            }
                            emb_array = sess.run(embeddings, feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            print("predictions: ", predictions)
                            # Index of the label that best matches the input.
                            best_class_indices = np.argmax(predictions, axis=1)
                            print("best_class_indices: ", best_class_indices)
                            # Probability of that best class.
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            # The class label is the person's name.
                            dict_name = model.predict(emb_array)
                            name_r = dict_name[0]

                            # Threshold deciding whether the identity is
                            # accepted. One crop is embedded at a time, so
                            # index 0 is the only probability.
                            if best_class_probabilities[0] > 0.70:
                                label = name_r
                            else:
                                label = "Unknown"

                            cv2.rectangle(frame, (left, top), (right, bottom),
                                          (0, 255, 0), 2)
                            cv2.putText(frame,
                                        label, (left, bottom + 20),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (255, 255, 255),
                                        thickness=1,
                                        lineType=2)

                    cv2.imshow('Face Recognition', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            except Exception as exc:
                # Still best-effort (the function returns normally), but the
                # reason the loop stopped is no longer hidden. Ctrl-C now
                # propagates after cleanup instead of being swallowed.
                print('Recognizing stopped after an error: %r' % (exc,))
            finally:
                cap.release()
                cv2.destroyAllWindows()
def faceNet_Detection(img, output_dir, args, pnet, rnet, onet):
    """Detect faces in one frame and return their resized crops.

    Args:
        img: Frame to analyse, or ``None``. A 2-D (single-channel) array is
            expanded to three identical channels; only the first three
            channels of wider images are used.
        output_dir: Directory created if missing. When
            ``<output_dir>/test_frame.png`` already exists the frame is not
            processed.
        args: Object providing ``detect_multiple_faces``, ``image_size`` and
            ``margin``.
        pnet, rnet, onet: MTCNN networks passed to
            ``detect_face.detect_face``.

    Returns:
        ``[scaled_matrix, nrof_successfully_aligned, detected_faces]``:
        the stacked crops (``len(faces) x image_size x image_size x 3``), the
        number of crops produced, and whatever ``full_img_with_boxes``
        returned. When nothing is processed or detected these are an empty
        array, ``0`` and an empty array.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    nrof_successfully_aligned = 0
    scaled_matrix = np.array([])
    detected_faces = np.array([])

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    filename = 'test_frame'
    output_filename = os.path.join(output_dir, filename + '.png')

    if os.path.exists(output_filename) or img is None:
        return [scaled_matrix, nrof_successfully_aligned, detected_faces]

    if img.ndim < 2:
        print('Unable to align "%s" FOR THE DIMENSION' % output_filename)
        # Previously execution fell through to the slicing below and crashed.
        return [scaled_matrix, nrof_successfully_aligned, detected_faces]
    if img.ndim == 2:
        # Grayscale frame: replicate the single channel so the three-channel
        # slicing and the (.., 3) output buffer below are valid.
        img = np.stack([img] * 3, axis=-1)
    img = img[:, :, 0:3]

    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet,
                                                threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    if nrof_faces == 0:
        print('Unable to align "%s"' % output_filename)
        return [scaled_matrix, nrof_successfully_aligned, detected_faces]

    det = bounding_boxes[:, 0:4]
    det_arr = []
    img_size = np.asarray(img.shape)[0:2]
    if nrof_faces > 1:
        if args.detect_multiple_faces:
            det_arr.extend(np.squeeze(det[i]) for i in range(nrof_faces))
        else:
            bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
            img_center = img_size / 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - img_center[0]
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            # some extra weight on the centering
            index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
            det_arr.append(det[index, :])
    else:
        det_arr.append(np.squeeze(det))

    scaled_matrix = np.empty(
        (len(det_arr), args.image_size, args.image_size, 3))
    for i, face_det in enumerate(det_arr):
        face_det = np.squeeze(face_det)
        # Grow the box by the margin, clamped to the image borders.
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(face_det[0] - args.margin / 2, 0)
        bb[1] = np.maximum(face_det[1] - args.margin / 2, 0)
        bb[2] = np.minimum(face_det[2] + args.margin / 2, img_size[1])
        bb[3] = np.minimum(face_det[3] + args.margin / 2, img_size[0])
        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
        scaled = misc.imresize(cropped, (args.image_size, args.image_size),
                               interp='bilinear')
        nrof_successfully_aligned += 1
        scaled_matrix[i] = scaled

    detected_faces = full_img_with_boxes(img, bounding_boxes, filename,
                                         output_dir,
                                         nrof_successfully_aligned)
    print('Number of croped images on the frame : %d' %
          (nrof_successfully_aligned))
    # The counter used to be reset to 0 here, which made the returned count
    # always 0; it is now returned as computed.
    return [scaled_matrix, nrof_successfully_aligned, detected_faces]
def Extract_feature():
    """Capture face crops from camera 0 and append their embeddings to a CSV.

    For each frame (scaled to 75%), every detected face is boxed in the
    preview, cropped, resized to 160x160, saved as
    ``<folder><cnt>.jpg`` and embedded. The embedding with ``name`` appended
    is written as one row to ``features.csv`` (append mode, no header).
    Capturing stops when 'q' is pressed, when more than 100 crops have been
    saved, or when the camera stops delivering frames.

    Relies on names defined outside this function: ``pnet``, ``rnet``,
    ``onet``, ``sess``, ``embeddings``, ``images_placeholder``,
    ``phase_train_placeholder``, ``folder`` and ``name``.
    """
    MINSIZE = 20
    THRESHOLD = [0.6, 0.7, 0.7]
    FACTOR = 0.7
    INPUT_IMAGE_SIZE = 160

    if not os.path.exists("Dataset"):
        os.mkdir("Dataset")

    cap = cv2.VideoCapture(0)
    cnt = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame delivered; resizing ``None`` would raise.
                break
            frame = cv2.resize(frame, (0, 0), fx=0.75, fy=0.75)

            bounding_boxes, _ = detect_face.detect_face(
                frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
            faces_found = bounding_boxes.shape[0]
            print("faces_found: ", faces_found)

            # Test the count instead of ``bounding_boxes != []``: comparing
            # an array with a list is an elementwise comparison, not an
            # emptiness check, and NumPy versions that raise on
            # shape-mismatched comparisons reject it.
            if faces_found > 0:
                # Truncate box corners to integer pixel indices.
                bb = bounding_boxes[:, 0:4].astype(np.int32)
                for left, top, right, bottom in bb:
                    cv2.rectangle(frame, (left, top), (right, bottom),
                                  (0, 255, 0), 2)
                    if (bottom - top) / frame.shape[0] > 0.0:
                        cropped = frame[top:bottom, left:right, :]
                        scaled_out = cv2.resize(
                            cropped, (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                            interpolation=cv2.INTER_CUBIC)
                        scaled = facenet.prewhiten(scaled_out)
                        scaled_reshape = scaled.reshape(
                            -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                        feed_dict = {
                            images_placeholder: scaled_reshape,
                            phase_train_placeholder: False
                        }
                        # The graph used to be run twice with the same feed;
                        # one run is enough.
                        emb_array = sess.run(embeddings, feed_dict=feed_dict)
                        print("emb_array.shape: ", emb_array.shape)

                        cv2.imwrite(folder + str(cnt) + '.jpg', scaled_out)

                        # One CSV row: flattened embedding followed by name.
                        my_features = np.append(emb_array, name).reshape(1, -1)
                        df = pd.DataFrame(my_features)
                        df.to_csv("features.csv",
                                  mode='a',
                                  header=None,
                                  index=False)
                        cnt += 1

            cv2.imshow('Face Recognition', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            elif cnt > 100:
                break
    finally:
        # The capture device was never released before.
        cap.release()
        cv2.destroyAllWindows()
def collect_data(self):
    """Detect, crop and save one face per image of the input dataset.

    Images are read from ``self.input_datadir`` (via ``facenet.get_dataset``)
    and written as 182x182 PNGs under ``self.output_datadir/<class name>/``.
    Images whose output file already exists are skipped. When several faces
    are detected, the one with the largest
    ``area - 2 * squared_distance_to_image_centre`` is kept. A
    ``bounding_boxes_<random>.txt`` file in the output directory records each
    written file with its box, or only the file name when alignment failed.

    Returns:
        ``(nrof_images_total, nrof_successfully_aligned)``.
    """
    output_dir = os.path.expanduser(self.output_datadir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = facenet.get_dataset(self.input_datadir)
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy')

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    # NOTE(review): margin is defined but never applied to the crop below --
    # confirm whether that is intentional.
    margin = 44
    image_size = 182

    # Add a random key to the filename to allow alignment using multiple
    # processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    nrof_images_total = 0
    nrof_successfully_aligned = 0
    try:
        with open(bounding_boxes_filename, "w") as text_file:
            for cls in dataset:
                output_class_dir = os.path.join(output_dir, cls.name)
                if not os.path.exists(output_class_dir):
                    os.makedirs(output_class_dir)

                for image_path in cls.image_paths:
                    nrof_images_total += 1
                    filename = os.path.splitext(
                        os.path.split(image_path)[1])[0]
                    output_filename = os.path.join(output_class_dir,
                                                   filename + '.png')
                    print("Image: %s" % image_path)
                    if os.path.exists(output_filename):
                        continue

                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                        continue

                    if img.ndim < 2:
                        print('Unable to align "%s"' % image_path)
                        text_file.write('%s\n' % (output_filename))
                        continue
                    if img.ndim == 2:
                        img = facenet.to_rgb(img)
                        print('to_rgb data dimension: ', img.ndim)
                    img = img[:, :, 0:3]

                    bounding_boxes, _ = detect_face.detect_face(
                        img, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('No of Detected Face: %d' % nrof_faces)
                    if nrof_faces == 0:
                        print('Unable to align "%s"' % image_path)
                        text_file.write('%s\n' % (output_filename))
                        continue

                    det = bounding_boxes[:, 0:4]
                    img_size = np.asarray(img.shape)[0:2]
                    if nrof_faces > 1:
                        bounding_box_size = (
                            det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                        img_center = img_size / 2
                        offsets = np.vstack([
                            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                            (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                        ])
                        offset_dist_squared = np.sum(
                            np.power(offsets, 2.0), 0)
                        # some extra weight on the centering
                        index = np.argmax(
                            bounding_box_size - offset_dist_squared * 2.0)
                        det = det[index, :]
                    det = np.squeeze(det)

                    # Clamp the box to the image: a negative coordinate
                    # would otherwise be read as an index from the far edge
                    # when slicing, giving a wrong or empty crop.
                    bb_temp = np.zeros(4, dtype=np.int32)
                    bb_temp[0] = np.maximum(det[0], 0)
                    bb_temp[1] = np.maximum(det[1], 0)
                    bb_temp[2] = np.minimum(det[2], img_size[1])
                    bb_temp[3] = np.minimum(det[3], img_size[0])

                    cropped_temp = img[bb_temp[1]:bb_temp[3],
                                       bb_temp[0]:bb_temp[2], :]
                    scaled_temp = misc.imresize(
                        cropped_temp, (image_size, image_size),
                        interp='bilinear')
                    nrof_successfully_aligned += 1
                    misc.imsave(output_filename, scaled_temp)
                    text_file.write(
                        '%s %d %d %d %d\n' %
                        (output_filename, bb_temp[0], bb_temp[1],
                         bb_temp[2], bb_temp[3]))
    finally:
        # The detector session was never closed before.
        sess.close()

    return (nrof_images_total, nrof_successfully_aligned)
def main(args):
    """Classify every face seen by camera 0 and show the annotated video.

    Builds the MTCNN detector, loads the embedding model ``args.model`` and
    the pickled ``(model, class_names)`` classifier from
    ``args.classifier_filename``, then for each frame aligns every detected
    face with ``face_alignment``, embeds it and draws the box together with
    ``"<class name>: <probability>"``. Stops when 'q' is pressed or the
    camera stops delivering frames.

    Args:
        args: Object providing ``output_dir``, ``gpu_memory_fraction``,
            ``seed``, ``model``, ``classifier_filename`` and ``image_size``.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cap = cv2.VideoCapture(0)

    # MTCNN parameters.
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # one threshold per network stage
    factor = 0.709  # scale factor of the image pyramid

    # The detector gets its own graph and session; the returned networks
    # keep using that session, so it stays open until the loop ends.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        mtcnn_sess = tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False))
        with mtcnn_sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(mtcnn_sess, None)

    try:
        with tf.Graph().as_default():
            with tf.Session() as sess:
                np.random.seed(seed=args.seed)

                # Load the embedding model.
                facenet.load_model(args.model)

                # Load the classifier. NOTE(review): pickle.load executes
                # arbitrary code from the file -- only use trusted files.
                classifier_filename_exp = os.path.expanduser(
                    args.classifier_filename)
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)

                # Input and output tensors of the embedding network.
                graph = tf.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("input:0")
                embeddings = graph.get_tensor_by_name("embeddings:0")
                phase_train_placeholder = graph.get_tensor_by_name(
                    "phase_train:0")

                font = cv2.FONT_HERSHEY_SIMPLEX
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        # A failed read used to make this loop spin forever
                        # without ever polling the keyboard.
                        break

                    bounding_boxes, landmarks = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]

                    # Classify every face detected in the frame.
                    for face_no in range(nrof_faces):
                        bb_box = bounding_boxes[face_no, 0:4]
                        landmark = landmarks[:, face_no]

                        aligned = face_alignment(frame, args.image_size,
                                                 landmark)
                        prewhitened = facenet.prewhiten(aligned)
                        img = np.expand_dims(prewhitened, 0)

                        feed_dict = {
                            images_placeholder: img,
                            phase_train_placeholder: False
                        }
                        emb = sess.run(embeddings, feed_dict=feed_dict)

                        predictions = model.predict_proba(emb)
                        best_class_indices = np.argmax(predictions, axis=1)
                        best_class_probabilities = predictions[
                            np.arange(len(best_class_indices)),
                            best_class_indices]

                        top_left = (int(bb_box[0]), int(bb_box[1]))
                        bottom_right = (int(bb_box[2]), int(bb_box[3]))
                        cv2.rectangle(frame, top_left, bottom_right,
                                      (0, 255, 0), 3)
                        cv2.putText(
                            frame, '%s: %.3f' %
                            (class_names[best_class_indices[0]],
                             best_class_probabilities[0]),
                            top_left, font, 1, (255, 0, 0), 2)

                    cv2.imshow('video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
    finally:
        # None of these were released before.
        cap.release()
        cv2.destroyAllWindows()
        mtcnn_sess.close()
def main(args):
    """Write a tab-separated alignment list for a face dataset.

    Every image of ``face_image.get_dataset(args.name, args.input_dir)`` is
    run through MTCNN. One line per image is written to
    ``<args.output_dir>/faceinsight_align_<args.name>.lst``:

    * on success: ``0, image_path, class, x1, y1, x2, y2`` followed by the
      ten landmark values;
    * on failure, only when ``args.force > 0``: ``0, image_path, class`` and,
      if the dataset supplied a box, that box.

    Face selection: without a dataset box, the detection with the largest
    ``area - 2 * squared_distance_to_image_centre``; with a dataset box, the
    detection with the highest ``IOU`` against it, falling back to
    ``detect_face.detect_face_force`` when nothing was detected.

    Args:
        args: Object providing ``output_dir``, ``name``, ``input_dir`` and
            ``force``.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = face_image.get_dataset(args.name, args.input_dir)
    print('dataset size', args.name, len(dataset))

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 100  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    if args.name in ('lfw', 'webface', 'vgg'):
        minsize = 20
        threshold = [0.6, 0.7, 0.9]
        factor = 0.85

    print(minsize)
    print(threshold)
    print(factor)

    output_filename = os.path.join(output_dir,
                                   'faceinsight_align_%s.lst' % args.name)

    with open(output_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        nrof_changed = 0  # never incremented here; still reported below
        nrof_iou3 = 0
        nrof_force = 0
        for fimage in dataset:
            if nrof_images_total % 100 == 0:
                print("Processing %d, (%d)" % (nrof_images_total,
                                               nrof_successfully_aligned))
            nrof_images_total += 1
            image_path = fimage.image_path
            if not os.path.exists(image_path):
                print('image not found (%s)' % image_path)
                continue

            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                errorMessage = '{}: {}'.format(image_path, e)
                print(errorMessage)
                continue

            if img.ndim < 2:
                print('Unable to align "%s", img dim error' % image_path)
                continue
            if img.ndim == 2:
                img = to_rgb(img)
            img = img[:, :, 0:3]

            # With a dataset box, let the detector look for faces about as
            # small as that box (capped at half the image size).
            _minsize = minsize
            if fimage.bbox is not None:
                _bb = fimage.bbox
                _minsize = min([
                    _bb[2] - _bb[0], _bb[3] - _bb[1], img.shape[0] // 2,
                    img.shape[1] // 2
                ])

            bounding_boxes, points = detect_face.detect_face(
                img, _minsize, pnet, rnet, onet, threshold, factor)
            bindex = -1
            nrof_faces = bounding_boxes.shape[0]

            if fimage.bbox is None and nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]
                bindex = 0
                if nrof_faces > 1:
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (
                        det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    # some extra weight on the centering
                    bindex = np.argmax(bounding_box_size -
                                       offset_dist_squared * 2.0)

            if fimage.bbox is not None:
                if nrof_faces > 0:
                    assert (bounding_boxes.shape[0] == points.shape[1])
                    det = bounding_boxes[:, 0:4]
                    best_iou, best_index = 0.0, 0
                    # ``range`` rather than the Python-2-only ``xrange``.
                    for i in range(det.shape[0]):
                        iou = IOU(fimage.bbox, det[i])
                        if iou > best_iou:
                            best_iou, best_index = iou, i
                    # NOTE(review): best_iou starts at 0.0, so this test
                    # always passes -- the IoU threshold looks deliberately
                    # disabled; confirm.
                    if best_iou > -0.3:
                        bindex = best_index
                        nrof_iou3 += 1
                if bindex < 0:
                    # Nothing detected: force a detection inside the
                    # dataset-provided box.
                    bounding_boxes, points = detect_face.detect_face_force(
                        img, fimage.bbox, pnet, rnet, onet)
                    bindex = 0
                    nrof_force += 1

            if bindex >= 0:
                det = bounding_boxes[:, 0:4]
                bb = np.squeeze(det[bindex, :])
                points = list(points[:, bindex].flatten())
                assert (len(points) == 10)
                nrof_successfully_aligned += 1
                oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\t' % (
                    0, fimage.image_path, int(fimage.classname), bb[0],
                    bb[1], bb[2], bb[3])
                oline += '\t'.join([str(x) for x in points])
                text_file.write("%s\n" % oline)
            else:
                print('Unable to align "%s", no face detected' % image_path)
                if args.force > 0:
                    if fimage.bbox is None:
                        oline = '%d\t%s\t%d\n' % (0, fimage.image_path,
                                                  int(fimage.classname))
                    else:
                        bb = fimage.bbox
                        oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\n' % (
                            0, fimage.image_path, int(fimage.classname),
                            bb[0], bb[1], bb[2], bb[3])
                    text_file.write(oline)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
    print('Number of changed: %d' % nrof_changed)
    print('Number of iou3: %d' % nrof_iou3)
    print('Number of force: %d' % nrof_force)
def main():
    """Recognise faces in a video (or camera 0) and show the annotated frames.

    ``--path`` selects the video; without it, device ``0`` is opened. Each
    detected face is cropped, embedded with the FaceNet model and classified
    with the pickled ``(model, class_names)`` classifier. The face is boxed
    and labelled with the class name when the top probability exceeds 0.9,
    otherwise with ``"Unknown"``; the probability is drawn underneath.
    Playback stops when 'q' is pressed or no further frame can be read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path',
                        help='Path of the video you want to test on.',
                        default=0)
    args = parser.parse_args()

    # Required parameters.
    MINSIZE = 20
    THRESHOLD = [0.6, 0.7, 0.7]
    FACTOR = 0.709
    INPUT_IMAGE_SIZE = 160
    CLASSIFIER_PATH = 'Models/facemodel.pkl'
    VIDEO_PATH = args.path
    FACENET_MODEL_PATH = 'Models/20180402-114759.pb'

    # Load the trained face classifier. NOTE(review): pickle.load executes
    # arbitrary code from the file -- only use trusted files.
    with open(CLASSIFIER_PATH, 'rb') as file:
        model, class_names = pickle.load(file)
    print("Custom Classifier, Successfully loaded")

    with tf.Graph().as_default():
        # Configure GPU memory use, if a GPU is present.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))

        with sess.as_default():
            # Load the FaceNet feature-extraction model.
            print('Loading feature extraction model')
            facenet.load_model(FACENET_MODEL_PATH)

            # Input and output tensors.
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")

            # Build the MTCNN sub-networks.
            pnet, rnet, onet = detect_face.create_mtcnn(sess, "src/align")

            person_detected = collections.Counter()

            # Read frames from the video file (or camera).
            cap = cv2.VideoCapture(VIDEO_PATH)
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        # End of stream: ``frame`` is None and the detector
                        # call below used to crash on it.
                        break

                    # Detect faces; positions are returned in bounding_boxes.
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                    faces_found = bounding_boxes.shape[0]

                    try:
                        if faces_found > 0:
                            # Truncate box corners to integer pixel indices.
                            bb = bounding_boxes[:, 0:4].astype(np.int32)
                            for left, top, right, bottom in bb:
                                # Crop the detected face.
                                cropped = frame[top:bottom, left:right, :]
                                scaled = cv2.resize(
                                    cropped,
                                    (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                    interpolation=cv2.INTER_CUBIC)
                                scaled = facenet.prewhiten(scaled)
                                scaled_reshape = scaled.reshape(
                                    -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                                feed_dict = {
                                    images_placeholder: scaled_reshape,
                                    phase_train_placeholder: False
                                }
                                emb_array = sess.run(embeddings,
                                                     feed_dict=feed_dict)

                                # Classify the embedding.
                                predictions = model.predict_proba(emb_array)
                                best_class_indices = np.argmax(predictions,
                                                               axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)),
                                    best_class_indices]

                                # Name and probability of the best class.
                                best_name = class_names[best_class_indices[0]]
                                print("Name: {}, Probability: {}".format(
                                    best_name, best_class_probabilities))

                                # Green box around the face.
                                cv2.rectangle(frame, (left, top),
                                              (right, bottom), (0, 255, 0), 2)
                                text_x = left
                                text_y = bottom + 20

                                # Show the name only above 0.9 confidence.
                                if best_class_probabilities[0] > 0.9:
                                    name = best_name
                                else:
                                    name = "Unknown"

                                # Write the label and probability on the frame.
                                cv2.putText(frame,
                                            name, (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (255, 255, 255),
                                            thickness=1,
                                            lineType=2)
                                cv2.putText(frame,
                                            str(
                                                round(
                                                    best_class_probabilities[0],
                                                    3)),
                                            (text_x, text_y + 17),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                            1, (255, 255, 255),
                                            thickness=1,
                                            lineType=2)
                                person_detected[best_name] += 1
                    except Exception as exc:
                        # Keep playing when one frame fails (the original
                        # intent), but say why instead of hiding it; Ctrl-C
                        # is no longer swallowed.
                        print('Skipping frame after an error: %r' % (exc,))

                    # Show the frame.
                    cv2.imshow('Face Recognition', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                cap.release()
                cv2.destroyAllWindows()
def main(args):
    """Align every image of a dataset to the 112x112 face template.

    For each dataset entry the first strategy that succeeds is used, and the
    matching slot of the ``nrof`` counter is incremented:

    0. the entry has landmarks: rough warp from them, re-detect on the warped
       image, then warp with the detected landmarks;
    1. the entry has a bbox: detect on the full image and use the detection
       whose ``IOU`` with that bbox is highest and above 0.3;
    2. the entry has a bbox: ``detect_face_force`` on that bbox, accepted when
       the returned score is at least 0.3;
    3. crop the label bbox (width adjusted to the output aspect ratio);
    4. crop the image with a 6% border removed.

    The aligned image is written below the output directory, mirroring the
    last two directory levels of the source path, and a tab-separated line
    ``1 <target_file> <classname>`` is appended to the ``lst`` file.

    Args:
        args: namespace providing ``output_dir``, ``input_dir`` and ``name``.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    #facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = face_image.get_dataset(args.name, args.input_dir)
    print('dataset size', args.name, len(dataset))

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 100  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = [112, 112]
    # Landmark template the detected points are mapped onto.
    src = np.array([[30.2946, 51.6963], [65.5318, 51.5014],
                    [48.0252, 71.7366], [33.5493, 92.3655],
                    [62.7299, 92.2041]], dtype=np.float32)
    if image_size[1] == 112:
        src[:, 0] += 8.0
    out_size = (image_size[1], image_size[0])

    def _warp(image, landmarks, template, size):
        """Warp ``image`` so that ``landmarks`` land on ``template``."""
        tform = trans.SimilarityTransform()
        tform.estimate(landmarks, template)
        M = tform.params[0:2, :]
        return cv2.warpAffine(image, M, size, borderValue=0.0)

    # The expanded path is used everywhere below; the raw args.output_dir
    # would write to a literal "~" directory when the argument contains "~".
    output_filename = os.path.join(output_dir, 'lst')

    with open(output_filename, "w") as text_file:
        nrof_images_total = 0
        nrof = np.zeros((5, ), dtype=np.int32)
        for fimage in dataset:
            if nrof_images_total % 100 == 0:
                print("Processing %d, (%s)" % (nrof_images_total, nrof))
            nrof_images_total += 1
            image_path = fimage.image_path
            if not os.path.exists(image_path):
                print('image not found (%s)' % image_path)
                continue
            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                errorMessage = '{}: {}'.format(image_path, e)
                print(errorMessage)
                continue

            if img.ndim < 2:
                print('Unable to align "%s", img dim error' % image_path)
                continue
            if img.ndim == 2:
                img = to_rgb(img)
            img = img[:, :, 0:3]

            _paths = fimage.image_path.split('/')
            a, b, c = _paths[-3], _paths[-2], _paths[-1]
            target_dir = os.path.join(output_dir, a, b)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            target_file = os.path.join(target_dir, c)

            warped = None

            # Strategy 0: landmarks supplied with the dataset entry.
            if fimage.landmark is not None:
                dst = fimage.landmark.astype(np.float32)
                warped0 = _warp(
                    img, dst, src[0:3, :] * 1.5 + image_size[0] * 0.25,
                    (image_size[1] * 2, image_size[0] * 2))
                _minsize = image_size[0]
                bounding_boxes, points = detect_face.detect_face(
                    warped0, _minsize, pnet, rnet, onet, threshold, factor)
                if bounding_boxes.shape[0] > 0:
                    bindex = 0
                    # Each points column is reshaped to (2, 5) and transposed
                    # into five rows of two values.
                    dst = points[:, bindex].reshape((2, 5)).T
                    warped = _warp(warped0, dst, src, out_size)
                    nrof[0] += 1

            # Strategy 1: detection that best overlaps the label bbox.
            if warped is None and fimage.bbox is not None:
                _minsize = img.shape[0] // 4
                bounding_boxes, points = detect_face.detect_face(
                    img, _minsize, pnet, rnet, onet, threshold, factor)
                if bounding_boxes.shape[0] > 0:
                    det = bounding_boxes[:, 0:4]
                    bindex = -1
                    best_iou, best_i = 0.0, 0
                    for i in range(det.shape[0]):
                        iou = IOU(fimage.bbox, det[i])
                        if iou > best_iou:
                            best_iou, best_i = iou, i
                    if best_iou > 0.3:
                        bindex = best_i
                    if bindex >= 0:
                        dst = points[:, bindex].reshape((2, 5)).T
                        warped = _warp(img, dst, src, out_size)
                        nrof[1] += 1

            # Strategy 2: force the detector onto the label bbox.
            if warped is None and fimage.bbox is not None:
                bb = fimage.bbox
                bounding_boxes, points = detect_face.detect_face_force(
                    img, bb, pnet, rnet, onet)
                assert bounding_boxes.shape[0] == 1
                _box = bounding_boxes[0]
                if _box[4] >= 0.3:
                    dst = points[:, 0].reshape((2, 5)).T
                    warped = _warp(img, dst, src, out_size)
                    nrof[2] += 1

            # Strategies 3 and 4: plain crop, no landmark alignment.
            if warped is None:
                roi = np.zeros((4, ), dtype=np.int32)
                roi[0] = int(img.shape[1] * 0.06)
                roi[1] = int(img.shape[0] * 0.06)
                roi[2] = img.shape[1] - roi[0]
                roi[3] = img.shape[0] - roi[1]
                if fimage.bbox is not None:
                    bb = fimage.bbox
                    h = bb[3] - bb[1]
                    w = bb[2] - bb[0]
                    x = bb[0]
                    y = bb[1]
                    # Width that gives the bbox the output aspect ratio.
                    _w = int((float(h) / image_size[0]) * image_size[1])
                    x += (w - _w) // 2
                    x = max(0, x)
                    xw = x + _w
                    xw = min(xw, img.shape[1])
                    roi = np.array((x, y, xw, y + h), dtype=np.int32)
                    nrof[3] += 1
                else:
                    nrof[4] += 1
                warped = img[roi[1]:roi[3], roi[0]:roi[2], :]
                warped = cv2.resize(warped, out_size)

            # Reverse the channel order before handing the image to OpenCV.
            bgr = warped[..., ::-1]
            cv2.imwrite(target_file, bgr)
            oline = '%d\t%s\t%d\n' % (1, target_file, int(fimage.classname))
            text_file.write(oline)
def get_face_video(sess, frame, margin, image_size, images_placeholder,
                   embeddings, phase_train_placeholder, embding,
                   images_label_list, pnet, rnet, onet, dict, video_path, ip):
    """Detect faces in ``frame`` and match the first one against known embeddings.

    Every detected face is cropped (with ``margin``), resized to
    ``image_size`` and written to a ``tmp`` directory next to ``video_path``
    so that ``facenet.load_data`` can read it back. The crops are embedded
    and the first embedding is compared with each row of ``embding`` by
    squared Euclidean distance. When the smallest distance is below 0.5 the
    matching label is counted in ``dict``, the whole frame is saved as
    ``<label>.png`` and returned base64-encoded.

    The detector settings ``minsize``, ``threshold`` and ``factor`` are read
    from module scope.

    Args:
        dict: mapping label -> counter, updated in place (the name shadows
            the builtin but is part of the existing signature).
        ip: unused here; only referenced by a disabled ``send_message`` call.

    Returns:
        ``(labels, encoded)``: the matched labels and the concatenated base64
        text of the saved frames (``''`` when nothing matched).
    """
    bounding_boxes, _ = detect_face.detect_face(
        frame, minsize, pnet, rnet, onet, threshold, factor)
    labels = []
    basees = []
    image_output_name = os.path.join(os.path.split(video_path)[0], 'tmp')
    if not os.path.exists(image_output_name):
        os.mkdir(image_output_name)

    if bounding_boxes.shape[0] > 0:  # number of faces
        # x and y coordinate ranges of the detected faces.
        det = bounding_boxes[:, 0:4]
        img_size = np.asarray(frame.shape)[0:2]
        output_list = []
        try:
            for i in range(det.shape[0]):
                bb = np.zeros(4, dtype=np.int32)
                bb[0] = np.maximum(det[i][0] - margin / 2, 0)
                bb[1] = np.maximum(det[i][1] - margin / 2, 0)
                bb[2] = np.minimum(det[i][2] + margin / 2, img_size[1])
                bb[3] = np.minimum(det[i][3] + margin / 2, img_size[0])
                cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
                scaled = cv2.resize(cropped, (image_size, image_size),
                                    interpolation=cv2.INTER_CUBIC)
                image_output_name_dir = os.path.join(
                    image_output_name, 'k_' + str(i) + '.png')
                cv2.imwrite(image_output_name_dir, scaled)
                output_list.append(image_output_name_dir)

            if output_list:
                images = facenet.load_data(output_list, False, False, image_size)
                feed_dict = {images_placeholder: images,
                             phase_train_placeholder: False}
                embeddings_face = sess.run(embeddings, feed_dict=feed_dict)

                # NOTE(review): only the first face's embedding is matched
                # although all faces are embedded - confirm this is intended.
                label = ''
                min_dist = 2
                if embding.shape[0] > 0:
                    dists = np.sum(
                        np.square(embding - embeddings_face[0, :]), axis=1)
                    nearest = int(np.argmin(dists))
                    if dists[nearest] < min_dist:
                        min_dist = dists[nearest]
                        label = images_label_list[nearest]

                if min_dist < 0.5:
                    # `in` replaces dict.has_key, which Python 3 removed.
                    if label in dict:
                        dict[label] = dict[label] + 1
                    else:
                        dict[label] = 0
                    save_image_dir = os.path.join(
                        image_output_name, str(label) + '.png')
                    cv2.imwrite(save_image_dir, frame)
                    with open(save_image_dir, 'rb') as f:
                        # Decode so that the final join yields text on
                        # Python 3, where b64encode returns bytes.
                        ls_f = base64.b64encode(f.read()).decode('ascii')
                    labels.append(label)
                    basees.append(ls_f)
                    # send_message(os.path.basename(video_path), label, save_image_dir, ip)
                    print(label)
        finally:
            # Remove the temporary crops even when a step above fails.
            for item in output_list:
                if os.path.exists(item):
                    os.remove(item)

    return labels, ''.join(basees)
def run():
    """Run camera face recognition for at most twelve hours.

    Calls ``cleanup(30)``, builds the MTCNN detector, loads the FaceNet graph
    and the pickled classifier, then reads frames from camera 0. Each frame
    is rotated by 90 degrees; every detected face of at least 40x40 pixels
    that lies fully inside the frame is embedded and classified. Every
    classified face is passed to ``write_image_file`` and drawn on the frame
    (a label counts as recognised when its probability exceeds 0.6). The loop
    ends after twelve hours, when ``q`` is pressed, or when the camera stops
    delivering frames.
    """
    # app runs every 12 hours, so delete files whenever app start running
    cleanup(30)  # store images for 30 days

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './src/align/')

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three steps's threshold
            factor = 0.709  # scale factor
            frame_interval = 3
            image_size = 182
            input_image_size = 160
            min_face_side = 40
            max_runtime_seconds = 60 * 60 * 12

            print('Loading feature extraction model')
            modeldir = './20180402-114759/20180402-114759.pb'
            facenet.load_model(modeldir)

            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename = './classifier/my_classifier.pkl'
            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE(review): pickle can execute arbitrary code; trusted files only.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
            print('class_names', class_names)

            video_capture = cv2.VideoCapture(0)
            # NOTE(review): c is never incremented, so `c % frame_interval`
            # is always 0 and every frame is processed. Kept as is; confirm
            # whether frame skipping was intended.
            c = 0
            video_capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
            video_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

            print('Start Recognition!')
            time_started = datetime.datetime.now()
            try:
                while True:
                    # Stop the app after the configured period.
                    timenow = datetime.datetime.now()
                    elapsed_time = int(timenow.timestamp() -
                                       time_started.timestamp())
                    if elapsed_time > max_runtime_seconds:
                        break

                    ret, frame = video_capture.read()
                    if not ret or frame is None:
                        # No frame delivered: frame.shape below would raise.
                        print('Unable to read a frame from the camera')
                        break

                    # Optional: rotate the frame by 90 degrees.
                    num_rows, num_cols = frame.shape[:2]
                    rotation_matrix = cv2.getRotationMatrix2D(
                        (num_cols / 2, num_rows / 2), 90, 1)
                    frame = cv2.warpAffine(frame, rotation_matrix,
                                           (num_cols, num_rows))
                    frame = cv2.resize(frame, (0, 0), fx=1, fy=1)  # resize frame (optional)

                    if c % frame_interval == 0:
                        if frame.ndim == 2:
                            frame = facenet.to_rgb(frame)
                        frame = frame[:, :, 0:3]
                        bounding_boxes, _ = detect_face.detect_face(
                            frame, minsize, pnet, rnet, onet, threshold, factor)
                        frame_height, frame_width = len(frame), len(frame[0])

                        for box in bounding_boxes[:, 0:4].astype(np.int32):
                            x1, y1, x2, y2 = (int(v) for v in box)

                            # Ensure the face is sufficiently large.
                            if (abs(x1 - x2) < min_face_side
                                    or abs(y1 - y2) < min_face_side):
                                continue
                            # Skip faces touching or crossing the frame border.
                            if (x1 <= 0 or y1 <= 0 or x2 >= frame_width
                                    or y2 >= frame_height):
                                continue

                            face = facenet.flip(frame[y1:y2, x1:x2, :], False)
                            face = misc.imresize(face, (image_size, image_size),
                                                 interp='bilinear')
                            face = cv2.resize(
                                face, (input_image_size, input_image_size),
                                interpolation=cv2.INTER_CUBIC)
                            face = facenet.prewhiten(face)
                            batch = face.reshape(
                                -1, input_image_size, input_image_size, 3)
                            feed_dict = {
                                images_placeholder: batch,
                                phase_train_placeholder: False
                            }
                            emb_array = np.zeros((1, embedding_size))
                            emb_array[0, :] = sess.run(embeddings,
                                                       feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions, axis=1)
                            predicted_probability = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]

                            # The label is drawn under the box.
                            text_x = x1
                            text_y = y2 + 20

                            result_names = 'unknown'
                            if (len(best_class_indices) > 0
                                    and predicted_probability[0] > 0.6):
                                result_names = class_names[best_class_indices[0]]
                                # write Images
                                write_image_file(result_names,
                                                 predicted_probability, frame)
                                g = 255
                                r = 0
                                if result_names.lower() == 'unknown':
                                    g = 0
                                    r = 255
                                else:
                                    # Index the array: '%.3f' needs a scalar.
                                    print('%s: %.3f, %s' %
                                          (result_names,
                                           predicted_probability[0],
                                           datetime.datetime.now()))
                                    # open_door(result_names)
                                cv2.rectangle(frame, (x1, y1), (x2, y2),
                                              (0, g, r), 2)  # boxing face
                                # Filled bar to the right of the box; its top
                                # moves down as the probability drops.
                                bar_top = y1 + int(
                                    (y2 - y1) * int(
                                        (1 - predicted_probability[0]) * 100)
                                    / 100)
                                cv2.rectangle(frame, (x2, bar_top),
                                              (x2 + 10, y2), (0, g, r),
                                              cv2.FILLED)
                                cv2.putText(frame, result_names,
                                            (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                            (0, g, r), thickness=1, lineType=2)
                            else:
                                # Save Unknown Images
                                write_image_file(result_names,
                                                 predicted_probability, frame)
                                cv2.putText(frame, result_names,
                                            (text_x, text_y),
                                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                            (0, 0, 255), thickness=1, lineType=2)
                                cv2.rectangle(frame, (x1, y1), (x2, y2),
                                              (0, 0, 255), 2)  # boxing face

                    # if you want to display realtime video
                    #cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                video_capture.release()
def datect_faces_in_photo(file_path, file_name):
    """Recognise the faces in one photo and save an annotated copy.

    The image at ``file_path`` is halved in size, faces are detected with
    MTCNN, and each face lying fully inside the image is embedded with
    FaceNet and classified. The predicted class index is turned into a name
    through the sorted entry names of ``./input_dir``. Each recognised face
    gets a box and its name drawn on it; the annotated image is written to
    ``file_name``.

    Args:
        file_path: path of the image to analyse.
        file_name: path the annotated image is written to.

    Returns:
        dict mapping each recognised name to the probability of its class.
    """
    people = {}
    _, sess, pnet, rnet, onet = initialize_network()

    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    minsize = 20  # minimum size of face
    factor = 0.709  # scale factor
    image_size, input_image_size = 182, 160

    HumanNames = sorted(os.listdir("./input_dir"))

    print('Loading pretrained FACENET model:')
    model_directory = './pre_model/20170511-185253.pb'
    facenet.load_model(model_directory)

    graph = tf.get_default_graph()
    images_placeholder = graph.get_tensor_by_name("input:0")
    embeddings = graph.get_tensor_by_name("embeddings:0")
    phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
    embedding_size = embeddings.get_shape()[1]

    clf_file_location = './my_class/my_classifier.pkl'
    clf_file_location = os.path.expanduser(clf_file_location)  # REQUIRED FOR WINDOWS
    # NOTE(review): pickle can execute arbitrary code; trusted files only.
    with open(clf_file_location, 'rb') as inf:
        (clf, _class_names) = pickle.load(inf)
    print('Classification File := %s' % clf_file_location)

    print('Start Recognition!:')
    # NOTE(review): cv2.imread returns None for an unreadable file, which
    # makes the resize below fail - confirm callers always pass a valid image.
    frame = cv2.imread(file_path)
    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)

    if frame.ndim == 2:
        frame = facenet.to_rgb(frame)
    frame = frame[:, :, 0:3]
    bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet,
                                                onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    print('No of faces detected: {}'.format(nrof_faces))

    frame_height, frame_width = len(frame), len(frame[0])
    for box in bounding_boxes[:, 0:4].astype(np.int32):
        x1, y1, x2, y2 = (int(v) for v in box)

        # Skip faces touching or crossing the image border.
        if x1 <= 0 or y1 <= 0 or x2 >= frame_width or y2 >= frame_height:
            print('INNER FACE RANGE ERROR! KEEP THE FACES IN THE RANGE')
            continue

        # Per-face locals: the former shared lists were indexed by face
        # number, which broke as soon as one face had been skipped.
        face = facenet.flip(frame[y1:y2, x1:x2, :], False)
        face = misc.imresize(face, (image_size, image_size), interp='bilinear')
        face = cv2.resize(face, (input_image_size, input_image_size),
                          interpolation=cv2.INTER_CUBIC)
        face = facenet.prewhiten(face)
        batch = face.reshape(-1, input_image_size, input_image_size, 3)
        feed_dict = {
            images_placeholder: batch,
            phase_train_placeholder: False
        }
        emb_array = np.zeros((1, embedding_size))
        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

        predictions = clf.predict_proba(emb_array)
        print("predictions :=", predictions)
        best_class_indices = np.argmax(predictions, axis=1)
        print(best_class_indices)
        best_class_probabilities = predictions[
            np.arange(len(best_class_indices)), best_class_indices]
        print('best_class_probabilities', best_class_probabilities)

        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 255), 1)  # boxing face

        # The name is drawn under the box.
        text_x = x1
        text_y = y2 + 20
        print('result: ', best_class_indices[0])
        print(best_class_indices)
        print(HumanNames)

        # Direct lookup replaces the former scan over HumanNames; as before,
        # nothing is recorded when the name list is empty.
        if HumanNames:
            result_names = HumanNames[best_class_indices[0]]
            cv2.putText(frame, '{}'.format(result_names), (text_x, text_y),
                        cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0),
                        thickness=1, lineType=2)
            people[result_names] = best_class_probabilities[0]

    cv2.imwrite(file_name, frame)
    return people
def main():
    """Capture grayscale face crops from the RTSP camera until 'q' is pressed.

    Loads the FaceNet graph and the pickled classifier, builds the MTCNN
    detector and reads frames from the camera stream. Each frame is resized
    to 600x400. When exactly one face is detected, the face region (enlarged
    by 100 px horizontally and 60 px vertically, clamped to the frame) is
    converted to grayscale and written to ``folder_path`` under a name built
    from the current time and the rolling counter ``ii``; the module-level
    flag ``face_detected`` is then set. The frame is shown in a preview
    window.
    """
    global face_detected
    global save_pic
    global ii

    model_path = "models/20170511-185253.pb"
    classifier_output_path = "models/classifier_rf4.pkl"

    with gfile.FastGFile(model_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

    # These lookups are not used further in this function; they are kept
    # because they fail early when the graph lacks the expected tensors.
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
        "phase_train:0")

    gpu_memory_fraction = 0.5
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess1 = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                 log_device_placement=False))
        with sess1.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess1, None)

    # NOTE(review): pickle can execute arbitrary code; trusted files only.
    with open(classifier_output_path, 'rb') as classifier_file:
        model, class_names = pickle.load(classifier_file, encoding='latin1')

    minsize = 10  # minimum size of face
    threshold = [0.5, 0.6, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    pad_x, pad_y = 100, 60  # crop margins around the detected box

    cap_2 = cv2.VideoCapture(
        'rtsp://*****:*****@192.168.10.111:554/'
    )
    try:
        while True:
            # `~` on the comparison result is -1 or -2, both truthy, so the
            # former loop condition never let 'q' stop the loop.
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break

            ret, image3 = cap_2.read()
            if not ret or image3 is None:
                # Stream ended or dropped: there is nothing to resize.
                print('Unable to read a frame from the camera stream')
                break

            image4 = cv2.resize(image3, (600, 400))
            img = image4[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            x = datetime.datetime.now()
            print("before Detection")
            print(x)

            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces == 1:
                x1, y1, x2, y2 = bounding_boxes[0][0:4]
                print(y1)
                print(x1)
                print(y2)
                print(x2)

                # Crop before drawing so the saved image has no overlay, and
                # clamp so negative indices cannot wrap around.
                frame_height, frame_width = img.shape[:2]
                row_start = max(int(y1 - pad_y), 0)
                row_stop = min(int(y2 + pad_y), frame_height)
                col_start = max(int(x1 - pad_x), 0)
                col_stop = min(int(x2 + pad_x), frame_width)
                crop_img = img[row_start:row_stop, col_start:col_stop]

                if crop_img.size > 0:
                    gray_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
                    name_time = int(time.time())
                    print(name_time)
                    # Rolling suffix keeps names distinct within one second.
                    if ii <= 5:
                        ii += 1
                    else:
                        ii = 0
                    name_time = str(name_time) + str(ii)
                    path = os.path.join(folder_path, str(name_time) + '.jpg')
                    print(path)
                    cv2.imwrite(path, gray_img)
                    x = datetime.datetime.now()
                    print("After Detection")
                    print(x)
                    face_detected = True
                    print("taking first imagae")

                cv2.rectangle(image4,
                              (int(x1 - 60), int(y1 - 60)),
                              (int(x2 + 60), int(y2 + 60)),
                              (0, 0, 255), 2)

            cv2.imshow('In Camera Live Feed', image4)
    finally:
        cap_2.release()
        cv2.destroyAllWindows()
# frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) curTime = time.time() # calc fps timeF = frame_interval if (c % timeF == 0): find_results = [] if frame.ndim == 2: frame = facenet.to_rgb(frame) frame = frame[:, :, 0:3] #print(frame.shape[0]) #print(frame.shape[1]) ## Use MTCNN to get the bounding boxes bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] #print('Detected_FaceNum: %d' % nrof_faces) if nrof_faces > 0: det = bounding_boxes[:, 0:4] img_size = np.asarray(frame.shape)[0:2] # cropped = [] # scaled = [] # scaled_reshape = [] bb = np.zeros((nrof_faces,4), dtype=np.int32) for i in range(nrof_faces): emb_array = np.zeros((1, embedding_size))
# Excerpt of a camera recognition loop. video_capture, c, frame_interval,
# minsize, pnet, rnet, onet, threshold, factor and embedding_size are defined
# outside this excerpt, and the excerpt stops right after the first statement
# of the per-face loop.
print('Start Recognition!')
prevTime = 0
while True:
    # NOTE(review): ret is not checked; frame is presumably None when the
    # read fails, which would make the resize below raise - confirm.
    ret, frame = video_capture.read()
    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  #resize frame (optional)
    timeF = frame_interval
    # Only frames whose counter is a multiple of frame_interval are processed.
    if (c % timeF == 0):
        find_results = []
        # A 2-D frame is converted with facenet.to_rgb before slicing channels.
        if frame.ndim == 2:
            frame = facenet.to_rgb(frame)
        frame = frame[:, :, 0:3]
        bounding_boxes, _ = detect_face.detect_face(
            frame, minsize, pnet, rnet, onet, threshold, factor)
        nrof_faces = bounding_boxes.shape[0]
        print('No of faces detected: {}'.format(nrof_faces))
        if nrof_faces > 0:
            # First four columns of each detection row.
            det = bounding_boxes[:, 0:4]
            img_size = np.asarray(frame.shape)[0:2]
            cropped = []
            scaled = []
            scaled_reshape = []
            bb = np.zeros((nrof_faces, 4), dtype=np.int32)
            for i in range(nrof_faces):
                emb_array = np.zeros((1, embedding_size))
def main(self, image):
    """Detect the most prominent face in ``image`` and classify it.

    The image is resized to a width of 500 px for detection and the boxes
    are scaled back to the original resolution. With several faces, the one
    maximising ``area - 2 * squared distance to the image centre`` is kept.
    That face is cropped with ``margin``, resized to ``photo_size``,
    prewhitened, embedded with FaceNet and classified by the MLP head; the
    score is the softmax probability of the winning class.

    ``minsize``, ``threshold``, ``factor``, ``margin``, ``photo_size`` and
    the ``images_batch`` buffer (written in place) come from module scope.

    Args:
        image: image array with rows, columns and channels as its axes.

    Returns:
        ``(class_name, score, [x1, y1, x2, y2], 1)`` when a face is found,
        otherwise ``('no_face', 0, [0, 0, 0, 0], 0)``.
    """
    high, wide = image.shape[0:2]
    # Float literal: with integer division (Python 2) the rate would be
    # truncated, and 0 for images narrower than 500 px.
    rate = wide / 500.0
    new_high = round(high / rate)
    resize_image = cv2.resize(image, (500, int(new_high)))

    bounding_boxes, _ = detect_face.detect_face(resize_image, minsize,
                                                self.pnet, self.rnet,
                                                self.onet, threshold, factor)
    # Map the detections back to the original resolution.
    bounding_boxes = rate * bounding_boxes
    nrof_faces = bounding_boxes.shape[0]
    if nrof_faces == 0:
        return 'no_face', 0, [0, 0, 0, 0], 0

    det = bounding_boxes[:, 0:4]
    image_size = np.asarray(image.shape)[0:2]
    if nrof_faces > 1:
        bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        image_center = image_size / 2
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - image_center[1],
            (det[:, 1] + det[:, 3]) / 2 - image_center[0]
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        # Prefer large faces; the factor 2.0 adds weight to centring.
        index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
        det = det[index, :]
    det = np.squeeze(det)

    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - margin / 2, 0)
    bb[1] = np.maximum(det[1] - margin / 2, 0)
    bb[2] = np.minimum(det[2] + margin / 2, image_size[1])
    bb[3] = np.minimum(det[3] + margin / 2, image_size[0])
    cropped = image[bb[1]:bb[3], bb[0]:bb[2], :]
    scaled = cv2.resize(cropped, (photo_size, photo_size),
                        interpolation=cv2.INTER_LINEAR)
    scaled = facenet.prewhiten(scaled)
    images_batch[0, :, :, :] = scaled

    images_feature = self.sess.run(
        self.facenet_embeddings, {
            self.facenet_images_placeholder: images_batch,
            self.facenet_phase_train_placeholder: False
        })
    images_result_no_norm = self.sess.run(
        self.mlp_logits,
        {self.mlp_images_features_placehoder: images_feature})

    logits = images_result_no_norm[0]
    best_index = int(np.argmax(images_result_no_norm, axis=1)[0])
    # Softmax with the maximum subtracted first: same value, but exp()
    # cannot overflow to inf (which produced nan) for large logits.
    exp_shifted = np.exp(logits - np.max(logits))
    best_score = np.divide(exp_shifted[best_index], np.sum(exp_shifted))
    best_class_names = self.face_classname[best_index]

    return best_class_names, float(best_score), [
        bb[0], bb[1], bb[2], bb[3]
    ], 1
def detect(frame):
    """Return the stored credentials when the known user is seen in ``frame``.

    The frame is enlarged by 1.5, faces are detected with MTCNN, and each
    face lying fully inside the frame is embedded with FaceNet and
    classified. The predicted class index is mapped to a name through
    ``HumanNames``; the first face whose name is ``'User_Name'`` makes the
    function return the credential dict.

    The detector settings, the TensorFlow session and tensors, the
    classifier ``model`` and ``HumanNames`` are read from module scope.

    Args:
        frame: image array with two or three dimensions.

    Returns:
        ``{'id': 'User_ID', 'psw': 'User_PSW'}`` when the user is recognised,
        otherwise the empty string.
    """
    frame = cv2.resize(frame, (0, 0), fx=1.5, fy=1.5)  # resize frame (optional)

    if frame.ndim == 2:
        frame = facenet.to_rgb(frame)
    frame = frame[:, :, 0:3]
    bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet,
                                                onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    print('Detected_FaceNum: %d' % nrof_faces)

    if nrof_faces == 0:
        print('Unable to align')
        return ""

    frame_height, frame_width = len(frame), len(frame[0])
    for box in bounding_boxes[:, 0:4].astype(np.int32):
        x1, y1, x2, y2 = (int(v) for v in box)

        # Skip faces touching or crossing the frame border.
        if x1 <= 0 or y1 <= 0 or x2 >= frame_width or y2 >= frame_height:
            print('face is inner of range!')
            continue

        # Per-face locals: the former shared lists were always read at
        # index 0, so every face after the first reused the first crop.
        face = facenet.flip(frame[y1:y2, x1:x2, :], False)
        face = misc.imresize(face, (image_size, image_size), interp='bilinear')
        face = cv2.resize(face, (input_image_size, input_image_size),
                          interpolation=cv2.INTER_CUBIC)
        face = facenet.prewhiten(face)
        batch = face.reshape(-1, input_image_size, input_image_size, 3)
        feed_dict = {
            images_placeholder: batch,
            phase_train_placeholder: False
        }
        emb_array = np.zeros((1, embedding_size))
        emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

        predictions = model.predict_proba(emb_array)
        best_class_indices = np.argmax(predictions, axis=1)

        # The frame is a local copy that is never returned or shown, so the
        # box formerly drawn on it had no visible effect and is omitted.
        # Direct lookup replaces the former scan over HumanNames; as before,
        # an empty name list matches nothing.
        if HumanNames and HumanNames[best_class_indices[0]] == 'User_Name':
            return {'id': 'User_ID', 'psw': 'User_PSW'}

    return ""
# Excerpt of a camera debugging script: every frame is passed to MTCNN and the
# raw detector output is printed. pnet, rnet and onet are defined outside this
# excerpt. The excerpt ends inside a triple-quoted string that holds disabled
# code and is not closed within the excerpt.
minsize = 20
threshold = [0.6, 0.7, 0.7]
#factor = 0.709
factor = 0.600
#image=cv2.imread("img.jpg")
cap = cv2.VideoCapture(0)
#while(True):
a = 0
while (cap.isOpened()):
    a = a + 1
    # NOTE(review): ret is not checked; image is presumably None when the
    # read fails - confirm.
    ret, image = cap.read()
    # `yes` receives the second value returned by detect_face (printed below
    # as the landmarks).
    bounding_boxes, yes = detect_face.detect_face(image, minsize, pnet, rnet, onet, threshold, factor)
    print("the no of faces are", len(bounding_boxes), len(yes))
    print("the landmarks are ", yes)
    print("the shape[0] is ", bounding_boxes.shape[0])
    print("the bounding_boxes[:,0:4] is ", bounding_boxes[:, 0:4])
    print("the boxes are", bounding_boxes)
    ''' nrof_faces = bounding_boxes.shape[0] if nrof_faces>0: det = bounding_boxes[:,0:4] det_arr = [] img_size = np.asarray(image.shape)[0:2] if nrof_faces>1: bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) img_center = img_size / 2 offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ])
def main(args):
    """Show the camera feed with MTCNN face boxes and landmarks drawn on it.

    Frames are read from camera 0 and passed to the detector with the
    channel order reversed. With several faces either all of them
    (``args.detect_multiple_faces``) or the one maximising
    ``area - 2 * squared distance to the image centre`` gets a bounding box,
    enlarged by ``args.margin``; the five landmarks of every detected face
    are drawn as dots. The preview runs until a key is pressed or the camera
    stops delivering frames.

    Args:
        args: namespace providing ``detect_multiple_faces`` and ``margin``.
    """
    # Build the MTCNN detector networks.
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # open the camera
    cap = cv2.VideoCapture(0)
    # VideoCapture always returns an object, so a failed open has to be
    # detected with isOpened() rather than a comparison with None.
    if cap is None or not cap.isOpened():
        print('can not open the camera')
        return

    try:
        # detect face
        while True:
            ret, img = cap.read()
            if not ret or img is None:
                # No frame delivered: slicing None below would raise.
                print('can not read a frame from the camera')
                return

            bounding_boxes, points = detect_face.detect_face(
                img[..., ::-1], minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            print('face_num: ', nrof_faces)
            print('face_point: ', points)

            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                det_arr = []
                img_size = np.asarray(img.shape)[0:2]
                if nrof_faces > 1:
                    if args.detect_multiple_faces:
                        # detect multiple faces
                        for i in range(nrof_faces):
                            det_arr.append(np.squeeze(det[i]))
                    else:
                        bounding_box_size = (det[:, 2] - det[:, 0]) * (
                            det[:, 3] - det[:, 1])
                        img_center = img_size / 2
                        offsets = np.vstack([
                            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                            (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                        ])
                        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                        index = np.argmax(
                            bounding_box_size - offset_dist_squared * 2.0
                        )  # some extra weight on the centering
                        det_arr.append(det[index, :])
                else:
                    # detect only one face
                    det_arr.append(np.squeeze(det))

                print(img.shape)

                # draw points: each column holds five x values followed by
                # five y values. Cast to int because OpenCV drawing calls
                # need integer pixel coordinates.
                for j in range(nrof_faces):
                    p = points[:, j]
                    for k in range(5):
                        cv2.circle(img, (int(p[k]), int(p[k + 5])), 2,
                                   (0, 255, 0), 2)

                for det in det_arr:
                    det = np.squeeze(det)
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                    bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                    bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                    bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])

                    # draw face bbox
                    print('face_bbox: ', (bb[0], bb[1]), (bb[2], bb[3]))
                    cv2.rectangle(img, (int(bb[0]), int(bb[1])),
                                  (int(bb[2]), int(bb[3])), (0, 255, 0), 1)
            else:
                print('no face has been detected')

            # Shown for every frame so the preview keeps updating and the
            # key press is still polled while no face is visible.
            cv2.imshow('camera', img)
            if cv2.waitKey(33) > 0:
                return
    finally:
        cap.release()
        cv2.destroyAllWindows()
def main(args):
    """Align the FaceScrub images and write an image list file.

    Reads the dataset under ``<input_dir>/facescrub``, the two label files
    (``facescrub_actors.txt`` / ``facescrub_actresses.txt``) that carry the
    annotated boxes, and ``facescrub_uncropped_features_list.json`` that lists
    the images to keep. Every kept image is aligned to a 112x112 crop, saved
    as PNG under ``<output_dir>/<person>/`` and recorded in
    ``<output_dir>/lst`` as ``1<TAB>path<TAB>class``.

    Alignment strategies are tried in order; ``nrof`` counts how often each
    one produced the result:
      0. dataset landmarks, refined by a second detection on the warped image
      1. MTCNN detection whose box overlaps the annotated box (IoU > 0.3)
      2. forced detection inside the annotated box (score >= 0.3)
      3. plain crop derived from the annotated box
      4. central crop (no annotated box)

    Args:
        args: Parsed options; ``args.input_dir`` and ``args.output_dir`` are
            read.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    image_dir = os.path.join(args.input_dir, 'facescrub')
    dataset = face_image.get_dataset('facescrub', image_dir)
    print('dataset size', len(dataset))

    # (person name, image id) -> annotated box, parsed from column 4.
    bbox = {}
    for label_file in ['facescrub_actors.txt', 'facescrub_actresses.txt']:
        label_file = os.path.join(args.input_dir, label_file)
        with open(label_file, 'r') as label_lines:
            for line_no, line in enumerate(label_lines):
                if line_no == 0:
                    continue  # header row
                vec = line.split("\t")
                key = (vec[0], int(vec[2]))
                bbox[key] = [int(x) for x in vec[4].split(',')]
    print('bbox size', len(bbox))

    # Only images named "<person>_<id>.<ext>" in the JSON list are processed.
    valid_key = {}
    json_path = os.path.join(args.input_dir,
                             'facescrub_uncropped_features_list.json')
    with open(json_path) as json_file:
        json_data = json.loads(json_file.read())['path']
    for _data in json_data:
        key = _data.split('/')[-1]
        pos = key.rfind('.')
        if pos < 0:
            print(_data)
        else:
            key = key[0:pos]
        keys = key.split('_')
        if len(keys) != 2:
            print('err', key, _data)
            continue
        valid_key[(keys[0], int(keys[1]))] = 1
    print('valid keys', len(valid_key))

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = [112, 112]  # output (height, width)
    # Template landmark positions; the x values are shifted by 8 when the
    # output is 112 wide.
    src = np.array([[30.2946, 51.6963], [65.5318, 51.5014],
                    [48.0252, 71.7366], [33.5493, 92.3655],
                    [62.7299, 92.2041]], dtype=np.float32)
    if image_size[1] == 112:
        src[:, 0] += 8.0
    out_size = (image_size[1], image_size[0])

    def _warp(image, landmarks, template, size):
        """Warp image with the similarity transform landmarks -> template."""
        tform = trans.SimilarityTransform()
        tform.estimate(landmarks, template)
        M = tform.params[0:2, :]
        return cv2.warpAffine(image, M, size, borderValue=0.0)

    # All outputs go below the expanded output directory created above.
    output_filename = os.path.join(output_dir, 'lst')

    with open(output_filename, "w") as text_file:
        nrof_images_total = 0
        nrof = np.zeros((5, ), dtype=np.int32)
        for fimage in dataset:
            if nrof_images_total % 100 == 0:
                print("Processing %d, (%s)" % (nrof_images_total, nrof))
            nrof_images_total += 1
            image_path = fimage.image_path
            if not os.path.exists(image_path):
                print('image not found (%s)' % image_path)
                continue
            _paths = fimage.image_path.split('/')
            print(fimage.image_path)
            a, b = _paths[-2], _paths[-1]
            bname = b[0:b.rfind('.')]
            img_id = int(bname[(bname.rfind('_') + 1):])
            key = (a, img_id)
            if key not in valid_key:
                continue
            assert key in bbox
            fimage.bbox = bbox[key]

            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                print('{}: {}'.format(image_path, e))
                continue
            if img.ndim < 2:
                print('Unable to align "%s", img dim error' % image_path)
                continue
            if img.ndim == 2:
                img = to_rgb(img)
            img = img[:, :, 0:3]

            tb = bname.replace(' ', '_') + ".png"
            ta = a.replace(' ', '_')
            target_dir = os.path.join(output_dir, ta)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            target_file = os.path.join(target_dir, tb)

            warped = None

            # Strategy 0: coarse warp from the first three template points
            # into a double-size canvas, then re-detect for the final warp.
            if fimage.landmark is not None:
                dst = fimage.landmark.astype(np.float32)
                warped0 = _warp(img, dst,
                                src[0:3, :] * 1.5 + image_size[0] * 0.25,
                                (image_size[1] * 2, image_size[0] * 2))
                bounding_boxes, points = detect_face.detect_face(
                    warped0, image_size[0], pnet, rnet, onet,
                    threshold, factor)
                if bounding_boxes.shape[0] > 0:
                    # Each column of `points` is 5 x values then 5 y values.
                    dst = points[:, 0].reshape((2, 5)).T
                    warped = _warp(warped0, dst, src, out_size)
                    nrof[0] += 1

            # Strategy 1: detection that best overlaps the annotated box.
            if warped is None and fimage.bbox is not None:
                bounding_boxes, points = detect_face.detect_face(
                    img, img.shape[0] // 4, pnet, rnet, onet,
                    threshold, factor)
                if bounding_boxes.shape[0] > 0:
                    det = bounding_boxes[:, 0:4]
                    best_iou, best_index = 0.0, 0
                    for i in range(det.shape[0]):
                        iou = IOU(fimage.bbox, det[i])
                        if iou > best_iou:
                            best_iou, best_index = iou, i
                    if best_iou > 0.3:
                        dst = points[:, best_index].reshape((2, 5)).T
                        warped = _warp(img, dst, src, out_size)
                        nrof[1] += 1

            # Strategy 2: force the detector onto the annotated box.
            if warped is None and fimage.bbox is not None:
                bounding_boxes, points = detect_face.detect_face_force(
                    img, fimage.bbox, pnet, rnet, onet)
                assert bounding_boxes.shape[0] == 1
                if bounding_boxes[0][4] >= 0.3:
                    dst = points[:, 0].reshape((2, 5)).T
                    warped = _warp(img, dst, src, out_size)
                    nrof[2] += 1

            # Strategies 3/4: no landmarks available, fall back to a crop.
            if warped is None:
                roi = np.zeros((4, ), dtype=np.int32)
                roi[0] = int(img.shape[1] * 0.06)
                roi[1] = int(img.shape[0] * 0.06)
                roi[2] = img.shape[1] - roi[0]
                roi[3] = img.shape[0] - roi[1]
                if fimage.bbox is not None:
                    bb = fimage.bbox
                    h = bb[3] - bb[1]
                    w = bb[2] - bb[0]
                    x = bb[0]
                    y = bb[1]
                    # Re-centre the box horizontally with the output aspect.
                    _w = int((float(h) / image_size[0]) * image_size[1])
                    x += (w - _w) // 2
                    x = max(0, x)
                    xw = min(x + _w, img.shape[1])
                    roi = np.array((x, y, xw, y + h), dtype=np.int32)
                    nrof[3] += 1
                else:
                    nrof[4] += 1
                warped = img[roi[1]:roi[3], roi[0]:roi[2], :]
                warped = cv2.resize(warped, out_size)

            # cv2.imwrite expects BGR channel order.
            bgr = warped[..., ::-1]
            cv2.imwrite(target_file, bgr)
            oline = '%d\t%s\t%d\n' % (1, target_file, int(fimage.classname))
            text_file.write(oline)
def Augmentation(input_image):
    """Create augmented 200x200 face crops from a single image file.

    The image is replicated 16 times, each copy is pushed through a random
    imgaug pipeline, and MTCNN is run on every augmented copy. When several
    faces are found the largest, most centred one is used. Copies without a
    usable detection contribute nothing.

    Args:
        input_image: Path of the image file to augment.

    Returns:
        list: Face crops resized to 200x200; empty when the image cannot be
        used or no face is detected in any augmented copy.
    """
    minsize = 35  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 200
    nb_batches = 16
    aug_faces = []

    img = misc.imread(input_image)
    if img.ndim < 2:
        print("Unable !")
        return aug_faces  # nothing sensible can be cropped from this array
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    img = img[:, :, 0:3]

    def sometimes(aug):
        # Apply the wrapped augmenter to roughly half of the images.
        return iaa.Sometimes(0.5, aug)

    seq = iaa.Sequential(
        [
            iaa.Fliplr(0.5),
            sometimes(
                iaa.CropAndPad(
                    percent=(-0.05, 0.1),
                    pad_mode=ia.ALL,
                    pad_cval=(0, 255))),
            sometimes(
                iaa.Affine(
                    scale={"x": (0.8, 1.0), "y": (0.8, 1.0)},
                    translate_percent={"x": (-0.2, 0.2), "y": (0, 0.2)},
                    rotate=(-10, 10),
                    shear=(-16, 16),
                    order=[0, 1],
                    cval=(0, 255))),
            iaa.SomeOf(
                (0, 4),
                [
                    iaa.OneOf([
                        iaa.GaussianBlur((0, 3.0)),
                        iaa.AverageBlur(k=(2, 7)),
                        iaa.MedianBlur(k=(3, 11)),
                    ]),
                    iaa.Sharpen(alpha=(0, 1.0),
                                lightness=(0.75, 1.5)),  # sharpen images
                    iaa.Emboss(alpha=(0, 1.0),
                               strength=(0, 1.0)),  # emboss images
                    iaa.SimplexNoiseAlpha(
                        iaa.OneOf([
                            iaa.EdgeDetect(alpha=(0.2, 0.5)),
                            iaa.DirectedEdgeDetect(alpha=(0.2, 0.5),
                                                   direction=(0.0, 1.0)),
                        ])),
                    iaa.AdditiveGaussianNoise(
                        loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
                    iaa.Dropout((0.01, 0.1), per_channel=0.5),
                    iaa.Add((-10, 10), per_channel=0.5),
                    iaa.AddToHueAndSaturation((-20, 20)),
                    iaa.ContrastNormalization((0.5, 1.5), per_channel=0.5),
                    iaa.Grayscale(alpha=(0.0, 1.0)),
                    sometimes(
                        iaa.ElasticTransformation(alpha=(0.5, 2),
                                                  sigma=0.25)),
                    sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.03))),
                    sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1)))
                ],
                random_order=True)
        ],
        random_order=True)

    batch = np.array([img] * nb_batches, dtype=np.uint8)
    aug_images = seq.augment_images(batch)

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)
        # The session is used as a context manager so that it is closed when
        # the function returns instead of piling up across calls.
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, './npy')

            for aug_img in aug_images:
                bounding_boxes, _ = detect_face.detect_face(
                    aug_img, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                if nrof_faces == 0:
                    continue

                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(aug_img.shape)[0:2]
                if nrof_faces > 1:
                    # Favour large boxes close to the image centre.
                    bounding_box_size = ((det[:, 2] - det[:, 0]) *
                                         (det[:, 3] - det[:, 1]))
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    index = np.argmax(
                        bounding_box_size - offset_dist_squared * 2.0)
                    det = det[index, :]
                det = np.squeeze(det)

                # Clip to the image: an out-of-range coordinate would turn
                # into a negative slice index and produce a wrong or empty
                # crop.
                x0 = max(int(det[0]), 0)
                y0 = max(int(det[1]), 0)
                x1 = min(int(det[2]), int(img_size[1]))
                y1 = min(int(det[3]), int(img_size[0]))
                if x1 <= x0 or y1 <= y0:
                    continue

                cropped_temp = aug_img[y0:y1, x0:x1, :]
                scaled_temp = misc.imresize(cropped_temp,
                                            (image_size, image_size),
                                            interp='bilinear')
                aug_faces.append(scaled_temp)

    return aug_faces
def main(args):
    """Align faces stored as base64 records and write an image list file.

    ``<input_dir>_clean_list.txt`` defines which persons are kept and assigns
    each one a label id in order of first appearance. ``args.input_dir``
    itself is read as a text file with one record per line: person id, image
    id, ..., base64-encoded image (last field). For every record of a kept
    person the image is decoded, the face is detected with MTCNN, aligned
    with ``face_preprocess.preprocess`` and saved as
    ``<output_dir>/<person>/<img_id>.jpg``; a ``1<TAB>path<TAB>label`` line
    is appended to ``<output_dir>/faceinsight_align_<name>.lst``.

    Args:
        args: Parsed options; reads ``input_dir``, ``output_dir``, ``name``
            and ``image_size``.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # person -> [label id, {image id: 1}]
    datamap = {}
    datasize = 0
    verr = 0
    with open(args.input_dir + "_clean_list.txt", 'r') as clean_list:
        for pp, line in enumerate(clean_list, 1):
            if pp % 10000 == 0:
                print('loading list', pp)
            line = line.strip()[2:]
            if not line.startswith('m.'):
                continue
            vec = line.split('/')
            assert len(vec) == 2
            person = vec[0]
            img = vec[1]
            try:
                img_id = int(img.split('.')[0])
            except ValueError:
                verr += 1
                continue
            if person not in datamap:
                labelid = len(datamap)
                datamap[person] = [labelid, {img_id: 1}]
            else:
                datamap[person][1][img_id] = 1
            datasize += 1
    print('dataset size', args.name, datasize)
    print('dataset err', verr)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 100  # minimum size of face
    threshold = [0.6, 0.6, 0.3]  # three steps's threshold
    factor = 0.709  # scale factor
    print(minsize)
    print(threshold)
    print(factor)

    output_filename = os.path.join(output_dir,
                                   'faceinsight_align_%s.lst' % args.name)

    nrof_images_total = 0
    nrof_successfully_aligned = 0
    # These counters are still reported below but nothing increments them.
    nrof_changed = 0
    nrof_iou3 = 0
    nrof_force = 0

    with open(output_filename, "w") as text_file, \
            open(args.input_dir, 'r') as records:
        for line in records:
            vec = line.strip().split()
            if len(vec) < 3:
                continue  # blank or truncated record
            person = vec[0]
            img_id = int(vec[1])
            v = datamap.get(person, None)
            if v is None:
                continue
            labelid = v[0]

            if nrof_images_total % 100 == 0:
                print("Processing %d, (%d)" % (nrof_images_total,
                                               nrof_successfully_aligned))
            nrof_images_total += 1

            img_str = base64.b64decode(vec[-1])
            nparr = np.frombuffer(img_str, np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if img is None:
                # imdecode returns None when the buffer is not a valid image.
                print('unable to decode image %s/%d' % (person, img_id))
                continue
            img = img[..., ::-1]  # to rgb

            target_dir = os.path.join(output_dir, person)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            target_path = os.path.join(target_dir, "%d.jpg" % img_id)

            # No ground-truth box exists in this input format, so the face
            # is chosen purely from the detections.
            _minsize = min(img.shape[0] // 5, img.shape[1] // 5)
            bounding_boxes, points = detect_face.detect_face(
                img, _minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces == 0:
                continue

            det = bounding_boxes[:, 0:4]
            bindex = 0
            if nrof_faces > 1:
                img_size = np.asarray(img.shape)[0:2]
                bounding_box_size = ((det[:, 2] - det[:, 0]) *
                                     (det[:, 3] - det[:, 1]))
                img_center = img_size / 2
                offsets = np.vstack([
                    (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                    (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                ])
                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                # some extra weight on the centering
                bindex = np.argmax(
                    bounding_box_size - offset_dist_squared * 2.0)

            bb = np.squeeze(det[bindex, :])
            # Each column of `points` is 5 x values followed by 5 y values;
            # reshape to one (x, y) row per landmark.
            landmark = points[:, bindex].reshape((2, 5)).T
            warped = face_preprocess.preprocess(img, bbox=bb,
                                                landmark=landmark,
                                                image_size=args.image_size)
            misc.imsave(target_path, warped)
            nrof_successfully_aligned += 1
            oline = '%d\t%s\t%d' % (1, target_path, labelid)
            text_file.write("%s\n" % oline)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
    print('Number of changed: %d' % nrof_changed)
    print('Number of iou3: %d' % nrof_iou3)
    print('Number of force: %d' % nrof_force)
def Image_Recognition(path):
    """Recognise the faces in one image file and return the matched names.

    The image is read as grayscale, halved in size, and searched for faces
    with MTCNN. Every face that lies fully inside the frame is embedded with
    the FaceNet model and classified; when the best class probability exceeds
    0.53 the face is boxed, labelled and its name appended to the result. The
    annotated image is shown for up to 10 seconds; pressing ``q`` in that
    time exits the process.

    Args:
        path: Path of the image file to analyse.

    Returns:
        list: Names (sub-directory names of ``./pre_img``) of the recognised
        faces, in detection order.

    Raises:
        IOError: If the image cannot be read.
    """
    warnings.filterwarnings("ignore")
    modeldir = './model/20170511-185253.pb'
    classifier_filename = './class/classifier.pkl'
    npy = './npy'
    train_img = "./pre_img"
    result = []

    frame = cv2.imread(path, 0)
    if frame is None:
        # cv2.imread signals failure with None instead of raising.
        raise IOError('Unable to read image: %s' % path)
    frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)  # resize frame (optional)
    if frame.ndim == 2:
        frame = facenet.to_rgb(frame)
    frame = frame[:, :, 0:3]

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    image_size = 182
    input_image_size = 160

    # Class index -> name relies on the sorted directory listing.
    HumanNames = sorted(os.listdir(train_img))

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        config = tf.ConfigProto(gpu_options=gpu_options,
                                log_device_placement=False)
        # Context-managed so the session is closed on every exit path.
        with tf.Session(config=config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE(review): pickle executes code on load; the classifier
            # file must come from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, _class_names) = pickle.load(infile)

            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]

            if nrof_faces > 0:
                bb = bounding_boxes[:, 0:4].astype(np.int32)
                for i in range(nrof_faces):
                    x0, y0, x1, y1 = (int(v) for v in bb[i])

                    # Skip faces touching the border. Per-face locals are
                    # used below so a skipped face cannot shift the data of
                    # the following faces.
                    if (x0 <= 0 or y0 <= 0 or x1 >= len(frame[0])
                            or y1 >= len(frame)):
                        continue

                    face = facenet.flip(frame[y0:y1, x0:x1, :], False)
                    face = cv2.resize(face, (image_size, image_size),
                                      interpolation=cv2.INTER_LINEAR)
                    face = cv2.resize(face,
                                      (input_image_size, input_image_size),
                                      interpolation=cv2.INTER_CUBIC)
                    face = facenet.prewhiten(face)
                    batch = face.reshape(-1, input_image_size,
                                         input_image_size, 3)

                    feed_dict = {images_placeholder: batch,
                                 phase_train_placeholder: False}
                    emb_array = np.zeros((1, embedding_size))
                    emb_array[0, :] = sess.run(embeddings,
                                               feed_dict=feed_dict)

                    predictions = model.predict_proba(emb_array)
                    best_class_indices = np.argmax(predictions, axis=1)
                    best_class_probabilities = predictions[
                        np.arange(len(best_class_indices)),
                        best_class_indices]

                    if best_class_probabilities[0] > 0.53:
                        cv2.rectangle(frame, (x0, y0), (x1, y1),
                                      (0, 255, 0), 2)  # boxing face
                        # plot result idx under box
                        text_x = x0
                        text_y = y1 + 20
                        result_names = HumanNames[best_class_indices[0]]
                        result.append(result_names)
                        cv2.putText(frame, result_names, (text_x, text_y),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                    (0, 0, 255), thickness=1, lineType=2)

            cv2.imshow('Image', frame)
            if cv2.waitKey(10000) & 0xFF == ord('q'):
                sys.exit("Thanks")
            cv2.destroyAllWindows()

    return result
def recognize_face():
    """Recognise faces on the default camera and record attendance.

    Uses the module-level settings ``npy``, ``train_img``, ``modeldir`` and
    ``classifier_filename``. Each camera frame is searched for faces with
    MTCNN; every face fully inside the frame is embedded with FaceNet and
    classified. A face whose best class probability exceeds 0.80 is labelled
    and passed to ``append_name_to_attendance``; other faces are only boxed.
    Pressing ``q`` writes the attendance sheet, sends the mails and ends the
    loop. The loop also ends when the camera stops delivering frames.
    """
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # three steps's threshold
            factor = 0.709  # scale factor
            image_size = 182
            input_image_size = 160

            # Class index -> name relies on the sorted directory listing.
            HumanNames = sorted(os.listdir(train_img))

            print('Loading Modal')
            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE(review): pickle executes code on load; the classifier
            # file must come from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, _class_names) = pickle.load(infile)

            video_capture = cv2.VideoCapture(0)
            print('Start Recognition')
            try:
                while True:
                    ret, frame = video_capture.read()
                    if not ret or frame is None:
                        print('Unable to read a frame from the camera')
                        break

                    if frame.ndim == 2:
                        frame = facenet.to_rgb(frame)
                    frame = frame[:, :, 0:3]
                    bounding_boxes, _ = detect_face.detect_face(
                        frame, minsize, pnet, rnet, onet, threshold, factor)
                    nrof_faces = bounding_boxes.shape[0]
                    print('Detected_FaceNum: %d' % nrof_faces)

                    if nrof_faces > 0:
                        bb = bounding_boxes[:, 0:4].astype(np.int32)
                        for i in range(nrof_faces):
                            x0, y0, x1, y1 = (int(v) for v in bb[i])

                            # Skip faces touching the border. Per-face locals
                            # keep a skipped face from shifting the data of
                            # the faces that follow it.
                            if (x0 <= 0 or y0 <= 0 or x1 >= len(frame[0])
                                    or y1 >= len(frame)):
                                print('Face is very close!')
                                continue

                            try:
                                face = facenet.flip(
                                    frame[y0:y1, x0:x1, :], False)
                                face = misc.imresize(
                                    face, (image_size, image_size),
                                    interp='bilinear')
                                face = cv2.resize(
                                    face,
                                    (input_image_size, input_image_size),
                                    interpolation=cv2.INTER_CUBIC)
                                face = facenet.prewhiten(face)
                                batch = face.reshape(
                                    -1, input_image_size,
                                    input_image_size, 3)
                                feed_dict = {
                                    images_placeholder: batch,
                                    phase_train_placeholder: False
                                }
                                emb_array = np.zeros((1, embedding_size))
                                emb_array[0, :] = sess.run(
                                    embeddings, feed_dict=feed_dict)
                                predictions = model.predict_proba(emb_array)
                                print(predictions)
                                best_class_indices = np.argmax(
                                    predictions, axis=1)
                                best_class_probabilities = predictions[
                                    np.arange(len(best_class_indices)),
                                    best_class_indices]
                                print(best_class_indices, ' with accuracy ',
                                      best_class_probabilities)
                            except Exception as err:
                                # One bad crop must not stop the stream, but
                                # the reason is reported instead of hidden.
                                print('Skipping face %d: %s' % (i, err))
                                continue

                            text_x = x0
                            text_y = y1 + 20
                            cv2.rectangle(frame, (x0, y0), (x1, y1),
                                          (0, 255, 0), 2)  # boxing face
                            if best_class_probabilities[0] > 0.80:
                                # plot result idx under box
                                print('Result Indices: ',
                                      best_class_indices[0])
                                print(HumanNames)
                                result_names = HumanNames[
                                    best_class_indices[0]]
                                append_name_to_attendance(result_names)
                                cv2.putText(
                                    frame, result_names, (text_x, text_y),
                                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                    (0, 0, 255), thickness=1, lineType=2)
                            else:
                                print('unknown')
                    else:
                        print('Alignment Failure')

                    cv2.imshow('Video', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        cv2.destroyAllWindows()
                        write_data_to_xcel()
                        xcel_2.send_multiple_mail()
                        break
            finally:
                video_capture.release()
                cv2.destroyAllWindows()
def main():
    """Watch the room camera and save a snapshot when one face stays in view.

    Frames are read from the RTSP stream, resized to 600x400 and searched for
    faces with MTCNN. Every frame containing exactly one face increments the
    global ``save_pic`` counter; when it reaches 10 the full-size frame is
    written to ``folder_path`` as ``<unix time><ii>.jpg``, the global
    ``face_detected`` flag is set and the counter restarts. ``ii`` cycles
    through 0..6 to keep file names from the same second distinct. The live
    feed is shown until ``q`` is pressed or the stream stops.
    """
    global face_detected
    global save_pic
    global ii

    model_path = "models/20170511-185253.pb"
    classifier_output_path = "models/classifier_rf4.pkl"

    with gfile.FastGFile(model_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

    # Looked up but not used below; kept so a model without these tensors
    # still fails at start-up.
    images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
    embedding_layer = tf.get_default_graph().get_tensor_by_name(
        "embeddings:0")
    phase_train_placeholder = tf.get_default_graph().get_tensor_by_name(
        "phase_train:0")

    gpu_memory_fraction = 0.5
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess1 = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                 log_device_placement=False))
        with sess1.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess1, None)

    # NOTE(review): pickle executes code on load; the classifier file must
    # come from a trusted source.
    with open(classifier_output_path, 'rb') as classifier_file:
        model, class_names = pickle.load(classifier_file, encoding='latin1')

    minsize = 20  # minimum size of face
    threshold = [0.5, 0.6, 0.7]  # three steps's threshold
    factor = 0.2  # scale factor

    cap_2 = cv2.VideoCapture("rtsp://*****:*****@10.194.2.51:554/")
    try:
        # Compare with != rather than negating with ~: bitwise NOT of a bool
        # is -1 or -2, both truthy, so the loop could never end on 'q'.
        while cv2.waitKey(1) & 0xFF != ord('q'):
            ret, image3 = cap_2.read()
            if not ret or image3 is None:
                print("unable to read a frame from the camera")
                break

            image4 = cv2.resize(image3, (600, 400))
            img = image4[:, :, 0:3]
            bounding_boxes, _ = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]

            if nrof_faces == 1:
                save_pic = save_pic + 1
                print("save_pic: " + str(save_pic))
                if save_pic == 10:
                    name_time = int(time.time())
                    print(name_time)
                    if ii <= 5:
                        ii += 1
                    else:
                        ii = 0
                    name_time = str(name_time) + str(ii)
                    path = os.path.join(folder_path, str(name_time) + '.jpg')
                    print(path)
                    cv2.imwrite(path, image3)
                    face_detected = True
                    print("taking first imagae")
                    save_pic = 0

            cv2.imshow('Room Camera Live Feed', image4)
    finally:
        cap_2.release()
        cv2.destroyAllWindows()
def load_and_align_data(img, margin, minsize, threshold, factor,
                        pnet, rnet, onet,
                        image_size=160, detect_multiple_faces=True):
    """Detect faces in a BGR image and return prewhitened square crops.

    Args:
        img: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before detection).
        margin: Pixels added around each detected box (half on every side),
            clipped to the image.
        minsize: Minimum face size passed to the detector.
        threshold: Three-stage detector thresholds.
        factor: Image pyramid scale factor passed to the detector.
        pnet, rnet, onet: MTCNN stage networks.
        image_size: Side length of the returned crops. The default of 160
            matches the pretrained FaceNet input.
        detect_multiple_faces: When True every detected face is returned;
            when False only the largest, most centred one.

    Returns:
        tuple: ``(faces, bboxes)`` where ``faces`` is a list of prewhitened
        ``image_size`` x ``image_size`` crops and ``bboxes`` the matching
        list of ``(top, right, bottom, left)`` tuples. Both lists are empty
        when no face is detected.
    """
    # detect_face function expects RGB format
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Filter out any layers beyond first three (RGB)
    if img.shape[2] > 3:
        img = img[:, :, 0:3]
    img_size = np.asarray(img.shape)[0:2]

    bounding_boxes, _ = detect_face.detect_face(
        img, minsize, pnet, rnet, onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]

    faces = []
    bboxes = []
    if nrof_faces == 0:
        return faces, bboxes

    det = bounding_boxes[:, 0:4]
    if nrof_faces > 1 and not detect_multiple_faces:
        bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        img_center = img_size / 2
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - img_center[0],
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        # some extra weight on the centering
        index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
        det_arr = [det[index, :]]
    else:
        det_arr = [det[i] for i in range(nrof_faces)]

    for face_det in det_arr:
        face_det = np.squeeze(face_det)
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(face_det[0] - margin / 2, 0)
        bb[1] = np.maximum(face_det[1] - margin / 2, 0)
        bb[2] = np.minimum(face_det[2] + margin / 2, img_size[1])
        bb[3] = np.minimum(face_det[3] + margin / 2, img_size[0])
        if bb[2] <= bb[0] or bb[3] <= bb[1]:
            # A zero-area crop would make cv2.resize raise.
            continue
        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
        scaled = cv2.resize(cropped, (image_size, image_size),
                            interpolation=cv2.INTER_LINEAR)
        faces.append(prewhiten(scaled))
        bboxes.append((bb[1], bb[2], bb[3], bb[0]))

    return faces, bboxes
def main():
    """Recognise faces in a video file or camera stream and display them.

    The source is taken from ``--path`` (default: camera 0). Each frame is
    searched for faces with MTCNN; every face is embedded with FaceNet and
    classified with the pickled classifier. The frame is shown with a box,
    the predicted name (``"Unknown"`` when the probability is not above 0.15)
    and the probability for each face. Playback stops at the end of the
    stream or when ``q`` is pressed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path',
                        help='Path of the video you want to test on.',
                        default=0)
    args = parser.parse_args()

    MINSIZE = 20
    THRESHOLD = [0.6, 0.7, 0.7]
    FACTOR = 0.709
    INPUT_IMAGE_SIZE = 160
    CLASSIFIER_PATH = 'data/Friends.pkl'
    FACENET_MODEL_PATH = 'data/20180402-114759/20180402-114759.pb'

    VIDEO_PATH = args.path
    # Command-line values arrive as strings; a bare number is a camera index,
    # not a file name.
    if isinstance(VIDEO_PATH, str) and VIDEO_PATH.isdigit():
        VIDEO_PATH = int(VIDEO_PATH)

    # Load The Custom Classifier
    # NOTE(review): pickle executes code on load; the classifier file must
    # come from a trusted source.
    with open(CLASSIFIER_PATH, 'rb') as file:
        model, class_names = pickle.load(file)
    print("Custom Classifier, Successfully loaded")

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            # Load the model
            print('Loading feature extraction model')
            facenet.load_model(FACENET_MODEL_PATH)

            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")

            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

            person_detected = collections.Counter()

            cap = cv2.VideoCapture(VIDEO_PATH)
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret or frame is None:
                        break  # end of stream or read failure

                    bounding_boxes, _ = detect_face.detect_face(
                        frame, MINSIZE, pnet, rnet, onet, THRESHOLD, FACTOR)
                    faces_found = bounding_boxes.shape[0]
                    frame_h, frame_w = frame.shape[0], frame.shape[1]

                    for i in range(faces_found):
                        # Handle each face separately so one failure does
                        # not drop the remaining faces of the frame.
                        try:
                            det = bounding_boxes[i, 0:4]
                            # Clip to the frame; out-of-range coordinates
                            # would give wrong or empty slices.
                            x0 = max(int(det[0]), 0)
                            y0 = max(int(det[1]), 0)
                            x1 = min(int(det[2]), frame_w)
                            y1 = min(int(det[3]), frame_h)
                            if x1 <= x0 or y1 <= y0:
                                continue

                            cropped = frame[y0:y1, x0:x1, :]
                            scaled = cv2.resize(
                                cropped,
                                (INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE),
                                interpolation=cv2.INTER_CUBIC)
                            scaled = facenet.prewhiten(scaled)
                            scaled_reshape = scaled.reshape(
                                -1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 3)
                            feed_dict = {
                                images_placeholder: scaled_reshape,
                                phase_train_placeholder: False
                            }
                            emb_array = sess.run(embeddings,
                                                 feed_dict=feed_dict)

                            predictions = model.predict_proba(emb_array)
                            best_class_indices = np.argmax(predictions,
                                                           axis=1)
                            best_class_probabilities = predictions[
                                np.arange(len(best_class_indices)),
                                best_class_indices]
                            best_name = class_names[best_class_indices[0]]
                            print("Name: {}, Probability: {}".format(
                                best_name, best_class_probabilities))

                            cv2.rectangle(frame, (x0, y0), (x1, y1),
                                          (0, 255, 0), 2)
                            text_x = x0
                            text_y = y1 + 20
                            if best_class_probabilities[0] > 0.15:
                                name = best_name
                            else:
                                name = "Unknown"
                            cv2.putText(frame, name, (text_x, text_y),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                        (255, 255, 255), thickness=1,
                                        lineType=2)
                            cv2.putText(
                                frame,
                                str(round(best_class_probabilities[0], 3)),
                                (text_x, text_y + 17),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1,
                                (255, 255, 255), thickness=1, lineType=2)
                            person_detected[best_name] += 1
                        except Exception as e:
                            print(e)

                    cv2.imshow('Face Recognition', frame)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
            finally:
                cap.release()
                cv2.destroyAllWindows()
def main(args):
    """Align the FaceScrub images and write an ``lst`` index of the results.

    Reads ``args.input_dir`` and ``args.output_dir``. For each dataset image
    whose ``(person, image id)`` key appears in the uncropped-features JSON
    list, the image is warped to a 112x112 crop using, in order of preference:

    1. the landmarks stored on the dataset entry, refined by a second MTCNN
       pass on a coarse 2x warp (counter 0),
    2. the MTCNN detection whose IoU with the annotated box exceeds 0.3
       (counter 1),
    3. a forced detection inside the annotated box with score >= 0.3
       (counter 2),
    4. a plain crop derived from the annotated box (counter 3) or, without a
       box, the image minus a 6% border (counter 4), resized to the target.

    Each result is saved as PNG under ``<output_dir>/<person>/`` and one line
    ``1\\t<file>\\t<class>`` is appended to ``<output_dir>/lst``.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    image_dir = os.path.join(args.input_dir, 'facescrub')
    dataset = face_image.get_dataset('facescrub', image_dir)
    print('dataset size', len(dataset))

    # (person name, image id) -> annotated bounding box.
    bbox = {}
    for label_file in ['facescrub_actors.txt', 'facescrub_actresses.txt']:
        label_file = os.path.join(args.input_dir, label_file)
        with open(label_file, 'r') as label_fh:
            for line_no, line in enumerate(label_fh):
                if line_no == 0:
                    # The first line of each label file is skipped.
                    continue
                vec = line.split("\t")
                key = (vec[0], int(vec[2]))
                bbox[key] = [int(x) for x in vec[4].split(',')]
    print('bbox size', len(bbox))

    # Keys of the images that should be processed at all.
    valid_key = set()
    features_list = os.path.join(args.input_dir, 'facescrub_uncropped_features_list.json')
    with open(features_list) as json_fh:
        json_data = json.load(json_fh)['path']
    for _data in json_data:
        key = _data.split('/')[-1]
        pos = key.rfind('.')
        if pos < 0:
            print(_data)
        else:
            key = key[0:pos]
        keys = key.split('_')
        if len(keys) != 2:
            print('err', key, _data)
            continue
        valid_key.add((keys[0], int(keys[1])))
    print('valid keys', len(valid_key))

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
    factor = 0.709  # scale factor
    image_size = [112, 112]  # (height, width) of the aligned crop
    # Five-point landmark template that detected landmarks are mapped onto.
    src = np.array([
        [30.2946, 51.6963],
        [65.5318, 51.5014],
        [48.0252, 71.7366],
        [33.5493, 92.3655],
        [62.7299, 92.2041]], dtype=np.float32)
    if image_size[1] == 112:
        src[:, 0] += 8.0

    def warp(image, landmarks, template, width, height):
        # Fit a similarity transform landmarks -> template and apply it.
        tform = trans.SimilarityTransform()
        tform.estimate(landmarks, template)
        M = tform.params[0:2, :]
        return cv2.warpAffine(image, M, (width, height), borderValue=0.0)

    output_filename = os.path.join(output_dir, 'lst')
    with open(output_filename, "w") as text_file:
        nrof_images_total = 0
        nrof = np.zeros((5,), dtype=np.int32)  # per-strategy success counters
        for fimage in dataset:
            if nrof_images_total % 100 == 0:
                print("Processing %d, (%s)" % (nrof_images_total, nrof))
            nrof_images_total += 1

            image_path = fimage.image_path
            if not os.path.exists(image_path):
                print('image not found (%s)' % image_path)
                continue

            # <...>/<person>/<name>_<id>.<ext>  ->  key (person, id)
            _paths = fimage.image_path.split('/')
            print(fimage.image_path)
            a, b = _paths[-2], _paths[-1]
            bname = b[0:b.rfind('.')]
            img_id = int(bname[(bname.rfind('_') + 1):])
            key = (a, img_id)
            if key not in valid_key:
                continue
            assert key in bbox
            fimage.bbox = bbox[key]

            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                print('{}: {}'.format(image_path, e))
                continue
            if img.ndim < 2:
                print('Unable to align "%s", img dim error' % image_path)
                continue
            if img.ndim == 2:
                img = to_rgb(img)
            img = img[:, :, 0:3]

            tb = bname.replace(' ', '_') + ".png"
            ta = a.replace(' ', '_')
            target_dir = os.path.join(output_dir, ta)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            target_file = os.path.join(target_dir, tb)

            warped = None

            # Strategy 0: coarse warp from stored landmarks, then re-detect.
            if fimage.landmark is not None:
                dst = fimage.landmark.astype(np.float32)
                warped0 = warp(img, dst, src[0:3, :] * 1.5 + image_size[0] * 0.25,
                               image_size[1] * 2, image_size[0] * 2)
                _minsize = image_size[0]
                bounding_boxes, points = detect_face.detect_face(
                    warped0, _minsize, pnet, rnet, onet, threshold, factor)
                if bounding_boxes.shape[0] > 0:
                    # points holds one column per face, reshaped to 5 (x, y) rows.
                    dst = points[:, 0].reshape((2, 5)).T
                    warped = warp(warped0, dst, src, image_size[1], image_size[0])
                    nrof[0] += 1

            # Strategy 1: detection that best overlaps the annotated box.
            if warped is None and fimage.bbox is not None:
                _minsize = img.shape[0] // 4
                bounding_boxes, points = detect_face.detect_face(
                    img, _minsize, pnet, rnet, onet, threshold, factor)
                if bounding_boxes.shape[0] > 0:
                    det = bounding_boxes[:, 0:4]
                    best_iou, best_index = 0.0, 0
                    for i in range(det.shape[0]):
                        iou = IOU(fimage.bbox, det[i])
                        if iou > best_iou:
                            best_iou, best_index = iou, i
                    if best_iou > 0.3:
                        dst = points[:, best_index].reshape((2, 5)).T
                        warped = warp(img, dst, src, image_size[1], image_size[0])
                        nrof[1] += 1

            # Strategy 2: force the detector onto the annotated box.
            if warped is None and fimage.bbox is not None:
                bb = fimage.bbox
                bounding_boxes, points = detect_face.detect_face_force(img, bb, pnet, rnet, onet)
                assert bounding_boxes.shape[0] == 1
                _box = bounding_boxes[0]
                if _box[4] >= 0.3:
                    dst = points[:, 0].reshape((2, 5)).T
                    warped = warp(img, dst, src, image_size[1], image_size[0])
                    nrof[2] += 1

            # Strategies 3/4: no usable landmarks, fall back to a plain crop.
            if warped is None:
                roi = np.zeros((4,), dtype=np.int32)
                roi[0] = int(img.shape[1] * 0.06)
                roi[1] = int(img.shape[0] * 0.06)
                roi[2] = img.shape[1] - roi[0]
                roi[3] = img.shape[0] - roi[1]
                if fimage.bbox is not None:
                    bb = fimage.bbox
                    h = bb[3] - bb[1]
                    w = bb[2] - bb[0]
                    x = bb[0]
                    y = bb[1]
                    # Width that matches the target aspect ratio at height h,
                    # centred on the box and clipped to the image width.
                    _w = int((float(h) / image_size[0]) * image_size[1])
                    x += (w - _w) // 2
                    x = max(0, x)
                    xw = min(x + _w, img.shape[1])
                    roi = np.array((x, y, xw, y + h), dtype=np.int32)
                    nrof[3] += 1
                else:
                    nrof[4] += 1
                warped = img[roi[1]:roi[3], roi[0]:roi[2], :]
                warped = cv2.resize(warped, (image_size[1], image_size[0]))

            bgr = warped[..., ::-1]  # reverse channel order before cv2.imwrite
            cv2.imwrite(target_file, bgr)
            oline = '%d\t%s\t%d\n' % (1, target_file, int(fimage.classname))
            text_file.write(oline)
def process(args, minsize, pnet, rnet, onet, threshold, factor, im_quene):
    """Align every image of every folder taken from ``im_quene``.

    Args:
        args: namespace; ``args.output_dir`` and ``args.image_size`` are read.
        minsize, threshold, factor: forwarded to ``detect_face.detect_face``.
        pnet, rnet, onet: the three MTCNN stage networks.
        im_quene: queue of folder paths; it is drained until ``empty()``
            reports true. Each folder's images are written to
            ``<args.output_dir>/<folder name>/<image name>``.

    For each readable image the largest, most central detected face is
    aligned with ``face_preprocess.preprocess``. When no face is found the
    image is passed to ``preprocess`` with ``bbox`` and ``landmark`` set to
    ``None``. Returns ``None``.
    """
    nrof_images_total = 0
    nrof = np.zeros((5, ), dtype=np.int32)  # [0]: face found, [1]: no face
    while not im_quene.empty():
        # Take one folder from the queue and walk over every image in it.
        fimage = im_quene.get()
        # basename(normpath(...)) also yields the folder name when the path
        # ends with a separator; split('/')[-1] would give '' in that case
        # and all classes would be written into args.output_dir itself.
        class_name = os.path.basename(os.path.normpath(fimage))
        for img_name in os.listdir(fimage):
            image_path = os.path.join(fimage, img_name)
            if nrof_images_total % 100 == 0:
                print("Processing %d, (%s)" % (nrof_images_total, nrof))
            nrof_images_total += 1
            if not os.path.exists(image_path):
                print('image not found (%s)' % image_path)
                continue
            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                print('{}: {}'.format(image_path, e))
                continue
            if img.ndim < 2:
                print('Unable to align "%s", img dim error' % image_path)
                continue
            if img.ndim == 2:
                img = to_rgb(img)
            img = img[:, :, 0:3]

            target_dir = os.path.join(args.output_dir, class_name)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            target_file = os.path.join(target_dir, img_name)

            _bbox = None
            _landmark = None
            bounding_boxes, points = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces > 0:
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]
                bindex = 0
                if nrof_faces > 1:
                    # Prefer large boxes close to the image centre.
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    # some extra weight on the centering
                    bindex = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
                _bbox = bounding_boxes[bindex, 0:4]
                _landmark = points[:, bindex].reshape((2, 5)).T
                nrof[0] += 1
            else:
                nrof[1] += 1

            warped = face_preprocess.preprocess(img, bbox=_bbox, landmark=_landmark,
                                                image_size=args.image_size)
            bgr = warped[..., ::-1]  # reverse channel order before cv2.imwrite
            cv2.imwrite(target_file, bgr)
def main(args):
    """Decode, align and save the images listed in a base64 record file.

    ``args.input_dir + "_clean_list.txt"`` lists the ``<person>/<id>.<ext>``
    entries to keep (only persons starting with ``m.``). ``args.input_dir``
    itself is read line by line; the first field is the person, the second
    the image id and the last the base64-encoded image. Every kept image in
    which MTCNN finds a face is aligned with ``face_preprocess.preprocess``
    (``args.image_size``), saved as ``<output_dir>/<person>/<id>.jpg`` and
    recorded in ``<output_dir>/faceinsight_align_<args.name>.lst``.

    Records whose payload cannot be decoded as an image are reported and
    skipped.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # person -> [label id, {image id: 1}]
    datamap = {}
    datasize = 0
    verr = 0
    with open(args.input_dir + "_clean_list.txt", 'r') as clean_list:
        for pp, line in enumerate(clean_list, 1):
            if pp % 10000 == 0:
                print('loading list', pp)
            line = line.strip()[2:]  # the first two characters are dropped
            if not line.startswith('m.'):
                continue
            vec = line.split('/')
            assert len(vec) == 2
            person, img = vec
            try:
                img_id = int(img.split('.')[0])
            except ValueError:
                verr += 1
                continue
            if person not in datamap:
                datamap[person] = [len(datamap), {img_id: 1}]
            else:
                datamap[person][1][img_id] = 1
            datasize += 1
    print('dataset size', args.name, datasize)
    print('dataset err', verr)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 100  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
    factor = 0.709  # scale factor
    print(minsize)
    print(threshold)
    print(factor)

    output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name)
    nrof_images_total = 0
    nrof_successfully_aligned = 0
    # These counters are never incremented here: this function has no
    # annotated boxes, so the IoU and forced-detection paths cannot run.
    # They are kept only so the summary output stays the same.
    nrof_changed = 0
    nrof_iou3 = 0
    nrof_force = 0

    with open(output_filename, "w") as text_file, open(args.input_dir, 'r') as records:
        for line in records:
            vec = line.strip().split()
            person = vec[0]
            img_id = int(vec[1])
            v = datamap.get(person)
            if v is None or img_id not in v[1]:
                continue
            labelid = v[0]

            if nrof_images_total % 100 == 0:
                print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned))
            nrof_images_total += 1

            # frombuffer replaces the deprecated binary mode of np.fromstring.
            nparr = np.frombuffer(base64.b64decode(vec[-1]), np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
            if img is None:
                print('Unable to decode image %s/%d' % (person, img_id))
                continue
            img = img[..., ::-1]  # to rgb

            target_dir = os.path.join(output_dir, person)
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
            target_path = os.path.join(target_dir, "%d.jpg" % img_id)

            bounding_boxes, points = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces == 0:
                continue

            bindex = 0
            if nrof_faces > 1:
                # Prefer large boxes close to the image centre.
                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]
                bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                img_center = img_size / 2
                offsets = np.vstack([
                    (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                    (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                ])
                offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                # some extra weight on the centering
                bindex = np.argmax(bounding_box_size - offset_dist_squared * 2.0)

            bb = bounding_boxes[bindex, 0:4]
            # One column per face, reshaped to five (x, y) rows.
            landmark = points[:, bindex].reshape((2, 5)).T
            warped = face_preprocess.preprocess(img, bbox=bb, landmark=landmark,
                                                image_size=args.image_size)
            misc.imsave(target_path, warped)
            nrof_successfully_aligned += 1
            oline = '%d\t%s\t%d' % (1, target_path, labelid)
            text_file.write("%s\n" % oline)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
    print('Number of changed: %d' % nrof_changed)
    print('Number of iou3: %d' % nrof_iou3)
    print('Number of force: %d' % nrof_force)
def main(args):
    """Detect faces with MTCNN, crop them and save them as PNG thumbnails.

    Walks the dataset returned by ``facenet.get_dataset(args.input_dir)``.
    Images whose ``<output_dir>/<class>/<name>.png`` already exists are
    skipped. For every other readable image the detected face box (or every
    box when ``args.detect_multiple_faces`` is set, otherwise the largest,
    most central one) is grown by ``args.margin``, clipped to the image,
    resized to ``args.image_size`` and written out. One line per written crop
    (file name and box) or per failed image (file name only) goes to a
    ``bounding_boxes_<random>.txt`` file in the output directory.
    """
    sleep(random.random())

    out_root = os.path.expanduser(args.output_dir)
    if not os.path.exists(out_root):
        os.makedirs(out_root)

    # Store some git revision info in a text file in the log directory
    script_dir, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(script_dir, out_root, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction,
            allow_growth=True)
        session_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
        sess = tf.Session(config=session_config)
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        out_root, 'bounding_boxes_%05d.txt' % random_key)

    total_count = 0
    aligned_count = 0
    with open(bounding_boxes_filename, "w", encoding='utf-8') as text_file:
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            class_dir = os.path.join(out_root, cls.name)
            if not os.path.exists(class_dir):
                os.makedirs(class_dir)
                # NOTE(review): the per-class shuffle only runs when this
                # process creates the class directory - confirm intended.
                if args.random_order:
                    random.shuffle(cls.image_paths)

            for image_path in cls.image_paths:
                total_count += 1
                stem = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(class_dir, stem + '.png')
                print(image_path)
                if os.path.exists(output_filename):
                    continue

                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    print('{}: {}'.format(image_path, e))
                    continue

                if img.ndim < 2:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue
                if img.ndim == 2:
                    img = facenet.to_rgb(img)
                img = img[:, :, 0:3]

                bounding_boxes, _ = detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                if nrof_faces == 0:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue

                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]
                if nrof_faces == 1:
                    det_arr = [np.squeeze(det)]
                elif args.detect_multiple_faces:
                    det_arr = [np.squeeze(det[k]) for k in range(nrof_faces)]
                else:
                    # Keep the box with the best area-minus-offset score.
                    areas = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    # some extra weight on the centering
                    index = np.argmax(areas - offset_dist_squared * 2.0)
                    det_arr = [det[index, :]]

                filename_base, file_extension = os.path.splitext(output_filename)
                for i, face_box in enumerate(det_arr):
                    face_box = np.squeeze(face_box)
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(face_box[0] - args.margin / 2, 0)
                    bb[1] = np.maximum(face_box[1] - args.margin / 2, 0)
                    bb[2] = np.minimum(face_box[2] + args.margin / 2, img_size[1])
                    bb[3] = np.minimum(face_box[3] + args.margin / 2, img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    scaled = misc.imresize(
                        cropped, (args.image_size, args.image_size), interp='bilinear')
                    aligned_count += 1
                    if args.detect_multiple_faces:
                        output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                    else:
                        output_filename_n = "{}{}".format(filename_base, file_extension)
                    misc.imsave(output_filename_n, scaled)
                    text_file.write('%s %d %d %d %d\n' %
                                    (output_filename_n, bb[0], bb[1], bb[2], bb[3]))

    print('Total number of images: %d' % total_count)
    print('Number of successfully aligned images: %d' % aligned_count)
def main(args):
    """Detect face boxes and landmarks for a dataset and write them to a list.

    Uses ``args.name`` (dataset name, also selects detector settings),
    ``args.input_dir``, ``args.output_dir`` and ``args.force``. No image is
    written. For every readable image one tab-separated line goes to
    ``<output_dir>/faceinsight_align_<name>.lst``:

    * face found: ``0, path, class, x1, y1, x2, y2`` followed by the ten
      landmark values,
    * no face and ``args.force > 0``: ``0, path, class`` plus the annotated
      box when the dataset entry has one,
    * no face otherwise: nothing is written.

    Without an annotated box the largest, most central detection is used.
    With an annotated box the detection with the highest IoU is used, and a
    forced detection inside the box is the fallback when nothing is detected.
    """
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = face_image.get_dataset(args.name, args.input_dir)
    print('dataset size', args.name, len(dataset))

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        sess = tf.Session()
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 100  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
    factor = 0.709  # scale factor
    if args.name == 'lfw' or args.name == 'webface' or args.name == 'vgg':
        minsize = 20
        threshold = [0.6, 0.7, 0.9]
        factor = 0.85
    if args.name == 'ytf':
        minsize = 20
        threshold = [0.6, 0.7, 0.4]
        factor = 0.85
    print(minsize)
    print(threshold)
    print(factor)

    output_filename = os.path.join(output_dir, 'faceinsight_align_%s.lst' % args.name)
    nrof_images_total = 0
    nrof_successfully_aligned = 0
    nrof_changed = 0  # never incremented; kept for the summary output
    nrof_iou3 = 0
    nrof_force = 0

    with open(output_filename, "w") as text_file:
        for fimage in dataset:
            if nrof_images_total % 100 == 0:
                print("Processing %d, (%d)" % (nrof_images_total, nrof_successfully_aligned))
            nrof_images_total += 1

            image_path = fimage.image_path
            if not os.path.exists(image_path):
                print('image not found (%s)' % image_path)
                continue
            try:
                img = misc.imread(image_path)
            except (IOError, ValueError, IndexError) as e:
                print('{}: {}'.format(image_path, e))
                continue
            if img.ndim < 2:
                print('Unable to align "%s", img dim error' % image_path)
                continue
            if img.ndim == 2:
                img = to_rgb(img)
            img = img[:, :, 0:3]

            _minsize = minsize
            if fimage.bbox is not None:
                # With an annotated box the minimum face size is the smaller
                # of the box sides and half of each image side.
                _bb = fimage.bbox
                _minsize = min([
                    _bb[2] - _bb[0], _bb[3] - _bb[1],
                    img.shape[0] // 2, img.shape[1] // 2
                ])
            bounding_boxes, points = detect_face.detect_face(
                img, _minsize, pnet, rnet, onet, threshold, factor)
            bindex = -1
            nrof_faces = bounding_boxes.shape[0]

            if fimage.bbox is None and nrof_faces > 0:
                bindex = 0
                if nrof_faces > 1:
                    # Prefer large boxes close to the image centre.
                    det = bounding_boxes[:, 0:4]
                    img_size = np.asarray(img.shape)[0:2]
                    bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    # some extra weight on the centering
                    bindex = np.argmax(bounding_box_size - offset_dist_squared * 2.0)

            if fimage.bbox is not None:
                if nrof_faces > 0:
                    assert (bounding_boxes.shape[0] == points.shape[1])
                    det = bounding_boxes[:, 0:4]
                    best_iou, best_index = 0.0, 0
                    # range works on Python 2 and 3; xrange is Python 2 only.
                    for i in range(det.shape[0]):
                        iou = IOU(fimage.bbox, det[i])
                        if iou > best_iou:
                            best_iou, best_index = iou, i
                    # NOTE(review): best_iou starts at 0.0, so this test is
                    # always true and the best match is accepted whatever its
                    # overlap - confirm whether "> 0.3" was intended.
                    if best_iou > -0.3:
                        bindex = best_index
                        nrof_iou3 += 1
                if bindex < 0:
                    # Nothing detected: force the detector onto the box.
                    bounding_boxes, points = detect_face.detect_face_force(
                        img, fimage.bbox, pnet, rnet, onet)
                    bindex = 0
                    nrof_force += 1

            if bindex >= 0:
                bb = np.squeeze(bounding_boxes[bindex, 0:4])
                landmarks = list(points[:, bindex].flatten())
                assert (len(landmarks) == 10)
                nrof_successfully_aligned += 1
                oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\t' % (
                    0, fimage.image_path, int(fimage.classname),
                    bb[0], bb[1], bb[2], bb[3])
                oline += '\t'.join([str(x) for x in landmarks])
                text_file.write("%s\n" % oline)
            else:
                print('Unable to align "%s", no face detected' % image_path)
                if args.force > 0:
                    if fimage.bbox is None:
                        oline = '%d\t%s\t%d\n' % (0, fimage.image_path, int(fimage.classname))
                    else:
                        bb = fimage.bbox
                        oline = '%d\t%s\t%d\t%d\t%d\t%d\t%d\n' % (
                            0, fimage.image_path, int(fimage.classname),
                            bb[0], bb[1], bb[2], bb[3])
                    text_file.write(oline)

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
    print('Number of changed: %d' % nrof_changed)
    print('Number of iou3: %d' % nrof_iou3)
    print('Number of force: %d' % nrof_force)
def main(args):
    """Crop MTCNN-detected faces out of a dataset and save them as PNG files.

    The dataset comes from ``facenet.get_dataset(args.input_dir)``. Images
    whose target PNG already exists are skipped. A readable image yields one
    crop per selected face box: every box with ``args.detect_multiple_faces``,
    otherwise the single box that is largest and closest to the centre. Boxes
    are padded by ``args.margin``, clipped to the image and resized to
    ``args.image_size``. Written crops and failed images are logged to a
    ``bounding_boxes_<random>.txt`` file in the output directory.
    """
    sleep(random.random())

    destination = os.path.expanduser(args.output_dir)
    if not os.path.exists(destination):
        os.makedirs(destination)

    # Store some git revision info in a text file in the log directory
    source_dir, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(source_dir, destination, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(destination, 'bounding_boxes_%05d.txt' % random_key)

    seen = 0
    saved = 0
    with open(bounding_boxes_filename, "w") as text_file:

        def report_failure(source_path, png_path):
            # Same console message and log line for every unalignable image.
            print('Unable to align "%s"' % source_path)
            text_file.write('%s\n' % (png_path))

        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(destination, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                # NOTE(review): the per-class shuffle only runs when this
                # process creates the class directory - confirm intended.
                if args.random_order:
                    random.shuffle(cls.image_paths)

            for image_path in cls.image_paths:
                seen += 1
                base_name = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, base_name + '.png')
                print(image_path)
                if os.path.exists(output_filename):
                    continue

                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    errorMessage = '{}: {}'.format(image_path, e)
                    print(errorMessage)
                    continue

                if img.ndim < 2:
                    report_failure(image_path, output_filename)
                    continue
                if img.ndim == 2:
                    img = facenet.to_rgb(img)
                img = img[:, :, 0:3]

                bounding_boxes, _ = detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                if not nrof_faces > 0:
                    report_failure(image_path, output_filename)
                    continue

                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]
                selected = []
                if nrof_faces > 1 and args.detect_multiple_faces:
                    for k in range(nrof_faces):
                        selected.append(np.squeeze(det[k]))
                elif nrof_faces > 1:
                    widths = det[:, 2] - det[:, 0]
                    heights = det[:, 3] - det[:, 1]
                    bounding_box_size = widths * heights
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    # some extra weight on the centering
                    index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
                    selected.append(det[index, :])
                else:
                    selected.append(np.squeeze(det))

                for i, box in enumerate(selected):
                    box = np.squeeze(box)
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(box[0] - args.margin / 2, 0)
                    bb[1] = np.maximum(box[1] - args.margin / 2, 0)
                    bb[2] = np.minimum(box[2] + args.margin / 2, img_size[1])
                    bb[3] = np.minimum(box[3] + args.margin / 2, img_size[0])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    scaled = misc.imresize(cropped, (args.image_size, args.image_size),
                                           interp='bilinear')
                    saved += 1
                    filename_base, file_extension = os.path.splitext(output_filename)
                    if args.detect_multiple_faces:
                        output_filename_n = "{}_{}{}".format(filename_base, i, file_extension)
                    else:
                        output_filename_n = "{}{}".format(filename_base, file_extension)
                    misc.imsave(output_filename_n, scaled)
                    text_file.write('%s %d %d %d %d\n' %
                                    (output_filename_n, bb[0], bb[1], bb[2], bb[3]))

    print('Total number of images: %d' % seen)
    print('Number of successfully aligned images: %d' % saved)
def All(img_path):
    """Identify the person shown in the image at ``img_path``.

    Every call builds a fresh graph and session, loads MTCNN, the FaceNet
    model and the pickled classifier, and closes the session on return. The
    image is read as grayscale, halved in size and searched for faces. The
    first face whose box lies strictly inside the frame is embedded and
    classified.

    Args:
        img_path: path of the image file to analyse.

    Returns:
        ``(name, img_path)`` where ``name`` is the entry of the sorted
        ``./train_img`` directory listing at the predicted class index;
        the string ``'Out of Face'`` when no face is detected;
        ``None`` when faces are detected but every box touches the frame
        border (or ``./train_img`` is empty).
    """
    modeldir = './model/20170511-185253.pb'
    classifier_filename = './class/classifier.pkl'
    npy = './npy'
    train_img = "./train_img"

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        session_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
        # The context manager makes the session the default one and closes it
        # on exit, so repeated calls do not accumulate open sessions.
        with tf.Session(config=session_config) as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

            minsize = 20  # minimum size of face
            threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector stages
            factor = 0.709  # scale factor
            image_size = 182
            input_image_size = 160

            # NOTE(review): names come from the directory listing, not from
            # the class_names stored with the classifier - confirm that both
            # are in the same order.
            HumanNames = os.listdir(train_img)
            HumanNames.sort()

            facenet.load_model(modeldir)
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            embedding_size = embeddings.get_shape()[1]

            classifier_filename_exp = os.path.expanduser(classifier_filename)
            # NOTE(review): pickle.load can execute arbitrary code; only use
            # a classifier file that comes from a trusted source.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, _class_names) = pickle.load(infile)

            frame = cv2.imread(img_path, 0)
            frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
            if frame.ndim == 2:
                frame = facenet.to_rgb(frame)
            frame = frame[:, :, 0:3]

            bounding_boxes, _ = detect_face.detect_face(
                frame, minsize, pnet, rnet, onet, threshold, factor)
            nrof_faces = bounding_boxes.shape[0]
            if nrof_faces == 0:
                return 'Out of Face'

            # Truncate the float box corners to integers.
            bb = bounding_boxes[:, 0:4].astype(np.int32)
            frame_h, frame_w = len(frame), len(frame[0])
            for i in range(nrof_faces):
                x0, y0, x1, y1 = (int(v) for v in bb[i])
                # Skip boxes that touch or cross the frame border.
                if x0 <= 0 or y0 <= 0 or x1 >= frame_w or y1 >= frame_h:
                    continue

                # Per-face locals replace the old lists indexed by i, which
                # went out of step with i as soon as one face was skipped.
                face = frame[y0:y1, x0:x1, :]
                face = facenet.flip(face, False)
                face = misc.imresize(face, (image_size, image_size), interp='bilinear')
                face = cv2.resize(face, (input_image_size, input_image_size),
                                  interpolation=cv2.INTER_CUBIC)
                face = facenet.prewhiten(face)
                batch = face.reshape(-1, input_image_size, input_image_size, 3)

                emb_array = np.zeros((1, embedding_size))
                feed_dict = {
                    images_placeholder: batch,
                    phase_train_placeholder: False
                }
                emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict)

                predictions = model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                if HumanNames:
                    return HumanNames[best_class_indices[0]], img_path
            return None
def _select_primary_face(bounding_boxes, points, img_shape):
    """Choose which detection to align and return its box and landmarks.

    With a single detection that detection is used.  With several, each one
    is scored as ``box area - 2 * squared distance of the box centre from
    the image centre`` and the highest score wins.

    Args:
        bounding_boxes: detector output; columns 0..3 are read as
            ``x1, y1, x2, y2``.
        points: detector landmark output; column ``i`` holds the ten values
            of detection ``i``.
        img_shape: shape of the image; the first two entries are used as
            ``(height, width)``.

    Returns:
        ``(bbox, landmark)`` where ``bbox`` is the selected row restricted
        to its first four columns and ``landmark`` is the selected landmark
        column reshaped to ``(2, 5)`` and transposed.
    """
    det = bounding_boxes[:, 0:4]
    bindex = 0
    if bounding_boxes.shape[0] > 1:
        img_size = np.asarray(img_shape)[0:2]
        bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        img_center = img_size / 2
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - img_center[0],
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        # some extra weight on the centering
        bindex = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
    bbox = bounding_boxes[bindex, 0:4]
    landmark = points[:, bindex].reshape((2, 5)).T
    return bbox, landmark


def main(args):
    """Align every image of the 'lfw' dataset and write an index file.

    For each dataset entry the image is loaded, faces are detected with
    MTCNN, and ``face_preprocess.preprocess`` is applied using the selected
    detection (or with no box/landmarks when nothing was detected).  The
    result is written below ``args.output_dir`` under the image's original
    parent-directory name and file name, and one line
    ``1<TAB>target_file<TAB>class`` is appended to ``<output_dir>/lst``.

    Images that do not exist, cannot be read, or have fewer than two
    dimensions are reported on stdout and skipped.

    Args:
        args: parsed options; ``input_dir``, ``output_dir`` and
            ``image_size`` are read.
    """
    dataset = face_image.get_dataset('lfw', args.input_dir)
    print('dataset size', 'lfw', len(dataset))

    print('Creating networks and loading parameters')

    # MTCNN detector settings.
    minsize = 20
    threshold = [0.6, 0.7, 0.9]
    factor = 0.85

    with tf.Graph().as_default():
        # The detector networks keep using this session for every image, so
        # it stays open for the whole run and is closed when the block exits
        # (it used to be left open).
        with tf.Session() as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

            if not os.path.exists(args.output_dir):
                os.makedirs(args.output_dir)
            output_filename = os.path.join(args.output_dir, 'lst')

            with open(output_filename, "w") as text_file:
                nrof_images_total = 0
                # Counters shown in the progress line: [0] images with at
                # least one detection, [1] images without any; the remaining
                # slots are not updated here.
                nrof = np.zeros((5,), dtype=np.int32)
                for fimage in dataset:
                    if nrof_images_total % 100 == 0:
                        print("Processing %d, (%s)" % (nrof_images_total, nrof))
                    nrof_images_total += 1

                    image_path = fimage.image_path
                    if not os.path.exists(image_path):
                        print('image not found (%s)' % image_path)
                        continue

                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                        continue

                    if img.ndim < 2:
                        print('Unable to align "%s", img dim error' % image_path)
                        continue
                    if img.ndim == 2:
                        img = to_rgb(img)
                    img = img[:, :, 0:3]

                    # Mirror the last two path components below output_dir.
                    _paths = fimage.image_path.split('/')
                    a, b = _paths[-2], _paths[-1]
                    target_dir = os.path.join(args.output_dir, a)
                    if not os.path.exists(target_dir):
                        os.makedirs(target_dir)
                    target_file = os.path.join(target_dir, b)

                    bounding_boxes, points = detect_face.detect_face(
                        img, minsize, pnet, rnet, onet, threshold, factor)
                    if bounding_boxes.shape[0] > 0:
                        _bbox, _landmark = _select_primary_face(
                            bounding_boxes, points, img.shape)
                        nrof[0] += 1
                    else:
                        # No detection: preprocess is still called, without
                        # box or landmarks.
                        _bbox, _landmark = None, None
                        nrof[1] += 1

                    warped = face_preprocess.preprocess(
                        img, bbox=_bbox, landmark=_landmark,
                        image_size=args.image_size)
                    # Reverse the channel axis before handing the image to
                    # cv2.imwrite.
                    bgr = warped[..., ::-1]
                    cv2.imwrite(target_file, bgr)
                    oline = '%d\t%s\t%d\n' % (1, target_file, int(fimage.classname))
                    text_file.write(oline)
fold = int(pair[2]) paths = [str(pair[0][0]), str(pair[1][0])] for path in paths: img_path = os.path.join(args.data_dir, 'CFP', path) txt_path = os.path.join(args.data_dir, 'CFP', path[0:-3]+"txt") landmark = [] for line in open(txt_path, 'r'): vec = line.strip().split(',') x = np.array( [float(x) for x in vec], dtype=np.float32) landmark.append(x) landmark = np.array(landmark, dtype=np.float32) #print(landmark.shape) img = misc.imread(img_path) _bbox = None _landmark = None bounding_boxes, points = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) nrof_faces = bounding_boxes.shape[0] if nrof_faces>0: nrof[0]+=1 else: bounding_boxes, points = detect_face.detect_face_force(img, minsize, pnet, rnet, onet) nrof_faces = bounding_boxes.shape[0] if nrof_faces>0: nrof[1]+=1 else: nrof[2]+=1 if nrof_faces>0: det = bounding_boxes[:,0:4] img_size = np.asarray(img.shape)[0:2] bindex = 0 if nrof_faces>1: