def find_faces(self, image):
    """Detect faces in ``image`` and wrap each detection in a ``Face``.

    Every box returned by ``align_detect_face.detect_face`` is grown by
    ``self.face_crop_margin / 2`` on each side, clipped to the image bounds
    and stored as an int32 array ``[col_min, row_min, col_max, row_max]``.
    The matching crop is resized to a ``self.face_crop_size`` square.

    Returns:
        A list of ``Face`` objects with ``container_image``,
        ``bounding_box`` and ``image`` set; empty when nothing is detected.
    """
    detections, _ = align_detect_face.detect_face(
        image, self.minsize, self.pnet, self.rnet, self.onet,
        self.threshold, self.factor)

    results = []
    for det in detections:
        found = Face()
        found.container_image = image

        frame_hw = np.asarray(image.shape)[0:2]
        pad = self.face_crop_margin / 2

        # Writing into the int32 array truncates the float coordinates.
        box = np.zeros(4, dtype=np.int32)
        box[0] = np.maximum(det[0] - pad, 0)
        box[1] = np.maximum(det[1] - pad, 0)
        box[2] = np.minimum(det[2] + pad, frame_hw[1])
        box[3] = np.minimum(det[3] + pad, frame_hw[0])
        found.bounding_box = box

        crop = image[box[1]:box[3], box[0]:box[2], :]
        found.image = misc.imresize(
            crop, (self.face_crop_size, self.face_crop_size),
            interp='bilinear')
        results.append(found)
    return results
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
    """Load images, crop the first detected face of each, and stack them.

    Args:
        image_paths: iterable of image file paths (``~`` is expanded).
        image_size: side length passed to ``resize`` for every crop.
        margin: total number of pixels added around the detected box
            (half on each side) before clipping to the image.
        gpu_memory_fraction: value for ``per_process_gpu_memory_fraction``.

    Returns:
        ``np.stack`` of the prewhitened crops, one per input path.

    Raises:
        IndexError: if no face is detected in one of the images.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)  # noqa: E501
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    img_list = []
    try:
        for image_path in image_paths:
            print(image_path)
            img = imageio.imread(os.path.expanduser(image_path))
            img_size = np.asarray(img.shape)[0:2]
            bounding_boxes, _ = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            if len(bounding_boxes) == 0:
                # Same exception type the bare index used to raise, but with
                # a message that names the offending file.
                raise IndexError(
                    'no face detected in {}'.format(image_path))

            det = np.squeeze(bounding_boxes[0, 0:4])
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - margin / 2, 0)
            bb[1] = np.maximum(det[1] - margin / 2, 0)
            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            aligned = resize(cropped, (image_size, image_size))
            img_list.append(facenet.prewhiten(aligned))
    finally:
        # The networks are only used inside this function; release the
        # session (and its GPU memory) instead of leaking one per call.
        sess.close()

    return np.stack(img_list)
def detect(self, image):
    """Run the face detector on ``image`` and return only the boxes.

    ``self.funs`` is unpacked as the positional network arguments; the
    second value returned by ``detect_face.detect_face`` is discarded.
    """
    boxes, _ignored = detect_face.detect_face(
        image,
        self.minsize,
        *self.funs,
        threshold=self.threshold,
        factor=self.factor)
    return boxes
def get_face_locations(self, image, model=None):
    """Return detected face boxes as ``[top, right, bottom, left]`` ints.

    The first four columns of each detection are read as
    ``(left, top, right, bottom)`` and reordered. ``model`` is accepted
    but not used.
    """
    detections, _ = detect_face.detect_face(
        image, self.minsize, self.pnet, self.rnet, self.onet,
        self.threshold, self.factor)

    locations = []
    for left, top, right, bottom in detections[:, 0:4]:
        locations.append([int(top), int(right), int(bottom), int(left)])
    return locations
def run(self):
    """Worker loop: detect faces on queued frames and publish the results.

    Builds the detection networks once, then repeatedly takes an image from
    ``self.inq``, detects faces and puts a dict on ``self.out_q`` with:

    * ``"src"``: a copy of the frame with 255-valued lines marking each
      margin-expanded, clipped face box;
    * ``"dst"``: a list with one prewhitened ``image_size`` x ``image_size``
      crop per detected face.

    The loop has no exit condition of its own.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160
    gpu_memory_fraction = 1.0

    def clamp_index(value, limit):
        # Use the last valid index when value falls outside [0, limit).
        if (value >= 0) & (value < limit):
            return value
        return limit - 1

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, log_device_placement=False))
        with sess.as_default():
            p_net, r_net, o_net = detect_face.create_mtcnn(sess, None)

            while True:
                frame = self.inq.get()
                frame_hw = np.asarray(frame.shape)[0:2]
                boxes, _ = detect_face.detect_face(
                    frame, minsize, p_net, r_net, o_net, threshold, factor)
                annotated = frame.copy()
                whitened_faces = []

                for row_idx in range(boxes.shape[0]):
                    det = np.squeeze(boxes[row_idx, 0:4])
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0] = np.maximum(det[0] - margin / 2, 0)
                    bb[1] = np.maximum(det[1] - margin / 2, 0)
                    bb[2] = np.minimum(det[2] + margin / 2, frame_hw[1])
                    bb[3] = np.minimum(det[3] + margin / 2, frame_hw[0])
                    face_crop = frame[bb[1]:bb[3], bb[0]:bb[2], :]

                    n_rows, n_cols = annotated.shape[0], annotated.shape[1]
                    # Vertical edges (left, right) of the box.
                    for col in (bb[0], bb[2]):
                        annotated[bb[1]:bb[3],
                                  clamp_index(col, n_cols), :] = 255
                    # Horizontal edges (top, bottom) of the box.
                    for row in (bb[1], bb[3]):
                        annotated[clamp_index(row, n_rows),
                                  bb[0]:bb[2], :] = 255

                    resized = Image.fromarray(face_crop).resize(
                        (image_size, image_size), Image.BILINEAR)
                    whitened_faces.append(
                        facenet.prewhiten(np.array(resized)))

                self.out_q.put({"src": annotated, "dst": whitened_faces})
def detect(self, frame):
    """Detect faces in ``frame`` and return them as ``(x, y, w, h)`` tuples.

    Detector output is cast to int. The top-left corner is clamped to be
    non-negative and the bottom-right corner is clamped to the frame size
    before the width and height are derived.
    """
    import facenet.src.align.detect_face as facenet  # lazy loading

    detections, _ = facenet.detect_face(
        frame, self._minfacesize, self._pnet, self._rnet, self._onet,
        self._threshold, self._factor)

    boxes = []
    for raw in detections:
        raw = raw.astype("int")
        x = max(raw[0], 0)
        y = max(raw[1], 0)
        w = min(raw[2], frame.shape[1]) - max(raw[0], 0)
        h = min(raw[3], frame.shape[0]) - max(raw[1], 0)
        boxes.append((x, y, w, h))
    return boxes
def getBoundingBoxes(img, minsize, threshold, factor):
    """Detect faces in ``img`` and draw a green rectangle around each one.

    A fresh graph, session and set of detection networks is created on
    every call and released before returning.

    Args:
        img: image array passed to the detector and drawn on with
            ``cv2.rectangle``.
        minsize: minimum face size forwarded to ``detect_face``.
        threshold: per-stage thresholds forwarded to ``detect_face``.
        factor: scale factor forwarded to ``detect_face``.

    Returns:
        ``(rectangle, bounding_boxes)``: the result of the last
        ``cv2.rectangle`` call, or ``img`` itself when no face was
        detected, together with the raw detector boxes.
    """
    with tf.Graph().as_default():
        # The context manager installs the session as default and closes
        # it on exit, so repeated calls do not leak sessions.
        with tf.Session() as sess:
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            bounding_boxes, _ = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)

    # Start from the input so the name is bound even with zero detections
    # (previously this raised UnboundLocalError).
    rectangle = img
    for box in bounding_boxes:
        x1, y1, x2, y2 = box[0:4]
        rectangle = cv2.rectangle(img, (int(x1), int(y1)),
                                  (int(x2), int(y2)), (0, 255, 0), 2)
    return rectangle, bounding_boxes
def look_for_faces():
    """Show camera frames with detected faces until ESC is pressed.

    For each frame read from ``cam`` the detector is run, every face is
    logged and outlined on the frame, and two windows per face are shown:
    the plain crop and the result of ``crop_and_align_image``. On exit the
    camera is released and all OpenCV windows are destroyed.
    """
    while True:
        img = cam.read()
        total_boxes, points = mtcnn.detect_face(
            img, minsize, pnet, rnet, onet, threshold, factor)
        logger.info(f'Found {len(total_boxes)} faces')

        # points.T pairs each box with its own 10 landmark values:
        # indices 0-4 are used as x, 5-9 as y.
        for idx, (bounding_box, keypoints) in enumerate(
                zip(total_boxes, points.T)):
            x, y, w, h = (int(bounding_box[0]),
                          int(bounding_box[1]),
                          int(bounding_box[2] - bounding_box[0]),
                          int(bounding_box[3] - bounding_box[1]))
            face = {
                'box': [x, y, w, h],
                'confidence': bounding_box[-1],
                'keypoints': {
                    'left_eye': (int(keypoints[0]), int(keypoints[5])),
                    'right_eye': (int(keypoints[1]), int(keypoints[6])),
                    'nose': (int(keypoints[2]), int(keypoints[7])),
                    'mouth_left': (int(keypoints[3]), int(keypoints[8])),
                    'mouth_right': (int(keypoints[4]), int(keypoints[9])),
                }
            }
            logger.info(
                f"Found face with a confidence of: {face['confidence']}")

            # Draw rectangle around face on the original image
            x1, y1, x2, y2 = (x, y, x + w, y + h)
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 255), 2)

            # Show cropped face image
            face_img = img[y1:y2, x1:x2]
            if face_img.shape[0] > 0 and face_img.shape[1] > 0:
                logger.debug(face_img.shape)
                cv2.imshow(f'cropped face {idx}', face_img)

            face_img_2 = crop_and_align_image(img, bounding_box,
                                              face['confidence'])
            if face_img_2.shape[0] > 0 and face_img_2.shape[1] > 0:
                logger.debug(face_img_2.shape)
                cv2.imshow(f'Aligned face {idx}', face_img_2)

        cv2.imshow('image', img)
        k = cv2.waitKey(200) & 0xff  # Press 'ESC' for exiting video
        if k == 27:
            break

    cam.release()
    cv2.destroyAllWindows()
def ReadDetectAndEncode(self, imgPath, sess, n_jitters=0):
    """Encode the image at ``imgPath`` if exactly one face is detected.

    The whole image (not a face crop) is prewhitened and fed through
    ``self.embeddings``. When ``n_jitters`` is truthy, ``n_jitters``
    augmented copies produced by ``dlib.jitter_image`` are encoded as well
    and the result is the average over the original and all jittered
    encodings.

    Returns:
        ``[]`` when the detector does not find exactly one face; otherwise
        a single embedding vector.
    """
    img = misc.imread(imgPath, mode='RGB')
    bbs, landmarks = detect_face.detect_face(
        img, self.minsize, self.pnet, self.rnet, self.onet,
        self.threshold, self.factor)
    if len(bbs) != 1:
        return []

    # Fixed normalization
    controlArray = np.expand_dims(np.zeros(1, dtype=np.int32), 1)
    controlArray += (np.expand_dims(np.ones(1, dtype=np.int32), 1)
                     * facenet.FIXED_STANDARDIZATION)

    def encode(picture):
        # Run forward pass to calculate embeddings for one image.
        batch = [facenet.prewhiten(picture)]
        return sess.run(self.embeddings, feed_dict={
            self.images_placeholder: batch,
            self.phase_train_placeholder: False,
            self.control_placeholder: controlArray
        })

    img_encoding = encode(img)
    if not n_jitters:
        return img_encoding[0]

    imgEncodings = img_encoding
    jitter_source = dlib.load_rgb_image(imgPath)
    for augmented in dlib.jitter_image(jitter_source,
                                       num_jitters=n_jitters):
        imgEncodings = np.concatenate(
            (imgEncodings, encode(augmented)), axis=0)
    return np.average(imgEncodings, axis=0)
def align(image_paths, image_size=160, margin=32, gpu_memory_fraction=1.0):
    """Crop and resize the first detected face of every usable image.

    Images that cannot be processed, or in which no face is detected, are
    reported with ``print`` and skipped.

    Args:
        image_paths: iterable of image paths (converted with ``str``).
        image_size: side length of the square output crops.
        margin: total pixels added around the detected box (half on each
            side) before clipping to the image.
        gpu_memory_fraction: value for ``per_process_gpu_memory_fraction``.

    Returns:
        ``(aligned_images, aligned_indices)``: the crops stacked into one
        array (an empty list when nothing was aligned) and the positions in
        ``image_paths`` they came from.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    aligned_indices = []
    aligned_images = []
    for i, image_path in enumerate(image_paths):
        try:
            img = misc.imread(str(image_path))
            img = img[:, :, 0:3]  # apply for 32bit image
            img_size = np.asarray(img.shape)[0:2]
            bounding_boxes, _ = detect_face.detect_face(
                img, minsize, pnet, rnet, onet, threshold, factor)
            if len(bounding_boxes) == 0:
                print('No bounding boxes: {}'.format(image_path))
                continue

            det = np.squeeze(bounding_boxes[0, 0:4])
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - margin / 2, 0)
            bb[1] = np.maximum(det[1] - margin / 2, 0)
            bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            # Prewhitening is intentionally left to the caller.
            aligned = misc.imresize(cropped, (image_size, image_size),
                                    interp='bilinear')
            aligned_indices.append(i)
            aligned_images.append(aligned)
        except Exception:
            # Best-effort per image, but no longer a bare ``except`` so
            # KeyboardInterrupt / SystemExit still propagate.
            print('Cannot align: {}'.format(image_path))

    if aligned_images:
        aligned_images = np.stack(aligned_images)
    return aligned_images, aligned_indices
def get_faces_live(img, pnet, rnet, onet, image_size):
    """Crop every confidently detected face out of a camera frame.

    Args:
        img: web camera frame.
        pnet: proposal net, first stage of the MTCNN face detection.
        rnet: refinement net, second stage of the MTCNN face detection.
        onet: output net, third stage of the MTCNN face detection.
        image_size: (int) side length of the square crops.

    Returns:
        faces: list with one resized crop per face whose fifth detection
            value exceeds 0.50.
        rects: list of ``[bb0, bb1, bb2, bb3]`` rectangle coordinates
            (margin-expanded and clipped to the frame), parallel to
            ``faces``.
    """
    # Default constants from the FaceNet repository implementation of the
    # MTCNN
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    margin = 44
    side = image_size

    frame_hw = np.asarray(img.shape)[0:2]
    detections, _ = detect_face(img=img, minsize=minsize, pnet=pnet,
                                rnet=rnet, onet=onet, threshold=threshold,
                                factor=factor)

    faces, rects = [], []
    # An empty detection result simply yields no iterations.
    for detection in detections:
        if not detection[4] > 0.50:
            continue
        corners = np.squeeze(detection[0:4])
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(corners[0] - margin / 2, 0)
        bb[1] = np.maximum(corners[1] - margin / 2, 0)
        bb[2] = np.minimum(corners[2] + margin / 2, frame_hw[1])
        bb[3] = np.minimum(corners[3] + margin / 2, frame_hw[0])
        crop = img[bb[1]:bb[3], bb[0]:bb[2], :]
        faces.append(imresize(arr=crop, size=(side, side), mode='RGB'))
        rects.append([bb[0], bb[1], bb[2], bb[3]])
    return faces, rects
def get_face(img, pnet, rnet, onet, image_size):
    """Return a resized crop of the first face the detector reports.

    The detected box is grown by half of a 44-pixel margin on each side,
    clipped to the image, cropped and resized to
    ``image_size`` x ``image_size``.

    Args:
        img: (numpy array) image file
        pnet: proposal net, first stage of the MTCNN face detection
        rnet: refinement net, second stage of the MTCNN face detection
        onet: output net, third stage of the MTCNN face detection
        image_size: (int) required square image size

    Returns:
        face_img: the resized face crop, or ``None`` when no face is
            detected.
    """
    # Default constants from the FaceNet repository implementation of the
    # MTCNN
    minsize = 20
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    margin = 44
    side = image_size

    frame_hw = np.asarray(img.shape)[0:2]
    detections, _ = detect_face(img=img, minsize=minsize, pnet=pnet,
                                rnet=rnet, onet=onet, threshold=threshold,
                                factor=factor)
    if len(detections) == 0:
        return None

    # Only the first detection is ever used.
    for detection in detections:
        corners = np.squeeze(detection[0:4])
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(corners[0] - margin / 2, 0)
        bb[1] = np.maximum(corners[1] - margin / 2, 0)
        bb[2] = np.minimum(corners[2] + margin / 2, frame_hw[1])
        bb[3] = np.minimum(corners[3] + margin / 2, frame_hw[0])
        crop = img[bb[1]:bb[3], bb[0]:bb[2], :]
        return imresize(arr=crop, size=(side, side), mode='RGB')
def _detectFaces(self, frames, out_dir):
    """Detect, crop, resize and preprocess every face in ``frames``.

    Args:
        frames: sized collection of ``(index, frame)`` pairs.
        out_dir: unused; kept for interface compatibility.

    Returns:
        A list of ``(frame_index, face_index, image)`` tuples, where
        ``image`` is the margin-expanded crop resized to
        ``self.image_size`` and passed through ``self._preprocess``.
    """
    start = time.time()
    alignedFrames = []
    for i, frame in frames:
        bounding_boxes, _ = detect_face.detect_face(
            frame, self.minsize, self.pnet, self.rnet, self.onet,
            self.threshold, self.factor)
        if bounding_boxes.shape[0] == 0:
            continue

        # resize to embedding_size according to bounding boxes
        img_size = np.asarray(frame.shape)[0:2]
        for j, det in enumerate(bounding_boxes[:, 0:4]):
            det = np.squeeze(det)
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - self.margin / 2, 0)
            bb[1] = np.maximum(det[1] - self.margin / 2, 0)
            bb[2] = np.minimum(det[2] + self.margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + self.margin / 2, img_size[0])
            cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
            scaled = misc.imresize(cropped,
                                   (self.image_size, self.image_size),
                                   interp='bilinear')
            scaled = self._preprocess(scaled)  # PREWHITEN ETC
            alignedFrames.append((i, j, scaled))

    processTime = time.time() - start
    nrof_frames = len(frames)
    # Avoid ZeroDivisionError when called with no frames.
    per_frame = processTime / nrof_frames if nrof_frames else 0.0
    # Single pre-formatted argument: prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print('face detection for %s took %s s, ~ %s s; found %s'
          % (nrof_frames, processTime, per_frame, len(alignedFrames)))
    return alignedFrames
def detect_faces(image_paths, image_size=160, margin=44):
    """Annotate images with detected face boxes, scores and landmarks.

    For every input image a copy named ``<original stem>_result.jpg`` is
    written next to it, with a lime rectangle, the confidence in percent
    and five lime landmark dots per detected face. An image without
    detections is reported with ``print`` and still saved.

    Args:
        image_paths: a single path or a list of paths (``~`` is expanded).
        image_size: unused; kept for interface compatibility.
        margin: unused; kept for interface compatibility.

    Returns:
        ``result_list``, which this function never fills (always ``[]``).
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor
    gpu_memory_fraction = 0.5

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = facenet_detect_face.create_mtcnn(sess, None)

    if isinstance(image_paths, str):
        image_paths = [image_paths]

    result_list = []
    font = None  # loaded on first use, then shared by all labels
    radius = 2
    for image in image_paths:
        path = os.path.expanduser(image)
        img = imageio.imread(path, pilmode='RGB')
        bounding_boxes, points = facenet_detect_face.detect_face(
            img, minsize, pnet, rnet, onet, threshold, factor)
        if len(bounding_boxes) < 1:
            print("can't detect face, remove ", image)

        nrof_faces = bounding_boxes.shape[0]
        # Context manager closes the underlying file once the annotated
        # copy has been written.
        with Image.open(path) as source_img:
            if nrof_faces:
                draw = ImageDraw.Draw(source_img)
            for i in range(nrof_faces):
                if font is None:
                    font = ImageFont.truetype("arial", 20)
                draw.rectangle(bounding_boxes[i, 0:4].tolist(),
                               outline="lime")
                # Place the label 30 px above the box's first corner.
                font_location = bounding_boxes[i, 0:2] - np.array([0, 30])
                confidence = "{:.6f}".format(bounding_boxes[i, 4] * 100)
                draw.text(font_location, str(confidence) + "%",
                          fill="white", font=font)
                # points rows 0-4 are used as x, rows 5-9 as y.
                for j in range(5):
                    point_x = points[j, i]
                    point_y = points[j + 5, i]
                    draw.ellipse((point_x - radius, point_y - radius,
                                  point_x + radius, point_y + radius),
                                 fill="lime")
            source_img.save(os.path.splitext(path)[0] + "_result.jpg",
                            "JPEG")
    return result_list
def align(self, img, margin=44, image_size=160): img = img[:, :, 0:3] bounding_boxes, _ = df.detect_face(img, self.minsize, self.pnet, self.rnet, self.onet, self.threshold, self.factor) nrof_faces = bounding_boxes.shape[0] #bb = np.zeros(4, dtype=np.int32) if nrof_faces > 0: det = bounding_boxes[:, 0:4] det_arr = [] img_size = np.asarray(img.shape)[0:2] if nrof_faces > 1: bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) img_center = img_size / 2 offsets = np.vstack([ (det[:, 0] + det[:, 2]) / 2 - img_center[1], (det[:, 1] + det[:, 3]) / 2 - img_center[0] ]) offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering det_arr.append(det[index, :]) else: det_arr.append(np.squeeze(det)) for i, det in enumerate(det_arr): det = np.squeeze(det) bb = np.zeros(4, dtype=np.int32) bb[0] = np.maximum(det[0] - margin / 2, 0) bb[1] = np.maximum(det[1] - margin / 2, 0) bb[2] = np.minimum(det[2] + margin / 2, img_size[1]) bb[3] = np.minimum(det[3] + margin / 2, img_size[0]) cropped = img[bb[1]:bb[3], bb[0]:bb[2], :] scaled = misc.imresize(cropped, (image_size, image_size), interp='bilinear') return scaled else: return None '''
def find_faces(self, img: Array3D, det_prob_threshold: float = None) -> List[BoundingBoxDTO]:
    """Detect faces and return the boxes above a probability threshold.

    The image is downscaled with ``ImgScaler`` before detection. Each
    detection becomes a ``BoundingBoxDTO`` grown by ``self.BOX_MARGIN / 2``
    on every side and clipped to the downscaled image. Boxes are then
    scaled by ``scaler.upscale_coefficient`` and those whose probability is
    not strictly greater than the threshold are dropped.

    Args:
        img: image to search.
        det_prob_threshold: minimum probability in ``[0, 1]``; defaults to
            ``self.det_prob_threshold`` when ``None``.

    Returns:
        The upscaled boxes that passed the threshold.
    """
    if det_prob_threshold is None:
        det_prob_threshold = self.det_prob_threshold
    assert 0 <= det_prob_threshold <= 1

    scaler = ImgScaler(self.IMG_LENGTH_LIMIT)
    img = scaler.downscale_img(img)
    nets = self._face_detection_nets
    stage_thresholds = [self.det_threshold_a, self.det_threshold_b,
                        self.det_threshold_c]
    raw = detect_face.detect_face(
        img, self.FACE_MIN_SIZE, nets.pnet, nets.rnet, nets.onet,
        stage_thresholds, self.SCALE_FACTOR)
    img_size = np.asarray(img.shape)[0:2]

    # First pass: build and log every detection before any filtering.
    candidates = []
    for row, landmarks in list(zip(raw[0], raw[1].transpose())):
        row = np.squeeze(row)
        margin = self.BOX_MARGIN / 2
        candidate = BoundingBoxDTO(
            x_min=int(np.maximum(row[0] - margin, 0)),
            y_min=int(np.maximum(row[1] - margin, 0)),
            x_max=int(np.minimum(row[2] + margin, img_size[1])),
            y_max=int(np.minimum(row[3] + margin, img_size[0])),
            np_landmarks=landmarks.reshape(2, 5).transpose(),
            probability=row[4])
        logger.debug(f"Found: {candidate}")
        candidates.append(candidate)

    # Second pass: upscale, then keep only boxes above the threshold.
    kept = []
    for candidate in candidates:
        upscaled = candidate.scaled(scaler.upscale_coefficient)
        if upscaled.probability > det_prob_threshold:
            kept.append(upscaled)
        else:
            logger.debug(
                f'Box filtered out because below threshold ({det_prob_threshold}): {upscaled}'
            )
    return kept
def align_face(pic_path):
    """Replace the image at ``pic_path`` with a crop of its main face.

    Does nothing when the path does not exist, the file cannot be read,
    the image has fewer than two dimensions, or no face is detected. With
    several detections the box maximising ``area - 2 * squared distance of
    the box centre from the image centre`` is used. Reads the module-level
    ``minsize``, ``pnet``, ``rnet``, ``onet``, ``threshold``, ``factor``,
    ``margin`` and ``image_size``.
    """
    if not os.path.exists(pic_path):
        return

    try:
        img = misc.imread(pic_path)
    except (IOError, ValueError, IndexError) as e:
        print('{}: {}'.format(pic_path, e))
        # Without this return the code below hit NameError on ``img``.
        return

    if img.ndim < 2:
        print('Unable to align "%s"' % pic_path)
        return
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    img = img[:, :, 0:3]

    bounding_boxes, _ = detect_face.detect_face(
        img, minsize, pnet, rnet, onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    if nrof_faces == 0:
        return

    det = bounding_boxes[:, 0:4]
    img_size = np.asarray(img.shape)[0:2]
    if nrof_faces > 1:
        bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        img_center = img_size / 2
        offsets = np.vstack([
            (det[:, 0] + det[:, 2]) / 2 - img_center[1],
            (det[:, 1] + det[:, 3]) / 2 - img_center[0]
        ])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        # some extra weight on the centering
        index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
        det = det[index, :]

    det = np.squeeze(det)
    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - margin / 2, 0)
    bb[1] = np.maximum(det[1] - margin / 2, 0)
    bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
    bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
    scaled = misc.imresize(cropped, (image_size, image_size),
                           interp='bilinear')
    misc.imsave(pic_path, scaled)
def find_faces(self, image, frame_downscale):
    """Detect faces on a subsampled image and crop them at full resolution.

    Detection runs on every ``frame_downscale``-th row and column of
    ``image``. Each detection is multiplied by ``frame_downscale`` to map
    it back to the original image, grown by ``self.face_crop_margin / 2``
    on every side and clipped to the image bounds.

    Returns:
        A list of ``Face`` objects with ``container_image``,
        ``bounding_box`` (int32, 4 values) and ``image`` (crop resized to a
        ``self.face_crop_size`` square) set.
    """
    subsampled = image[::frame_downscale, ::frame_downscale, :]
    detections, _ = detect_face.detect_face(
        subsampled,
        self.minsize,
        self.pnet,
        self.rnet,
        self.onet,
        self.threshold,
        self.factor,
    )

    faces = []
    for detection in detections:
        # updating bounding boxes wrt downscaling
        scaled_det = detection * frame_downscale

        face = Face()
        face.container_image = image
        face.bounding_box = np.zeros(4, dtype=np.int32)
        box = face.bounding_box
        img_size = np.asarray(image.shape)[0:2]
        half_margin = self.face_crop_margin / 2
        box[0] = np.maximum(scaled_det[0] - half_margin, 0)
        box[1] = np.maximum(scaled_det[1] - half_margin, 0)
        box[2] = np.minimum(scaled_det[2] + half_margin, img_size[1])
        box[3] = np.minimum(scaled_det[3] + half_margin, img_size[0])

        # cropping and resizing image
        crop = image[box[1]:box[3], box[0]:box[2], :]
        target = (self.face_crop_size, self.face_crop_size)
        face.image = misc.imresize(crop, target, interp="bilinear")
        faces.append(face)
    return faces
def detect_faces(self, image, margin=20, min_face_size=20,
                 thresholds=(0.6, 0.7, 0.7), factor=0.709,
                 adjust_face=False, *args, **kw):
    """Detect faces and return resized crops, boxes and landmark points.

    Args:
        image: image array to search.
        margin: total pixels added around each box (``margin // 2`` per
            side) before clipping to the image.
        min_face_size: minimum face size forwarded to the detector.
        thresholds: per-stage thresholds forwarded to the detector.
        factor: scale factor forwarded to the detector.
        adjust_face: when true, the stacked crops are passed through
            ``self._adjust_faces`` together with ``points`` and any extra
            ``*args`` / ``**kw``.

    Returns:
        ``(faces, bounding_boxes, points)``. ``faces`` is the stack of
        crops resized to ``self._FACE_SIZE``; ``bounding_boxes`` and
        ``points`` are the detector outputs rounded to integers (every
        column is rounded, not only the coordinates).
        ``(None, None, None)`` when no face is detected.
    """
    img_size = np.asarray(image.shape)[0:2]
    bounding_boxes, points = detect_face.detect_face(
        image, min_face_size, self._pnet, self._rnet, self._onet,
        thresholds, factor)
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    bounding_boxes = np.round(bounding_boxes).astype(int)
    points = np.round(points).astype(int)

    faces = []
    for box in bounding_boxes:
        x0 = np.maximum(box[0] - margin // 2, 0)
        y0 = np.maximum(box[1] - margin // 2, 0)
        x1 = np.minimum(box[2] + margin // 2, img_size[1])
        y1 = np.minimum(box[3] + margin // 2, img_size[0])
        face = image[y0:y1, x0:x1, :]
        faces.append(
            misc.imresize(face, self._FACE_SIZE, interp='bilinear'))

    if not faces:
        return None, None, None

    faces = np.stack(faces)
    if adjust_face:
        faces = self._adjust_faces(faces, points, *args, **kw)
    return faces, bounding_boxes, points
def load_and_align_data(image_paths, image_size, margin, gpu_memory_fraction):
    """Load images, crop the first detected face of each, and stack them.

    Paths in which no face is detected are reported with ``print`` and
    removed from the caller's ``image_paths`` list in place, so the list
    stays parallel to the returned array.

    Args:
        image_paths: mutable list of image paths (``~`` is expanded).
        image_size: side length of the square crops.
        margin: total pixels added around the box (half on each side)
            before clipping to the image.
        gpu_memory_fraction: value for ``per_process_gpu_memory_fraction``.

    Returns:
        ``np.stack`` of the prewhitened crops.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    print("Creating networks and loading parameters")
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        sess = tf.Session(config=session_config)
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    whitened = []
    # Iterate over a copy because image_paths is mutated inside the loop.
    for image in copy.copy(image_paths):
        pixels = misc.imread(os.path.expanduser(image), mode="RGB")
        frame_hw = np.asarray(pixels.shape)[0:2]
        detections, _ = detect_face.detect_face(
            pixels, minsize, pnet, rnet, onet, threshold, factor)
        if len(detections) < 1:
            image_paths.remove(image)
            print("can't detect face, remove ", image)
            continue

        first = np.squeeze(detections[0, 0:4])
        half = margin / 2
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(first[0] - half, 0)
        bb[1] = np.maximum(first[1] - half, 0)
        bb[2] = np.minimum(first[2] + half, frame_hw[1])
        bb[3] = np.minimum(first[3] + half, frame_hw[0])
        crop = pixels[bb[1]:bb[3], bb[0]:bb[2], :]
        resized = misc.imresize(crop, (image_size, image_size),
                                interp="bilinear")
        whitened.append(facenet.prewhiten(resized))

    return np.stack(whitened)
def find_faces(self, image):
    """Return one ``Face`` per detection found in ``image``.

    Each ``Face`` records the source image, an int32 box
    ``[col_min, row_min, col_max, row_max]`` (the detection grown by
    ``self.face_crop_margin / 2`` per side and clipped to the image) and
    the crop resized to ``self.face_crop_size`` squared.
    """
    detected, _ = align_detect_face.detect_face(
        image, self.minsize, self.pnet, self.rnet, self.onet,
        self.threshold, self.factor)

    collected = []
    for raw_box in detected:
        entry = Face()
        entry.container_image = image
        entry.bounding_box = np.zeros(4, dtype=np.int32)
        limits = np.asarray(image.shape)[0:2]

        # Lower corner is clamped at 0, upper corner at the image extent.
        clipped = (
            np.maximum(raw_box[0] - self.face_crop_margin / 2, 0),
            np.maximum(raw_box[1] - self.face_crop_margin / 2, 0),
            np.minimum(raw_box[2] + self.face_crop_margin / 2, limits[1]),
            np.minimum(raw_box[3] + self.face_crop_margin / 2, limits[0]),
        )
        for position, value in enumerate(clipped):
            entry.bounding_box[position] = value

        row_lo, row_hi = entry.bounding_box[1], entry.bounding_box[3]
        col_lo, col_hi = entry.bounding_box[0], entry.bounding_box[2]
        patch = image[row_lo:row_hi, col_lo:col_hi, :]
        entry.image = misc.imresize(
            patch, (self.face_crop_size, self.face_crop_size),
            interp='bilinear')
        collected.append(entry)
    return collected
def _detectFaces(self, frames, out_dir):
    """Detect, crop, resize and preprocess every face in ``frames``.

    Args:
        frames: sized collection of ``(index, frame)`` pairs.
        out_dir: unused; kept for interface compatibility.

    Returns:
        A list of ``(frame_index, face_index, image)`` tuples, where
        ``image`` is the margin-expanded crop resized to
        ``self.image_size`` and passed through ``self._preprocess``.
    """
    start = time.time()
    alignedFrames = []
    for i, frame in frames:
        bounding_boxes, _ = detect_face.detect_face(
            frame, self.minsize, self.pnet, self.rnet, self.onet,
            self.threshold, self.factor)
        nrof_faces = bounding_boxes.shape[0]
        if nrof_faces <= 0:
            continue

        # resize to embedding_size according to bounding boxes
        dets = bounding_boxes[:, 0:4]
        img_size = np.asarray(frame.shape)[0:2]
        for j, det in enumerate(dets):
            det = np.squeeze(det)
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - self.margin / 2, 0)
            bb[1] = np.maximum(det[1] - self.margin / 2, 0)
            bb[2] = np.minimum(det[2] + self.margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + self.margin / 2, img_size[0])
            cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]
            scaled = misc.imresize(cropped,
                                   (self.image_size, self.image_size),
                                   interp='bilinear')
            scaled = self._preprocess(scaled)  # PREWHITEN ETC
            alignedFrames.append((i, j, scaled))

    processTime = time.time() - start
    nrof_frames = len(frames)
    # Guard the average against an empty input (was ZeroDivisionError).
    per_frame = processTime / nrof_frames if nrof_frames else 0.0
    # One pre-formatted string prints identically with the Python 2 print
    # statement and the Python 3 print function.
    print('face detection for %s took %s s, ~ %s s; found %s'
          % (nrof_frames, processTime, per_frame, len(alignedFrames)))
    return alignedFrames
def detect_mtcnn(path):
    """Detect faces in the image at ``path`` and display them.

    Face detection using facenet & tensorflow package. Every detection is
    printed, outlined in green on the image, cropped, resized to 96x96 and
    shown with matplotlib; finally the annotated image itself is shown.
    Reads the module-level ``gpu_memory_fraction``, ``minsize``,
    ``threshold`` and ``factor``.
    """
    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    img = misc.imread(path)
    bounding_boxes, _ = detect_face.detect_face(
        img, minsize, pnet, rnet, onet, threshold, factor)
    num_faces = bounding_boxes.shape[0]
    # Previously formatted the undefined name ``nrof_faces`` (NameError).
    print('////////////{} faces founded////////////'.format(num_faces))
    print(bounding_boxes)

    crop_faces = []
    for face_position in bounding_boxes:
        face_position = face_position.astype(int)
        print(face_position[0:4])
        cv2.rectangle(img,
                      (face_position[0], face_position[1]),
                      (face_position[2], face_position[3]),
                      (0, 255, 0), 2)
        crop = img[face_position[1]:face_position[3],
                   face_position[0]:face_position[2], ]
        crop = cv2.resize(crop, (96, 96), interpolation=cv2.INTER_CUBIC)
        print(crop.shape)
        crop_faces.append(crop)
        plt.imshow(crop)
        plt.show()

    plt.imshow(img)
    plt.show()
def detect(self, img):
    """Crop the single face found in ``img`` and resize it to 160x160.

    img: rgb 3 channel

    The detected box is grown by ``32 // 2`` pixels on every side, clipped
    to the image and rounded to the nearest integer.

    Returns:
        ``(scaled, bbox)``: the resized crop and the list
        ``[x0, y0, x1, y1]`` of integer box corners.

    Raises:
        AssertionError: if the detector does not report exactly one face.
    """
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    detections, _ = FaceDet.detect_face(
        img, minsize, self.pnet, self.rnet, self.onet, threshold, factor)
    num_face = detections.shape[0]
    assert num_face == 1, num_face

    raw = detections[0][:4]  # xy,xy
    margin = 32
    half = margin // 2
    clipped = [
        np.maximum(raw[0] - half, 0),
        np.maximum(raw[1] - half, 0),
        np.minimum(raw[2] + half, img.shape[1]),
        np.minimum(raw[3] + half, img.shape[0]),
    ]
    # Adding 0.5 before int() rounds the non-negative values to nearest.
    bbox = [int(value + 0.5) for value in clipped]
    x0, y0, x1, y1 = bbox

    cropped = img[y0:y1, x0:x1, :]
    scaled = misc.imresize(cropped, (160, 160), interp='bilinear')
    return scaled, bbox
def align_face(pic_path):
    """Overwrite the image at ``pic_path`` with a crop of its main face.

    Returns without changes when the path does not exist, the file cannot
    be read, the image has fewer than two dimensions, or no face is
    detected. With several detections the box maximising
    ``area - 2 * squared distance of the box centre from the image
    centre`` is used. Reads the module-level ``minsize``, ``pnet``,
    ``rnet``, ``onet``, ``threshold``, ``factor``, ``margin`` and
    ``image_size``.
    """
    if not os.path.exists(pic_path):
        return

    try:
        img = misc.imread(pic_path)
    except (IOError, ValueError, IndexError) as e:
        errorMessage = '{}: {}'.format(pic_path, e)
        print(errorMessage)
        # Stop here: ``img`` is unbound, so continuing raised NameError.
        return

    if img.ndim < 2:
        print('Unable to align "%s"' % pic_path)
        return
    if img.ndim == 2:
        img = facenet.to_rgb(img)
    img = img[:, :, 0:3]

    bounding_boxes, _ = detect_face.detect_face(
        img, minsize, pnet, rnet, onet, threshold, factor)
    nrof_faces = bounding_boxes.shape[0]
    if nrof_faces <= 0:
        return

    det = bounding_boxes[:, 0:4]
    img_size = np.asarray(img.shape)[0:2]
    if nrof_faces > 1:
        bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
        img_center = img_size / 2
        offsets = np.vstack(
            [(det[:, 0] + det[:, 2]) / 2 - img_center[1],
             (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
        offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
        # some extra weight on the centering
        index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
        det = det[index, :]

    det = np.squeeze(det)
    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - margin / 2, 0)
    bb[1] = np.maximum(det[1] - margin / 2, 0)
    bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
    bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
    scaled = misc.imresize(cropped, (image_size, image_size),
                           interp='bilinear')
    misc.imsave(pic_path, scaled)
# Build the three detection networks once, outside the capture loop.
sess = tf.Session()
pnet_fun, rnet_fun, onet_fun = detect_face.create_mtcnn(sess, model_path=None)

font = cv2.FONT_HERSHEY_DUPLEX

while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        # No frame was delivered; stop instead of failing on the slicing
        # below.
        break

    # Reverse the channel axis (BGR -> RGB) before handing the frame to
    # the detector.
    rgb_frame = frame[:, :, ::-1]

    # Find all the faces in the frame of video
    total_boxes, points = detect_face.detect_face(
        rgb_frame, minsize=MINSIZE, pnet=pnet_fun, rnet=rnet_fun,
        onet=onet_fun, threshold=THRESHOLD, factor=FACTOR)

    # Loop through each face in this frame of video. The first four columns
    # are used as two opposite corners of the box, the fifth is not used.
    for (x1, y1, x2, y2, prob) in total_boxes:
        # Draw a box around the face
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                      (0, 0, 255), 2)
def process_data(self, class_name, image_paths, text_file, result_file):
    """Detect, crop and resize the faces of one class's images.

    Every image is first copied to ``self.tmp_dir``, read from there, run
    through the MTCNN detector, cropped (with ``self.margin``), resized to
    ``self.image_size`` and finally copied into
    ``<self.out_image_dir>/<class_name>/``.

    Args:
        class_name: Name of the sub-directory created under
            ``self.out_image_dir`` for the results.
        image_paths: List of source image paths.  Shuffled in place when
            ``self.random_order`` is set.
        text_file: Writable text stream.  Receives one line per produced
            crop, ``"<output path> x1 y1 x2 y2"``, or just the would-be
            output path when the image could not be aligned.
        result_file: Writable text stream receiving one line per problem
            (unreadable image or no face found).

    Returns:
        None.
    """
    output_class_dir = os.path.join(self.out_image_dir, class_name)
    tf.io.gfile.mkdir(output_class_dir)
    if self.random_order:
        random.shuffle(image_paths)

    for image_path in image_paths:
        base_name = os.path.split(image_path)[1]
        stem = os.path.splitext(base_name)[0]
        # Only used in the "could not align" records below.  Successful crops
        # are named after the temporary file and keep its extension.
        output_filename = os.path.join(output_class_dir, stem + '.png')
        tmp_path = os.path.join(self.tmp_dir, base_name)
        tf.io.gfile.copy(image_path, tmp_path, True)

        try:
            img = imread(tmp_path)
        except (IOError, ValueError, IndexError) as e:
            result_file.write('{}: {}\n'.format(image_path, e))
            continue

        if img.ndim < 2:
            # Newline-terminated so that log records do not run together.
            result_file.write('Unable to align "%s"\n' % image_path)
            text_file.write('%s\n' % (output_filename))
            continue
        if img.ndim == 2:
            img = facenet.to_rgb(img)
        # Keep at most three channels (drops e.g. an alpha channel).
        img = img[:, :, 0:3]

        bounding_boxes, _ = detect_face.detect_face(
            img, self.minsize, self.pnet, self.rnet, self.onet,
            self.threshold, self.factor)
        nrof_faces = bounding_boxes.shape[0]
        if nrof_faces == 0:
            result_file.write('Unable to align "%s"\n' % image_path)
            text_file.write('%s\n' % (output_filename))
            continue

        det = bounding_boxes[:, 0:4]
        img_size = np.asarray(img.shape)[0:2]  # (height, width)
        if nrof_faces > 1 and not self.detect_multiple_faces:
            # Keep a single face: the largest one, penalised by its squared
            # distance from the image centre.
            bounding_box_size = (det[:, 2] - det[:, 0]) * (
                det[:, 3] - det[:, 1])
            img_center = img_size / 2
            offsets = np.vstack([
                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                (det[:, 1] + det[:, 3]) / 2 - img_center[0],
            ])
            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
            index = np.argmax(bounding_box_size - offset_dist_squared * 2.0)
            det_arr = [det[index, :]]
        else:
            det_arr = [det[k] for k in range(nrof_faces)]

        filename_base, file_extension = os.path.splitext(tmp_path)
        for i, face_det in enumerate(det_arr):
            face_det = np.squeeze(face_det)
            # Grow the box by margin / 2 per side, clipped to the image.
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(face_det[0] - self.margin / 2, 0)
            bb[1] = np.maximum(face_det[1] - self.margin / 2, 0)
            bb[2] = np.minimum(face_det[2] + self.margin / 2, img_size[1])
            bb[3] = np.minimum(face_det[3] + self.margin / 2, img_size[0])
            cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
            scaled = np.array(
                Image.fromarray(cropped).resize(
                    (self.image_size, self.image_size),
                    resample=Image.BILINEAR))

            if self.detect_multiple_faces:
                tmp_filename_n = "{}_{}{}".format(filename_base, i,
                                                  file_extension)
            else:
                tmp_filename_n = "{}{}".format(filename_base, file_extension)
            # Write locally first, then copy to the (possibly remote)
            # output directory through gfile.
            imsave(tmp_filename_n, scaled)
            output_filename_n = os.path.join(
                output_class_dir, os.path.split(tmp_filename_n)[1])
            tf.io.gfile.copy(tmp_filename_n, output_filename_n, True)
            text_file.write(
                '%s %d %d %d %d\n' %
                (output_filename_n, bb[0], bb[1], bb[2], bb[3]))
# Start code from facenet/src/compare.py
print('Creating networks and loading parameters')
with tf.Graph().as_default():
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_memory_fraction)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    with sess.as_default():
        pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
# end code from facenet/src/compare.py

for i in images:
    img = misc.imread(os.path.expanduser(image_dir + i))

    # Run detect_face from the facenet library on the image as loaded.
    bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet,
                                                onet, threshold, factor)
    print(bounding_boxes)

    # OpenCV's imwrite/imshow interpret arrays as BGR.
    # NOTE(review): assumes misc.imread returned a 3-channel RGB array --
    # confirm.  Swapping here keeps the saved and shown colours faithful.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    # Draw one rectangle per detection.  Each row is unpacked as two corner
    # points plus a score.
    for (x1, y1, x2, y2, acc) in bounding_boxes:
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)),
                      (255, 0, 0), 2)
        print('Accuracy score', acc)

    # Save a new file with the boxed face(s).
    cv2.imwrite('faceBoxed' + i, img)

    # Show the boxed face(s).  The window is only rendered while waitKey
    # runs, and waitKey(0) blocks until a key is pressed, which is what the
    # message promises.
    cv2.imshow('facenet is cool', img)
    print('Press any key to advance to the next image')
    cv2.waitKey(0)
def _extractFramesToFeatures(self, video, metricFn):
    """Select frames from a video, crop every detected face and embed it.

    Frame selection, face detection, cropping and embedding are combined in
    one pass so that frames do not have to be accumulated between steps.
    Face crops are embedded in batches of ``self.batch_size``.

    Args:
        video: Capture object exposing ``get``, ``set`` and ``read`` (used
            with the ``cv2.cv.CV_CAP_PROP_*`` constants).
        metricFn: Callable ``metricFn(frame_count, length_in_seconds)``
            returning the frame indices to process.

    Returns:
        List of ``(frame_index, face_index, embedding)`` tuples, in
        processing order.
    """
    frame_num = video.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
    frame_rate = video.get(cv2.cv.CV_CAP_PROP_FPS)
    slength = int(frame_num / frame_rate)
    indices = metricFn(frame_num, slength)

    def embed(batch):
        # Run the embedding network on the crops in ``batch`` and pair every
        # embedding with the (frame, face) ids of its crop.
        with self.session.graph.as_default():
            with self.session.as_default():
                graph = tf.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("resnet/input:0")
                embeddings = graph.get_tensor_by_name("resnet/embeddings:0")
                # asarray instead of array(copy=False): newer NumPy raises
                # for copy=False when a copy cannot be avoided.
                imgs = np.asarray([crop for (_, _, crop) in batch])
                emb = self.session.run(
                    embeddings, feed_dict={images_placeholder: imgs})
        return [(frame_idx, face_idx, e)
                for (frame_idx, face_idx, _), e in zip(batch, emb)]

    start = time.time()
    # %-formatting keeps the output identical under the print statement
    # and the print function.
    print('Extracting indices %s' % len(indices))

    alignedFrames = []
    features = []
    for i in indices:
        video.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, i)  # move to frame i
        ret, frame = video.read()
        if not ret:
            continue

        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        bounding_boxes, _ = detect_face.detect_face(
            frame, self.minsize, self.pnet, self.rnet, self.onet,
            self.threshold, self.factor)
        if bounding_boxes.shape[0] == 0:
            continue

        img_size = np.asarray(frame.shape)[0:2]  # (height, width)
        for j, det in enumerate(bounding_boxes[:, 0:4]):
            det = np.squeeze(det)
            # Grow the box by margin / 2 per side, clipped to the frame.
            bb = np.zeros(4, dtype=np.int32)
            bb[0] = np.maximum(det[0] - self.margin / 2, 0)
            bb[1] = np.maximum(det[1] - self.margin / 2, 0)
            bb[2] = np.minimum(det[2] + self.margin / 2, img_size[1])
            bb[3] = np.minimum(det[3] + self.margin / 2, img_size[0])
            cropped = frame[bb[1]:bb[3], bb[0]:bb[2], :]

            # Blurriness filtering is intentionally not applied here; it
            # was deactivated because it did not perform well.
            scaled = misc.imresize(cropped,
                                   (self.image_size, self.image_size),
                                   interp='bilinear')
            scaled = self._preprocess(scaled)
            alignedFrames.append((i, j, scaled))

            if len(alignedFrames) >= self.batch_size:
                features.extend(embed(alignedFrames))
                alignedFrames = []

    # Embed whatever is left over from the last, partial batch.
    if len(alignedFrames) > 0:
        features.extend(embed(alignedFrames))

    del alignedFrames
    gc.collect()
    print(total_size(features))
    processTime = time.time() - start
    print('feature extraction took %s s, found %s' %
          (processTime, len(features)))
    return features
def face_verification(img_pairs_list):
    """Decide for each pair of image files whether both show the same person.

    For every pair the first face detected by MTCNN in each image is
    cropped, resized, prewhitened and embedded with the facenet model.  The
    Euclidean distance between the two embeddings is compared with a fixed
    threshold of 1.1.

    Args:
        img_pairs_list: Sequence of pairs; element 0 and element 1 of each
            pair are the paths of the two images to compare.

    Returns:
        Tuple ``(result, dist)`` of two lists with one entry per pair:
        ``result`` holds 1 (same person), 0 (different) or -1 (pair could
        not be evaluated); ``dist`` holds the embedding distance, or -1 for
        pairs that could not be evaluated.  A pair cannot be evaluated when
        an image cannot be read or contains no detectable face.
    """
    model = r'facenet\src\align'
    model_facenet = './20170512-110547.pb'

    # MTCNN parameters.
    minsize = 40
    # Score thresholds for the pnet, rnet and onet stages: candidates
    # scoring above the threshold are kept, the rest are discarded.
    threshold = [0.4, 0.5, 0.6]
    factor = 0.709  # scale factor
    margin = 44
    image_size = 160

    def crop_first_face(img, boxes):
        # Crop the first detection (grown by margin / 2 per side and clipped
        # to the image), resize it to the network input size and prewhiten.
        img_size = np.asarray(img.shape)[0:2]  # (height, width)
        det = np.squeeze(boxes[0, 0:4])
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0] - margin / 2, 0)
        bb[1] = np.maximum(det[1] - margin / 2, 0)
        bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
        bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
        aligned = cv2.resize(cropped, (image_size, image_size))
        return facenet.prewhiten(aligned)

    result, dist = [], []

    # Create the MTCNN networks in their own graph and session.
    with tf.Graph().as_default():
        mtcnn_sess = tf.compat.v1.Session()
        with mtcnn_sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(mtcnn_sess, model)

    try:
        with tf.Graph().as_default():
            with tf.compat.v1.Session() as sess:
                # Load the embedding model from the model file.
                facenet.load_model(model_facenet)
                # Look up the input and output tensors.
                graph = tf.compat.v1.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("input:0")
                embeddings = graph.get_tensor_by_name("embeddings:0")
                phase_train_placeholder = graph.get_tensor_by_name(
                    "phase_train:0")

                # Progress-bar settings.
                jd = '\r %2d%%\t [%s%s]'
                bar_num_total = 50
                total_num = len(img_pairs_list)
                # Integer step of at least 1: the former float modulo
                # (i % (total / 50) == 0) almost never matched unless the
                # total was a multiple of 50.
                progress_step = max(1, total_num // bar_num_total)

                for i, img_pairs in enumerate(img_pairs_list):
                    # Draw the progress bar.
                    if i % progress_step == 0 or i == total_num - 1:
                        bar_num_alright = round(bar_num_total * i / total_num)
                        alright = '#' * bar_num_alright
                        not_alright = '□' * (bar_num_total - bar_num_alright)
                        percent = (bar_num_alright / bar_num_total) * 100
                        print(jd % (percent, alright, not_alright), end='')

                    # Read the pair of face images.
                    # NOTE(review): cv2.imread yields BGR channel order;
                    # confirm that this is what the two models expect.
                    img1 = cv2.imread(img_pairs[0])
                    img2 = cv2.imread(img_pairs[1])
                    if img1 is None or img2 is None:
                        # cv2.imread returns None for unreadable files.
                        # Handle the pair like one without a face instead
                        # of crashing on ``.shape``.
                        result.append(-1)
                        dist.append(-1)
                        continue

                    # Detect the faces in both images.
                    bounding_box1, _1 = detect_face.detect_face(
                        img1, minsize, pnet, rnet, onet, threshold, factor)
                    bounding_box2, _2 = detect_face.detect_face(
                        img2, minsize, pnet, rnet, onet, threshold, factor)

                    # No face detected: mark the pair with -1 so it can be
                    # excluded when the accuracy is computed later.
                    if len(bounding_box1) < 1 or len(bounding_box2) < 1:
                        result.append(-1)
                        dist.append(-1)
                        continue

                    images = np.stack([
                        crop_first_face(img1, bounding_box1),
                        crop_first_face(img2, bounding_box2),
                    ])

                    # Turn both faces into embedding vectors.
                    feed_dict = {
                        images_placeholder: images,
                        phase_train_placeholder: False,
                    }
                    emb = sess.run(embeddings, feed_dict=feed_dict)

                    # Euclidean distance between the two embeddings.
                    ed = np.sqrt(
                        np.sum(np.square(np.subtract(emb[0], emb[1]))))
                    dist.append(ed)
                    # Same person when the distance is within the threshold.
                    result.append(1 if ed <= 1.1 else 0)
    finally:
        # The detector session is not managed by a ``with`` block.
        mtcnn_sess.close()

    return result, dist
def main(args):
    """Align every image of a dataset directory with MTCNN.

    Reads the dataset under ``args.input_dir``, detects faces, and writes
    the cropped and resized faces as PNG files under
    ``args.output_dir/<class name>/``.  Images whose output file already
    exists are skipped.  A ``bounding_boxes_<random key>.txt`` file in the
    output directory records one line per written crop (path plus box) or
    just the output path for images that could not be aligned.

    Args:
        args: Namespace providing ``input_dir``, ``output_dir``,
            ``gpu_memory_fraction``, ``random_order``,
            ``detect_multiple_faces``, ``margin`` and ``image_size``.
    """
    # Random start-up delay of up to one second.
    sleep(random.random())

    out_root = os.path.expanduser(args.output_dir)
    if not os.path.exists(out_root):
        os.makedirs(out_root)

    # Store some git revision info in a text file in the log directory.
    script_dir = os.path.split(os.path.realpath(__file__))[0]
    facenet.store_revision_info(script_dir, out_root, ' '.join(sys.argv))
    dataset = facenet.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        session_config = tf.ConfigProto(gpu_options=gpu_options,
                                        log_device_placement=False)
        sess = tf.Session(config=session_config)
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # thresholds of the three detector steps
    factor = 0.709  # scale factor

    # A random key in the file name lets several processes align into the
    # same output directory without sharing one bounding-box file.
    random_key = np.random.randint(0, high=99999)
    boxes_path = os.path.join(out_root,
                              'bounding_boxes_%05d.txt' % random_key)

    nrof_images_total = 0
    nrof_successfully_aligned = 0
    with open(boxes_path, "w") as text_file:
        if args.random_order:
            random.shuffle(dataset)

        for cls in dataset:
            class_dir = os.path.join(out_root, cls.name)
            if not os.path.exists(class_dir):
                os.makedirs(class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)

            for image_path in cls.image_paths:
                nrof_images_total += 1
                stem = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(class_dir, stem + '.png')
                print(image_path)

                # Already aligned (possibly by another process).
                if os.path.exists(output_filename):
                    continue

                try:
                    img = misc.imread(image_path)
                except (IOError, ValueError, IndexError) as e:
                    print('{}: {}'.format(image_path, e))
                    continue

                if img.ndim < 2:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue
                if img.ndim == 2:
                    img = facenet.to_rgb(img)
                # Keep at most three channels.
                img = img[:, :, 0:3]

                bounding_boxes, _ = detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                if not nrof_faces > 0:
                    print('Unable to align "%s"' % image_path)
                    text_file.write('%s\n' % (output_filename))
                    continue

                det = bounding_boxes[:, 0:4]
                img_size = np.asarray(img.shape)[0:2]  # (height, width)
                if nrof_faces == 1:
                    candidates = [np.squeeze(det)]
                elif args.detect_multiple_faces:
                    candidates = [np.squeeze(det[k])
                                  for k in range(nrof_faces)]
                else:
                    # Single best face: largest area, with some extra
                    # weight on the centering.
                    widths = det[:, 2] - det[:, 0]
                    heights = det[:, 3] - det[:, 1]
                    bounding_box_size = widths * heights
                    img_center = img_size / 2
                    offsets = np.vstack([
                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                        (det[:, 1] + det[:, 3]) / 2 - img_center[0],
                    ])
                    offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
                    best = np.argmax(bounding_box_size -
                                     offset_dist_squared * 2.0)
                    candidates = [det[best, :]]

                filename_base, file_extension = os.path.splitext(
                    output_filename)
                for face_no, face_box in enumerate(candidates):
                    face_box = np.squeeze(face_box)
                    # Grow by margin / 2 per side, clipped to the image:
                    # top-left against 0, bottom-right against (w, h).
                    bb = np.zeros(4, dtype=np.int32)
                    bb[0:2] = np.maximum(face_box[0:2] - args.margin / 2, 0)
                    bb[2:4] = np.minimum(face_box[2:4] + args.margin / 2,
                                         img_size[::-1])
                    cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                    scaled = misc.imresize(
                        cropped, (args.image_size, args.image_size),
                        interp='bilinear')
                    nrof_successfully_aligned += 1

                    output_filename_n = (
                        "{}_{}{}".format(filename_base, face_no,
                                         file_extension)
                        if args.detect_multiple_faces else
                        "{}{}".format(filename_base, file_extension))
                    misc.imsave(output_filename_n, scaled)
                    text_file.write(
                        '%s %d %d %d %d\n' %
                        (output_filename_n, bb[0], bb[1], bb[2], bb[3]))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
# In[6]:

# Reset the default graph, then open a session for the detector.
tf.reset_default_graph()
sess = tf.Session()


# In[7]:

# Build the three detector stage networks from the weights at det_path.
pnet, rnet, onet = df.create_mtcnn(sess, det_path)


# In[ ]:

# Detector settings; the values below are experimental, not tuned.
#   minsize   -- minimum size of face
#   threshold -- one threshold per detector step
#   factor    -- scale factor
minsize, factor = 10, 0.79
threshold = [0.5, 0.5, 0.3]

boxes, points = df.detect_face(test_img, minsize, pnet, rnet, onet,
                               threshold, factor)


# In[21]:

print(boxes)
print(points)


# In[17]:

#pnet_output = pnet(np.transpose(np.expand_dims(test_img, 0), (0,2,1,3)))


# In[19]:

#plt.imshow(pnet_output)
def face_recog():
    """Flask view: recognise students in a photo and write an attendance sheet.

    On POST, the image named like the uploaded file is read from the
    test-images directory, faces are detected with MTCNN, embedded with the
    facenet model and classified with the pickled classifier.  Every student
    name from the ``add`` table is then written to an hourly ``.xlsx``
    report, marked ``Present`` when a recognised face matches the name and
    ``Absent`` otherwise.  The annotated image is shown in an OpenCV window
    and saved under ``output/``.

    Returns:
        The rendered ``take.html`` template (for every request method).
    """
    if request.method != "POST":
        return render_template('take.html', title="Take Attendance")

    upload = request.files["image"]
    img_name = str(secure_filename(upload.filename))
    # NOTE(review): the upload itself is never saved here; the image is read
    # from the test-images directory by name -- confirm this is intended.
    img_path = "attendance/facenet/dataset/test-images/" + img_name
    modeldir = "attendance/facenet/src/20180402-114759/"
    classifier_filename = "attendance/facenet/src/20180402-114759/my_classifier.pkl"
    npy = ""
    train_img = "attendance/facenet/dataset/raw"

    # Read the image first (flag 0 = grayscale) so that a bad file is
    # reported before any model is loaded.  cv2.imread returns None for
    # files it cannot read.
    frame = cv2.imread(img_path, 0)
    if frame is None:
        flash('The image "%s" could not be read.' % img_name, 'danger')
        return render_template('take.html', title="Take Attendance")

    # Fetch the student names up front so the connection can be closed
    # right away.
    conn = sqlite3.connect('C:\\Users\\Dell\\Attendance\\attendance\\site.db')
    try:
        students = conn.cursor().execute(
            "SELECT stuname FROM 'add'").fetchall()
    finally:
        conn.close()

    names = []
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        try:
            with sess.as_default():
                pnet, rnet, onet = detect_face.create_mtcnn(sess, npy)

                minsize = 20  # minimum size of face
                threshold = [0.6, 0.7, 0.7]  # three steps's threshold
                factor = 0.709  # scale factor
                image_size = 160
                input_image_size = 160

                # Class index -> name: sorted sub-directory names of the
                # training image directory.
                HumanNames = os.listdir(train_img)
                HumanNames.sort()

                print('Loading feature extraction model')
                facenet.load_model(modeldir)
                graph = tf.get_default_graph()
                images_placeholder = graph.get_tensor_by_name("input:0")
                embeddings = graph.get_tensor_by_name("embeddings:0")
                phase_train_placeholder = graph.get_tensor_by_name(
                    "phase_train:0")
                embedding_size = embeddings.get_shape()[1]

                classifier_filename_exp = os.path.expanduser(
                    classifier_filename)
                # The classifier is a local pickle; only load trusted files.
                with open(classifier_filename_exp, 'rb') as infile:
                    (model, class_names) = pickle.load(infile)

                print('Start Recognition!')
                if frame.ndim == 2:
                    frame = facenet.to_rgb(frame)
                frame = frame[:, :, 0:3]

                bounding_boxes, _ = detect_face.detect_face(
                    frame, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]
                print('Face Detected: %d' % nrof_faces)

                if nrof_faces > 0:
                    det = bounding_boxes[:, 0:4]
                    frame_h, frame_w = frame.shape[0], frame.shape[1]
                    bb = np.zeros((nrof_faces, 4), dtype=np.int32)

                    for i in range(nrof_faces):
                        bb[i] = det[i]
                        x1, y1, x2, y2 = (int(v) for v in bb[i])

                        # A box touching the image border ends the
                        # processing of all remaining faces.
                        if x1 <= 0 or y1 <= 0 or x2 >= frame_w or y2 >= frame_h:
                            print('face is too close')
                            break

                        face = frame[y1:y2, x1:x2, :]
                        face = facenet.flip(face, False)
                        face = misc.imresize(face, (image_size, image_size),
                                             interp='bilinear')
                        face = cv2.resize(
                            face, (input_image_size, input_image_size),
                            interpolation=cv2.INTER_CUBIC)
                        face = facenet.prewhiten(face)
                        batch = face.reshape(-1, input_image_size,
                                             input_image_size, 3)

                        emb_array = np.zeros((1, embedding_size))
                        emb_array[0, :] = sess.run(
                            embeddings,
                            feed_dict={
                                images_placeholder: batch,
                                phase_train_placeholder: False,
                            })

                        predictions = model.predict_proba(emb_array)
                        best_class = int(np.argmax(predictions, axis=1)[0])
                        best_probability = predictions[0, best_class]
                        person = HumanNames[best_class]

                        # Box the face.
                        cv2.rectangle(frame, (x1, y1), (x2, y2),
                                      (0, 255, 0), 2)
                        print(person)
                        # The name counts as present regardless of the
                        # probability; only the label is thresholded.
                        names.append(person)
                        if best_probability > 0.43:
                            cv2.putText(frame, person, (x1, y2 + 20),
                                        cv2.FONT_HERSHEY_COMPLEX_SMALL,
                                        1, (0, 0, 255),
                                        thickness=1, lineType=1)
                else:
                    print('Unable to align')
        finally:
            sess.close()

    workbook = xlsxwriter.Workbook(
        'C:\\Users\\Dell\\Attendance\\Reports\\Report_for_' +
        datetime.datetime.now().strftime("%Y_%m_%d-%H") + '.xlsx')
    worksheet = workbook.add_worksheet()

    # One row per student: name in column j, status two columns further.
    recognised = set(names)
    for i, row in enumerate(students):
        for j, value in enumerate(row):
            status = 'Present' if value in recognised else 'Absent'
            worksheet.write_string(i, j + 2, status)
            worksheet.write_string(i, j, str(value))

    cv2.imshow('Image', frame)
    cv2.imwrite('output/' + img_path.split('/')[-1], frame)
    # NOTE(review): pressing 'q' within the 9 s preview raises SystemExit
    # inside the request handler and skips saving the report -- kept as is,
    # confirm this is wanted.
    if cv2.waitKey(9000) & 0xFF == ord('q'):
        sys.exit("Thanks")
    workbook.close()
    cv2.destroyAllWindows()
    flash('The students faces were recognized successfully!', 'success')
    return render_template('take.html', title="Take Attendance")
def gen(camera):
    """Yield annotated camera frames as parts of a multipart JPEG stream.

    For every frame the object detector ``tfnet`` draws its labelled boxes,
    then every face found by MTCNN is cropped, embedded with the facenet
    model and classified; the predicted class name is drawn above the face.
    The frame is JPEG-encoded and yielded with a ``--frame`` multipart
    header.

    Args:
        camera: Object whose ``read()`` returns ``(success, image)``.

    Yields:
        bytes: One multipart chunk per processed frame.  The stream ends
        when ``camera.read()`` reports failure.
    """
    margin = 44        # pixels added around a detected face (half per side)
    face_size = 160    # side length of the crop fed to the embedding network
    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    sess = tf.Session()
    try:
        with sess.as_default():
            pnet, rnet, onet = detect_face.create_mtcnn(sess, None)
            facenet.load_model(
                '/home/rohitner/models/facenet/20180402-114759/20180402-114759.pb')
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            phase_train_placeholder = graph.get_tensor_by_name(
                "phase_train:0")

            classifier_filename_exp = '/home/rohitner/models/lfw_classifier.pkl'
            # The classifier is a local pickle; only load trusted files.
            with open(classifier_filename_exp, 'rb') as infile:
                (model, class_names) = pickle.load(infile)
            print('Loaded classifier model from file "%s"' %
                  classifier_filename_exp)

            while True:
                success, img = camera.read()
                if not success:
                    # No frame was delivered, so end the stream instead of
                    # passing an invalid image to the detectors.
                    break

                # General object detections: box plus label.
                for result in tfnet.return_predict(img):
                    top_left = (result["topleft"]["x"],
                                result["topleft"]["y"])
                    bottom_right = (result["bottomright"]["x"],
                                    result["bottomright"]["y"])
                    cv2.rectangle(img, top_left, bottom_right,
                                  (255, 0, 0), 4)
                    cv2.putText(img, result["label"],
                                (top_left[0] - 10, top_left[1] - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0),
                                2, cv2.LINE_AA)

                if img.ndim < 2:
                    print('Unable to align')
                    continue
                if img.ndim == 2:
                    img = facenet.to_rgb(img)
                img = img[:, :, 0:3]

                bounding_boxes, _ = detect_face.detect_face(
                    img, minsize, pnet, rnet, onet, threshold, factor)
                nrof_faces = bounding_boxes.shape[0]

                if nrof_faces > 0:
                    img_size = np.asarray(img.shape)[0:2]  # (height, width)
                    # Every detected face is processed.
                    for det in bounding_boxes[:, 0:4]:
                        det = np.squeeze(det)
                        # Grow by margin / 2 per side, clipped to the image.
                        bb = np.zeros(4, dtype=np.int32)
                        bb[0] = np.maximum(det[0] - margin / 2, 0)
                        bb[1] = np.maximum(det[1] - margin / 2, 0)
                        bb[2] = np.minimum(det[2] + margin / 2, img_size[1])
                        bb[3] = np.minimum(det[3] + margin / 2, img_size[0])
                        cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                        scaled = misc.imresize(cropped,
                                               (face_size, face_size),
                                               interp='bilinear')
                        scaled = prewhiten_and_expand(scaled)

                        emb = sess.run(embeddings,
                                       feed_dict={
                                           images_placeholder: scaled,
                                           phase_train_placeholder: False
                                       })
                        predictions = model.predict_proba(emb)
                        best_class_index = np.argmax(predictions)

                        cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]),
                                      (0, 255, 0), 5)
                        cv2.putText(img, class_names[best_class_index],
                                    (bb[0], bb[1] - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                    (255, 0, 0), 2, cv2.LINE_AA)
                else:
                    print('No face detected')

                ret, jpeg = cv2.imencode('.jpg', img)
                frame = jpeg.tobytes()
                yield (b'--frame\r\n'
                       b'Content-Type: image/jpeg\r\n\r\n' + frame +
                       b'\r\n\r\n')
    finally:
        # Runs when the stream ends or the consumer closes the generator.
        sess.close()