class FaceAlignmentMTCNN:
    """Detect a face with MTCNN and return the cropped face as a tensor."""

    def __init__(self):
        # Detector producing 160x160 crops with no extra margin; runs on the
        # first CUDA device when one is available, otherwise on the CPU.
        self.mtcnn = MTCNN(
            image_size=160,
            margin=0,
            selection_method="probability",
            device=torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu"),
        )
        self.to_tensor = transforms.ToTensor()

    def make_align(self, img):
        """Return the face cropped from ``img`` as a tensor, or ``None``.

        The image is resized to 512x512, passed through the detector, and the
        crop written by ``MTCNN.extract`` is read back and converted with
        ``self.to_tensor``.

        :param img: Input image; must provide ``resize((w, h))``
            (presumably a ``PIL.Image.Image`` -- confirm against callers).
        :return: The result of ``self.to_tensor`` on the saved crop, or
            ``None`` when no face is detected or processing fails.
        """
        # Local imports keep this block self-contained.
        import os
        import tempfile

        img = img.resize((512, 512))
        try:
            bbx, prob = self.mtcnn.detect(img)
            if bbx is None:
                print("No Face")
                return None
            # The crop is deliberately read back from the saved file (rather
            # than using the value returned by ``extract``) so the output
            # stays identical to what callers already receive. A private
            # temporary directory replaces the fixed "temp.jpg" in the
            # working directory, which collided between concurrent callers
            # and was never cleaned up.
            with tempfile.TemporaryDirectory() as tmp_dir:
                face_path = os.path.join(tmp_dir, "temp.jpg")
                self.mtcnn.extract(img, bbx, face_path)
                # Convert while the file is open; closing the handle before
                # the directory is removed avoids a leaked descriptor.
                with Image.open(face_path) as face:
                    return self.to_tensor(face)
        except Exception:
            # Best-effort behaviour is kept, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare ``except`` did.
            print("No Face")
            return None
class DocumentFaceChecker:
    """Compare the two largest faces detected in a single image."""

    def __init__(self):
        # keep_all=True so the detector reports every face, not just one.
        self.mtcnn = MTCNN(
            keep_all=True,
            min_face_size=30,
            image_size=200
        )
        self.embedding = InceptionResnetV1(pretrained='vggface2').eval()

    def check(self, img_RGB):
        """Return the similarity between the two largest faces in ``img_RGB``.

        :param img_RGB: Image handed unchanged to ``MTCNN.detect`` and
            ``MTCNN.extract`` (presumably RGB, per the parameter name).
        :return: Whatever ``cosine_simularity`` returns for the embeddings of
            the largest and second-largest face.
        :raises Exception: If fewer than two faces are detected.
        """
        boxes, probs = self.mtcnn.detect(img_RGB)
        # When nothing is detected ``boxes`` is None and ``probs`` is not an
        # array, so checking ``probs.shape`` crashed with AttributeError
        # instead of reporting the intended error.
        if boxes is None or len(boxes) < 2:
            raise Exception("couldn't find two faces")

        # Largest bounding box first; each box is indexed as (x1, y1, x2, y2).
        sorted_by_area = sorted(
            boxes,
            key=lambda box: (box[3] - box[1]) * (box[2] - box[0]),
            reverse=True
        )
        faces = self.mtcnn.extract(img_RGB, sorted_by_area, None)
        # Only the second-largest face is sharpened before embedding.
        faces[1] = sharpen_image(faces[1])

        # Embed just the two faces that are compared instead of all of them.
        vector_faces = self.embedding(faces[:2])
        face1, face2 = vector_faces[0].detach(), vector_faces[1].detach()
        return cosine_simularity(face1, face2)
class FaceNetSegmenter(TorchDevice, BaseSegmenter):
    """FaceNetSegmenter segments faces from an image.

    - Input shape: `(Height x Width x Channels)`
    - Output shape: `NumFaces x (Channels x ImageSize x ImageSize)`

    `Channels` dimension can be changed (e.g. set `channel_axis` to 0 for
    channels first mode instead of channels last).

    :param image_size: Height and width of a detected face. Smaller faces are
        upscaled.
    :param margin: Margin to add to bounding box, in terms of pixels in the
        final image.
    :param selection_method: Heuristic to use to select a single face from
        the image. Options:
            "probability": highest probability selected
            "largest": largest box selected
            "largest_over_threshold": largest box over a certain probability
                selected
            "center_weighted_size": box size minus weighted squared offset
                from image center
    :param post_process: Flag for normalizing the output image. Required if
        you want to pass these face to the FaceNetEmbedder.
    :param min_face_size: Minimum face size to search for.
    :param channel_axis: Axis of channels in the image. Default is 2
        (channels-last), use 0 for channels-first.
    """

    def __init__(self,
                 image_size: int = 160,
                 margin: int = 0,
                 selection_method: str = 'largest',
                 post_process: bool = True,
                 min_face_size: int = 20,
                 channel_axis: int = 2,
                 *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.image_size = image_size
        self.margin = margin
        self.selection_method = selection_method
        self.post_process = post_process
        self.min_face_size = min_face_size
        self.channel_axis = channel_axis
        # Layout the detector input is converted to (channels-last).
        self._default_channel_axis = 2

    def post_init(self):
        """Build the MTCNN face detector from the configured options."""
        from facenet_pytorch import MTCNN
        self.face_detector = MTCNN(selection_method=self.selection_method,
                                   image_size=self.image_size,
                                   margin=self.margin,
                                   device=self.device,
                                   post_process=self.post_process,
                                   min_face_size=self.min_face_size,
                                   keep_all=True)

    @batching
    def segment(self, blob: 'np.ndarray', *args, **kwargs) -> List[List[Dict]]:
        """Transform a numpy `ndarray` of shape `(Height x Width x Channel)`
        into a list with dicts that contain cropped images.

        :param blob: A numpy `ndarray` that represents a single image.
        :param args: Additional positional arguments.
        :param kwargs: Additional keyword arguments.
        :return: A list with dicts that contain cropped images. Each dict has
            the keys `offset`, `weight` (detection probability), `blob`
            (the cropped face) and `location` (the bounding box as a list).
            The list is empty when no face is found.
        """
        # NOTE(review): the body handles one image and returns a flat list of
        # dicts, while the `@batching` decorator and the `List[List[Dict]]`
        # annotation suggest per-batch processing -- confirm the intended
        # contract before relying on batched input.
        if self.channel_axis != self._default_channel_axis:
            # Move channels to the last axis of the single image. The previous
            # destination (`_default_channel_axis + 1`) is out of range for a
            # 3-D image and raised an AxisError.
            blob = np.moveaxis(blob, self.channel_axis,
                               self._default_channel_axis)
        image_array = np.asarray(blob)
        results = []
        with torch.no_grad():
            # The original read an undefined name (`data`) here, which raised
            # NameError on every call.
            image = torch.from_numpy(
                image_array.astype('float32')).to(self.device)
            # Create a batch of size 1
            image = image.unsqueeze(0)
            # Detect faces
            batch_boxes, batch_probs, batch_points = self.face_detector.detect(
                image, landmarks=True)
            # Select faces. The flag lives on the detector; this class never
            # defines `self.keep_all`, so reading it raised AttributeError.
            if not self.face_detector.keep_all:
                batch_boxes, batch_probs, batch_points = \
                    self.face_detector.select_boxes(
                        batch_boxes, batch_probs, batch_points, image,
                        method=self.selection_method)
            # Extract faces
            faces = self.face_detector.extract(
                image, batch_boxes, save_path=None)
            if faces[0] is not None:
                # image.shape[-1] is the channel count (channels-last input).
                faces = faces[0].view(-1, image.shape[-1],
                                      self.image_size, self.image_size)
                batch_boxes = batch_boxes[0]
                batch_probs = batch_probs[0]
                results = [
                    # `.cpu()` is a no-op on CPU tensors and is required
                    # before `.numpy()` when the detector runs on a GPU.
                    dict(offset=0,
                         weight=probability,
                         blob=face.cpu().numpy(),
                         location=bounding_box.tolist())
                    for face, probability, bounding_box in zip(
                        faces, batch_probs, batch_boxes)
                    if face is not None
                ]
        return results
def start_recognize(self):
    """Run live face recognition on the default camera until ESC is pressed.

    Each frame is passed through an MTCNN detector; every face whose
    detection probability is at least 0.3 is embedded with
    InceptionResnetV1 and matched through ``self._recognize``. The annotated
    frame is shown in an OpenCV window. The loop ends when a frame cannot be
    read or the ESC key is pressed; the camera and the window are released
    even if processing raises.
    """
    # MTCNN locates the faces in each frame.
    mtcnn = MTCNN(device=self.device, keep_all=True)
    # Network that turns a face crop into a feature vector (512-dimensional,
    # according to the original comments).
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(self.device)

    # Set up the preview window and open the camera.
    windows_name = 'FaceRecognize'
    cv2.namedWindow(windows_name)
    cap = cv2.VideoCapture(0, cv2.CAP_DSHOW)
    try:
        while True:
            # Grab one frame from the camera.
            success, image = cap.read()
            if not success:
                break
            # NOTE(review): OpenCV frames are presumably BGR and are used
            # without conversion for both detection and drawing, so a fill
            # of (0, 0, 255) shows up as red in the cv2 window -- confirm
            # that the detector/embedder are meant to receive BGR input.
            img_PIL = Image.fromarray(image)
            draw = ImageDraw.Draw(img_PIL)

            # Detect faces: bounding boxes and their probabilities.
            boxes, probs = mtcnn.detect(image)
            if boxes is not None:
                for box, prob in zip(boxes, probs):
                    # Skip detections below the confidence threshold.
                    if prob < 0.3:
                        continue
                    x1, y1, x2, y2 = [int(p) for p in box]

                    # Crop the face and compute its feature vector.
                    face = mtcnn.extract(image, [box], None).to(self.device)
                    embeddings = resnet(face).detach().cpu().numpy()

                    # Predicted name and the smallest distance to a known
                    # face (semantics per the original comments; the third
                    # value is not used here).
                    name, min_dis, _min_dis_name = self._recognize(
                        embeddings, thres=5)

                    if min_dis > 1.0:
                        # Distance too large: treat as not recognised. The
                        # label text means "unknown".
                        draw.rectangle((x1, y1, x2, y2),
                                       outline=(0, 0, 255), width=2)
                        draw.text((x1, y1 - 35), '未知',
                                  font=self.font, fill=(0, 0, 255))
                    else:
                        # Box the face and write the recognised name.
                        draw.rectangle((x1, y1, x2, y2),
                                       outline=(0, 255, 0), width=2)
                        draw.text((x1, y1 - 35), f'{name}',
                                  font=self.font, fill=(0, 255, 0))

            # Show the annotated frame.
            cv2.imshow(windows_name, np.array(img_PIL))
            # Keep the window responsive; ESC quits.
            key = cv2.waitKey(1)
            if key & 0xff == 27:
                break
    finally:
        # Release the camera and destroy the window even when the loop
        # exits through an exception.
        cap.release()
        cv2.destroyAllWindows()