def process(fn):
    # Embed every extracted frame of clip `fn` in batches and save to .npz.
    img_dir = os.path.join(out_dir, folder, '%s_imgs' % fn)
    out_file = os.path.join(out_dir, folder, '%s_img.npz' % fn)
    num_frames = len(os.listdir(img_dir))
    if num_frames > 0:
        imgs = []
        for frame in range(num_frames):
            img = Image.open(
                os.path.join(img_dir, "frame{:03d}.jpg".format(frame))).resize(
                    (160, 160))
            imgs.append(np.asarray(img).transpose(2, 0, 1).astype('float32'))
        imgs = np.stack(imgs)
        imgs = fixed_image_standardization(imgs)  # map pixel values to [-1, 1]
        batch_size = 50
        outs = []
        for i in range(0, num_frames, batch_size):
            segment = torch.tensor(imgs[i:i + batch_size]).to(device)
            with torch.no_grad():
                embeddings = resnet(segment).cpu().numpy()
            outs.append(embeddings)
        embeddings = np.concatenate(outs)
    else:
        print("[num_frames == 0] %s" % fn)
        embeddings = np.zeros((1, 512)).astype('float32')
    np.savez(out_file, embeddings)

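# --- Hedged usage sketch, not part of the original source ------------------
# process() above relies on module-level state; this shows one plausible
# setup. The values of `out_dir` and `folder` are hypothetical, and the
# imports are the ones the function body needs.
import os

import numpy as np
import torch
from PIL import Image
from facenet_pytorch import InceptionResnetV1, fixed_image_standardization

out_dir = "embeddings"  # hypothetical output root
folder = "train"        # hypothetical split name
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resnet = InceptionResnetV1(pretrained="vggface2").eval().to(device)

# Expects embeddings/train/clip01_imgs/frame000.jpg, frame001.jpg, ... and
# writes the stacked embeddings to embeddings/train/clip01_img.npz:
# process("clip01")
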
def run_network(self, image: np.ndarray) -> List[np.ndarray]:
    # Detect on a proportionally resized copy, then crop faces from the
    # full-resolution image and standardize each crop.
    cropped_image, scale = resize_proportionally(image, self.face_size)
    boxes, probs = self.net.detect(cropped_image)
    if boxes is None:
        return []
    boxes = boxes[probs >= self.min_prob]
    faces = self._crop_faces(image, boxes, scale)
    faces = [fixed_image_standardization(face) for face in faces]
    return faces

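# Hedged sketch, not from the original source: one plausible implementation
# of the resize_proportionally() helper that run_network() depends on. It
# shrinks the image so its longer side equals `target_size` and returns the
# scale factor, which _crop_faces() would use to map detection boxes back
# onto the original resolution.
import cv2

def resize_proportionally(image: np.ndarray, target_size: int):
    h, w = image.shape[:2]
    scale = target_size / max(h, w)
    resized = cv2.resize(image, (int(round(w * scale)), int(round(h * scale))))
    return resized, scale
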
def face_match(self, image, classify_model, person_names):
    box_dr = []
    text_dr = []
    mark_dr = []
    try:
        bboxes, prob, landmarks = self.mtcnn_pt.detect(image, landmarks=True)
    except Exception:
        with self.lock_boxes:
            self.box_draw[0] = box_dr
            self.text_draw[0] = text_dr
        return box_dr, text_dr, mark_dr
    if bboxes is None:
        with self.lock_boxes:
            self.box_draw[0] = box_dr
            self.text_draw[0] = text_dr
        return box_dr, text_dr, mark_dr
    for idx, box in enumerate(bboxes):
        if prob[idx] <= 0.90:  # keep only faces detected with > 90% probability
            continue
        box_dr.append(box)
        mark_dr.append(landmarks[idx])
        face = extract_face(image, box,
                            image_size=self.mtcnn_pt.image_size,
                            margin=self.mtcnn_pt.margin)
        face = fixed_image_standardization(face)
        # Pass the cropped face through the resnet to get its embedding.
        emb = self.resnet(face.unsqueeze(0))
        emb_array = emb.detach().numpy()
        predictions = classify_model.predict_proba(emb_array)
        best_class_indices = np.argmax(predictions, axis=1)
        best_class_probabilities = predictions[
            np.arange(len(best_class_indices)), best_class_indices]
        if best_class_probabilities[0] > self.accuracy_th:
            text = '{0}: {1:.0%}'.format(person_names[best_class_indices[0]],
                                         best_class_probabilities[0])
        else:
            text = 'Unknown'
        text_dr.append(text)
    with self.lock_boxes:
        self.box_draw[0] = box_dr
        self.text_draw[0] = text_dr
        self.mark_draw[0] = mark_dr
        self.new_boxes = True
    return box_dr, text_dr, mark_dr

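# Hedged sketch, not from the original source: one way the `classify_model`
# and `person_names` arguments of face_match() could be produced. Any
# sklearn estimator exposing predict_proba() fits the interface; the names
# below (train_embs, train_labels) are hypothetical precomputed 512-d
# embeddings and integer class labels.
from sklearn.svm import SVC

def train_classifier(train_embs, train_labels):
    # probability=True enables predict_proba(), which face_match() uses to
    # threshold predictions against self.accuracy_th.
    model = SVC(kernel="linear", probability=True)
    model.fit(train_embs, train_labels)
    return model

# person_names[i] should be the display name for integer label i, e.g.:
# person_names = ["alice", "bob", "carol"]
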
def get_embeddings(arr: np.ndarray, batch_size: int = 4,
                   pre_process: bool = False) -> np.ndarray:
    """
    Return InceptionResnetV1 embeddings as trained on VGGFace2.

    Parameters
    ----------
    arr : np.ndarray
        Batch of face images extracted using facenet_pytorch.MTCNN.
    batch_size : int
        Batch size used for transforming images into embeddings, by default 4.
    pre_process : bool
        Whether to pre-process the images or not. Rule of thumb: if you loaded
        your images from disk and they looked normal, you should opt for
        pre-processing them. By default False.

    Returns
    -------
    np.ndarray
        InceptionResnetV1 embeddings as trained on VGGFace2.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    resnet = InceptionResnetV1(pretrained="vggface2").eval().to(device)
    T = torch.Tensor(arr).to(device)
    if T.shape[-1] == 3:  # channels-last input -> channels-first (NCHW)
        T = T.permute(0, 3, 1, 2)
    if pre_process:
        logger.info("Pre-processing images...")
        T = fixed_image_standardization(T)
    # Ceil division: include the last partial batch without feeding an
    # empty batch to the network when the size divides evenly.
    n_batches = (T.shape[0] + batch_size - 1) // batch_size
    with torch.no_grad():
        embeddings = [
            resnet(T[batch_size * idx:batch_size * (idx + 1)]).cpu()
            for idx in tqdm(range(n_batches))
        ]
    return np.vstack(embeddings)

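# Hedged usage example, not from the original source: `face_paths` is a
# hypothetical list of pre-cropped face images on disk, resized here to the
# 160x160 input size the VGGFace2 weights were trained on.
def example_get_embeddings(face_paths):
    faces = np.stack([
        np.asarray(Image.open(p).convert("RGB").resize((160, 160)))
        for p in face_paths
    ]).astype(np.float32)  # shape (N, 160, 160, 3), raw 0-255 pixel values
    # Raw pixels still need standardization, hence pre_process=True.
    return get_embeddings(faces, batch_size=8, pre_process=True)  # (N, 512)
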
def detectAndConvert(self, frame):
    """
    Handles the actual face detection; detected faces are converted to
    tensors. The number of detected faces can be found with
    len(self.detected_person).
    """
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    boxes, probas = self.detector.detect(image)
    if boxes is not None:
        for box in boxes:
            face = extract_face(frame, box)
            prediction = self.recognizer.predict(
                fixed_image_standardization(face))
            print(prediction)
            if self.__debug:
                # Draw a rectangle around the face and label it with the
                # prediction. MTCNN boxes are floats, so cast the
                # coordinates to int for OpenCV.
                frame = cv2.rectangle(frame,
                                      (int(box[0]), int(box[1])),
                                      (int(box[2]), int(box[3])),
                                      (255, 0, 0))
                cv2.putText(frame, f'{prediction[0]}',
                            (int(box[0]), int(box[1] - 10)),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (200, 0, 0))

def crop_folder(imgs, out_file):
    resCropped = feature_extract(out_file, imgs)
    batch_size = 60
    outs = []
    for i in range(0, len(resCropped), batch_size):
        segment = []
        # Use a separate index so the outer loop variable is not shadowed.
        for j in range(i, min(len(resCropped), i + batch_size)):
            im = Image.fromarray(resCropped[j]).resize((160, 160))
            segment.append(
                np.asarray(im).transpose(2, 0, 1).astype(np.float32))
        segment = torch.tensor(np.stack(segment)).to(device)
        segment = fixed_image_standardization(segment)
        with torch.no_grad():
            out = resnet(segment).detach().cpu().numpy()
        outs.append(out)
    if len(outs) > 0:
        return resCropped, np.concatenate(outs)
    return resCropped, np.zeros((1, 512)).astype(np.float32)

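# Hedged sketch, not from the original source: one plausible implementation
# of the feature_extract() dependency of crop_folder(), detecting one face
# per input image with facenet_pytorch.MTCNN and returning HxWx3 uint8
# crops. The `out_file` argument is accepted only to match the call above.
from facenet_pytorch import MTCNN

def feature_extract(out_file, imgs):
    mtcnn = MTCNN(keep_all=False, post_process=False, device=device)
    cropped = []
    for img in imgs:
        face = mtcnn(img)  # 3x160x160 float tensor (0-255 range) or None
        if face is not None:
            cropped.append(face.permute(1, 2, 0).byte().numpy())
    return cropped
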
def __call__(self, frame) -> List[Face]:
    mtcnn = MTCNN(
        keep_all=True,
        min_face_size=100,
        image_size=160,
        margin=14,
        selection_method="center_weighted_size",
        post_process=True,
        device=self.device,
    )
    boxes, probs = mtcnn.detect(frame)
    faces = []
    if boxes is None:
        return faces
    for i, box in enumerate(boxes):
        if probs[i] < 0.93:  # skip low-confidence detections
            continue
        box = box.astype(int)
        faces.append(
            Face(box=box,
                 labels={},
                 image_tensor=fixed_image_standardization(
                     extract_face(frame, box))))
    return faces

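# Hedged usage sketch, not from the original source: assumes the class that
# owns __call__() above is called FaceExtractor and is constructed with a
# `device`; both names are hypothetical. Frames are read with OpenCV and
# converted to RGB before detection.
def example_video_loop(video_path, device):
    extractor = FaceExtractor(device)  # hypothetical owning class
    cap = cv2.VideoCapture(video_path)
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        for face in extractor(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)):
            # face.image_tensor is a standardized 3x160x160 crop, ready for
            # an embedding network such as InceptionResnetV1.
            print(face.box)
    cap.release()
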
def preprocess_image(image_path):
    """Load an image, normalize it, and convert it to a tensor."""
    img = Image.open(image_path)
    img_tensor = F.to_tensor(np.float32(img))
    return fixed_image_standardization(image_tensor=img_tensor)  # in [-1, 1]

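# Hedged follow-up sketch, not from the original source: feeding the tensor
# from preprocess_image() into InceptionResnetV1 for a 512-d embedding.
# Works best when the image on disk is already a 160x160 face crop, the
# input size the VGGFace2 weights were trained on.
def example_embed(image_path):
    resnet = InceptionResnetV1(pretrained="vggface2").eval()
    img_tensor = preprocess_image(image_path)  # 3 x H x W, values in [-1, 1]
    with torch.no_grad():
        emb = resnet(img_tensor.unsqueeze(0))  # add a batch dimension
    return emb.squeeze(0).numpy()  # shape (512,)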