def forward(self, img, **kwargs):
    """Detect faces in one image and return the crops together with boxes.

    Simplified MTCNN forward step: nothing is saved to file, no
    probabilities are returned and only one image is handled per call,
    but the bounding box(es) are returned alongside the face crop(s).

    Args:
        img: Image passed to ``self.detect`` and to ``extract_face``.
        **kwargs: Accepted for call compatibility; not used here.

    Returns:
        ``(None, None)`` when ``self.detect`` reports no boxes. Otherwise a
        ``(faces, boxes)`` pair: with ``self.keep_all`` set, the stacked
        crops and all boxes; without it, the first crop and the first box.
    """
    # Detection is the only step run with gradient tracking disabled.
    with torch.no_grad():
        detected_boxes, _ = self.detect(img)

    if detected_boxes is None:
        return None, None

    keep_all = self.keep_all
    if not keep_all:
        # Index with a list so the result keeps its leading dimension.
        detected_boxes = detected_boxes[[0]]

    crops = []
    for box in detected_boxes:
        crop = extract_face(img, box, self.image_size, self.margin, None)
        crops.append(
            fixed_image_standardization(crop) if self.post_process else crop
        )

    if keep_all:
        return torch.stack(crops), detected_boxes
    return crops[0], detected_boxes[0]
def folder2lmdb(samples, name="train", write_frequency=5000, num_workers=16):
    """Detect faces in ``samples`` and store the first whitened crop in LMDB.

    Every image is opened, EXIF-normalised and passed through MTCNN. Images
    without a detected face are skipped. For the others, the first
    preprocessed face tensor and its label are serialised with
    ``dumps_pyarrow`` under a consecutive ASCII integer key. The special
    entries ``__keys__`` and ``__len__`` are written at the end.

    Args:
        samples: Sample collection handed to ``CelebA_Dataset``.
        name: Database base name; the file is ``./model/<name>.lmdb``.
        write_frequency: Number of stored records between commits.
        num_workers: Kept for interface compatibility; currently unused.
    """
    transform = transforms.Compose([
        preprocessing.ExifOrientationNormalize(),
    ])
    batch_size = 128
    dataset = CelebA_Dataset(samples)
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
    print("Number of training samples in total: {}".format(len(samples)))
    print("Number of batches: {}".format(len(data_loader)))

    lmdb_path = osp.join("./model", "%s.lmdb" % name)
    isdir = os.path.isdir(lmdb_path)
    print("Generate LMDB to %s" % lmdb_path)
    # map_size is a byte count, so hand it over as an int rather than the
    # float literal 10e+11 (same value).
    db = lmdb.open(lmdb_path, subdir=isdir, map_size=int(1e12),
                   readonly=False, meminit=False, map_async=True)

    aligner = MTCNN(keep_all=True, thresholds=[0.6, 0.7, 0.9])
    facenet_preprocess = transforms.Compose([preprocessing.Whitening()])

    # Number of records written so far; also the next key to use.
    written = 0
    txn = db.begin(write=True)
    for image_paths, labels in data_loader:
        for image_path, label in zip(image_paths, labels):
            with Image.open(image_path) as handle:
                image = transform(handle.convert('RGB'))
            bbs, _ = aligner.detect(image)
            if bbs is None:
                continue
            faces = torch.stack([extract_face(image, bb) for bb in bbs])
            preprocessed_faces = facenet_preprocess(faces)
            # Only the preprocessed face tensor is stored, so no embedding
            # network is run here.
            print("putting image {} with label {}".format(written, label.shape))
            txn.put(u'{}'.format(written).encode('ascii'),
                    dumps_pyarrow((preprocessed_faces[0], label)))
            written += 1
            if written % write_frequency == 0:
                print("[%d/%d]" % (written, len(data_loader) * batch_size))
                txn.commit()
                txn = db.begin(write=True)

    # finish iterating through dataset
    txn.commit()
    # ``written`` is already the record count, so the keys are 0..written-1.
    keys = [u'{}'.format(k).encode('ascii') for k in range(written)]
    with db.begin(write=True) as txn:
        txn.put(b'__keys__', dumps_pyarrow(keys))
        txn.put(b'__len__', dumps_pyarrow(len(keys)))

    print("Flushing database ...")
    db.sync()
    db.close()
def main():
    """Streamlit entry point for the face-matching app.

    Shows a sidebar selector and dispatches to the chosen activity:
    "CELEB MATCH" (match faces in an uploaded image) or "VIDEO SEARCH"
    (browse frames of a video given by URL).
    """
    activity = ["CELEB MATCH", "VIDEO SEARCH"]
    choice = st.sidebar.selectbox("Choose Activity", activity)

    if choice == "CELEB MATCH":
        _run_celeb_match()
    elif choice == "VIDEO SEARCH":
        _run_video_search()


def _run_celeb_match():
    """Classify each face in an uploaded image and show it next to its match."""
    face_recogniser = load_model('model/face_recogniser.pkl')
    preprocess = preprocessing.ExifOrientationNormalize()
    uploaded_file = st.file_uploader("Choose an image...",
                                     type=["jpg", "png", "jpeg"])
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    image = preprocess(image)
    image = image.convert("RGB")

    bbs, _ = aligner.detect(image)
    if bbs is not None:
        faces = torch.stack([extract_face(image, bb) for bb in bbs])
        embeddings = facenet(facenet_preprocess(faces)).detach().numpy()
        predictions = face_recogniser.classifier.predict_proba(embeddings)
        for bb, probs in zip(bbs, predictions):
            try:
                _show_match(image, bb, probs, face_recogniser)
            except Exception as exc:
                # Best effort per face: report the problem and carry on with
                # the remaining faces instead of hiding it silently.
                st.warning(
                    "Could not display a match for one detected face: "
                    "{}".format(exc))
    else:
        st.write("Can't detect face")

    st.image(image, caption='Uploaded Image.', use_column_width=True)


def _show_match(image, bb, probs, face_recogniser):
    """Render one detected face, its reference image (if any) and its label."""
    cropped_faces = [image.crop(bb)]
    prediction = top_prediction(face_recogniser.idx_to_class, probs)

    files = glob.glob("images/" + prediction.label + "/*.*")
    if files:
        actor_image = Image.open(files[0])
        actor_image_bbs, _ = aligner.detect(actor_image)
        # The detector returns None when no face is found, so check that
        # before taking its length; fall back to the uncropped image.
        if actor_image_bbs is not None and len(actor_image_bbs) > 0:
            actor_image = actor_image.crop(actor_image_bbs[0])
        cropped_faces.append(actor_image)

    st.image(cropped_faces, width=100)
    st.write(prediction.label)


def _run_video_search():
    """Load the video behind a URL and display a user-selected frame."""
    st.write("Video Search")
    url = st.text_input("YOUTUBE URL")
    if not url:
        return
    video = get_video(url)
    if not video:
        return

    st.video(url)
    vpr = get_video_processor(video)
    vpr.read_frames()
    st.write("Number of frames " + str(vpr.frame_count))
    st.write("Duration " + str(int(vpr.duration)) + " s")
    frame_idx = st.number_input("Frame index", value=0, min_value=0,
                                max_value=vpr.frame_count-1)
    # Index 0 is a valid frame; a plain truthiness test would never show it.
    if frame_idx is not None:
        frame_image = Image.fromarray(vpr.frames[frame_idx])
        st.image(frame_image, caption='Image at selected frame')
def detect_and_extract_feature(self, img):
    """Detect faces in ``img`` and compute one embedding per face.

    Args:
        img: Image passed to ``self.detector.extract_faces`` and to
            ``extract_face``.

    Returns:
        ``(None, None)`` when the detector returns ``None``; otherwise
        ``(boxes, embeddings)`` where ``embeddings`` is the network output
        moved to the CPU and converted to a NumPy array.
    """
    boxes = self.detector.extract_faces(img, lm=False)
    if boxes is None:
        return None, None

    crops = [extract_face(img, box, self.img_size) for box in boxes]
    batch = self.facenet_preprocess(torch.stack(crops)).to(self.device)
    features = self.facenet(batch)
    return boxes, features.detach().to('cpu').numpy()
def extract_features(self, img):
    """Return the face boxes found in ``img`` and an embedding for each.

    Args:
        img: Image passed to ``self.aligner.detect`` and ``extract_face``.

    Returns:
        ``(None, None)`` if no face is detected; otherwise
        ``(boxes, embeddings)`` with ``embeddings`` as a NumPy array.
    """
    boxes, _ = self.aligner.detect(img)
    if boxes is None:
        # No face was detected.
        return None, None

    crops = torch.stack([extract_face(img, box) for box in boxes])
    whitened = self.facenet_preprocess(crops)
    return boxes, self.facenet(whitened).detach().numpy()
def extract_features(self, image: Image) -> Optional[ndarray]:
    """Compute embeddings for every face detected in ``image``.

    Args:
        image: Image passed to the aligner and to ``extract_face``.

    Returns:
        ``None`` when the first element of the aligner's detection result
        is ``None``; otherwise the embeddings as a NumPy array.
    """
    # Only the first element of the detection result (the boxes) is used.
    boxes = self.__aligner.detect(image)[0]
    if boxes is None:
        return None

    crops: Tensor = torch.stack([extract_face(image, box) for box in boxes])
    whitened = self.__facenet_preprocessor(crops)
    return self.__facenet(whitened).detach().numpy()
def extract(self, img, batch_boxes, save_path, probs):
    """Crop the detected faces whose probability exceeds ``self.confidence``.

    Args:
        img: A single image, or a batch given as a list/tuple or as a
            4-dimensional NumPy array / torch tensor.
        batch_boxes: Boxes for ``img``; one entry per image in batch mode.
            An entry of ``None`` means no boxes for that image.
        save_path: ``None``, a single path string, or a sequence of paths
            (one per image). Additional faces of one image get the suffix
            ``_<n>`` inserted before the extension.
        probs: Detection probabilities matching ``batch_boxes``.

    Returns:
        Per image: ``None`` if it had no boxes; an empty list if no box
        passed the confidence test; otherwise the stacked crops
        (``self.keep_all``) or the first kept crop. A single-image call
        returns that one result instead of a list.
    """
    # Determine if a batch or single image was passed
    is_batch = isinstance(img, (list, tuple)) or (
        isinstance(img, (np.ndarray, torch.Tensor)) and len(img.shape) == 4
    )
    if not is_batch:
        img, batch_boxes, probs = [img], [batch_boxes], [probs]

    # Parse save path(s)
    if save_path is None:
        save_path = [None] * len(img)
    elif isinstance(save_path, str):
        save_path = [save_path]

    # Process all bounding boxes
    results = []
    for image, boxes, path, scores in zip(img, batch_boxes, save_path, probs):
        if boxes is None:
            results.append(None)
            continue

        if not self.keep_all:
            boxes = boxes[[0]]

        crops = []
        for idx, box in enumerate(boxes):
            if path is not None and idx > 0:
                stem, ext = os.path.splitext(path)
                target = stem + '_' + str(idx + 1) + ext
            else:
                target = path

            # Skip detections that do not clear the confidence threshold.
            if not (scores[idx] > self.confidence):
                continue

            crop = extract_face(image, box, self.image_size, self.margin,
                                target)
            if self.post_process:
                crop = fixed_image_standardization(crop)
            crops.append(crop)

        if len(crops) > 0:
            crops = torch.stack(crops) if self.keep_all else crops[0]
        results.append(crops)

    return results if is_batch else results[0]
def extract_inception_feature(aligner, facenet_preprocess, facenet, img_path):
    """Compute FaceNet embeddings for every face found in an image file.

    Args:
        aligner: Detector whose ``detect(img)`` returns ``(boxes, _)``.
        facenet_preprocess: Callable applied to the stacked face crops.
        facenet: Callable producing embeddings from the preprocessed crops.
        img_path: Path of the image to open.

    Returns:
        The embeddings as a NumPy array, or ``None`` when no face is
        detected or when detection raises an exception.
    """
    # convert() returns a new, loaded image, so the file can be closed as
    # soon as the normalised copy exists.
    with Image.open(img_path) as raw:
        img = preprocessing.ExifOrientationNormalize()(raw.convert('RGB'))

    try:
        bbs, _ = aligner.detect(img)
    except Exception as e:
        # Without this return ``bbs`` would be unbound below and the
        # function would fail with UnboundLocalError.
        print(e)
        return None

    if bbs is None:
        # if no face is detected
        return None

    faces = torch.stack([extract_face(img, bb) for bb in bbs])
    preprocessed_faces = facenet_preprocess(faces)
    return facenet(preprocessed_faces).detach().numpy()
def align_image(image, mtcnn):
    """Detect and crop faces with ``mtcnn``, returning crops and boxes.

    Args:
        image: A single image, or a batch given as a list/tuple or a
            4-dimensional NumPy array.
        mtcnn: Detector providing ``detect``, ``keep_all``, ``image_size``,
            ``margin`` and ``post_process``.

    Returns:
        ``(faces, boxes)``. Per image, ``faces`` is ``None`` when nothing
        was detected (``boxes`` is then ``[None]`` with ``keep_all`` and
        ``None`` without); otherwise the stacked crops with the list of
        boxes (``keep_all``) or the first crop with the first box. For a
        single image the two per-image values are returned directly.
    """
    with torch.no_grad():
        detected, _ = mtcnn.detect(image)

    is_batch = isinstance(image, (list, tuple)) or (
        isinstance(image, np.ndarray) and len(image.shape) == 4
    )
    if not is_batch:
        image, detected = [image], [detected]

    faces, boxes = [], []
    for frame, frame_boxes in zip(image, detected):
        if frame_boxes is None:
            faces.append(None)
            boxes.append([None] if mtcnn.keep_all else None)
            continue

        if not mtcnn.keep_all:
            frame_boxes = frame_boxes[[0]]

        crops = []
        kept_boxes = []
        for box in frame_boxes:
            crop = extract_face(frame, box, mtcnn.image_size, mtcnn.margin)
            if mtcnn.post_process:
                crop = fixed_image_standardization(crop)
            crops.append(crop)
            kept_boxes.append(box)

        if mtcnn.keep_all:
            faces.append(torch.stack(crops))
            boxes.append(kept_boxes)
        else:
            faces.append(crops[0])
            boxes.append(kept_boxes[0])

    if is_batch:
        return faces, boxes
    return faces[0], boxes[0]
def load_data(dataset, transform, batch_size, batch_num):
    """Build ``(face, label)`` samples for one slice of ``dataset.samples``.

    The slice covers entries ``batch_num * batch_size`` up to (not
    including) ``batch_num * batch_size + batch_size``, clamped to the
    number of available samples. Images without a detected face are
    skipped; for the rest, the first whitened face crop is kept.

    Args:
        dataset: Object whose ``samples`` is a sequence of
            ``(img_path, label)`` pairs.
        transform: Callable applied to each opened RGB image.
        batch_size: Number of dataset entries per slice.
        batch_num: Zero-based index of the slice to load.

    Returns:
        List of ``(preprocessed_face, label)`` tuples.
    """
    aligner = MTCNN(prewhiten=False, keep_all=True, thresholds=[0.6, 0.7, 0.9])
    facenet_preprocess = transforms.Compose([preprocessing.Whitening()])

    start = batch_num * batch_size
    stop = min(start + batch_size, len(dataset.samples))

    collected = []
    for img_path, label in dataset.samples[start:stop]:
        img = transform(Image.open(img_path).convert('RGB'))
        print("type and shape ", type(img), img.size)
        boxes, _ = aligner.detect(img)
        if boxes is None:
            continue
        crops = torch.stack([extract_face(img, box) for box in boxes])
        whitened = facenet_preprocess(crops)
        collected.append((whitened[0], label))
    return collected
PIL_frame = Image.fromarray(np.uint8(frame)).convert('RGB') PIL_frame = Image.fromarray(frame.astype('uint8'), 'RGB') # Detect faces boxes, _ = mtcnn.detect(PIL_frame) # Draw faces frame_draw = PIL_frame.copy() draw = ImageDraw.Draw(frame_draw) if not (boxes is None): for box in boxes: box[0] -= margin box[1] -= margin box[2] += margin box[3] += margin draw.rectangle(box.tolist(), outline=(255, 0, 0), width=3) # extract face face = detect_face.extract_face(frame_draw, box, 32, 0) draw.text((box[0], box[1]), index_to_class[predict_image(face)]) cv2.imshow("My cam video", np.array(frame_draw.convert('RGB'))) out.write(np.array(frame_draw.convert('RGB'))) # frame_draw # Close and break the loop after pressing "x" key if cv2.waitKey(1) & 0XFF == ord('x'): cv2.destroyAllWindows() vid_capture.release() out.release() break