Example #1
def get_faces(detector, images, box, args):
    ret_faces = []
    all_boxes = []
    avg_box = None
    all_imgs = []
    if box is None:
        # Get bounding boxes
        print('Getting bounding boxes')
        for lb in tqdm(np.arange(0, len(images), args.mtcnn_batch_size)):
            imgs_pil = [Image.fromarray(image) for image in images[lb:lb+args.mtcnn_batch_size]]
            boxes, _, _ = detector.detect(imgs_pil, landmarks=True)
            all_boxes.extend(boxes)
            all_imgs.extend(imgs_pil)
        # Check if boxes are fine, do temporal smoothing, return average box.
        img_size = (all_imgs[0].size[0] + all_imgs[0].size[1]) / 2
        stat, avg_box = check_boxes(all_boxes, img_size, args)
    else:
        all_imgs = [Image.fromarray(image) for image in images]
        stat, avg_box = True, box
    # Crop face regions.
    if stat:
        print('Extracting faces')
        for img in tqdm(all_imgs, total=len(all_imgs)):
            face = extract_face(img, avg_box, args.cropped_image_size, args.margin)
            ret_faces.append(face)
    return stat, ret_faces, avg_box
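A minimal usage sketch for get_faces, assuming facenet_pytorch's MTCNN and that check_boxes, extract_face, and the argparse-style args come from the same codebase; the frames and settings below are illustrative stand-ins only:

import numpy as np
from types import SimpleNamespace
from facenet_pytorch import MTCNN

detector = MTCNN(keep_all=False, post_process=False)
# Hypothetical settings; the real values normally come from argparse.
args = SimpleNamespace(mtcnn_batch_size=8, cropped_image_size=256, margin=70)
# Stand-in frames; real input is a list of HxWx3 uint8 numpy arrays.
video_frames = [np.zeros((480, 640, 3), dtype=np.uint8)]
stat, faces, avg_box = get_faces(detector, video_frames, box=None, args=args)
if stat:
    print('Extracted {} faces'.format(len(faces)))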
Example #2
def get_emb(emb_state, image, box):
    if emb_state is not None:
        return emb_state
    cropped_face = extract_face(image, box)
    cropped_face = prewhiten(cropped_face)
    emb = resnet(cropped_face.unsqueeze(0))[0].detach()
    return emb
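A usage sketch of the memoization contract above: the first call computes the embedding, and feeding it back on later calls skips recomputation. Here detections is an assumed iterable of (image, box) pairs:

emb = None
for image, box in detections:
    emb = get_emb(emb, image, box)  # computed once on the first pair, then reused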
Example #3
def crop_and_resize_face(source_image,
                         instances,
                         face_detector,
                         use_rendered,
                         rendered_image,
                         target_shape=(160, 160)):
    # Keep only pixels labelled as the face classes (23 and 24) in the instance map.
    face_mask = torch.logical_or(torch.eq(instances, 23),
                                 torch.eq(instances, 24))
    instances_masked = torch.mul(instances, face_mask)
    face_indices = torch.nonzero(instances_masked, as_tuple=True)
    resize_diff = int((target_shape[0] - target_shape[1]) / 2)
    if torch.numel(face_indices[0]) == 0 or torch.numel(
            face_indices[1]) == 0:
        return 0
    else:
        xmin, xmax = [
            torch.min(face_indices[0]).item(),
            torch.max(face_indices[0]).item()
        ]
        ymin, ymax = [
            torch.min(face_indices[1]).item(),
            torch.max(face_indices[1]).item()
        ]
        cropped_face = source_image[:, xmin:xmax, ymin:ymax]
        cropped_face = cropped_face.permute(
            (1, 2, 0)).add(1).div(2).mul(255).cpu().numpy()
        try:
            box = face_detector.detect(cropped_face)[0][0]
            if use_rendered:
                cropped_rendered_face = rendered_image[:, xmin:xmax, ymin:ymax]
                cropped_rendered_face = cropped_rendered_face.permute(
                    (1, 2, 0)).add(1).div(2).mul(255).detach().cpu().numpy()
                cropped_face = extract_face(cropped_rendered_face,
                                            box,
                                            image_size=target_shape[0])
            else:
                cropped_face = extract_face(cropped_face,
                                            box,
                                            image_size=target_shape[0])
            cropped_face = cropped_face[:, resize_diff:target_shape[0] -
                                        resize_diff, :]
            return cropped_face.cuda()
        except Exception:
            # Face detection on the crop failed; fall back to returning 0.
            return 0
Example #4
    def embeddings(self, path):
        video = mmcv.VideoReader(path)
        frames = [
            Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            for frame in video[1:]
        ]

        # One embedding list per known person.
        a = {i: [] for i in range(self.total_people)}
        for frame in frames:
            bounding_box, prob = self.mtcnn.detect(frame)
            # Skip frames where no face was detected.
            if bounding_box is None:
                continue

            for box in bounding_box:
                x1, y1, x2, y2 = box
                if (x1 > x2):
                    x1, x2 = x2, x1
                if (y1 > y2):
                    y1, y2 = y2, y1

                cropped_tensors = extract_face(frame, (x1, y1, x2, y2)).to(
                    self.device).view(-1, 3, 160, 160)
                emb = self.resnet(cropped_tensors)
                emb = emb.detach()
                if self.device.type == "cuda":
                    emb = emb.cpu()
                emb = emb.numpy()

                idx = -1
                min_dist = 10**9
                for i, e in enumerate(self.embeddings_initial):
                    d = emb - e
                    d = d.reshape(512)

                    # https://github.com/cmusatyalab/openface/blob/master/demos/compare.py
                    dist = np.dot(
                        d, d
                    )  # https://cmusatyalab.github.io/openface/demo-2-comparison/
                    if (dist < min_dist):
                        idx = i
                        min_dist = dist
                a[idx].append(emb)

                # testing for face tracking

                crop = frame.crop((x1, y1, x2, y2))
                crop = cv2.cvtColor(np.array(crop), cv2.COLOR_RGB2BGR)
                cv2.imshow(str(idx), crop)
                cv2.waitKey(1)

            print(len(a[0]))
        return a
Example #5
    def draw_one(self, frame, box, prob, landmark, count):
        """
        Draw landmarks and boxes for only one face detected
        """

        im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # cv2.imwrite expects BGR, so no RGB conversion is needed here.
        cv2.imwrite(
            configs['frames_folder'] + "original" + str(count) + ".png", frame)

        cropped_img = extract_face(im_rgb,
                                   box,
                                   image_size=224,
                                   save_path=configs['frames_folder'] +
                                   str(count) + ".png")

        # Draw rectangle on frame (MTCNN boxes are floats; cv2 needs ints).
        cv2.rectangle(frame, (int(box[0]), int(box[1])),
                      (int(box[2]), int(box[3])), (0, 0, 255),
                      thickness=2)

        transform = get_test_augmentations()

        # (C, H, W) -> (H, W, C)
        transformed_img = transform(
            image=np.array(cropped_img).transpose((1, 2, 0)))['image']
        # add batch dim
        transformed_img = transformed_img.unsqueeze(0)

        # transformed_img = transformed_img.to(device)
        cue = self.model.infer(transformed_img)

        # save cues
        save_image(cue,
                   configs['frames_folder'] + "cues/" + str(count) + ".png")

        score = cue.mean().cpu().item()

        # Show probability
        cv2.putText(frame, "FDet: " + "{:.3f}".format(prob),
                    (box[0], int(box[3])), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 0, 255), 2, cv2.LINE_AA)
        cv2.putText(frame, "Spoof Score: " + "{:.6f}".format(score),
                    (box[0], int(box[3] + 30.0)), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 0, 255), 2, cv2.LINE_AA)

        # Draw landmarks
        # cv2.circle(frame, tuple(ld[0]), 5, (0, 0, 255), -1)
        # cv2.circle(frame, tuple(ld[1]), 5, (0, 0, 255), -1)
        # cv2.circle(frame, tuple(ld[2]), 5, (0, 0, 255), -1)
        # cv2.circle(frame, tuple(ld[3]), 5, (0, 0, 255), -1)
        # cv2.circle(frame, tuple(ld[4]), 5, (0, 0, 255), -1)

        return frame
Example #6
def detectEmbed(width, height, bytedata):
    img = createImage(width, height, bytedata)
    boxes, probs = mtcnn.detect(img)
    if boxes is None:
        return None, None, None
    embeddings = []
    for i, box in enumerate(boxes):
        img_cropped = extract_face(img, box)
        img_embedding = resnet(img_cropped.unsqueeze(0))
        embeddings.append(img_embedding.cpu().detach().numpy())
    embeddings = np.array(embeddings)
    return boxes, probs, embeddings
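createImage above is assumed to build a PIL image from the raw bytes. Once the embeddings are returned, matching against a gallery of known faces reduces to a nearest-neighbour check; a minimal sketch, where the threshold is an assumption to be tuned per dataset:

import numpy as np

def match_embedding(embedding, known_embeddings, threshold=1.0):
    # L2 distance to every known embedding; smaller means more similar.
    dists = [np.linalg.norm(embedding - known) for known in known_embeddings]
    best = int(np.argmin(dists))
    return best if dists[best] < threshold else None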
Example #7
    def face_match(self, image, classify_model, person_names):
        box_dr = []
        text_dr = []
        mark_dr = []
        try:
            bboxes, prob, landmarks = self.mtcnn_pt.detect(image,
                                                           landmarks=True)
        except Exception as ex:
            with self.lock_boxes:
                self.box_draw[0] = box_dr
                self.text_draw[0] = text_dr
            return box_dr, text_dr, mark_dr
        if bboxes is None:
            with self.lock_boxes:
                self.box_draw[0] = box_dr
                self.text_draw[0] = text_dr
            return box_dr, text_dr, mark_dr
        for idx, box in enumerate(bboxes):
            if prob[idx] > 0.90:  # if face detected and probability > 90%
                box_dr.append(box)
                mark_dr.append(landmarks[idx])
                face = extract_face(image,
                                    box,
                                    image_size=self.mtcnn_pt.image_size,
                                    margin=self.mtcnn_pt.margin)
                face = fixed_image_standardization(face)
                emb = self.resnet(
                    face.unsqueeze(0)
                )  # pass the cropped face through the resnet model to get the embedding vector
                emb_array = emb.detach().numpy()
                predictions = classify_model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                if best_class_probabilities[0] > self.accuracy_th:
                    text = '{0}: {1:.0%}'.format(
                        person_names[best_class_indices[0]],
                        best_class_probabilities[0])
                else:
                    text = '{0}'.format('Unknown')
                text_dr.append(text)

        with self.lock_boxes:
            self.box_draw[0] = box_dr
            self.text_draw[0] = text_dr
            self.mark_draw[0] = mark_dr
            self.new_boxes = True
        return box_dr, text_dr, mark_dr
Example #8
def detect_face(img_dir='vgg2', margin=40):
    mtcnn = MTCNN(select_largest=True, device=device)
    img_dir = os.path.join(cur_path, img_dir)
    dir_lists = [os.path.join(img_dir, x) for x in os.listdir(img_dir)]
    dir_lists.sort()
    save_path = os.path.join(cur_path, img_dir + 'save_path')
    for dir_list in dir_lists[::-1]:
        begin = time.time()
        dir_save = os.path.join(save_path, dir_list.split('/')[-1])
        if not os.path.exists(dir_save):
            os.makedirs(dir_save)
        else:
            continue
        img_paths = [
            os.path.join(dir_list, x) for x in os.listdir(dir_list)
            if x.endswith('.jpg')
        ]
        img_paths.sort()
        print(dir_list)

        for img_path in img_paths:
            img = Image.open(img_path)
            start = time.time()
            boxes, _ = mtcnn.detect(img)
            draw = ImageDraw.Draw(img)
            w, h = img.size
            if boxes is None: continue
            for box in boxes:
                offset = margin / 2
                box[0],box[1],box[2],box[3]= max(box[0]-offset,0),max(box[1]-offset,0),\
                                             min(box[2]+offset,w),min(box[3]+offset,h)
                extract_face(img,
                             box,
                             save_path=os.path.join(
                                 dir_save,
                                 os.path.split(img_path)[-1]))
                #---output face shape is (160,160,3)
            end = time.time()
            print('img {} took {:.2f}s'.format(img_path, end - start))
Example #9
def predict():
    if request.method == 'POST':
        payload = request.get_json()
        images_b64 = payload['images']
        ids = payload['ids']
        db_id = payload['db_id']
        images = []
        for im_b64 in images_b64:
            im_binary = base64.b64decode(im_b64)
            images.append(im_binary)
        del images_b64
        gc.collect()
        probs, bbox = validate_images(images)
        
        filtered_images = []
        filtered_idxs = []
        output = []
        transform_tensor_to_image = transforms.ToPILImage()
        for i in range(len(probs)):
            if probs[i] is None:
                entry = {}
                entry['id'] = ids[i]
                entry['prob'] = None
                entry['class_id'] = None
                entry['class_name'] = None
                output.append(entry)
            else:
                print(bbox[i][0])
                face = extract_face(Image.open(io.BytesIO(images[i])), bbox[i][0])
                img = transform_tensor_to_image(face.cpu())
                filtered_images.append(img)
                filtered_idxs.append(ids[i])

        del images
        gc.collect()
        class_id, class_name, probs = get_prediction(db_id, filtered_images)
        del filtered_images
        gc.collect()

        for i in range(len(class_id)):
            entry = {}
            entry['id'] = filtered_idxs[i]
            entry['prob'] = probs[i]
            entry['class_id'] = class_id[i]
            entry['class_name'] = class_name[i]
            output.append(entry)
        
        return jsonify(output)
Example #10
    def _draw(self, frame, boxes, probs, landmarks, count):
        """
        Draw landmarks and boxes for each face detected
        """

        im_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # cv2.imwrite expects BGR, so no RGB conversion is needed here.
        cv2.imwrite(
            configs['frames_folder'] + "original" + str(count) + ".png", frame)
        for box, prob, ld in zip(boxes, probs, landmarks):
            cropped_img = extract_face(im_rgb,
                                       box,
                                       image_size=224,
                                       save_path=configs['frames_folder'] +
                                       str(count) + ".png")

            # Draw rectangle on frame (MTCNN boxes are floats; cv2 needs ints).
            cv2.rectangle(frame, (int(box[0]), int(box[1])),
                          (int(box[2]), int(box[3])),
                          (0, 0, 255),
                          thickness=2)

            transform = get_test_augmentations()

            # (C, H, W) -> (H, W, C)
            transformed_img = transform(
                image=np.array(cropped_img).transpose((1, 2, 0)))['image']
            # add batch dim
            transformed_img = transformed_img.unsqueeze(0)

            output = self.model.classify(transformed_img)
            prediction = torch.argmax(output, dim=1).cpu().numpy()

            # Show probability
            cv2.putText(frame, "FDet: " + str(prob),
                        (int(box[2]), int(box[3] - 30.0)),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        1, (0, 0, 255), 2, cv2.LINE_AA)
            cv2.putText(frame, str(labels_map.get(prediction[0])),
                        (int(box[2]), int(box[3])), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 0, 255), 2, cv2.LINE_AA)

            # Draw landmarks
            # cv2.circle(frame, tuple(ld[0]), 5, (0, 0, 255), -1)
            # cv2.circle(frame, tuple(ld[1]), 5, (0, 0, 255), -1)
            # cv2.circle(frame, tuple(ld[2]), 5, (0, 0, 255), -1)
            # cv2.circle(frame, tuple(ld[3]), 5, (0, 0, 255), -1)
            # cv2.circle(frame, tuple(ld[4]), 5, (0, 0, 255), -1)

        return frame
Example #11
    def annotate_apply_tracking(self, frame, font_size=FONT_SIZE, box_size=1):
        detections = self.prev_dets

        # Re-run detection every 4th frame; reuse cached detections otherwise.
        if self.stride % 4 == 0:
            detections = self.apply_tracking(frame.resize(DETECTION_SIZE))
            #detections = self.apply_tracking(frame)
            self.prev_dets = detections

        self.stride = self.stride + 1
        annotated = frame.copy()
        draw = ImageDraw.Draw(annotated)

        for j in range(len(detections)):
            detection_box = np.multiply(detections[j][:4], RESIZE_FACTOR)
            #detection_box = detections[j][:4]
            track_id = detections[j][4]

            num_frames = self.frames_per_track.get(track_id, 0) + 1
            self.frames_per_track[track_id] = num_frames
            label = self.face_labels.get(track_id)

            if label is None or num_frames % 7 == 0:
                embedding_list = self.track_embeddings.get(track_id, [])
                if len(embedding_list) < 5:
                    face = extract_face(frame, detection_box)
                    embedding = self.rec_model(face[None, :, :, :].to(self.device)).detach()
                    if embedding is not None:
                        embedding_list.append(embedding)
                        self.track_embeddings[track_id] = embedding_list
                        avg_embedding = torch.mean(torch.stack(embedding_list), dim=0)
                        name = self.identify_user(avg_embedding)
                        self.face_labels.update({track_id: name})

            draw.rectangle(
                [detection_box[0], detection_box[1], detection_box[2], detection_box[3]],
                outline=(0, 255, 0), width=box_size)

            draw.text(
                (detection_box[0], detection_box[3]),
                str(self.face_labels.get(track_id, "bad_embedding") + "_" + str(track_id)),
                font=ImageFont.truetype(TRUE_TYPE, font_size), fill=(0, 255, 0))

        return annotated
Example #12
    def detectAndConvert(self, frame):
        """
        Function that handles the actual face detection. Detected faces are converted to tensors. 
        Amount of detected faces can be found with len(self.detected_person)
        """
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        boxes, probas = self.detector.detect(image)

        if boxes is not None:
            for box in boxes:
                face = extract_face(frame, box)
                prediction = self.recognizer.predict(
                    fixed_image_standardization(face))
                print(prediction)
                if self.__debug:
                    # Draw a rectangle around the face (boxes are floats).
                    frame = cv2.rectangle(
                        frame, (int(box[0]), int(box[1])),
                        (int(box[2]), int(box[3])), (255, 0, 0))
                    cv2.putText(frame, f'{prediction[0]}',
                                (int(box[0]), int(box[1] - 10)),
                                cv2.FONT_HERSHEY_COMPLEX, 1, (200, 0, 0))
Example #13
    def preprocessing(self, path):
        video = mmcv.VideoReader(path)
        frame0 = Image.fromarray(
            cv2.cvtColor(video[0], cv2.COLOR_BGR2RGB)
        )  # Assumption - First frame has all the speaker's faces.

        bounding_box, prob = self.mtcnn.detect(frame0)
        print(bounding_box)
        print(prob)

        self.total_people = 0
        for box in bounding_box:
            x1, y1, x2, y2 = box
            if (x1 > x2):
                x1, x2 = x2, x1
            if (y1 > y2):
                y1, y2 = y2, y1

            print(x1, y1)
            print(x2, y2)

            cropped = frame0.crop((x1, y1, x2, y2))

            cropped.save("preprocessing/faces/Cropped" +
                         str(self.total_people) + ".png")

            cropped_tensors = extract_face(frame0, (x1, y1, x2, y2)).to(
                self.device).view(-1, 3, 160, 160)
            cropped_tensors = self.resnet(cropped_tensors)
            cropped_tensors = cropped_tensors.detach()
            if self.device.type == "cuda":
                cropped_tensors = cropped_tensors.cpu()
            cropped_tensors = cropped_tensors.numpy()
            self.embeddings_initial.append(cropped_tensors)
            self.total_people += 1

        # Separate video with no audio

        command = f'ffmpeg -i {path} -r 25 -c copy -an preprocessing/video/a.mp4'
        subprocess.call(command, shell=True)
Example #14
    def detectAndCrop(self, frame):

        if isinstance(frame, np.ndarray):
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        mtcnn_module = MTCNN(keep_all=True)
        boxes, probs = mtcnn_module.detect(frame)
        faces = []
        count = 0
        if boxes is None:
            return faces
        for box, prob in zip(boxes, probs):
            if prob > self.threshold:
                face = extract_face(frame, box)
                print("Face #{} detected with probability : {}".format(
                    count + 1, prob))
                faces.append({"bbox": box, "prob": prob})
                count = count + 1
                if self.saveIn is not None:
                    img = self.toPIL(face).convert('RGB')
                    img.save(
                        os.path.join(self.saveIn, "face_{}.jpg".format(count)))

        return faces
Example #15
    def __call__(self, frame) -> List[Face]:
        mtcnn = MTCNN(
            keep_all=True,
            min_face_size=100,
            image_size=160,
            margin=14,
            selection_method="center_weighted_size",
            post_process=True,
            device=self.device,
        )
        boxes, probs = mtcnn.detect(frame)
        faces = []
        if boxes is None:
            return faces
        for i, box in enumerate(boxes):
            if probs[i] < 0.93:
                continue
            box = box.astype(int)
            faces.append(
                Face(box=box,
                     labels={},
                     image_tensor=fixed_image_standardization(
                         extract_face(frame, box))))
        return faces
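Face is not defined in this snippet; a minimal sketch of a container that would satisfy the calls above (the field names are assumptions):

from dataclasses import dataclass, field
from typing import Dict, Optional
import numpy as np
import torch

@dataclass
class Face:
    box: np.ndarray                              # [x1, y1, x2, y2] as ints
    labels: Dict[str, str] = field(default_factory=dict)
    image_tensor: Optional[torch.Tensor] = None  # standardized 3x160x160 crop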
Example #16
    # Check whether a GPU is available
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # Set the MTCNN module parameters
    mtcnn = MTCNN(keep_all=False, device=device, post_process=False)

    # Get the list of files and directories
    fname, dname = listar_imagens(basedir)

    # Detect faces and save them to the facecrops folder
    inicio = time.time()
    print('Processing started')
    facecrop = [it.replace(basedir, basedir+'_faces') for it in fname]
    for f, filename in enumerate(fname):
        try:
            img = Image.open(filename)
            box, prob = mtcnn.detect(img)
        except Exception:
            print('Failed to process file ' + filename)
            continue
        # Guard against frames where no face was detected (prob is None).
        if prob is not None and prob[0] and prob[0] >= 0.95:
            savepath = '/projects/jeff/TUMGAIDimage_facecrops3' + \
                os.path.dirname(filename)[-9:]+'-'+os.path.basename(filename)
            extract_face(img, box[0], save_path=savepath)
    print('Processing finished')
    print(time.strftime('%H:%M:%S', time.localtime()))
    tempo_total = time.time() - inicio
    print("Total time: %02dm:%02ds" % divmod(tempo_total, 60))
Example #17
    def detect(self,
               img_ls,
               crop_size=None,
               mode='Extract_largest',
               save_faces=False,
               save_annotate=False,
               save_path='face_result'):
        """face detection

        Args:
            img_ls (list): list of array
            crop_size (tuple, optional): crop images with (left, top, right, bottom). Defaults to None.
            mode (str, optional): There're 3 modes, 'Detect', 'Detect_bool', and 'Extract'. 
                                    If you only want to know whether there're any faces, use 'Detect_bool' mode. 
                                    If you want to get boxes and probs of faces, use 'Detect'.
                                    If you want to get all information about faces, use 'Extract'.
                                    Defaults to 'Detect_bool'.
            face_num (int, optional): Number of faces to be extracted. Defaults to 1.
            save_faces (bool, optional): For 'Extract' mode. Defaults to False.
            save_annotate (bool, optional): For 'Extract' mode. Save images with annotations. Defaults to False.

        Returns:
            tuple: depends on the mode.

        """
        if crop_size:
            for i, img in enumerate(img_ls):
                img_ls[i] = img.crop(crop_size)

        try:
            boxes, probs = self.mtcnn.detect(img_ls)
        except Exception as e:
            print(
                f'{e} \n...add crop_size=(left, top, right, bottom) to make images the same'
            )
            return None

        if mode == 'Detect_bool':
            return isinstance(boxes, np.ndarray)
        elif mode == 'Detect':
            return boxes, probs
        elif 'Extract' in mode:
            faces = []
            annotates = []
            boxes = boxes.tolist()
            probs = probs.tolist()
            for id_, img in enumerate(img_ls):
                face_batch = []
                img_annotate = img.copy()
                draw = ImageDraw.Draw(img_annotate)
                box_all = boxes[id_]
                if mode == 'Extract_largest':
                    for i, box in enumerate(box_all):
                        left = max(0, box[0])
                        top = max(0, box[1])
                        right = min(np.array(img_ls[id_]).shape[1], box[2])
                        down = min(np.array(img_ls[id_]).shape[0], box[3])
                        box_all[i] = [left, top, right, down]
                    area = list(map(self._cal_area, box_all))
                    max_id = area.index(max(area))
                    box = box_all[max_id]
                    box_head = [
                        box[0] - box[0] / 8, box[1] - box[1] / 5,
                        box[2] + box[2] / 8, box[3] + box[3] / 10
                    ]
                    boxes[id_] = [box_head]
                    probs[id_] = [probs[id_][max_id]]

                    draw.rectangle(box_head, width=5)
                    if save_faces:
                        if not os.path.exists(save_path):
                            os.mkdir(save_path)
                        if not os.path.exists(os.path.join(save_path,
                                                           'faces')):
                            os.mkdir(os.path.join(save_path, 'faces'))
                        face_batch.append(
                            extract_face(img,
                                         box_head,
                                         save_path=os.path.join(
                                             save_path,
                                             f'detected_face_{id_}-{0}.png')))
                    else:
                        face_batch.append(extract_face(img, box_head))
                elif mode == 'Extract_all':
                    for i, box in enumerate(box_all):
                        box_head = [
                            box[0] - box[0] / 3, box[1] - box[1] / 3,
                            box[2] + box[2] / 83, box[3] + box[3] / 10
                        ]
                        box_all[i] = box_head
                        draw.rectangle(box_head, width=5)  # box.tolist()
                        if save_faces:
                            if not os.path.exists(save_path):
                                os.mkdir(save_path)
                            if not os.path.exists(
                                    os.path.join(save_path, 'faces')):
                                os.mkdir(os.path.join(save_path, 'faces'))
                            face_batch.append(
                                extract_face(
                                    img,
                                    box_head,
                                    save_path=os.path.join(
                                        save_path,
                                        f'detected_face_{id_}-{i}.png')))
                        else:
                            face_batch.append(extract_face(img, box_head))
                else:
                    print(f"Error: there's no mode called {mode}")
                faces.append(face_batch)
                annotates.append(np.asarray(img_annotate))
                if save_annotate:
                    if not os.path.exists(save_path):
                        os.mkdir(save_path)
                    if not os.path.exists(
                            os.path.join(save_path, 'annotations')):
                        os.mkdir(os.path.join(save_path, 'annotations'))
                    img_annotate.save(
                        os.path.join(save_path, f'annotated_faces_{id_}.png'))
            return np.asarray(boxes), probs, annotates, faces
        else:
            print(f"Error: there's no mode called {mode}")
Example #18
    def detect_live(self):
        
        mtcnn = MTCNN()
        faces = {}
        frameCount = 0

        vid = cv2.VideoCapture(0)

        if self.record_for is not None : 
            start_time = time.time()

        while vid.isOpened():

            if self.record_for is not None :
                curr_time = time.time() - start_time
                if curr_time > self.record_for :
                    break                 
        
            _, frame = vid.read()
            # OpenCV captures BGR frames; convert to RGB for PIL.
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frameCount = frameCount + 1

            boxes, probs = mtcnn.detect(frame)

            frame_draw = frame.copy()
            draw = ImageDraw.Draw(frame_draw)
            if boxes is not None:

                faces["frame_{}".format(frameCount)] = []

                for box, p in zip(boxes, probs):
                    if p > 0.70:
                        draw.rectangle(box.tolist(), outline=(255, 0, 0), width=1)
                    if self.extract:
                        face = extract_face(frame, box.tolist())
                        faces["frame_{}".format(frameCount)].append(face)
                        if self.save:
                            img = self.tsfms(face)
                            if self.saveIn is None:
                                raise ValueError("saveIn must be set when save=True")
                            img.save(os.path.join(self.saveIn, "frame_{}.jpg".format(len(faces))))

                cv2.imshow("Tracking window", cv2.cvtColor(np.array(frame_draw), cv2.COLOR_RGB2BGR))
                if self.save_video == True : 
                    self.frames_tracked.append(frame_draw)                
                if cv2.waitKey(1) == ord("a") :
                    break
                

        
        vid.release()
        
        if self.save_video:
            print(len(self.frames_tracked))
            self.saveVideo(self.saveIn, self.frames_tracked, "trackedVid")

        if self.save:
            return len(faces.keys()), faces
        else:
            return None, None
Example #19
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Running on device: {}'.format(device))

mtcnn = MTCNN(keep_all=True, device=device)

frames = []
#files = glob.glob("/home/jeff/datasets/TUM Gait/data_person1+2/image/p001/b01/*")
files = glob.glob("reid/b01/*")
for myFile in files:
    fileName = os.path.splitext(os.path.basename(myFile))[0]

    img = Image.open(myFile)

    boxes, probs, points = mtcnn.detect(img, landmarks=True)
    if boxes is not None:
        # Draw boxes and save faces
        img_draw = img.copy()
        draw = ImageDraw.Draw(img_draw)
        for i, (box, point) in enumerate(zip(boxes, points)):
            draw.rectangle(box.tolist(), width=5)
            for p in point:
                #draw.rectangle((p - 10).tolist() + (p + 10).tolist(), width=10)
                draw.point(p)
            extract_face(
                img,
                box,
                save_path='reid/output/detected_face_{}_{}.png'.format(
                    fileName, i))
        img_draw.save('reid/output/annotated_faces_{}.png'.format(fileName))
Example #20
def main():
    # Read options
    opt = TestOptions().parse(save=False)
    # If demo directory to save generated frames is given
    if opt.demo_dir is not None and not os.path.exists(opt.demo_dir):
        os.makedirs(opt.demo_dir)

    # hardcoded constant values
    opt.nThreads = 0
    opt.batchSize = 1
    opt.serial_batches = True
    # GPU id to be used for mxnet/reconstructor
    opt.gpu_id = opt.gpu_ids[-1]
    # Device to be used for MTCNN face detector
    detector_device = 'cpu'
    # Face bounding box margin
    margin = 120
    # How many frames from the target's training video
    # to consider when gathering head pose and eye size statistics
    n_frames_target_used = 1000
    # How many of the first source frames to consider for eye size adaptation
    # between source and target.
    n_frames_init = 25
    # For cuda initialization errors.
    torch.multiprocessing.set_start_method('spawn', force=True)

    # Initialize video renderer.
    modelG = create_model(opt)
    # Initialize NMFC renderer.
    renderer = NMFCRenderer(opt)
    # Initialize face detector.
    detector = MTCNN(image_size=opt.loadSize,
                     margin=margin,
                     post_process=False,
                     device=detector_device)
    # Initialize landmark extractor.
    dlib_detector = dlib.get_frontal_face_detector()
    dlib_predictor = dlib.shape_predictor(
        'preprocessing/files/shape_predictor_68_face_landmarks.dat')

    # Read the identity parameters from the target person.
    id_params, _ = read_params(
        'id', os.path.join(opt.dataroot, 'train', 'id_coeffs'),
        opt.target_name)
    # Read camera parameters from target
    t_cam_params, _ = read_params('cam',
                                  os.path.join(opt.dataroot, 'train', 'misc'),
                                  opt.target_name)
    t_cam_params = t_cam_params[:n_frames_target_used]
    # Read eye landmarks from target's video.
    eye_landmarks_target = read_eye_landmarks(
        os.path.join(opt.dataroot, 'train', 'landmarks70'), opt.target_name)
    eye_landmarks_target[0] = eye_landmarks_target[0][:n_frames_target_used]
    eye_landmarks_target[1] = eye_landmarks_target[1][:n_frames_target_used]

    # Setup camera capturing
    window_name = 'Head2Head Demo'
    video_capture = cv2.VideoCapture(0)
    video_capture.set(cv2.CAP_PROP_BUFFERSIZE,
                      2)  # set double buffer for capture
    fps = video_capture.get(cv2.CAP_PROP_FPS)
    print("Video capture at {} fps.".format(fps))

    processes = []

    # Face tracker / detector
    box_redetect_nframes = opt.box_redetect_nframes
    box = None  # Face bounding box, calculated on the first frame

    # Face reconstructor / NMFC renderer
    nmfc = None  # Current nmfc image
    s_cam_params = []  # camera parameters of source video.
    adapted_cam_params = [
    ]  # camera parameters of source video, adapted to target.

    # Facial (eyes) landmarks detector
    prev_eye_centres = None  # Eye centres in previous frame
    eye_landmarks = None  # Final eye landmarks, send to video renderer.
    eye_landmarks_source = [
        [], []
    ]  # Eye landmarks from n_frames_init first frames of source video.
    eye_landmarks_source_queue = Queue(
    )  # Queue to write extracted eye landmarks from source video.
    landmarks_success_queue = Queue(
    )  # Queue to write whether eye landmark detection was successful
    frames_queue = Queue(
    )  # Queue for writing video frames, read by the landmark detector process.
    # Process for running 68 + 2 landmark detection in parallel with face reconstruction / NMFC rendering
    process_eye_landmarks = Process(
        target=compute_eye_landmarks,
        args=(dlib_detector, dlib_predictor, eye_landmarks_source_queue,
              landmarks_success_queue, frames_queue))
    process_eye_landmarks.start()
    processes.append(process_eye_landmarks)
    print('Launched landmark extractor!')

    # Video renderer (GAN).
    input_queue = torchQueue()  # Queue of GAN's input
    output_queue = torchQueue()  # Queue of GAN's output
    # Process for running the video renderer without waiting for NMFC + eye landmark creation.
    process_video_renderer = torchProcess(target=compute_fake_video,
                                          args=(input_queue, output_queue,
                                                modelG, opt))
    process_video_renderer.start()
    processes.append(process_video_renderer)
    print('Launched video renderer!')

    camera = None
    if opt.realtime:
        try:
            import pyfakewebcam
            stream_id = opt.realtime_cam_id
            webcam_width = webcam_height = opt.loadSize
            camera = pyfakewebcam.FakeWebcam(f'/dev/video{stream_id}',
                                             webcam_width, webcam_height)
            camera.print_capabilities()
            print(f'Fake webcam created on /dev/video{stream_id}.')
        except Exception as ex:
            print('Fake webcam initialization failed:')
            print(str(ex))

    iter = 0
    # Start main process (face reconstruction / NMFC rendering)
    while True:
        t0 = time.perf_counter()
        try:  # Read generated frames from video renderer's output Queue.
            # Non-blocking
            fake_frame, real_frame = output_queue.get_nowait()
            result = np.concatenate([real_frame, fake_frame[..., ::-1]],
                                    axis=1)
            # If output directory is specified save frames there.
            if opt.demo_dir is not None:
                result_path = os.path.join(opt.demo_dir,
                                           "{:06d}".format(iter) + '.png')
                cv2.imwrite(result_path, result)
            elif camera is not None:
                camera.schedule_frame(fake_frame)
            else:
                cv2.imshow(window_name, result)
                cv2.waitKey(1)
        except queue.Empty:  # If empty queue continue.
            pass
        # Read next frame
        _, frame = video_capture.read()
        # Crop the larger dimension of frame to make it square
        frame = make_frame_square(frame)

        if box_redetect_nframes > 0 and iter % box_redetect_nframes == 0:
            box = None
        # If no bounding box has been detected yet, run MTCNN (once in first frame)
        if box is None:
            box = detect_box(detector, frame)
        # If no face detected exit.
        if box is None:
            break
        # Crop frame at the point where the face was seen in the first frame.
        frame = extract_face(frame, box, opt.loadSize, margin)
        frame = tensor2npimage(frame)
        frame = np.transpose(frame, (1, 2, 0))
        # Send ROI frame to landmark detector, while the main Process performs face reconstruction.
        frames_queue.put(frame)
        # Get expression and pose, adapt pose and identity to target and render NMFC.
        success, s_cam_params, adapted_cam_params, new_nmfc = \
            compute_reconstruction(renderer, id_params, t_cam_params, s_cam_params,
                                   adapted_cam_params, frame)
        # Update the current NMFC if reconstruction was successful
        if success:
            nmfc = new_nmfc
        # If not, use previous nmfc. If it does not exist, exit.
        if not success and nmfc is None:
            break
        # Find eye centres using nmfc image.
        eye_centres, prev_eye_centres = search_eye_centres([nmfc[:, :, ::-1]],
                                                           prev_eye_centres)
        # Read Queue to get eye landmarks, if detection was successful.
        if landmarks_success_queue.get():
            eye_landmarks = eye_landmarks_source_queue.get()
        # If not, use previous eye landmarks. If they do not exist, exit.
        if eye_landmarks is None:
            break
        # If in first frames, determine the source-target eye size (height) ratio.
        if iter < n_frames_init:
            eye_landmarks_source[0].append(eye_landmarks[0])
            eye_landmarks_source[1].append(eye_landmarks[1])
            eye_ratios = compute_eye_landmarks_ratio(eye_landmarks_source,
                                                     eye_landmarks_target)
        # Adapt the eye landmarks to the target face, by placing to the eyes centre
        # and re-scaling their size to match the NMFC size and target eyes mean height (top-down distance).
        eye_lands = adapt_eye_landmarks(
            [[eye_landmarks[0]], [eye_landmarks[1]]], eye_centres, eye_ratios,
            s_cam_params[-1:], adapted_cam_params[-1:])
        # Send the conditional input to video renderer
        input_queue.put((nmfc, eye_lands[0], frame))
        iter += 1
        # Show frame rate.
        t1 = time.perf_counter()
        dt = t1 - t0
        print('fps: %0.2f' % (1 / dt))

    # Terminate processes and join
    for process in processes:
        process.terminate()
        process.join()

    renderer.clear()
    print('Main process exiting')
Example #21
def input_face_embeddings(frames: Union[List[str], np.ndarray],
                          is_path: bool,
                          mtcnn: MTCNN,
                          resnet: InceptionResnetV1,
                          face_embed_cuda: bool,
                          use_half: bool,
                          coord: List,
                          name: str = None,
                          save_frames: bool = False) -> torch.Tensor:
    """
        Get the face embedding

        NOTE: If a face is not detected by the detector,
        instead of throwing an error it zeros the input
        for embedder.

        NOTE: Memory hungry function, hence the profiler.

        Args:
            frames: Frames from the video
            is_path: Whether to read from filesystem or memory
            mtcnn: face detector
            resnet: face embedder
            face_embed_cuda: use cuda for model
            use_half: use half precision
            coord: normalized (x, y) position used to pick the target speaker's face
            name: video name, used when saving frames
            save_frames: whether to save the cropped faces to disk

        Returns:
            emb: Embedding for all input frames
    """
    cpu_device = torch.device("cpu")  # embeddings are returned on the CPU
    if face_embed_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    result_cropped_tensors = []
    no_face_indices = []
    for i, f in enumerate(frames):
        if is_path:
            frame = Image.open(f)
        else:
            frame = Image.fromarray(f.astype("uint8"))

        with torch.no_grad():
            cropped_tensors = None
            # f may be a file path, so take the dimensions from the loaded image.
            width, height = frame.size
            bounding_box, prob = mtcnn.detect(frame)

            if bounding_box is not None:
                for box in bounding_box:
                    x1, y1, x2, y2 = box
                    if (x1 > x2):
                        x1, x2 = x2, x1
                    if (y1 > y2):
                        y1, y2 = y2, y1

                    #for point in coord:
                    x, y = coord[0], coord[1]
                    x *= width
                    y *= height
                    if (x >= x1 and y >= y1 and x <= x2 and y <= y2):
                        cropped_tensors = extract_face(frame, box)
                        #print("found", box, x, y, end='\r')
                        break

        if cropped_tensors is None:
            #Face not detected, for some reason
            cropped_tensors = torch.zeros((3, 160, 160))
            no_face_indices.append(i)

        if save_frames:
            name = name.replace(".mp4", "")
            saveimg = cropped_tensors.detach().cpu().numpy().astype("uint8")
            saveimg = np.squeeze(saveimg.transpose(1, 2, 0))
            Image.fromarray(saveimg).save(f"{name}_{i}.png")

        result_cropped_tensors.append(cropped_tensors.to(device))

    if len(no_face_indices) > 20:
        # A few videos start with silence; tolerate up to ~0.5 s of missing faces, otherwise drop the video.
        return None
    del frames
    #Stack all frames
    result_cropped_tensors = torch.stack(result_cropped_tensors)
    #Embed all frames
    result_cropped_tensors = result_cropped_tensors.to(device)
    if use_half:
        result_cropped_tensors = result_cropped_tensors.half()

    with torch.no_grad():
        emb = resnet(result_cropped_tensors)
    if use_half:
        emb = emb.float()
    return emb.to(cpu_device)
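A hedged call sketch, assuming frames is an array of HxWx3 video frames and coord holds the target speaker's normalized (x, y) screen position; the stand-in values are illustrative:

import numpy as np
from facenet_pytorch import MTCNN, InceptionResnetV1

mtcnn = MTCNN(keep_all=True)
resnet = InceptionResnetV1(pretrained='vggface2').eval()
frames = np.zeros((75, 480, 640, 3), dtype=np.uint8)  # stand-in clip
emb = input_face_embeddings(frames, is_path=False, mtcnn=mtcnn, resnet=resnet,
                            face_embed_cuda=False, use_half=False,
                            coord=[0.25, 0.5])  # speaker at left-centre (assumed)
if emb is not None:
    print(emb.shape)  # (num_frames, 512)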
Example #22
def calcEmbedsRec(urlNew):

    #initialize identified names
    recognized_names = []

    print('Received url: ', urlNew)
    device = torch.device('cuda:0')
    print('Running on device: {}'.format(device))

    mtcnn = MTCNN(image_size=160,
                  margin=0,
                  min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7],
                  factor=0.709,
                  prewhiten=True,
                  device=device)

    #Function takes 2 vectors 'a' and 'b'
    #Returns the cosine similarity according to the definition of the dot product
    def cos_sim(a, b):
        dot_product = np.dot(a, b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        return dot_product / (norm_a * norm_b)

    #cos_sim returns real numbers, where negative numbers have different interpretations.
    #So we use this function to return only positive values.
    def cos(a, b):
        minx = -1
        maxx = 1
        return (cos_sim(a, b) - minx) / (maxx - minx)

    # Define Inception Resnet V1 module (GoogLe Net)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    # Define a dataset and data loader
    dataset = datasets.ImageFolder('student_data/Test')
    dataset.idx_to_class = {i: c for c, i in dataset.class_to_idx.items()}
    loader = DataLoader(dataset, collate_fn=lambda x: x[0])

    #Perform MTCNN facial detection.
    #Detects the faces present in the images and prints the probability of each detected face.
    aligned = []
    names = []
    for x, y in loader:
        x_aligned, prob = mtcnn(x, return_prob=True)
        if x_aligned is not None:
            print('Face detected with probability: {:8f}'.format(prob))
            aligned.append(x_aligned)
            names.append(dataset.idx_to_class[y])

    # Calculate the 512 face embeddings
    aligned = torch.stack(aligned).to(device)
    embeddings = resnet(aligned).to(device)

    # Print distance matrix for classes.
    #The embeddings are plotted in space and cosine distace is measured.
    cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
    for i in range(0, len(names)):
        emb = embeddings[i].unsqueeze(0)
        # The cosine similarity between the embeddings is given by 'dist'.
        dist = cos(embeddings[0], emb)

    dists = [[cos(e1, e2).item() for e2 in embeddings] for e1 in embeddings]
    # The print statement below is
    #Helpful for analysing the results and for determining the value of threshold.
    print(pd.DataFrame(dists, columns=names, index=names))

    i = 1
    # Haarcascade classifier is loaded for webcam face detection.
    # It is preferred over MTCNN as it is faster; real-time applications need to be fast.
    classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')


    #This is the function for doing face recognition.
    def verify(embedding, start_rec_time):
        for i, k in enumerate(embeddings):
            for j, l in enumerate(embedding):
                #Computing Cosine distance.
                dist = cos(k, l)

                #The chosen threshold is 0.8.
                #The threshold is determined after inspecting the distance table printed above.
                if dist > 0.8:
                    #The name of the identified person is printed on the screen,
                    #as well as below the detected face (below the rectangular box).
                    text = names[i]

                    #textOnImg = text + " - Time Elapsed: " +  str(int(time.time() - start_rec_time)) + " s"
                    cv2.putText(img1, text, (boxes[j][0].astype(int),
                                             boxes[j][3].astype(int) + 17),
                                cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0),
                                2)
                    #cv2.putText(img1, textOnImg, (20, 20), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255,0,0), 2)
                    print(text)

                    #if text in names:
                    recognized_names.append(text)
                #else:
                textOnImg = "Time Elapsed: " + str(
                    int(time.time() - start_rec_time)) + " s"
                cv2.putText(img1, textOnImg, (20, 20),
                            cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)

    #Define Inception Resnet V1 module (GoogLe Net)
    resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

    mtcnn = MTCNN(image_size=160,
                  margin=0,
                  min_face_size=20,
                  thresholds=[0.6, 0.7, 0.7],
                  factor=0.709,
                  prewhiten=True,
                  device=device,
                  keep_all=True)

    #Camera is opened. Webcam video streaming starts.
    #vs = WebcamVideoStream(src=0).start()
    print("Camera on")
    cv2.namedWindow("Detected faces")

    options = {
        "CAP_PROP_FRAME_WIDTH": 640,
        "CAP_PROP_FRAME_HEIGHT": 480,
        "CAP_PROP_FPS ": 30
    }
    output_params = {"-fourcc": "MJPG", "-fps": 30}
    writer = WriteGear(output_filename='Output.mp4',
                       compression_mode=False,
                       logging=True,
                       **output_params)
    #stream = VideoGear(source=0, time_delay=1, logging=True, **options).start()

    #url = "http://192.168.43.223:8080/shot.jpg"
    url = urlNew

    #run face recognition for 1 minute
    start_face_rec = time.time()
    end_face_rec = time.time() + 60

    while (time.time() < end_face_rec):

        # frm = stream.read()
        # if frm is None:
        #     break

        img_resp = requests.get(url)
        img_arr = np.array(bytearray(img_resp.content), dtype=np.uint8)

        img = cv2.imdecode(img_arr, -1)

        #im= vs.read()
        #Flip to act as a mirror

        im = cv2.flip(img, 1)

        #try:
        #The resize function of imutils maintains the aspect ratio.
        #It provides the keyword arguments width and height so the image can be resized to the intended width/height.
        frame = imutils.resize(im, width=400)

        #Detecting faces using the PCN detector.

        winlist = pcn.detect(frame)
        img1 = pcn.draw(frame, winlist)
        face = list(map(lambda win: crop_face(img1, win, 160), winlist))
        face = [f[0] for f in face]
        #cv2.imshow('Live Feed', img1)
        cnt = 1
        for f in face:
            #fc, u = crop_face(img, f)
            print('Printing Face no: ', cnt)
            cv2.imshow('Detected faces', f)
            cnt += 1

            #faces = classifier.detectMultiScale(face)
            path = "./student_data/Pics/".format(i)
            img_name = "image_{}.jpg".format(i)
            #The captured image is saved.
            cv2.imwrite(os.path.join(path, img_name), f)
            imgName = "./student_data/Pics/image_{}.jpg".format(i)

            # Get cropped and prewhitened image tensor
            img = Image.open(imgName)
            i = i + 1
            img_cropped = mtcnn(img)
            boxes, prob = mtcnn.detect(img)
            img_draw = img.copy()
            draw = ImageDraw.Draw(img_draw)
            #print(boxes)
            #Rectangular boxes are drawn on faces present in the image.
            #The detected and cropped faces are then saved.
            if (boxes is not None):
                # Use a separate loop variable so the global image counter i is not clobbered.
                for j, box in enumerate(boxes):
                    #draw.rectangle(box.tolist())
                    extract_face(
                        img,
                        box,
                        save_path='./student_data/Pics/Cropped_Face_{}.jpg'.
                        format(j))
                img_draw.save('./student_data/Pics/Faces_Detected.jpg')
                ima = cv2.imread('./student_data/Pics/Faces_Detected.jpg')

                #Calculate embeddings of each cropped face.
            if (img_cropped is not None):
                img_embedding = resnet(img_cropped.cuda()).to(device)

                #Call function verify.
                #Identify the person with the help of embeddings.
                cos_sim = nn.CosineSimilarity(dim=-1, eps=1e-6)
                verify(img_embedding, start_face_rec)
            #else:
            #textForImg = "Time Elapsed: " +  str(int(time.time() - start_face_rec)) + " s"
            #cv2.putText(frame, textForImg, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255,255,255), 2)

            #'Detecting..' window opens.
            #Rectangular boxes are drawn on detected faces.
            #The identified faces have their respective name below the box.
            cv2.imshow('Detecting...', img1)
            writer.write(img1)

        if (not face):
            #cv2.imshow(f"Time Elapsed: ${str(int(time.time() - start_face_rec))}  s" ,frame)
            textForImg = "Time Elapsed: " + str(
                int(time.time() - start_face_rec)) + " s"
            cv2.putText(img1, textForImg, (40, 40),
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 0, 0), 2)
            #print("no face")
            cv2.imshow('Detecting...', img1)
        # except:
        #     #In case 'try' doesn't work, "Get the image embedding" text is printed on the screen.
        #     #Run first cell
        #     text="Get the image embeddings"
        #     print(text)
        #     break

        key = cv2.waitKey(1)

        #13 is for 'Enter' key.
        #If 'Enter' key is pressed, all the windows are made to close forcefully.
        if key == 13:
            break

    print("calculating a list of all recognized faces...")

    rec_names_dict = {i: recognized_names.count(i) for i in recognized_names}

    filtered_names = []
    for key in rec_names_dict:
        if rec_names_dict[key] > 30:
            filtered_names.append(key)

    print("Total Recognized names: ", rec_names_dict)

    print("Filtered names: ", filtered_names)

    cv2.destroyAllWindows()
    writer.close()
    #vs.stop()
    #return {i:rec_names_dict[i] for i in filtered_names}
    return filtered_names
Example #23
def upload_file():
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # If the user does not select a file, the browser may
        # submit an empty part without a filename.
        if file.filename == '':
            #flash('No selected file')
            return render_template('notselected.html', message='File is not selected')

        if 'jpg' in str(file.filename).lower() or 'jpeg' in str(file.filename).lower() or 'png' in str(file.filename).lower():
        
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)        
                filedir = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(filedir)
                filek = '/static/uploads/'+filename
                img = Image.open(filedir)
                boxes, probs = mtcnn.detect(img)
                # Draw boxes and save faces
                img_draw = img.copy()
                draw = ImageDraw.Draw(img_draw)
                if boxes is not None:
                    for j, box in enumerate(boxes):
                        extract_face(img, box, save_path='detected/detected_face_{}.png'.format(j)) 
                        with open(str(APP_ROOT) +'/detected/detected_face_{}.png'.format(j), 'rb') as f:
                            image_bytes = f.read()
                            pred_idx = get_prediction(image_bytes=image_bytes)
                            if int(pred_idx) == 0:
                                draw.rectangle(box.tolist(), width=10,outline=ImageColor.getrgb('green'))
                            else:
                                draw.rectangle(box.tolist(), width=10,outline=ImageColor.getrgb('red'))
                    img_draw.save(filedir)

                
        if 'mp4' in str(file.filename).lower() or 'mov' in str(file.filename).lower():
        
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                filedir = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(filedir)
                filek = '/static/uploads/'+filename
                
                video = mmcv.VideoReader(filedir)
                frames = [Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) for frame in video]
                last_predict = []
                box_list = []
                frames_tracked = []
                # loop through frames 
                for i, frame in enumerate(frames):
                    
                    width, height = frame.size
                    # Frequency to detect
                    if i % frequency == 0:
                        last_predict.clear()
                        # Detect faces
                        boxes, prob = mtcnn.detect(frame)
                        
                        box_list.append(boxes)
                        # Draw boxes and save faces
                        frame_draw = frame.copy()
                        draw = ImageDraw.Draw(frame_draw)
                        # Check if there is detection
                        if boxes is not None:
                            #Loop through all detections
                            for j, box in enumerate(boxes):
                                extract_face(frame, box, save_path='detected/detected_face_{}.png'.format(j))
                                with open(str(APP_ROOT) +'/detected/detected_face_{}.png'.format(j), 'rb') as f:
                                    image_bytes = f.read()
                                    pred_idx = get_prediction(image_bytes=image_bytes)
                                    last_predict.append(pred_idx)
                                    #blist = box.tolist()
                                    
                                    if int(pred_idx) == 0:
                                        draw.rectangle(box.tolist(), width=10,outline=ImageColor.getrgb('green'))
                                        #draw.text((blist[2], blist[3]), 'Mask', fill='green', font = ImageFont.truetype("/content/ArialMT.ttf",20))
                                        #draw.text((blist[2], blist[3]-30), str(prob), fill='green', font = ImageFont.truetype("/content/ArialMT.ttf",20))
                                    else:
                                        draw.rectangle(box.tolist(), width=10,outline=ImageColor.getrgb('red'))
                                        #draw.text((blist[2], blist[3]), 'Without mask', fill='red', font = ImageFont.truetype("/content/ArialMT.ttf",20))
                                        #draw.text((blist[2], blist[3]-30), str(prob), fill='red', font = ImageFont.truetype("/content/ArialMT.ttf",20))
                                       
                            frames_tracked.append(frame_draw.resize((width, height), Image.BILINEAR))
                        # If not detected            
                        else:
                            frames_tracked.append(frame_draw.resize((width, height), Image.BILINEAR))
                            
                    # If this is not the frequency to detect
                    else:
                        # If there were detections in previous frame
                        if box_list[-1] is not None:
                            boxes = box_list[-1]
                            # Draw boxes and save faces
                            frame_draw = frame.copy()
                            draw = ImageDraw.Draw(frame_draw)
                            
                            # If there were detections in previous frame
                            if boxes is not None:
                                
                                for j, box in enumerate(boxes):
                                    
                                    if int(last_predict[j]) == 0:
                                        draw.rectangle(box.tolist(), width=10,outline=ImageColor.getrgb('green'))
                                    else:
                                        draw.rectangle(box.tolist(), width=10,outline=ImageColor.getrgb('red'))
                                        
                                    
                        
                                frames_tracked.append(frame_draw.resize((width, height), Image.BILINEAR))
                        else:
                            # If no detections in the previous frame, append the frame unchanged.
                            frames_tracked.append(frame.resize((width, height), Image.BILINEAR))
                                
                
                dim = frames_tracked[0].size
                fourcc = cv2.VideoWriter_fourcc(*'FMP4')    
                video_tracked = cv2.VideoWriter(filename, fourcc, 25.0, dim)
                for frame in frames_tracked:
                    video_tracked.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
                video_tracked.release()
                
                os.replace((str(APP_ROOT)+ '/' + filename), (str(APP_ROOT) +filek))
                
                                        
        return render_template('out.html', filek=filek)
            
    return render_template('index.html')
Example #24
def _get_emb(image, box):
    """Return facial embeddings from given image inside the box."""
    cropped_face = extract_face(image, box)
    cropped_face = prewhiten(cropped_face)

    return resnet(cropped_face.unsqueeze(0))[0].detach()
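The embedding returned above is typically compared with stored reference embeddings; a minimal sketch using cosine similarity, where the threshold is an assumption to be tuned on held-out data:

import torch.nn.functional as F

def is_same_person(emb1, emb2, threshold=0.6):
    # Cosine similarity between the two 512-d embeddings.
    sim = F.cosine_similarity(emb1.unsqueeze(0), emb2.unsqueeze(0)).item()
    return sim > threshold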