Example #1
    def face_detect(self, images):
        detector = face_detection.FaceAlignment(
            face_detection.LandmarksType._2D,
            flip_input=False,
            face_detector=self.args.face_detector)

        batch_size = self.args.face_det_batch_size

        # Run detection in batches; on a CUDA OOM error, halve the batch size and retry.
        while True:
            predictions = []
            try:
                for i in tqdm(range(0, len(images), batch_size)):
                    predictions.extend(
                        detector.get_detections_for_batch(
                            np.array(images[i:i + batch_size])))
            except RuntimeError:
                if batch_size == 1:
                    raise RuntimeError(
                        'Image too big to run face detection on GPU. Please use the --resize_factor argument'
                    )
                batch_size //= 2
                print('Recovering from OOM error; New batch size: {}'.format(
                    batch_size))
                continue
            break

        results = []
        # Expand each detected box by the user-supplied paddings, clamped to the frame bounds.
        pady1, pady2, padx1, padx2 = self.args.pads
        for rect, image in zip(predictions, images):
            if rect is None:
                cv2.imwrite(
                    'temp/faulty_frame.jpg',
                    image)  # check this frame where the face was not detected.
                raise ValueError(
                    'Face not detected! Ensure the video contains a face in all the frames.'
                )

            y1 = max(0, rect[1] - pady1)
            y2 = min(image.shape[0], rect[3] + pady2)
            x1 = max(0, rect[0] - padx1)
            x2 = min(image.shape[1], rect[2] + padx2)

            results.append([x1, y1, x2, y2])

        boxes = np.array(results)
        if not self.args.nosmooth:
            # Temporally smooth the boxes over a sliding 5-frame window to reduce jitter.
            boxes = self.get_smoothened_boxes(boxes, T=5)
        results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)]
                   for image, (x1, y1, x2, y2) in zip(images, boxes)]

        del detector
        return results
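Example #1 calls a get_smoothened_boxes helper that the snippet does not include. A minimal sketch of what such a helper could look like, assuming boxes is an (N, 4) NumPy array and T is the window length used at the call site above (T=5); each box is replaced by the mean of a window of consecutive boxes:

    def get_smoothened_boxes(self, boxes, T):
        # Replace each box with the mean of up to T consecutive boxes.
        for i in range(len(boxes)):
            if i + T > len(boxes):
                window = boxes[len(boxes) - T:]  # clamp the window at the end
            else:
                window = boxes[i:i + T]
            boxes[i] = np.mean(window, axis=0)
        return boxes

Because boxes is built from integer coordinates, the in-place assignment keeps the array's integer dtype, so the smoothed values remain usable as slice indices in the list comprehension above.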
Example #2
    def extract_bbox(self, image):
        detector = face_detection.FaceAlignment(
            face_detection.LandmarksType._2D,
            flip_input=False,
            face_detector=self.face_detector)

        frame = [image]
        predictions = detector.get_detections_for_image(np.array(frame))
        person_num = len(predictions)
        if person_num == 0:
            return np.array([])
        results = []
        h, w, _ = image.shape
        for rect in predictions:
            bh = rect[3] - rect[1]
            bw = rect[2] - rect[0]
            cy = rect[1] + int(bh / 2)
            cx = rect[0] + int(bw / 2)
            margin = max(bh, bw)
            y1 = max(0, cy - margin)
            x1 = max(0, cx - int(0.8 * margin))
            y2 = min(h, cy + margin)
            x2 = min(w, cx + int(0.8 * margin))
            area = (y2 - y1) * (x2 - x1)
            results.append([x1, y1, x2, y2, area])
        # If a person has more than one bbox, keep only the largest one
        # (a greedy strategy might work better here).
        results.sort(key=lambda box: box[4], reverse=True)
        results_box = [results[0]]
        for i in range(1, person_num):
            num = len(results_box)
            add_person = True
            for j in range(num):
                pre_person = results_box[j]
                iou = self.IOU(pre_person[0], pre_person[1], pre_person[2],
                               pre_person[3], pre_person[4], results[i][0],
                               results[i][1], results[i][2], results[i][3],
                               results[i][4])
                if iou > 0.5:
                    add_person = False
                    break
            if add_person:
                results_box.append(results[i])
        boxes = np.array(results_box)
        return boxes
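The de-duplication loop above relies on a self.IOU helper that the snippet does not include. A hypothetical implementation whose argument order is inferred from the ten-argument call site (corner coordinates plus the precomputed area of each box); the body is a standard intersection-over-union computation, not taken from the original source:

    def IOU(self, ax1, ay1, ax2, ay2, area_a,
            bx1, by1, bx2, by2, area_b):
        # Intersection rectangle of the two boxes (empty if they do not overlap).
        ix1, iy1 = max(ax1, bx1), max(ay1, by1)
        ix2, iy2 = min(ax2, bx2), min(ay2, by2)
        inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
        union = area_a + area_b - inter
        return inter / union if union > 0 else 0.0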
Example #3
    def extract_bbox(self, image):
        detector = face_detection.FaceAlignment(
            face_detection.LandmarksType._2D, flip_input=False)

        frame = [image]
        predictions = detector.get_detections_for_image(np.array(frame))
        results = []
        h, w, _ = image.shape
        for rect in predictions:
            bh = rect[3] - rect[1]
            bw = rect[2] - rect[0]
            cy = rect[1] + int(bh / 2)
            cx = rect[0] + int(bw / 2)
            margin = max(bh, bw)
            y1 = max(0, cy - margin)
            x1 = max(0, cx - int(0.8 * margin))
            y2 = min(h, cy + margin)
            x2 = min(w, cx + int(0.8 * margin))
            results.append([x1, y1, x2, y2])
        boxes = np.array(results)
        return boxes
Example #4
import argparse

import cv2
import numpy as np

import face_detection

parser = argparse.ArgumentParser()

parser.add_argument('--ngpu',
                    help='Number of GPUs across which to run in parallel',
                    default=1,
                    type=int)
parser.add_argument('--batch_size',
                    help='Single GPU Face detection batch size',
                    default=32,
                    type=int)
parser.add_argument("--data_root",
                    help="Root folder of the LRS2 dataset",
                    required=True)
parser.add_argument("--preprocessed_root",
                    help="Root folder of the preprocessed dataset",
                    required=True)

args = parser.parse_args()

# Detector instance(s); process_video_file indexes this list by gpu_id.
fa = [
    face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                 flip_input=False)
]

template = 'ffmpeg -loglevel panic -y -i {} -strict -2 {}'
# template2 = 'ffmpeg -hide_banner -loglevel panic -threads 1 -y -i {} -async 1 -ac 1 -vn -acodec pcm_s16le -ar 16000 {}'
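The template is filled with an input path and an output path and run through the shell. A minimal usage sketch (the file names here are placeholders, not from the original script):

import subprocess

# Hypothetical invocation of the template above; paths are placeholders.
command = template.format('input.mp4', 'output.mp4')
subprocess.call(command, shell=True)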


def process_video_file(vfile, args, gpu_id):
    video_stream = cv2.VideoCapture(vfile)

    # Read every frame of the video into memory.
    frames = []
    while True:
        still_reading, frame = video_stream.read()
        if not still_reading:
            video_stream.release()
            break
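The snippet ends here, with the rest of process_video_file cut off. In preprocessing scripts of this shape, the collected frames are then typically split into fixed-size batches and passed to the detector instance for the given GPU; a sketch under that assumption (detection only, without the cropping and saving that would follow):

    batches = [frames[i:i + args.batch_size]
               for i in range(0, len(frames), args.batch_size)]
    for batch in batches:
        # Run the per-GPU detector on a whole batch of frames at once;
        # preds holds one detected box (or None) per frame in the batch.
        preds = fa[gpu_id].get_detections_for_batch(np.asarray(batch))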