Example #1
def predict(args):
    """
    Solution 1 prediction script: runs smile and open-mouth detection over a directory of images
    :param args: argparse arguments
    :return: prints the measured inference time and the two lists of images that passed the filters
    """
    mtcnn = MTCNN()
    image_names = os.listdir(args.images_directory)
    smile_faces = list()
    open_mouth_faces = list()
    inference_measurements = list()

    with CustomObjectScope({'f1_score': f1_score}):
        model = load_model(
            '../nn_models/best_mobilenetv2_multiclassification.h5')

    for image_name in tqdm(image_names):
        image = JPEG(os.path.join(args.images_directory, image_name)).decode()
        start_time = time.time()
        bboxes = safe_detect_face_bboxes(image, mtcnn)

        if bboxes.shape[0] == 0:
            continue
        else:
            cropped_image = crop_image(image,
                                       bboxes.clip(min=0),
                                       bbox_number=0)
            cropped_image = imresize(cropped_image,
                                     (args.height, args.width)) / 255.
            predictions = model.predict(np.expand_dims(cropped_image, axis=0))
            inference_measurements.append(time.time() - start_time)
            predictions = [float(prediction) for prediction in predictions]

            if predictions[0] >= args.smile_prediction_threshold:
                smile_faces.append(image_name)

            if predictions[1] >= args.mouth_open_prediction_threshold:
                open_mouth_faces.append(image_name)

    print('\nAverage end-to-end inference time: {0} sec.'.format(
        np.round(np.mean(inference_measurements), 3)))

    print('\nIMAGES WITH SMILE')
    print('-----------------')
    for image in smile_faces:
        print('  {0}'.format(image))

    print('\nIMAGES WITH OPEN MOUTH')
    print('----------------------')
    for image in open_mouth_faces:
        print('  {0}'.format(image))
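
A hypothetical CLI wiring for the predict() above; the flag names are inferred from the attributes read off args, and the defaults are assumptions, so this may not match the repo's actual entry point:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Solution 1: smile and open mouth detection')
    parser.add_argument('--images_directory', required=True)
    parser.add_argument('--height', type=int, default=224)   # assumed default
    parser.add_argument('--width', type=int, default=224)    # assumed default
    parser.add_argument('--smile_prediction_threshold',
                        type=float, default=0.5)              # assumed default
    parser.add_argument('--mouth_open_prediction_threshold',
                        type=float, default=0.5)              # assumed default
    predict(parser.parse_args())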
Example #2
    def __getitem__(self, index):
        """
        Batch creation by index. Finds mouth, eye, and eyebrow landmarks and turns them into a feature vector
        :param index: batch index
        :return: batch with face features and labels
        """
        batch_pathways = self.pathways[index * self.batch_size:(index + 1) *
                                       self.batch_size]
        batch_x = list()
        batch_y_smile = list()
        batch_y_open_mouth = list()

        for i, pathway in enumerate(batch_pathways):
            image = JPEG(pathway).decode()
            bboxes = safe_detect_face_bboxes(image, self.mtcnn)

            if bboxes.shape[0] == 0:
                continue
            else:
                facemarks_coords = detect_facemarks_coords(
                    image,
                    bboxes.clip(min=0),
                    facemark_predictor_init=self.facemark_predictor)
                cropped_facemarks_coords = crop_facemarks_coords(
                    facemarks_coords, bboxes, bbox_number=0)
                resized_cropped_facemarks_coords = resize_facemarks_coords(
                    cropped_facemarks_coords,
                    original_crop_shape=(bboxes[0][3], bboxes[0][2]),
                    target_crop_shape=self.crop_shape)
                face_features = resized_cropped_facemarks_coords[
                    self.target_facemarks, :].ravel()
                batch_x.append(face_features)
                batch_y_smile.append(self.pathways_with_smile_labels[pathway])
                batch_y_open_mouth.append(
                    self.pathways_with_open_mouth_labels[pathway])

        batch_x = np.asarray(batch_x)
        batch_x = StandardScaler().fit_transform(batch_x)
        batch_y_smile = np.asarray(batch_y_smile)
        batch_y_open_mouth = np.asarray(batch_y_open_mouth)

        return batch_x, {
            'smile_output': batch_y_smile,
            'open_mouth_output': batch_y_open_mouth
        }
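
The feature vector is just the raveled (x, y) coordinates of a landmark subset. self.target_facemarks is not shown in this snippet, but Solution 3's predict() below uses list(range(17, 27)) + list(range(36, 68)); a self-contained sketch of the resulting shape, assuming that same index set and dlib's 68-point layout:

import numpy as np

# dlib 68-point layout: 17-26 eyebrows, 36-47 eyes, 48-67 mouth
target_facemarks = list(range(17, 27)) + list(range(36, 68))

coords = np.zeros((68, 2))            # stand-in for resized landmark coords
face_features = coords[target_facemarks, :].ravel()
print(face_features.shape)            # (84,): 42 points * (x, y)

Note that StandardScaler().fit_transform() above fits a fresh scaler on every batch, whereas Solution 3's predict() loads one saved scaler and only transforms; the two normalizations are therefore not identical.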
Example #3
    def predict(self, image_pathways):
        """
        Predicts labels on test images using the fitted threshold
        :param image_pathways: list of absolute image pathways
        :return: array with the predicted class for each image
        """
        if self.smile_deviations_sum_threshold:
            self.facemark_inference_measurements = list()
            self.predict_inference_measurements = list()
            predictions = list()

            for image_pathway in tqdm(image_pathways):
                image = JPEG(image_pathway).decode()

                start_facemarks_time = time.time()
                bboxes = safe_detect_face_bboxes(image,
                                                 self.mtcnn,
                                                 include_cnn=True)
                if bboxes.shape[0] == 0:
                    predictions.append(0)
                else:
                    facemarks_coords = detect_facemarks_coords(
                        image, bboxes.clip(min=0), self.facemark_predictor)
                    self.facemark_inference_measurements.append(
                        time.time() - start_facemarks_time)

                    start_predict_time = time.time()
                    lower_lip_points = self._calculate_line_points(
                        facemarks_coords[0], self.lower_lip_point_pairs)
                    deviations_sum = self._get_deviations_sum(lower_lip_points)
                    self.predict_inference_measurements.append(
                        time.time() - start_predict_time)

                    predictions.append(
                        0 if deviations_sum < self.smile_deviations_sum_threshold
                        else 1)

            return np.asarray(predictions)

        else:
            raise ValueError(
                'Train or set the smile_deviations_sum_threshold value')
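
_calculate_line_points and _get_deviations_sum are defined elsewhere in the repo. A minimal sketch of the underlying idea, assuming the deviation of each lower-lip landmark is measured against the straight line through the first and last points (roughly the mouth corners); the actual helpers may differ:

import numpy as np

def deviations_sum_sketch(lower_lip_points):
    # Illustrative only: sum of vertical deviations of lower-lip points
    # from the line joining the outermost points. A smiling mouth curves,
    # so its deviations sum tends to be larger than a neutral one.
    pts = np.asarray(lower_lip_points, dtype=float)
    (x0, y0), (x1, y1) = pts[0], pts[-1]
    slope = (y1 - y0) / (x1 - x0 + 1e-9)    # guard against a vertical line
    line_y = y0 + slope * (pts[:, 0] - x0)  # y of the line at each landmark x
    return float(np.sum(np.abs(pts[:, 1] - line_y)))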
Example #4
    def fit(self, image_pathways, smile_labels):
        """
        Searches for the optimal threshold on the training set using only lower-lip landmarks
        :param image_pathways: list of absolute image pathways
        :param smile_labels: smile labels
        :return: None; sets self.smile_deviations_sum_threshold
        """
        if not self.smile_deviations_sum_threshold:
            smile_deviations_sum = list()
            not_smile_deviations_sum = list()

            for image_pathway, smile_label in tqdm(
                    zip(image_pathways, smile_labels)):
                image = JPEG(image_pathway).decode()
                bboxes = safe_detect_face_bboxes(image,
                                                 self.mtcnn,
                                                 include_cnn=True)

                if bboxes.shape[0] == 0:
                    continue
                else:
                    facemarks_coords = detect_facemarks_coords(
                        image,
                        bboxes.clip(min=0),
                        facemark_predictor_init=self.facemark_predictor)
                    lower_lip_points = self._calculate_line_points(
                        facemarks_coords[0], self.lower_lip_point_pairs)
                    deviations_sum = self._get_deviations_sum(lower_lip_points)
                    if smile_label == 1:
                        smile_deviations_sum.append(deviations_sum)
                    else:
                        not_smile_deviations_sum.append(deviations_sum)

            self.smile_deviations_sum_threshold = _get_optimal_threshold(
                smile_deviations_sum,
                not_smile_deviations_sum,
                hist_bins=15,
                clip=0.06,
                round_to=3)
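
_get_optimal_threshold is also defined elsewhere; judging from its hist_bins/clip/round_to signature, one plausible reading is a sweep over candidate thresholds between the two value distributions, keeping the candidate with the fewest misclassifications. A sketch under that assumption, not the repo's exact implementation:

import numpy as np

def get_optimal_threshold_sketch(positive_values, negative_values,
                                 hist_bins=15, clip=0.06, round_to=3):
    # Sweep hist_bins candidate thresholds across the (clipped) value range
    # and keep the one that misclassifies the fewest samples under the
    # decision rule "predict 1 when value >= threshold".
    positive_values = np.asarray(positive_values, dtype=float)
    negative_values = np.asarray(negative_values, dtype=float)
    lo = min(positive_values.min(), negative_values.min()) + clip
    hi = max(positive_values.max(), negative_values.max()) - clip
    candidates = np.linspace(lo, hi, hist_bins)
    errors = [np.sum(positive_values < t) + np.sum(negative_values >= t)
              for t in candidates]
    return round(float(candidates[int(np.argmin(errors))]), round_to)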
Example #5
    def __getitem__(self, index):
        """
        Batch creation by index. Crops the face region from each image and applies augmentations
        :param index: batch index
        :return: batch with cropped images and labels
        """
        batch_pathways = self.pathways[index * self.batch_size:(index + 1) *
                                       self.batch_size]
        batch_x = list()
        batch_y_smile = list()
        batch_y_open_mouth = list()

        for i, pathway in enumerate(batch_pathways):
            image = JPEG(pathway).decode()
            bboxes = safe_detect_face_bboxes(image, self.mtcnn)

            if bboxes.shape[0] == 0:
                continue
            else:
                cropped_image = crop_image(image,
                                           bboxes.clip(min=0),
                                           bbox_number=0)
                batch_x.append(imresize(cropped_image, self.shape))
                batch_y_smile.append(self.pathways_with_smile_labels[pathway])
                batch_y_open_mouth.append(
                    self.pathways_with_open_mouth_labels[pathway])

        batch_x = np.asarray(batch_x, dtype=np.uint8)
        batch_y_smile = np.asarray(batch_y_smile)
        batch_y_open_mouth = np.asarray(batch_y_open_mouth)

        if self.augmentations_pipline:
            batch_x = self.augmentations_pipline.augment_images(batch_x)

        return batch_x / 255., {
            'smile_output': batch_y_smile,
            'open_mouth_output': batch_y_open_mouth
        }
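
augment_images() matches the imgaug batch API, so self.augmentations_pipline (spelling as in the source) is presumably an imgaug pipeline built at construction time. A minimal construction sketch; the specific augmenters are illustrative, not the repo's actual choice:

from imgaug import augmenters as iaa

# Illustrative pipeline; any imgaug Sequential exposing augment_images() fits.
augmentations_pipline = iaa.Sequential([
    iaa.Fliplr(0.5),                       # horizontal flip half the time
    iaa.Affine(rotate=(-10, 10)),          # small in-plane rotations
    iaa.AddToHueAndSaturation((-10, 10)),  # mild color jitter
])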
Example #6
    def fit(self, image_pathways, mouth_open_labels):
        """
        Searches for the optimal threshold on the training set using mouth landmarks
        :param image_pathways: list of absolute image pathways
        :param mouth_open_labels: mouth-open labels
        :return: None; sets self.mouth_aspect_ratio_threshold
        """
        if not self.mouth_aspect_ratio_threshold:
            open_mouth_mars = list()
            close_mouth_mars = list()

            for image_pathway, mouth_open_label in tqdm(
                    zip(image_pathways, mouth_open_labels)):
                image = JPEG(image_pathway).decode()
                bboxes = safe_detect_face_bboxes(image,
                                                 self.mtcnn,
                                                 include_cnn=True)

                if bboxes.shape[0] == 0:
                    continue
                else:
                    facemarks_coords = detect_facemarks_coords(
                        image,
                        bboxes.clip(min=0),
                        facemark_predictor_init=self.facemark_predictor)
                    mouth_aspect_ratio = self._get_mouth_aspect_ratio(
                        facemarks_coords[0])
                    if mouth_open_label == 1:
                        open_mouth_mars.append(mouth_aspect_ratio)
                    else:
                        close_mouth_mars.append(mouth_aspect_ratio)

            self.mouth_aspect_ratio_threshold = _get_optimal_threshold(
                open_mouth_mars,
                close_mouth_mars,
                hist_bins=15,
                clip=0.3,
                round_to=2)
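
_get_mouth_aspect_ratio is not shown. A widely used mouth-aspect-ratio definition over dlib's 68 landmarks divides the averaged vertical lip gaps by the mouth width; this is an assumption about the repo's formula, sketched below:

import numpy as np

def mouth_aspect_ratio_sketch(facemarks_coords):
    # MAR over dlib's zero-indexed mouth points (48-67): two vertical
    # lip gaps averaged and normalized by the corner-to-corner width.
    pts = np.asarray(facemarks_coords, dtype=float)
    vertical_a = np.linalg.norm(pts[50] - pts[58])  # upper-to-lower lip gap
    vertical_b = np.linalg.norm(pts[52] - pts[56])  # second vertical gap
    horizontal = np.linalg.norm(pts[48] - pts[54])  # mouth corner distance
    return (vertical_a + vertical_b) / (2.0 * horizontal)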
Example #7
def predict(args):
    """
    Solution 3 prediction script: runs smile and open-mouth detection over a directory of images
    :param args: argparse arguments
    :return: prints the measured inference times and the two lists of images that passed the filters
    """
    smile_faces = list()
    open_mouth_faces = list()
    facemark_inference_measurements = list()
    prediction_inference_measurements = list()
    image_names = os.listdir(args.images_directory)

    crop_shape = (args.height, args.width)
    # dlib 68-point layout: eyebrows (17-26), eyes and mouth (36-67)
    target_facemarks = list(range(17, 27)) + list(range(36, 68))
    mtcnn = MTCNN()
    scaler = joblib.load('../models/solution_3_scaler.save')
    facemark_predictor = dlib.shape_predictor('../models/shape_predictor_68_face_landmarks.dat')

    with CustomObjectScope({'f1_score': f1_score}):
        model = load_model('../nn_models/best_mlp_multiclassification.h5')

    for image_name in tqdm(image_names):
        image = JPEG(os.path.join(args.images_directory, image_name)).decode()

        start_facemarks_time = time.time()
        bboxes = safe_detect_face_bboxes(image, mtcnn)
        if bboxes.shape[0] == 0:
            continue
        else:
            facemarks_coords = detect_facemarks_coords(
                image,
                bboxes.clip(min=0),
                facemark_predictor_init=facemark_predictor
            )
            facemark_inference_measurements.append(time.time() - start_facemarks_time)

            start_predict_time = time.time()
            cropped_facemarks_coords = crop_facemarks_coords(
                facemarks_coords,
                bboxes,
                bbox_number=0
            )
            resized_cropped_facemarks_coords = resize_facemarks_coords(
                cropped_facemarks_coords,
                original_crop_shape=(bboxes[0][3], bboxes[0][2]),
                target_crop_shape=crop_shape
            )
            face_features = resized_cropped_facemarks_coords[target_facemarks, :].ravel()
            face_features = scaler.transform(face_features.reshape(1, -1))
            predictions = model.predict(face_features)
            prediction_inference_measurements.append(time.time() - start_predict_time)
            predictions = [float(prediction) for prediction in predictions]

            if predictions[0] >= 0.985:
                smile_faces.append(image_name)

            if predictions[1] >= 0.92:
                open_mouth_faces.append(image_name)

    print('\nAverage facemark detection inference time: {0} sec.'.format(
        np.round(np.mean(facemark_inference_measurements), 3))
    )
    print('\nAverage prediction inference time: {0} sec.'.format(
        np.round(np.mean(prediction_inference_measurements), 3))
    )

    print('\nIMAGES WITH SMILE')
    print('-----------------')
    for image in smile_faces:
        print('  {0}'.format(image))

    print('\nIMAGES WITH OPEN MOUTH')
    print('----------------------')
    for image in open_mouth_faces:
        print('  {0}'.format(image))
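
Every example guards on bboxes.shape[0] == 0, so safe_detect_face_bboxes evidently returns an empty array when no face is found. A minimal sketch of such a wrapper over the mtcnn package; the include_cnn fallback seen in some calls (likely a secondary CNN detector) is not reproduced here:

import numpy as np

def safe_detect_face_bboxes_sketch(image, mtcnn):
    # Wrap MTCNN so callers always receive an ndarray of [x, y, w, h]
    # boxes, empty when detection fails. detect_faces() is the public
    # API of the mtcnn package used in these examples.
    detections = mtcnn.detect_faces(image)
    if not detections:
        return np.empty((0, 4), dtype=int)
    return np.asarray([detection['box'] for detection in detections])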