def predict(args):
    """
    Solution 1 prediction script for directory with images for smile
    and open mouth detection.

    Every entry of ``args.images_directory`` is decoded, the first detected
    face is cropped, resized to ``(args.height, args.width)``, divided by
    255 and passed to the model loaded from
    ``../nn_models/best_mobilenetv2_multiclassification.h5``. Images in
    which no face bounding box is found are skipped.

    :param args: argparse arguments; reads ``images_directory``, ``height``,
        ``width``, ``smile_prediction_threshold`` and
        ``mouth_open_prediction_threshold``
    :return: None; prints inference measured time and two lists of images
        that have passed the filter
    """
    mtcnn = MTCNN()
    image_names = os.listdir(args.images_directory)
    smile_faces = []
    open_mouth_faces = []
    inference_measurements = []

    with CustomObjectScope({'f1_score': f1_score}):
        model = load_model(
            '../nn_models/best_mobilenetv2_multiclassification.h5')

    target_shape = (args.height, args.width)
    for image_name in tqdm(image_names):
        image = JPEG(os.path.join(args.images_directory, image_name)).decode()

        # The timer covers face detection, cropping/resizing and the model.
        start_time = time.time()
        bboxes = safe_detect_face_bboxes(image, mtcnn)
        if bboxes.shape[0] == 0:
            # No face found: nothing to classify, no measurement recorded.
            continue

        cropped_image = crop_image(image, bboxes.clip(min=0), bbox_number=0)
        cropped_image = imresize(cropped_image, target_shape) / 255.
        predictions = model.predict(np.expand_dims(cropped_image, axis=0))
        inference_measurements.append(time.time() - start_time)

        # ``.item()`` extracts the single value of each output explicitly;
        # calling ``float()`` on a size-1 array with ndim > 0 is deprecated
        # in recent NumPy releases.
        predictions = [float(np.asarray(prediction).item())
                       for prediction in predictions]
        if predictions[0] >= args.smile_prediction_threshold:
            smile_faces.append(image_name)
        if predictions[1] >= args.mouth_open_prediction_threshold:
            open_mouth_faces.append(image_name)

    # np.mean([]) emits a RuntimeWarning; report nan directly when no image
    # produced a measurement.
    if inference_measurements:
        average_time = np.round(np.mean(inference_measurements), 3)
    else:
        average_time = float('nan')

    print('\nAverage end to end inference time: {0} sec.'.format(
        average_time))

    print('\nIMAGES WITH SMILE')
    print('-----------------')
    for image in smile_faces:
        print(' {0}'.format(image))

    print('\nIMAGES WITH OPEN MOUTH')
    print('----------------------')
    for image in open_mouth_faces:
        print(' {0}'.format(image))
def __getitem__(self, index):
    """
    Batch creation by index. Find face landmarks mouth, eye and eyebrows
    and turns them into a feature vector.

    Images in which no face bounding box is detected are dropped, so the
    returned batch can be smaller than ``self.batch_size``.

    :param index: batch index
    :return: tuple ``(batch_x, labels)`` where ``batch_x`` holds the
        standardized face features and ``labels`` is a dict with the keys
        ``'smile_output'`` and ``'open_mouth_output'``
    """
    batch_start = index * self.batch_size
    batch_pathways = self.pathways[batch_start:batch_start + self.batch_size]

    batch_x = []
    batch_y_smile = []
    batch_y_open_mouth = []

    for pathway in batch_pathways:
        image = JPEG(pathway).decode()
        bboxes = safe_detect_face_bboxes(image, self.mtcnn)
        if bboxes.shape[0] == 0:
            # No face found: the sample and its labels are skipped together.
            continue

        facemarks_coords = detect_facemarks_coords(
            image,
            bboxes.clip(min=0),
            facemark_predictor_init=self.facemark_predictor)
        cropped_facemarks_coords = crop_facemarks_coords(
            facemarks_coords, bboxes, bbox_number=0)
        resized_cropped_facemarks_coords = resize_facemarks_coords(
            cropped_facemarks_coords,
            original_crop_shape=(bboxes[0][3], bboxes[0][2]),
            target_crop_shape=self.crop_shape)

        # Keep only the selected landmarks and flatten them into one vector.
        face_features = resized_cropped_facemarks_coords[
            self.target_facemarks, :].ravel()

        batch_x.append(face_features)
        batch_y_smile.append(self.pathways_with_smile_labels[pathway])
        batch_y_open_mouth.append(
            self.pathways_with_open_mouth_labels[pathway])

    batch_x = np.asarray(batch_x)
    # NOTE(review): a fresh scaler is fit on every batch, so features are
    # standardized with per-batch statistics - confirm this matches how
    # features are scaled at inference time. A batch in which no face was
    # detected reaches this call with an empty array.
    batch_x = StandardScaler().fit_transform(batch_x)
    batch_y_smile = np.asarray(batch_y_smile)
    batch_y_open_mouth = np.asarray(batch_y_open_mouth)

    return batch_x, {
        'smile_output': batch_y_smile,
        'open_mouth_output': batch_y_open_mouth
    }
def predict(self, image_pathways):
    """
    Predict labels on test images by comparing the lower lip deviations sum
    of each image with ``self.smile_deviations_sum_threshold``.

    As a side effect the per-image timings are stored in
    ``self.facemark_inference_measurements`` and
    ``self.predict_inference_measurements`` (both reset on every call).

    :param image_pathways: list of absolute image pathways
    :return: numpy array with predicted class per image: 1 when the
        deviations sum is not below the threshold, otherwise 0; images
        without a detected face get 0
    :raises ValueError: if ``self.smile_deviations_sum_threshold`` is not
        set (falsy)
    """
    if not self.smile_deviations_sum_threshold:
        raise ValueError(
            'Train or set the smile_deviations_sum_threshold value')

    self.facemark_inference_measurements = list()
    self.predict_inference_measurements = list()
    predictions = list()

    for image_pathway in tqdm(image_pathways):
        image = JPEG(image_pathway).decode()

        start_facemarks_time = time.time()
        bboxes = safe_detect_face_bboxes(image, self.mtcnn, include_cnn=True)
        if bboxes.shape[0] == 0:
            # No face found: predict class 0 and record no timings.
            predictions.append(0)
            continue

        facemarks_coords = detect_facemarks_coords(
            image, bboxes.clip(min=0), self.facemark_predictor)
        self.facemark_inference_measurements.append(
            time.time() - start_facemarks_time)

        start_predict_time = time.time()
        # Only the landmarks of the first detected face are used.
        lower_lip_points = self._calculate_line_points(
            facemarks_coords[0], self.lower_lip_point_pairs)
        deviations_sum = self._get_deviations_sum(lower_lip_points)
        self.predict_inference_measurements.append(
            time.time() - start_predict_time)

        predicted_class = (
            0 if deviations_sum < self.smile_deviations_sum_threshold else 1)
        predictions.append(predicted_class)

    return np.asarray(predictions)
def fit(self, image_pathways, smile_labels):
    """
    Optimal threshold searching for images in train set using only lower
    lip landmarks.

    Does nothing when ``self.smile_deviations_sum_threshold`` is already
    set (truthy). Otherwise the lower lip deviations sum is collected per
    image, split by label, and the result of ``_get_optimal_threshold`` is
    stored in ``self.smile_deviations_sum_threshold``. Images without a
    detected face are skipped.

    :param image_pathways: list of absolute image pathways
    :param smile_labels: labels, paired with ``image_pathways`` by position;
        a label equal to 1 puts the image into the smile group
    :return: None
    """
    if self.smile_deviations_sum_threshold:
        return

    smile_deviations_sum = list()
    not_smile_deviations_sum = list()

    for image_pathway, smile_label in tqdm(
            zip(image_pathways, smile_labels)):
        image = JPEG(image_pathway).decode()
        bboxes = safe_detect_face_bboxes(image, self.mtcnn, include_cnn=True)
        if bboxes.shape[0] == 0:
            continue

        facemarks_coords = detect_facemarks_coords(
            image,
            bboxes.clip(min=0),
            facemark_predictor_init=self.facemark_predictor)
        # Only the landmarks of the first detected face are used.
        lower_lip_points = self._calculate_line_points(
            facemarks_coords[0], self.lower_lip_point_pairs)
        deviations_sum = self._get_deviations_sum(lower_lip_points)

        if smile_label == 1:
            smile_deviations_sum.append(deviations_sum)
        else:
            not_smile_deviations_sum.append(deviations_sum)

    self.smile_deviations_sum_threshold = _get_optimal_threshold(
        smile_deviations_sum,
        not_smile_deviations_sum,
        hist_bins=15,
        clip=0.06,
        round_to=3)
def __getitem__(self, index):
    """
    Batch creation by index. Cuts part with the face for each image and
    apply augmentations.

    Images in which no face bounding box is detected are dropped, so the
    returned batch can be smaller than ``self.batch_size``.

    :param index: batch index
    :return: tuple ``(batch_x, labels)`` where ``batch_x`` holds the cropped
        images divided by 255 and ``labels`` is a dict with the keys
        ``'smile_output'`` and ``'open_mouth_output'``
    """
    batch_start = index * self.batch_size
    batch_pathways = self.pathways[batch_start:batch_start + self.batch_size]

    batch_x = []
    batch_y_smile = []
    batch_y_open_mouth = []

    for pathway in batch_pathways:
        image = JPEG(pathway).decode()
        bboxes = safe_detect_face_bboxes(image, self.mtcnn)
        if bboxes.shape[0] == 0:
            # No face found: the sample and its labels are skipped together.
            continue

        cropped_image = crop_image(image, bboxes.clip(min=0), bbox_number=0)
        batch_x.append(imresize(cropped_image, self.shape))
        batch_y_smile.append(self.pathways_with_smile_labels[pathway])
        batch_y_open_mouth.append(
            self.pathways_with_open_mouth_labels[pathway])

    # Augmentations run on the uint8 batch; scaling happens afterwards.
    batch_x = np.asarray(batch_x, dtype=np.uint8)
    batch_y_smile = np.asarray(batch_y_smile)
    batch_y_open_mouth = np.asarray(batch_y_open_mouth)

    if self.augmentations_pipline:
        batch_x = self.augmentations_pipline.augment_images(batch_x)

    return batch_x / 255., {
        'smile_output': batch_y_smile,
        'open_mouth_output': batch_y_open_mouth
    }
def fit(self, image_pathways, mouth_open_labels):
    """
    Optimal threshold searching for images in train set using mouth
    landmarks.

    Does nothing when ``self.mouth_aspect_ratio_threshold`` is already set
    (truthy). Otherwise the mouth aspect ratio is collected per image, split
    by label, and the result of ``_get_optimal_threshold`` is stored in
    ``self.mouth_aspect_ratio_threshold``. Images without a detected face
    are skipped.

    :param image_pathways: list of absolute image pathways
    :param mouth_open_labels: labels, paired with ``image_pathways`` by
        position; a label equal to 1 puts the image into the open mouth
        group
    :return: None
    """
    if self.mouth_aspect_ratio_threshold:
        return

    open_mouth_mars = list()
    close_mouth_mars = list()

    for image_pathway, mouth_open_label in tqdm(
            zip(image_pathways, mouth_open_labels)):
        image = JPEG(image_pathway).decode()
        bboxes = safe_detect_face_bboxes(image, self.mtcnn, include_cnn=True)
        if bboxes.shape[0] == 0:
            continue

        facemarks_coords = detect_facemarks_coords(
            image,
            bboxes.clip(min=0),
            facemark_predictor_init=self.facemark_predictor)
        # Only the landmarks of the first detected face are used.
        mouth_aspect_ratio = self._get_mouth_aspect_ratio(
            facemarks_coords[0])

        if mouth_open_label == 1:
            open_mouth_mars.append(mouth_aspect_ratio)
        else:
            close_mouth_mars.append(mouth_aspect_ratio)

    self.mouth_aspect_ratio_threshold = _get_optimal_threshold(
        open_mouth_mars,
        close_mouth_mars,
        hist_bins=15,
        clip=0.3,
        round_to=2)
def predict(args):
    """
    Solution 3 prediction script for directory with images for smile
    and open mouth detection.

    For every entry of ``args.images_directory`` the face landmarks are
    detected, cropped to the first bounding box, resized to
    ``(args.height, args.width)``, reduced to the selected landmark indices,
    transformed with the scaler loaded from
    ``../models/solution_3_scaler.save`` and passed to the model loaded from
    ``../nn_models/best_mlp_multiclassification.h5``. Images in which no
    face bounding box is found are skipped.

    :param args: argparse arguments; reads ``images_directory``, ``height``
        and ``width``
    :return: None; prints inference measured time and two lists of images
        that have passed the filter
    """
    # Decision thresholds applied to the first and second model output.
    smile_threshold = 0.985
    open_mouth_threshold = 0.92

    smile_faces = []
    open_mouth_faces = []
    facemark_inference_measurements = []
    prediction_inference_measurements = []

    image_names = os.listdir(args.images_directory)
    crop_shape = (args.height, args.width)
    # Landmark indices 17-26 and 36-67 are kept as features.
    target_facemarks = list(range(17, 27)) + list(range(36, 68))

    mtcnn = MTCNN()
    scaler = joblib.load('../models/solution_3_scaler.save')
    facemark_predictor = dlib.shape_predictor(
        '../models/shape_predictor_68_face_landmarks.dat')

    with CustomObjectScope({'f1_score': f1_score}):
        model = load_model('../nn_models/best_mlp_multiclassification.h5')

    for image_name in tqdm(image_names):
        image = JPEG(os.path.join(args.images_directory, image_name)).decode()

        # First timer: face detection plus landmark detection.
        start_facemarks_time = time.time()
        bboxes = safe_detect_face_bboxes(image, mtcnn)
        if bboxes.shape[0] == 0:
            # No face found: nothing to classify, no measurement recorded.
            continue

        facemarks_coords = detect_facemarks_coords(
            image,
            bboxes.clip(min=0),
            facemark_predictor_init=facemark_predictor
        )
        facemark_inference_measurements.append(
            time.time() - start_facemarks_time)

        # Second timer: feature preparation plus the model call.
        start_predict_time = time.time()
        cropped_facemarks_coords = crop_facemarks_coords(
            facemarks_coords,
            bboxes,
            bbox_number=0
        )
        resized_cropped_facemarks_coords = resize_facemarks_coords(
            cropped_facemarks_coords,
            original_crop_shape=(bboxes[0][3], bboxes[0][2]),
            target_crop_shape=crop_shape
        )
        face_features = resized_cropped_facemarks_coords[
            target_facemarks, :].ravel()
        face_features = scaler.transform(face_features.reshape(1, -1))
        predictions = model.predict(face_features)
        prediction_inference_measurements.append(
            time.time() - start_predict_time)

        # ``.item()`` extracts the single value of each output explicitly;
        # calling ``float()`` on a size-1 array with ndim > 0 is deprecated
        # in recent NumPy releases.
        predictions = [float(np.asarray(prediction).item())
                       for prediction in predictions]
        if predictions[0] >= smile_threshold:
            smile_faces.append(image_name)
        if predictions[1] >= open_mouth_threshold:
            open_mouth_faces.append(image_name)

    # np.mean([]) emits a RuntimeWarning; report nan directly when no image
    # produced a measurement.
    if facemark_inference_measurements:
        average_facemark_time = np.round(
            np.mean(facemark_inference_measurements), 3)
    else:
        average_facemark_time = float('nan')
    if prediction_inference_measurements:
        average_prediction_time = np.round(
            np.mean(prediction_inference_measurements), 3)
    else:
        average_prediction_time = float('nan')

    print('\nAverage facemark searching inference time: {0} sec.'.format(
        average_facemark_time))
    print('\nAverage prediction inference time: {0} sec.'.format(
        average_prediction_time))

    print('\nIMAGES WITH SMILE')
    print('-----------------')
    for image in smile_faces:
        print(' {0}'.format(image))

    print('\nIMAGES WITH OPEN MOUTH')
    print('----------------------')
    for image in open_mouth_faces:
        print(' {0}'.format(image))