def __init__(self):
    """Create the face models if they are missing, then build the main window.

    The recognition and detection objects are only instantiated when both
    ``recognition_object`` and ``detection_object`` are still ``None``.
    """
    models_missing = (self.recognition_object is None
                      and self.detection_object is None)
    if models_missing:
        self.recognition_object = FaceRecognition()
        self.detection_object = FaceDetection()
        self.recognition_object.initialize()

    # NOTE(review): the window is assumed to be built regardless of whether
    # the models were just created -- confirm against the original layout.
    window_size = self.initialize_window_size(400, 400)
    self.create_main_window(window_size)
    self.set_main_window_buttons()
def run_inference(args):
    """Run face -> landmarks -> head pose -> gaze estimation on a video.

    Every frame yielded by the input feeder is shown in an "Output" window
    and pushed through the four models; the head pose and gaze results are
    printed. Pressing ESC (key code 27) stops the loop.

    Args:
        args: parsed command-line arguments providing ``input``,
            ``face_detection_model``, ``facial_landmarks_detection_model``,
            ``head_pose_estimation_model`` and ``gaze_estimation_model``.
    """
    feed = InputFeeder(input_type='video', input_file=args.input)
    feed.load_data()
    try:
        # Build and load each model once; doing this inside the frame loop
        # reloads all four models for every single frame.
        faceDetection = FaceDetection(model_name=args.face_detection_model)
        faceDetection.load_model()
        facialLandmarksDetection = FacialLandmarksDetection(
            args.facial_landmarks_detection_model)
        facialLandmarksDetection.load_model()
        headPoseEstimation = HeadPoseEstimation(
            args.head_pose_estimation_model)
        headPoseEstimation.load_model()
        gazeEstimation = GazeEstimation(args.gaze_estimation_model)
        gazeEstimation.load_model()

        for batch in feed.next_batch():
            if batch is None:
                # cv2.resize cannot handle a missing frame; treat it as the
                # end of the stream instead of crashing.
                break

            cv2.imshow("Output", cv2.resize(batch, (500, 500)))
            key = cv2.waitKey(60)
            if key == 27:
                break

            # getting face
            face = faceDetection.predict(batch)

            # getting eyes
            left_eye, right_eye = facialLandmarksDetection.predict(face)

            # getting head pose angles
            head_pose = headPoseEstimation.predict(face)
            print("head pose angles: ", head_pose)

            # get mouse points
            mouse_coords = gazeEstimation.predict(left_eye, right_eye,
                                                  head_pose)
            print("gaze output: ", mouse_coords)
    finally:
        # Release the input source even if a model call raises.
        feed.close()
def compare(root, f1, f2):
    """Compare the largest face found in two images under ``root``.

    The module-level detector, recognizer and aligner are created on first
    use (whenever the corresponding global is falsy).

    Returns a ``(similarity, time_use)`` tuple: ``similarity`` is 0 unless a
    bounding box was found in both images, and ``time_use`` is the elapsed
    time in whole milliseconds, measured after the models are available.
    """
    global face_det
    global face_recon
    global face_align

    # Lazily build whichever model is still missing.
    if not face_det:
        face_det = FaceDetection(gpu_id)
    if not face_recon:
        face_recon = FaceRecogniton(gpu_id)
    if not face_align:
        face_align = FaceAlignment(gpu_id)

    started = time.time()

    image_a = cv2.imread('/'.join((root, f1)))
    image_b = cv2.imread('/'.join((root, f2)))
    boxes_a, points_a = face_det.get_max_bounding_box_by_image(image_a)
    boxes_b, points_b = face_det.get_max_bounding_box_by_image(image_b)

    if boxes_a and boxes_b:
        aligned_a = face_align.affine_face(image_a, points_a)
        aligned_b = face_align.affine_face(image_b, points_b)
        similarity = face_recon.face_compare(aligned_a, aligned_b)
    else:
        similarity = 0

    finished = time.time()
    elapsed_ms = int(1000 * (finished - started))
    return similarity, elapsed_ms
def __init__(self, args):
    '''
    Builds the four models, the mouse controller and the input feeder,
    then loads the models, recording how long each stage took.

    Args:
    args = All arguments parsed by the arguments parser function. When
           args.all_devices is not 'CPU', the four per-model device
           attributes on args are overwritten with it.

    Return:
    None
    '''
    init_start_time = time.time()

    # Output options and running statistics.
    self.output_path = args.output_path
    self.show_output = args.show_output
    self.total_processing_time = 0
    self.count_batch = 0
    self.inference_speed = []
    self.avg_inference_speed = 0

    # A non-CPU "all devices" choice overrides every per-model device.
    if args.all_devices != 'CPU':
        for device_attr in ('face_device', 'face_landmark_device',
                            'head_pose_device', 'gaze_device'):
            setattr(args, device_attr, args.all_devices)

    model_init_start = time.time()
    self.face_model = FaceDetection(
        args.face_model, args.face_device,
        args.face_device_ext, args.face_prob_threshold)
    self.landmarks_model = FacialLandmarksDetection(
        args.face_landmark_model, args.face_landmark_device,
        args.face_landmark_device_ext, args.face_landmark_prob_threshold)
    self.head_pose_model = HeadPoseEstimation(
        args.head_pose_model, args.head_pose_device,
        args.head_pose_device_ext, args.head_pose_prob_threshold)
    self.gaze_model = GazeEstimation(
        args.gaze_model, args.gaze_device,
        args.gaze_device_ext, args.gaze_prob_threshold)
    self.model_init_time = time.time() - model_init_start
    log.info('[ Main ] All required models initiallized')

    self.mouse_control = MouseController(args.precision, args.speed)
    log.info('[ Main ] Mouse controller successfully initialized')

    self.input_feeder = InputFeeder(args.batch_size, args.input_type,
                                    args.input_file)
    log.info('[ Main ] Initialized input feeder')

    # Load the models in the same order they were created.
    model_load_start = time.time()
    for model in (self.face_model, self.landmarks_model,
                  self.head_pose_model, self.gaze_model):
        model.load_model()
    self.model_load_time = time.time() - model_load_start

    self.app_init_time = time.time() - init_start_time
    log.info('[ Main ] All moadels loaded to Inference Engine\n')
def preprocess_output_face_detection(self, outputs, width, height, threshold,
                                     frame):
    """
    Delegate output post-processing to a freshly created FaceDetection.

    All arguments are forwarded unchanged, together with this object's
    ``output_name``, to ``FaceDetection.preprocess_output``. The two values
    it returns are handed back as a ``(coords, frame)`` tuple.
    """
    detector = FaceDetection()
    detected_coords, annotated_frame = detector.preprocess_output(
        outputs, width, height, threshold, frame, self.output_name)
    return detected_coords, annotated_frame
def __init__(self, mode=None):
    """Load settings, create the helpers and start calibration.

    Args:
        mode: pass ``'debug'`` to create the serial transmitter with
            ``arduino_connect=False``; any other value connects to the
            Arduino and uses a move threshold of 0.05.
    """
    self.servo_pos = (90, 90)
    self.allow_save = True

    self.json = Json('settings.json')
    self.settings = self.json.get_json()

    # Pick the transmitter options up front so it is constructed in one place.
    if mode == 'debug':
        transmitter_options = {'arduino_connect': False}
    else:
        transmitter_options = {'arduino_connect': True,
                               'move_threshold': 0.05}
    self.serial_transmitter = SerialTransmitter(**transmitter_options)

    self.ui = UICallibrate(self.settings, (640, 480), move_factor=0.5)
    self.face_detection = FaceDetection()

    self.calibrate()
def main():
    """
    Extract the faces from every image in the input directory.

    Edit the two absolute paths below to point at the directory holding the
    raw images and the directory that should receive the extracted faces.
    """
    directories = {
        'inputDirectory':
            '/home/mr-paul/atmp/aaproject/scripts/surprised_raw',
        'outputDirectory':
            '/home/mr-paul/atmp/aaproject/scripts/surprised_faces',
    }

    # Detection and writing are both handled by FaceDetection.extractFaces.
    FaceDetection.extractFaces(**directories)
def __init__(self):
    """Initialise state flags, the ROS service/proxies and the behaviours."""
    # Boolean state flags all start cleared.
    self._isOn = self._isRun = False
    self._on = self._run = self._face = False

    # Motion values start at rest.
    self._forward = self._rotation = 0.0

    # Command service plus the proxies used to switch the motors.
    self._srvCmd = rospy.Service('pimouse_cmd', PiMouseCmd,
                                 self.CommandCallback)
    self._srvClientOn = rospy.ServiceProxy('motor_on', Trigger)
    self._srvClientOff = rospy.ServiceProxy('motor_off', Trigger)

    # Invoke the motor_off proxy when the node shuts down.
    rospy.on_shutdown(self._srvClientOff.call)

    # NOTE(review): the service is registered before these objects exist;
    # confirm CommandCallback cannot run before construction finishes.
    self._wallAround = WallAround()
    self._faceToFace = FaceToFace()
    self._faceDetection = FaceDetection()
class RunFaceID(object):
    """Detect faces in a frame and attach labels to them."""

    def __init__(self):
        super(RunFaceID, self).__init__()
        self.face_detection = FaceDetection()
        self.face_recognition = FaceRecognition()

    def predict(self, array_embeddings, embeddings_source):
        """Return the label for the given embeddings (always "unknown")."""
        return "unknown"

    def predict_labels(self, array_embeddings, embeddings_source):
        """Label the embeddings; delegates to :meth:`predict`.

        ``processing`` calls this name, which the class previously did not
        define, so every labelled call raised AttributeError.
        """
        return self.predict(array_embeddings, embeddings_source)

    def processing(self, images, embeddings_source):
        """Detect faces on a copy of ``images`` and collect the results.

        Args:
            images: the frame to analyse; it is deep-copied before use.
            embeddings_source: forwarded to :meth:`predict_labels`.

        Returns:
            ``None`` when no face is found, otherwise a dict that always
            holds ``"bounding_boxs"`` and, when the frame-count gate below
            passes, ``"labels"``.
        """
        frame = copy.deepcopy(images)
        faces = self.face_detection.detect_faces(frame)
        if faces is None or len(faces) < 1:
            return None

        # Crop every face whose box has a positive width and height.
        array_img = np.array([
            frame[y:y + h, x:x + w, :]
            for x, y, w, h in faces
            if w > 0 and h > 0
        ])

        data = {}
        # NOTE(review): `count` and `NUMBER_FRAME` are neither parameters nor
        # locals here; they must exist at module level -- confirm.
        if count >= NUMBER_FRAME:
            array_embeddings = self.face_recognition.embedding_image(array_img)
            data["labels"] = self.predict_labels(array_embeddings,
                                                 embeddings_source)
        data["bounding_boxs"] = faces
        return data
def __init__(self):
    """Set every buffer and measurement to its starting value."""
    blank_shape = (10, 10, 3)

    # Three independent 10x10x3 uint8 placeholder frames.
    self.frame_in = np.zeros(blank_shape, np.uint8)
    self.frame_ROI = np.zeros(blank_shape, np.uint8)
    self.frame_out = np.zeros(blank_shape, np.uint8)

    # Sample storage.
    self.samples = []
    self.buffer_size = 100
    self.times = []
    self.data_buffer = []

    # Spectrum and rate measurements.
    self.fps = 0
    self.fft = []
    self.freqs = []

    # Reference time, taken before the detector is created.
    self.t0 = time.time()
    self.bpm = 0
    self.fd = FaceDetection()
    self.bpms = []
    self.peaks = []
def main():
    """Show the camera feed with labelled detection boxes until 'q' is pressed.

    Frames are read from capture device 0 and passed to
    ``FaceDetection.process_frame``; each returned rectangle is drawn in the
    colour ``colourDict`` maps its label to, with the label as text.
    """
    video_stream = cv.VideoCapture(0)
    try:
        detector = FaceDetection()
        while True:
            ret, frame = video_stream.read()
            if not ret:
                # read() signals failure through `ret`; the frame is not
                # usable in that case, so stop instead of crashing on it.
                break

            rects, labels = detector.process_frame(frame)
            for (x, y, w, h), label in zip(rects, labels):
                cv.rectangle(frame, (x, y), (x + w, y + h),
                             colourDict[label], 2)
                cv.putText(frame, label, (x, y), cv.FONT_HERSHEY_SIMPLEX,
                           1.0, (255, 255, 255))

            cv.imshow("EyeTracker", frame)
            if cv.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even on an error.
        video_stream.release()
        cv.destroyAllWindows()
def __init__(self, num_of_cameras=1, record_video=False):
    """Create the cameras and the per-frame processing helpers.

    Args:
        num_of_cameras: forwarded to ``VideoCameras``.
        record_video: stored as ``self.record_video``.
    """
    # Plain flags first.
    self.is_capturing_video = True
    self.record_video = record_video

    # Helper objects, created in the same order as before.
    self.video_cameras = VideoCameras(num_of_cameras)
    self.face_detection = FaceDetection()
    self.face_frame_morpher = FaceFrameMorpher()
    self.video_recorder = VideoRecorder()
def models_handler(logger, args):
    """Validate the four model paths and construct the model objects.

    Each path is reported on stdout and through ``logger``. The first path
    that is not an existing file ends the program with ``exit(1)``.

    Returns:
        ``(model_fd, model_fld, model_hpe, model_ge)`` -- the face detection,
        facial landmark, head pose and gaze estimation objects, each built
        with ``args.device`` and ``args.cpu_extension``.
    """
    ## put all path of model from args in to dict
    Dict_model_path = {
        'Face': args.face_detection_path,
        'Landmarks': args.facial_landmarks_path,
        'Headpose': args.head_pose_path,
        'Gaze': args.gaze_estimation_path
    }

    ## check if model exists in given path
    for model_key, model_path in Dict_model_path.items():
        if os.path.isfile(model_path):
            found_msg = ('## ' + model_key + " Model path is correct: " +
                         model_path)
            print(found_msg + '\n')
            logger.info(found_msg)
            continue

        missing_msg = ("## " + model_key + " Model path not exists: " +
                       model_path + ' Please try again !!!')
        print("\n" + missing_msg)
        logger.error(missing_msg)
        exit(1)

    ## initialize the four models in pipeline order
    model_fd = FaceDetection(Dict_model_path['Face'], args.device,
                             args.cpu_extension)
    model_fld = FacialLandmarkDetection(Dict_model_path['Landmarks'],
                                        args.device, args.cpu_extension)
    model_hpe = HeadPoseEstimation(Dict_model_path['Headpose'], args.device,
                                   args.cpu_extension)
    model_ge = GazeEstimation(Dict_model_path['Gaze'], args.device,
                              args.cpu_extension)
    return model_fd, model_fld, model_hpe, model_ge
def load_models(path):
    """Populate the module-level model globals from files under ``path``.

    The gender, expression, multiple and face-detection models each receive
    a file or directory name joined onto ``path``; the two landmark models
    receive ``path`` itself.
    """
    global gender, expression, multiple, face_detection, landmarks2d, landmarks3d

    def resource(name):
        # Location of a model resource inside the models directory.
        return os.path.join(path, name)

    gender = Gender(resource("gender.zip"))
    expression = Expression(resource("expression.zip"))
    multiple = Multiple(resource("multiple"))
    face_detection = FaceDetection(resource("face_detection"))

    landmarks2d = LandMarks2D(path)
    landmarks3d = LandMarks3D(path)
def main():
    """Parse the command line, start the printer and face detection, run the GUI."""
    global GUI_HAS_FACE

    parser = argparse.ArgumentParser()
    # Either -p or --shape-predictor is given on the command line; it is
    # required. The help text is what gets printed when help is requested.
    parser.add_argument("-p", "--shape-predictor", required=True,
                        help="path to facial landmark predictor")
    args = parser.parse_args()

    printer = Printer(value=False)
    printer.start()

    # GUI constructor; receives the initial value of the face flag.
    gui = Gui(value=False)

    # FaceDetection constructor; face flags are sent to the listed callbacks
    # (face_update, check_face).
    callbacks = [printer.face_update, gui.check_face]
    face = FaceDetection(args.shape_predictor, face_callbacks=callbacks)
    face.start()

    # Must always be the last call.
    gui.mainloop()
def __init__(self):
    """Create the transmitter and detector, then cache settings and geometry."""
    self.serial_transmitter = SerialTransmitter()

    detector = FaceDetection()
    self.face_detection = detector

    settings_store = Json()
    self.json = settings_store
    self.settings = settings_store.get_json()

    # NOTE(review): `camera_fov` is neither a parameter nor a local here; it
    # must be a module-level name -- confirm it is defined.
    self.camera_fov = camera_fov
    self.resolution = detector.resolution
    self.pos = ()
def load_modules(self):
    """Instantiate every module wrapper around the current session.

    All wrappers are given ``self.session``; ``FaceDetection`` additionally
    receives this object itself.
    """
    session = self.session

    self._speech = Speech(session)
    self._motion = Motion(session)
    self._tablet = Tablet(session)
    self._face_detection = FaceDetection(session, self)
    self._wave_detection = WavingDetection(session)
    self._audio_player = AudioPlayer(session)
    self._speech_recognition = SpeechRecognition(session)
    self._system = System(session)
def load_models():
    """Create the three models, announcing each one before it is built.

    Returns:
        ``(face_detection_model, face_validation_model, speaker_validation)``
    """
    stages = (
        ("loading face detection model", FaceDetection),
        ("loading face validation model", FaceValidation),
        # SyncNet
        ("loading speaker validation model", SpeakerValidation),
    )

    loaded = []
    for announcement, factory in stages:
        print(announcement)
        loaded.append(factory())
    return tuple(loaded)
def face_detection(environ, start_response):
    """WSGI handler that runs face detection on ``environ['params']['image']``.

    The response is always sent with status ``200 OK`` as UTF-8 text; the
    body is a JSON object:

    * ``{"status": 0, "data": [...]}`` -- detections converted with
      ``tolist()`` when the detector returns a ``numpy.ndarray``, otherwise
      an empty list;
    * ``{"status": -1, "data": "<message>"}`` -- when detection raises.

    Returns:
        A single-element list holding the encoded response body.
    """
    from face_detection import FaceDetection
    face = FaceDetection()

    status = '200 OK'
    headers = [('Content-type', 'text/plain; charset=utf-8')]
    start_response(status, headers)

    params = environ['params']
    image = params.get('image')
    try:
        faces = face.face_detection(image)
        if isinstance(faces, numpy.ndarray):
            data = faces.tolist()
            return [json.dumps(dict(status=0, data=data)).encode('utf-8')]
        return ['{"status":0,"data":[]}'.encode('utf-8')]
    except Exception as error:
        # Serialise through json.dumps so quotes, backslashes or newlines in
        # the message are escaped; plain %-formatting produced invalid JSON.
        # Compact separators keep the body identical for simple messages.
        res = json.dumps({"status": -1, "data": str(error)},
                         separators=(',', ':'))
        return [res.encode('utf-8')]
def copy_all2(self, file_infos, dst_dir):
    """Resize (and optionally face-crop) images into per-class folders.

    Each image is written to ``dst_dir/<class_feature>/<basename>`` at
    ``(input_width, input_height)``. With ``self.face_detection`` enabled the
    image is first resized to ``img_resize`` x ``img_resize`` and cropped to
    the face; images where no face is cropped are skipped. When ``dst_dir``
    is the age or sex training folder, the mean of the written images is
    saved to the matching mean-image path.

    Args:
        file_infos: iterable of ``(path, class_feature)`` pairs.
        dst_dir: destination root directory.

    Returns:
        The class feature of every image that was written, in order.
    """
    fd = None
    if self.face_detection:
        from face_detection import FaceDetection
        fd = FaceDetection()

    # Compare paths by value: `is` only matches when the caller passes the
    # very same string object.
    is_age_train = dst_dir == self.db_age_train_folder_path
    is_sex_train = dst_dir == self.db_sex_train_folder_path
    accumulate_mean = is_age_train or is_sex_train

    classes = []
    counter = 0
    mean_image = np.zeros(self.input_shape, np.float32)
    target_size = (self.input_width, self.input_height)

    for path, class_feature in file_infos:
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            # imread returns None for unreadable files; skip them.
            continue

        if fd is not None:
            # The interpolation flag must be passed by keyword: the third
            # positional parameter of cv2.resize is `dst`.
            img = cv2.resize(img, (self.img_resize, self.img_resize),
                             interpolation=cv2.INTER_AREA)
            img = fd.crop_face(img)
            if img is None:
                continue

        img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
        if accumulate_mean:
            mean_image += img
            counter += 1

        cv2.imwrite(
            os.path.join(dst_dir, class_feature, os.path.basename(path)),
            img)
        classes.append(class_feature)

    # Only write a mean image when at least one image contributed to it.
    if accumulate_mean and counter:
        mean_image /= counter
        mean_image = np.asarray(mean_image, np.uint8)
        if is_age_train:
            cv2.imwrite(self.age_mean_image_path, mean_image)
        else:
            cv2.imwrite(self.sex_mean_image_path, mean_image)

    return classes
def prepare_processing_engines():
    """
    Builds the models needed to process an image.

    Returns:
        The image processor that combines the face detection backend with
        the life stage prediction backend.
    """
    caffe_weights = 'models/caffe/res10_300x300_ssd_iter_140000.caffemodel'
    caffe_config = 'models/caffe/deploy.prototxt'

    # Face detection backend wraps the Res10 model.
    detector_backend = FaceDetection(
        Res10FaceDetection(caffe_weights, caffe_config))

    # Life stage backend is loaded from its own model file.
    stage_backend = LifeStagePrediction('models/life_stage_model.h5')

    return FaceDetectionAndLifeStageClassification(detector_backend,
                                                   stage_backend)
class FaceFrameMorpher:
    """Applies an animated wave distortion to every detected face region."""

    def __init__(self):
        self.animated_scale = 0.0
        self.animated_direction = 1
        self.face_detection = FaceDetection()
        self.MAX_ANIMATION_SCALE = 50.0

    def morph_frame_faces(self, frames):
        """Distort the faces in each frame in place and return ``frames``.

        The animation is reset whenever a frame contains no face.
        """
        for frame in frames:
            faces = self.face_detection.detect_faces(frame)
            if len(faces) == 0:
                self.reset_animation()
            for (x, y, w, h) in faces:
                self.morph_pixels_in_area_animated(frame, x, y, w, h)
        return frames

    def animated_scale_tick(self):
        """Advance the scale one step, turning around at either limit."""
        self.animated_scale += self.animated_direction
        # Range checks instead of exact float equality, so an overshoot still
        # turns the animation around instead of growing without bound.
        if self.animated_scale >= self.MAX_ANIMATION_SCALE:
            self.animated_direction = -abs(self.animated_direction)
        elif self.animated_scale <= -self.MAX_ANIMATION_SCALE:
            self.animated_direction = abs(self.animated_direction)

    def reset_animation(self):
        """Return the scale to 0.0 (the direction is left unchanged)."""
        self.animated_scale = 0.0

    def morph_pixels_in_area_animated(self, img, x_pos, y_pos, width, height):
        """Advance the animation and distort one rectangle of ``img`` in place."""
        section = img[y_pos:y_pos + height, x_pos:x_pos + width]
        self.animated_scale_tick()
        section = FaceFrameMorpher.vertical_wave(section, self.animated_scale)
        # Write back using the section's real extent. len(section) is its row
        # count, which is the wrong width for non-square regions, and slicing
        # may have clipped the region at the image border.
        rows, cols = section.shape[:2]
        img[y_pos:y_pos + rows, x_pos:x_pos + cols] = section

    @staticmethod
    def vertical_wave(img, wave_factor=20.0):
        """Return a copy of ``img`` with pixels displaced along sine waves.

        Works for both 2-D (grayscale) and 3-D (colour) arrays.
        """
        img_output = np.zeros(img.shape, dtype=img.dtype)
        # Only the first two axes are spatial; a colour image has a third.
        rows, cols = img.shape[:2]
        for i in range(rows):
            for j in range(cols):
                offset_x = int(wave_factor * math.sin(2 * 3.14 * i / 180))
                offset_y = int(wave_factor * math.sin(2 * 3.14 * j / 180))
                # NOTE(review): the column position is compared against
                # `rows`; kept as-is to preserve the visual effect -- confirm
                # whether `cols` was intended.
                if j + offset_x < rows:
                    # Wrap the row index by `rows`; wrapping by `cols` indexes
                    # past the last row whenever cols > rows.
                    img_output[i, j] = img[(i + offset_y) % rows,
                                           (j + offset_x) % cols]
                else:
                    img_output[i, j] = img[i, j]
        return img_output
def load_models(p):
    """
    Build the four model objects and load each one, timing the load.

    Input: `p`, a dictionary holding, for every label in
    ('fd', 'lm', 'hp', 'ge'), the model path under `mod_<label>` and the
    device under `device_<label>`.

    Returns the dictionary mapping each label to its loaded model.
    """
    models = {
        'fd': FaceDetection(),
        'lm': Landmarks(),
        'hp': HeadPose(),
        'ge': GazeEstimator(),
    }

    # Load every model on its own device and report how long it took.
    for label, model in models.items():
        start = time.time()
        model.load_model(p[f'mod_{label}'], device=p[f'device_{label}'])
        print(f'Model: {MODELS[label]} --- Loading time: {1000*(time.time()-start):.1f} ms')

    return models
class RunFaceID(object):
    """Detect faces in a frame and label them against stored embeddings."""

    def __init__(self):
        super(RunFaceID, self).__init__()
        self.face_detection = FaceDetection()
        self.face_recognition = FaceRecognition()
        # Context managers close the files once loaded; the bare open() calls
        # left both handles open.
        # NOTE: pickle.load can execute arbitrary code -- these files must
        # come from a trusted source.
        with open("data_embeddings", "rb") as embeddings_file:
            self.arr_embeddings = pickle.load(embeddings_file)
        with open("labels", "rb") as labels_file:
            self.labels = pickle.load(labels_file)

    def predict_labels(self, embeddings):
        """Return one label per embedding.

        Each embedding gets the label at the arg-max of ``cs`` against the
        stored embeddings, or "unknown" when that score is not above 0.6.
        """
        dis_cs = cs(embeddings, self.arr_embeddings)
        index_list = np.argmax(dis_cs, axis=-1)
        return [
            self.labels[index] if dis_cs[i][index] > 0.6 else "unknown"
            for i, index in enumerate(index_list)
        ]

    def processing(self, images, count):
        """Detect faces on a copy of ``images`` and collect the results.

        Args:
            images: the frame to analyse; it is deep-copied before use.
            count: labels are only computed when this is at least 5.

        Returns:
            ``None`` when no face is found, otherwise a dict that always
            holds ``"bounding_boxs"`` and, when ``count >= 5``, ``"labels"``.
        """
        frame = copy.deepcopy(images)
        faces = self.face_detection.detect_faces(frame)
        if faces is None or len(faces) < 1:
            return None

        # Crop every face whose box has a positive width and height.
        array_img = np.array([
            frame[y:y + h, x:x + w, :]
            for x, y, w, h in faces
            if w > 0 and h > 0
        ])

        data = {}
        if count >= 5:
            array_embeddings = self.face_recognition.embedding_image(array_img)
            data["labels"] = self.predict_labels(array_embeddings)
        data["bounding_boxs"] = faces
        return data
class LivenessDetection():
    """Scores an image with a liveness model after locating the face."""

    def __init__(self, face_detection_model_path, liveness_model_path,
                 threshold, image_size):
        # Models are created later by initial(); only remember the settings.
        self.model = None
        self.face_detection_model_path = face_detection_model_path
        self.liveness_model_path = liveness_model_path
        self.image_size = image_size
        self.threshold = threshold

    def initial(self):
        """Load the liveness model and create the face detector."""
        self.model = load_model(self.liveness_model_path)
        self.model._make_predict_function()
        self.face_detect = FaceDetection(self.face_detection_model_path)

    def process(self, image, mode):
        """Return ``(score, predict)`` for ``image``.

        ``predict`` is True when the score exceeds the threshold. ``mode`` is
        accepted but not used.
        """
        side = self.image_size
        target_size = (side, side)

        x, y, w, h = self.face_detect.process(
            cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

        if w > 0 and h > 0:
            # Grayscale face crop, resized to the model input size.
            face_image = cv2.cvtColor(image[y:y + h, x:x + w],
                                      cv2.COLOR_BGR2GRAY)
            face_image = cv2.resize(face_image, target_size)
        else:
            # No face box: use an all-zero plane instead.
            face_image = np.zeros(target_size, dtype=int)

        # Convert the resized image to HSV and overwrite its first channel
        # with the face plane.
        image = cv2.cvtColor(cv2.resize(image, target_size),
                             cv2.COLOR_BGR2HSV)
        image[:, :, 0] = face_image

        batch = np.array([image], dtype="float") / 255.0
        score = self.model.predict(batch)[0][0]
        return score, bool(score > self.threshold)
class LivenessDetection():
    """Scores a padded face crop with a liveness model."""

    def __init__(self, face_detection_model_path, liveness_model_path,
                 threshold, image_size):
        # Models are created later by initial(); only remember the settings.
        self.model = None
        self.face_detection_model_path = face_detection_model_path
        self.liveness_model_path = liveness_model_path
        self.image_size = image_size
        self.threshold = threshold

    def initial(self):
        """Load the liveness model and create the face detector."""
        self.model = keras.models.load_model(self.liveness_model_path)
        self.model._make_predict_function()
        self.face_detect = FaceDetection(self.face_detection_model_path)

    def process(self, image, mode):
        """Return ``(score, predict)`` for ``image``.

        When the detector reports no box with positive width and height the
        result is ``(-999, False)``. ``mode`` is accepted but not used.
        """
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        x, y, w, h = self.face_detect.process(image)

        if not (w > 0 and h > 0):
            return -999, False

        # Largest padding that fits on all four sides of the face box.
        img_height, img_width = image.shape[0], image.shape[1]
        new_pad = np.min([x, img_width - (x + w), y, img_height - (y + h)])

        crop = image[y - new_pad:y + h + new_pad,
                     x - new_pad:x + w + new_pad, :]
        crop = cv2.resize(crop, (self.image_size, self.image_size))
        # Scale pixel values from [0, 255] to [-1, 1].
        crop = crop.astype(np.float32) / 127.5 - 1

        batch = np.expand_dims(crop, axis=0)
        score = self.model.predict_on_batch(batch)[0]
        return score, bool(score > self.threshold)
def main():
    """Run the gaze pipeline on a camera or video and move the mouse.

    For every frame the face, head pose, eye landmarks and gaze are
    predicted in turn. The result is shown in a "Visualization" window
    (annotated when ``args.visualization`` is set) and the mouse is moved on
    every fifth frame. ESC (key code 27) stops the loop.

    Raises:
        FileNotFoundError: if ``args.input`` is not "cam" and is not an
            existing file.
    """
    args = build_argparser().parse_args()
    frame_num = 0

    # Initialize the Inference Engine
    fd = FaceDetection()
    fld = Facial_Landmarks_Detection()
    ge = Gaze_Estimation()
    hp = Head_Pose_Estimation()

    # Load Models
    fd.load_model(args.face_detection_model, args.device, args.cpu_extension)
    fld.load_model(args.facial_landmark_model, args.device,
                   args.cpu_extension)
    ge.load_model(args.gaze_estimation_model, args.device, args.cpu_extension)
    hp.load_model(args.head_pose_model, args.device, args.cpu_extension)

    # Mouse Controller precision and speed
    mc = MouseController('medium', 'fast')

    # feed input from an image, webcam, or video to model
    if args.input == "cam":
        feed = InputFeeder("cam")
    else:
        # Explicit check: an `assert` disappears when Python runs with -O.
        if not os.path.isfile(args.input):
            raise FileNotFoundError("Specified input file doesn't exist")
        feed = InputFeeder("video", args.input)
    feed.load_data()

    frame_count = 0
    try:
        for frame in feed.next_batch():
            frame_count += 1
            if frame is None:
                continue
            try:
                # Time only the four predictions, not the key-wait delay.
                inf_start = time.time()
                detected_face, face_coords = fd.predict(
                    frame.copy(), args.prob_threshold)
                hp_output = hp.predict(detected_face.copy())
                left_eye, right_eye, eye_coords = fld.predict(
                    detected_face.copy())
                new_mouse_coord, gaze_vector = ge.predict(
                    left_eye, right_eye, hp_output)
                det_time = time.time() - inf_start

                # Visualization
                if args.visualization:
                    preview_frame = frame.copy()
                    face_frame = detected_face.copy()
                    draw_face_bbox(preview_frame, face_coords)
                    display_hp(preview_frame, hp_output, face_coords)
                    draw_landmarks(face_frame, eye_coords)
                    draw_gaze(face_frame, gaze_vector, left_eye.copy(),
                              right_eye.copy(), eye_coords)
                    img = np.hstack((cv2.resize(preview_frame, (500, 500)),
                                     cv2.resize(face_frame, (500, 500))))
                else:
                    img = cv2.resize(frame, (500, 500))
                cv2.imshow('Visualization', img)
                # Poll the keyboard after imshow so the window is refreshed.
                key = cv2.waitKey(60)

                # set speed
                if frame_count % 5 == 0:
                    mc.move(new_mouse_coord[0], new_mouse_coord[1])

                # INFO
                log.info("NUMBER OF FRAMES: {} ".format(frame_num))
                log.info("INFERENCE TIME: {}ms".format(det_time * 1000))
                frame_num += 1

                if key == 27:
                    break
            except Exception:
                # Record the real failure with its traceback instead of
                # reporting every error as an unsupported file format.
                log.exception("Processing frame %d failed; stopping.",
                              frame_count)
                break
    finally:
        # Release the input source on every exit path.
        feed.close()
class Process(object):
    """Estimates a pulse rate (bpm) from the green channel of two face ROIs.

    Set ``frame_in`` and call :meth:`run` once per frame; results are exposed
    through ``frame_out``, ``frame_ROI``, ``bpm``, ``bpms``, ``samples``,
    ``fft`` and ``freqs``.
    """

    def __init__(self):
        self.frame_in = np.zeros((10, 10, 3), np.uint8)
        self.frame_ROI = np.zeros((10, 10, 3), np.uint8)
        self.frame_out = np.zeros((10, 10, 3), np.uint8)
        self.samples = []
        self.buffer_size = 100
        self.times = []
        self.data_buffer = []
        self.fps = 0
        self.fft = []
        self.freqs = []
        self.t0 = time.time()
        self.bpm = 0
        self.fd = FaceDetection()
        self.bpms = []
        self.peaks = []

    def extractColor(self, frame):
        """Return the mean of channel index 1 of ``frame``."""
        g = np.mean(frame[:, :, 1])
        return g

    def run(self):
        """Process ``self.frame_in`` and update the buffers and estimates."""
        frame, face_frame, ROI1, ROI2, status, mask = self.fd.face_detect(
            self.frame_in)
        self.frame_out = frame
        self.frame_ROI = face_frame

        g1 = self.extractColor(ROI1)
        g2 = self.extractColor(ROI2)

        L = len(self.data_buffer)
        g = (g1 + g2) / 2

        # Remove sudden changes: once the buffer holds at least 100 samples,
        # a value more than 10 away from the buffer mean is replaced by the
        # previous sample. The length is tested first so np.mean is never
        # evaluated on an empty buffer.
        if L > 99 and abs(g - np.mean(self.data_buffer)) > 10:
            g = self.data_buffer[-1]

        self.times.append(time.time() - self.t0)
        self.data_buffer.append(g)

        # Keep only the most recent `buffer_size` samples.
        if L > self.buffer_size:
            self.data_buffer = self.data_buffer[-self.buffer_size:]
            self.times = self.times[-self.buffer_size:]
            self.bpms = self.bpms[-self.buffer_size // 2:]
            L = self.buffer_size

        processed = np.array(self.data_buffer)

        if L == self.buffer_size:
            # Use the measured sampling rate, not the camera's nominal fps.
            self.fps = float(L) / (self.times[-1] - self.times[0])
            even_times = np.linspace(self.times[0], self.times[-1], L)

            # Detrend to reduce the influence of lighting changes.
            processed = signal.detrend(processed)
            # Resample onto evenly spaced time stamps.
            interpolated = np.interp(even_times, self.times, processed)
            # Hamming window to limit spectral leakage.
            interpolated = np.hamming(L) * interpolated
            norm = interpolated / np.linalg.norm(interpolated)
            # Real FFT of the normalised signal, scaled by 30.
            raw = np.fft.rfft(norm * 30)

            # Integer bin count; the float form relied on arange accepting a
            # non-integer stop.
            self.freqs = float(self.fps) / L * np.arange(L // 2 + 1)
            freqs = 60. * self.freqs  # Hz -> beats per minute

            self.fft = np.abs(raw)**2  # power spectrum

            # Keep only the band a heart rate is expected in (50-180 bpm).
            idx = np.where((freqs > 50) & (freqs < 180))
            pruned = self.fft[idx]
            pfreq = freqs[idx]

            self.freqs = pfreq
            self.fft = pruned

            # The strongest bin in the band is taken as the heart rate. With
            # no bin in the band argmax would raise, so the previous estimate
            # is kept.
            if pruned.size:
                idx2 = np.argmax(pruned)
                self.bpm = self.freqs[idx2]
                self.bpms.append(self.bpm)

            processed = self.butter_bandpass_filter(processed, 0.8, 3,
                                                    self.fps, order=3)

        self.samples = processed

        # A first dimension of 10 matches the placeholder frames created in
        # __init__ / reset.
        # NOTE(review): presumably the detector returns such a placeholder
        # mask when no face is found -- confirm.
        if mask.shape[0] != 10:
            out = np.zeros_like(face_frame)
            # np.bool was removed in NumPy 1.24; the builtin is equivalent.
            mask = mask.astype(bool)
            out[mask] = face_frame[mask]
            if processed[-1] > np.mean(processed):
                out[mask, 2] = 180 + processed[-1] * 10
            face_frame[mask] = out[mask]

    def reset(self):
        """Clear all frames, buffers and estimates and restart the clock."""
        self.frame_in = np.zeros((10, 10, 3), np.uint8)
        self.frame_ROI = np.zeros((10, 10, 3), np.uint8)
        self.frame_out = np.zeros((10, 10, 3), np.uint8)
        self.samples = []
        self.times = []
        self.data_buffer = []
        self.fps = 0
        self.fft = []
        self.freqs = []
        self.t0 = time.time()
        self.bpm = 0
        self.bpms = []

    def butter_bandpass(self, lowcut, highcut, fs, order=5):
        """Return Butterworth band-pass coefficients ``(b, a)``.

        ``lowcut`` and ``highcut`` are divided by the Nyquist frequency
        (``fs / 2``) before being passed to ``signal.butter``.
        """
        nyq = 0.5 * fs
        low = lowcut / nyq
        high = highcut / nyq
        b, a = signal.butter(order, [low, high], btype='band')
        return b, a

    def butter_bandpass_filter(self, data, lowcut, highcut, fs, order=5):
        """Filter ``data`` with the band-pass from :meth:`butter_bandpass`."""
        b, a = self.butter_bandpass(lowcut, highcut, fs, order=order)
        y = signal.lfilter(b, a, data)
        return y
def main():
    """Run the gaze pipeline on a camera or video and move the mouse.

    Validates the input and model paths (exiting with status 1 when one is
    missing), loads the four models and then, per frame, predicts face, head
    pose, eye landmarks and gaze. Optional overlays are drawn according to
    ``args.models_outputs_flags`` ('fd', 'fld', 'hp', 'ge'), and the mouse is
    moved on every fifth frame. ESC (key code 27) stops the loop.
    """
    # Grab command line args
    args = build_argparser().parse_args()
    flags = args.models_outputs_flags

    logger = logging.getLogger()
    input_file_path = args.input

    if input_file_path.lower() == "cam":
        input_feeder = InputFeeder("cam")
    else:
        if not os.path.isfile(input_file_path):
            logger.error("Unable to find specified video file")
            exit(1)
        input_feeder = InputFeeder("video", input_file_path)

    model_path_dict = {
        'FaceDetection': args.face_detection_model,
        'FacialLandmarks': args.facial_landmarks_model,
        'GazeEstimation': args.gaze_estimation_model,
        'HeadPoseEstimation': args.head_pose_estimation_model
    }
    for file_name_key, model_path in model_path_dict.items():
        if not os.path.isfile(model_path):
            logger.error("Unable to find specified " + file_name_key +
                         " xml file")
            exit(1)

    fdm = FaceDetection(model_path_dict['FaceDetection'], args.device,
                        args.cpu_extension)
    flm = FacialLandmarks(model_path_dict['FacialLandmarks'], args.device,
                          args.cpu_extension)
    gem = GazeEstimation(model_path_dict['GazeEstimation'], args.device,
                         args.cpu_extension)
    hpem = HeadPoseEstimation(model_path_dict['HeadPoseEstimation'],
                              args.device, args.cpu_extension)

    mc = MouseController('medium', 'fast')

    input_feeder.load_data()
    try:
        fdm.load_model()
        flm.load_model()
        hpem.load_model()
        gem.load_model()

        frame_count = 0
        for ret, frame in input_feeder.next_batch():
            if not ret:
                break
            frame_count += 1
            if frame_count % 5 == 0:
                cv2.imshow('video', cv2.resize(frame, (500, 500)))

            key = cv2.waitKey(60)

            cropped_face, face_coords = fdm.predict(frame,
                                                    args.prob_threshold)
            # The detector returns an int instead of an image when it finds
            # no face.
            if isinstance(cropped_face, int):
                logger.error("Unable to detect any face.")
                if key == 27:
                    break
                continue

            hp_output = hpem.predict(cropped_face)
            left_eye_img, right_eye_img, eye_coords = flm.predict(
                cropped_face)
            new_mouse_coord, gaze_vector = gem.predict(
                left_eye_img, right_eye_img, hp_output)

            if len(flags) != 0:
                preview_frame = frame
                if 'fd' in flags:
                    preview_frame = cropped_face
                if 'fld' in flags:
                    # Box each eye with a 10 px margin.
                    cv2.rectangle(
                        cropped_face,
                        (eye_coords[0][0] - 10, eye_coords[0][1] - 10),
                        (eye_coords[0][2] + 10, eye_coords[0][3] + 10),
                        (0, 255, 0), 3)
                    cv2.rectangle(
                        cropped_face,
                        (eye_coords[1][0] - 10, eye_coords[1][1] - 10),
                        (eye_coords[1][2] + 10, eye_coords[1][3] + 10),
                        (0, 255, 0), 3)
                if 'hp' in flags:
                    cv2.putText(
                        preview_frame,
                        "Pose Angles: yaw:{:.2f} | pitch:{:.2f} | roll:{:.2f}".
                        format(hp_output[0], hp_output[1], hp_output[2]),
                        (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.25,
                        (0, 255, 0), 1)
                if 'ge' in flags:
                    # Draw a cross on each eye image, offset by the scaled
                    # gaze vector, then paste the eyes back into the face.
                    x, y, w = (int(gaze_vector[0] * 12),
                               int(gaze_vector[1] * 12), 160)
                    left_eye = cv2.line(left_eye_img, (x - w, y - w),
                                        (x + w, y + w), (255, 0, 255), 2)
                    cv2.line(left_eye, (x - w, y + w), (x + w, y - w),
                             (255, 0, 255), 2)
                    right_eye = cv2.line(right_eye_img, (x - w, y - w),
                                         (x + w, y + w), (255, 0, 255), 2)
                    cv2.line(right_eye, (x - w, y + w), (x + w, y - w),
                             (255, 0, 255), 2)
                    cropped_face[eye_coords[0][1]:eye_coords[0][3],
                                 eye_coords[0][0]:eye_coords[0][2]] = left_eye
                    cropped_face[eye_coords[1][1]:eye_coords[1][3],
                                 eye_coords[1][0]:eye_coords[1][2]] = right_eye

                cv2.imshow("Visualization",
                           cv2.resize(preview_frame, (500, 500)))

            if frame_count % 5 == 0:
                mc.move(new_mouse_coord[0], new_mouse_coord[1])
            if key == 27:
                break

        logger.error("VideoStream ended...")
    finally:
        # Close the windows and the input source on every exit path.
        cv2.destroyAllWindows()
        input_feeder.close()
class MoveMouse:
    '''
    Main class of the Mouse Controller app.

    It builds the four models (face detection, facial landmarks, head pose
    and gaze estimation), the mouse controller and the input feeder, and
    chains them in ``run_inference`` to move the mouse pointer.
    '''

    def __init__(self, args):
        '''
        Creates the models, the mouse controller and the input feeder, loads
        the models and records how long each start-up phase took.

        Args:
        args = All arguments parsed by the arguments parser function. When
               ``args.all_devices`` is not 'CPU' it overrides the four
               per-model device attributes on ``args`` in place.

        Return:
        None
        '''
        init_start_time = time.time()

        self.output_path = args.output_path
        self.show_output = args.show_output

        # Running statistics, updated by run_inference.
        self.total_processing_time = 0
        self.count_batch = 0
        self.inference_speed = []
        self.avg_inference_speed = 0

        if args.all_devices != 'CPU':
            args.face_device = args.all_devices
            args.face_landmark_device = args.all_devices
            args.head_pose_device = args.all_devices
            args.gaze_device = args.all_devices

        model_init_start = time.time()
        self.face_model = FaceDetection(args.face_model, args.face_device,
                                        args.face_device_ext,
                                        args.face_prob_threshold)
        self.landmarks_model = FacialLandmarksDetection(
            args.face_landmark_model, args.face_landmark_device,
            args.face_landmark_device_ext, args.face_landmark_prob_threshold)
        self.head_pose_model = HeadPoseEstimation(
            args.head_pose_model, args.head_pose_device,
            args.head_pose_device_ext, args.head_pose_prob_threshold)
        self.gaze_model = GazeEstimation(args.gaze_model, args.gaze_device,
                                         args.gaze_device_ext,
                                         args.gaze_prob_threshold)
        self.model_init_time = time.time() - model_init_start
        log.info('[ Main ] All required models initiallized')

        self.mouse_control = MouseController(args.precision, args.speed)
        log.info('[ Main ] Mouse controller successfully initialized')

        self.input_feeder = InputFeeder(args.batch_size, args.input_type,
                                        args.input_file)
        log.info('[ Main ] Initialized input feeder')

        model_load_start = time.time()
        self.face_model.load_model()
        self.landmarks_model.load_model()
        self.head_pose_model.load_model()
        self.gaze_model.load_model()
        self.model_load_time = time.time() - model_load_start
        self.app_init_time = time.time() - init_start_time
        log.info('[ Main ] All moadels loaded to Inference Engine\n')

    def draw_face_box(self, frame, face_coords):
        '''
        Draws the bounding box of the first detected face on the frame.

        Args:
        frame = Input frame from video or camera feed. It could also be an
                input image
        face_coords = Sequence of boxes; only ``face_coords[0]`` is drawn,
                      using its items 0-1 as one corner and 2-3 as the
                      opposite corner

        Return:
        frame = Frame with the face bounding box drawn on it
        '''
        box = face_coords[0]
        return cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]),
                             (255, 86, 0), 5)

    def draw_eyes_boxes(self, frame, left_eye_coords, right_eye_coords):
        '''
        Draws the bounding boxes of both eyes on the frame.

        Args:
        frame = Input frame from video or camera feed. It could also be an
                input image
        left_eye_coords = Box of the left eye; items 0-1 are one corner and
                          items 2-3 the opposite corner
        right_eye_coords = Box of the right eye, same layout

        Return:
        frame = Frame with bounding box of left and right eyes drawn on it
        '''
        thickness = 5
        color = (0, 210, 0)
        for eye in (left_eye_coords, right_eye_coords):
            frame = cv2.rectangle(frame, (eye[0], eye[1]), (eye[2], eye[3]),
                                  color, thickness)
        return frame

    def draw_outputs(self, frame):
        '''
        Draws the latest inference outputs onto the frame: the face box, both
        eye boxes and three text lines (batch id, average inference speed and
        total inference time).

        Reads ``self.face_coords``, ``self.left_eye_coords`` and
        ``self.right_eye_coords``, which are set by ``run_inference``.

        Args:
        frame = Input frame from video or camera feed. It could also be an
                input image

        Return:
        frame = Frame with all inference outputs drawn on it
        '''
        frame = self.draw_face_box(frame, self.face_coords)
        frame = self.draw_eyes_boxes(frame, self.left_eye_coords,
                                     self.right_eye_coords)

        overlay_lines = (
            f'Batch id = {self.count_batch}',
            f'Avg. inference speed = {self.avg_inference_speed:.3f}fps',
            f'Total infer. time = {self.total_processing_time:.3f}s',
        )
        # One text line every 15 px, starting at y = 15.
        for row, text in enumerate(overlay_lines, start=1):
            cv2.putText(frame, text, (15, 15 * row),
                        cv2.FONT_HERSHEY_COMPLEX, 0.45, (255, 86, 0), 1)
        return frame

    def _write_stats(self):
        '''
        Overwrites ``outputs.txt`` in ``self.output_path`` with the timing
        statistics and the outputs of the most recently processed batch.
        '''
        report = [
            'INFERENCE STATS\n',
            f'Total model initialization time : {self.model_init_time:.2f}s\n',
            f'Total model load time: {self.model_load_time:.2f}s\n',
            f'App initialization time: {self.app_init_time:.2f}s\n',
            f'Total processing time: {self.total_processing_time:.2f}s\n',
            f'Average inference speed: {self.avg_inference_speed:.2f}FPS\n',
            f'Batch count: {self.count_batch}\n\n',
            'LAST OUTPUTS\n',
            f'Face coordinates: {self.face_coords}\n',
            f'Left eye coordinates: {self.left_eye_coords}\n',
            f'Right eye coordinates: {self.right_eye_coords}\n',
            f'Head pose angles: {self.head_pose_angles}\n',
            f'Relative pointer coordinates/ Gaze vector: [{self.x:.2f}, {self.y:.2f}]',
        ]
        with open(os.path.join(self.output_path, 'outputs.txt'), 'w+') as f:
            f.writelines(report)

    def run_inference(self, frame=None):
        '''
        Reads batches from the input feeder and passes each one through all
        four models to get the coordinates used to move the mouse pointer.
        After every successfully processed batch the statistics file is
        rewritten.

        Args:
        frame = Ignored. Frames are read from the input feeder; the parameter
                is kept so existing callers that pass a frame keep working.

        Return:
        None
        '''
        self.input_feeder.load_data()

        for batch in self.input_feeder.next_batch():
            if not self.input_feeder.frame_flag:
                # The feeder reports that there is nothing left to process.
                self.input_feeder.close()
                cv2.destroyAllWindows()
                log.info(
                    f'[ Main ] All input Batches processed in {self.total_processing_time:.2f}s'
                )
                log.info('[ Main ] Shutting down app...')
                log.info('[ Main ] Mouse controller app has been shut down.')
                break

            log.info('[ Main ] Started processing a new batch')
            start_inference = time.time()
            self.face_coords, self.face_crop = self.face_model.predict(batch)

            if self.face_coords == []:
                log.info(
                    '[ Main ] No face detected.. Waiting for you to stare at the camera'
                )
                # The stats file is only opened after a successful detection,
                # so there is no file handle here; report through the logger
                # and leave previously written stats untouched.
                log.error('[ Error ] No face was detected')
                continue

            self.head_pose_angles = self.head_pose_model.predict(
                self.face_crop)
            (self.left_eye_coords, self.left_eye_image,
             self.right_eye_coords,
             self.right_eye_image) = self.landmarks_model.predict(
                 self.face_crop)
            self.x, self.y = self.gaze_model.predict(self.left_eye_image,
                                                     self.right_eye_image,
                                                     self.head_pose_angles)
            log.info(
                f'[ Main ] Relative pointer coordinates: [{self.x:.2f}, {self.y:.2f}]'
            )

            batch_process_time = time.time() - start_inference
            self.total_processing_time += batch_process_time
            self.count_batch += 1
            log.info(
                f'[ Main ] Finished processing batch. Time taken = {batch_process_time}s\n'
            )

            self.mouse_control.move(self.x, self.y)

            if self.show_output:
                self.draw_outputs(batch)
                # NOTE(review): cv2.imshow normally needs a cv2.waitKey call
                # to repaint; confirm the input feeder pumps the GUI loop.
                cv2.imshow('Computer Pointer Controller Output', batch)

            # The elapsed time can be exactly zero on a coarse clock; skip the
            # sample instead of dividing by zero.
            if self.total_processing_time > 0:
                self.inference_speed.append(self.count_batch /
                                            self.total_processing_time)
            if self.inference_speed:
                self.avg_inference_speed = sum(self.inference_speed) / len(
                    self.inference_speed)

            self._write_stats()
from face_detection import FaceDetection
from util.db import DynamoDBUtils
from util import misc
import random
import shutil
import pprint
import os

# Directory that holds the videos handed to the detector.
OUTPUT_DIR = '../videos'

if __name__ == '__main__':
    # Manual driver: run face detection on one local clip and pretty-print
    # the report returned by FaceDetection.process.
    BUCKET_NAME = 'smart-cam'

    # Disabled alternative that fetches the clip from S3 first (kept for
    # reference; note the Python 2 print statement):
    #
    #   if os.path.exists(OUTPUT_DIR):
    #       shutil.rmtree(OUTPUT_DIR)
    #   os.makedirs(OUTPUT_DIR)
    #   ret, local_file = misc.download_from_s3(BUCKET_NAME, 'videos/video_1.avi', OUTPUT_DIR)
    #   print ret, local_file

    # Other clip that can be used instead:
    #   local_file = '../videos/video_100_frames_2.mp4'
    local_file = '../videos/video_100_frames_1.mp4'

    # Earlier detector variant: FaceDetectionV1()
    detector = FaceDetection()
    pprint.pprint(detector.process(local_file, show_frame=False))