def track_faces(frames, fm):
    """
    Track faces by comparing positions of face detection bounding boxes
    in consecutive frames.

    Frames are consumed destructively: once a detection is assigned to a
    segment it is deleted from its frame's face list, so it cannot be
    claimed again by another segment.

    :type frames: list
    :param frames: list of frames; each frame is a dict holding at least
                   c.FACES_KEY (list of face dicts) and
                   c.ELAPSED_VIDEO_TIME_KEY
    :type fm: LBPHFaceRecognizer
    :param fm: face model passed to utils.aggregate_frame_results
    :rtype: list
    :returns: list of segments; each segment is a dict with the
              aggregated tag/confidence, the list of member frames and
              the total frame count
    """
    segments = []
    track_frame_counter = 0
    for frame in frames:
        faces = frame[c.FACES_KEY]
        elapsed_video_s = frame[c.ELAPSED_VIDEO_TIME_KEY]
        if len(faces) != 0:
            face_counter = 0
            # NOTE(review): 'faces' is mutated (del below) while being
            # iterated, and face_counter keeps advancing after a deletion
            # shifts the remaining indices. This reproduces the original
            # behavior -- confirm it is intended before changing.
            for face in faces:
                segment_dict = {}
                segment_frame_counter = 1
                prev_bbox = face[c.BBOX_KEY]
                segment_frames_list = []
                segment_frame_dict = {
                    c.ELAPSED_VIDEO_TIME_KEY: elapsed_video_s,
                    c.FRAME_COUNTER_KEY: track_frame_counter,
                    c.ASSIGNED_TAG_KEY: face[c.ASSIGNED_TAG_KEY],
                    c.CONFIDENCE_KEY: face[c.CONFIDENCE_KEY],
                    c.BBOX_KEY: prev_bbox}
                segment_frames_list.append(segment_frame_dict)
                # Detection is now owned by this segment
                del frames[track_frame_counter][c.FACES_KEY][face_counter]

                sub_frame_counter = track_frame_counter + 1
                prev_frame_counter = track_frame_counter

                # Search face in subsequent frames and add good bounding
                # boxes to segment. Bounding boxes included in this
                # segment must not be considered by other segments
                for subsequent_frame in frames[sub_frame_counter:]:

                    # Consider only successive frames or frames whose
                    # maximum distance is MAX_FR_WITH_MISSED_DET + 1
                    if (sub_frame_counter >
                            (prev_frame_counter +
                             c.MAX_FR_WITH_MISSED_DET + 1)):
                        # Drop the trailing frames counted while the
                        # detection was missing
                        segment_frame_counter = (
                            segment_frame_counter -
                            c.MAX_FR_WITH_MISSED_DET - 1)
                        break

                    sub_faces = subsequent_frame[c.FACES_KEY]
                    elapsed_video_s = subsequent_frame[
                        c.ELAPSED_VIDEO_TIME_KEY]

                    if len(sub_faces) != 0:
                        sub_face_counter = 0
                        for sub_face in sub_faces:
                            # Calculate differences between the two
                            # detections, normalized by the width of the
                            # previous bounding box
                            prev_bbox_x = prev_bbox[0]
                            prev_bbox_y = prev_bbox[1]
                            prev_bbox_w = prev_bbox[2]
                            bbox = sub_face[c.BBOX_KEY]
                            bbox_x = bbox[0]
                            bbox_y = bbox[1]
                            bbox_w = bbox[2]
                            delta_x = (abs(bbox_x - prev_bbox_x) /
                                       float(prev_bbox_w))
                            # NOTE(review): delta_y is also normalized by
                            # the width, not the height -- presumably
                            # face boxes are near-square; confirm intended
                            delta_y = (abs(bbox_y - prev_bbox_y) /
                                       float(prev_bbox_w))
                            delta_w = (abs(bbox_w - prev_bbox_w) /
                                       float(prev_bbox_w))

                            # Check if delta is small enough
                            if ((delta_x < ce.MAX_DELTA_PCT_X) and
                                    (delta_y < ce.MAX_DELTA_PCT_Y) and
                                    (delta_w < ce.MAX_DELTA_PCT_W)):
                                prev_bbox = bbox
                                segment_frame_dict = {
                                    c.ELAPSED_VIDEO_TIME_KEY:
                                        elapsed_video_s,
                                    c.FRAME_COUNTER_KEY: sub_frame_counter,
                                    c.ASSIGNED_TAG_KEY:
                                        sub_face[c.ASSIGNED_TAG_KEY],
                                    c.CONFIDENCE_KEY:
                                        sub_face[c.CONFIDENCE_KEY],
                                    c.BBOX_KEY: bbox}
                                segment_frames_list.append(
                                    segment_frame_dict)
                                # Claimed: remove detection from its frame
                                del frames[sub_frame_counter][
                                    c.FACES_KEY][sub_face_counter]
                                prev_frame_counter = sub_frame_counter
                                # (Removed dead assignment to
                                # consecutive_frames_with_missed_detection:
                                # it was never read in this function.)
                                # Do not consider other faces
                                # in the same frame
                                break
                            sub_face_counter += 1
                    sub_frame_counter += 1
                    segment_frame_counter += 1

                # Aggregate results from all frames in segment
                [final_tag, final_confidence] = (
                    utils.aggregate_frame_results(segment_frames_list, fm))
                segment_dict[c.ASSIGNED_TAG_KEY] = final_tag
                segment_dict[c.CONFIDENCE_KEY] = final_confidence
                segment_dict[c.FRAMES_KEY] = segment_frames_list
                print('segment_frame_counter: ', segment_frame_counter)
                segment_dict[c.SEGMENT_TOT_FRAMES_NR_KEY] = (
                    segment_frame_counter)
                segments.append(segment_dict)
                face_counter += 1
        track_frame_counter += 1
    return segments
def track_faces_with_LBP(frames, fm):
    """
    Track faces by using LBP histograms.

    A one-sample LBPH recognizer is trained on the first face of a
    segment; each subsequent candidate face is accepted when its
    prediction confidence is below ce.STOP_TRACKING_THRESHOLD, and the
    recognizer is re-trained on the accepted face. Frames are consumed
    destructively: a detection assigned to a segment is deleted from its
    frame's face list so other segments cannot claim it.

    :type frames: list
    :param frames: list of frames; each frame is a dict holding at least
                   c.FACES_KEY (list of face dicts) and
                   c.ELAPSED_VIDEO_TIME_KEY
    :type fm: FaceModels
    :param fm: face models passed to utils.aggregate_frame_results
    :rtype: list
    :returns: list of face tracks (segment dicts with aggregated
              tag/confidence, member frames and total frame count)
    """
    segments = []
    tracking_frame_counter = 0
    for frame in frames:
        faces = frame[c.FACES_KEY]
        elapsed_video_s = frame[c.ELAPSED_VIDEO_TIME_KEY]
        if len(faces) != 0:
            face_counter = 0
            # NOTE(review): 'faces' is mutated (del below) while being
            # iterated -- reproduced from the original design
            for face in faces:
                segment_dict = {}
                segment_frame_counter = 1
                prev_face = face[c.FACE_KEY]
                prev_bbox = face[c.BBOX_KEY]
                segment_frames_list = []
                segment_frame_dict = {
                    c.ELAPSED_VIDEO_TIME_KEY: elapsed_video_s,
                    c.FRAME_COUNTER_KEY: tracking_frame_counter,
                    c.ASSIGNED_TAG_KEY: face[c.ASSIGNED_TAG_KEY],
                    c.CONFIDENCE_KEY: face[c.CONFIDENCE_KEY],
                    c.BBOX_KEY: prev_bbox}
                segment_frames_list.append(segment_frame_dict)
                # Detection is now owned by this segment
                del frames[tracking_frame_counter][c.FACES_KEY][face_counter]

                # Calculate LBP histograms from face: train a one-class
                # recognizer on the segment's current reference face
                X = [np.asarray(prev_face, dtype=np.uint8)]
                labels = [0]
                model = cv2.createLBPHFaceRecognizer(
                    c.LBP_RADIUS, c.LBP_NEIGHBORS,
                    c.LBP_GRID_X, c.LBP_GRID_Y)
                model.train(np.asarray(X), np.asarray(labels))

                sub_frame_counter = tracking_frame_counter + 1
                prev_frame_counter = tracking_frame_counter

                # Search face in subsequent frames and add good bounding
                # boxes to segment. Bounding boxes included in this
                # segment must not be considered by other segments
                continue_tracking = True
                for subsequent_frame in frames[sub_frame_counter:]:

                    # Consider only successive frames or frames whose
                    # maximum distance is MAX_FR_WITH_MISSED_DET + 1
                    if sub_frame_counter > (
                            prev_frame_counter +
                            ce.MAX_FR_WITH_MISSED_DET + 1):
                        # BUG FIX: the original subtracted
                        # ce.MAX_FR_WITH_MISSED_DET_KEY (a dict-key
                        # constant) instead of the numeric threshold
                        # ce.MAX_FR_WITH_MISSED_DET used in the guard
                        # above (and in track_faces)
                        segment_frame_counter = (
                            segment_frame_counter -
                            ce.MAX_FR_WITH_MISSED_DET - 1)
                        break

                    sub_faces = subsequent_frame[c.FACES_KEY]
                    elapsed_video_s = subsequent_frame[
                        c.ELAPSED_VIDEO_TIME_KEY]

                    if len(sub_faces) != 0:
                        sub_face_counter = 0
                        continue_tracking = False
                        for sub_face in sub_faces:
                            # Compare this detection against the
                            # segment's reference face
                            this_face = sub_face[c.FACE_KEY]
                            [lbl, conf] = model.predict(
                                np.asarray(this_face, dtype=np.uint8))
                            print('conf =', conf)  # TEST ONLY

                            # Check if confidence is low enough
                            if conf < ce.STOP_TRACKING_THRESHOLD:
                                # Re-train the recognizer on the newly
                                # accepted face
                                X = [np.asarray(this_face, dtype=np.uint8)]
                                labels = [0]
                                model = cv2.createLBPHFaceRecognizer(
                                    c.LBP_RADIUS, c.LBP_NEIGHBORS,
                                    c.LBP_GRID_X, c.LBP_GRID_Y)
                                model.train(np.asarray(X),
                                            np.asarray(labels))
                                continue_tracking = True
                                segment_frame_dict = {
                                    c.ELAPSED_VIDEO_TIME_KEY:
                                        elapsed_video_s,
                                    c.FRAME_COUNTER_KEY: sub_frame_counter,
                                    c.ASSIGNED_TAG_KEY:
                                        sub_face[c.ASSIGNED_TAG_KEY],
                                    c.CONFIDENCE_KEY:
                                        sub_face[c.CONFIDENCE_KEY],
                                    c.BBOX_KEY: sub_face[c.BBOX_KEY]}
                                segment_frames_list.append(
                                    segment_frame_dict)
                                # Claimed: remove detection from its frame
                                del frames[sub_frame_counter][
                                    c.FACES_KEY][sub_face_counter]
                                prev_frame_counter = sub_frame_counter
                                # (Removed dead assignment to
                                # consecutive_frames_with_missed_detection:
                                # it was never read in this function.)
                                # Do not consider other faces
                                # in the same frame
                                break
                            sub_face_counter += 1
                    sub_frame_counter += 1
                    segment_frame_counter += 1
                    if not continue_tracking:
                        break

                # Aggregate results from all frames in segment
                [final_tag, final_confidence] = (
                    utils.aggregate_frame_results(segment_frames_list, fm))
                segment_dict[c.ASSIGNED_TAG_KEY] = final_tag
                segment_dict[c.CONFIDENCE_KEY] = final_confidence
                segment_dict[c.FRAMES_KEY] = segment_frames_list
                segment_dict[c.SEGMENT_TOT_FRAMES_NR_KEY] = (
                    segment_frame_counter)
                segments.append(segment_dict)
                face_counter += 1
        tracking_frame_counter += 1
    return segments
def extract_faces_from_video(self, resource):
    """
    Launch the face extractor on one video resource.
    This method returns a task handle.

    Per-frame detection results are either computed (and saved with
    pickle) or reloaded from a previous run; optionally a tracking or
    sliding-window post-processing step is applied. The result dict is
    stored in self.db_result4image under the returned handle.

    :type resource: string
    :param resource: resource file path
    :rtype: float
    :returns: handle for getting results
    """
    # Set default parameters, then apply any overrides from self.params
    load_ind_frame_results = ce.LOAD_IND_FRAMES_RESULTS
    sim_tracking = ce.SIM_TRACKING
    sliding_window_size = ce.SLIDING_WINDOW_SIZE
    used_fps = c.USED_FPS
    use_or_fps = c.USE_ORIGINAL_FPS
    use_sliding_window = ce.USE_SLIDING_WINDOW
    use_tracking = ce.USE_TRACKING
    if self.params is not None:
        if ce.LOAD_IND_FRAMES_RESULTS_KEY in self.params:
            load_ind_frame_results = (
                self.params[ce.LOAD_IND_FRAMES_RESULTS_KEY])
        if ce.SIM_TRACKING_KEY in self.params:
            sim_tracking = self.params[ce.SIM_TRACKING_KEY]
        # BUG FIX: the original tested ce.SLIDING_WINDOW_SIZE (the
        # default value) for membership but then read
        # self.params[ce.SLIDING_WINDOW_SIZE_KEY]; test the key instead,
        # consistent with every other parameter lookup here
        if ce.SLIDING_WINDOW_SIZE_KEY in self.params:
            sliding_window_size = self.params[ce.SLIDING_WINDOW_SIZE_KEY]
        if c.USED_FPS_KEY in self.params:
            used_fps = self.params[c.USED_FPS_KEY]
        if c.USE_ORIGINAL_FPS_KEY in self.params:
            use_or_fps = self.params[c.USE_ORIGINAL_FPS_KEY]
        if ce.USE_SLIDING_WINDOW_KEY in self.params:
            use_sliding_window = self.params[ce.USE_SLIDING_WINDOW_KEY]
        if ce.USE_TRACKING_KEY in self.params:
            use_tracking = self.params[ce.USE_TRACKING_KEY]

    # Save processing time
    start_time = cv2.getTickCount()

    error = None
    frames = None
    segments = None

    capture = cv2.VideoCapture(resource)

    # Counter for all frames
    frame_counter = 0
    # Counter for analyzed frames
    anal_frame_counter = 0
    # Value of frame_counter for last analyzed frame
    last_anal_frame = 0

    if capture is None or not capture.isOpened():
        error = 'Error in opening video file'
    else:
        frames = []
        if ((use_tracking or sim_tracking or use_sliding_window)
                and load_ind_frame_results):
            # Load previously saved per-frame results by using pickle
            print('Loading frames')
            resource_name = os.path.basename(resource)
            file_name = resource_name + '.pickle'
            file_path = os.path.join(ce.FRAMES_FILES_PATH, file_name)
            # BUG FIX: pickle files must be opened in binary mode
            with open(file_path, 'rb') as f:
                frames = pickle.load(f)
            anal_frame_counter = len(frames)
        else:
            video_fps = capture.get(cv2.cv.CV_CAP_PROP_FPS)
            tot_frames = capture.get(cv2.cv.CV_CAP_PROP_FRAME_COUNT)
            while True:
                frame_dict = {}
                ret, frame = capture.read()
                if not ret:
                    break
                # Next frame to be analyzed (subsampling to used_fps
                # unless the original frame rate is requested)
                next_frame = last_anal_frame + (video_fps / used_fps) - 1
                if use_or_fps or (frame_counter > next_frame):
                    # Frame position in video in seconds
                    elapsed_video_ms = capture.get(
                        cv2.cv.CV_CAP_PROP_POS_MSEC)
                    elapsed_video_s = elapsed_video_ms / 1000
                    self.progress = 100 * (frame_counter / tot_frames)
                    # TEST ONLY
                    print('progress: ' + str(self.progress) + '%')
                    # Run the single-image extractor on the dumped frame
                    cv2.imwrite(ce.TMP_FRAME_FILE_PATH, frame)
                    handle = self.extract_faces_from_image(
                        ce.TMP_FRAME_FILE_PATH)
                    frame_results = self.get_results(handle)
                    frame_error = frame_results[c.ERROR_KEY]
                    if frame_error:
                        error = frame_results[c.ERROR_KEY]
                        break
                    else:
                        frame_dict[c.ELAPSED_VIDEO_TIME_KEY] = (
                            elapsed_video_s)
                        frame_dict[c.FACES_KEY] = frame_results[c.FACES_KEY]
                        frame_dict[c.FRAME_COUNTER_KEY] = frame_counter
                        frames.append(frame_dict)
                        anal_frame_counter += 1
                        last_anal_frame = frame_counter
                frame_counter += 1

            # Save frames by using pickle, in binary mode
            # (removed dead local 'frames_dict': it was built but never
            # used -- the frame list itself is what gets dumped)
            resource_name = os.path.basename(resource)
            file_name = resource_name + '.pickle'
            file_path = os.path.join(ce.FRAMES_FILES_PATH, file_name)
            with open(file_path, 'wb') as f:
                pickle.dump(frames, f)

        if use_tracking and (frames is not None):
            segments = track_faces_with_LBP(frames, self.face_models)
        elif use_sliding_window and (frames is not None):
            frame_rate = capture.get(cv2.cv.CV_CAP_PROP_FPS)
            frame_nr_in_window = frame_rate * sliding_window_size
            frame_nr_half_window = int(math.floor(frame_nr_in_window / 2))
            sl_window_frame_counter = 0
            for frame in frames:
                # Get faces from frame results
                faces = frame[c.FACES_KEY]
                if len(faces) != 0:
                    # Select frames to be included in window,
                    # clamping the window to the video boundaries
                    first_frame_in_window = (
                        sl_window_frame_counter - frame_nr_half_window)
                    if first_frame_in_window < 0:
                        first_frame_in_window = 0
                    last_frame_in_window = (
                        sl_window_frame_counter + frame_nr_half_window)
                    if last_frame_in_window > (len(frames) - 1):
                        last_frame_in_window = len(frames) - 1
                    window_frames = frames[
                        first_frame_in_window:(last_frame_in_window + 1)]
                    window_frames_list = []
                    for window_frame in window_frames:
                        # Get tag from first face
                        faces = window_frame[c.FACES_KEY]
                        if len(faces) != 0:
                            first_face = faces[0]
                            assigned_tag = first_face[c.ASSIGNED_TAG_KEY]
                            confidence = first_face[c.CONFIDENCE_KEY]
                            window_frame_dict = {
                                c.ASSIGNED_TAG_KEY: assigned_tag,
                                c.CONFIDENCE_KEY: confidence}
                            window_frames_list.append(window_frame_dict)
                    # Final tag for each frame depends on assigned
                    # tags on all frames in window
                    [frame_final_tag, frame_final_confidence] = (
                        aggregate_frame_results(
                            window_frames_list, self.face_models))
                    print('frame_final_tag: ', frame_final_tag)
                    frame[c.FACES_KEY][0][c.ASSIGNED_TAG_KEY] = (
                        frame_final_tag)
                    frame[c.FACES_KEY][0][c.CONFIDENCE_KEY] = (
                        frame_final_confidence)
                sl_window_frame_counter += 1

    processing_time_in_clocks = cv2.getTickCount() - start_time
    processing_time_in_seconds = (
        processing_time_in_clocks / cv2.getTickFrequency())

    # Populate dictionary with results
    results = {c.ELAPSED_CPU_TIME_KEY: processing_time_in_seconds,
               c.ERROR_KEY: error,
               ce.TOT_FRAMES_NR_KEY: anal_frame_counter}
    if use_tracking:
        results[c.SEGMENTS_KEY] = segments
    else:
        results[c.FRAMES_KEY] = frames

    self.progress = 100
    handle = time.time()
    self.db_result4image[handle] = results
    return handle