def __init__(self, frames=None):
    """Initialise the instance with a frame list, a detector and a colour.

    frames -- optional list of frames to work on; when omitted (``None``)
              a new empty list is created for this instance.
    """
    # A fresh list per instance avoids sharing one mutable default.
    self.frames = [] if frames is None else frames
    self.facedetector = FaceDetector()
    self.color = (0, 255, 0)
def main(args):
    """Run the gesture / voice driven mouse-control loop.

    The loop ends when the input feeder is exhausted, a frame cannot be
    read, or the user presses ``q`` in the preview window.

    Control modes (index into ``modes``):
        0 = No Control, 1 = Gaze Control, 2 = Head Pose, 3 = Sound Control

    Gestures as implemented in the loop below:
        * left-eye closure score below -30 -> switch to the next control
          mode via ``hikeControlMode`` (event 'Increment Control Mode')
        * mouth aspect ratio above 0.4 while the face is in bounds ->
          left mouse click
        * mouth aspect ratio below 0.26 -> 'Smiling' event (no action bound)
        * left / right wink -> only printed (no action bound)
        * in Sound mode, a recognised direction word moves the cursor; any
          other last word is typed, with number words typed as digits

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``input_type``, ``input``, ``face``, ``headpose``,
            ``landmarks``, ``gazeestimation``, ``device``,
            ``cpu_extension``, ``visualizeHeadPose``, ``visualizeGaze``
            and ``visualizeFace``.

    Returns:
        None. Returns early (after printing a message) when the audio
        stream reader or the speech recogniser cannot be initialised.
    """
    controlMode = 0
    modes = ['No Control', 'Gaze Control', 'Head Pose', 'Sound Control']

    #####################################################################
    # Initializing the Speech Recognition Thread
    #####################################################################
    # You can add more controls as you deem fit.
    numbers = ['zero', 'one', 'two', 'three', 'four',
               'five', 'six', 'seven', 'eight', 'nine']
    controls = ['left', 'right', 'up', 'down']
    # Similar sounding words are accepted as well, because the on-device
    # speech recognition frequently mishears the short control words.
    control_syn = {
        'left': ['let', 'left', 'light', 'live', 'laugh'],
        'right': ['right', 'write', 'great', 'fight', 'might', 'ride'],
        'up': ['up', 'hop', 'hope', 'out'],
        'down': ['down', 'doubt', 'though'],
    }

    device_list = load_device()
    # NOTE(review): `received_frames` is not assigned in this function, so
    # it has to be provided at module level - confirm.
    stream_reader = audio_helper.StreamReader(device_list[1][0],
                                              received_frames)
    if not stream_reader.initialize():
        # The speech manager does not exist yet at this point, so there is
        # nothing to close (closing it here raised UnboundLocalError).
        print("Failed to initialize Stream Reader")
        return

    speech = SpeechManager()
    print('speech config = ' + str(SPEECH_CONFIG))
    if not speech.initialize(SPEECH_CONFIG, infer_device='CPU', batch_size=8):
        print("Failed to initialize ASR recognizer")
        speech.close()
        return

    # Utterances decoded by the reader thread are handed over via `stt`.
    stt = Queue()
    prevUtterance = ''
    reading_thread = Thread(target=stream_reader.read_stream,
                            args=(speech, stt), daemon=True)
    reading_thread.start()

    #####################################################################
    # Fixing 60x60 box as yaw and pitch boundaries to correspond to head
    # turning left and right (yaw) and also moving up and down (pitch).
    headYawPitchBounds = [-30, 30]
    lastGaze = [0, 0]
    lastPose = [0, 0]

    # Stickiness values passed to isMoveEnabled for each control mode.
    stickinessHead = 5
    stickinessGaze = 10
    eventText = "No Event"

    logger = logging.getLogger()

    feeder = InputFeeder(args.input_type, args.input)
    feeder.load_data()

    mc = MouseController("medium", "fast")

    # Loading all the gesture control models: face, head pose, landmarks
    # and gaze.
    face_model = FaceDetector(args.face, args.device, args.cpu_extension)
    face_model.load_model()
    logger.info("Face Detection Model Loaded...")

    head_model = HeadPoseDetect(args.headpose, args.device,
                                args.cpu_extension)
    head_model.load_model()
    logger.info("Head Pose Detection Model Loaded...")

    landmarks_model = LandmarksDetect(args.landmarks, args.device,
                                      args.cpu_extension)
    landmarks_model.load_model()
    logger.info("Landmarks Detection Model Loaded...")

    gaze_model = GazeDetect(args.gazeestimation, args.device,
                            args.cpu_extension)
    gaze_model.load_model()
    logger.info("Gaze Detection Model Loaded...")

    visualizeHeadPose = bool(distutils.util.strtobool(args.visualizeHeadPose))
    visualizeGaze = bool(distutils.util.strtobool(args.visualizeGaze))
    visualizeFace = bool(distutils.util.strtobool(args.visualizeFace))

    # Per-eye history handed to (and returned by) findClosurebyStats.
    pixelCount_leye = []
    pixelCount_reye = []

    isCalibrated = False
    isSmiling = False
    isMouthOpen = False
    moveEnabled = False
    islookingUp = False
    lastPoses = collections.deque(maxlen=20)
    lastGazes = collections.deque(maxlen=20)

    landmark_colour = (255, 0, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX

    try:
        frame_count = 0
        for ret, frame in feeder.next_batch():
            ################################################################
            # If the speech thread produced an utterance while in Sound
            # mode, look for a control word; otherwise type the last word.
            if stt.qsize() > 0 and controlMode == 3:
                utterance = stt.get()
                print("From Parent: " + utterance)
                # Need to process again only if the utterance changed.
                if prevUtterance != utterance:
                    control, lastWord = detectSoundEvent(
                        utterance, controls, control_syn)
                    if control is not None:
                        direction = controls.index(control)
                        mc.moveRelative(direction)
                    else:
                        # Number words are typed as their digit.
                        if lastWord in numbers:
                            lastWord = str(numbers.index(lastWord))
                        mc.write(lastWord)
                    prevUtterance = utterance
            ################################################################

            # Press 'q' to exit.
            k = cv2.waitKey(1) & 0xFF
            if k == ord('q'):
                break
            if not ret:
                break

            frame_count += 1

            crop_face, box = face_model.predict(frame.copy())
            if crop_face is None:
                logger.error("Unable to detect the face.")
                continue

            # Draw the face box.
            xmin, ymin, xmax, ymax = box
            if visualizeFace:
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                              (255, 0, 255), 3)

            orientation = head_model.predict(crop_face)

            (box_left, box_right,
             left_eye, right_eye,
             p0, p1, p12, p13, p14,
             p2, p3, p15, p16, p17,
             p8, p9, p10, p11) = landmarks_model.predict(crop_face)

            # If either eye is not detected, eye gestures and gaze
            # estimation are skipped for this frame.
            if left_eye.size * right_eye.size == 0:
                logger.error("Unable to detect eyes.")
                continue

            # Landmark coordinates are relative to the face crop, hence
            # the (xmin, ymin) offsets when indexing the full frame.
            pad = 10
            # Right eye: close snap.
            right_eye_ball = frame[ymin + p1[1] - pad:ymin + p0[1] + pad,
                                   xmin + p1[0] - pad:xmin + p0[0] + pad]
            # Left eye: close snap.
            left_eye_ball = frame[ymin + p3[1] - pad:ymin + p2[1] + pad,
                                  xmin + p2[0] - pad:xmin + p3[0] + pad]

            pixelCount_reye, Rtrigger, probR = findClosurebyStats(
                'Right', right_eye_ball, pixelCount_reye, frame_count)
            pixelCount_leye, Ltrigger, probL = findClosurebyStats(
                'Left', left_eye_ball, pixelCount_leye, frame_count)
            print("probL: " + str(probL))

            # `islookingUp` latches the gesture so that the control mode
            # is incremented only once until probL rises above 0 again.
            if probL < -30 and not islookingUp:
                print('Click Right')
                controlMode = hikeControlMode(controlMode)
                islookingUp = True
                eventText = 'Increment Control Mode'
            elif probL > 0:
                islookingUp = False
                if eventText == 'Increment Control Mode':
                    eventText = 'No Event'

            # If both eyes are detected as pressed (one eye can shrink
            # when the other is winked), keep only the eye with the higher
            # closure probability. Closing both eyes is not a gesture.
            if Ltrigger and Rtrigger:
                if probR > probL:
                    Ltrigger = False
                else:
                    Rtrigger = False

            # Hook left / right wink actions in here if desired.
            if Ltrigger:
                print('left eye pressed')
            if Rtrigger:
                print('right eye pressed')

            gaze, _ = gaze_model.predict(left_eye, right_eye, orientation)

            # Yaw and pitch are what matter for mouse control.
            yaw = orientation[0]
            pitch = orientation[1]

            cH, cW = crop_face.shape[:2]
            arrowLength = 0.5 * max(cH, cW)

            # Drawing eye boxes.
            (p0_x, p0_y) = box_left[:2]
            (p12_x, p12_y) = box_left[2:4]
            cv2.rectangle(frame, (p0_x + xmin, p0_y + ymin),
                          (p12_x + xmin, p12_y + ymin - 5),
                          landmark_colour, 3)

            (p2_x, p2_y) = box_right[:2]
            (p17_x, p17_y) = box_right[2:4]
            cv2.rectangle(frame, (p2_x + xmin, p2_y + ymin),
                          (p17_x + xmin, p17_y + ymin - 5),
                          landmark_colour, 3)

            # Draw the eye points (first ten) and mouth points (last four)
            # as small circles, shifted into full-frame coordinates.
            face_offset = (xmin, ymin)
            for point in (p0, p1, p12, p13, p14,
                          p2, p3, p15, p16, p17,
                          p8, p9, p10, p11):
                cv2.circle(frame,
                           tuple(map(operator.add, point, face_offset)),
                           1, landmark_colour, 2)

            # Finding eye centres (used as origins of the gaze arrows).
            xCenter_left = int((p0_x + p12_x) / 2) + xmin
            yCenter_left = int((p0_y + p12_y) / 2) + ymin
            leftEye_Center = (xCenter_left, yCenter_left)

            xCenter_right = int((p2_x + p17_x) / 2) + xmin
            yCenter_right = int((p2_y + p17_y) / 2) + ymin
            rightEye_Center = (xCenter_right, yCenter_right)

            # Head pose values are refreshed only when they are needed.
            # Drawing of head-pose direction arrows is currently disabled.
            if visualizeHeadPose or controlMode == 2 or isCalibrated is False:
                poseArrowX = orientation[0]
                poseArrowY = orientation[1]

            # Gaze is required for calibration.
            if visualizeGaze or controlMode == 1 or isCalibrated is False:
                gazeArrowX = gaze[0] * arrowLength
                gazeArrowY = -gaze[1] * arrowLength

                cv2.arrowedLine(frame, leftEye_Center,
                                (int(leftEye_Center[0] + gazeArrowX),
                                 int(leftEye_Center[1] + gazeArrowY)),
                                (0, 255, 0), 4)
                cv2.arrowedLine(frame, rightEye_Center,
                                (int(rightEye_Center[0] + gazeArrowX),
                                 int(rightEye_Center[1] + gazeArrowY)),
                                (0, 255, 0), 4)

            ###############################
            # Compute Mouth Aspect Ratio  #
            ###############################
            mouthWidth = p9[0] - p8[0]
            mouthHeight = p11[1] - p10[1]
            mAspRatio = mouthHeight / mouthWidth if mouthWidth != 0 else 0

            # Only act when the face is properly facing the camera, to
            # avoid erroneous events caused by face turns.
            if isFaceInBounds(yaw, pitch) and mAspRatio > 0:
                # These threshold constants need to be either tuned or
                # made dynamic. The gap between each pair of thresholds
                # keeps the state from flickering.

                # Mouth open -> left click (fires once per opening).
                if mAspRatio > 0.4 and not isMouthOpen:
                    print('clicking left')
                    mc.clickLeft()
                    isMouthOpen = True
                    eventText = 'Click Left'
                elif mAspRatio < 0.35:
                    isMouthOpen = False
                    if eventText == 'Click Left':
                        eventText = 'No Event'

                # Mouth wide, i.e. smiling.
                if mAspRatio < 0.26 and not isSmiling:
                    print('You are smiling...')
                    eventText = 'Smiling'
                    isSmiling = True
                elif mAspRatio > 0.3:
                    # Reset the flag once the smile is over.
                    isSmiling = False
                    if eventText == 'Smiling':
                        eventText = 'No Event'

            # Cursor control is evaluated on every fifth frame only.
            try:
                if frame_count % 5 == 0:
                    if mc.calibrated is False:
                        isCalibrated = mc.captureCorners(
                            gazeArrowX, gazeArrowY)
                    elif controlMode == 1:
                        moveEnabled, lastGazes = isMoveEnabled(
                            lastGaze, stickinessGaze,
                            gazeArrowX, gazeArrowY, lastGazes)
                        if moveEnabled:
                            print('moving mouse with gaze')
                            mc.moveWithGaze(gazeArrowX, gazeArrowY)
                            # Saving position for stickiness.
                            lastGaze = [gazeArrowX, gazeArrowY]
                    elif controlMode == 2:
                        moveEnabled, lastPoses = isMoveEnabled(
                            lastPose, stickinessHead,
                            poseArrowX, poseArrowY, lastPoses)
                        if moveEnabled:
                            print('moving mouse with head. Yaw: ' +
                                  str(poseArrowX) + " Pitch: " +
                                  str(poseArrowY) + " Roll: " +
                                  str(orientation[2]))
                            mc.moveWithHead(poseArrowX, poseArrowY,
                                            headYawPitchBounds)
                            # Saving position for stickiness.
                            lastPose = [poseArrowX, poseArrowY]
            except Exception:
                # A failed cursor move must not stop the video loop.
                print(traceback.format_exc())
                PrintException()
                logger.error("Exception occurred while moving cursor!")

            # Status overlay: (text, y position, colour).
            overlays = []
            if isCalibrated:
                overlays.append(('Calibration is done.', 30, (0, 0, 255)))
            overlays.extend([
                ('Control Mode: ' + modes[controlMode], 50, (0, 0, 255)),
                ('Event: ' + eventText, 70, (0, 255, 0)),
                ('MAR: ' + str(round(mAspRatio, 2)), 90, (0, 255, 0)),
                ('Mouse Loc: ' + str(mc.getLocation()), 110, (0, 255, 0)),
            ])
            for text, y_pos, colour in overlays:
                frame = cv2.putText(frame, text, (20, y_pos), font, 0.5,
                                    colour, 1, cv2.LINE_AA)

            imshow('frame', frame, width=800)

    except Exception as err:
        print(traceback.format_exc())
        PrintException()
        logger.error(err)
    finally:
        # Release the windows and the input source however the loop ends
        # (including Ctrl-C, which `except Exception` does not catch).
        cv2.destroyAllWindows()
        feeder.close()
class Magnification:
    """Estimate a pulse rate from the colour of detected face regions.

    For every frame the face is located, a pyramid is built from the
    trimmed face region and one intensity value per pyramid level (taken
    from channel index 2, called "red" in this class) is stored in
    ``pyramid.reds``. The per-level time series are then passed through
    ``fft`` with a filter built from ``expected_pulse_spectrum``.

    Attributes set by the methods:
        frames       -- list of frame objects exposing ``.data`` and ``.ticks``
        processed    -- pyramids produced by ``process_frames``
        step         -- frequency step (Hz per bin) set when the filter runs
    """

    # Number of pyramid levels that are analysed / plotted.
    _LEVELS = 4

    def __init__(self, frames=None):
        """Store the frames and create the face detector.

        frames -- optional list of frames; a new empty list when omitted.
        """
        if frames is None:
            frames = []
        self.frames = frames
        self.facedetector = FaceDetector()
        self.color = (0, 255, 0)

    @staticmethod
    def _trim_rect(x1, y1, x2, y2):
        """Shrink a face box by 20% on the left, right and bottom edges."""
        width = abs(x2 - x1)
        height = abs(y2 - y1)
        return (x1 + int(width * 0.2), y1,
                x2 - int(width * 0.2), y2 - int(height * 0.2))

    def real_time_process(self, frame):
        """Show a preview of ``frame`` with every trimmed face box drawn.

        The boxes are drawn on a copy, so ``frame.data`` is not modified.
        """
        rects = self.facedetector.detect_face(frame.data)
        if rects is None:
            # Same "no detection" convention that process_frame checks for.
            rects = ()
        preview = frame.data.copy()
        for x1, y1, x2, y2 in rects:
            left, top, right, bottom = self._trim_rect(x1, y1, x2, y2)
            cv2.rectangle(preview, (left, top), (right, bottom),
                          self.color, 2)
        # cv2.imshow replaces the legacy cv.fromarray / cv.ShowImage pair;
        # cv2 is already used above for drawing.
        cv2.imshow('face', preview)

    def get_face(self, frame, rects):
        """Return the trimmed box ``(x1, y1, x2, y2)`` of the first rect.

        ``frame`` is accepted for interface compatibility but not used.
        Returns ``None`` when ``rects`` is empty.
        """
        for x1, y1, x2, y2 in rects:
            return self._trim_rect(x1, y1, x2, y2)
        return None

    def process_frame(self, frame):
        """Build the pyramid for one frame and attach its ``reds`` list.

        frame -- image array indexable as ``frame[a:b, c:d, channel]``.

        Returns the ``Pyramid`` with ``reds`` holding one value per level.
        Raises ``AttributeError`` when no face is detected (this is the
        exception ``process_frames`` catches).
        """
        rects = self.facedetector.detect_face(frame)
        if rects is None or len(rects) == 0:
            raise AttributeError('face not detected')
        x1, y1, x2, y2 = self.get_face(frame, rects)
        subframe = frame[x1:x2, y1:y2, :]
        pyramid = Pyramid(subframe, laplacian=False)

        # The total does not depend on the level, so compute it once
        # instead of re-adding every pixel for each level.
        # NOTE(review): the sum covers the full-resolution face region for
        # every level; only the divisor uses the level size. Confirm that
        # summing the level's own pixels was not the intent.
        region_total = frame[x1:x2, y1:y2, 2].sum()

        pyramid.reds = []
        for level in pyramid.levels:
            rows, cols, _ = level.shape
            pyramid.reds.append(region_total / float(rows * cols))
        return pyramid

    def process_frames(self):
        """Process every stored frame; frames without a face are skipped.

        The resulting pyramids are stored in ``self.processed``.
        """
        processed = []
        for frame in progress.bar(self.frames):
            try:
                processed.append(self.process_frame(frame.data))
            except AttributeError:
                puts(colored.red('face not detected'))
        self.processed = processed

    def sample_expected(self, freq_step, length):
        """Sample ``expected_pulse_spectrum`` at ``length`` multiples of
        ``freq_step`` (starting at 0)."""
        return [expected_pulse_spectrum(freq_step * i) for i in range(length)]

    def sampling_rate(self, tps):
        """Return frames per second given ``tps`` ticks per second.

        Computed as frame count divided by the elapsed time between the
        first and the last frame.
        """
        elapsed_ticks = float(self.frames[-1].ticks - self.frames[0].ticks)
        return len(self.frames) / (elapsed_ticks / tps)

    def _expected_filter(self, max_fq):
        """Build the filter callback passed to ``fft``.

        The callback records the frequency step in ``self.step`` and
        applies the expected pulse spectrum to the data.
        """
        def filtr(data):
            hz_step = max_fq / len(data)
            self.step = hz_step
            fi = self.sample_expected(hz_step, len(data))
            return apply_filter(data, fi)
        return filtr

    def _level_series(self, level):
        """Return the time series of ``reds[level]`` over processed frames."""
        return [item.reds[level] for item in self.processed]

    def show(self, tps):
        """Plot the filtered spectrum of each of the four pyramid levels.

        tps -- ticks per second of the frame timestamps.
        """
        sampling_rate = self.sampling_rate(tps)
        # Single-string print calls behave the same on Python 2 and 3.
        print('sampling_rate %s' % sampling_rate)
        # Highest frequency passed to the filter is half the sampling rate.
        filtr = self._expected_filter(0.5 * sampling_rate)

        plt.axhline()
        for level, line_style in zip(range(self._LEVELS), 'rybg'):
            plt.plot(fft(self._level_series(level), filtr), line_style)
        plt.axvline(1 / self.step)
        print('freq step %s' % self.step)
        plt.show()

    def get_result(self, tps):
        """Print and return the estimated pulse in beats per minute.

        tps -- ticks per second of the frame timestamps.

        The peak index of each level's filtered spectrum is averaged and
        converted to BPM using the frequency step.
        """
        filtr = self._expected_filter(0.5 * self.sampling_rate(tps))

        results = [fft(self._level_series(level), filtr)
                   for level in range(self._LEVELS)]
        # A list (not a lazy map object) so that len() works on Python 3.
        indexes = [max_on_index(result) for result in results]
        print('indexes %s' % indexes)
        # Float division keeps the fractional part of the average index.
        avg = sum(indexes) / float(len(indexes))
        value = int(avg * self.step * 60)
        print('')
        puts(colored.red(u' \u2764 '), newline=False)
        puts(colored.yellow('%d BPM' % value))
        print('')
        return value