def run_action_recognition(model_name: str, model_version: str, title: Optional[str] = None,
                           display_fn: Optional[Callable] = None, **kwargs):
    """
    :param model_name: Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version: Model version (pro or lite).
    :param title: Title of the image frame on display.
    :param display_fn: Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create a logistic regression classifier and load its pre-trained weights
    action_classifier = LogisticRegression(num_in=backbone_network.feature_dim, num_out=30)
    action_classifier.load_state_dict(weights['action_recognition'])
    action_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, action_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    border_size = 30
    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayClassnameOverlay(thresholds=LAB_THRESHOLDS,
                                              border_size_top=border_size if not title else border_size + 50),
    ]
    display_results = sense.display.DisplayResults(title=title, display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
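# Example usage (a sketch, not part of the original script): run live action recognition on the
# default webcam with the pro StridedInflatedEfficientNet weights. Extra keyword arguments such as
# camera_id, path_in, path_out or use_gpu are forwarded to Controller via **kwargs (see the
# Controller call in run_custom_classifier below for the full set used in these scripts).
#
#   run_action_recognition(model_name='StridedInflatedEfficientNet',
#                          model_version='pro',
#                          camera_id=0,
#                          use_gpu=True)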
def run_fitness_rep_counter(model_name: str, model_version: str, title: Optional[str] = None,
                            display_fn: Optional[Callable] = None, **kwargs):
    """
    :param model_name: Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version: Model version (pro or lite).
    :param title: Title of the image frame on display.
    :param display_fn: Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version
    )

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create a logistic regression classifier for rep counting and load its pre-trained weights
    rep_counter = LogisticRegression(num_in=backbone_network.feature_dim, num_out=5)
    rep_counter.load_state_dict(weights['rep_counter'])
    rep_counter.eval()

    # Concatenate backbone network and rep counter
    net = Pipe(backbone_network, rep_counter)

    postprocessor = [
        AggregatedPostProcessors(
            post_processors=[
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - jumping_jacks_position=arms_down'],
                    pos1_idx=LAB2INT['counting - jumping_jacks_position=arms_up'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Jumping Jacks',
                ),
                TwoPositionsCounter(
                    pos0_idx=LAB2INT['counting - squat_position=high'],
                    pos1_idx=LAB2INT['counting - squat_position=low'],
                    threshold0=0.4,
                    threshold1=0.4,
                    out_key='Squats',
                ),
            ],
            out_key='counting',
        ),
        PostprocessClassificationOutput(INT2LAB, smoothing=1)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayExerciseRepCounts()
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   border_size_top=100,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=postprocessor,
        results_display=display_results,
        callbacks=[],
        **kwargs
    )
    controller.run_inference()
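# Example usage (a sketch): count jumping jacks and squats in a pre-recorded clip rather than the
# live camera feed; path_in and path_out are Controller keyword arguments forwarded through
# **kwargs, and 'workout.mp4' is a hypothetical input file.
#
#   run_fitness_rep_counter(model_name='StridedInflatedEfficientNet',
#                           model_version='pro',
#                           path_in='workout.mp4',
#                           path_out='workout_annotated.mp4')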
def run_custom_classifier(custom_classifier, camera_id=0, path_in=None, path_out=None, title=None,
                          use_gpu=True, display_fn=None, stop_event=None):
    # Load backbone network according to config file
    backbone_model_config, backbone_weights = load_backbone_model_from_config(custom_classifier)

    try:
        # Load custom classifier checkpoint
        checkpoint_classifier = torch.load(os.path.join(custom_classifier, 'best_classifier.checkpoint'))
    except FileNotFoundError:
        msg = ("Error: No such file or directory: 'best_classifier.checkpoint'\n"
               "Hint: Make sure 'custom_classifier' points to a directory containing "
               "'best_classifier.checkpoint'.\n")
        if display_fn:
            display_fn(msg)
        else:
            print(msg)
        return None

    # Update original weights in case some intermediate layers have been fine-tuned
    update_backbone_weights(backbone_weights, checkpoint_classifier)

    # Create backbone network
    backbone_network = build_backbone_network(backbone_model_config, backbone_weights)

    # Load the label mapping and invert it (class index -> label)
    with open(os.path.join(custom_classifier, 'label2int.json')) as file:
        class2int = json.load(file)
    INT2LAB = {value: key for key, value in class2int.items()}

    # Create a logistic regression classifier and load the custom checkpoint
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(checkpoint_classifier)
    gesture_classifier.eval()

    # Concatenate backbone network and custom classifier
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [PostprocessClassificationOutput(INT2LAB, smoothing=4)]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.1),
    ]
    display_results = sense.display.DisplayResults(title=title, display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=postprocessor,
        results_display=display_results,
        callbacks=[],
        camera_id=camera_id,
        path_in=path_in,
        path_out=path_out,
        use_gpu=use_gpu,
        stop_event=stop_event,
    )
    controller.run_inference()
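# Example usage (a sketch; 'checkpoints/my_classifier' is a hypothetical directory that must
# contain the 'best_classifier.checkpoint' and 'label2int.json' files loaded above):
#
#   run_custom_classifier(custom_classifier='checkpoints/my_classifier',
#                         camera_id=0,
#                         use_gpu=True)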
def run_calorie_estimation(model_name: str, model_version: str, weight: Optional[float] = 70.0,
                           height: Optional[float] = 170.0, age: float = 30.0,
                           gender: Optional[str] = None, title: Optional[str] = None,
                           display_fn: Optional[Callable] = None, **kwargs):
    """
    :param model_name: Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version: Model version (pro or lite).
    :param weight: Weight (in kilograms). Used to convert MET values to calories. Defaults to 70.
    :param height: Height (in centimeters). Used to convert MET values to calories. Defaults to 170.
    :param age: Age (in years). Used to convert MET values to calories. Defaults to 30.
    :param gender: Gender ("male", "female" or "other"). Used to convert MET values to calories.
    :param title: Title of the image frame on display.
    :param display_fn: Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(
        SUPPORTED_MODEL_CONFIGURATIONS,
        model_name,
        model_version
    )

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network and MET converter
    net = Pipe(backbone_network, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayDetailedMETandCalories(),
    ]
    display_results = sense.display.DisplayResults(title=title, display_ops=display_ops,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(
        neural_network=net,
        post_processors=post_processors,
        results_display=display_results,
        callbacks=[],
        **kwargs
    )
    controller.run_inference()
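# Example usage (a sketch): estimate calories for a 60 kg, 165 cm, 25-year-old female user with
# the lite StridedInflatedMobileNetV2 model; the subject parameters only affect the
# MET-to-calorie conversion.
#
#   run_calorie_estimation(model_name='StridedInflatedMobileNetV2',
#                          model_version='lite',
#                          weight=60.0,
#                          height=165.0,
#                          age=25.0,
#                          gender='female')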
def run_gesture_control(model_name: str, model_version: str, title: Optional[str] = None,
                        display_fn: Optional[Callable] = None, **kwargs):
    """
    :param model_name: Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version: Model version (pro or lite).
    :param title: Title of the image frame on display.
    :param display_fn: Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network, including fine-tuned layers from the gesture control checkpoint
    backbone_network = build_backbone_network(selected_config, weights['backbone'],
                                              weights_finetuned=weights['gesture_control'])

    # Create a logistic regression classifier and load its pre-trained weights
    gesture_classifier = LogisticRegression(num_in=backbone_network.feature_dim,
                                            num_out=len(INT2LAB))
    gesture_classifier.load_state_dict(weights['gesture_control'])
    gesture_classifier.eval()

    # Concatenate backbone network and logistic regression
    net = Pipe(backbone_network, gesture_classifier)

    postprocessor = [
        PostprocessClassificationOutput(INT2LAB, smoothing=1),
        AggregatedPostProcessors(
            post_processors=[
                EventCounter(key, LAB2INT[key], LAB_THRESHOLDS[key]) for key in ENABLED_LABELS
            ],
            out_key='counting',
        ),
    ]

    border_size_top = 0
    border_size_right = 500
    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayClassnameOverlay(thresholds=LAB_THRESHOLDS,
                                              duration=1,
                                              border_size_top=border_size_top if not title else border_size_top + 50,
                                              border_size_right=border_size_right),
        sense.display.DisplayPredictionBarGraph(ENABLED_LABELS,
                                                LAB_THRESHOLDS,
                                                x_offset=900,
                                                y_offset=100,
                                                display_counts=True)
    ]
    display_results = sense.display.DisplayResults(title=title,
                                                   display_ops=display_ops,
                                                   border_size_top=border_size_top,
                                                   border_size_right=border_size_right,
                                                   display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=postprocessor,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
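# Example usage (a sketch): run gesture control with a custom window title; note that setting a
# title shifts the classname overlay down by 50 pixels, as handled above.
#
#   run_gesture_control(model_name='StridedInflatedEfficientNet',
#                       model_version='pro',
#                       title='Gesture Control Demo')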
def run_fitness_tracker(model_name: str, model_version: str, weight: Optional[float] = 70.0,
                        height: Optional[float] = 170.0, age: float = 30.0,
                        gender: Optional[str] = None, title: Optional[str] = None,
                        display_fn: Optional[Callable] = None, **kwargs):
    """
    :param model_name: Name of the backbone model (StridedInflatedEfficientNet or StridedInflatedMobileNetV2).
    :param model_version: Model version (pro or lite).
    :param weight: Weight (in kilograms). Used to convert MET values to calories. Defaults to 70.
    :param height: Height (in centimeters). Used to convert MET values to calories. Defaults to 170.
    :param age: Age (in years). Used to convert MET values to calories. Defaults to 30.
    :param gender: Gender ("male", "female" or "other"). Used to convert MET values to calories.
    :param title: Title of the image frame on display.
    :param display_fn: Optional function to further process the displayed image.
    """
    # Load weights
    selected_config, weights = get_relevant_weights(SUPPORTED_MODEL_CONFIGURATIONS, model_name, model_version)

    # Load backbone network
    backbone_network = build_backbone_network(selected_config, weights['backbone'])

    # Create fitness activity classifier
    activity_classifier = LogisticRegression(num_in=backbone_network.feature_dim, num_out=81)
    activity_classifier.load_state_dict(weights['fitness_activity_recognition'])
    activity_classifier.eval()

    # Create MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    met_value_converter.load_state_dict(weights['met_converter'])
    met_value_converter.eval()

    # Concatenate backbone network with both downstream nets
    net = Pipe(backbone_network, feature_converter=[activity_classifier, met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        sense.display.DisplayFPS(expected_camera_fps=net.fps,
                                 expected_inference_fps=net.fps / net.step_size),
        sense.display.DisplayTopKClassificationOutputs(top_k=1, threshold=0.5),
        sense.display.DisplayMETandCalories(y_offset=40),
    ]
    display_results = sense.display.DisplayResults(title=title, display_ops=display_ops,
                                                   border_size_top=50, display_fn=display_fn)

    # Run live inference
    controller = Controller(neural_network=net,
                            post_processors=post_processors,
                            results_display=display_results,
                            callbacks=[],
                            **kwargs)
    controller.run_inference()
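# Example usage (a sketch): combined activity recognition and calorie tracking; weight, height
# and age fall back to 70 kg / 170 cm / 30 years when omitted.
#
#   run_fitness_tracker(model_name='StridedInflatedEfficientNet',
#                       model_version='pro',
#                       weight=80.0,
#                       gender='male')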