示例#1
0
    def _load_net(self):
        """Assemble the gesture-recognition network and store it on ``self.net``.

        Two sub-models are built, each restored from the weights that
        ``engine.load_weights`` returns for the corresponding attribute, and
        each has ``eval()`` called on it before they are chained:

        * a ``StridedInflatedEfficientNet`` backbone, restored from
          ``self.feature_extractor_weights``;
        * a ``LogisticRegression`` head with 30 outputs, whose input size is
          the backbone's ``feature_dim``, restored from
          ``self.gesture_classifier_weights``.

        The result is ``Pipe(backbone, head)`` assigned to ``self.net``;
        nothing is returned.
        """
        # Backbone that produces the features fed to the classifier.
        backbone = feature_extractors.StridedInflatedEfficientNet()
        backbone.load_state_dict(
            engine.load_weights(self.feature_extractor_weights))
        backbone.eval()

        # Classification head sized to the backbone's feature dimension.
        # NOTE(review): 30 is hard-coded and presumably has to match the
        # number of classes in the checkpoint -- confirm against the weights.
        head = LogisticRegression(num_in=backbone.feature_dim, num_out=30)
        head.load_state_dict(
            engine.load_weights(self.gesture_classifier_weights))
        head.eval()

        # Chain backbone and head into the single network used for inference.
        self.net = Pipe(backbone, head)
        num_in=feature_extractor.feature_dim, num_out=81)
    checkpoint = engine.load_weights(
        'resources/fitness_activity_recognition/mobilenet_logistic_regression.ckpt'
    )
    gesture_classifier.load_state_dict(checkpoint)
    gesture_classifier.eval()

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    checkpoint = torch.load(
        'resources/calorie_estimation/mobilenet_features_met_converter.ckpt')
    met_value_converter.load_state_dict(checkpoint)
    met_value_converter.eval()

    # Concatenate feature extractor with downstream nets
    net = Pipe(feature_extractor,
               feature_converter=[gesture_classifier, met_value_converter])

    post_processors = [
        PostprocessClassificationOutput(INT2LAB, smoothing=8, indices=[0]),
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12,
                                              indices=[1])
    ]

    display_ops = [
        realtimenet.display.DisplayFPS(expected_camera_fps=net.fps,
                                       expected_inference_fps=net.fps /
                                       net.step_size),
示例#3
0
    # Load feature extractor
    feature_extractor = feature_extractors.StridedInflatedMobileNetV2()
    checkpoint = engine.load_weights(
        'resources/backbone/strided_inflated_mobilenet.ckpt')
    feature_extractor.load_state_dict(checkpoint)
    feature_extractor.eval()

    # Load MET value converter
    met_value_converter = calorie_estimation.METValueMLPConverter()
    checkpoint = engine.load_weights(
        'resources/calorie_estimation/mobilenet_features_met_converter.ckpt')
    met_value_converter.load_state_dict(checkpoint)
    met_value_converter.eval()

    # Concatenate feature extractor and met converter
    net = Pipe(feature_extractor, met_value_converter)

    post_processors = [
        calorie_estimation.CalorieAccumulator(weight=weight,
                                              height=height,
                                              age=age,
                                              gender=gender,
                                              smoothing=12)
    ]

    display_ops = [
        realtimenet.display.DisplayFPS(expected_camera_fps=net.fps,
                                       expected_inference_fps=net.fps /
                                       net.step_size),
        realtimenet.display.DisplayDetailedMETandCalories(),
    ]
    # Load feature extractor
    feature_extractor = feature_extractors.StridedInflatedEfficientNet()
    checkpoint = engine.load_weights('resources/backbone/strided_inflated_efficientnet.ckpt')
    feature_extractor.load_state_dict(checkpoint)
    feature_extractor.eval()

    # Load a logistic regression classifier
    gesture_classifier = LogisticRegression(num_in=feature_extractor.feature_dim,
                                            num_out=30)
    checkpoint = engine.load_weights('resources/gesture_detection/efficientnet_logistic_regression.ckpt')
    gesture_classifier.load_state_dict(checkpoint)
    gesture_classifier.eval()

    # Concatenate feature extractor and gesture classifier
    net = Pipe(feature_extractor, gesture_classifier)

    # Create inference engine, video streaming and display instances
    inference_engine = engine.InferenceEngine(net, use_gpu=use_gpu)

    video_source = camera.VideoSource(camera_id=camera_id,
                                      size=inference_engine.expected_frame_size,
                                      filename=path_in)

    video_stream = camera.VideoStream(video_source,
                                      inference_engine.fps)

    postprocessor = [
        PostprocessClassificationOutput(INT2LAB, smoothing=4)
    ]
                                        label2int,
                                        num_timesteps=num_timesteps)
    valid_loader = generate_data_loader(os.path.join(
        path_in,
        f"features_valid_num_layers_to_finetune={num_layers_to_finetune}"),
                                        label_names,
                                        label2int,
                                        num_timesteps=None,
                                        batch_size=1,
                                        shuffle=False)

    # Modify the network to generate the training network on top of the features
    gesture_classifier = LogisticRegression(
        num_in=feature_extractor.feature_dim, num_out=len(label_names))
    if num_layers_to_finetune > 0:
        net = Pipe(custom_classifier_bottom, gesture_classifier)
    else:
        net = gesture_classifier
    net.train()

    if use_gpu:
        net = net.cuda()

    lr_schedule = {0: 0.0001, 40: 0.00001}
    num_epochs = 60
    best_model_state_dict = training_loops(net, train_loader, valid_loader,
                                           use_gpu, num_epochs, lr_schedule)

    # Save best model
    if isinstance(net, Pipe):
        best_model_state_dict = {