Пример #1
0
    def load_featurizer_model(self, featurizer_path):
        """ Load a compiled ELL featurizer and rebuild the state that depends on it.

        featurizer_path - path to the compiled ELL featurizer; when it is empty or
            None nothing is loaded and only self.init_data() is called.
        """
        if not featurizer_path:
            self.init_data()
            return

        # NOTE(review): the meaning of the literal 40 is not visible in this method;
        # it is passed straight through to AudioTransform - confirm against that class.
        self.featurizer = featurizer.AudioTransform(featurizer_path, 40)
        self.setup_spectrogram_image()

        # The voice activity detector is constructed from the sample rate and the
        # output size of the featurizer that was just loaded.
        rate = self.sample_rate
        feature_size = self.featurizer.output_size
        self.vad = vad.VoiceActivityDetector(rate, feature_size)

        summary = "Feature input size: {}, output size: {}".format(
            self.featurizer.input_size, self.featurizer.output_size)
        self.show_output(summary)

        self.init_data()
Пример #2
0
    def create_vad(self):
        """ Rebuild self.vad from the option values currently held by this object.

        Every option is read by passing the attribute of the same name to
        self.get_entry. The resulting dictionary, together with the sample rate and
        the featurizer input/output sizes, is handed to make_vad.make_vad, and the
        model it returns is wrapped in a vad.VoiceActivityDetector stored in self.vad.
        """
        option_names = ("tau_up", "tau_down", "threshold_up", "threshold_down",
                        "large_input", "gain_att", "level_threshold")
        # Keys are inserted in the order listed above, one get_entry call per option.
        vad_options = {name: self.get_entry(getattr(self, name)) for name in option_names}

        rate = self.sample_rate
        input_size = self.featurizer.input_size
        output_size = self.featurizer.output_size
        model = make_vad.make_vad("vad.ell", rate, input_size, output_size, vad_options)
        self.vad = vad.VoiceActivityDetector(model)
Пример #3
0
    def __init__(self,
                 featurizer_model=None,
                 classifier_model=None,
                 auto_scale=True,
                 sample_rate=None,
                 channels=None,
                 input_device=None,
                 categories=None,
                 image_width=80,
                 threshold=None,
                 wav_file=None,
                 clear=5,
                 serial=None,
                 vad_model=None,
                 smoothing=None,
                 ignore_list=None):
        """ Initialize AudioDemo object
        featurizer_model - the path to the ELL featurizer; falls back to the value
            stored in the saved settings when not given
        classifier_model - the path to the ELL classifier; falls back to the value
            stored in the saved settings when not given
        auto_scale - auto scale audio input to range [-1, 1]
        sample_rate - sample rate the featurizer is expecting (16000 when None)
        channels - number of channels the featurizer is expecting (1 when None)
        input_device - optional id of microphone to use
        categories - path to file containing category labels; falls back to the
            value stored in the saved settings when not given
        image_width - width of the spectrogram image
        threshold - ignore predictions that have confidence below this number (e.g. 0.5)
        wav_file - optional wav_file to use when you click Play; falls back to the
            value stored in the saved settings when None
        clear - multiplied by 1000 and stored as output_clear_time (presumably
            seconds converted to milliseconds); 5000 is used when clear is falsy
        serial - optional serial input, reading numbers from the given serial port.
        vad_model - optional ELL model containing VoiceActivityDetector
        smoothing - controls the size of the smoothing window (None when not given).
            The special value "vad" sets the window to
            int(sample_rate / featurizer.input_size) and, when no vad_model is
            given, creates one with make_vad. "vad" needs a loaded featurizer; if
            none is loaded a message is shown and smoothing is left as None.
        ignore_list - list of category labels to ignore (like 'background' or 'silence')
        """
        super().__init__()

        # Keys used to persist the most recent choices in the settings.
        self.CLASSIFIER_MODEL_KEY = "classifier_model"
        self.FEATURIZER_MODEL_KEY = "featurizer_model"
        self.WAV_FILE_KEY = "wav_file"
        self.CATEGORY_FILE_KEY = "categories"

        # Settings must be loaded before the fallbacks below can consult them.
        self.get_settings_file_name()
        self.load_settings()
        self.reading_input = False
        self.featurizer_model = None
        self.serial_port = serial
        self.smoothing = smoothing
        self.ignore_list = ignore_list

        # An explicit argument wins and is remembered; otherwise reuse the saved value.
        if featurizer_model:
            self.featurizer_model = featurizer_model
            self.settings[self.FEATURIZER_MODEL_KEY] = featurizer_model
        elif self.FEATURIZER_MODEL_KEY in self.settings:
            self.featurizer_model = self.settings[self.FEATURIZER_MODEL_KEY]

        self.classifier_model = None
        if classifier_model:
            self.classifier_model = classifier_model
            self.settings[self.CLASSIFIER_MODEL_KEY] = classifier_model
        elif self.CLASSIFIER_MODEL_KEY in self.settings:
            self.classifier_model = self.settings[self.CLASSIFIER_MODEL_KEY]

        self.wav_filename = wav_file
        if self.wav_filename is None and self.WAV_FILE_KEY in self.settings:
            self.wav_filename = self.settings[self.WAV_FILE_KEY]

        self.wav_file_list = None
        self.auto_scale = auto_scale
        self.sample_rate = sample_rate if sample_rate is not None else 16000
        self.channels = channels if channels is not None else 1
        self.input_device = input_device
        self.num_classifier_features = None

        self.vad = None
        # NOTE(review): this is evaluated before a VAD model may be auto-created for
        # smoothing == "vad" below, so it stays False in that case - confirm intended.
        self.vad_reset = (vad_model is not None)
        self.previous_vad = 0
        self.vad_latch = 3  # only reset after 3 vad=0 signals to smooth vad signal a bit.
        if not categories and self.CATEGORY_FILE_KEY in self.settings:
            categories = self.settings[self.CATEGORY_FILE_KEY]

        self.categories = categories
        if categories:
            self.settings[self.CATEGORY_FILE_KEY] = categories

        self.save_settings()  # in case we just changed it.
        self.audio_level = 0
        self.min_level = 0
        self.max_level = 0
        self.threshold = threshold

        self.output_clear_time = int(clear * 1000) if clear else 5000

        self.featurizer = None
        self.classifier = None
        self.wav_file = None
        self.speaker = None
        self.microphone = None
        self.animation = None
        self.show_classifier_output = True
        self.last_prediction = None
        self.probability = 0

        # Threads
        self.read_input_thread = None
        self.lock = Lock()
        self.main_thread = get_ident()
        self.message_queue = []

        # UI components
        self.max_spectrogram_width = image_width
        self.features_entry = None
        self.classifier_feature_data = None
        self.spectrogram_image_data = None

        # The UI is created before any model is loaded because the loading steps
        # below report through show_output.
        self.init_ui()

        if self.featurizer_model:
            self.load_featurizer_model(os.path.abspath(self.featurizer_model))
        else:
            self.show_output("Please specify and load a feature model")

        if smoothing == "vad":
            if self.featurizer is None:
                # The window size and the generated VAD model both depend on the
                # featurizer sizes. Without a loaded featurizer this used to fail
                # with AttributeError on None; report it and fall back to the
                # constructor default instead.
                self.smoothing = None
                self.show_output("Cannot use 'vad' smoothing without a feature model")
            else:
                # smooth up to 1 second worth of predictions
                self.smoothing = int(self.sample_rate / self.featurizer.input_size)
                if vad_model is None:
                    vad_model = make_vad.make_vad("vad.ell", self.sample_rate,
                                                  self.featurizer.input_size,
                                                  self.featurizer.output_size,
                                                  None)

        if self.classifier_model:
            self.load_classifier(self.classifier_model)
            self.setup_spectrogram_image()
        else:
            self.show_output("Please specify and load a classifier model")

        if vad_model:
            self.vad = vad.VoiceActivityDetector(vad_model)