Exemplo n.º 1
0
 def load_classifier(self, classifier_path):
     """Load the given compiled ELL classifier for use on subsequent audio input.

     Does nothing to the current classifier when classifier_path is empty,
     but always re-initializes the data buffers via init_data().
     """
     if classifier_path:
         loaded = classifier.AudioClassifier(classifier_path, self.categories, self.threshold)
         self.classifier = loaded
         self.show_output("Classifier input size: {}, output size: {}".format(
             loaded.input_size,
             loaded.output_size))
         # keep the path entry widget in sync with what was actually loaded
         if self.classifier_entry.get() != classifier_path:
             self.classifier_entry.delete(0, END)
             self.classifier_entry.insert(0, classifier_path)
     self.init_data()
Exemplo n.º 2
0
def test_keyword_spotter(featurizer_model,
                         classifier_model,
                         categories,
                         wav_files,
                         threshold,
                         sample_rate,
                         output_speaker=False,
                         auto_scale=False,
                         reset=False):
    """Run the keyword spotter over every .wav file in a directory, or over
    live microphone input when wav_files is not given.

    Returns the list of per-input prediction results from get_prediction.
    Raises if wav_files is not a directory or if an .ell model is mixed
    with a compiled model.
    """
    predictor = classifier.AudioClassifier(classifier_model, categories,
                                           threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model,
                                          predictor.input_size)

    # .ell (map) models and compiled models cannot be combined in one run
    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    the_speaker = speaker.Speaker() if output_speaker else None

    results = []
    if wav_files:
        if not os.path.isdir(wav_files):
            raise Exception("--wav_files {} dir not found".format(wav_files))
        for filename in sorted(os.listdir(wav_files)):
            # only process .wav files; report anything else that was skipped
            if os.path.splitext(filename)[1] != ".wav":
                print("Skipping non-wav file: ", filename)
                continue
            reader = wav_reader.WavReader(sample_rate, CHANNELS,
                                          auto_scale)
            path = os.path.join(wav_files, filename)
            print("opening ", path)
            reader.open(path, transform.input_size, the_speaker)
            results.append(get_prediction(reader, transform, predictor,
                                          categories))
            if reset:
                # clear classifier state between files
                predictor.reset()
    else:
        reader = microphone.Microphone(True, True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")
        results.append(get_prediction(reader, transform, predictor,
                                      categories))

    return results
Exemplo n.º 3
0
def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_file, threshold, sample_rate,
                         output_speaker=False):
    """Run the keyword spotter over one wav file (or the microphone when
    wav_file is not given) and return the best detection.

    Returns a tuple (prediction, probability, label, average_time) for the
    highest-probability detection seen; raises if nothing was detected or if
    an .ell model is mixed with a compiled model.
    """
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)

    # .ell (map) models and compiled models cannot be combined in one run
    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    # choose the audio source: wav file (optionally echoed to a speaker) or mic
    if wav_file:
        the_speaker = speaker.Speaker() if output_speaker else None
        reader = wav_reader.WavReader(sample_rate, CHANNELS)
        reader.open(wav_file, transform.input_size, the_speaker)
    else:
        reader = microphone.Microphone(True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")

    transform.open(reader)
    best = None
    try:
        while True:
            feature_data = transform.read()
            if feature_data is None:
                break
            prediction, probability, label = predictor.predict(feature_data)
            if probability is not None:
                # keep the highest-probability detection seen so far
                if best is None or best[1] < probability:
                    best = (prediction, probability, label)
                percent = int(100 * probability)
                print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, percent, label))

    except KeyboardInterrupt:
        # allow clean termination of the live-microphone loop
        pass

    transform.close()

    average_time = predictor.avg_time() + transform.avg_time()
    print("Average processing time: {}".format(average_time))
    if best is None:
        raise Exception("test_keyword_spotter failed to find any predictions!")
    return best + (average_time,)
Exemplo n.º 4
0
    def RunTest(self, featurizer_model, classifier_model, list_file, dataset,
                categories, sample_rate, ignore_label):
        """Evaluate the featurizer + classifier pipeline over a test set.

        The test set is either a list_file naming wav files (one per line,
        e.g. "bed/28497c5b_nohash_0.wav", resolved under an "audio" folder
        next to the list file) or a pre-featurized dataset (an .npz path with
        'features'/'labels' arrays, or an object exposing .features and
        .label_names).  Exactly one of list_file/dataset must be provided.

        Returns self.rate, the accumulated pass rate.
        Raises Exception when models are mixed (.ell vs compiled) or when
        neither list_file nor dataset is given.
        """
        predictor = classifier.AudioClassifier(classifier_model, categories,
                                               [ignore_label], THRESHOLD,
                                               SMOOTHING)
        transform = featurizer.AudioTransform(featurizer_model,
                                              predictor.input_size)

        print("Evaluation with transform input size {}, output size {}".format(
            transform.input_size, transform.output_size))
        print(
            "Evaluation with classifier input size {}, output size {}".format(
                predictor.input_size, predictor.output_size))

        # .ell (map) models and compiled models cannot be combined in one run
        if transform.using_map != predictor.using_map:
            raise Exception("cannot mix .ell and compiled models")

        if list_file:
            with open(list_file, "r") as fp:
                testlist = [e.strip() for e in fp.readlines()]

            wav_dir = os.path.dirname(list_file)

            start = time.time()

            for name in testlist:
                # e.g. "bed/28497c5b_nohash_0.wav" -- the folder name is the
                # expected keyword label
                expected = name.split('/')[0]
                wav_file = os.path.join(wav_dir, "audio", name)
                # open the wav file.
                reader = wav_reader.WavReader(sample_rate)
                reader.open(wav_file, transform.input_size, None)
                transform.open(reader)
                prediction = self.get_prediction(transform, predictor)
                self.process_prediction(prediction, expected)

        elif dataset:
            if type(dataset) is str:
                # dataset given as a path to an .npz archive
                ds = np.load(dataset)
                features = ds['features']
                labels = ds['labels']
            else:
                features = dataset.features
                labels = dataset.label_names

            start = time.time()

            for index, f in enumerate(features):
                expected = labels[index]
                reader = FeatureReader(f, predictor.input_size)
                prediction = self.get_prediction(reader, predictor)
                self.process_prediction(prediction, expected)

        else:
            # Bug fix: previously this fell through with 'start' unassigned and
            # crashed below with a NameError; fail fast with a clear message
            # instead (consistent with run_test).
            raise Exception("Missing list_file and dataset arguments")

        end = time.time()
        seconds = end - start

        print("Test completed in {:.2f} seconds".format(seconds))
        print("{} passed, {} failed, pass rate of {:.2f} %".format(
            self.passed, self.failed, self.rate * 100))
        return self.rate
Exemplo n.º 5
0
# Command-line entry: test the classifier + featurizer pipeline against live
# microphone input or a single wav file.
parser = argparse.ArgumentParser("test the classifier and featurizer against mic or wav file input")
parser.add_argument("--wav_file", help="optional path to wav file to test", default=None)
parser.add_argument("--featurizer", "-f", required=True,
                    help="specify path to featurizer model (*.ell or compiled_folder/model_name)")
parser.add_argument("--classifier", "-c", required=True,
                    help="specify path to classifier model (*.ell or compiled_folder/model_name)")
parser.add_argument("--categories", "-cat", help="specify path to categories file", required=True)
parser.add_argument("--sample_rate", "-s", default=SAMPLE_RATE, type=int,
                    help="Audio sample rate expected by classifier")
parser.add_argument("--threshold", "-t", help="Classifier threshold (default 0.6)", default=THRESHOLD, type=float)
parser.add_argument("--speaker", help="Output audio to the speaker.", action='store_true')

args = parser.parse_args()

# Build the pipeline: classifier first so the featurizer can be sized to the
# classifier's expected input.
predictor = classifier.AudioClassifier(args.classifier, args.categories, args.threshold, SMOOTHING)
transform = featurizer.AudioTransform(args.featurizer, predictor.input_size)

# .ell (map) models and compiled models cannot be combined in one run.
if transform.using_map != predictor.using_map:
    raise Exception("cannot mix .ell and compiled models")

# set up inputs and outputs
if args.wav_file:
    # wav input, optionally echoed to the speaker while being processed
    output_speaker = None
    if args.speaker:
        output_speaker = speaker.Speaker()
    reader = wav_reader.WavReader(args.sample_rate, CHANNELS)
    reader.open(args.wav_file, transform.input_size, output_speaker)
else:
    # no wav file given: read from the live microphone instead
    reader = microphone.Microphone(True)
    reader.open(transform.input_size, args.sample_rate, CHANNELS)
Exemplo n.º 6
0
    def run_test(self,
                 featurizer_model,
                 classifier_model,
                 list_file,
                 max_tests,
                 dataset,
                 categories,
                 sample_rate,
                 auto_scale,
                 output_file,
                 algorithm="max",
                 window_size=0):
        """
        Run the test using the given input models (featurizer and classifier) which may or may not be compiled.
        The test set is defined by a list_file or a dataset.  The list file lists .wav files which we will featurize
        using the given featurizer.  The dataset contains pre-featurized data as created by make_dataset.py.
        The categories define the names of the keywords detected by the classifier and the sample_rate defines the
        audio sample rate in Hertz -- all input audio is resampled at this rate before featurization.

        Predictions are written to output_file as JSON; returns
        (self.rate, self.best_time): the accumulated pass rate and the fastest
        single-prediction time observed.
        """
        predictor = classifier.AudioClassifier(classifier_model, categories,
                                               THRESHOLD, SMOOTHING)
        # window_size of 0 means "use the classifier's natural input size"
        if window_size == 0:
            window_size = predictor.input_size
        transform = featurizer.AudioTransform(featurizer_model, window_size)

        if not self.silent:
            self.logger.info(
                "Evaluation with transform input size {}, output size {}".
                format(transform.input_size, transform.output_size))
            self.logger.info(
                "Evaluation with classifier input size {}, output size {}".
                format(predictor.input_size, predictor.output_size))

        # .ell (map) models and compiled models cannot be combined in one run
        if transform.using_map != predictor.using_map:
            raise Exception("cannot mix .ell and compiled models")

        results = []

        if list_file:
            with open(list_file, "r") as fp:
                testlist = [e.strip() for e in fp.readlines()]

            # wav paths in the list are relative to the list file's directory
            wav_dir = os.path.dirname(list_file)

            if max_tests:
                # random subsample (without replacement) to cap the run time
                testlist = np.random.choice(testlist, max_tests, replace=False)

            start = time.time()

            for name in testlist:
                # e.g. bed/28497c5b_nohash_0.wav -- the folder name is the
                # expected keyword label
                expected = name.split('/')[0]
                wav_file = os.path.join(wav_dir, name)
                # open the wav file.
                reader = wav_reader.WavReader(sample_rate, 1, auto_scale)
                reader.open(wav_file, transform.input_size, None)
                transform.open(reader)
                prediction, confidence, _, elapsed = self.get_prediction(
                    name, transform, predictor, algorithm)
                self.process_prediction(name, prediction, expected, confidence)
                results += [prediction]
                if self.best_time is None or elapsed < self.best_time:
                    self.best_time = elapsed

        elif dataset:
            if type(dataset) is str:
                # dataset given as a path to an .npz archive
                ds = np.load(dataset)
                features = ds['features']
                labels = ds['labels']
            else:
                features = dataset.features
                labels = dataset.label_names

            index = 0

            start = time.time()

            for f in features:
                expected = labels[index]
                reader = FeatureReader(f, predictor.input_size)
                name = "row " + str(index)
                # NOTE(review): unlike the list_file branch, this branch does
                # not append predictions to 'results' (so output_file will
                # contain an empty list) and does not pass 'algorithm' to
                # get_prediction -- confirm whether that is intentional.
                prediction, confidence, _, elapsed = self.get_prediction(
                    name, reader, predictor)
                self.process_prediction(name, prediction, expected, confidence)
                if self.best_time is None or elapsed < self.best_time:
                    self.best_time = elapsed
                index += 1
        else:
            # 'start' would be unbound below without one of the two inputs
            raise Exception("Missing list_file and dataset arguments")

        end = time.time()
        seconds = end - start

        self.logger.info("Saving '{}'".format(output_file))
        with open(output_file, "w") as f:
            json.dump(results, f)

        self.logger.info("Test completed in {:.2f} seconds".format(seconds))
        self.logger.info("{} passed, {} failed, pass rate of {:.2f} %".format(
            self.passed, self.failed, self.rate * 100))
        self.logger.info("Best prediction time was {} seconds".format(
            self.best_time))
        return self.rate, self.best_time