Example No. 1
def featurize_wav(wav_file, label, featurizer_path, sample_rate, window_size, shift, dump):
    """ Featurize the given wav file with the given featurizer model, pair the resulting
    rows with the given label, and save everything to a .npz file named after the wav file. """
    transform = featurizer.AudioTransform(featurizer_path, 0)
    input_size = transform.input_size
    output_shape = transform.model.output_shape
    output_size = output_shape.Size()
    feature_size = output_size  # identical to output_size; computed once
    feature_shape = (output_shape.rows, output_shape.columns, output_shape.channels)

    wav_log = None
    if dump:
        wav_log = open(wav_file + ".txt", "w")
        wav_log.write("{}:\n".format(wav_file))
        transform.set_log(wav_log)

    features = list(make_dataset.get_wav_features(wav_file, transform, sample_rate, window_size, shift))
    # reshape features to record the fact that the window_size is a kind of batch size for this type of model
    features = [np.reshape(f, (window_size, 1, 1, int(len(f) / window_size))) for f in features]
    labels = [label] * len(features)

    output_file = os.path.basename(wav_file)
    output_file = os.path.splitext(output_file)[0] + ".npz"

    parameters = (sample_rate, input_size, output_size, window_size, shift)  # remember these settings in the dataset
    np.savez(output_file, features=features, labels=labels, parameters=parameters)

    if dump:
        with open(wav_file + ".features.txt", "w") as f:
            f.write("{}:\n".format(wav_file))
            for a in features:
                f.write("{}\n".format(", ".join([str(x) for x in a.ravel()])))

        wav_log.close()
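
A minimal sketch of how this function might be invoked and its output inspected; the featurizer path and wav file name below are placeholders, not files from the original project:

# Hypothetical usage of featurize_wav (placeholder paths).
import numpy as np

featurize_wav("sample.wav", label="yes",
              featurizer_path="compiled_featurizer/mfcc",
              sample_rate=16000, window_size=40, shift=10, dump=False)

# featurize_wav writes sample.npz into the working directory; reload it with numpy:
data = np.load("sample.npz")
print(data["features"].shape)
print(data["labels"])
print(data["parameters"])  # (sample_rate, input_size, output_size, window_size, shift)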
Example No. 2
    def load_featurizer_model(self, featurizer_path):
        """ load the given compiled ELL featurizer for use in processing subsequent audio input """
        if featurizer_path:
            self.featurizer = featurizer.AudioTransform(featurizer_path, 40)
            self.setup_spectrogram_image()

            self.show_output("Feature input size: {}, output size: {}".format(
                self.featurizer.input_size, self.featurizer.output_size))

        self.init_data()
Example No. 3
    def load_featurizer_model(self, featurizer_model):
        """ load the given compiled ELL featurizer for use in processing subsequent audio input """
        if featurizer_model:
            self.featurizer = featurizer.AudioTransform(featurizer_model, 40)
            self.setup_spectrogram_image()

            self.show_output("Feature input size: {}, output size: {}".format(
                self.featurizer.input_size, self.featurizer.output_size))
            if self.features_entry.get() != featurizer_model:
                self.features_entry.delete(0, END)
                self.features_entry.insert(0, featurizer_model)

        self.init_data()
Example No. 4
def test_keyword_spotter(featurizer_model,
                         classifier_model,
                         categories,
                         wav_files,
                         threshold,
                         sample_rate,
                         output_speaker=False,
                         auto_scale=False,
                         reset=False):

    predictor = classifier.AudioClassifier(classifier_model, categories,
                                           threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model,
                                          predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    the_speaker = None
    if output_speaker:
        the_speaker = speaker.Speaker()

    results = []
    if wav_files:
        if not os.path.isdir(wav_files):
            raise Exception("--wav_files {} dir not found".format(wav_files))
        file_list = os.listdir(wav_files)
        file_list.sort()
        for filename in file_list:
            ext = os.path.splitext(filename)[1]
            if ext != ".wav":
                print("Skipping non-wav file: ", filename)
            else:
                reader = wav_reader.WavReader(sample_rate, CHANNELS,
                                              auto_scale)
                path = os.path.join(wav_files, filename)
                print("opening ", path)
                reader.open(path, transform.input_size, the_speaker)
                result = get_prediction(reader, transform, predictor,
                                        categories)
                results += [result]
                if reset:
                    predictor.reset()
    else:
        reader = microphone.Microphone(True, True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")
        result = get_prediction(reader, transform, predictor, categories)
        results += [result]

    return results
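
A hedged call sketch for this directory-based variant; the model folders, categories file, and wav directory are placeholders:

# Hypothetical invocation over a directory of .wav files (placeholder paths).
results = test_keyword_spotter("compiled_featurizer/mfcc",
                               "compiled_classifier/model",
                               "categories.txt", "wav_files_dir",
                               threshold=0.6, sample_rate=16000,
                               reset=True)  # reset classifier state between files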
Example No. 5
def make_dataset(list_file,
                 outdir,
                 categories_path,
                 featurizer_path,
                 sample_rate,
                 window_size,
                 shift,
                 auto_scale=True,
                 noise_path=None,
                 max_noise_ratio=0.1,
                 noise_selection=0.1,
                 use_cache=False):
    """
    Create a dataset given the input list file, a featurizer, the desired .wav sample rate,
    classifier window_size and window shift amount.  The dataset is saved to the same file name
    with .npz extension.  This will do nothing if dataset is already created, unless use_cache=False.
    """
    dataset_name = os.path.basename(list_file)
    dataset_path = os.path.splitext(dataset_name)[0] + ".npz"
    dataset_path = os.path.join(outdir, dataset_path)
    if use_cache and os.path.isfile(dataset_path):
        return

    transform = featurizer.AudioTransform(featurizer_path, 0)

    entry_map = parse_list_file(list_file)
    if not categories_path:
        categories_path = "categories.txt"
    if not os.path.isfile(categories_path):
        raise Exception("{} file not found".format(categories_path))

    with open(categories_path, "r") as f:
        categories = [x.strip() for x in f.readlines()]

    mixer = None
    if noise_path:
        noise_files = [
            os.path.join(noise_path, f) for f in os.listdir(noise_path)
            if os.path.splitext(f)[1] == ".wav"
        ]
        mixer = noise_mixer.AudioNoiseMixer(noise_files, max_noise_ratio,
                                            noise_selection)

    dataset = _get_dataset(entry_map, categories, transform, sample_rate,
                           window_size, shift, auto_scale, mixer)
    if len(dataset.features) == 0:
        print("No features found in list file")

    print("Saving: {}".format(dataset_path))
    dataset.save(dataset_path)
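
A minimal usage sketch, assuming a list file naming one .wav file per line and a compiled featurizer folder (all paths below are placeholders):

# Hypothetical invocation; writes training_list.npz into outdir.
make_dataset("training_list.txt", outdir=".",
             categories_path="categories.txt",
             featurizer_path="compiled_featurizer/mfcc",
             sample_rate=16000, window_size=40, shift=10,
             noise_path=None)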
Example No. 6
def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_file, threshold, sample_rate,
                         output_speaker=False):
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    # set up inputs and outputs
    if wav_file:
        the_speaker = None
        if output_speaker:
            the_speaker = speaker.Speaker()
        reader = wav_reader.WavReader(sample_rate, CHANNELS)
        reader.open(wav_file, transform.input_size, the_speaker)
    else:
        reader = microphone.Microphone(True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")

    transform.open(reader)
    results = None
    try:
        while True:
            feature_data = transform.read()
            if feature_data is None:
                break
            else:
                prediction, probability, label = predictor.predict(feature_data)
                if probability is not None:
                    if not results or results[1] < probability:
                        results = (prediction, probability, label)
                    percent = int(100 * probability)
                    print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, percent, label))

    except KeyboardInterrupt:
        pass

    transform.close()

    average_time = predictor.avg_time() + transform.avg_time()
    print("Average processing time: {}".format(average_time))
    if results is None:
        raise Exception("test_keyword_spotter failed to find any predictions!")
    return tuple(list(results) + [average_time])
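
This variant returns the single best (prediction, probability, label) triple with the average processing time appended; a hedged call sketch with placeholder paths:

# Hypothetical invocation; model paths and categories file are placeholders.
prediction, probability, label, avg_time = test_keyword_spotter(
    "compiled_featurizer/mfcc", "compiled_classifier/model",
    "categories.txt", "test.wav", threshold=0.6, sample_rate=16000)
print("best guess: {} ({:.0f}%), avg time {}".format(label, probability * 100, avg_time))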
Example No. 7
def make_dataset(list_file, featurizer_path, sample_rate, window_size, shift):
    """
    Create a dataset given the input list file, a featurizer, the desired .wav sample rate,
    classifier window_size and window shift amount.  The dataset is saved to the same file name
    with .npz extension.
    """
    transform = featurizer.AudioTransform(featurizer_path, 0)

    entry_map = parse_list_file(list_file)

    dataset = _get_dataset(entry_map, transform, sample_rate, window_size, shift)
    if len(dataset.features) == 0:
        print("No features found in list file")

    dataset_name = os.path.basename(list_file)
    dataset_path = os.path.splitext(dataset_name)[0] + ".npz"
    print("Saving: {}".format(dataset_path))
    dataset.save(dataset_path)
Example No. 8
def multiprocess_data(file, e, featurizer_path, sample_rate, window_size,
                      shift, auto_scale, noise_path, max_noise_ratio,
                      noise_selection, label):
    data_rows = []
    label_rows = []
    transform = featurizer.AudioTransform(featurizer_path, 0)
    mixer = None
    if noise_path:
        noise_files = [
            os.path.join(noise_path, f) for f in os.listdir(noise_path)
            if os.path.splitext(f)[1] == ".wav"
        ]
        mixer = noise_mixer.AudioNoiseMixer(noise_files, max_noise_ratio,
                                            noise_selection)
    full_path = os.path.join(e, file)
    file_features = list(
        get_wav_features(full_path, transform, sample_rate, window_size, shift,
                         auto_scale, mixer))
    if len(file_features) > 0:
        labels = [label for r in file_features]
        data_rows.append(file_features)
        label_rows.append(labels)
    return data_rows, label_rows
Example No. 9
    def RunTest(self, featurizer_model, classifier_model, list_file, dataset,
                categories, sample_rate, ignore_label):

        predictor = classifier.AudioClassifier(classifier_model, categories,
                                               [ignore_label], THRESHOLD,
                                               SMOOTHING)
        transform = featurizer.AudioTransform(featurizer_model,
                                              predictor.input_size)

        print("Evaluation with transform input size {}, output size {}".format(
            transform.input_size, transform.output_size))
        print(
            "Evaluation with classifier input size {}, output size {}".format(
                predictor.input_size, predictor.output_size))

        if transform.using_map != predictor.using_map:
            raise Exception("cannot mix .ell and compiled models")

        if list_file:
            with open(list_file, "r") as fp:
                testlist = [e.strip() for e in fp.readlines()]

            wav_dir = os.path.dirname(list_file)

            start = time.time()

            for name in testlist:
                # bed/28497c5b_nohash_0.wav
                expected = name.split('/')[0]
                wav_file = os.path.join(wav_dir, "audio", name)
                # open the wav file.
                reader = wav_reader.WavReader(sample_rate)
                reader.open(wav_file, transform.input_size, None)
                transform.open(reader)
                prediction = self.get_prediction(transform, predictor)
                self.process_prediction(prediction, expected)

        elif dataset:
            if type(dataset) is str:
                ds = np.load(dataset)
                features = ds['features']
                labels = ds['labels']
            else:
                features = dataset.features
                labels = dataset.label_names

            index = 0

            start = time.time()

            for f in features:
                expected = labels[index]
                reader = FeatureReader(f, predictor.input_size)
                prediction = self.get_prediction(reader, predictor)
                self.process_prediction(prediction, expected)
                index += 1
        else:
            # guard: without this, `start` would be undefined in the timing code below
            raise Exception("Missing list_file and dataset arguments")

        end = time.time()
        seconds = end - start

        print("Test completed in {:.2f} seconds".format(seconds))
        print("{} passed, {} failed, pass rate of {:.2f} %".format(
            self.passed, self.failed, self.rate * 100))
        return self.rate
Example No. 10
parser = argparse.ArgumentParser(description="test the classifier and featurizer against mic or wav file input")
parser.add_argument("--wav_file", help="optional path to wav file to test", default=None)
parser.add_argument("--featurizer", "-f", required=True,
                    help="specify path to featurizer model (*.ell or compiled_folder/model_name)")
parser.add_argument("--classifier", "-c", required=True,
                    help="specify path to classifier model (*.ell or compiled_folder/model_name)")
parser.add_argument("--categories", "-cat", help="specify path to categories file", required=True)
parser.add_argument("--sample_rate", "-s", default=SAMPLE_RATE, type=int,
                    help="Audio sample rate expected by classifier")
parser.add_argument("--threshold", "-t", help="Classifier threshold (default 0.6)", default=THRESHOLD, type=float)
parser.add_argument("--speaker", help="Output audio to the speaker.", action='store_true')

args = parser.parse_args()

predictor = classifier.AudioClassifier(args.classifier, args.categories, args.threshold, SMOOTHING)
transform = featurizer.AudioTransform(args.featurizer, predictor.input_size)

if transform.using_map != predictor.using_map:
    raise Exception("cannot mix .ell and compiled models")

# set up inputs and outputs
if args.wav_file:
    output_speaker = None
    if args.speaker:
        output_speaker = speaker.Speaker()
    reader = wav_reader.WavReader(args.sample_rate, CHANNELS)
    reader.open(args.wav_file, transform.input_size, output_speaker)
else:
    reader = microphone.Microphone(True)
    reader.open(transform.input_size, args.sample_rate, CHANNELS)
    print("Please type 'x' and enter to terminate this app...")
Example No. 11
    def run_test(self,
                 featurizer_model,
                 classifier_model,
                 list_file,
                 max_tests,
                 dataset,
                 categories,
                 sample_rate,
                 auto_scale,
                 output_file,
                 algorithm="max",
                 window_size=0):
        """
        Run the test using the given input models (featurizer and classifier) which may or may not be compiled.
        The test set is defined by a list_file or a dataset.  The list file lists .wav files which we will featurize
        using the given featurizer.  The dataset contains pre-featurized data as created by make_dataset.py.
        The categories define the names of the keywords detected by the classifier and the sample_rate defines the
        audio sample rate in Hertz -- all input audio is resampled at this rate before featurization.
        """
        predictor = classifier.AudioClassifier(classifier_model, categories,
                                               THRESHOLD, SMOOTHING)
        if window_size == 0:
            window_size = predictor.input_size
        transform = featurizer.AudioTransform(featurizer_model, window_size)

        if not self.silent:
            self.logger.info(
                "Evaluation with transform input size {}, output size {}".
                format(transform.input_size, transform.output_size))
            self.logger.info(
                "Evaluation with classifier input size {}, output size {}".
                format(predictor.input_size, predictor.output_size))

        if transform.using_map != predictor.using_map:
            raise Exception("cannot mix .ell and compiled models")

        results = []

        if list_file:
            with open(list_file, "r") as fp:
                testlist = [e.strip() for e in fp.readlines()]

            wav_dir = os.path.dirname(list_file)

            if max_tests:
                testlist = np.random.choice(testlist, max_tests, replace=False)

            start = time.time()

            for name in testlist:
                # e.g. bed/28497c5b_nohash_0.wav
                expected = name.split('/')[0]
                wav_file = os.path.join(wav_dir, name)
                # open the wav file.
                reader = wav_reader.WavReader(sample_rate, 1, auto_scale)
                reader.open(wav_file, transform.input_size, None)
                transform.open(reader)
                prediction, confidence, _, elapsed = self.get_prediction(
                    name, transform, predictor, algorithm)
                self.process_prediction(name, prediction, expected, confidence)
                results += [prediction]
                if self.best_time is None or elapsed < self.best_time:
                    self.best_time = elapsed

        elif dataset:
            if type(dataset) is str:
                ds = np.load(dataset)
                features = ds['features']
                labels = ds['labels']
            else:
                features = dataset.features
                labels = dataset.label_names

            index = 0

            start = time.time()

            for f in features:
                expected = labels[index]
                reader = FeatureReader(f, predictor.input_size)
                name = "row " + str(index)
                prediction, confidence, _, elapsed = self.get_prediction(
                    name, reader, predictor)
                self.process_prediction(name, prediction, expected, confidence)
                if self.best_time is None or elapsed < self.best_time:
                    self.best_time = elapsed
                index += 1
        else:
            raise Exception("Missing list_file and dataset arguments")

        end = time.time()
        seconds = end - start

        self.logger.info("Saving '{}'".format(output_file))
        with open(output_file, "w") as f:
            json.dump(results, f)

        self.logger.info("Test completed in {:.2f} seconds".format(seconds))
        self.logger.info("{} passed, {} failed, pass rate of {:.2f} %".format(
            self.passed, self.failed, self.rate * 100))
        self.logger.info("Best prediction time was {} seconds".format(
            self.best_time))
        return self.rate, self.best_time
Example No. 12
def _get_dataset(entry_map,
                 categories,
                 featurizer_path,
                 window_size,
                 shift,
                 noise_path,
                 multicore=False,
                 max_noise_ratio=0.1,
                 noise_selection=0.1):
    data_rows = []
    label_rows = []
    transform = featurizer.AudioTransform(featurizer_path, 0)
    sample_rate = transform.get_metadata("sample_rate")
    if not sample_rate:
        raise Exception("Featurizer is missing 'sample_rate' metadata")
    sample_rate = int(sample_rate)
    auto_scale = bool(transform.get_metadata("auto_scale"))
    if auto_scale:
        print("Featurizer requires auto-scaling of audio input to float range [-1, 1]")
    mixer = None
    if noise_path:
        noise_files = [
            os.path.join(noise_path, f) for f in os.listdir(noise_path)
            if os.path.splitext(f)[1] == ".wav"
        ]
        mixer = noise_mixer.AudioNoiseMixer(noise_files, max_noise_ratio,
                                            noise_selection)
    for e in entry_map:
        label = os.path.basename(e)
        if label not in categories:
            raise Exception(
                "label {} not found in categories file".format(label))
        print("Transforming {} files from {} ... ".format(
            len(entry_map[e]), label),
              end='',
              flush=True)
        total = 0
        if multicore:
            p = multiprocessing.Pool(multiprocessing.cpu_count())
            temp_partial = partial(multiprocess_data,
                                   e=e,
                                   featurizer_path=featurizer_path,
                                   sample_rate=sample_rate,
                                   window_size=window_size,
                                   shift=shift,
                                   auto_scale=auto_scale,
                                   noise_path=noise_path,
                                   max_noise_ratio=max_noise_ratio,
                                   noise_selection=noise_selection,
                                   label=label)
            temp_file = p.map(temp_partial, entry_map[e])
            for temp_list in temp_file:
                temp1, temp2 = temp_list
                for x in temp1:
                    data_rows.append(x)
                for y in temp2:
                    label_rows.append(y)
                    total += len(y)  # count only the rows found for this label
            print(" found {} rows".format(total))
            p.close()
            p.join()
        else:
            for file in entry_map[e]:
                full_path = os.path.join(e, file)
                file_features = list(
                    get_wav_features(full_path, transform, sample_rate,
                                     window_size, shift, auto_scale, mixer))
                if len(file_features) > 0:
                    labels = [label for r in file_features]
                    data_rows.append(file_features)
                    label_rows.append(labels)
                total += len(file_features)

            print(" found {} rows".format(total))

    features = np.concatenate(data_rows, axis=0)
    label_names = np.concatenate(label_rows, axis=0)
    # remember these settings in the dataset
    parameters = (sample_rate, transform.input_size, transform.output_size,
                  window_size, shift)
    return Dataset(features, label_names, categories, parameters)
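
The parameters tuple baked into the returned Dataset records the settings needed to featurize new audio consistently; a sketch of unpacking it, assuming the module-local Dataset class exposes the constructor tuple as a `parameters` attribute (an assumption, not confirmed by this snippet):

# entry_map and categories come from the caller; the featurizer path is a placeholder.
dataset = _get_dataset(entry_map, categories, "compiled_featurizer/mfcc",
                       window_size=40, shift=10, noise_path=None)
sample_rate, input_size, output_size, window_size, shift = dataset.parameters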