Example #1
    def start_playing(self, filename):
        """ Play a wav file, and classify the audio. Note we use a background thread to read the
        wav file and we setup a UI animation function to draw the sliding spectrogram image, this way
        the UI update doesn't interfere with the smoothness of the audio playback """
        if self.speaker is None:
            self.speaker = speaker.Speaker()

        self.stop()
        self.reading_input = False
        self.wav_file = wav_reader.WavReader(self.sample_rate, self.channels,
                                             self.auto_scale)
        self.wav_file.open(filename, self.featurizer.input_size, self.speaker)

        def update_func(frame_index):
            return self.on_ui_update()

        if self.animation:
            self.animation.event_source.stop()
        self.reading_input = True

        # Start animation timer for updating the UI (e.g. spectrogram image)
        self.animation = self.spectrogram_widget.begin_animation(update_func)

        # start background thread to read and classify the audio.
        self.featurizer.open(self.wav_file)
        self.read_input_thread = Thread(target=self.on_read_features, args=())
        self.read_input_thread.daemon = True
        self.read_input_thread.start()
Example #2
    def start_playing(self, filename):
        """ Play a wav file, and classify the audio. Note we use a background thread to read the
        wav file and we setup a UI animation function to draw the sliding spectrogram image, this way
        the UI update doesn't interfere with the smoothness of the audio playback """
        if self.speaker is None:
            self.speaker = speaker.Speaker()
    
        self.stop()
        self.reading_input = False
        self.wav_file = wav_reader.WavReader(self.sample_rate, self.channels)
        self.wav_file.open(filename, self.featurizer.input_size, self.speaker)

        def update_func(frame_index):
            self.process_output()
            if not self.reading_input:
                self.after(1, self.on_stopped)
            self.set_spectrogram_image()
            return (self.spectrogram_image,)

        if self.animation:
            self.animation.event_source.stop()
        self.reading_input = True       

        # Start the animation timer for updating the UI (e.g. the spectrogram image); 30 fps is usually fine
        self.animation = animation.FuncAnimation(self.features_figure, update_func, interval=33, blit=True) 

        # start background thread to read and classify the audio.
        self.featurizer.open(self.wav_file)
        self.read_input_thread = Thread(target=self.on_read_features, args=())
        self.read_input_thread.daemon = True
        self.read_input_thread.start()        
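
Both examples above hand the actual reading and classification to a background on_read_features thread whose body is not shown on this page. A minimal sketch of what that loop might look like, assuming the featurizer's read() returns None at end of file and using a hypothetical classify() helper:

    def on_read_features(self):
        """ Background thread: pull features from the featurizer and classify them
        (a sketch, not the original implementation) """
        try:
            while self.reading_input and self.featurizer is not None:
                feature_data = self.featurizer.read()
                if feature_data is None:
                    break  # end of the wav file
                self.classify(feature_data)  # hypothetical helper
        finally:
            self.reading_input = False  # lets update_func schedule on_stopped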
Example #3
def get_wav_features(input_filename, transform, sample_rate, window_size, shift):
    """
    Transform the given .wav input file into a set of features given the required sample rate,
    window size and shift.  The window size is the number of features we need to give the
    classifier, and the shift is the amount by which that window slides as new transformed
    features are added.
    """
    transform_input_size = transform.input_size
    channels = 1  # we only do mono audio right now...
    source = wav_reader.WavReader(sample_rate, channels)
    source.open(input_filename, transform_input_size)
    # apply the featurizing transform 
    transform.open(source)
    
    source = lazy_apply_transform(transform)

    # and apply the classifier window frame size
    source = sliding_window_frame(source, window_size, shift)
    
    rows_generated = 0
    for row in source:
        features = np.ravel(row)
        rows_generated += 1
        yield features
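
A hypothetical call site for the generator above, stacking the yielded feature windows into one numpy array; transform is assumed to be a featurizer.AudioTransform as used elsewhere on this page, and the window_size/shift values are just placeholders:

import numpy as np

windows = list(get_wav_features("test.wav", transform, 16000, 40, 10))
if windows:
    dataset = np.vstack(windows)  # one row per sliding window position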
Example #4
def play_sound(wavfile):
    import speaker
    import wav_reader
    reader = wav_reader.WavReader()
    reader.open(wavfile, 512, speaker.Speaker())
    while True:
        buffer = reader.read()
        if buffer is None:
            break
Example #5
    def open_noise(self):
        self.mix = True
        self.count = 1
        if self.noise_reader is None:
            buffer_size = self.wav_reader.buffer_size
            self.noise_reader = wav_reader.WavReader(self.requested_rate, self.requested_channels)
            self.noise_reader.open(self.noise_files[self.noise_index], buffer_size)
            self.noise_index += 1
            if self.noise_index == len(self.noise_files):
                self.noise_index = 0
Example #6
def test_keyword_spotter(featurizer_model,
                         classifier_model,
                         categories,
                         wav_files,
                         threshold,
                         sample_rate,
                         output_speaker=False,
                         auto_scale=False,
                         reset=False):

    predictor = classifier.AudioClassifier(classifier_model, categories,
                                           threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model,
                                          predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    the_speaker = None
    if output_speaker:
        the_speaker = speaker.Speaker()

    results = []
    if wav_files:
        if not os.path.isdir(wav_files):
            raise Exception("--wav_files {} dir not found".format(wav_files))
        file_list = os.listdir(wav_files)
        file_list.sort()
        for filename in file_list:
            ext = os.path.splitext(filename)[1]
            if ext != ".wav":
                print("Skipping non-wav file: ", filename)
            else:
                reader = wav_reader.WavReader(sample_rate, CHANNELS,
                                              auto_scale)
                path = os.path.join(wav_files, filename)
                print("opening ", path)
                reader.open(path, transform.input_size, the_speaker)
                result = get_prediction(reader, transform, predictor,
                                        categories)
                results += [result]
                if reset:
                    predictor.reset()
    else:
        reader = microphone.Microphone(True, True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")
        result = get_prediction(reader, transform, predictor, categories)
        results += [result]

    return results
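
The get_prediction helper called above is not defined on this page; Example #7 below inlines essentially the same loop. A sketch reconstructed from it, with the exact signature treated as an assumption:

def get_prediction(reader, transform, predictor, categories):
    # feed features from the reader through the transform and keep the most
    # confident prediction (mirrors the inline loop in Example #7)
    transform.open(reader)
    best = None
    while True:
        feature_data = transform.read()
        if feature_data is None:
            break
        prediction, probability, label = predictor.predict(feature_data)
        if probability is not None and (best is None or best[1] < probability):
            best = (prediction, probability, label)
    transform.close()
    return best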
Example #7
def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_file, threshold, sample_rate,
                         output_speaker=False):
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)

    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    # set up inputs and outputs
    if wav_file:
        the_speaker = None
        if output_speaker:
            the_speaker = speaker.Speaker()
        reader = wav_reader.WavReader(sample_rate, CHANNELS)
        reader.open(wav_file, transform.input_size, the_speaker)
    else:
        reader = microphone.Microphone(True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")

    transform.open(reader)
    results = None
    try:
        while True:
            feature_data = transform.read()
            if feature_data is None:
                break
            else:
                prediction, probability, label = predictor.predict(feature_data)
                if probability is not None:
                    if not results or results[1] < probability:
                        results = (prediction, probability, label)
                    percent = int(100 * probability)
                    print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, percent, label))

    except KeyboardInterrupt:
        pass

    transform.close()

    average_time = predictor.avg_time() + transform.avg_time()
    print("Average processing time: {}".format(average_time))
    if results is None:
        raise Exception("test_keyword_spotter failed to find any predictions!")
    return tuple(list(results) + [average_time])
Example #8
def get_wav_features(input_filename, transform, sample_rate, window_size,
                     shift, auto_scale, mixer):
    """
    Transform the given .wav input file into a set of features given the required sample rate,
    window size and shift.  The window size is the number of features we need to give the
    classifier, and the shift is the amount by which that window slides as new transformed
    features are added.
    """
    transform_input_size = transform.input_size
    channels = 1  # we only do mono audio right now...
    source = wav_reader.WavReader(sample_rate, channels, auto_scale)
    source.open(input_filename, transform_input_size)

    if mixer:
        mixer.open(source)
        source = mixer

    # apply the featurizing transform
    transform.open(source)

    source = lazy_apply_transform(transform)

    # and apply the classifier window frame size
    rows_generated = 0
    try:
        source = sliding_window_frame(transform, source, window_size, shift,
                                      mixer)

        for row in source:
            features = np.ravel(row)
            rows_generated += 1
            yield features
    except Exception as e:
        print("### error transforming input file {}: {}".format(
            input_filename, e))

    if rows_generated == 0:
        print(
            "### no rows generated for input file: {}".format(input_filename))
Example #9
    def RunTest(self, featurizer_model, classifier_model, list_file, dataset,
                categories, sample_rate, ignore_label):

        predictor = classifier.AudioClassifier(classifier_model, categories,
                                               [ignore_label], THRESHOLD,
                                               SMOOTHING)
        transform = featurizer.AudioTransform(featurizer_model,
                                              predictor.input_size)

        print("Evaluation with transform input size {}, output size {}".format(
            transform.input_size, transform.output_size))
        print(
            "Evaluation with classifier input size {}, output size {}".format(
                predictor.input_size, predictor.output_size))

        if transform.using_map != predictor.using_map:
            raise Exception("cannot mix .ell and compiled models")

        if list_file:
            with open(list_file, "r") as fp:
                testlist = [e.strip() for e in fp.readlines()]

            wav_dir = os.path.dirname(list_file)

            start = time.time()

            for name in testlist:
                # bed/28497c5b_nohash_0.wav
                expected = name.split('/')[0]
                wav_file = os.path.join(wav_dir, "audio", name)
                # open the wav file.
                reader = wav_reader.WavReader(sample_rate)
                reader.open(wav_file, transform.input_size, None)
                transform.open(reader)
                prediction = self.get_prediction(transform, predictor)
                self.process_prediction(prediction, expected)

        elif dataset:
            if type(dataset) is str:
                ds = np.load(dataset)
                features = ds['features']
                labels = ds['labels']
            else:
                features = dataset.features
                labels = dataset.label_names

            index = 0

            start = time.time()

            for f in features:
                expected = labels[index]
                reader = FeatureReader(f, predictor.input_size)
                prediction = self.get_prediction(reader, predictor)
                self.process_prediction(prediction, expected)
                index += 1
        else:
            raise Exception("Missing list_file and dataset arguments")

        end = time.time()
        seconds = end - start

        print("Test completed in {:.2f} seconds".format(seconds))
        print("{} passed, {} failed, pass rate of {:.2f} %".format(
            self.passed, self.failed, self.rate * 100))
        return self.rate
Example #10
    def is_closed(self):
        return self.wav_reader1 is None


if __name__ == "__main__":
    parser = argparse.ArgumentParser("Test the AudioNoiseMixer class")
    parser.add_argument("--wav_file", "-w", help=".wav file to process")
    parser.add_argument("--noise_dir", "-n", help="directory of .wav files containing noise")
    parser.add_argument("--mix_ratio", "-r", type=float, default=0.1, help="how much noise to add")
    args = parser.parse_args()

    noise_files = []
    noise_dir = args.noise_dir
    for f in os.listdir(noise_dir):
        if os.path.splitext(f)[1] == ".wav":
            noise_files += [os.path.join(noise_dir, f)]

    speaker = speaker.Speaker()
    mixer = AudioNoiseMixer(noise_files, mix_ratio=args.mix_ratio, mix_percent=1)

    reader = wav_reader.WavReader(16000, 1)
    reader.open(args.wav_file, 512)
    mixer.open(reader, speaker)

    while True:
        data = mixer.read()
        if data is None:
            break

    print("finished")
Example #11
                        help="Audio channels to use",
                        default=1,
                        type=int)
arg_parser.add_argument("--buffer_size",
                        help="Read buffer size",
                        default=512,
                        type=int)
arg_parser.add_argument("--code",
                        help="Output c-code for sample data",
                        action="store_true")

args = arg_parser.parse_args()

# First tell the WavReader what sample rate and channels we want the audio converted to
reader = wav_reader.WavReader(args.sample_rate,
                              args.channels,
                              auto_scale=False)

# Create a speaker object which we will give to the WavReader.  The WavReader will pass
# the re-sampled audio to the Speaker so you can hear what it sounds like
speaker = speaker.Speaker()

# open the reader, asking for buffer_size chunks of audio converted to floating point between -1 and 1.
reader.open(args.filename, args.buffer_size, speaker)

code = args.code
# pump the reader until it returns None, processing each chunk of audio it returns.
while True:
    buffer = reader.read()
    if buffer is None:
        break
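    # (the rest of this example is truncated) With the --code flag the script
    # presumably printed each chunk as C array data; a hedged sketch:
    if code:
        print(", ".join("{:.6f}f".format(x) for x in buffer))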
Example #12
parser.add_argument("--speaker", help="Output audio to the speaker.", action='store_true')

args = parser.parse_args()

predictor = classifier.AudioClassifier(args.classifier, args.categories, args.threshold, SMOOTHING)
transform = featurizer.AudioTransform(args.featurizer, predictor.input_size)

if transform.using_map != predictor.using_map:
    raise Exception("cannot mix .ell and compiled models")

# set up inputs and outputs
if args.wav_file:
    output_speaker = None
    if args.speaker:
        output_speaker = speaker.Speaker()
    reader = wav_reader.WavReader(args.sample_rate, CHANNELS)
    reader.open(args.wav_file, transform.input_size, output_speaker)
else:
    reader = microphone.Microphone(True)
    reader.open(transform.input_size, args.sample_rate, CHANNELS)
    print("Please type 'x' and enter to terminate this app...")

transform.open(reader)

try:
    while True:
        feature_data = transform.read()
        if feature_data is None:
            break
        else:
            prediction, probability, label = predictor.predict(feature_data)
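            # (truncated here) Example #7 above shows how this loop typically
            # continues, printing any detection:
            if probability is not None:
                percent = int(100 * probability)
                print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, percent, label))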
Example #13
    def run_test(self,
                 featurizer_model,
                 classifier_model,
                 list_file,
                 max_tests,
                 dataset,
                 categories,
                 sample_rate,
                 auto_scale,
                 output_file,
                 algorithm="max",
                 window_size=0):
        """
        Run the test using the given input models (featurizer and classifier) which may or may not be compiled.
        The test set is defined by a list_file or a dataset.  The list file lists .wav files which we will featurize
        using the given featurizer.  The dataset contains pre-featurized data as created by make_dataset.py.
        The categories define the names of the keywords detected by the classifier and the sample_rate defines the
        audio sample rate in Hertz -- all input audio is resampled at this rate before featurization.
        """
        predictor = classifier.AudioClassifier(classifier_model, categories,
                                               THRESHOLD, SMOOTHING)
        if window_size == 0:
            window_size = predictor.input_size
        transform = featurizer.AudioTransform(featurizer_model, window_size)

        if not self.silent:
            self.logger.info(
                "Evaluation with transform input size {}, output size {}".
                format(transform.input_size, transform.output_size))
            self.logger.info(
                "Evaluation with classifier input size {}, output size {}".
                format(predictor.input_size, predictor.output_size))

        if transform.using_map != predictor.using_map:
            raise Exception("cannot mix .ell and compiled models")

        results = []

        if list_file:
            with open(list_file, "r") as fp:
                testlist = [e.strip() for e in fp.readlines()]

            wav_dir = os.path.dirname(list_file)

            if max_tests:
                testlist = np.random.choice(testlist, max_tests, replace=False)

            start = time.time()

            for name in testlist:
                # e.g. bed/28497c5b_nohash_0.wav
                expected = name.split('/')[0]
                wav_file = os.path.join(wav_dir, name)
                # open the wav file.
                reader = wav_reader.WavReader(sample_rate, 1, auto_scale)
                reader.open(wav_file, transform.input_size, None)
                transform.open(reader)
                prediction, confidence, _, elapsed = self.get_prediction(
                    name, transform, predictor, algorithm)
                self.process_prediction(name, prediction, expected, confidence)
                results += [prediction]
                if self.best_time is None or elapsed < self.best_time:
                    self.best_time = elapsed

        elif dataset:
            if type(dataset) is str:
                ds = np.load(dataset)
                features = ds['features']
                labels = ds['labels']
            else:
                features = dataset.features
                labels = dataset.label_names

            index = 0

            start = time.time()

            for f in features:
                expected = labels[index]
                reader = FeatureReader(f, predictor.input_size)
                name = "row " + str(index)
                prediction, confidence, _, elapsed = self.get_prediction(
                    name, reader, predictor)
                self.process_prediction(name, prediction, expected, confidence)
                if self.best_time is None or elapsed < self.best_time:
                    self.best_time = elapsed
                index += 1
        else:
            raise Exception("Missing list_file and dataset arguments")

        end = time.time()
        seconds = end - start

        self.logger.info("Saving '{}'".format(output_file))
        with open(output_file, "w") as f:
            json.dump(results, f)

        self.logger.info("Test completed in {:.2f} seconds".format(seconds))
        self.logger.info("{} passed, {} failed, pass rate of {:.2f} %".format(
            self.passed, self.failed, self.rate * 100))
        self.logger.info("Best prediction time was {} seconds".format(
            self.best_time))
        return self.rate, self.best_time
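
FeatureReader, used with the pre-featurized datasets in Examples #9 and #13, is not defined on this page. A hedged sketch of a reader that replays one pre-featurized row through the read() interface the predictor expects:

class FeatureReader:
    def __init__(self, features, input_size):
        self.features = np.ravel(features)
        self.input_size = input_size
        self.pos = 0

    def read(self):
        # return the next input_size chunk, or None when the row is exhausted
        if self.pos >= len(self.features):
            return None
        chunk = self.features[self.pos:self.pos + self.input_size]
        self.pos += self.input_size
        return chunk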
Example #14
arg_parser.add_argument("filename", help="wav file to play ")
arg_parser.add_argument("--sample_rate",
                        "-s",
                        help="Audio sample rate to use",
                        default=16000,
                        type=int)
arg_parser.add_argument("--channels",
                        "-c",
                        help="Audio channels to use",
                        default=1,
                        type=int)

args = arg_parser.parse_args()

# First tell the WavReader what sample rate and channels we want the audio converted to
reader = wav_reader.WavReader(args.sample_rate, args.channels)

# Create a speaker object which we will give to the WavReader.  The WavReader will pass
# the re-sampled audio to the Speaker so you can hear what it sounds like
speaker = speaker.Speaker()

# open the reader, asking for 256-sample chunks of audio converted to floating point between -1 and 1.
reader.open(args.filename, 256, speaker)

print("wav file contains sample rate {} and {} channels".format(
    reader.actual_rate, reader.actual_channels))

# pump the reader until it returns None.  In a real app you would assign the results of read() to
# a variable so you can process the audio chunks returned.
while reader.read() is not None:
    pass