Пример #1
0
def scan(filepath):
    """Collect basic audio properties and comment tags of a file via sox.

    :param filepath: path of the audio file to inspect.
    :return: dict with the keys ``file_type``, ``sample_rate`` (rounded),
        ``channels``, ``duration``, ``encoding`` and ``bit_rate``, plus
        ``tags`` (lower-cased key -> value) when at least one ``key=value``
        comment line was found. ``bit_rate`` is ``None`` when the duration
        is zero or unavailable, because it is derived from the duration.
    """
    # sox package has an info() but it should not be used:
    #  it's just a wrapper that calls a set of functions
    #  and returns them in a dict
    # ...and the functions are the wrong ones :)

    info_dictionary = {
        'file_type': file_info.file_type(filepath),
        'sample_rate': round(file_info.sample_rate(filepath)),
        'channels': file_info.channels(filepath),
        'duration': file_info.duration(filepath),
        'encoding': file_info.encoding(filepath),
    }

    # bitrate is currently broken (1.37) but we can fake it from the file
    # size. Guard against a zero/None duration (e.g. an empty file) so the
    # estimate does not raise ZeroDivisionError/TypeError.
    duration = info_dictionary['duration']
    if duration:
        info_dictionary['bit_rate'] = round(getsize(filepath) / duration * 8)
    else:
        info_dictionary['bit_rate'] = None

    # get comments too
    tags = {}
    for comment in file_info.comments(filepath).splitlines():
        key, separator, value = comment.strip().partition('=')
        if not separator:
            # Blank line or free-form text without '=': not a key/value tag.
            continue
        tags[key.lower()] = value
    if tags:
        info_dictionary['tags'] = tags

    return info_dictionary
Пример #2
0
    def read(self, audio_metadata):
        """Read an audio file.

        A file whose path does not end in ``.wav`` is first converted with sox
        into a temporary WAV file using this reader's sample rate, channel
        count and bit depth. The temporary file is always removed before this
        method returns or raises.

        :param audio_metadata: metadata info of an audio; its ``path`` and
            ``is_keyword`` attributes are read.
        :return: raw audio data as float32 array and duration in seconds. The
            duration is computed from the frames stored in the file and does
            not include the silence appended for keyword files.
        :raises AssertionError: if the source sample rate is lower than the
            requested one, or the decoded file's sample rate / channel count
            differ from the expected values.
        """
        temp_path = None
        try:
            # Convert it to a wav file.
            if not audio_metadata.path.endswith('.wav'):
                original_sample_rate = file_info.sample_rate(audio_metadata.path)
                assert self._sample_rate <= original_sample_rate
                transformer = Transformer()
                transformer.convert(samplerate=self._sample_rate,
                                    n_channels=self._channels,
                                    bitdepth=self._bits_per_sample)
                fd, temp_path = tempfile.mkstemp(suffix='.wav')
                # Only the path is needed; release the descriptor right away
                # so it cannot leak and does not keep the file open while sox
                # writes to it.
                os.close(fd)
                transformer.build(audio_metadata.path, temp_path)

            path = temp_path if temp_path else audio_metadata.path

            # Read the audio file.
            with SoundFile(path) as soundfile:
                # make sure the audio properties are as expected.
                assert soundfile.samplerate == self._sample_rate
                assert soundfile.channels == self._channels
                duration_sec = len(soundfile) / self._sample_rate
                pcm = soundfile.read(dtype='float32')

            # Add 0.5 second silence to the end of files containing keyword as in occasionally the user stopped
            # recording right after uttering the keyword. If the detector needs some time after seeing the keyword to
            # make a decision (e.g. endpointing) this is going to artificially increase the miss rates.
            if audio_metadata.is_keyword:
                # Match the dtype of the samples: np.zeros defaults to float64,
                # which would promote the whole result away from float32.
                pcm = np.append(pcm, np.zeros(self._sample_rate // 2, dtype=pcm.dtype))

            return pcm, duration_sec
        finally:
            # Runs on success and on any failure above, after the SoundFile
            # has been closed, so the temporary file never outlives the call.
            if temp_path:
                os.remove(temp_path)
Пример #3
0
 def test_empty(self):
     # file_info.sample_rate(EMPTY_FILE) must report 44100.
     self.assertEqual(44100, file_info.sample_rate(EMPTY_FILE))
Пример #4
0
 def test_aiff(self):
     # file_info.sample_rate(INPUT_FILE2) must report 8000.
     self.assertEqual(8000, file_info.sample_rate(INPUT_FILE2))
Пример #5
0
 def test_wav(self):
     # file_info.sample_rate(INPUT_FILE) must report 44100.
     self.assertEqual(44100, file_info.sample_rate(INPUT_FILE))
Пример #6
0
 def test_empty(self):
     # Expect a sample rate of 44100 for EMPTY_FILE.
     reported_rate = file_info.sample_rate(EMPTY_FILE)
     self.assertEqual(44100, reported_rate)
Пример #7
0
 def test_aiff(self):
     # Expect a sample rate of 8000 for INPUT_FILE2.
     reported_rate = file_info.sample_rate(INPUT_FILE2)
     self.assertEqual(8000, reported_rate)
Пример #8
0
 def test_wav(self):
     # Expect a sample rate of 44100 for INPUT_FILE.
     reported_rate = file_info.sample_rate(INPUT_FILE)
     self.assertEqual(44100, reported_rate)