def scan(filepath):
    """Collect basic audio properties and comment tags of a file via sox.

    The sox package has an ``info()`` helper, but it should not be used:
    it is just a wrapper that calls a set of functions and returns them in
    a dict, and the functions it calls are the wrong ones. The individual
    ``file_info`` functions are therefore called directly.

    :param filepath: path of the audio file to inspect.
    :return: dict with the keys ``'file_type'``, ``'sample_rate'``
        (rounded), ``'channels'``, ``'duration'``, ``'encoding'`` and
        ``'bit_rate'``; when the file has comments, also ``'tags'``, a dict
        mapping each lower-cased comment key to its value. ``'bit_rate'``
        is ``None`` when the reported duration is missing or zero.
    """
    info_dictionary = {
        'file_type': file_info.file_type(filepath),
        'sample_rate': round(file_info.sample_rate(filepath)),
        'channels': file_info.channels(filepath),
        'duration': file_info.duration(filepath),
        'encoding': file_info.encoding(filepath),
    }

    # file_info.bitrate() is currently broken (1.37) but we can fake it from
    # the file size (in bits) divided by the duration. A missing or zero
    # duration cannot yield an estimate, so report None instead of crashing.
    duration = info_dictionary['duration']
    if duration:
        info_dictionary['bit_rate'] = round(getsize(filepath) / duration * 8)
    else:
        info_dictionary['bit_rate'] = None

    # Get comments too: each line is expected to look like "key=value".
    comments = file_info.comments(filepath).splitlines()
    if comments:
        tags = {}
        for comment in comments:
            key, separator, value = comment.strip().partition('=')
            if not separator:
                # Blank line or free-form text without '=': nothing to
                # store as a tag, and unpacking a split() would raise.
                continue
            tags[key.lower()] = value
        info_dictionary['tags'] = tags

    return info_dictionary
def read(self, audio_metadata):
    """Read an audio file.

    Files whose path does not end in ``.wav`` are first converted with sox
    to a temporary wav file using this reader's sample rate, channel count
    and bit depth; the temporary file is always removed before returning,
    even when reading or validation fails.

    :param audio_metadata: metadata info of an audio; its ``path`` and
        ``is_keyword`` attributes are read.
    :return: raw audio data as float32 array and duration in seconds. The
        duration is measured before any trailing silence is appended.
    :raises AssertionError: if the source sample rate is lower than the
        requested one, or the file that is read does not have the expected
        sample rate / channel count.
    """
    path = audio_metadata.path
    temp_path = None

    try:
        # Convert it to a wav file.
        if not path.endswith('.wav'):
            original_sample_rate = file_info.sample_rate(path)
            assert self._sample_rate <= original_sample_rate

            transformer = Transformer()
            transformer.convert(samplerate=self._sample_rate,
                                n_channels=self._channels,
                                bitdepth=self._bits_per_sample)

            fd, temp_path = tempfile.mkstemp(suffix='.wav')
            # Only the path is needed; close the descriptor right away so
            # it cannot leak if a later step raises.
            os.close(fd)
            transformer.build(audio_metadata.path, temp_path)
            path = temp_path

        # Read the audio file.
        with SoundFile(path) as soundfile:
            # Make sure the audio properties are as expected.
            assert soundfile.samplerate == self._sample_rate
            assert soundfile.channels == self._channels
            duration_sec = len(soundfile) / self._sample_rate
            pcm = soundfile.read(dtype='float32')

        # Add 0.5 second silence to the end of files containing keyword as
        # in occasionally the user stopped recording right after uttering
        # the keyword. If the detector needs some time after seeing the
        # keyword to make a decision (e.g. endpointing) this is going to
        # artificially increase the miss rates.
        if audio_metadata.is_keyword:
            # Match pcm's dtype: np.zeros defaults to float64, which would
            # make np.append promote the result away from float32.
            silence = np.zeros(self._sample_rate // 2, dtype=pcm.dtype)
            pcm = np.append(pcm, silence)

        return pcm, duration_sec
    finally:
        # Remove the converted file on success and on failure alike.
        if temp_path is not None:
            os.remove(temp_path)
def test_empty(self):
    # file_info.sample_rate(EMPTY_FILE) is expected to report 44100.
    self.assertEqual(44100, file_info.sample_rate(EMPTY_FILE))
def test_aiff(self):
    # file_info.sample_rate(INPUT_FILE2) is expected to report 8000.
    self.assertEqual(8000, file_info.sample_rate(INPUT_FILE2))
def test_wav(self):
    # file_info.sample_rate(INPUT_FILE) is expected to report 44100.
    self.assertEqual(44100, file_info.sample_rate(INPUT_FILE))