def _get_track_metadata(path):
    """Compute normalization statistics for the track at *path*.

    The track is decoded as mono at 44 kHz as a reference setting; for
    any other settings the data won't be perfectly normalized, but it
    should be good enough.
    """
    audio = AudioFile(path)
    reference_mix = audio.read(streams=0, channels=1, samplerate=44100)
    return {
        "duration": audio.duration,
        "std": reference_mix.std().item(),
        "mean": reference_mix.mean().item(),
    }
# Exemplo n.º 2 (score: 0)
def test_recognize(key, filename):
    """Test helper: recognize speech in *filename* via the Microsoft Bing
    Speech API and print the raw result.

    Args:
        key: Bing Speech API subscription key.
        filename: path to the audio file to recognize.
    """
    from audio import AudioData, AudioFile
    # Use context managers so both the AudioFile source and the raw file
    # handle are released even if reading raises — the original called
    # af.__enter__() with no matching __exit__ and never closed open().
    with AudioFile(filename) as af:
        with open(filename, 'rb') as fp:
            ad = AudioData(fp.read(), af.SAMPLE_RATE, af.SAMPLE_WIDTH)
    bs = Bing()
    r = bs.recognize(ad, key=key, show_all=True)
    print(r)
# Exemplo n.º 3 (score: 0)
def test_recognize(key, filename):
    """Test helper: recognize speech in *filename* via the Google Cloud
    Speech API and print the raw result.

    Args:
        key: Google Cloud Speech API key.
        filename: path to the audio file to recognize.
    """
    from audio import AudioData, AudioFile
    # Use context managers so both the AudioFile source and the raw file
    # handle are released even if reading raises — the original called
    # af.__enter__() with no matching __exit__ and never closed open().
    with AudioFile(filename) as af:
        with open(filename, 'rb') as fp:
            ad = AudioData(fp.read(), af.SAMPLE_RATE, af.SAMPLE_WIDTH)
    gs = Google()
    r = gs.recognize(ad, key=key, show_all=True)
    print(r)
# Exemplo n.º 4 (score: 0)
 def __getitem__(self, index):
     """Return ``(name, audio)`` for the track at *index*.

     The audio is read through ``AudioFile`` from the start of the track
     using the dataset's configured channels, streams and sample rate.
     """
     selected = self.mus.tracks[index]
     audio = AudioFile(selected.path).read(
         channels=self.channels,
         seek_time=0,
         streams=self.streams,
         samplerate=self.samplerate)
     return (selected.name, audio)
# Exemplo n.º 5 (score: 0)
 def censor(self):
     """ Creates a clean/new version of a file by removing explicits.

     Splits the source audio into chunks, censors each chunk on a
     worker thread pool, stitches the censored chunks back together,
     and writes out the resulting clean file.
     """
     audio_file = AudioFile(self.file_path)
     # Define the CLI progress bar
     p_bar, p_bar_step = self.__progress_bar(audio_file.normal_chunks)
     async_iter = zip(repeat(p_bar), repeat(p_bar_step),
                      audio_file.normal_chunks)
     # Censor each audio chunk file asynchronously.  Run the pool as a
     # context manager so its worker threads are torn down even if a
     # chunk raises — the original never closed/terminated the pool.
     with ThreadPool(6) as pool:
         censored_chunks = pool.map(self.__censor_chunk, async_iter)
     clean_file = self.__create_clean_segment(censored_chunks)
     p_bar.close()
     self.create_clean_file(clean_file)
 def __getitem__(self, index):
     """Return the normalized audio example at *index*.

     Walks the metadata entries, subtracting each file's example count
     from *index* until the owning file is found, then reads the
     corresponding window and normalizes it with that file's
     precomputed mean/std.

     Raises:
         IndexError: if *index* is past the last example.  (The
             original fell through the loop and implicitly returned
             ``None``, which breaks the sequence/iteration protocol.)
     """
     for meta in self.metadata:
         examples = self._examples_count(meta)
         if index >= examples:
             index -= examples
             continue
         streams = AudioFile(meta["path"]).read(seek_time=index *
                                                self.stride,
                                                duration=self.duration,
                                                channels=self.channels,
                                                samplerate=self.samplerate)
         return (streams - meta["mean"]) / meta["std"]
     # Exhausting the loop means index was out of range for every file.
     raise IndexError(index)
# Exemplo n.º 7 (score: 0)
def transcribe_file(audio_path):
    # split_audio_file parameter variable settings
    batchsize = 128
    # Aggressiveness has to be within 0-3 for VAD split to work.
    aggressive = 1
    outlierdurms = 20
    outlierbatchsize = 64

    # from .... import  DS model

    # Makes sure there are cores present in a system to be used
    try:
        num_processes = cpu_count()

    except NotImplementedError:
        num_processes = 1

    with AudioFile(audio_path, as_path=True) as wav_path:
        dataset = split_audio_file(wav_path,
                                   batch_size=batchsize,
                                   aggressiveness=aggressive,
                                   outlier_duration_ms=outlierdurms,
                                   outlier_batch_size=outlierbatchsize)
        print("dataset:", dataset)