def _load_wav_with_speed(wav_file, speed):
    """Load a wave file, optionally applying sox speed perturbation.

    Args:
        wav_file: audio file to read; handed unchanged to torchaudio / sox.
        speed: speed factor. Exactly 1.0 means "no perturbation".

    Returns:
        When ``speed == 1.0``, whatever ``torchaudio.load_wav`` returns.
        Otherwise a ``(waveform, sample_rate)`` pair produced by the sox
        effects, with the waveform multiplied by ``1 << 15``.
    """
    # Nothing to perturb: read the file directly and stop here.
    if speed == 1.0:
        return torchaudio.load_wav(wav_file)

    # NOTE(review): unpacking a pair and reading `.rate` matches the legacy
    # signal-info object; confirm against the audio backend this file uses.
    signal_info, _ = torchaudio.info(wav_file)

    # Fold "major.minor" of the installed torchaudio into a single integer
    # (0.8.x -> 80) to choose between the two sox-effects APIs.
    version_fields = torchaudio.__version__.split(".")
    version_code = 100 * int(version_fields[0]) + 10 * int(version_fields[1])

    if version_code >= 80:
        # torchaudio >= 0.8.0: functional API, effect arguments as strings.
        wav, sr = sox_effects.apply_effects_file(
            wav_file,
            [["speed", str(speed)], ["rate", str(signal_info.rate)]],
        )
    else:
        # torchaudio < 0.8.0: effects-chain API (deprecated from 0.8.0 on).
        chain = sox_effects.SoxEffectsChain()
        chain.append_effect_to_chain('speed', speed)
        chain.append_effect_to_chain("rate", signal_info.rate)
        chain.set_input_file(wav_file)
        wav, sr = chain.sox_build_flow_effects()

    # Per the original author's note, sox returns a normalized waveform;
    # scale it back to the [-32768, 32767] range.
    wav = wav * (1 << 15)
    return wav, sr
def _load_wav_with_speed(wav_file, speed):
    """Load a wave file and apply speed perturbation when requested.

    Args:
        wav_file: audio file to read; handed unchanged to torchaudio / sox.
        speed: speed factor. Exactly 1.0 loads the file without any effect.

    Returns:
        A ``(waveform, sample_rate)`` pair, either straight from
        ``torchaudio.load`` or from the sox ``speed`` + ``rate`` effects.
    """
    # Guard clause: an unperturbed load needs none of the sox machinery.
    if speed == 1.0:
        wav, sr = torchaudio.load(wav_file)
        return wav, sr

    # The file's own sample rate is passed to the `rate` effect below --
    # presumably so the perturbed audio keeps the input's sample rate.
    file_info = torchaudio.backend.sox_io_backend.info(wav_file)
    sample_rate = file_info.sample_rate

    # Encode "major.minor" of the installed torchaudio as one integer
    # (0.8.x -> 80) so the API switch is a single comparison.
    version_fields = torchaudio.__version__.split(".")
    major = int(version_fields[0])
    minor = int(version_fields[1])
    version_code = 100 * major + 10 * minor

    if version_code >= 80:
        # Functional API, enabled in torchaudio >= 0.8.0.
        wav, sr = sox_effects.apply_effects_file(
            wav_file,
            [['speed', str(speed)], ['rate', str(sample_rate)]],
        )
    else:
        # Effects-chain API, deprecated in torchaudio >= 0.8.0.
        chain = sox_effects.SoxEffectsChain()
        chain.append_effect_to_chain('speed', speed)
        chain.append_effect_to_chain("rate", sample_rate)
        chain.set_input_file(wav_file)
        wav, sr = chain.sox_build_flow_effects()

    return wav, sr
def _load_wav_with_speed(wav, speed):
    """Read an audio file and, unless ``speed`` is 1.0, speed-perturb it.

    Adapted from wenet.

    Args:
        wav: audio file to read; handed unchanged to torchaudio / sox.
        speed: speed factor. Exactly 1.0 loads the file without any effect.

    Returns:
        A ``(waveform, sample_rate)`` pair.
    """
    if speed == 1.0:
        waveform, sr = torchaudio.load(wav)
    else:
        # The source sample rate is passed to the `rate` effect below --
        # presumably so the output keeps the input's sample rate.
        source_rate = torchaudio.backend.sox_io_backend.info(wav).sample_rate

        # Pack "major.minor" into one integer; 80 corresponds to 0.8.0.
        parts = torchaudio.__version__.split(".")
        packed_version = 100 * int(parts[0]) + 10 * int(parts[1])
        uses_effects_chain = packed_version < 80

        if uses_effects_chain:
            # Older torchaudio: build and run an explicit effects chain.
            chain = sox_effects.SoxEffectsChain()
            chain.append_effect_to_chain('speed', speed)
            chain.append_effect_to_chain('rate', source_rate)
            chain.set_input_file(wav)
            waveform, sr = chain.sox_build_flow_effects()
        else:
            # Newer torchaudio: one call, effect arguments as strings.
            waveform, sr = sox_effects.apply_effects_file(
                wav,
                [['speed', str(speed)], ['rate', str(source_rate)]],
            )
    return waveform, sr