def _get_wav_and_melspec(wav_file, length, is_training=True):
    """
    Load a wav file and return one fixed-length clip with its mel-spectrogram.

    The trimmed waveform is divided into consecutive chunks of ``length``
    samples. In training one chunk is selected at random; otherwise the first
    chunk is used. The selected chunk is then passed through ``fix_length``
    (padding in case of a short last chunk).

    Per the original note, wav values are expected to be in [-1, 1]; this is
    not enforced here.

    :param wav_file: path of the wav file to read.
    :param length: chunk length in samples (used as a slice bound, so an int).
    :param is_training: select a random chunk if True, the first chunk otherwise.
    :return: wav: the selected clip with a trailing axis added,
             melspec: mel-spectrogram of the clip cast to float32.
    """
    wav = read_wav(wav_file, sr=hp.signal.sr)
    wav = trim_wav(wav)

    if is_training:
        # Integer ceiling division gives the same chunk count regardless of
        # whether `/` is true or floor division. The lower bound of 1 keeps
        # randrange() from raising on an empty range when trimming leaves
        # no samples at all.
        n_clips = max(1, -(-len(wav) // length))
    else:
        n_clips = 1

    idx = random.randrange(n_clips)
    start, end = length * idx, length * (idx + 1)
    wav = wav[start:end]
    assert len(wav) <= length  # invariant of the slice above
    wav = fix_length(wav, length)  # padding in case of last chunk.

    melspec = wav2melspec_db(
        wav,
        sr=hp.signal.sr,
        n_fft=hp.signal.n_fft,
        win_length=hp.signal.win_length,
        hop_length=hp.signal.hop_length,
        n_mels=hp.signal.n_mels,
        min_db=hp.signal.min_db,
        max_db=hp.signal.max_db,
    )
    wav = np.expand_dims(wav, -1)
    return wav, melspec.astype(np.float32)
def get_random_wav_and_label(self, tar_wavfiles, ntar_wavfiles):
    """
    Pick one wav file at random and return its waveform, mel-spectrogram and label.

    The target set is used when a uniform random sample is <= ``self.tar_ratio``;
    otherwise the non-target set is used. The waveform is trimmed, randomly
    cropped to ``self.length``, volume-augmented and passed through
    ``fix_length`` (padding).

    :param tar_wavfiles: indexable collection of target wav file paths (str or bytes).
    :param ntar_wavfiles: indexable collection of non-target wav file paths (str or bytes).
    :return: wav: raw wave after cropping/augmentation/padding,
             melspec: mel-spectrogram cast to float32,
             label: ``self.tar_labels`` or ``self.ntar_labels`` cast to float32.
    """
    if np.random.sample(1) <= self.tar_ratio:
        wavfiles, label = tar_wavfiles, self.tar_labels
    else:
        wavfiles, label = ntar_wavfiles, self.ntar_labels

    wavfile = wavfiles[np.random.randint(0, len(wavfiles))]
    # isinstance (not an exact type comparison) so that bytes subclasses,
    # e.g. numpy.bytes_ elements of a NumPy array, are decoded as well;
    # otherwise the str-suffix check below fails on a bytes path.
    if isinstance(wavfile, bytes):
        wavfile = wavfile.decode()

    if wavfile.endswith('arr'):  # pyarrow format
        wav = read_wav_from_arr(wavfile)
    else:
        wav = read_wav(wavfile, sr=hp.signal.sr)

    wav = trim_wav(wav)
    wav = crop_random_wav(wav, self.length)
    wav = augment_volume(wav)
    wav = fix_length(wav, self.length)  # padding

    melspec = wav2melspec_db(
        wav,
        sr=hp.signal.sr,
        n_fft=hp.signal.n_fft,
        win_length=hp.signal.win_length,
        hop_length=hp.signal.hop_length,
        n_mels=hp.signal.n_mels,
        min_db=hp.signal.min_db,
        max_db=hp.signal.max_db,
    )
    melspec = np.float32(melspec)
    label = np.float32(label)
    return wav, melspec, label
def do_task(nthreads, audio):
    """
    Worker loop: trim every audio file yielded by ``audio`` and write the result.

    :param nthreads: identifier used only in the start/done log messages.
    :param audio: iterator yielding ``(src_path, tar_path)`` pairs; the worker
                  returns once it is exhausted.
    """
    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and is valid syntax on Python 3.
    print('Thread-{} start.\n'.format(nthreads))
    while True:
        # Only the fetch is guarded, so a StopIteration escaping from the
        # processing steps below is not mistaken for "no more work".
        try:
            src_path, tar_path = next(audio)
        except StopIteration:
            break
        wav, sr = librosa.load(src_path)
        wav = trim_wav(wav)
        write_wav(wav, sr, tar_path)
    print('Thread-{} done.\n'.format(nthreads))
def get_random_wav(self, wavfile):
    """
    Read one wav file and return its fixed-length waveform and mel-spectrogram.

    :param wavfile: a raw wave file to read.
    :return: wav: the trimmed wave after ``fix_length`` to ``self.length``,
             melspec: mel-spectrogram of that wave cast to float32,
             wavfile: the input file, passed through unchanged.
    """
    samples = read_wav(wavfile, sr=hp.signal.sr)
    samples = fix_length(trim_wav(samples), self.length)  # crop from the beginning.

    # Spectrogram settings all come from the shared signal hyper-parameters.
    spec_kwargs = {
        'sr': hp.signal.sr,
        'n_fft': hp.signal.n_fft,
        'win_length': hp.signal.win_length,
        'hop_length': hp.signal.hop_length,
        'n_mels': hp.signal.n_mels,
        'min_db': hp.signal.min_db,
        'max_db': hp.signal.max_db,
    }
    mel = wav2melspec_db(samples, **spec_kwargs)
    return samples, np.float32(mel), wavfile
def _get_wav_and_melspec(wav_file, length=None, is_training=True):
    """
    Load a wav file and return its waveform and mel-spectrogram.

    When ``length`` is given (truthy), the trimmed waveform is divided into
    consecutive chunks of ``length`` samples; one chunk is selected at random
    in training, the first chunk otherwise, and the result is passed through
    ``fix_length`` (padding). When ``length`` is falsy the whole trimmed
    waveform is used.

    :param wav_file: path of the wav file to read.
    :param length: optional chunk length in samples (used as a slice bound, so an int).
    :param is_training: select a random chunk if True, the first chunk otherwise.
    :return: wav: the waveform with a trailing axis added,
             melspec: mel-spectrogram of the waveform cast to float32.
    """
    wav = read_wav(wav_file, sr=hp.signal.sr)
    wav = trim_wav(wav)

    if length:
        if is_training:
            # Integer ceiling division gives the same chunk count regardless
            # of whether `/` is true or floor division. The lower bound of 1
            # keeps randrange() from raising on an empty range when trimming
            # leaves no samples at all.
            n_clips = max(1, -(-len(wav) // length))
        else:
            n_clips = 1
        idx = random.randrange(n_clips)
        start, end = length * idx, length * (idx + 1)
        wav = wav[start:end]
        assert len(wav) <= length  # invariant of the slice above
        wav = fix_length(wav, length)  # padding

    melspec = wav2melspec_db(
        wav,
        sr=hp.signal.sr,
        n_fft=hp.signal.n_fft,
        win_length=hp.signal.win_length,
        hop_length=hp.signal.hop_length,
        n_mels=hp.signal.n_mels,
        min_db=hp.signal.min_db,
        max_db=hp.signal.max_db,
    )
    wav = np.expand_dims(wav, -1)
    return wav, melspec.astype(np.float32)