Пример #1
0
    def _get_wav_and_melspec(wav_file, length, is_training=True):
        '''
        Load a wav file and return one fixed-length clip plus its mel-spectrogram.

        The wav is read at hp.signal.sr, trimmed with trim_wav, and split into
        consecutive chunks of `length` samples. In training a chunk is picked
        at random; otherwise the first chunk is used. The chosen chunk is
        passed through fix_length so a short (last) chunk is padded.

        The range of values of wav is [-1, 1] (per the original note; this
        depends on read_wav, which is not visible here).

        :param wav_file: path of the wav file to read.
        :param length: clip length in samples (expected to be a positive int).
        :param is_training: pick a random chunk if True, the first one if False.
        :return: wav: the clip with a trailing axis added by np.expand_dims,
                 melspec: output of wav2melspec_db cast to float32.
        '''

        wav = read_wav(wav_file, sr=hp.signal.sr)
        wav = trim_wav(wav)
        # divide wav into chunks that have the given length and one is randomly selected in training, but first chunk in generation.
        if is_training:
            # Integer ceil-division gives the same result on Python 2 and 3.
            # Clamp to 1 so an empty (fully trimmed) wav still yields a padded
            # clip instead of random.randrange(0) raising ValueError.
            n_clips = max(1, (len(wav) + length - 1) // length)
        else:
            n_clips = 1
        idx = random.randrange(n_clips)
        start, end = length * idx, length * (idx + 1)
        wav = wav[start:end]
        assert (len(wav) <= length)
        wav = fix_length(wav, length)  # padding in case of last chunk.

        melspec = wav2melspec_db(wav,
                                 sr=hp.signal.sr,
                                 n_fft=hp.signal.n_fft,
                                 win_length=hp.signal.win_length,
                                 hop_length=hp.signal.hop_length,
                                 n_mels=hp.signal.n_mels,
                                 min_db=hp.signal.min_db,
                                 max_db=hp.signal.max_db)
        wav = np.expand_dims(wav, -1)
        return wav, melspec.astype(np.float32)
Пример #2
0
    def get_random_wav_and_label(self, tar_wavfiles, ntar_wavfiles):
        """
        Pick a random target or non-target wav file and prepare it as a sample.

        A target file is chosen when a uniform random draw is <= self.tar_ratio,
        otherwise a non-target file is chosen. The wav is trimmed, randomly
        cropped to self.length, volume-augmented and then padded to self.length.

        :param tar_wavfiles: sequence of target wav file paths (str or bytes).
        :param ntar_wavfiles: sequence of non-target wav file paths (str or bytes).
        :return: wav: raw wave after cropping, augmentation and padding,
                 melspec: mel-spectrogram from wav2melspec_db, cast to float32,
                 label: self.tar_labels or self.ntar_labels, cast to float32.
        """
        if np.random.sample(1) <= self.tar_ratio:
            wavfiles, label = tar_wavfiles, self.tar_labels
        else:
            wavfiles, label = ntar_wavfiles, self.ntar_labels
        wavfile = wavfiles[np.random.randint(0, len(wavfiles))]
        # isinstance (not an exact type comparison) so that bytes subclasses
        # such as numpy.bytes_ are decoded too; otherwise the str-based
        # endswith() check below would fail on them.
        if isinstance(wavfile, bytes):
            wavfile = wavfile.decode()
        if wavfile.endswith('arr'):  # pyarrow format
            wav = read_wav_from_arr(wavfile)
        else:
            wav = read_wav(wavfile, sr=hp.signal.sr)
        wav = trim_wav(wav)

        wav = crop_random_wav(wav, self.length)
        wav = augment_volume(wav)
        wav = fix_length(wav, self.length)  # padding
        melspec = wav2melspec_db(wav,
                                 sr=hp.signal.sr,
                                 n_fft=hp.signal.n_fft,
                                 win_length=hp.signal.win_length,
                                 hop_length=hp.signal.hop_length,
                                 n_mels=hp.signal.n_mels,
                                 min_db=hp.signal.min_db,
                                 max_db=hp.signal.max_db)
        melspec = np.float32(melspec)
        label = np.float32(label)
        return wav, melspec, label
Пример #3
0
def do_task(nthreads, audio):
    """
    Worker loop: trim every audio file provided by `audio` and write the result.

    :param nthreads: identifier of this worker, used only in the log messages.
    :param audio: iterator yielding (src_path, tar_path) pairs; the worker
                  stops when it is exhausted.
    """
    # print(...) with a single argument and the builtin next() behave the same
    # on Python 2.6+ and Python 3, unlike the print statement / .next() method.
    print('Thread-{} start.\n'.format(nthreads))
    while True:
        # Only the iterator advance is guarded, so a StopIteration raised by
        # the processing steps below is not mistaken for "no more work".
        try:
            src_path, tar_path = next(audio)
        except StopIteration:
            break
        wav, sr = librosa.load(src_path)
        wav = trim_wav(wav)
        write_wav(wav, sr, tar_path)
    print('Thread-{} done.\n'.format(nthreads))
Пример #4
0
 def get_random_wav(self, wavfile):
     """
     Read one wav file and compute its mel-spectrogram.

     The wav is read at hp.signal.sr, trimmed, and forced to self.length
     samples with fix_length.

     :param: wavfile: a raw wave file.
     :return: wav: the trimmed, fixed-length raw wave,
              melspec: mel-spectrogram from wav2melspec_db, cast to float32,
              wavfile: the input file, returned unchanged.
     """
     # fix_length crops from the beginning (per the original note).
     fixed_wav = fix_length(
         trim_wav(read_wav(wavfile, sr=hp.signal.sr)),
         self.length)
     mel_kwargs = dict(
         sr=hp.signal.sr,
         n_fft=hp.signal.n_fft,
         win_length=hp.signal.win_length,
         hop_length=hp.signal.hop_length,
         n_mels=hp.signal.n_mels,
         min_db=hp.signal.min_db,
         max_db=hp.signal.max_db,
     )
     mel = np.float32(wav2melspec_db(fixed_wav, **mel_kwargs))
     return fixed_wav, mel, wavfile
    def _get_wav_and_melspec(wav_file, length=None, is_training=True):
        '''
        Load a wav file and return it (optionally one fixed-length clip) with
        its mel-spectrogram.

        The wav is read at hp.signal.sr and trimmed with trim_wav. When
        `length` is given (truthy), the wav is split into consecutive chunks of
        `length` samples; a random chunk is picked in training, the first chunk
        otherwise, and the chunk is passed through fix_length for padding.
        When `length` is None or 0 the whole trimmed wav is used.

        :param wav_file: path of the wav file to read.
        :param length: clip length in samples, or None to keep the full wav.
        :param is_training: pick a random chunk if True, the first one if False.
        :return: wav: the wave with a trailing axis added by np.expand_dims,
                 melspec: output of wav2melspec_db cast to float32.
        '''
        wav = read_wav(wav_file, sr=hp.signal.sr)
        wav = trim_wav(wav)
        if length:
            if is_training:
                # Integer ceil-division gives the same result on Python 2 and
                # 3. Clamp to 1 so an empty (fully trimmed) wav still yields a
                # padded clip instead of random.randrange(0) raising ValueError.
                n_clips = max(1, (len(wav) + length - 1) // length)
            else:
                n_clips = 1
            idx = random.randrange(n_clips)
            start, end = length * idx, length * (idx + 1)
            wav = wav[start:end]
            assert (len(wav) <= length)
            wav = fix_length(wav, length)  # padding

        melspec = wav2melspec_db(wav,
                                 sr=hp.signal.sr,
                                 n_fft=hp.signal.n_fft,
                                 win_length=hp.signal.win_length,
                                 hop_length=hp.signal.hop_length,
                                 n_mels=hp.signal.n_mels,
                                 min_db=hp.signal.min_db,
                                 max_db=hp.signal.max_db)
        wav = np.expand_dims(wav, -1)
        return wav, melspec.astype(np.float32)