def gain(self, level, mode='multiply'):
    """Increase the volume of the audio by a specified amount."""
    amplifier = {
        'multiply': lambda d, l: d * l,
        'percent': lambda d, l: d * l / 100.0,
        'additive': lambda d, l: d + l,
        'subtractive': lambda d, l: d - l,
        'dB_additive': lambda d, l: librosa.db_to_amplitude(
            librosa.amplitude_to_db(d) + l),
        'dB_subtractive': lambda d, l: librosa.db_to_amplitude(
            librosa.amplitude_to_db(d) - l),
        'dB_multiply': lambda d, l: (10 ** (l / 20)) * d,
    }
    data = amplifier[mode](self.data, level)
    return Audio().populate(data, self.sample_rate, self.source_path,
                            self.operations + [_gainOperation(level, mode)])
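# Sketch (not part of the original Audio class) illustrating why the
# 'dB_additive' and 'dB_multiply' modes above agree for positive signals:
# adding L dB in the dB domain equals scaling the amplitude by 10**(L / 20).
# The synthetic signal and top_db=None are assumptions made so the round trip is exact.
def _check_db_gain_equivalence(level_db=6.0):
    d = np.abs(np.random.randn(1024)) + 0.1      # synthetic positive "audio"
    via_db = librosa.db_to_amplitude(
        librosa.amplitude_to_db(d, top_db=None) + level_db)
    via_scale = (10 ** (level_db / 20)) * d
    return np.allclose(via_db, via_scale)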
def pesq_on_batch(y_denoised, ytest, test_phase, sr=16000):
    pesqvalue = 1
    try:
        y_denoised = np.squeeze(y_denoised, axis=3)
        y_denoised = np.squeeze(y_denoised, axis=0)
        y_denoised = librosa.db_to_amplitude(y_denoised)
        ytest = librosa.db_to_amplitude(ytest)
        denoised = y_denoised * test_phase
        original = ytest * test_phase
        denoised = librosa.istft(denoised)
        original = librosa.istft(original)
        denoised = librosa.util.normalize(denoised)
        original = librosa.util.normalize(original)
        pesqvalue = pesq(sr, original, denoised, 'wb')
    except Exception:
        print("pesq didn't work")
        pesqvalue = 1
    return pesqvalue
def stoi_on_batch(y_denoised, ytest, test_phase, sr=16000):
    stoivalue = 0
    y_denoised = np.squeeze(y_denoised, axis=3)
    y_denoised = np.squeeze(y_denoised, axis=0)
    y_denoised = librosa.db_to_amplitude(y_denoised)
    ytest = librosa.db_to_amplitude(ytest)
    denoised = y_denoised * test_phase
    original = ytest * test_phase
    denoised = librosa.istft(denoised)
    original = librosa.istft(original)
    denoised = librosa.util.normalize(denoised)
    original = librosa.util.normalize(original)
    stoivalue = stoi(original, denoised, sr)
    return stoivalue
def pesq_from_fft(noisy, phase_noisy, clean, phase_clean, out=False):
    """Calculate the PESQ metric on an STFT batch."""
    phase_noisy = np.array(phase_noisy)
    noisy = librosa.db_to_amplitude(noisy)
    noisy = noisy * phase_noisy
    noisy = librosa.istft(noisy)
    clean = np.array(clean)
    phase_clean = np.array(phase_clean)
    clean = librosa.db_to_amplitude(clean)
    clean = clean * phase_clean
    clean = librosa.istft(clean)
    if out:
        global cn
        scipy.io.wavfile.write(path + '\\gvepre\\predictGVE' + str(cn) + '.wav',
                               16000, noisy)
        cn = cn + 1
    sr = 16000
    pesqvalue = pesq(sr, clean, noisy, 'wb')
    return pesqvalue
def source_to_distortion(batch_predicted, target_gt):
    batch_predicted = librosa.db_to_amplitude(batch_predicted)
    target_gt = librosa.db_to_amplitude(target_gt)
    distortion = (batch_predicted - target_gt) ** 2
    return 10 * np.log10(np.divide(target_gt, distortion,
                                   out=np.ones_like(target_gt) * 50,
                                   where=distortion != 0))
def extract_audio(Z, feature, params):
    # If Z is normalized, unnormalize it first, then pass it to this function.
    # Convert the feature representation back to audio.
    if feature == "Stft":
        # undo log-magnitude scaling
        S = librosa.db_to_amplitude(Z)
        # upsample
        S = _upsample_fft(S, params["fft_sample_rate"], params["stft_window_length"])
        yhat = librosa.griffinlim(S, hop_length=params["stft_hop_length"])
    elif feature == "Mel":
        # undo log-power scaling
        S = librosa.db_to_power(Z)
        yhat = librosa.feature.inverse.mel_to_audio(
            S,
            sr=params["fft_sample_rate"],
            n_fft=params["stft_window_length"],
            hop_length=params["stft_hop_length"],
        )
    elif feature == "Cqt":
        # undo log-amplitude scaling
        S = librosa.db_to_amplitude(Z)
        yhat = librosa.griffinlim_cqt(
            S,
            sr=params["fft_sample_rate"],
            hop_length=params["stft_hop_length"],
            fmin=librosa.note_to_hz(params["cqt_min_frequency"]),
        )
    elif feature == "Mfcc":
        yhat = librosa.feature.inverse.mfcc_to_audio(
            Z,
            n_mels=params["frequency_bins"],
            sr=params["fft_sample_rate"],
            n_fft=params["stft_window_length"],
            hop_length=params["stft_hop_length"],
        )
    else:
        raise ValueError("Invalid feature: {}".format(feature))
    return yhat, params["fft_sample_rate"]
def spectrogram_to_audio_db(data, data_recon, output_dir):
    data_np = (data.squeeze(0).to(torch.device("cpu"))).detach().numpy()
    data_np = librosa.db_to_amplitude(data_np)
    data_griffin_lim = librosa.griffinlim(data_np)

    data_recon_np = (data_recon.squeeze(0).to(torch.device("cpu"))).detach().numpy()
    data_recon_np = librosa.db_to_amplitude(data_recon_np)
    data_recon_griffin_lim = librosa.griffinlim(data_recon_np)

    source_aud_path = output_dir + '_input_' + '.wav'
    target_aud_path = output_dir + '_output_' + '.wav'
    librosa.output.write_wav(source_aud_path, data_griffin_lim, 16384)
    librosa.output.write_wav(target_aud_path, data_recon_griffin_lim, 16384)
    return source_aud_path, target_aud_path
def main():
    sample_rate = 44100
    n_fft = 512
    n_frame = 1000

    # Create an arbitrary spectrogram as a NumPy array.
    # For convenience, build it in dB and convert back to amplitude.
    spec = np.random.normal(-20, 2, [1 + n_fft // 2, n_frame])
    spec[20:30] += 50
    spec = librosa.db_to_amplitude(spec)

    # plot the created spectrogram
    librosa.display.specshow(librosa.amplitude_to_db(spec))
    plt.colorbar()
    plt.show()

    # inverse STFT (zero phase, since the spectrogram is real-valued) and playback
    wave = librosa.istft(spec)
    print('wave shape: {}'.format(wave.shape))
    sd.play(wave, samplerate=sample_rate, blocking=True)
def mel_spectro2wav(mel_spectro,
                    preemphasize=hparams.PREEMPHASIZE,
                    ref_db=hparams.REF_DB,
                    max_db=hparams.MAX_DB,
                    n_iter_griffin_lim=hparams.N_ITER_GRIFFIN_LIM,
                    gl_power=hparams.GL_POWER,
                    sample_rate=hparams.SAMPLE_RATE,
                    n_fft=hparams.N_FFT,
                    n_mels=hparams.SYNTHESIZER_N_MELS,
                    hop_length=hparams.HOP_LENGTH,
                    win_length=hparams.WIN_LENGTH,
                    window=hparams.WINDOW):
    mel_spectro = mel_spectro.T
    mel_spectro = (np.clip(mel_spectro, 0, 1) * max_db) - max_db + ref_db
    amp_mel = librosa.db_to_amplitude(mel_spectro)
    inv_mel_basis = np.linalg.pinv(librosa.filters.mel(sample_rate,
                                                       n_fft=n_fft,
                                                       n_mels=n_mels))
    mag_spectro = np.maximum(1e-10, np.dot(inv_mel_basis, amp_mel))
    mag_spectro = mag_spectro ** gl_power
    wav = griffin_lim(mag_spectro,
                      n_iter_griffin_lim=n_iter_griffin_lim,
                      n_fft=n_fft,
                      hop_length=hop_length,
                      win_length=win_length,
                      window=window)
    wav = signal.lfilter([1], [1, -preemphasize], wav)
    wav, _ = librosa.effects.trim(wav)
    return wav.astype(np.float32)
def random_eq(audio, fs, fraction, order, limits_freq, distribution):
    bands, _, _ = filterbank(audio, fs, fraction, order, limits_freq)
    bands = np.stack(bands, axis=0)
    coeffs = distribution(size=(bands.shape[0], 1))   # random per-band gains in dB
    eq_coeffs = librosa.db_to_amplitude(coeffs)
    bands *= eq_coeffs
    return librosa.util.normalize(np.sum(bands, axis=0))
def fetch_spectrogram_image(row):
    """Fetch a spectrogram at a specific path from cloud storage."""
    try:
        filename = row['filename'].replace('.mp3', '.wav') + ".png"
        logging.info("Fetching " + filename)
        blob = pool_bucket.get_blob(filename)
        if not blob:
            logging.error(filename + " not found")
            return
        tmp = tempfile.NamedTemporaryFile(suffix=".png")
        blob.download_to_file(tmp)
        tmp.seek(0)
        img = np.asarray(Image.open(tmp))
        assert blob.metadata['db_min']
        assert blob.metadata['db_max']
        img_mapped = np.interp(img, (0, 255),
                               (int(blob.metadata['db_min']),
                                int(blob.metadata['db_max'])))
        cqt = librosa.db_to_amplitude(img_mapped)
        cqt = np.flipud(cqt).T
        tmp.close()
        return [cqt, blob.metadata]
    except Exception as e:
        logging.error("Could not download " + filename)
        logging.error(e)
        return
def spectrogram2wav(mag):
    '''Generate a wave file from a spectrogram.'''
    # transpose
    mag = mag.T
    # de-normalize
    mag = (np.clip(mag, 0, 1) * hp.max_db) - hp.max_db + hp.ref_db
    # to amplitude
    mag = librosa.db_to_amplitude(mag)
    # wav reconstruction
    wav = griffin_lim(mag)
    # optional band-pass filtering:
    # wav = butter_bandpass_filter(wav, hp.lowcut, hp.highcut, hp.sr, order=6)
    # de-preemphasis
    wav = signal.lfilter([1], [1, -hp.preemphasis], wav)
    # trim
    wav, _ = librosa.effects.trim(wav)
    return wav
def postpro_and_gen(S, phase, returnS=0, dBscale=1, denormalize=1,
                    complex_phase=0, clip_phase=0):
    # S has shape (T, F)
    if dBscale:
        if denormalize:
            # denormalization
            S = S * hparams.max_db - hparams.max_db + hparams.ref_db
        S = librosa.db_to_amplitude(S)
    # pad with zeros
    Sfull = np.concatenate((S, np.zeros(shape=(S.shape[0], 1))), axis=-1)
    if clip_phase:
        phase = np.concatenate((phase, np.zeros(shape=(2, phase.shape[1], 1))),
                               axis=-1)
    # generate waveform
    wav = genWaveclip(Sfull, phase, complex_phase)
    if not returnS:
        return wav
    else:
        return wav, Sfull, phase
def extract_loudness(self, audio):
    S = librosa.stft(audio)
    power = np.abs(S) ** 2
    p_mean = np.sum(power, axis=0, keepdims=True)
    db = librosa.power_to_db(p_mean, ref=np.max(power))
    amp = librosa.db_to_amplitude(db)
    return amp[0]
def make_test_folder(X_in, X_out, name, step, n_samples=5):
    path_to_step_folder = mkdir(path_to_log, str(step + 1))
    path_to_step_folder_name = mkdir(path_to_step_folder, name)

    # plot and convert the real (input) images
    for i in range(n_samples):
        plt.subplot(2, n_samples, 1 + i)
        plt.axis('off')
        plt.imshow(X_in[i].reshape(spec_dim, spec_dim), cmap='gray')
        imageio.imwrite(
            path.join(path_to_step_folder_name,
                      str(i) + "_real_" + name.split('_')[0] + ".jpg"),
            X_in[i].reshape(spec_dim, spec_dim))
        im = cv2.imread(
            path.join(path_to_step_folder_name,
                      str(i) + "_real_" + name.split('_')[0] + ".jpg"), -1)
        im = im[:fft_len // 2 + 1, :fft_len // 2 + 1]
        im = (im * 80.0 / 255.0) - 80.0
        im = librosa.db_to_amplitude(im)
        y2 = griffinlim(im, hop_length=hop_length)
        write(
            path.join(path_to_step_folder_name,
                      str(i) + "_real_" + name.split('_')[0] + ".wav"),
            16000, y2 * 1.5)

    # plot and convert the translated (generated) images
    for i in range(n_samples):
        plt.subplot(2, n_samples, 1 + n_samples + i)
        plt.axis('off')
        plt.imshow(X_out[i].reshape(spec_dim, spec_dim), cmap='gray')
        imageio.imwrite(
            path.join(path_to_step_folder_name,
                      str(i) + "_generated_" + name.split('_')[2] + ".jpg"),
            X_out[i].reshape(spec_dim, spec_dim))
        im = cv2.imread(
            path.join(path_to_step_folder_name,
                      str(i) + "_generated_" + name.split('_')[2] + ".jpg"), -1)
        im = im[:fft_len // 2 + 1, :fft_len // 2 + 1]
        im = (im * 80.0 / 255.0) - 80.0
        im = librosa.db_to_amplitude(im)
        y2 = griffinlim(im, hop_length=hop_length)
        write(
            path.join(path_to_step_folder_name,
                      str(i) + "_generated_" + name.split('_')[2] + ".wav"),
            16000, y2 * 1.5)

    # save the plot to file
    filename1 = '%s_generated_plot_%06d.png' % (name, (step + 1))
    plt.savefig(path.join(path_to_step_folder, filename1), dpi=300)
    plt.close()
def extend_envelope(envelope_estimated, noise_floor_onset):
    # Fit a straight line in dB scaling; return the exponential, extended envelope.
    env_db = librosa.amplitude_to_db(envelope_estimated, ref=1.0)
    a = (env_db[noise_floor_onset] - env_db[0]) / np.max((noise_floor_onset, 1.))
    b = env_db[0]
    return librosa.db_to_amplitude(a * np.arange(len(envelope_estimated)) + b)
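# Hypothetical usage of extend_envelope above. The decay rate, length, and
# noise floor are made-up values, chosen only to show that a line fitted in
# the dB domain becomes an exponential once mapped back with db_to_amplitude.
def _demo_extend_envelope(n=1000, noise_floor_onset=400):
    t = np.arange(n)
    true_env = np.exp(-t / 50.0)               # synthetic exponential decay
    noisy_env = np.maximum(true_env, 1e-3)     # decay buried under a noise floor
    return extend_envelope(noisy_env, noise_floor_onset)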
def inv_spectrogram_librosa(spectrogram, fs, hparams):
    """Converts a spectrogram to a waveform using librosa."""
    S_denorm = _denormalize(spectrogram, hparams)
    # convert back to linear amplitude
    S = librosa.db_to_amplitude(S_denorm + hparams.ref_level_db)
    # reconstruct phase
    return griffinlim_librosa(S, fs, hparams)
def SDR(original, predicted):
    original = librosa.db_to_amplitude(original)
    predicted = librosa.db_to_amplitude(predicted)
    distortion = predicted - original
    original = original ** 2
    distortion = distortion ** 2
    sdr = np.divide(original, distortion)
    sdr = np.nan_to_num(sdr, nan=60.0, posinf=60.0, neginf=60.0)
    sdr = 10 * np.log10(sdr)
    return np.mean(sdr)
def SNR(Noisy, Clean, mask=True, Bark=False):
    """Calculate the signal-to-noise ratio; mask=True puts out the IBM."""
    m_ibm = []
    if not Bark:
        Noisy = librosa.db_to_amplitude(Noisy)
        Clean = librosa.db_to_amplitude(Clean)
    N = np.subtract(Noisy, Clean)
    m_ibm = 20 * np.log10(
        np.divide(Clean, N, out=np.zeros_like(Noisy), where=N != 0))
    print("masking output")
    if mask:
        m_ibm = (m_ibm >= 0).astype(int)
    return m_ibm
def generateWavFromSTFT(amp, phase, wl, hl):
    Mdb_inormed = np.interp(amp, (amp.min(), amp.max()), (-15, 65))
    iM = librosa.db_to_amplitude(Mdb_inormed)
    iC = iM * np.exp(1j * phase)
    iy = librosa.istft(iC, hop_length=hl, win_length=wl)
    iy = librosa.util.normalize(iy)
    return iy
def spec_to_audio(X_out, name_gen):
    imageio.imwrite(path.join(path_to_results, name_gen + ".jpg"),
                    X_out.reshape(260, 260))
    im = cv2.imread(path.join(path_to_results, name_gen + ".jpg"), -1)
    im = im[:257, :257]
    im = (im * 80.0 / 255.0) - 80.0
    im = librosa.db_to_amplitude(im)
    y2 = griffinlim(im, hop_length=256)
    write(path.join(path_to_results, name_gen + ".wav"), 16000, y2 * 3)
def do_convert(predictor, input_name, logdir2):
    convert_s = datetime.datetime.now()

    # Load input audio
    input_audio, _ = librosa.load(input_name, sr=hp.default.sr, dtype=np.float64)

    # Extract F0 from the input audio first
    input_f0, t_table = pw.dio(input_audio, hp.default.sr)
    input_f0 = pw.stonemask(input_audio, input_f0, t_table, hp.default.sr)

    # Get MFCC, spectral envelope, and aperiodicity
    mfcc = _get_mfcc(input_audio, hp.default.n_fft, hp.default.win_length,
                     hp.default.hop_length)
    mfcc = np.expand_dims(mfcc, axis=0)
    input_ap = pw.d4c(input_audio, input_f0, t_table, hp.default.sr,
                      fft_size=hp.default.n_fft)
    input_sp_en = _get_spectral_envelope(
        preemphasis(input_audio, coeff=hp.default.preemphasis), hp.default.n_fft)
    plt.imsave('./converted/debug/input_sp_en_original.png', input_sp_en,
               cmap='binary')
    input_sp_en = np.expand_dims(input_sp_en, axis=0)

    # Convert the spectral envelope
    output_sp_en, ppgs = convert_spectral_envelope(predictor, mfcc, input_sp_en)
    output_sp_en = np.squeeze(output_sp_en.astype(np.float64), axis=0)

    preproc_s = datetime.datetime.now()
    # Denormalization
    output_sp_en = denormalize_db(output_sp_en, hp.default.max_db, hp.default.min_db)
    # dB to amplitude
    output_sp_en = librosa.db_to_amplitude(output_sp_en)
    # Emphasize the magnitude
    output_sp_en = np.power(output_sp_en, hp.convert.emphasis_magnitude)
    preproc_e = datetime.datetime.now()
    preproc_t = preproc_e - preproc_s
    print("Pre-Processing time:{}s".format(preproc_t.seconds))

    # F0 transformation with the WORLD vocoder
    output_f0 = f0_adapt(input_f0, logdir2)

    # Synthesize audio and de-emphasize
    output_audio = pw.synthesize(output_f0, output_sp_en, input_ap, hp.default.sr)
    output_audio = inv_preemphasis(output_audio, coeff=hp.default.preemphasis)

    # Save output_audio to a 32-bit float wav file
    output_audio = output_audio.astype(np.float32)
    librosa.output.write_wav(path="./converted/" + input_name, y=output_audio,
                             sr=hp.default.sr)

    # Save the PPG data as a grayscale image and a raw binary file
    ppgs = np.squeeze(ppgs, axis=0)
    plt.imsave('./converted/debug/' + input_name + '.png', ppgs, cmap='binary')
    np.save('./converted/debug/' + input_name + '.npy', ppgs)

    convert_e = datetime.datetime.now()
    convert_time = convert_e - convert_s
    print("Total Converting Time:{}s".format(convert_time.seconds))
def mag(self):
    if self._mag is None:
        if self._D is not None and self._ph is not None:
            if self._ref_mag is None:
                self._ref_mag = 1.0
            self._mag = librosa.db_to_amplitude(self._D, ref=self._ref_mag)
        else:
            self._mag, self._ph = librosa.core.magphase(self.F)
    return self._mag
def SDR(original, predicted):
    """
    Calculate the source-to-distortion ratio on a batchwise STFT basis.
    The SDR is computed over all TF units and the mean is returned.
    """
    original = librosa.db_to_amplitude(original)
    predicted = librosa.db_to_amplitude(predicted)
    distortion = predicted - original
    # power spectrum:
    original = original ** 2
    distortion = distortion ** 2
    sdr = np.divide(original, distortion)
    # fix NaN values:
    sdr = np.nan_to_num(sdr, nan=60.0, posinf=60.0, neginf=60.0)
    sdr = 10 * np.log10(sdr)
    return np.mean(sdr)
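# Minimal sketch (the shapes and random inputs are assumptions, not from the
# original code) of how the batchwise SDR above is fed: both arguments are
# dB-scaled magnitude spectrograms of the same shape.
def _demo_sdr(n_freq=257, n_frames=100):
    clean_db = librosa.amplitude_to_db(np.abs(np.random.randn(n_freq, n_frames)))
    noisy_db = librosa.amplitude_to_db(np.abs(np.random.randn(n_freq, n_frames)))
    return SDR(clean_db, noisy_db)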
def convert_specs_to_audio(self, spectrograms, min_max_values):
    signals = []
    for spec, min_max_value in zip(spectrograms, min_max_values):
        log_spec = spec[:, :, 0]
        denorm_log_spec = self.min_max_normalizer.denormalise(
            log_spec, min_max_value["min"], min_max_value["max"])
        spectr = librosa.db_to_amplitude(denorm_log_spec)
        signal = librosa.istft(spectr, hop_length=self.hop_length)
        signals.append(signal)
    return signals
def img2audio(self, mode):
    # DECODER
    magma_dif_list = self.readMagmaDiff()
    mtx_rgb_sum = self.readImg(self.trans_img_path)
    mtx_value = self.curver(mtx_rgb_sum, magma_dif_list)
    mtx_unit = np.ones(mtx_value.shape)
    # get a "normalized" dB matrix
    mtx_db = (mtx_value / np.max(mtx_value) - mtx_unit) * float(abs(self.limit))
    mtx_amp = librosa.db_to_amplitude(mtx_db, ref=1.0)
    return self.reconstructer(mtx_amp, mode)
def generateWavFromCQT(amp, phase, sr, hl):
    Mdb_inormed = np.interp(amp, (amp.min(), amp.max()), (-15, 65))
    iM = librosa.db_to_amplitude(Mdb_inormed)
    bins_per_octave = 12 * 12
    D = iM * np.exp(1j * phase)
    iy = librosa.icqt(C=D, sr=sr, hop_length=hl, bins_per_octave=bins_per_octave)
    iy = librosa.util.normalize(iy)
    return iy
def pesq_on_batch(y_denoised, ytest, test_phase, sr=16000):
    pesqvalue = 1
    try:
        y_denoised = np.squeeze(y_denoised, axis=3)
        y_denoised = np.squeeze(y_denoised, axis=0)
        y_denoised = librosa.db_to_amplitude(y_denoised)
        ytest = librosa.db_to_amplitude(ytest)
        denoised = y_denoised * test_phase
        original = ytest * test_phase
        denoised = librosa.istft(denoised)
        original = librosa.istft(original)
        denoised = librosa.util.normalize(denoised)
        original = librosa.util.normalize(original)
        pmsqe.init_constants(
            Fs=sr,
            Pow_factor=pmsqe.perceptual_constants.Pow_correc_factor_Hann,
            apply_SLL_equalization=True,
            apply_bark_equalization=True,
            apply_on_degraded=True,
            apply_degraded_gain_correction=True)
        pesqvalue = per_frame_PMSQE(original, denoised)
    except Exception:
        print("pesq didn't work")
        pesqvalue = 1
    return pesqvalue
def magnitude_db_and_phase_to_audio(frame_length, hop_length_fft,
                                    stftaudio_magnitude_db, stftaudio_phase):
    stftaudio_magnitude_rev = librosa.db_to_amplitude(stftaudio_magnitude_db,
                                                      ref=1.0)
    audio_reverse_stft = stftaudio_magnitude_rev * stftaudio_phase
    audio_reconstruct = librosa.core.istft(audio_reverse_stft,
                                           hop_length=hop_length_fft,
                                           length=frame_length)
    return audio_reconstruct
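# A minimal usage sketch for magnitude_db_and_phase_to_audio above, showing how
# the dB-magnitude/phase pair it consumes is typically produced. The file path
# and hop length are placeholders, not values from the original code.
def _demo_magnitude_db_round_trip(path='example.wav', hop_length_fft=256):
    y, sr = librosa.load(path, sr=16000)
    D = librosa.stft(y, hop_length=hop_length_fft)
    mag, phase = librosa.magphase(D)                 # split the complex STFT
    mag_db = librosa.amplitude_to_db(mag, ref=1.0)
    return magnitude_db_and_phase_to_audio(len(y), hop_length_fft, mag_db, phase)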
def rebuild_audio_from_spectro_clips(spectrogram_clips, is_dB_format=False):
    """Rebuild the waveform solely from the magnitude spectrogram.

    Supported spectrogram formats:
    1. plain STFT magnitude spectrogram
    2. dB-scaled spectrogram, e.g. log(epsilon + S**2)
    """
    spectrogram = np.concatenate(spectrogram_clips, axis=1)
    if is_dB_format:
        spectrogram = librosa.db_to_amplitude(spectrogram)
    waveform = librosa.istft(spectrogram, hop_length=HOP_LEN, win_length=WIN_LEN)
    return waveform
def test_db_to_amplitude():
    srand()
    NOISE_FLOOR = 1e-6

    # Make some noise
    x = np.abs(np.random.randn(1000)) + NOISE_FLOOR

    db = librosa.amplitude_to_db(x, top_db=None)
    x2 = librosa.db_to_amplitude(db)
    assert np.allclose(x, x2)
def spectrogram2wav(mag):
    '''Generate a wave file from a spectrogram.'''
    # transpose
    mag = mag.T
    # de-normalize
    mag = (np.clip(mag, 0, 1) * hp.max_db) - hp.max_db + hp.ref_db
    # to amplitude
    mag = librosa.db_to_amplitude(mag)
    # wav reconstruction
    wav = griffin_lim(mag)
    # de-preemphasis
    wav = signal.lfilter([1], [1, -hp.preemphasis], wav)
    # trim
    wav, _ = librosa.effects.trim(wav)
    return wav
def __test(ref):
    db = librosa.amplitude_to_db(xp, ref=ref, top_db=None)
    xp2 = librosa.db_to_amplitude(db, ref=ref)
    assert np.allclose(xp, xp2)