def test_srmr():
    """Compare every SRMR variant against the stored reference ratios.

    The input signal is column 0 of ``s`` in ``test/test.mat``; the expected
    values are element 0 of ``correct_ratios`` in
    ``test/correct_ratios.mat``.
    """
    sample_rate = 16000
    waveform = loadmat("test/test.mat")["s"][:, 0]
    expected = loadmat("test/correct_ratios.mat")['correct_ratios'][0]

    # (index into the reference vector, keyword arguments passed to srmr),
    # listed in the order in which the variants are evaluated.
    cases = (
        (1, {}),
        (2, {"fast": True, "norm": True, "max_cf": 30}),
        (0, {"fast": False}),
        (3, {"fast": False, "norm": True, "max_cf": 30}),
    )
    for reference_index, options in cases:
        ratio, _avg_energy = srmr(waveform, sample_rate, **options)
        assert np.allclose(
            ratio, expected[reference_index], rtol=1e-6, atol=1e-12)
def test_srmr_fast():
    """Fast, normalized SRMR with ``max_cf=30`` must equal reference entry 2.

    ``s``, ``fs`` and ``correct_ratios`` are not defined here; they have to
    be available from the enclosing scope.
    """
    outcome = srmr(s, fs, fast=True, norm=True, max_cf=30)
    # srmr yields a pair; only the ratio is checked.
    ratio, _avg_energy = outcome
    reference = correct_ratios[2]
    assert np.allclose(ratio, reference, rtol=1e-6, atol=1e-12)
def srmr(speech, fs, n_cochlear_filters=23, low_freq=125, min_cf=4,
         max_cf=128, fast=False, norm=False):
    """Return the SRMR ratio of ``speech``, resampling first when required.

    A signal whose rate is 8000 or 16000 is forwarded to ``srmrpy.srmr``
    untouched.  For any other rate the signal is resampled to 16000 with the
    ``resample`` function in scope, and ``srmrpy.srmr`` is called with
    ``fs=16000``.

    All remaining keyword arguments are passed through to ``srmrpy.srmr``
    unchanged.  Only the first element of its result (the ratio) is
    returned; the second (energy) is discarded.
    """
    if not (fs == 8000 or fs == 16000):
        target_fs = 16000
        # Number of samples the signal has once expressed at the target rate.
        n_target = round(len(speech) / fs * target_fs)
        speech = resample(speech, n_target)
        fs = target_fs

    ratio, _energy = srmrpy.srmr(
        speech,
        fs,
        n_cochlear_filters=n_cochlear_filters,
        low_freq=low_freq,
        min_cf=min_cf,
        max_cf=max_cf,
        fast=fast,
        norm=norm,
    )
    return ratio
def read_spectrum(path, noise, reducenoise):
    """Load an audio file and return the second ``srmr`` output averaged over axis 2.

    Args:
        path: File handed to ``librosa.load`` (loaded with ``sr=16000``).
        noise: Noise clip forwarded to ``nr.reduce_noise`` as ``noise_clip``;
            only used when ``reducenoise`` is truthy.
        reducenoise: When truthy, the loaded audio is replaced by the output
            of ``nr.reduce_noise`` before the analysis.

    Returns:
        ``np.mean(..., axis=2)`` of element 1 of the ``srmr`` result, computed
        with 60 cochlear filters and no normalization.
    """
    audio, rate = librosa.load(path, sr=16000)
    if reducenoise:
        audio = nr.reduce_noise(audio_clip=audio, noise_clip=noise)

    srmr_output = srmr(
        audio,
        rate,
        n_cochlear_filters=60,
        norm=False,
        low_freq=125,
        min_cf=4,
        max_cf=128,
    )
    energy = srmr_output[1]
    return np.mean(energy, axis=2)
def test_srmr():
    """Validate the default, fast/normalized, slow and slow/normalized SRMR.

    The signal is column 0 of ``s`` in ``test/test.mat``; reference values
    are element 0 of ``correct_ratios`` in ``test/correct_ratios.mat``.
    """
    fs = 16000
    s = loadmat("test/test.mat")["s"][:, 0]
    correct_ratios = loadmat("test/correct_ratios.mat")['correct_ratios'][0]

    def matches(reference_index, **srmr_options):
        # Only the ratio (first output) is compared; the energy is ignored.
        ratio, _avg_energy = srmr(s, fs, **srmr_options)
        return np.allclose(
            ratio, correct_ratios[reference_index], rtol=1e-6, atol=1e-12)

    assert matches(1)
    assert matches(2, fast=True, norm=True, max_cf=30)
    assert matches(0, fast=False)
    assert matches(3, fast=False, norm=True, max_cf=30)
def srmrpy_eval_valid(predict, target):
    """Return the unnormalized SRMR score of ``predict`` as a float.

    Intended for reporting scores on the validation and test sets.

    Note: ``target`` is accepted but is not used by the computation.
    """
    waveform = predict.numpy()
    result = srmr(
        waveform,
        fs=16000,
        n_cochlear_filters=23,
        low_freq=125,
        min_cf=4,
        max_cf=128,
        fast=True,
        norm=False,
    )
    # Element 0 of the srmr result is the score.
    return float(result[0])
def srmrpy_eval(predict, target):
    """Return the SRMR score of ``predict`` normalized to 0~1 for training.

    The raw score is scaled by 0.1 and passed through ``sigmoid`` before
    being converted to a float.

    Note: ``target`` is accepted but is not used by the computation.
    """
    waveform = predict.numpy()
    raw_score = srmr(
        waveform,
        fs=16000,
        n_cochlear_filters=23,
        low_freq=125,
        min_cf=4,
        max_cf=128,
        fast=True,
        norm=False,
    )[0]
    squashed = sigmoid(0.1 * raw_score)
    return float(squashed)
def srmr_audio(self, path, file, FS=16000):
    """Extract modulation features from one audio file.

    Reads ``path/file`` with ``wav.read``, keeps only column 0 when the data
    has more than one dimension (presumably the first channel), resamples to
    ``FS`` when the file's rate differs, and runs ``srmr.srmr`` on the
    result.

    Args:
        path: Folder containing the file.
        file: File name inside ``path``.
        FS: Rate the audio is analysed at; 16000 unless specified.

    Returns:
        The second output (energy) of ``srmr.srmr``; the ratio is discarded.

    Reference kept from the original notes:
    http://stft.readthedocs.io/en/latest/index.html
    """
    native_rate, samples = wav.read('%s/%s' % (path, file))

    if len(samples.shape) > 1:
        samples = samples[:, 0]

    if native_rate != FS:
        # Sample count the signal has once expressed at the target rate.
        n_resampled = round(len(samples) * (FS / native_rate))
        samples = signal.resample(samples, n_resampled)

    _ratio, energy = srmr.srmr(
        samples,
        FS,
        n_cochlear_filters=23,
        low_freq=125,
        min_cf=4,
        max_cf=128,
        fast=True,
        norm=False,
    )
    return energy
def srmr_audio(path, file, pad, FS=16000):
    """Extract modulation features from one audio file, with zero padding.

    Reads ``path/file`` with ``wav.read``, keeps only column 0 when the data
    has more than one dimension (presumably the first channel), resamples to
    ``FS`` when the file's rate differs, pads the signal with zeros on both
    ends and runs ``srmr`` on the result.

    Args:
        path: Folder containing the file.
        file: File name inside ``path``.
        pad: Padding added to each end of the signal.  It is converted to a
            sample count as ``pad * FS / 1000``, i.e. it is treated as a
            duration in milliseconds.
        FS: Rate the audio is analysed at; 16000 unless specified.

    Returns:
        The second output (energy) of ``srmr``; the ratio is discarded.

    Reference kept from the original notes:
    http://stft.readthedocs.io/en/latest/index.html
    """
    fs, s = wav.read('%s/%s' % (path, file))

    if len(s.shape) > 1:
        s = s[:, 0]

    if fs != FS:
        n_s = round(len(s) * (FS / fs))
        s = signal.resample(s, n_s)

    # At this point the signal is sampled at FS, so the pad length has to be
    # derived from FS rather than from the file's original rate.
    n_pad = int((pad * FS) / 1000)
    # np.pad returns a new array and leaves its input untouched, so the
    # result must be assigned for the padding to take effect.
    s = np.pad(s, n_pad, 'constant')

    _ratio, energy = srmr(s, FS)
    return energy
def test_srmr_slow_norm():
    """Slow, normalized SRMR with ``max_cf=30`` must equal reference entry 3.

    ``s``, ``fs`` and ``correct_ratios`` are not defined here; they have to
    be available from the enclosing scope.
    """
    outcome = srmr(s, fs, fast=False, norm=True, max_cf=30)
    # srmr yields a pair; only the ratio is checked.
    ratio, _avg_energy = outcome
    reference = correct_ratios[3]
    assert np.allclose(ratio, reference, rtol=1e-6, atol=1e-12)
def test_srmr_slow():
    """Slow (``fast=False``) SRMR must equal reference entry 0.

    ``s``, ``fs`` and ``correct_ratios`` are not defined here; they have to
    be available from the enclosing scope.
    """
    outcome = srmr(s, fs, fast=False)
    # srmr yields a pair; only the ratio is checked.
    ratio, _avg_energy = outcome
    reference = correct_ratios[0]
    assert np.allclose(ratio, reference, rtol=1e-6, atol=1e-12)
def test_srmr():
    """SRMR with default options must equal reference entry 1.

    ``s``, ``fs`` and ``correct_ratios`` are not defined here; they have to
    be available from the enclosing scope.
    """
    outcome = srmr(s, fs)
    # srmr yields a pair; only the ratio is checked.
    ratio, _avg_energy = outcome
    reference = correct_ratios[1]
    assert np.allclose(ratio, reference, rtol=1e-6, atol=1e-12)
# Dependency to install before running:
#   pip install git+https://github.com/detly/gammatone.git
import sys

import librosa
from srmrpy import srmr

# Load the audio file named by the first command-line argument, compute its
# SRMR and print the ratio.  The second value returned by srmr is not used.
audio_path = sys.argv[1]
samples, sample_rate = librosa.core.load(audio_path)
ratio, _energy = srmr(samples, sample_rate)
print(ratio)
def srmr(
        signal,
        sample_rate,
        n_cochlear_filters=23,
        low_freq=125,
        min_cf=4,
        max_cf=128,
        fast=False,
        norm=False,
):
    """
    Compute the SRMR with the SRMRpy package, supporting independent axes.

    The last axis of ``signal`` is handed to ``srmrpy.srmr``; every leading
    axis is treated as independent.  A 1-D input yields the plain first
    output of ``srmrpy.srmr``, an N-D input yields an array shaped like
    ``signal.shape[:-1]``.

    Note:
        The results differ slightly from the Matlab implementation, although
        the behavior of both implementations remains highly correlated.
        Enabling ``fast`` or ``norm`` drastically changes the absolute values
        of the results because of changes in the gammatone package.  Check
        the correlation with the Matlab implementation before enabling the
        normalized or the fast SRMR.

    Raises:
        AssertionError: If the SRMRpy package cannot be imported, or if a
            leading axis has 30 or more entries.
        NotImplementedError: If ``signal`` is zero-dimensional.

    >>> import paderbox as pb
    >>> a = pb.testing.testfile_fetcher.get_file_path('speech_bab_0dB.wav')
    >>> a = pb.io.load_audio(a)
    >>> srmr(a, 16000)  # doctest: +ELLIPSIS
    1.8659610077...
    >>> srmr([a, a], 16000)
    array([1.86596101, 1.86596101])
    """
    try:
        import srmrpy
    except ImportError:
        raise AssertionError(
            'To use this srmr implementation, install the SRMRpy package from\n'
            'https://github.com/jfsantos/SRMRpy\n')

    signal = np.asarray(signal)
    if signal.ndim == 0:
        raise NotImplementedError(signal.ndim)

    options = dict(
        n_cochlear_filters=n_cochlear_filters,
        low_freq=low_freq,
        min_cf=min_cf,
        max_cf=max_cf,
        fast=fast,
        norm=norm,
    )

    def ratio_of(waveform):
        # TODO: Add option to also return the SRMR per gammatone filterbank
        #       (typically unused in evaluations)
        return srmrpy.srmr(waveform, sample_rate, **options)[0]

    if signal.ndim == 1:
        return ratio_of(signal)

    leading_shape = signal.shape[:-1]
    # Sanity check on the independent axes; presumably this catches a long
    # (time) axis that was passed in a leading position by mistake.
    for axis, size in enumerate(leading_shape):
        assert size < 30, (axis, signal.shape)  # NOQA

    ratios = [ratio_of(signal[index]) for index in np.ndindex(*leading_shape)]
    return np.array(ratios).reshape(leading_shape)