def time_axis(args):
    """
    Build a time axis with one point per sample of the signal.

    Reads the signal through get_sig(args) and 'fs' through unroll_args.

    :param args: argument bundle understood by get_sig / unroll_args
    :return: 1-D array of len(sig) evenly spaced values running from 0 to
             len(sig) / fs, both end points included (np.linspace default).
             Presumably seconds if fs is in Hz - confirm with the caller.
    """
    samples = get_sig(args)
    fs = unroll_args(args, ['fs'])
    n_samples = len(samples)
    return np.linspace(0, n_samples / fs, n_samples)
def energy_envelope(args):
    """
    Smooth the rectified signal with a window of nfft points.

    The absolute value of the signal is convolved ('same' mode, so the output
    has as many points as the signal) with the window returned by
    _cached_get_window('hanning', nfft).

    :param args: argument bundle understood by get_sig / unroll_args
    :return: the smoothed envelope as a column vector of shape (n, 1)
    """
    samples = get_sig(args)
    nfft = unroll_args(args, ['nfft'])
    smoothing_window = _cached_get_window('hanning', nfft)
    smoothed = np.convolve(np.abs(samples), smoothing_window, 'same')
    # Callers get a 2-D column, not a flat vector
    return smoothed[:, np.newaxis]
def total_energy(args):
    """
    Sum the whole PSD and scale it by fs / nfft.

    :param args: argument bundle understood by get_psd / unroll_args
    :return: a single scalar, np.sum(psd) * (fs / nfft)
    """
    fs, nfft = unroll_args(args, ['fs', 'nfft'])
    psd = get_psd(args)

    # NOTE: Eq (6.1) of Raven is the calculation below, yet Raven then states the
    # result is in decibels - which this value is not. Left as is; unclear.
    bin_width = fs / nfft
    return np.sum(psd) * bin_width
def lp_coefficients(args):
    """
    Compute linear-prediction coefficients frame by frame.

    The signal is cut into segments by split_segments (tail excluded); each
    frame is multiplied by the window and handed to lp_coefficients_frame.

    :param args: argument bundle understood by get_sig / unroll_args. An
                 optional 'window' entry overrides the default window.
    :return: float32 array of shape (order, nsegs), one column per frame
    """
    sig = get_sig(args)
    # 'fs' is requested but not used here; kept so unroll_args sees the same keys
    nfft, _fs, noverlap, win_length, order = unroll_args(
        args, ['nfft', 'fs', 'noverlap', 'win_length', 'order'])

    # NOTE(review): the default window has nfft points while frames are cut
    # using win_length - presumably the two are expected to be equal; confirm.
    default_window = _cached_get_window('hanning', nfft)
    window = unroll_args(args, [('window', default_window)])

    nsegs, segs = split_segments(len(sig), win_length, noverlap, incltail=False)

    coeffs = np.zeros((order, nsegs), dtype=np.float32)
    for col in range(nsegs):
        beg, end = segs[col]
        windowed = sig[beg:end] * window
        coeffs[:, col] = lp_coefficients_frame(windowed, order)
    return coeffs
def lpc_spectrum(args):
    """
    Compute the log-magnitude LPC spectrum frame by frame.

    The signal is cut into segments by split_segments (tail excluded); each
    frame is multiplied by the window and passed to lpc_spectrum_frame, whose
    nfft-point result fills one column.

    :param args: argument bundle understood by get_sig / unroll_args. An
                 optional 'window' entry overrides the default window.
    :return: array of shape (nfft, nsegs) holding log10 of the magnitude
    """
    sig = get_sig(args)
    # 'fs' is requested but not used here; kept so unroll_args sees the same keys
    nfft, _fs, noverlap, win_length, order = unroll_args(
        args, ['nfft', 'fs', 'noverlap', 'win_length', 'order'])

    # NOTE(review): the default window has nfft points while frames are cut
    # using win_length - presumably the two are expected to be equal; confirm.
    default_window = _cached_get_window('hanning', nfft)
    window = unroll_args(args, [('window', default_window)])

    nsegs, segs = split_segments(len(sig), win_length, noverlap, incltail=False)

    spectra = np.zeros((nfft, nsegs), dtype=np.complex64)
    for col in range(nsegs):
        beg, end = segs[col]
        windowed = sig[beg:end] * window
        spectra[:, col] = lpc_spectrum_frame(windowed, order, nfft)

    magnitude = np.abs(spectra)
    return np.log10(magnitude)
def spectral_flatness(args):
    """
    Delegate to rosaft.spectral_flatness using the precomputed PSD.

    :param args: argument bundle understood by get_psd / unroll_args
    :return: whatever rosaft.spectral_flatness returns for S=psd
    """
    psd = get_psd(args)
    nfft, noverlap = unroll_args(args, ['nfft', 'noverlap'])
    # Hop between consecutive frames is the part of the window that does not overlap
    return rosaft.spectral_flatness(
        y=None, S=psd, n_fft=nfft, hop_length=nfft - noverlap)
def mean_frequency(args):
    """
    Compute the spectrum-weighted mean frequency of each column of mtspect(args).

    Each of the nfft // 2 + 1 rows is weighted by its bin index; the weighted
    column sums are divided by the plain column sums and scaled by fs / nfft to
    turn a bin index into a frequency.

    :param args: argument bundle understood by mtspect / unroll_args
    :return: 1-D array with one value per column of the spectrum
    """
    fs, nfft = unroll_args(args, ['fs', 'nfft'])
    spect = mtspect(args)

    n_bins = nfft // 2 + 1
    bin_index = np.arange(n_bins).reshape((n_bins, 1))
    weighted = spect * bin_index

    return np.sum(weighted, axis=0) / np.sum(spect, axis=0) * fs / nfft
def _harmonic_and_pitch(args):
    """
    Computes harmonic ratio and pitch, one pair of values per frame.

    For each frame a normalised autocorrelation (Gamma) is built between the
    first sign change of the autocorrelation and a maximum lag. The pitch is
    the lowest frequency among the three lags with the largest Gamma, and the
    harmonic ratio is Gamma at that lag.

    :param args: argument bundle understood by get_sig / unroll_args
    :return: tuple (harmonic ratios, pitches), each an array with nsegs entries
    """
    sig = get_sig(args)
    fs, noverlap, win_length = unroll_args(args, ['fs', 'noverlap', 'win_length'])
    nsegs, segs = split_segments(len(sig), win_length, noverlap, incltail=False)

    harmonic_ratios = []
    pitches = []

    for seg_idx in range(nsegs):
        beg, end = segs[seg_idx]
        frame = sig[beg:end]
        frame_len = len(frame)

        # Upper lag bound; recomputed per frame because it may be clamped below.
        # Presumably 16 ms worth of samples if fs is in Hz - confirm.
        max_lag = int(np.round(0.016 * fs) - 1)

        autocorr = np.correlate(frame, frame, mode='full')
        # Centre of the full autocorrelation is the zero-lag term
        zero_lag = autocorr[frame_len - 1]
        # Keep positive lags only (the very last one is dropped as well)
        autocorr = autocorr[frame_len:-1]

        # The first sign change of the autocorrelation marks the lowest usable lag
        (sign_changes,) = np.nonzero(np.diff(np.sign(autocorr)))
        if len(sign_changes) == 0:
            first_crossing = len(autocorr) - 1
        else:
            first_crossing = sign_changes[0]

        if max_lag > len(autocorr):
            max_lag = len(autocorr) - 1

        gamma = np.zeros(max_lag, dtype=np.float64)
        cum_energy = np.cumsum(frame**2)
        normaliser = np.sqrt(zero_lag * cum_energy[max_lag:first_crossing:-1]) + eps
        gamma[first_crossing:max_lag] = autocorr[first_crossing:max_lag] / normaliser

        if len(gamma) == 0:
            hr, f0 = 1.0, 0.0
        else:
            # Take the 3 strongest lags rather than only the maximum, since noise
            # can easily distort a single peak
            top_lags = np.argsort(gamma)[-3:][::-1]
            candidates = fs / (top_lags + eps)

            # The fundamental is taken to be the smallest candidate frequency
            pick = np.argmin(candidates)
            f0 = candidates[pick]
            hr = gamma[top_lags[pick]]

        harmonic_ratios.append(hr)
        pitches.append(f0)

    return np.array(harmonic_ratios), np.array(pitches)
def chroma_stft(args):
    """
    Delegate to rosaft.chroma_stft using the precomputed PSD.

    :param args: argument bundle understood by get_psd / unroll_args
    :return: whatever rosaft.chroma_stft returns for S=psd at sample rate fs
    """
    psd = get_psd(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    # Hop between consecutive frames is the part of the window that does not overlap
    return rosaft.chroma_stft(
        y=None, sr=fs, S=psd, n_fft=nfft, hop_length=nfft - noverlap)
def spectral_rolloff(args):
    """
    Delegate to rosaft.spectral_rolloff using the precomputed PSD.

    :param args: argument bundle understood by get_psd / unroll_args
    :return: whatever rosaft.spectral_rolloff returns for S=psd at sample rate fs
    """
    psd = get_psd(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    # Hop between consecutive frames is the part of the window that does not overlap
    return rosaft.spectral_rolloff(
        y=None, sr=fs, S=psd, n_fft=nfft, hop_length=nfft - noverlap)
def zero_crossing_rate(args):
    """
    Compute the zero crossing rate of the raw signal via rosaft.zero_crossing_rate.

    Frames are nfft samples long, advance by nfft - noverlap samples and are
    not centred (center=False).

    :param args: argument bundle understood by get_sig / unroll_args
    :return: the rates flattened into a column vector of shape (n, 1)
    """
    sig = get_sig(args)
    nfft, noverlap = unroll_args(args, ['nfft', 'noverlap'])
    rates = rosaft.zero_crossing_rate(
        y=sig, frame_length=nfft, hop_length=nfft - noverlap, center=False)
    return rates.reshape((rates.size, 1))
def mfc(args):
    """
    Project the squared PSD onto a Mel filter bank and convert it with power_to_db.

    Optional entries of args and their defaults: 'ncep' (20), 'fmin' (0.0) and
    'fmax' (None, meaning fs // 2). The filter bank has 2 * ncep bands.

    :param args: argument bundle understood by get_psd / unroll_args
    :return: power_to_db applied to np.dot(mel filter bank, psd ** 2)
    """
    squared_psd = get_psd(args)**2
    fs, nfft, ncep, fmin, fmax = unroll_args(
        args, ['fs', 'nfft', ('ncep', 20), ('fmin', 0.0), ('fmax', None)])

    # No upper limit supplied: fall back to half of the sample rate
    fmax = fs // 2 if fmax is None else fmax

    mel_filters = _cached_get_mel_filter(
        sr=fs, n_fft=nfft, n_mels=ncep * 2, fmin=fmin, fmax=fmax)
    return power_to_db(np.dot(mel_filters, squared_psd))
def spectral_contrast(args):
    """
    Delegate to rosaft.spectral_contrast using the precomputed PSD.

    The lowest band edge fmin is 200 unless fs is below 12800, in which case
    it is lowered to int(fs / 2 ** 6).

    :param args: argument bundle understood by get_psd / unroll_args
    :return: whatever rosaft.spectral_contrast returns for S=psd at sample rate fs
    """
    psd = get_psd(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])

    # Presumably the number of octave bands rosaft uses by default - confirm
    n_bands = 6
    fmin = int(fs / 2.0**n_bands) if fs < 12800 else 200

    return rosaft.spectral_contrast(
        y=None, sr=fs, S=psd, n_fft=nfft, hop_length=nfft - noverlap, fmin=fmin)
def mfcc(args):
    """
    Apply a DCT basis to the output of mfc(args) and keep ncep coefficients.

    :param args: argument bundle understood by mfc / unroll_args; the optional
                 'ncep' entry defaults to 20
    :return: np.dot(dct(ncep, number of rows of mfc output), mfc output)
    """
    ncep = unroll_args(args, [('ncep', 20)])
    mel_spect = mfc(args)
    n_rows = mel_spect.shape[0]
    # dct(...) builds the basis; presumably librosa's DCT filter - confirm
    dct_basis = dct(ncep, n_rows)
    return np.dot(dct_basis, mel_spect)
def mfcc(args):
    """
    Apply the filters.dct basis to the output of mfc(args) and keep ncep coefficients.

    :param args: argument bundle understood by mfc / unroll_args; the optional
                 'ncep' entry defaults to 20
    :return: np.dot(filters.dct(ncep, number of rows of mfc output), mfc output)
    """
    ncep = unroll_args(args, [('ncep', 20)])
    mel_spect = mfc(args)
    dct_basis = filters.dct(ncep, mel_spect.shape[0])
    return np.dot(dct_basis, mel_spect)
def duration(args):
    """
    Return end - start wrapped in a 1x1 float32 array.

    :param args: argument bundle understood by unroll_args; must provide
                 'start' and 'end'
    :return: float32 array of shape (1, 1) holding end - start
    """
    start, end = unroll_args(args, ['start', 'end'])
    return np.full((1, 1), end - start, dtype=np.float32)
def chroma_cens(args):
    """
    Delegate to rosaft.chroma_cens on the raw signal.

    :param args: argument bundle understood by get_sig / unroll_args
    :return: whatever rosaft.chroma_cens returns for y=sig at sample rate fs
    """
    sig = get_sig(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    # nfft is only needed to derive the hop size
    return rosaft.chroma_cens(y=sig, sr=fs, hop_length=nfft - noverlap)
def _harmonic_and_pitch(args):
    """
    Computes harmonic ratio and pitch, one pair of values per frame.

    For each frame a normalised autocorrelation (Gamma) is built between the
    first sign change of the autocorrelation and a maximum lag. Frames whose
    Gamma has a zero crossing rate above 0.15 get HR = 0 and f0 = 0. Otherwise
    HR is the maximum of Gamma and f0 is fs divided by the lag of that maximum;
    f0 is zeroed when it exceeds 5000 or when HR is below 0.1.

    :param args: argument bundle understood by get_sig / unroll_args
    :return: tuple (harmonic ratios, pitches), each an array with nsegs entries
    """
    sig = get_sig(args)
    fs, noverlap, win_length = unroll_args(args, ['fs', 'noverlap', 'win_length'])
    siglen = len(sig)
    nsegs, segs = split_segments(siglen, win_length, noverlap, incltail=False)

    HRs = []
    F0s = []

    for i in range(nsegs):
        seg_beg, seg_end = segs[i, :]
        frame = sig[seg_beg:seg_end]

        # M is used as an array size and as a slice bound, so it must be an
        # integer: np.round returns a float, which NumPy rejects in both roles.
        M = int(np.round(0.016 * fs) - 1)

        R = np.correlate(frame, frame, mode='full')
        # Centre of the full autocorrelation is the zero-lag term
        g = R[len(frame) - 1]
        # Keep positive lags only (the very last one is dropped as well)
        R = R[len(frame):-1]

        # estimate m0 (as the first zero crossing of R)
        [a, ] = np.nonzero(np.diff(np.sign(R)))

        if len(a) == 0:
            m0 = len(R) - 1
        else:
            m0 = a[0]
        if M > len(R):
            M = len(R) - 1

        Gamma = np.zeros(M, dtype=np.float64)
        CSum = np.cumsum(frame ** 2)
        Gamma[m0:M] = R[m0:M] / (np.sqrt((g * CSum[M:m0:-1])) + eps)

        ZCR = frame_zcr(Gamma)

        if ZCR > 0.15:
            HR = 0.0
            f0 = 0.0
        else:
            if len(Gamma) == 0:
                # Gamma is empty only when M == 0; nothing to search
                HR = 1.0
                blag = 0.0
            else:
                HR = np.max(Gamma)
                blag = np.argmax(Gamma)

            # Get fundamental frequency:
            f0 = fs / (blag + eps)
            if f0 > 5000:
                f0 = 0.0
            if HR < 0.1:
                f0 = 0.0

        HRs.append(HR)
        F0s.append(f0)

    return np.array(HRs), np.array(F0s)